[NET]: Size listen hash tables using backlog hint

We currently allocate a fixed size (TCP_SYNQ_HSIZE=512) slots hash table for
each LISTEN socket, regardless of various parameters (listen backlog for
example)

On x86_64, this means order-1 allocations (might fail), even for 'small'
sockets, expecting few connections. On the contrary, a huge server wanting a
backlog of 50000 is slowed down a bit because of this fixed limit.

This patch makes the sizing of listen hash table a dynamic parameter,
depending of :
- net.core.somaxconn tunable (default is 128)
- net.ipv4.tcp_max_syn_backlog tunable (default : 256, 1024 or 128)
- backlog value given by user application  (2nd parameter of listen())

For large allocations (bigger than PAGE_SIZE), we use vmalloc() instead of
kmalloc().

We still limit memory allocation with the two existing tunables (somaxconn &
tcp_max_syn_backlog). So for standard setups, this patch actually reduce RAM
usage.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Eric Dumazet 2006-11-16 02:30:37 -08:00 committed by David S. Miller
parent 3c62f75aac
commit 72a3effaf6
9 changed files with 38 additions and 24 deletions

View file

@ -15,6 +15,7 @@
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <net/request_sock.h>
@ -29,22 +30,31 @@
* it is absolutely not enough even at 100conn/sec. 256 cures most
* of problems. This value is adjusted to 128 for very small machines
* (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
* Further increasing requires to change hash table size.
* Note : Dont forget somaxconn that may limit backlog too.
*/
int sysctl_max_syn_backlog = 256;
int reqsk_queue_alloc(struct request_sock_queue *queue,
const int nr_table_entries)
unsigned int nr_table_entries)
{
const int lopt_size = sizeof(struct listen_sock) +
nr_table_entries * sizeof(struct request_sock *);
struct listen_sock *lopt = kzalloc(lopt_size, GFP_KERNEL);
size_t lopt_size = sizeof(struct listen_sock);
struct listen_sock *lopt;
nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);
nr_table_entries = max_t(u32, nr_table_entries, 8);
nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
lopt_size += nr_table_entries * sizeof(struct request_sock *);
if (lopt_size > PAGE_SIZE)
lopt = __vmalloc(lopt_size,
GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
PAGE_KERNEL);
else
lopt = kzalloc(lopt_size, GFP_KERNEL);
if (lopt == NULL)
return -ENOMEM;
for (lopt->max_qlen_log = 6;
(1 << lopt->max_qlen_log) < sysctl_max_syn_backlog;
for (lopt->max_qlen_log = 3;
(1 << lopt->max_qlen_log) < nr_table_entries;
lopt->max_qlen_log++);
get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
@ -65,9 +75,11 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
{
/* make all the listen_opt local to us */
struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);
size_t lopt_size = sizeof(struct listen_sock) +
lopt->nr_table_entries * sizeof(struct request_sock *);
if (lopt->qlen != 0) {
int i;
unsigned int i;
for (i = 0; i < lopt->nr_table_entries; i++) {
struct request_sock *req;
@ -81,7 +93,10 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
}
BUG_TRAP(lopt->qlen == 0);
kfree(lopt);
if (lopt_size > PAGE_SIZE)
vfree(lopt);
else
kfree(lopt);
}
EXPORT_SYMBOL(reqsk_queue_destroy);