[INET]: Remove per bucket rwlock in tcp/dccp ehash table.

As was done two years ago for the IP route cache table (commit
22c047ccbc), we can avoid using one
lock per hash bucket for the huge TCP/DCCP hash tables.

On a typical x86_64 platform, this saves about 2MB or 4MB of RAM, for
little performance difference. (We hit a different cache line for the
rwlock, but then the bucket cache line has a better sharing factor
among CPUs, since we dirty it less often.) For netstat or ss commands
that want a full scan of the hash table, we perform fewer memory accesses.

Using a 'small' table of hashed rwlocks should be more than enough to
provide correct SMP concurrency between different buckets, without
using too much memory. Sizing of this table depends on
num_possible_cpus() and various CONFIG settings.

This patch provides some locking abstraction that may ease future
work using a different model for the TCP/DCCP table.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Eric Dumazet 2007-11-07 02:40:20 -08:00 committed by David S. Miller
parent efac52762b
commit 230140cffa
8 changed files with 106 additions and 37 deletions

View file

@ -37,7 +37,6 @@
* I'll experiment with dynamic table growth later.
*/
/*
 * One bucket of the ehash table: the heads of the two socket chains
 * that share a hash slot.
 */
struct inet_ehash_bucket {
/*
 * NOTE(review): the hunk header above (-37,7 +37,6) together with the
 * commit title suggests this per-bucket lock is the line being removed
 * by this change -- confirm against the raw diff.
 */
rwlock_t lock;
/* Chain walked by the hashed-socket lookup and insert paths. */
struct hlist_head chain;
/* Separate chain used for TIME_WAIT sockets. */
struct hlist_head twchain;
};
@ -100,6 +99,9 @@ struct inet_hashinfo {
* TIME_WAIT sockets use a separate chain (twchain).
*/
struct inet_ehash_bucket *ehash;
rwlock_t *ehash_locks;
unsigned int ehash_size;
unsigned int ehash_locks_mask;
/* Ok, let's try this, I give up, we do need a local binding
* TCP hash as well as the others for fast bind/connect.
@ -107,7 +109,7 @@ struct inet_hashinfo {
struct inet_bind_hashbucket *bhash;
unsigned int bhash_size;
unsigned int ehash_size;
/* Note : 4 bytes padding on 64 bit arches */
/* All sockets in TCP_LISTEN state will be in here. This is the only
* table where wildcard'd TCP sockets can exist. Hash function here
@ -134,6 +136,62 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket(
return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)];
}
/*
 * Return the rwlock associated with a given ehash hash value.
 *
 * The hash is masked with ehash_locks_mask to pick a slot in the
 * ehash_locks[] array, so several hash values (and therefore several
 * buckets) can resolve to the same lock.
 */
static inline rwlock_t *inet_ehash_lockp(
	struct inet_hashinfo *hashinfo,
	unsigned int hash)
{
	unsigned int slot = hash & hashinfo->ehash_locks_mask;

	return hashinfo->ehash_locks + slot;
}
static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
{
unsigned int i, size = 256;
#if defined(CONFIG_PROVE_LOCKING)
unsigned int nr_pcpus = 2;
#else
unsigned int nr_pcpus = num_possible_cpus();
#endif
if (nr_pcpus >= 4)
size = 512;
if (nr_pcpus >= 8)
size = 1024;
if (nr_pcpus >= 16)
size = 2048;
if (nr_pcpus >= 32)
size = 4096;
if (sizeof(rwlock_t) != 0) {
#ifdef CONFIG_NUMA
if (size * sizeof(rwlock_t) > PAGE_SIZE)
hashinfo->ehash_locks = vmalloc(size * sizeof(rwlock_t));
else
#endif
hashinfo->ehash_locks = kmalloc(size * sizeof(rwlock_t),
GFP_KERNEL);
if (!hashinfo->ehash_locks)
return ENOMEM;
for (i = 0; i < size; i++)
rwlock_init(&hashinfo->ehash_locks[i]);
}
hashinfo->ehash_locks_mask = size - 1;
return 0;
}
/*
 * Release the lock table set up by inet_ehash_locks_alloc().
 *
 * Does nothing when no table is attached.  The allocator is selected
 * with the same rule used at allocation time: with CONFIG_NUMA, a table
 * larger than PAGE_SIZE came from vmalloc() and must go back through
 * vfree(); every other table came from kmalloc() and is kfree()d.
 * ehash_locks is reset to NULL afterwards in all cases.
 */
static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
{
	if (hashinfo->ehash_locks) {
#ifdef CONFIG_NUMA
		unsigned int size = (hashinfo->ehash_locks_mask + 1) *
							sizeof(rwlock_t);
		if (size > PAGE_SIZE)
			vfree(hashinfo->ehash_locks);
		else
#endif
			/*
			 * The conditional must end before this line: the
			 * kfree() has to be compiled in for NUMA builds too,
			 * as the target of the dangling "else" above.
			 */
			kfree(hashinfo->ehash_locks);
		hashinfo->ehash_locks = NULL;
	}
}
extern struct inet_bind_bucket *
inet_bind_bucket_create(struct kmem_cache *cachep,
struct inet_bind_hashbucket *head,
@ -222,7 +280,7 @@ static inline void __inet_hash(struct inet_hashinfo *hashinfo,
sk->sk_hash = inet_sk_ehashfn(sk);
head = inet_ehash_bucket(hashinfo, sk->sk_hash);
list = &head->chain;
lock = &head->lock;
lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
write_lock(lock);
}
__sk_add_node(sk, list);
@ -253,7 +311,7 @@ static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk)
inet_listen_wlock(hashinfo);
lock = &hashinfo->lhash_lock;
} else {
lock = &inet_ehash_bucket(hashinfo, sk->sk_hash)->lock;
lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
write_lock_bh(lock);
}
@ -354,9 +412,10 @@ static inline struct sock *
*/
unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
rwlock_t *lock = inet_ehash_lockp(hashinfo, hash);
prefetch(head->chain.first);
read_lock(&head->lock);
read_lock(lock);
sk_for_each(sk, node, &head->chain) {
if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
goto hit; /* You sunk my battleship! */
@ -369,7 +428,7 @@ static inline struct sock *
}
sk = NULL;
out:
read_unlock(&head->lock);
read_unlock(lock);
return sk;
hit:
sock_hold(sk);