net: sock_def_readable() and friends RCU conversion

sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we
need two atomic operations (and associated dirtying) per incoming
packet.

RCU conversion is pretty much needed :

1) Add a new structure, called "struct socket_wq" to hold all fields
that will need rcu_read_lock() protection (currently: a
wait_queue_head_t and a struct fasync_struct pointer).

[Future patch will add a list anchor for wakeup coalescing]

2) Attach one of such structure to each "struct socket" created in
sock_alloc_inode().

3) Respect RCU grace period when freeing a "struct socket_wq"

4) Change sk_sleep pointer in "struct sock" by sk_wq, pointer to "struct
socket_wq"

5) Change sk_sleep() function to use new sk->sk_wq instead of
sk->sk_sleep

6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside
a rcu_read_lock() section.

7) Change all sk_has_sleeper() callers to :
  - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock)
  - Use wq_has_sleeper() to eventually wakeup tasks.
  - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock)

8) sock_wake_async() is modified to use rcu protection as well.

9) Exceptions :
  macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq"
instead of dynamically allocated ones. They dont need rcu freeing.

Some cleanups or followups are probably needed, (possible
sk_callback_lock conversion to a spinlock for example...).

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Eric Dumazet 2010-04-29 11:01:49 +00:00 committed by David S. Miller
parent 83d7eb2979
commit 4381548237
16 changed files with 181 additions and 114 deletions

View file

@ -313,13 +313,16 @@ static inline int unix_writable(struct sock *sk)
static void unix_write_space(struct sock *sk)
{
read_lock(&sk->sk_callback_lock);
struct socket_wq *wq;
rcu_read_lock();
if (unix_writable(sk)) {
if (sk_has_sleeper(sk))
wake_up_interruptible_sync(sk_sleep(sk));
wq = rcu_dereference(sk->sk_wq);
if (wq_has_sleeper(wq))
wake_up_interruptible_sync(&wq->wait);
sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
read_unlock(&sk->sk_callback_lock);
rcu_read_unlock();
}
/* When dgram socket disconnects (or changes its peer), we clear its receive
@ -406,9 +409,7 @@ static int unix_release_sock(struct sock *sk, int embrion)
skpair->sk_err = ECONNRESET;
unix_state_unlock(skpair);
skpair->sk_state_change(skpair);
read_lock(&skpair->sk_callback_lock);
sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
read_unlock(&skpair->sk_callback_lock);
}
sock_put(skpair); /* It may now die */
unix_peer(sk) = NULL;
@ -1142,7 +1143,7 @@ restart:
newsk->sk_peercred.pid = task_tgid_vnr(current);
current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
newu = unix_sk(newsk);
newsk->sk_sleep = &newu->peer_wait;
newsk->sk_wq = &newu->peer_wq;
otheru = unix_sk(other);
/* copy address information from listening to new sock*/
@ -1931,12 +1932,10 @@ static int unix_shutdown(struct socket *sock, int mode)
other->sk_shutdown |= peer_mode;
unix_state_unlock(other);
other->sk_state_change(other);
read_lock(&other->sk_callback_lock);
if (peer_mode == SHUTDOWN_MASK)
sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
else if (peer_mode & RCV_SHUTDOWN)
sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
read_unlock(&other->sk_callback_lock);
}
if (other)
sock_put(other);