inet: add RCU protection to inet->opt

We lack proper synchronization to manipulate inet->opt ip_options

Problem is ip_make_skb() calls ip_setup_cork() and
ip_setup_cork() possibly makes a copy of ipc->opt (struct ip_options),
without any protection against another thread manipulating inet->opt.

Another thread can change inet->opt pointer and free old one under us.

Use RCU to protect inet->opt (changed to inet->inet_opt).

Instead of handling atomic refcounts, just copy ip_options when
necessary, to avoid cache line dirtying.

We cant insert an rcu_head in struct ip_options since its included in
skb->cb[], so this patch is large because I had to introduce a new
ip_options_rcu structure.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Eric Dumazet 2011-04-21 09:45:37 +00:00 committed by David S. Miller
parent 0a14842f5a
commit f6d8bd051c
17 changed files with 241 additions and 168 deletions

View file

@ -154,6 +154,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
struct flowi4 fl4;
struct rtable *rt;
int err;
struct ip_options_rcu *inet_opt;
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
@ -162,10 +163,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
return -EAFNOSUPPORT;
nexthop = daddr = usin->sin_addr.s_addr;
if (inet->opt && inet->opt->srr) {
inet_opt = rcu_dereference_protected(inet->inet_opt,
sock_owned_by_user(sk));
if (inet_opt && inet_opt->opt.srr) {
if (!daddr)
return -EINVAL;
nexthop = inet->opt->faddr;
nexthop = inet_opt->opt.faddr;
}
orig_sport = inet->inet_sport;
@ -186,7 +189,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
return -ENETUNREACH;
}
if (!inet->opt || !inet->opt->srr)
if (!inet_opt || !inet_opt->opt.srr)
daddr = rt->rt_dst;
if (!inet->inet_saddr)
@ -222,8 +225,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_daddr = daddr;
inet_csk(sk)->icsk_ext_hdr_len = 0;
if (inet->opt)
inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
if (inet_opt)
inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
@ -820,17 +823,18 @@ static void syn_flood_warning(const struct sk_buff *skb)
/*
* Save and compile IPv4 options into the request_sock if needed.
*/
static struct ip_options *tcp_v4_save_options(struct sock *sk,
struct sk_buff *skb)
static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
struct sk_buff *skb)
{
struct ip_options *opt = &(IPCB(skb)->opt);
struct ip_options *dopt = NULL;
const struct ip_options *opt = &(IPCB(skb)->opt);
struct ip_options_rcu *dopt = NULL;
if (opt && opt->optlen) {
int opt_size = optlength(opt);
int opt_size = sizeof(*dopt) + opt->optlen;
dopt = kmalloc(opt_size, GFP_ATOMIC);
if (dopt) {
if (ip_options_echo(dopt, skb)) {
if (ip_options_echo(&dopt->opt, skb)) {
kfree(dopt);
dopt = NULL;
}
@ -1411,6 +1415,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *key;
#endif
struct ip_options_rcu *inet_opt;
if (sk_acceptq_is_full(sk))
goto exit_overflow;
@ -1431,13 +1436,14 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newinet->inet_daddr = ireq->rmt_addr;
newinet->inet_rcv_saddr = ireq->loc_addr;
newinet->inet_saddr = ireq->loc_addr;
newinet->opt = ireq->opt;
inet_opt = ireq->opt;
rcu_assign_pointer(newinet->inet_opt, inet_opt);
ireq->opt = NULL;
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
inet_csk(newsk)->icsk_ext_hdr_len = 0;
if (newinet->opt)
inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
if (inet_opt)
inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
newinet->inet_id = newtp->write_seq ^ jiffies;
tcp_mtup_init(newsk);