From 532ae2f10e6eab2ec66ecad805d57d3d70cea020 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 12 Nov 2018 18:27:15 +0800 Subject: [PATCH 1/3] sctp: do reuseport_select_sock in __sctp_rcv_lookup_endpoint This is a part of sk_reuseport support for sctp, and it selects a sock by the hashkey of lport, paddr and dport by default. It will work until sk_reuseport support is added in sctp_get_port_local() in the next patch. v1->v2: - define lport as __be16 instead of __be32 as Marcelo pointed in __sctp_rcv_lookup_endpoint(). Acked-by: Neil Horman Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/input.c | 69 ++++++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index 7ab08a5b36dc..00f995e37795 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -57,6 +57,7 @@ #include #include #include +#include /* Forward declarations for internal helpers. */ static int sctp_rcv_ootb(struct sk_buff *); @@ -65,8 +66,10 @@ static struct sctp_association *__sctp_rcv_lookup(struct net *net, const union sctp_addr *paddr, const union sctp_addr *laddr, struct sctp_transport **transportp); -static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net, - const union sctp_addr *laddr); +static struct sctp_endpoint *__sctp_rcv_lookup_endpoint( + struct net *net, struct sk_buff *skb, + const union sctp_addr *laddr, + const union sctp_addr *daddr); static struct sctp_association *__sctp_lookup_association( struct net *net, const union sctp_addr *local, @@ -171,7 +174,7 @@ int sctp_rcv(struct sk_buff *skb) asoc = __sctp_rcv_lookup(net, skb, &src, &dest, &transport); if (!asoc) - ep = __sctp_rcv_lookup_endpoint(net, &dest); + ep = __sctp_rcv_lookup_endpoint(net, skb, &dest, &src); /* Retrieve the common input handling substructure. */ rcvr = asoc ? &asoc->base : &ep->base; @@ -771,16 +774,35 @@ void sctp_unhash_endpoint(struct sctp_endpoint *ep) local_bh_enable(); } +static inline __u32 sctp_hashfn(const struct net *net, __be16 lport, + const union sctp_addr *paddr, __u32 seed) +{ + __u32 addr; + + if (paddr->sa.sa_family == AF_INET6) + addr = jhash(&paddr->v6.sin6_addr, 16, seed); + else + addr = (__force __u32)paddr->v4.sin_addr.s_addr; + + return jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 | + (__force __u32)lport, net_hash_mix(net), seed); +} + /* Look up an endpoint. */ -static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net, - const union sctp_addr *laddr) +static struct sctp_endpoint *__sctp_rcv_lookup_endpoint( + struct net *net, struct sk_buff *skb, + const union sctp_addr *laddr, + const union sctp_addr *paddr) { struct sctp_hashbucket *head; struct sctp_ep_common *epb; struct sctp_endpoint *ep; + struct sock *sk; + __be16 lport; int hash; - hash = sctp_ep_hashfn(net, ntohs(laddr->v4.sin_port)); + lport = laddr->v4.sin_port; + hash = sctp_ep_hashfn(net, ntohs(lport)); head = &sctp_ep_hashtable[hash]; read_lock(&head->lock); sctp_for_each_hentry(epb, &head->chain) { @@ -792,6 +814,15 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net, ep = sctp_sk(net->sctp.ctl_sock)->ep; hit: + sk = ep->base.sk; + if (sk->sk_reuseport) { + __u32 phash = sctp_hashfn(net, lport, paddr, 0); + + sk = reuseport_select_sock(sk, phash, skb, + sizeof(struct sctphdr)); + if (sk) + ep = sctp_sk(sk)->ep; + } sctp_endpoint_hold(ep); read_unlock(&head->lock); return ep; @@ -830,35 +861,17 @@ out: static inline __u32 sctp_hash_obj(const void *data, u32 len, u32 seed) { const struct sctp_transport *t = data; - const union sctp_addr *paddr = &t->ipaddr; - const struct net *net = sock_net(t->asoc->base.sk); - __be16 lport = htons(t->asoc->base.bind_addr.port); - __u32 addr; - if (paddr->sa.sa_family == AF_INET6) - addr = jhash(&paddr->v6.sin6_addr, 16, seed); - else - addr = (__force __u32)paddr->v4.sin_addr.s_addr; - - return jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 | - (__force __u32)lport, net_hash_mix(net), seed); + return sctp_hashfn(sock_net(t->asoc->base.sk), + htons(t->asoc->base.bind_addr.port), + &t->ipaddr, seed); } static inline __u32 sctp_hash_key(const void *data, u32 len, u32 seed) { const struct sctp_hash_cmp_arg *x = data; - const union sctp_addr *paddr = x->paddr; - const struct net *net = x->net; - __be16 lport = x->lport; - __u32 addr; - if (paddr->sa.sa_family == AF_INET6) - addr = jhash(&paddr->v6.sin6_addr, 16, seed); - else - addr = (__force __u32)paddr->v4.sin_addr.s_addr; - - return jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 | - (__force __u32)lport, net_hash_mix(net), seed); + return sctp_hashfn(x->net, x->lport, x->paddr, seed); } static const struct rhashtable_params sctp_hash_params = { From 76c6d988aeb3c15d57ea0c245a3b5f27802c1fbe Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 12 Nov 2018 18:27:16 +0800 Subject: [PATCH 2/3] sctp: add sock_reuseport for the sock in __sctp_hash_endpoint This is a part of sk_reuseport support for sctp. It defines a helper sctp_bind_addrs_check() to check if the bind_addrs in two socks are matched. It will add sock_reuseport if they are completely matched, and return err if they are partly matched, and alloc sock_reuseport if all socks are not matched at all. It will work until sk_reuseport support is added in sctp_get_port_local() in the next patch. v1->v2: - use 'laddr->valid && laddr2->valid' check instead as Marcelo pointed in sctp_bind_addrs_check(). Acked-by: Neil Horman Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 2 +- include/net/sctp/structs.h | 2 ++ net/core/sock_reuseport.c | 1 + net/sctp/bind_addr.c | 28 ++++++++++++++++++ net/sctp/input.c | 60 +++++++++++++++++++++++++++++++++----- net/sctp/socket.c | 3 +- 6 files changed, 85 insertions(+), 11 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 9a3b48a35e90..cdf2e80abc44 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -152,7 +152,7 @@ int sctp_primitive_RECONF(struct net *net, struct sctp_association *asoc, */ int sctp_rcv(struct sk_buff *skb); int sctp_v4_err(struct sk_buff *skb, u32 info); -void sctp_hash_endpoint(struct sctp_endpoint *); +int sctp_hash_endpoint(struct sctp_endpoint *ep); void sctp_unhash_endpoint(struct sctp_endpoint *); struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *, struct sctphdr *, struct sctp_association **, diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index a11f93790476..15d017f33a46 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1190,6 +1190,8 @@ int sctp_bind_addr_conflict(struct sctp_bind_addr *, const union sctp_addr *, struct sctp_sock *, struct sctp_sock *); int sctp_bind_addr_state(const struct sctp_bind_addr *bp, const union sctp_addr *addr); +int sctp_bind_addrs_check(struct sctp_sock *sp, + struct sctp_sock *sp2, int cnt2); union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, const union sctp_addr *addrs, int addrcnt, diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index ba5cba56f574..d8fe3e549373 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -187,6 +187,7 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany) call_rcu(&old_reuse->rcu, reuseport_free_rcu); return 0; } +EXPORT_SYMBOL(reuseport_add_sock); void reuseport_detach_sock(struct sock *sk) { diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 7df3704982f5..ebf28adba789 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -337,6 +337,34 @@ int sctp_bind_addr_match(struct sctp_bind_addr *bp, return match; } +int sctp_bind_addrs_check(struct sctp_sock *sp, + struct sctp_sock *sp2, int cnt2) +{ + struct sctp_bind_addr *bp2 = &sp2->ep->base.bind_addr; + struct sctp_bind_addr *bp = &sp->ep->base.bind_addr; + struct sctp_sockaddr_entry *laddr, *laddr2; + bool exist = false; + int cnt = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(laddr, &bp->address_list, list) { + list_for_each_entry_rcu(laddr2, &bp2->address_list, list) { + if (sp->pf->af->cmp_addr(&laddr->a, &laddr2->a) && + laddr->valid && laddr2->valid) { + exist = true; + goto next; + } + } + cnt = 0; + break; +next: + cnt++; + } + rcu_read_unlock(); + + return (cnt == cnt2) ? 0 : (exist ? -EEXIST : 1); +} + /* Does the address 'addr' conflict with any addresses in * the bp. */ diff --git a/net/sctp/input.c b/net/sctp/input.c index 00f995e37795..d7a649d240e5 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -724,43 +724,87 @@ discard: } /* Insert endpoint into the hash table. */ -static void __sctp_hash_endpoint(struct sctp_endpoint *ep) +static int __sctp_hash_endpoint(struct sctp_endpoint *ep) { - struct net *net = sock_net(ep->base.sk); - struct sctp_ep_common *epb; + struct sock *sk = ep->base.sk; + struct net *net = sock_net(sk); struct sctp_hashbucket *head; + struct sctp_ep_common *epb; epb = &ep->base; - epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port); head = &sctp_ep_hashtable[epb->hashent]; + if (sk->sk_reuseport) { + bool any = sctp_is_ep_boundall(sk); + struct sctp_ep_common *epb2; + struct list_head *list; + int cnt = 0, err = 1; + + list_for_each(list, &ep->base.bind_addr.address_list) + cnt++; + + sctp_for_each_hentry(epb2, &head->chain) { + struct sock *sk2 = epb2->sk; + + if (!net_eq(sock_net(sk2), net) || sk2 == sk || + !uid_eq(sock_i_uid(sk2), sock_i_uid(sk)) || + !sk2->sk_reuseport) + continue; + + err = sctp_bind_addrs_check(sctp_sk(sk2), + sctp_sk(sk), cnt); + if (!err) { + err = reuseport_add_sock(sk, sk2, any); + if (err) + return err; + break; + } else if (err < 0) { + return err; + } + } + + if (err) { + err = reuseport_alloc(sk, any); + if (err) + return err; + } + } + write_lock(&head->lock); hlist_add_head(&epb->node, &head->chain); write_unlock(&head->lock); + return 0; } /* Add an endpoint to the hash. Local BH-safe. */ -void sctp_hash_endpoint(struct sctp_endpoint *ep) +int sctp_hash_endpoint(struct sctp_endpoint *ep) { + int err; + local_bh_disable(); - __sctp_hash_endpoint(ep); + err = __sctp_hash_endpoint(ep); local_bh_enable(); + + return err; } /* Remove endpoint from the hash table. */ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep) { - struct net *net = sock_net(ep->base.sk); + struct sock *sk = ep->base.sk; struct sctp_hashbucket *head; struct sctp_ep_common *epb; epb = &ep->base; - epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port); + epb->hashent = sctp_ep_hashfn(sock_net(sk), epb->bind_addr.port); head = &sctp_ep_hashtable[epb->hashent]; + if (rcu_access_pointer(sk->sk_reuseport_cb)) + reuseport_detach_sock(sk); + write_lock(&head->lock); hlist_del_init(&epb->node); write_unlock(&head->lock); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 739f3e50120d..2e955f1dbe3f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -7852,8 +7852,7 @@ static int sctp_listen_start(struct sock *sk, int backlog) } sk->sk_max_ack_backlog = backlog; - sctp_hash_endpoint(ep); - return 0; + return sctp_hash_endpoint(ep); } /* From 6ba84574026792ce33a40c7da721dea36d0f3973 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 12 Nov 2018 18:27:17 +0800 Subject: [PATCH 3/3] sctp: process sk_reuseport in sctp_get_port_local When socks' sk_reuseport is set, the same port and address are allowed to be bound into these socks who have the same uid. Note that the difference from sk_reuse is that it allows multiple socks to listen on the same port and address. Acked-by: Neil Horman Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 +++- net/sctp/socket.c | 46 +++++++++++++++++++++++++++----------- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 15d017f33a46..af9d494120ba 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -96,7 +96,9 @@ struct sctp_stream; struct sctp_bind_bucket { unsigned short port; - unsigned short fastreuse; + signed char fastreuse; + signed char fastreuseport; + kuid_t fastuid; struct hlist_node node; struct hlist_head owner; struct net *net; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 2e955f1dbe3f..5299add6d7aa 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -7644,8 +7644,10 @@ static struct sctp_bind_bucket *sctp_bucket_create( static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) { - bool reuse = (sk->sk_reuse || sctp_sk(sk)->reuse); + struct sctp_sock *sp = sctp_sk(sk); + bool reuse = (sk->sk_reuse || sp->reuse); struct sctp_bind_hashbucket *head; /* hash list */ + kuid_t uid = sock_i_uid(sk); struct sctp_bind_bucket *pp; unsigned short snum; int ret; @@ -7721,7 +7723,10 @@ pp_found: pr_debug("%s: found a possible match\n", __func__); - if (pp->fastreuse && reuse && sk->sk_state != SCTP_SS_LISTENING) + if ((pp->fastreuse && reuse && + sk->sk_state != SCTP_SS_LISTENING) || + (pp->fastreuseport && sk->sk_reuseport && + uid_eq(pp->fastuid, uid))) goto success; /* Run through the list of sockets bound to the port @@ -7735,16 +7740,18 @@ pp_found: * in an endpoint. */ sk_for_each_bound(sk2, &pp->owner) { - struct sctp_endpoint *ep2; - ep2 = sctp_sk(sk2)->ep; + struct sctp_sock *sp2 = sctp_sk(sk2); + struct sctp_endpoint *ep2 = sp2->ep; if (sk == sk2 || - (reuse && (sk2->sk_reuse || sctp_sk(sk2)->reuse) && - sk2->sk_state != SCTP_SS_LISTENING)) + (reuse && (sk2->sk_reuse || sp2->reuse) && + sk2->sk_state != SCTP_SS_LISTENING) || + (sk->sk_reuseport && sk2->sk_reuseport && + uid_eq(uid, sock_i_uid(sk2)))) continue; - if (sctp_bind_addr_conflict(&ep2->base.bind_addr, addr, - sctp_sk(sk2), sctp_sk(sk))) { + if (sctp_bind_addr_conflict(&ep2->base.bind_addr, + addr, sp2, sp)) { ret = (long)sk2; goto fail_unlock; } @@ -7767,19 +7774,32 @@ pp_not_found: pp->fastreuse = 1; else pp->fastreuse = 0; - } else if (pp->fastreuse && - (!reuse || sk->sk_state == SCTP_SS_LISTENING)) - pp->fastreuse = 0; + + if (sk->sk_reuseport) { + pp->fastreuseport = 1; + pp->fastuid = uid; + } else { + pp->fastreuseport = 0; + } + } else { + if (pp->fastreuse && + (!reuse || sk->sk_state == SCTP_SS_LISTENING)) + pp->fastreuse = 0; + + if (pp->fastreuseport && + (!sk->sk_reuseport || !uid_eq(pp->fastuid, uid))) + pp->fastreuseport = 0; + } /* We are set, so fill up all the data in the hash table * entry, tie the socket list information with the rest of the * sockets FIXME: Blurry, NPI (ipg). */ success: - if (!sctp_sk(sk)->bind_hash) { + if (!sp->bind_hash) { inet_sk(sk)->inet_num = snum; sk_add_bind_node(sk, &pp->owner); - sctp_sk(sk)->bind_hash = pp; + sp->bind_hash = pp; } ret = 0;