mirror of
https://github.com/Fishwaldo/Star64_linux.git
synced 2025-06-23 07:01:23 +00:00
fib: RCU conversion of fib_lookup()
fib_lookup() is converted to be called in an RCU protected context, so no reference is taken and released on a contended cache line (fib_clntref).

fib_table_lookup() and fib_semantic_match() get an additional parameter.

struct fib_info gets an rcu_head field, and is freed after an RCU grace period.

Stress test:
(Sending 160.000.000 UDP frames on same neighbour, IP route cache disabled, dual E5540 @2.53GHz, 32bit kernel, FIB_HASH) (about same results for FIB_TRIE)

Before patch:
real 1m31.199s
user 0m13.761s
sys 23m24.780s

After patch:
real 1m5.375s
user 0m14.997s
sys 15m50.115s

Before patch Profile:
13044.00 15.4% __ip_route_output_key vmlinux
8438.00 10.0% dst_destroy vmlinux
5983.00 7.1% fib_semantic_match vmlinux
5410.00 6.4% fib_rules_lookup vmlinux
4803.00 5.7% neigh_lookup vmlinux
4420.00 5.2% _raw_spin_lock vmlinux
3883.00 4.6% rt_set_nexthop vmlinux
3261.00 3.9% _raw_read_lock vmlinux
2794.00 3.3% fib_table_lookup vmlinux
2374.00 2.8% neigh_resolve_output vmlinux
2153.00 2.5% dst_alloc vmlinux
1502.00 1.8% _raw_read_lock_bh vmlinux
1484.00 1.8% kmem_cache_alloc vmlinux
1407.00 1.7% eth_header vmlinux
1406.00 1.7% ipv4_dst_destroy vmlinux
1298.00 1.5% __copy_from_user_ll vmlinux
1174.00 1.4% dev_queue_xmit vmlinux
1000.00 1.2% ip_output vmlinux

After patch Profile:
13712.00 15.8% dst_destroy vmlinux
8548.00 9.9% __ip_route_output_key vmlinux
7017.00 8.1% neigh_lookup vmlinux
4554.00 5.3% fib_semantic_match vmlinux
4067.00 4.7% _raw_read_lock vmlinux
3491.00 4.0% dst_alloc vmlinux
3186.00 3.7% neigh_resolve_output vmlinux
3103.00 3.6% fib_table_lookup vmlinux
2098.00 2.4% _raw_read_lock_bh vmlinux
2081.00 2.4% kmem_cache_alloc vmlinux
2013.00 2.3% _raw_spin_lock vmlinux
1763.00 2.0% __copy_from_user_ll vmlinux
1763.00 2.0% ip_output vmlinux
1761.00 2.0% ipv4_dst_destroy vmlinux
1631.00 1.9% eth_header vmlinux
1440.00 1.7% _raw_read_unlock_bh vmlinux

Reference results, if IP route cache is enabled:
real 0m29.718s
user 0m10.845s
sys 7m37.341s

25213.00 29.5% __ip_route_output_key vmlinux
9011.00 10.5% dst_release vmlinux
4817.00 5.6% ip_push_pending_frames vmlinux
4232.00 5.0% ip_finish_output vmlinux
3940.00 4.6% udp_sendmsg vmlinux
3730.00 4.4% __copy_from_user_ll vmlinux
3716.00 4.4% ip_route_output_flow vmlinux
2451.00 2.9% __xfrm_lookup vmlinux
2221.00 2.6% ip_append_data vmlinux
1718.00 2.0% _raw_spin_lock_bh vmlinux
1655.00 1.9% __alloc_skb vmlinux
1572.00 1.8% sock_wfree vmlinux
1345.00 1.6% kfree vmlinux

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
c2952c314b
commit
ebc0ffae5d
10 changed files with 72 additions and 77 deletions
|
@ -1773,12 +1773,15 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
|
|||
|
||||
if (rt->fl.iif == 0)
|
||||
src = rt->rt_src;
|
||||
else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) {
|
||||
src = FIB_RES_PREFSRC(res);
|
||||
fib_res_put(&res);
|
||||
} else
|
||||
src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
|
||||
else {
|
||||
rcu_read_lock();
|
||||
if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0)
|
||||
src = FIB_RES_PREFSRC(res);
|
||||
else
|
||||
src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
|
||||
RT_SCOPE_UNIVERSE);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
memcpy(addr, &src, 4);
|
||||
}
|
||||
|
||||
|
@ -2081,6 +2084,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
|
|||
* Such approach solves two big problems:
|
||||
* 1. Not simplex devices are handled properly.
|
||||
* 2. IP spoofing attempts are filtered with 100% of guarantee.
|
||||
* called with rcu_read_lock()
|
||||
*/
|
||||
|
||||
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
|
||||
|
@ -2102,7 +2106,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
|
|||
unsigned hash;
|
||||
__be32 spec_dst;
|
||||
int err = -EINVAL;
|
||||
int free_res = 0;
|
||||
struct net * net = dev_net(dev);
|
||||
|
||||
/* IP on this device is disabled. */
|
||||
|
@ -2134,12 +2137,12 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
|
|||
/*
|
||||
* Now we are ready to route packet.
|
||||
*/
|
||||
if ((err = fib_lookup(net, &fl, &res)) != 0) {
|
||||
err = fib_lookup(net, &fl, &res);
|
||||
if (err != 0) {
|
||||
if (!IN_DEV_FORWARD(in_dev))
|
||||
goto e_hostunreach;
|
||||
goto no_route;
|
||||
}
|
||||
free_res = 1;
|
||||
|
||||
RT_CACHE_STAT_INC(in_slow_tot);
|
||||
|
||||
|
@ -2148,8 +2151,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
|
|||
|
||||
if (res.type == RTN_LOCAL) {
|
||||
err = fib_validate_source(saddr, daddr, tos,
|
||||
net->loopback_dev->ifindex,
|
||||
dev, &spec_dst, &itag, skb->mark);
|
||||
net->loopback_dev->ifindex,
|
||||
dev, &spec_dst, &itag, skb->mark);
|
||||
if (err < 0)
|
||||
goto martian_source_keep_err;
|
||||
if (err)
|
||||
|
@ -2164,9 +2167,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
|
|||
goto martian_destination;
|
||||
|
||||
err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
|
||||
done:
|
||||
if (free_res)
|
||||
fib_res_put(&res);
|
||||
out: return err;
|
||||
|
||||
brd_input:
|
||||
|
@ -2226,7 +2226,7 @@ local_input:
|
|||
rth->rt_type = res.type;
|
||||
hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
|
||||
err = rt_intern_hash(hash, rth, NULL, skb, fl.iif);
|
||||
goto done;
|
||||
goto out;
|
||||
|
||||
no_route:
|
||||
RT_CACHE_STAT_INC(in_no_route);
|
||||
|
@ -2249,21 +2249,21 @@ martian_destination:
|
|||
|
||||
e_hostunreach:
|
||||
err = -EHOSTUNREACH;
|
||||
goto done;
|
||||
goto out;
|
||||
|
||||
e_inval:
|
||||
err = -EINVAL;
|
||||
goto done;
|
||||
goto out;
|
||||
|
||||
e_nobufs:
|
||||
err = -ENOBUFS;
|
||||
goto done;
|
||||
goto out;
|
||||
|
||||
martian_source:
|
||||
err = -EINVAL;
|
||||
martian_source_keep_err:
|
||||
ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
|
||||
goto done;
|
||||
goto out;
|
||||
}
|
||||
|
||||
int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
|
||||
|
@ -2349,6 +2349,7 @@ skip_cache:
|
|||
}
|
||||
EXPORT_SYMBOL(ip_route_input_common);
|
||||
|
||||
/* called with rcu_read_lock() */
|
||||
static int __mkroute_output(struct rtable **result,
|
||||
struct fib_result *res,
|
||||
const struct flowi *fl,
|
||||
|
@ -2373,18 +2374,13 @@ static int __mkroute_output(struct rtable **result,
|
|||
if (dev_out->flags & IFF_LOOPBACK)
|
||||
flags |= RTCF_LOCAL;
|
||||
|
||||
rcu_read_lock();
|
||||
in_dev = __in_dev_get_rcu(dev_out);
|
||||
if (!in_dev) {
|
||||
rcu_read_unlock();
|
||||
if (!in_dev)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (res->type == RTN_BROADCAST) {
|
||||
flags |= RTCF_BROADCAST | RTCF_LOCAL;
|
||||
if (res->fi) {
|
||||
fib_info_put(res->fi);
|
||||
res->fi = NULL;
|
||||
}
|
||||
res->fi = NULL;
|
||||
} else if (res->type == RTN_MULTICAST) {
|
||||
flags |= RTCF_MULTICAST | RTCF_LOCAL;
|
||||
if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src,
|
||||
|
@ -2394,10 +2390,8 @@ static int __mkroute_output(struct rtable **result,
|
|||
* default one, but do not gateway in this case.
|
||||
* Yes, it is hack.
|
||||
*/
|
||||
if (res->fi && res->prefixlen < 4) {
|
||||
fib_info_put(res->fi);
|
||||
if (res->fi && res->prefixlen < 4)
|
||||
res->fi = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -2467,6 +2461,7 @@ static int __mkroute_output(struct rtable **result,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* called with rcu_read_lock() */
|
||||
static int ip_mkroute_output(struct rtable **rp,
|
||||
struct fib_result *res,
|
||||
const struct flowi *fl,
|
||||
|
@ -2509,7 +2504,6 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
|
|||
struct fib_result res;
|
||||
unsigned int flags = 0;
|
||||
struct net_device *dev_out = NULL;
|
||||
int free_res = 0;
|
||||
int err;
|
||||
|
||||
|
||||
|
@ -2636,15 +2630,12 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
|
|||
err = -ENETUNREACH;
|
||||
goto out;
|
||||
}
|
||||
free_res = 1;
|
||||
|
||||
if (res.type == RTN_LOCAL) {
|
||||
if (!fl.fl4_src)
|
||||
fl.fl4_src = fl.fl4_dst;
|
||||
dev_out = net->loopback_dev;
|
||||
fl.oif = dev_out->ifindex;
|
||||
if (res.fi)
|
||||
fib_info_put(res.fi);
|
||||
res.fi = NULL;
|
||||
flags |= RTCF_LOCAL;
|
||||
goto make_route;
|
||||
|
@ -2668,8 +2659,6 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
|
|||
make_route:
|
||||
err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags);
|
||||
|
||||
if (free_res)
|
||||
fib_res_put(&res);
|
||||
out: return err;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue