diff --git a/include/net/dropreason.h b/include/net/dropreason.h index c1cbcdbaf149..70539288f995 100644 --- a/include/net/dropreason.h +++ b/include/net/dropreason.h @@ -68,6 +68,9 @@ FN(IP_INADDRERRORS) \ FN(IP_INNOROUTES) \ FN(PKT_TOO_BIG) \ + FN(DUP_FRAG) \ + FN(FRAG_REASM_TIMEOUT) \ + FN(FRAG_TOO_FAR) \ FNe(MAX) /** @@ -80,6 +83,8 @@ enum skb_drop_reason { * @SKB_NOT_DROPPED_YET: skb is not dropped yet (used for no-drop case) */ SKB_NOT_DROPPED_YET = 0, + /** @SKB_CONSUMED: packet has been consumed */ + SKB_CONSUMED, /** @SKB_DROP_REASON_NOT_SPECIFIED: drop reason is not specified */ SKB_DROP_REASON_NOT_SPECIFIED, /** @SKB_DROP_REASON_NO_SOCKET: socket not found */ @@ -298,6 +303,15 @@ enum skb_drop_reason { * MTU) */ SKB_DROP_REASON_PKT_TOO_BIG, + /** @SKB_DROP_REASON_DUP_FRAG: duplicate fragment */ + SKB_DROP_REASON_DUP_FRAG, + /** @SKB_DROP_REASON_FRAG_REASM_TIMEOUT: fragment reassembly timeout */ + SKB_DROP_REASON_FRAG_REASM_TIMEOUT, + /** + * @SKB_DROP_REASON_FRAG_TOO_FAR: ipv4 fragment too far. + * (/proc/sys/net/ipv4/ipfrag_max_dist) + */ + SKB_DROP_REASON_FRAG_TOO_FAR, /** * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be * used as a real 'reason' diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 0b0876610553..b23ddec3cd5c 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -7,6 +7,7 @@ #include #include #include +#include /* Per netns frag queues directory */ struct fqdir { @@ -34,12 +35,14 @@ struct fqdir { * @INET_FRAG_LAST_IN: final fragment has arrived * @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction * @INET_FRAG_HASH_DEAD: inet_frag_kill() has not removed fq from rhashtable + * @INET_FRAG_DROP: if skbs must be dropped (instead of being consumed) */ enum { INET_FRAG_FIRST_IN = BIT(0), INET_FRAG_LAST_IN = BIT(1), INET_FRAG_COMPLETE = BIT(2), INET_FRAG_HASH_DEAD = BIT(3), + INET_FRAG_DROP = BIT(4), }; struct frag_v4_compare_key { @@ -139,7 +142,8 @@ void inet_frag_destroy(struct inet_frag_queue *q); struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key); /* Free all skbs in the queue; return the sum of their truesizes. */ -unsigned int inet_frag_rbtree_purge(struct rb_root *root); +unsigned int inet_frag_rbtree_purge(struct rb_root *root, + enum skb_drop_reason reason); static inline void inet_frag_put(struct inet_frag_queue *q) { diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 5052c66e22d2..7321ffe3a108 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -76,6 +76,7 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) if (fq->q.flags & INET_FRAG_COMPLETE) goto out; + fq->q.flags |= INET_FRAG_DROP; inet_frag_kill(&fq->q); dev = dev_get_by_index_rcu(net, fq->iif); @@ -101,7 +102,7 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) spin_unlock(&fq->q.lock); icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); - kfree_skb(head); + kfree_skb_reason(head, SKB_DROP_REASON_FRAG_REASM_TIMEOUT); goto out_rcu_unlock; out: diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1d84a17eada5..42a35b59fb1e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -94,6 +94,7 @@ EXPORT_SYMBOL(sysctl_max_skb_frags); #undef FN #define FN(reason) [SKB_DROP_REASON_##reason] = #reason, const char * const drop_reasons[] = { + [SKB_CONSUMED] = "CONSUMED", DEFINE_DROP_REASON(FN, FN) }; EXPORT_SYMBOL(drop_reasons); @@ -768,7 +769,7 @@ static void skb_free_head(struct sk_buff *skb) } } -static void skb_release_data(struct sk_buff *skb) +static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason) { struct skb_shared_info *shinfo = skb_shinfo(skb); int i; @@ -791,7 +792,7 @@ static void skb_release_data(struct sk_buff *skb) free_head: if (shinfo->frag_list) - kfree_skb_list(shinfo->frag_list); + kfree_skb_list_reason(shinfo->frag_list, reason); skb_free_head(skb); exit: @@ -854,11 +855,11 @@ void skb_release_head_state(struct sk_buff *skb) } /* Free everything but the sk_buff shell. */ -static void skb_release_all(struct sk_buff *skb) +static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason) { skb_release_head_state(skb); if (likely(skb->head)) - skb_release_data(skb); + skb_release_data(skb, reason); } /** @@ -872,7 +873,7 @@ static void skb_release_all(struct sk_buff *skb) void __kfree_skb(struct sk_buff *skb) { - skb_release_all(skb); + skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED); kfree_skbmem(skb); } EXPORT_SYMBOL(__kfree_skb); @@ -894,7 +895,10 @@ kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) DEBUG_NET_WARN_ON_ONCE(reason <= 0 || reason >= SKB_DROP_REASON_MAX); - trace_kfree_skb(skb, __builtin_return_address(0), reason); + if (reason == SKB_CONSUMED) + trace_consume_skb(skb); + else + trace_kfree_skb(skb, __builtin_return_address(0), reason); __kfree_skb(skb); } EXPORT_SYMBOL(kfree_skb_reason); @@ -1052,7 +1056,7 @@ EXPORT_SYMBOL(consume_skb); void __consume_stateless_skb(struct sk_buff *skb) { trace_consume_skb(skb); - skb_release_data(skb); + skb_release_data(skb, SKB_CONSUMED); kfree_skbmem(skb); } @@ -1077,7 +1081,7 @@ static void napi_skb_cache_put(struct sk_buff *skb) void __kfree_skb_defer(struct sk_buff *skb) { - skb_release_all(skb); + skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED); napi_skb_cache_put(skb); } @@ -1115,7 +1119,7 @@ void napi_consume_skb(struct sk_buff *skb, int budget) return; } - skb_release_all(skb); + skb_release_all(skb, SKB_CONSUMED); napi_skb_cache_put(skb); } EXPORT_SYMBOL(napi_consume_skb); @@ -1246,7 +1250,7 @@ EXPORT_SYMBOL_GPL(alloc_skb_for_msg); */ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) { - skb_release_all(dst); + skb_release_all(dst, SKB_CONSUMED); return __skb_clone(dst, src); } EXPORT_SYMBOL_GPL(skb_morph); @@ -1869,7 +1873,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); - skb_release_data(skb); + skb_release_data(skb, SKB_CONSUMED); } else { skb_free_head(skb); } @@ -6209,7 +6213,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, skb_frag_ref(skb, i); if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); - skb_release_data(skb); + skb_release_data(skb, SKB_CONSUMED); } else { /* we can reuse existing recount- all we did was * relocate values @@ -6352,7 +6356,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, kfree(data); return -ENOMEM; } - skb_release_data(skb); + skb_release_data(skb, SKB_CONSUMED); skb->head = data; skb->head_frag = 0; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index c9f9ac5013a7..7072fc0783ef 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -133,6 +133,7 @@ static void inet_frags_free_cb(void *ptr, void *arg) count = del_timer_sync(&fq->timer) ? 1 : 0; spin_lock_bh(&fq->lock); + fq->flags |= INET_FRAG_DROP; if (!(fq->flags & INET_FRAG_COMPLETE)) { fq->flags |= INET_FRAG_COMPLETE; count++; @@ -260,7 +261,8 @@ static void inet_frag_destroy_rcu(struct rcu_head *head) kmem_cache_free(f->frags_cachep, q); } -unsigned int inet_frag_rbtree_purge(struct rb_root *root) +unsigned int inet_frag_rbtree_purge(struct rb_root *root, + enum skb_drop_reason reason) { struct rb_node *p = rb_first(root); unsigned int sum = 0; @@ -274,7 +276,7 @@ unsigned int inet_frag_rbtree_purge(struct rb_root *root) struct sk_buff *next = FRAG_CB(skb)->next_frag; sum += skb->truesize; - kfree_skb(skb); + kfree_skb_reason(skb, reason); skb = next; } } @@ -284,17 +286,21 @@ EXPORT_SYMBOL(inet_frag_rbtree_purge); void inet_frag_destroy(struct inet_frag_queue *q) { - struct fqdir *fqdir; unsigned int sum, sum_truesize = 0; + enum skb_drop_reason reason; struct inet_frags *f; + struct fqdir *fqdir; WARN_ON(!(q->flags & INET_FRAG_COMPLETE)); + reason = (q->flags & INET_FRAG_DROP) ? + SKB_DROP_REASON_FRAG_REASM_TIMEOUT : + SKB_CONSUMED; WARN_ON(del_timer(&q->timer) != 0); /* Release all fragment data. */ fqdir = q->fqdir; f = fqdir->f; - sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments); + sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments, reason); sum = sum_truesize + f->qsize; call_rcu(&q->rcu, inet_frag_destroy_rcu); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index fb153569889e..69c00ffdcf3e 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -153,6 +153,7 @@ static void ip_expire(struct timer_list *t) if (qp->q.flags & INET_FRAG_COMPLETE) goto out; + qp->q.flags |= INET_FRAG_DROP; ipq_kill(qp); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT); @@ -194,7 +195,7 @@ out: spin_unlock(&qp->q.lock); out_rcu_unlock: rcu_read_unlock(); - kfree_skb(head); + kfree_skb_reason(head, SKB_DROP_REASON_FRAG_REASM_TIMEOUT); ipq_put(qp); } @@ -254,7 +255,8 @@ static int ip_frag_reinit(struct ipq *qp) return -ETIMEDOUT; } - sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments); + sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments, + SKB_DROP_REASON_FRAG_TOO_FAR); sub_frag_mem_limit(qp->q.fqdir, sum_truesize); qp->q.flags = 0; @@ -278,10 +280,14 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) struct net_device *dev; unsigned int fragsize; int err = -ENOENT; + SKB_DR(reason); u8 ecn; - if (qp->q.flags & INET_FRAG_COMPLETE) + /* If reassembly is already done, @skb must be a duplicate frag. */ + if (qp->q.flags & INET_FRAG_COMPLETE) { + SKB_DR_SET(reason, DUP_FRAG); goto err; + } if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && unlikely(ip_frag_too_far(qp)) && @@ -382,8 +388,9 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) insert_error: if (err == IPFRAG_DUP) { - kfree_skb(skb); - return -EINVAL; + SKB_DR_SET(reason, DUP_FRAG); + err = -EINVAL; + goto err; } err = -EINVAL; __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS); @@ -391,7 +398,7 @@ discard_qp: inet_frag_kill(&qp->q); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); err: - kfree_skb(skb); + kfree_skb_reason(skb, reason); return err; } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 38db0064d661..d13240f13607 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -253,7 +253,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, if (err) { if (err == IPFRAG_DUP) { /* No error for duplicates, pretend they got queued. */ - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_DUP_FRAG); return -EINPROGRESS; } goto insert_error; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index ff866f2a879e..5bc8a28e67f9 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -112,10 +112,14 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct sk_buff *prev_tail; struct net_device *dev; int err = -ENOENT; + SKB_DR(reason); u8 ecn; - if (fq->q.flags & INET_FRAG_COMPLETE) + /* If reassembly is already done, @skb must be a duplicate frag. */ + if (fq->q.flags & INET_FRAG_COMPLETE) { + SKB_DR_SET(reason, DUP_FRAG); goto err; + } err = -EINVAL; offset = ntohs(fhdr->frag_off) & ~0x7; @@ -226,8 +230,9 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, insert_error: if (err == IPFRAG_DUP) { - kfree_skb(skb); - return -EINVAL; + SKB_DR_SET(reason, DUP_FRAG); + err = -EINVAL; + goto err; } err = -EINVAL; __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), @@ -237,7 +242,7 @@ discard_fq: __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); err: - kfree_skb(skb); + kfree_skb_reason(skb, reason); return err; }