Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

There were quite a few overlapping sets of changes here.

Daniel's bug fix for off-by-ones in the new BPF branch instructions,
along with the added allowances for "data_end > ptr + x" forms
collided with the metadata additions.

Along with those three changes came veritifer test cases, which in
their final form I tried to group together properly.  If I had just
trimmed GIT's conflict tags as-is, this would have split up the
meta tests unnecessarily.

In the socketmap code, a set of preemption disabling changes
overlapped with the rename of bpf_compute_data_end() to
bpf_compute_data_pointers().

Changes were made to the mv88e6060.c driver set addr method
which got removed in net-next.

The hyperv transport socket layer had a locking change in 'net'
which overlapped with a change of socket state macro usage
in 'net-next'.

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2017-10-22 13:36:53 +01:00
commit f8ddadc4db
415 changed files with 4551 additions and 2007 deletions

View file

@ -102,7 +102,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();
if (array_size >= U32_MAX - PAGE_SIZE ||
elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) {
bpf_array_alloc_percpu(array)) {
bpf_map_area_free(array);
return ERR_PTR(-ENOMEM);
}

View file

@ -72,7 +72,7 @@ static LIST_HEAD(dev_map_list);
static u64 dev_map_bitmap_size(const union bpf_attr *attr)
{
return BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long);
return BITS_TO_LONGS((u64) attr->max_entries) * sizeof(unsigned long);
}
static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
@ -81,6 +81,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
int err = -EINVAL;
u64 cost;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
@ -114,8 +117,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
err = -ENOMEM;
/* A per cpu bitfield with a bit per possible net device */
dtab->flush_needed = __alloc_percpu(dev_map_bitmap_size(attr),
__alignof__(unsigned long));
dtab->flush_needed = __alloc_percpu_gfp(dev_map_bitmap_size(attr),
__alignof__(unsigned long),
GFP_KERNEL | __GFP_NOWARN);
if (!dtab->flush_needed)
goto free_dtab;

View file

@ -318,10 +318,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
*/
goto free_htab;
if (percpu && round_up(htab->map.value_size, 8) > PCPU_MIN_UNIT_SIZE)
/* make sure the size for pcpu_alloc() is reasonable */
goto free_htab;
htab->elem_size = sizeof(struct htab_elem) +
round_up(htab->map.key_size, 8);
if (percpu)

View file

@ -39,6 +39,7 @@
#include <linux/workqueue.h>
#include <linux/list.h>
#include <net/strparser.h>
#include <net/tcp.h>
#define SOCK_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
@ -104,9 +105,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
return SK_DROP;
skb_orphan(skb);
/* We need to ensure that BPF metadata for maps is also cleared
* when we orphan the skb so that we don't have the possibility
* to reference a stale map.
*/
TCP_SKB_CB(skb)->bpf.map = NULL;
skb->sk = psock->sock;
bpf_compute_data_pointers(skb);
preempt_disable();
rc = (*prog->bpf_func)(skb, prog->insnsi);
preempt_enable();
skb->sk = NULL;
return rc;
@ -117,17 +125,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
struct sock *sk;
int rc;
/* Because we use per cpu values to feed input from sock redirect
* in BPF program to do_sk_redirect_map() call we need to ensure we
* are not preempted. RCU read lock is not sufficient in this case
* with CONFIG_PREEMPT_RCU enabled so we must be explicit here.
*/
preempt_disable();
rc = smap_verdict_func(psock, skb);
switch (rc) {
case SK_REDIRECT:
sk = do_sk_redirect_map();
preempt_enable();
sk = do_sk_redirect_map(skb);
if (likely(sk)) {
struct smap_psock *peer = smap_psock_sk(sk);
@ -144,8 +145,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
/* Fall through and free skb otherwise */
case SK_DROP:
default:
if (rc != SK_REDIRECT)
preempt_enable();
kfree_skb(skb);
}
}
@ -490,6 +489,9 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
int err = -EINVAL;
u64 cost;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size != 4 || attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
@ -843,6 +845,12 @@ static int sock_map_update_elem(struct bpf_map *map,
return -EINVAL;
}
if (skops.sk->sk_type != SOCK_STREAM ||
skops.sk->sk_protocol != IPPROTO_TCP) {
fput(socket->file);
return -EOPNOTSUPP;
}
err = sock_map_ctx_update_elem(&skops, map, key, flags);
fput(socket->file);
return err;

View file

@ -1006,7 +1006,13 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
/* ctx accesses must be at a fixed offset, so that we can
* determine what type of data were returned.
*/
if (!tnum_is_const(reg->var_off)) {
if (reg->off) {
verbose(env,
"dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n",
regno, reg->off, off - reg->off);
return -EACCES;
}
if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
@ -1015,7 +1021,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
tn_buf, off, size);
return -EACCES;
}
off += reg->var_off.value;
err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
if (!err && t == BPF_READ && value_regno >= 0) {
/* ctx access returns either a scalar, or a
@ -2341,12 +2346,15 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
static void find_good_pkt_pointers(struct bpf_verifier_state *state,
struct bpf_reg_state *dst_reg,
enum bpf_reg_type type)
enum bpf_reg_type type,
bool range_right_open)
{
struct bpf_reg_state *regs = state->regs, *reg;
u16 new_range;
int i;
if (dst_reg->off < 0)
if (dst_reg->off < 0 ||
(dst_reg->off == 0 && range_right_open))
/* This doesn't give us any range */
return;
@ -2357,9 +2365,13 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
*/
return;
/* LLVM can generate four kind of checks:
new_range = dst_reg->off;
if (range_right_open)
new_range--;
/* Examples for register markings:
*
* Type 1/2:
* pkt_data in dst register:
*
* r2 = r3;
* r2 += 8;
@ -2376,7 +2388,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
* r2=pkt(id=n,off=8,r=0)
* r3=pkt(id=n,off=0,r=0)
*
* Type 3/4:
* pkt_data in src register:
*
* r2 = r3;
* r2 += 8;
@ -2394,7 +2406,9 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
* r3=pkt(id=n,off=0,r=0)
*
* Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
* so that range of bytes [r3, r3 + 8) is safe to access.
* or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
* and [r3, r3 + 8-1) respectively is safe to access depending on
* the check.
*/
/* If our ids match, then we must have the same max_value. And we
@ -2405,14 +2419,14 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
for (i = 0; i < MAX_BPF_REG; i++)
if (regs[i].type == type && regs[i].id == dst_reg->id)
/* keep the maximum range already checked */
regs[i].range = max_t(u16, regs[i].range, dst_reg->off);
regs[i].range = max(regs[i].range, new_range);
for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
if (state->stack_slot_type[i] != STACK_SPILL)
continue;
reg = &state->spilled_regs[i / BPF_REG_SIZE];
if (reg->type == type && reg->id == dst_reg->id)
reg->range = max_t(u16, reg->range, dst_reg->off);
reg->range = max_t(u16, reg->range, new_range);
}
}
@ -2776,39 +2790,71 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
dst_reg->type == PTR_TO_PACKET &&
regs[insn->src_reg].type == PTR_TO_PACKET_END) {
find_good_pkt_pointers(this_branch, dst_reg, PTR_TO_PACKET);
/* pkt_data' > pkt_end */
find_good_pkt_pointers(this_branch, dst_reg,
PTR_TO_PACKET, false);
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
dst_reg->type == PTR_TO_PACKET_END &&
regs[insn->src_reg].type == PTR_TO_PACKET) {
/* pkt_end > pkt_data' */
find_good_pkt_pointers(other_branch, &regs[insn->src_reg],
PTR_TO_PACKET, true);
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT &&
dst_reg->type == PTR_TO_PACKET &&
regs[insn->src_reg].type == PTR_TO_PACKET_END) {
find_good_pkt_pointers(other_branch, dst_reg, PTR_TO_PACKET);
/* pkt_data' < pkt_end */
find_good_pkt_pointers(other_branch, dst_reg, PTR_TO_PACKET,
true);
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT &&
dst_reg->type == PTR_TO_PACKET_END &&
regs[insn->src_reg].type == PTR_TO_PACKET) {
/* pkt_end < pkt_data' */
find_good_pkt_pointers(this_branch, &regs[insn->src_reg],
PTR_TO_PACKET, false);
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
dst_reg->type == PTR_TO_PACKET &&
regs[insn->src_reg].type == PTR_TO_PACKET_END) {
/* pkt_data' >= pkt_end */
find_good_pkt_pointers(this_branch, dst_reg,
PTR_TO_PACKET, true);
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
dst_reg->type == PTR_TO_PACKET_END &&
regs[insn->src_reg].type == PTR_TO_PACKET) {
/* pkt_end >= pkt_data' */
find_good_pkt_pointers(other_branch, &regs[insn->src_reg],
PTR_TO_PACKET);
PTR_TO_PACKET, false);
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE &&
dst_reg->type == PTR_TO_PACKET &&
regs[insn->src_reg].type == PTR_TO_PACKET_END) {
/* pkt_data' <= pkt_end */
find_good_pkt_pointers(other_branch, dst_reg,
PTR_TO_PACKET, false);
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE &&
dst_reg->type == PTR_TO_PACKET_END &&
regs[insn->src_reg].type == PTR_TO_PACKET) {
/* pkt_end <= pkt_data' */
find_good_pkt_pointers(this_branch, &regs[insn->src_reg],
PTR_TO_PACKET);
PTR_TO_PACKET, true);
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
dst_reg->type == PTR_TO_PACKET_META &&
reg_is_init_pkt_pointer(&regs[insn->src_reg], PTR_TO_PACKET)) {
find_good_pkt_pointers(this_branch, dst_reg, PTR_TO_PACKET_META);
find_good_pkt_pointers(this_branch, dst_reg,
PTR_TO_PACKET_META, false);
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT &&
dst_reg->type == PTR_TO_PACKET_META &&
reg_is_init_pkt_pointer(&regs[insn->src_reg], PTR_TO_PACKET)) {
find_good_pkt_pointers(other_branch, dst_reg, PTR_TO_PACKET_META);
find_good_pkt_pointers(other_branch, dst_reg,
PTR_TO_PACKET_META, false);
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
regs[insn->src_reg].type == PTR_TO_PACKET_META) {
find_good_pkt_pointers(other_branch, &regs[insn->src_reg],
PTR_TO_PACKET_META);
PTR_TO_PACKET_META, false);
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE &&
reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
regs[insn->src_reg].type == PTR_TO_PACKET_META) {
find_good_pkt_pointers(this_branch, &regs[insn->src_reg],
PTR_TO_PACKET_META);
PTR_TO_PACKET_META, false);
} else if (is_pointer_value(env, insn->dst_reg)) {
verbose(env, "R%d pointer comparison prohibited\n",
insn->dst_reg);