Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Minor conflict, a CHECK was placed into an if() statement
in net-next, whilst a newline was added to that CHECK
call in 'net'.  Thanks to Daniel for the merge resolution.

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2018-05-07 23:35:08 -04:00
commit 01adc4851a
107 changed files with 8853 additions and 2714 deletions

View file

@ -8,6 +8,9 @@ obj-$(CONFIG_BPF_SYSCALL) += btf.o
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
ifeq ($(CONFIG_XDP_SOCKETS),y)
obj-$(CONFIG_BPF_SYSCALL) += xskmap.o
endif
obj-$(CONFIG_BPF_SYSCALL) += offload.o
ifeq ($(CONFIG_STREAM_PARSER),y)
ifeq ($(CONFIG_INET),y)

View file

@ -31,6 +31,7 @@
#include <linux/rbtree_latch.h>
#include <linux/kallsyms.h>
#include <linux/rcupdate.h>
#include <linux/perf_event.h>
#include <asm/unaligned.h>
@ -633,23 +634,6 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
*to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off);
break;
case BPF_LD | BPF_ABS | BPF_W:
case BPF_LD | BPF_ABS | BPF_H:
case BPF_LD | BPF_ABS | BPF_B:
*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
*to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0);
break;
case BPF_LD | BPF_IND | BPF_W:
case BPF_LD | BPF_IND | BPF_H:
case BPF_LD | BPF_IND | BPF_B:
*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
*to++ = BPF_ALU32_REG(BPF_ADD, BPF_REG_AX, from->src_reg);
*to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0);
break;
case BPF_LD | BPF_IMM | BPF_DW:
*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm);
*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
@ -890,14 +874,7 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
INSN_3(LDX, MEM, W), \
INSN_3(LDX, MEM, DW), \
/* Immediate based. */ \
INSN_3(LD, IMM, DW), \
/* Misc (old cBPF carry-over). */ \
INSN_3(LD, ABS, B), \
INSN_3(LD, ABS, H), \
INSN_3(LD, ABS, W), \
INSN_3(LD, IND, B), \
INSN_3(LD, IND, H), \
INSN_3(LD, IND, W)
INSN_3(LD, IMM, DW)
bool bpf_opcode_in_insntable(u8 code)
{
@ -907,6 +884,13 @@ bool bpf_opcode_in_insntable(u8 code)
[0 ... 255] = false,
/* Now overwrite non-defaults ... */
BPF_INSN_MAP(BPF_INSN_2_TBL, BPF_INSN_3_TBL),
/* UAPI exposed, but rewritten opcodes. cBPF carry-over. */
[BPF_LD | BPF_ABS | BPF_B] = true,
[BPF_LD | BPF_ABS | BPF_H] = true,
[BPF_LD | BPF_ABS | BPF_W] = true,
[BPF_LD | BPF_IND | BPF_B] = true,
[BPF_LD | BPF_IND | BPF_H] = true,
[BPF_LD | BPF_IND | BPF_W] = true,
};
#undef BPF_INSN_3_TBL
#undef BPF_INSN_2_TBL
@ -937,8 +921,6 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
#undef BPF_INSN_3_LBL
#undef BPF_INSN_2_LBL
u32 tail_call_cnt = 0;
void *ptr;
int off;
#define CONT ({ insn++; goto select_insn; })
#define CONT_JMP ({ insn++; goto select_insn; })
@ -1265,67 +1247,6 @@ out:
atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
(DST + insn->off));
CONT;
LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
off = IMM;
load_word:
/* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only
* appearing in the programs where ctx == skb
* (see may_access_skb() in the verifier). All programs
* keep 'ctx' in regs[BPF_REG_CTX] == BPF_R6,
* bpf_convert_filter() saves it in BPF_R6, internal BPF
* verifier will check that BPF_R6 == ctx.
*
* BPF_ABS and BPF_IND are wrappers of function calls,
* so they scratch BPF_R1-BPF_R5 registers, preserve
* BPF_R6-BPF_R9, and store return value into BPF_R0.
*
* Implicit input:
* ctx == skb == BPF_R6 == CTX
*
* Explicit input:
* SRC == any register
* IMM == 32-bit immediate
*
* Output:
* BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
*/
ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp);
if (likely(ptr != NULL)) {
BPF_R0 = get_unaligned_be32(ptr);
CONT;
}
return 0;
LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */
off = IMM;
load_half:
ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp);
if (likely(ptr != NULL)) {
BPF_R0 = get_unaligned_be16(ptr);
CONT;
}
return 0;
LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */
off = IMM;
load_byte:
ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp);
if (likely(ptr != NULL)) {
BPF_R0 = *(u8 *)ptr;
CONT;
}
return 0;
LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */
off = IMM + SRC;
goto load_word;
LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */
off = IMM + SRC;
goto load_half;
LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */
off = IMM + SRC;
goto load_byte;
default_label:
/* If we ever reach this, we have a bug somewhere. Die hard here
@ -1722,6 +1643,10 @@ static void bpf_prog_free_deferred(struct work_struct *work)
aux = container_of(work, struct bpf_prog_aux, work);
if (bpf_prog_is_dev_bound(aux))
bpf_prog_offload_destroy(aux->prog);
#ifdef CONFIG_PERF_EVENTS
if (aux->prog->has_callchain_buf)
put_callchain_buffers();
#endif
for (i = 0; i < aux->func_cnt; i++)
bpf_jit_free(aux->func[i]);
if (aux->func_cnt) {
@ -1794,6 +1719,7 @@ bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
{
return -ENOTSUPP;
}
EXPORT_SYMBOL_GPL(bpf_event_output);
/* Always built-in helper functions. */
const struct bpf_func_proto bpf_tail_call_proto = {
@ -1840,9 +1766,3 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
#include <linux/bpf_trace.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception);
/* These are only used within the BPF_SYSCALL code */
#ifdef CONFIG_BPF_SYSCALL
EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_get_type);
EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_put_rcu);
#endif

View file

@ -429,13 +429,6 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname)
ret = bpf_obj_do_pin(pname, raw, type);
if (ret != 0)
bpf_any_put(raw, type);
if ((trace_bpf_obj_pin_prog_enabled() ||
trace_bpf_obj_pin_map_enabled()) && !ret) {
if (type == BPF_TYPE_PROG)
trace_bpf_obj_pin_prog(raw, ufd, pname);
if (type == BPF_TYPE_MAP)
trace_bpf_obj_pin_map(raw, ufd, pname);
}
out:
putname(pname);
return ret;
@ -502,15 +495,8 @@ int bpf_obj_get_user(const char __user *pathname, int flags)
else
goto out;
if (ret < 0) {
if (ret < 0)
bpf_any_put(raw, type);
} else if (trace_bpf_obj_get_prog_enabled() ||
trace_bpf_obj_get_map_enabled()) {
if (type == BPF_TYPE_PROG)
trace_bpf_obj_get_prog(raw, ret, pname);
if (type == BPF_TYPE_MAP)
trace_bpf_obj_get_map(raw, ret, pname);
}
out:
putname(pname);
return ret;

View file

@ -1,5 +1,5 @@
/*
* Copyright (C) 2017 Netronome Systems, Inc.
* Copyright (C) 2017-2018 Netronome Systems, Inc.
*
* This software is licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
@ -474,8 +474,10 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map)
struct bpf_prog_offload *offload;
bool ret;
if (!bpf_prog_is_dev_bound(prog->aux) || !bpf_map_is_dev_bound(map))
if (!bpf_prog_is_dev_bound(prog->aux))
return false;
if (!bpf_map_is_dev_bound(map))
return bpf_map_offload_neutral(map);
down_read(&bpf_devs_lock);
offload = prog->aux->offload;

View file

@ -262,16 +262,11 @@ out:
return ret;
}
static void stack_map_get_build_id_offset(struct bpf_map *map,
struct stack_map_bucket *bucket,
static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
u64 *ips, u32 trace_nr, bool user)
{
int i;
struct vm_area_struct *vma;
struct bpf_stack_build_id *id_offs;
bucket->nr = trace_nr;
id_offs = (struct bpf_stack_build_id *)bucket->data;
/*
* We cannot do up_read() in nmi context, so build_id lookup is
@ -361,8 +356,10 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
pcpu_freelist_pop(&smap->freelist);
if (unlikely(!new_bucket))
return -ENOMEM;
stack_map_get_build_id_offset(map, new_bucket, ips,
trace_nr, user);
new_bucket->nr = trace_nr;
stack_map_get_build_id_offset(
(struct bpf_stack_build_id *)new_bucket->data,
ips, trace_nr, user);
trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
if (hash_matches && bucket->nr == trace_nr &&
memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
@ -405,6 +402,73 @@ const struct bpf_func_proto bpf_get_stackid_proto = {
.arg3_type = ARG_ANYTHING,
};
BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
u64, flags)
{
u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
bool user_build_id = flags & BPF_F_USER_BUILD_ID;
u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
bool user = flags & BPF_F_USER_STACK;
struct perf_callchain_entry *trace;
bool kernel = !user;
int err = -EINVAL;
u64 *ips;
if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
BPF_F_USER_BUILD_ID)))
goto clear;
if (kernel && user_build_id)
goto clear;
elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id)
: sizeof(u64);
if (unlikely(size % elem_size))
goto clear;
num_elem = size / elem_size;
if (sysctl_perf_event_max_stack < num_elem)
init_nr = 0;
else
init_nr = sysctl_perf_event_max_stack - num_elem;
trace = get_perf_callchain(regs, init_nr, kernel, user,
sysctl_perf_event_max_stack, false, false);
if (unlikely(!trace))
goto err_fault;
trace_nr = trace->nr - init_nr;
if (trace_nr < skip)
goto err_fault;
trace_nr -= skip;
trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
copy_len = trace_nr * elem_size;
ips = trace->ip + skip + init_nr;
if (user && user_build_id)
stack_map_get_build_id_offset(buf, ips, trace_nr, user);
else
memcpy(buf, ips, copy_len);
if (size > copy_len)
memset(buf + copy_len, 0, size - copy_len);
return copy_len;
err_fault:
err = -EFAULT;
clear:
memset(buf, 0, size);
return err;
}
const struct bpf_func_proto bpf_get_stack_proto = {
.func = bpf_get_stack,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_UNINIT_MEM,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
};
/* Called from eBPF program */
static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
{

View file

@ -282,6 +282,7 @@ void bpf_map_put(struct bpf_map *map)
{
__bpf_map_put(map, true);
}
EXPORT_SYMBOL_GPL(bpf_map_put);
void bpf_map_put_with_uref(struct bpf_map *map)
{
@ -503,7 +504,6 @@ static int map_create(union bpf_attr *attr)
return err;
}
trace_bpf_map_create(map, err);
return err;
free_map:
@ -544,6 +544,7 @@ struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
atomic_inc(&map->usercnt);
return map;
}
EXPORT_SYMBOL_GPL(bpf_map_inc);
struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
@ -663,7 +664,6 @@ static int map_lookup_elem(union bpf_attr *attr)
if (copy_to_user(uvalue, value, value_size) != 0)
goto free_value;
trace_bpf_map_lookup_elem(map, ufd, key, value);
err = 0;
free_value:
@ -760,8 +760,6 @@ static int map_update_elem(union bpf_attr *attr)
__this_cpu_dec(bpf_prog_active);
preempt_enable();
out:
if (!err)
trace_bpf_map_update_elem(map, ufd, key, value);
free_value:
kfree(value);
free_key:
@ -814,8 +812,6 @@ static int map_delete_elem(union bpf_attr *attr)
__this_cpu_dec(bpf_prog_active);
preempt_enable();
out:
if (!err)
trace_bpf_map_delete_elem(map, ufd, key);
kfree(key);
err_put:
fdput(f);
@ -879,7 +875,6 @@ out:
if (copy_to_user(unext_key, next_key, map->key_size) != 0)
goto free_next_key;
trace_bpf_map_next_key(map, ufd, key, next_key);
err = 0;
free_next_key:
@ -1027,7 +1022,6 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
if (atomic_dec_and_test(&prog->aux->refcnt)) {
int i;
trace_bpf_prog_put_rcu(prog);
/* bpf_prog_free_id() must be called first */
bpf_prog_free_id(prog, do_idr_lock);
@ -1194,11 +1188,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
bool attach_drv)
{
struct bpf_prog *prog = __bpf_prog_get(ufd, &type, attach_drv);
if (!IS_ERR(prog))
trace_bpf_prog_get_type(prog);
return prog;
return __bpf_prog_get(ufd, &type, attach_drv);
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
@ -1373,7 +1363,6 @@ static int bpf_prog_load(union bpf_attr *attr)
}
bpf_prog_kallsyms_add(prog);
trace_bpf_prog_load(prog, err);
return err;
free_used_maps:

View file

@ -43,6 +43,16 @@ struct tnum tnum_rshift(struct tnum a, u8 shift)
return TNUM(a.value >> shift, a.mask >> shift);
}
struct tnum tnum_arshift(struct tnum a, u8 min_shift)
{
/* if a.value is negative, arithmetic shifting by minimum shift
* will have larger negative offset compared to more shifting.
* If a.value is nonnegative, arithmetic shifting by minimum shift
* will have larger positive offset compare to more shifting.
*/
return TNUM((s64)a.value >> min_shift, (s64)a.mask >> min_shift);
}
struct tnum tnum_add(struct tnum a, struct tnum b)
{
u64 sm, sv, sigma, chi, mu;

View file

@ -22,6 +22,7 @@
#include <linux/stringify.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <linux/perf_event.h>
#include "disasm.h"
@ -164,6 +165,8 @@ struct bpf_call_arg_meta {
bool pkt_access;
int regno;
int access_size;
s64 msize_smax_value;
u64 msize_umax_value;
};
static DEFINE_MUTEX(bpf_verifier_lock);
@ -738,18 +741,19 @@ enum reg_arg_type {
static int cmp_subprogs(const void *a, const void *b)
{
return *(int *)a - *(int *)b;
return ((struct bpf_subprog_info *)a)->start -
((struct bpf_subprog_info *)b)->start;
}
static int find_subprog(struct bpf_verifier_env *env, int off)
{
u32 *p;
struct bpf_subprog_info *p;
p = bsearch(&off, env->subprog_starts, env->subprog_cnt,
sizeof(env->subprog_starts[0]), cmp_subprogs);
p = bsearch(&off, env->subprog_info, env->subprog_cnt,
sizeof(env->subprog_info[0]), cmp_subprogs);
if (!p)
return -ENOENT;
return p - env->subprog_starts;
return p - env->subprog_info;
}
@ -769,18 +773,24 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
verbose(env, "too many subprograms\n");
return -E2BIG;
}
env->subprog_starts[env->subprog_cnt++] = off;
sort(env->subprog_starts, env->subprog_cnt,
sizeof(env->subprog_starts[0]), cmp_subprogs, NULL);
env->subprog_info[env->subprog_cnt++].start = off;
sort(env->subprog_info, env->subprog_cnt,
sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
return 0;
}
static int check_subprogs(struct bpf_verifier_env *env)
{
int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
struct bpf_subprog_info *subprog = env->subprog_info;
struct bpf_insn *insn = env->prog->insnsi;
int insn_cnt = env->prog->len;
/* Add entry function. */
ret = add_subprog(env, 0);
if (ret < 0)
return ret;
/* determine subprog starts. The end is one before the next starts */
for (i = 0; i < insn_cnt; i++) {
if (insn[i].code != (BPF_JMP | BPF_CALL))
@ -800,16 +810,18 @@ static int check_subprogs(struct bpf_verifier_env *env)
return ret;
}
/* Add a fake 'exit' subprog which could simplify subprog iteration
* logic. 'subprog_cnt' should not be increased.
*/
subprog[env->subprog_cnt].start = insn_cnt;
if (env->log.level > 1)
for (i = 0; i < env->subprog_cnt; i++)
verbose(env, "func#%d @%d\n", i, env->subprog_starts[i]);
verbose(env, "func#%d @%d\n", i, subprog[i].start);
/* now check that all jumps are within the same subprog */
subprog_start = 0;
if (env->subprog_cnt == cur_subprog)
subprog_end = insn_cnt;
else
subprog_end = env->subprog_starts[cur_subprog++];
subprog_start = subprog[cur_subprog].start;
subprog_end = subprog[cur_subprog + 1].start;
for (i = 0; i < insn_cnt; i++) {
u8 code = insn[i].code;
@ -834,10 +846,9 @@ next:
return -EINVAL;
}
subprog_start = subprog_end;
if (env->subprog_cnt == cur_subprog)
subprog_end = insn_cnt;
else
subprog_end = env->subprog_starts[cur_subprog++];
cur_subprog++;
if (cur_subprog < env->subprog_cnt)
subprog_end = subprog[cur_subprog + 1].start;
}
}
return 0;
@ -1470,13 +1481,13 @@ static int update_stack_depth(struct bpf_verifier_env *env,
const struct bpf_func_state *func,
int off)
{
u16 stack = env->subprog_stack_depth[func->subprogno];
u16 stack = env->subprog_info[func->subprogno].stack_depth;
if (stack >= -off)
return 0;
/* update known max for given subprogram */
env->subprog_stack_depth[func->subprogno] = -off;
env->subprog_info[func->subprogno].stack_depth = -off;
return 0;
}
@ -1488,9 +1499,9 @@ static int update_stack_depth(struct bpf_verifier_env *env,
*/
static int check_max_stack_depth(struct bpf_verifier_env *env)
{
int depth = 0, frame = 0, subprog = 0, i = 0, subprog_end;
int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
struct bpf_subprog_info *subprog = env->subprog_info;
struct bpf_insn *insn = env->prog->insnsi;
int insn_cnt = env->prog->len;
int ret_insn[MAX_CALL_FRAMES];
int ret_prog[MAX_CALL_FRAMES];
@ -1498,17 +1509,14 @@ process_func:
/* round up to 32-bytes, since this is granularity
* of interpreter stack size
*/
depth += round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32);
depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
if (depth > MAX_BPF_STACK) {
verbose(env, "combined stack size of %d calls is %d. Too large\n",
frame + 1, depth);
return -EACCES;
}
continue_func:
if (env->subprog_cnt == subprog)
subprog_end = insn_cnt;
else
subprog_end = env->subprog_starts[subprog];
subprog_end = subprog[idx + 1].start;
for (; i < subprog_end; i++) {
if (insn[i].code != (BPF_JMP | BPF_CALL))
continue;
@ -1516,17 +1524,16 @@ continue_func:
continue;
/* remember insn and function to return to */
ret_insn[frame] = i + 1;
ret_prog[frame] = subprog;
ret_prog[frame] = idx;
/* find the callee */
i = i + insn[i].imm + 1;
subprog = find_subprog(env, i);
if (subprog < 0) {
idx = find_subprog(env, i);
if (idx < 0) {
WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
i);
return -EFAULT;
}
subprog++;
frame++;
if (frame >= MAX_CALL_FRAMES) {
WARN_ONCE(1, "verifier bug. Call stack is too deep\n");
@ -1539,10 +1546,10 @@ continue_func:
*/
if (frame == 0)
return 0;
depth -= round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32);
depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
frame--;
i = ret_insn[frame];
subprog = ret_prog[frame];
idx = ret_prog[frame];
goto continue_func;
}
@ -1558,8 +1565,7 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env,
start);
return -EFAULT;
}
subprog++;
return env->subprog_stack_depth[subprog];
return env->subprog_info[subprog].stack_depth;
}
#endif
@ -1984,6 +1990,12 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
} else if (arg_type_is_mem_size(arg_type)) {
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
/* remember the mem_size which may be used later
* to refine return values.
*/
meta->msize_smax_value = reg->smax_value;
meta->msize_umax_value = reg->umax_value;
/* The register is SCALAR_VALUE; the access check
* happens using its boundaries.
*/
@ -2061,8 +2073,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
if (func_id != BPF_FUNC_redirect_map)
goto error;
break;
/* Restrict bpf side of cpumap, open when use-cases appear */
/* Restrict bpf side of cpumap and xskmap, open when use-cases
* appear.
*/
case BPF_MAP_TYPE_CPUMAP:
case BPF_MAP_TYPE_XSKMAP:
if (func_id != BPF_FUNC_redirect_map)
goto error;
break;
@ -2087,7 +2102,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_FUNC_tail_call:
if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
goto error;
if (env->subprog_cnt) {
if (env->subprog_cnt > 1) {
verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
return -EINVAL;
}
@ -2109,7 +2124,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
break;
case BPF_FUNC_redirect_map:
if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
map->map_type != BPF_MAP_TYPE_CPUMAP)
map->map_type != BPF_MAP_TYPE_CPUMAP &&
map->map_type != BPF_MAP_TYPE_XSKMAP)
goto error;
break;
case BPF_FUNC_sk_redirect_map:
@ -2259,7 +2275,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
/* remember the callsite, it will be used by bpf_exit */
*insn_idx /* callsite */,
state->curframe + 1 /* frameno within this callchain */,
subprog + 1 /* subprog number within this prog */);
subprog /* subprog number within this prog */);
/* copy r1 - r5 args that callee can access */
for (i = BPF_REG_1; i <= BPF_REG_5; i++)
@ -2323,6 +2339,23 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
return 0;
}
static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
int func_id,
struct bpf_call_arg_meta *meta)
{
struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
if (ret_type != RET_INTEGER ||
(func_id != BPF_FUNC_get_stack &&
func_id != BPF_FUNC_probe_read_str))
return;
ret_reg->smax_value = meta->msize_smax_value;
ret_reg->umax_value = meta->msize_umax_value;
__reg_deduce_bounds(ret_reg);
__reg_bound_offset(ret_reg);
}
static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
{
const struct bpf_func_proto *fn = NULL;
@ -2446,10 +2479,30 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
return -EINVAL;
}
do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
err = check_map_func_compatibility(env, meta.map_ptr, func_id);
if (err)
return err;
if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
const char *err_str;
#ifdef CONFIG_PERF_EVENTS
err = get_callchain_buffers(sysctl_perf_event_max_stack);
err_str = "cannot get callchain buffer for func %s#%d\n";
#else
err = -ENOTSUPP;
err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
#endif
if (err) {
verbose(env, err_str, func_id_name(func_id), func_id);
return err;
}
env->prog->has_callchain_buf = true;
}
if (changes_data)
clear_all_pkt_pointers(env);
return 0;
@ -2894,10 +2947,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
dst_reg->umin_value <<= umin_val;
dst_reg->umax_value <<= umax_val;
}
if (src_known)
dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
else
dst_reg->var_off = tnum_lshift(tnum_unknown, umin_val);
dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
/* We may learn something more from the var_off */
__update_reg_bounds(dst_reg);
break;
@ -2925,16 +2975,35 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
*/
dst_reg->smin_value = S64_MIN;
dst_reg->smax_value = S64_MAX;
if (src_known)
dst_reg->var_off = tnum_rshift(dst_reg->var_off,
umin_val);
else
dst_reg->var_off = tnum_rshift(tnum_unknown, umin_val);
dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
dst_reg->umin_value >>= umax_val;
dst_reg->umax_value >>= umin_val;
/* We may learn something more from the var_off */
__update_reg_bounds(dst_reg);
break;
case BPF_ARSH:
if (umax_val >= insn_bitness) {
/* Shifts greater than 31 or 63 are undefined.
* This includes shifts by a negative number.
*/
mark_reg_unknown(env, regs, insn->dst_reg);
break;
}
/* Upon reaching here, src_known is true and
* umax_val is equal to umin_val.
*/
dst_reg->smin_value >>= umin_val;
dst_reg->smax_value >>= umin_val;
dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val);
/* blow away the dst_reg umin_value/umax_value and rely on
* dst_reg var_off to refine the result.
*/
dst_reg->umin_value = 0;
dst_reg->umax_value = U64_MAX;
__update_reg_bounds(dst_reg);
break;
default:
mark_reg_unknown(env, regs, insn->dst_reg);
break;
@ -3818,7 +3887,12 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
return -EINVAL;
}
if (env->subprog_cnt) {
if (!env->ops->gen_ld_abs) {
verbose(env, "bpf verifier is misconfigured\n");
return -EINVAL;
}
if (env->subprog_cnt > 1) {
/* when program has LD_ABS insn JITs and interpreter assume
* that r1 == ctx == skb which is not the case for callees
* that can have arbitrary arguments. It's problematic
@ -4849,15 +4923,15 @@ process_bpf_exit:
verbose(env, "processed %d insns (limit %d), stack depth ",
insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
for (i = 0; i < env->subprog_cnt + 1; i++) {
u32 depth = env->subprog_stack_depth[i];
for (i = 0; i < env->subprog_cnt; i++) {
u32 depth = env->subprog_info[i].stack_depth;
verbose(env, "%d", depth);
if (i + 1 < env->subprog_cnt + 1)
if (i + 1 < env->subprog_cnt)
verbose(env, "+");
}
verbose(env, "\n");
env->prog->aux->stack_depth = env->subprog_stack_depth[0];
env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
return 0;
}
@ -4981,7 +5055,7 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
/* hold the map. If the program is rejected by verifier,
* the map will be released by release_maps() or it
* will be used by the valid program until it's unloaded
* and all maps are released in free_bpf_prog_info()
* and all maps are released in free_used_maps()
*/
map = bpf_map_inc(map, false);
if (IS_ERR(map)) {
@ -5063,10 +5137,11 @@ static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len
if (len == 1)
return;
for (i = 0; i < env->subprog_cnt; i++) {
if (env->subprog_starts[i] < off)
/* NOTE: fake 'exit' subprog should be updated as well. */
for (i = 0; i <= env->subprog_cnt; i++) {
if (env->subprog_info[i].start < off)
continue;
env->subprog_starts[i] += len - 1;
env->subprog_info[i].start += len - 1;
}
}
@ -5230,7 +5305,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
void *old_bpf_func;
int err = -ENOMEM;
if (env->subprog_cnt == 0)
if (env->subprog_cnt <= 1)
return 0;
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
@ -5246,7 +5321,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
/* temporarily remember subprog id inside insn instead of
* aux_data, since next loop will split up all insns into funcs
*/
insn->off = subprog + 1;
insn->off = subprog;
/* remember original imm in case JIT fails and fallback
* to interpreter will be needed
*/
@ -5255,16 +5330,13 @@ static int jit_subprogs(struct bpf_verifier_env *env)
insn->imm = 1;
}
func = kzalloc(sizeof(prog) * (env->subprog_cnt + 1), GFP_KERNEL);
func = kzalloc(sizeof(prog) * env->subprog_cnt, GFP_KERNEL);
if (!func)
return -ENOMEM;
for (i = 0; i <= env->subprog_cnt; i++) {
for (i = 0; i < env->subprog_cnt; i++) {
subprog_start = subprog_end;
if (env->subprog_cnt == i)
subprog_end = prog->len;
else
subprog_end = env->subprog_starts[i];
subprog_end = env->subprog_info[i + 1].start;
len = subprog_end - subprog_start;
func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
@ -5281,7 +5353,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
* Long term would need debug info to populate names
*/
func[i]->aux->name[0] = 'F';
func[i]->aux->stack_depth = env->subprog_stack_depth[i];
func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
func[i]->jit_requested = 1;
func[i] = bpf_int_jit_compile(func[i]);
if (!func[i]->jited) {
@ -5294,7 +5366,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
* now populate all bpf_calls with correct addresses and
* run last pass of JIT
*/
for (i = 0; i <= env->subprog_cnt; i++) {
for (i = 0; i < env->subprog_cnt; i++) {
insn = func[i]->insnsi;
for (j = 0; j < func[i]->len; j++, insn++) {
if (insn->code != (BPF_JMP | BPF_CALL) ||
@ -5307,7 +5379,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
__bpf_call_base;
}
}
for (i = 0; i <= env->subprog_cnt; i++) {
for (i = 0; i < env->subprog_cnt; i++) {
old_bpf_func = func[i]->bpf_func;
tmp = bpf_int_jit_compile(func[i]);
if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
@ -5321,7 +5393,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
/* finally lock prog and jit images for all functions and
* populate kallsysm
*/
for (i = 0; i <= env->subprog_cnt; i++) {
for (i = 0; i < env->subprog_cnt; i++) {
bpf_prog_lock_ro(func[i]);
bpf_prog_kallsyms_add(func[i]);
}
@ -5338,7 +5410,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
continue;
insn->off = env->insn_aux_data[i].call_imm;
subprog = find_subprog(env, i + insn->off + 1);
addr = (unsigned long)func[subprog + 1]->bpf_func;
addr = (unsigned long)func[subprog]->bpf_func;
addr &= PAGE_MASK;
insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
addr - __bpf_call_base;
@ -5347,10 +5419,10 @@ static int jit_subprogs(struct bpf_verifier_env *env)
prog->jited = 1;
prog->bpf_func = func[0]->bpf_func;
prog->aux->func = func;
prog->aux->func_cnt = env->subprog_cnt + 1;
prog->aux->func_cnt = env->subprog_cnt;
return 0;
out_free:
for (i = 0; i <= env->subprog_cnt; i++)
for (i = 0; i < env->subprog_cnt; i++)
if (func[i])
bpf_jit_free(func[i]);
kfree(func);
@ -5453,6 +5525,25 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
continue;
}
if (BPF_CLASS(insn->code) == BPF_LD &&
(BPF_MODE(insn->code) == BPF_ABS ||
BPF_MODE(insn->code) == BPF_IND)) {
cnt = env->ops->gen_ld_abs(insn, insn_buf);
if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
verbose(env, "bpf verifier is misconfigured\n");
return -EINVAL;
}
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
delta += cnt - 1;
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
continue;
}
if (insn->code != (BPF_JMP | BPF_CALL))
continue;
if (insn->src_reg == BPF_PSEUDO_CALL)
@ -5650,16 +5741,16 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
env->strict_alignment = true;
if (bpf_prog_is_dev_bound(env->prog->aux)) {
ret = bpf_prog_offload_verifier_prep(env);
if (ret)
goto err_unlock;
}
ret = replace_map_fd_with_map_ptr(env);
if (ret < 0)
goto skip_full_check;
if (bpf_prog_is_dev_bound(env->prog->aux)) {
ret = bpf_prog_offload_verifier_prep(env);
if (ret)
goto skip_full_check;
}
env->explored_states = kcalloc(env->prog->len,
sizeof(struct bpf_verifier_state_list *),
GFP_USER);
@ -5730,7 +5821,7 @@ skip_full_check:
err_release_maps:
if (!env->prog->aux->used_maps)
/* if we didn't copy map pointers into bpf_prog_info, release
* them now. Otherwise free_bpf_prog_info() will release them.
* them now. Otherwise free_used_maps() will release them.
*/
release_maps(env);
*prog = env->prog;

241
kernel/bpf/xskmap.c Normal file
View file

@ -0,0 +1,241 @@
// SPDX-License-Identifier: GPL-2.0
/* XSKMAP used for AF_XDP sockets
* Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/bpf.h>
#include <linux/capability.h>
#include <net/xdp_sock.h>
#include <linux/slab.h>
#include <linux/sched.h>
struct xsk_map {
struct bpf_map map;
struct xdp_sock **xsk_map;
struct list_head __percpu *flush_list;
};
static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
{
int cpu, err = -EINVAL;
struct xsk_map *m;
u64 cost;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size != 4 ||
attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
return ERR_PTR(-EINVAL);
m = kzalloc(sizeof(*m), GFP_USER);
if (!m)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&m->map, attr);
cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *);
cost += sizeof(struct list_head) * num_possible_cpus();
if (cost >= U32_MAX - PAGE_SIZE)
goto free_m;
m->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
/* Notice returns -EPERM on if map size is larger than memlock limit */
err = bpf_map_precharge_memlock(m->map.pages);
if (err)
goto free_m;
err = -ENOMEM;
m->flush_list = alloc_percpu(struct list_head);
if (!m->flush_list)
goto free_m;
for_each_possible_cpu(cpu)
INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));
m->xsk_map = bpf_map_area_alloc(m->map.max_entries *
sizeof(struct xdp_sock *),
m->map.numa_node);
if (!m->xsk_map)
goto free_percpu;
return &m->map;
free_percpu:
free_percpu(m->flush_list);
free_m:
kfree(m);
return ERR_PTR(err);
}
static void xsk_map_free(struct bpf_map *map)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
int i;
synchronize_net();
for (i = 0; i < map->max_entries; i++) {
struct xdp_sock *xs;
xs = m->xsk_map[i];
if (!xs)
continue;
sock_put((struct sock *)xs);
}
free_percpu(m->flush_list);
bpf_map_area_free(m->xsk_map);
kfree(m);
}
static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
u32 index = key ? *(u32 *)key : U32_MAX;
u32 *next = next_key;
if (index >= m->map.max_entries) {
*next = 0;
return 0;
}
if (index == m->map.max_entries - 1)
return -ENOENT;
*next = index + 1;
return 0;
}
struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
struct xdp_sock *xs;
if (key >= map->max_entries)
return NULL;
xs = READ_ONCE(m->xsk_map[key]);
return xs;
}
int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
struct xdp_sock *xs)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
struct list_head *flush_list = this_cpu_ptr(m->flush_list);
int err;
err = xsk_rcv(xs, xdp);
if (err)
return err;
if (!xs->flush_node.prev)
list_add(&xs->flush_node, flush_list);
return 0;
}
void __xsk_map_flush(struct bpf_map *map)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
struct list_head *flush_list = this_cpu_ptr(m->flush_list);
struct xdp_sock *xs, *tmp;
list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
xsk_flush(xs);
__list_del(xs->flush_node.prev, xs->flush_node.next);
xs->flush_node.prev = NULL;
}
}
static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
{
return NULL;
}
static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
u64 map_flags)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
u32 i = *(u32 *)key, fd = *(u32 *)value;
struct xdp_sock *xs, *old_xs;
struct socket *sock;
int err;
if (unlikely(map_flags > BPF_EXIST))
return -EINVAL;
if (unlikely(i >= m->map.max_entries))
return -E2BIG;
if (unlikely(map_flags == BPF_NOEXIST))
return -EEXIST;
sock = sockfd_lookup(fd, &err);
if (!sock)
return err;
if (sock->sk->sk_family != PF_XDP) {
sockfd_put(sock);
return -EOPNOTSUPP;
}
xs = (struct xdp_sock *)sock->sk;
if (!xsk_is_setup_for_bpf_map(xs)) {
sockfd_put(sock);
return -EOPNOTSUPP;
}
sock_hold(sock->sk);
old_xs = xchg(&m->xsk_map[i], xs);
if (old_xs) {
/* Make sure we've flushed everything. */
synchronize_net();
sock_put((struct sock *)old_xs);
}
sockfd_put(sock);
return 0;
}
static int xsk_map_delete_elem(struct bpf_map *map, void *key)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
struct xdp_sock *old_xs;
int k = *(u32 *)key;
if (k >= map->max_entries)
return -EINVAL;
old_xs = xchg(&m->xsk_map[k], NULL);
if (old_xs) {
/* Make sure we've flushed everything. */
synchronize_net();
sock_put((struct sock *)old_xs);
}
return 0;
}
const struct bpf_map_ops xsk_map_ops = {
.map_alloc = xsk_map_alloc,
.map_free = xsk_map_free,
.map_get_next_key = xsk_map_get_next_key,
.map_lookup_elem = xsk_map_lookup_elem,
.map_update_elem = xsk_map_update_elem,
.map_delete_elem = xsk_map_delete_elem,
};