From 5964d1e4594eb1dbfc1e2a34ec89eb48f6b03e75 Mon Sep 17 00:00:00 2001
From: Li kunyu
Date: Sat, 5 Aug 2023 01:59:29 +0800
Subject: [PATCH 01/19] bpf: bpf_struct_ops: Remove unnecessary initial values
 of variables

err and tlinks are assigned before they are used, so they do not need to
be initialized.

Signed-off-by: Li kunyu
Reviewed-by: Simon Horman
Link: https://lore.kernel.org/r/20230804175929.2867-1-kunyu@nfschina.com
Signed-off-by: Martin KaFai Lau
---
 kernel/bpf/bpf_struct_ops.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 116a0ce378ec..eaff04eefb31 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -374,9 +374,9 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 	struct bpf_struct_ops_value *uvalue, *kvalue;
 	const struct btf_member *member;
 	const struct btf_type *t = st_ops->type;
-	struct bpf_tramp_links *tlinks = NULL;
+	struct bpf_tramp_links *tlinks;
 	void *udata, *kdata;
-	int prog_fd, err = 0;
+	int prog_fd, err;
 	void *image, *image_end;
 	u32 i;
 
@@ -815,7 +815,7 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
 	struct bpf_struct_ops_map *st_map, *old_st_map;
 	struct bpf_map *old_map;
 	struct bpf_struct_ops_link *st_link;
-	int err = 0;
+	int err;
 
 	st_link = container_of(link, struct bpf_struct_ops_link, link);
 	st_map = container_of(new_map, struct bpf_struct_ops_map, map);

From d210f9735e13e9b1ef6ffbb636ee051f615bd109 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Fri, 4 Aug 2023 15:11:11 +0200
Subject: [PATCH 02/19] bpf: Fix mprog detachment for empty mprog entry

syzbot reported an UBSAN array-index-out-of-bounds access in
bpf_mprog_read() upon bpf_mprog_detach(). While it did not have a
reproducer, I was able to reproduce it manually through an empty mprog
entry which just has the miniq present. The latter is important given
otherwise we get an ENOENT error as tcx detaches the whole mprog entry.

The index 4294967295 was triggered via a NULL dtuple.prog, which then
attempts to detach from the back. bpf_mprog_fetch() in this case hit
idx == total and therefore tried to grab the entry at idx - 1.

Fix it by adding an explicit bpf_mprog_total() check in
bpf_mprog_detach() and bail out early with ENOENT.

Fixes: 053c8e1f235d ("bpf: Add generic attach/detach/query API for multi-progs")
Reported-by: syzbot+0c06ba0f831fe07a8f27@syzkaller.appspotmail.com
Signed-off-by: Daniel Borkmann
Link: https://lore.kernel.org/r/20230804131112.11012-1-daniel@iogearbox.net
Signed-off-by: Martin KaFai Lau
---
 kernel/bpf/mprog.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/bpf/mprog.c b/kernel/bpf/mprog.c
index f7816d2bc3e4..32d2c4829eb8 100644
--- a/kernel/bpf/mprog.c
+++ b/kernel/bpf/mprog.c
@@ -337,6 +337,8 @@ int bpf_mprog_detach(struct bpf_mprog_entry *entry,
 		return -EINVAL;
 	if (revision && revision != bpf_mprog_revision(entry))
 		return -ESTALE;
+	if (!bpf_mprog_total(entry))
+		return -ENOENT;
 	ret = bpf_mprog_tuple_relative(&rtuple, id_or_fd, flags,
 				       prog ? prog->type : BPF_PROG_TYPE_UNSPEC);
 

From 21ce6abe178a15a0354eaaf86bea07e302075a20 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann
Date: Fri, 4 Aug 2023 15:11:12 +0200
Subject: [PATCH 03/19] selftests/bpf: Add test for detachment on empty mprog
 entry

Add a detachment test case with the miniq present to assert that with
and without the miniq we get the same error.
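For reference, the contract asserted here can also be exercised outside the
test_progs harness. The following is a minimal user-space sketch (not part of
this series) that detaches by id/fd 0 from an empty tcx ingress entry on
loopback and expects -ENOENT; it assumes a libbpf and kernel headers new
enough to expose bpf_prog_detach_opts() and the BPF_TCX_* attach types, the
same interfaces the selftest below uses:

  /* Sketch only: detach from an empty tcx entry and expect -ENOENT. */
  #include <errno.h>
  #include <stdio.h>
  #include <net/if.h>
  #include <bpf/bpf.h>
  #include <bpf/libbpf.h>

  int main(void)
  {
      LIBBPF_OPTS(bpf_prog_detach_opts, optd);
      int ifindex = if_nametoindex("lo");
      int err;

      if (!ifindex)
          return 1;

      /* Nothing is attached, so the mprog entry is empty. */
      err = bpf_prog_detach_opts(0, ifindex, BPF_TCX_INGRESS, &optd);
      printf("detach on empty entry: %d (expect %d)\n", err, -ENOENT);
      return err == -ENOENT ? 0 : 1;
  }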
# ./test_progs -t tc_opts #244 tc_opts_after:OK #245 tc_opts_append:OK #246 tc_opts_basic:OK #247 tc_opts_before:OK #248 tc_opts_chain_classic:OK #249 tc_opts_delete_empty:OK #250 tc_opts_demixed:OK #251 tc_opts_detach:OK #252 tc_opts_detach_after:OK #253 tc_opts_detach_before:OK #254 tc_opts_dev_cleanup:OK #255 tc_opts_invalid:OK #256 tc_opts_mixed:OK #257 tc_opts_prepend:OK #258 tc_opts_replace:OK #259 tc_opts_revision:OK Summary: 16/0 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/r/20230804131112.11012-2-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/prog_tests/tc_opts.c | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c index 7914100f9b46..39bd253e41aa 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c @@ -2237,3 +2237,34 @@ void serial_test_tc_opts_detach_after(void) test_tc_opts_detach_after_target(BPF_TCX_INGRESS); test_tc_opts_detach_after_target(BPF_TCX_EGRESS); } + +static void test_tc_opts_delete_empty(int target, bool chain_tc_old) +{ + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .ifindex = loopback); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + int err; + + assert_mprog_count(target, 0); + if (chain_tc_old) { + tc_hook.attach_point = target == BPF_TCX_INGRESS ? + BPF_TC_INGRESS : BPF_TC_EGRESS; + err = bpf_tc_hook_create(&tc_hook); + ASSERT_OK(err, "bpf_tc_hook_create"); + __assert_mprog_count(target, 0, true, loopback); + } + err = bpf_prog_detach_opts(0, loopback, target, &optd); + ASSERT_EQ(err, -ENOENT, "prog_detach"); + if (chain_tc_old) { + tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS; + bpf_tc_hook_destroy(&tc_hook); + } + assert_mprog_count(target, 0); +} + +void serial_test_tc_opts_delete_empty(void) +{ + test_tc_opts_delete_empty(BPF_TCX_INGRESS, false); + test_tc_opts_delete_empty(BPF_TCX_EGRESS, false); + test_tc_opts_delete_empty(BPF_TCX_INGRESS, true); + test_tc_opts_delete_empty(BPF_TCX_EGRESS, true); +} From 9eab71bd887ae28882c29a82cb0cdb00e1654c54 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Fri, 4 Aug 2023 09:58:31 -0700 Subject: [PATCH 04/19] selftests/bpf: fix the incorrect verification of port numbers. Check port numbers before calling htons(). According to Dan Carpenter's report, Smatch identified incorrect port number checks. It is expected that the returned port number is an integer, with negative numbers indicating errors. However, the value was mistakenly verified after being translated by htons(). Major changes from v1: - Move the variable 'port' to the same line of 'err'. 
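The intended pattern, shown as a stand-alone sketch with a hypothetical
helper (get_bound_port_v6() is not the selftest code): fetch the bound port,
check for failure while it is still a plain int, and only convert with
htons() afterwards.

  #include <stdio.h>
  #include <arpa/inet.h>
  #include <netinet/in.h>
  #include <sys/socket.h>

  static int get_bound_port_v6(int fd)
  {
      struct sockaddr_in6 addr;
      socklen_t len = sizeof(addr);

      if (getsockname(fd, (struct sockaddr *)&addr, &len) < 0)
          return -1;                    /* error stays a negative int */
      return ntohs(addr.sin6_port);     /* valid ports are 0..65535 */
  }

  int main(void)
  {
      struct sockaddr_in6 dst = { .sin6_family = AF_INET6 };
      int fd = socket(AF_INET6, SOCK_STREAM, 0);
      int port = get_bound_port_v6(fd);

      if (port < 0)                     /* check before htons(), not after */
          return 1;
      dst.sin6_port = htons(port);
      printf("port %d -> network order 0x%04x\n", port, dst.sin6_port);
      return 0;
  }

The original check could never fire because sin6_port is an unsigned 16-bit
field, so comparing it against 0 after htons() is dead code, which is exactly
the kind of pattern Smatch flags.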
Fixes: 539c7e67aa4a ("selftests/bpf: Verify that the cgroup_skb filters receive expected packets.") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/bpf/cafd6585-d5a2-4096-b94f-7556f5aa7737@moroto.mountain/ Acked-by: Yonghong Song Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20230804165831.173627-1-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c b/tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c index 95bab61a1e57..d686ef19f705 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c @@ -110,11 +110,12 @@ static int connect_client_server_v6(int client_fd, int listen_fd) .sin6_family = AF_INET6, .sin6_addr = IN6ADDR_LOOPBACK_INIT, }; - int err; + int err, port; - addr.sin6_port = htons(get_sock_port_v6(listen_fd)); - if (addr.sin6_port < 0) + port = get_sock_port_v6(listen_fd); + if (port < 0) return -1; + addr.sin6_port = htons(port); err = connect(client_fd, (struct sockaddr *)&addr, sizeof(addr)); if (err < 0) { From 8a60a041eada0fbfdc7b6b7a10fdf68ae6a840ce Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Thu, 3 Aug 2023 17:51:01 -0700 Subject: [PATCH 05/19] bpf: fix inconsistent return types of bpf_xdp_copy_buf(). Fix inconsistent return types in two implementations of bpf_xdp_copy_buf(). There are two implementations: one is an empty implementation whose return type does not match the actual implementation. Suggested-by: Alexei Starovoitov Signed-off-by: Kui-Feng Lee Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20230804005101.1534505-1-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/filter.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 2d6fe30bad5f..761af6b3cf2b 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1572,10 +1572,9 @@ static inline void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) return NULL; } -static inline void *bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf, - unsigned long len, bool flush) +static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf, + unsigned long len, bool flush) { - return NULL; } #endif /* CONFIG_NET */ From 5426700e6841bf72e652e34b5cec68eadf442435 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Thu, 3 Aug 2023 16:12:06 -0700 Subject: [PATCH 06/19] bpf: fix bpf_dynptr_slice() to stop return an ERR_PTR. Verify if the pointer obtained from bpf_xdp_pointer() is either an error or NULL before returning it. The function bpf_dynptr_slice() mistakenly returned an ERR_PTR. Instead of solely checking for NULL, it should also verify if the pointer returned by bpf_xdp_pointer() is an error or NULL. 
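To make the failure mode concrete, here is a small user-space re-creation of
the kernel's ERR_PTR() convention (the constants mirror include/linux/err.h;
this is an illustration, not kernel code): an ERR_PTR() is a non-NULL pointer
value that encodes a negative errno, so a plain NULL check lets it through
while IS_ERR_OR_NULL() does not.

  #include <stdio.h>

  #define MAX_ERRNO 4095
  #define ERR_PTR(err) ((void *)(long)(err))
  #define IS_ERR_OR_NULL(p) \
      ((p) == NULL || (unsigned long)(p) >= (unsigned long)-MAX_ERRNO)

  static void check(const char *what, void *p)
  {
      printf("%-6s plain 'if (p)': %d  IS_ERR_OR_NULL(): %d\n",
             what, p != NULL, (int)IS_ERR_OR_NULL(p));
  }

  int main(void)
  {
      int x;

      check("valid", &x);
      check("NULL", NULL);
      check("error", ERR_PTR(-22));   /* illustrative errno value */
      return 0;
  }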
Reported-by: Dan Carpenter Closes: https://lore.kernel.org/bpf/d1360219-85c3-4a03-9449-253ea905f9d1@moroto.mountain/ Fixes: 66e3a13e7c2c ("bpf: Add bpf_dynptr_slice and bpf_dynptr_slice_rdwr") Suggested-by: Alexei Starovoitov Signed-off-by: Kui-Feng Lee Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20230803231206.1060485-1-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 56ce5008aedd..eb91cae0612a 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2270,7 +2270,7 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset case BPF_DYNPTR_TYPE_XDP: { void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset + offset, len); - if (xdp_ptr) + if (!IS_ERR_OR_NULL(xdp_ptr)) return xdp_ptr; if (!buffer__opt) From dde3979bb3451c820b540151f5903bfd2e814f60 Mon Sep 17 00:00:00 2001 From: Sergey Kacheev Date: Wed, 2 Aug 2023 21:22:14 +0300 Subject: [PATCH 07/19] libbpf: Use local includes inside the library In our monrepo, we try to minimize special processing when importing (aka vendor) third-party source code. Ideally, we try to import directly from the repositories with the code without changing it, we try to stick to the source code dependency instead of the artifact dependency. In the current situation, a patch has to be made for libbpf to fix the includes in bpf headers so that they work directly from libbpf/src. Signed-off-by: Sergey Kacheev Acked-by: Yonghong Song Link: https://lore.kernel.org/r/CAJVhQqUg6OKq6CpVJP5ng04Dg+z=igevPpmuxTqhsR3dKvd9+Q@mail.gmail.com Signed-off-by: Martin KaFai Lau --- tools/lib/bpf/bpf_tracing.h | 2 +- tools/lib/bpf/usdt.bpf.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index be076a4041ab..3803479dbe10 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -2,7 +2,7 @@ #ifndef __BPF_TRACING_H__ #define __BPF_TRACING_H__ -#include +#include "bpf_helpers.h" /* Scan the ARCH passed in from ARCH env variable (see Makefile) */ #if defined(__TARGET_ARCH_x86) diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h index 0bd4c135acc2..f6763300b26a 100644 --- a/tools/lib/bpf/usdt.bpf.h +++ b/tools/lib/bpf/usdt.bpf.h @@ -4,8 +4,8 @@ #define __USDT_BPF_H__ #include -#include -#include +#include "bpf_helpers.h" +#include "bpf_tracing.h" /* Below types and maps are internal implementation details of libbpf's USDT * support and are subjects to change. Also, bpf_usdt_xxx() API helpers should From 1e8e2efb34023c5113ec679eae3c59ae2cd57b13 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 3 Aug 2023 10:31:28 +0800 Subject: [PATCH 08/19] bpf: change bpf_alu_sign_string and bpf_movsx_string to static The bpf_alu_sign_string and bpf_movsx_string introduced in commit f835bb622299 ("bpf: Add kernel/bpftool asm support for new instructions") are only used in disasm.c now, change them to static. 
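As a stand-alone illustration of the pattern being made file-local (the table
is a simplified copy; the opcode values follow the BPF ISA encoding): a
designated-initializer string table indexed by the upper nibble of the
opcode, with static limiting the symbol to this translation unit.

  #include <stdio.h>

  #define BPF_DIV 0x30
  #define BPF_MOD 0x90

  static const char *const alu_sign_string[16] = {
      [BPF_DIV >> 4] = "s/=",
      [BPF_MOD >> 4] = "s%=",
  };

  int main(void)
  {
      printf("0x%02x -> %s\n", BPF_DIV, alu_sign_string[BPF_DIV >> 4]);
      printf("0x%02x -> %s\n", BPF_MOD, alu_sign_string[BPF_MOD >> 4]);
      return 0;
  }

Without static such arrays have external linkage even though nothing outside
disasm.c references them, which is the kind of "symbol was not declared,
should it be static?" warning the kernel test robot reported.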
Signed-off-by: Yang Yingliang Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202308050615.wxAn1v2J-lkp@intel.com/ Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20230803023128.3753323-1-yangyingliang@huawei.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/disasm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index ef7c107c7b8f..49940c26a227 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -87,12 +87,12 @@ const char *const bpf_alu_string[16] = { [BPF_END >> 4] = "endian", }; -const char *const bpf_alu_sign_string[16] = { +static const char *const bpf_alu_sign_string[16] = { [BPF_DIV >> 4] = "s/=", [BPF_MOD >> 4] = "s%=", }; -const char *const bpf_movsx_string[4] = { +static const char *const bpf_movsx_string[4] = { [0] = "(s8)", [1] = "(s16)", [3] = "(s32)", From 2369e52657d371c58da0b826f8b87f01611cfc59 Mon Sep 17 00:00:00 2001 From: Will Hawkins Date: Mon, 7 Aug 2023 10:06:48 -0400 Subject: [PATCH 09/19] bpf, docs: Formalize type notation and function semantics in ISA standard Give a single place where the shorthand for types are defined and the semantics of helper functions are described. Signed-off-by: Will Hawkins Acked-by: David Vernet Link: https://lore.kernel.org/r/20230807140651.122484-1-hawkinsw@obs.cr Signed-off-by: Martin KaFai Lau --- .../bpf/standardization/instruction-set.rst | 82 +++++++++++++++++-- 1 file changed, 74 insertions(+), 8 deletions(-) diff --git a/Documentation/bpf/standardization/instruction-set.rst b/Documentation/bpf/standardization/instruction-set.rst index 655494ac7af6..25be958130dc 100644 --- a/Documentation/bpf/standardization/instruction-set.rst +++ b/Documentation/bpf/standardization/instruction-set.rst @@ -10,9 +10,71 @@ This document specifies version 1.0 of the eBPF instruction set. Documentation conventions ========================= -For brevity, this document uses the type notion "u64", "u32", etc. -to mean an unsigned integer whose width is the specified number of bits, -and "s32", etc. to mean a signed integer of the specified number of bits. +For brevity and consistency, this document refers to families +of types using a shorthand syntax and refers to several expository, +mnemonic functions when describing the semantics of instructions. +The range of valid values for those types and the semantics of those +functions are defined in the following subsections. + +Types +----- +This document refers to integer types with the notation `SN` to specify +a type's signedness (`S`) and bit width (`N`), respectively. + +.. table:: Meaning of signedness notation. + + ==== ========= + `S` Meaning + ==== ========= + `u` unsigned + `s` signed + ==== ========= + +.. table:: Meaning of bit-width notation. + + ===== ========= + `N` Bit width + ===== ========= + `8` 8 bits + `16` 16 bits + `32` 32 bits + `64` 64 bits + `128` 128 bits + ===== ========= + +For example, `u32` is a type whose valid values are all the 32-bit unsigned +numbers and `s16` is a types whose valid values are all the 16-bit signed +numbers. + +Functions +--------- +* `htobe16`: Takes an unsigned 16-bit number in host-endian format and + returns the equivalent number as an unsigned 16-bit number in big-endian + format. +* `htobe32`: Takes an unsigned 32-bit number in host-endian format and + returns the equivalent number as an unsigned 32-bit number in big-endian + format. 
+* `htobe64`: Takes an unsigned 64-bit number in host-endian format and + returns the equivalent number as an unsigned 64-bit number in big-endian + format. +* `htole16`: Takes an unsigned 16-bit number in host-endian format and + returns the equivalent number as an unsigned 16-bit number in little-endian + format. +* `htole32`: Takes an unsigned 32-bit number in host-endian format and + returns the equivalent number as an unsigned 32-bit number in little-endian + format. +* `htole64`: Takes an unsigned 64-bit number in host-endian format and + returns the equivalent number as an unsigned 64-bit number in little-endian + format. +* `bswap16`: Takes an unsigned 16-bit number in either big- or little-endian + format and returns the equivalent number with the same bit width but + opposite endianness. +* `bswap32`: Takes an unsigned 32-bit number in either big- or little-endian + format and returns the equivalent number with the same bit width but + opposite endianness. +* `bswap64`: Takes an unsigned 64-bit number in either big- or little-endian + format and returns the equivalent number with the same bit width but + opposite endianness. Registers and calling convention ================================ @@ -252,19 +314,23 @@ are supported: 16, 32 and 64. Examples: -``BPF_ALU | BPF_TO_LE | BPF_END`` with imm = 16 means:: +``BPF_ALU | BPF_TO_LE | BPF_END`` with imm = 16/32/64 means:: dst = htole16(dst) + dst = htole32(dst) + dst = htole64(dst) -``BPF_ALU | BPF_TO_BE | BPF_END`` with imm = 64 means:: +``BPF_ALU | BPF_TO_BE | BPF_END`` with imm = 16/32/64 means:: + dst = htobe16(dst) + dst = htobe32(dst) dst = htobe64(dst) ``BPF_ALU64 | BPF_TO_LE | BPF_END`` with imm = 16/32/64 means:: - dst = bswap16 dst - dst = bswap32 dst - dst = bswap64 dst + dst = bswap16(dst) + dst = bswap32(dst) + dst = bswap64(dst) Jump instructions ----------------- From db2baf82b098aa10ac16f34e44732ec450fb11c7 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 7 Aug 2023 10:57:21 -0700 Subject: [PATCH 10/19] bpf: Fix an incorrect verification success with movsx insn syzbot reports a verifier bug which triggers a runtime panic. The test bpf program is: 0: (62) *(u32 *)(r10 -8) = 553656332 1: (bf) r1 = (s16)r10 2: (07) r1 += -8 3: (b7) r2 = 3 4: (bd) if r2 <= r1 goto pc+0 5: (85) call bpf_trace_printk#-138320 6: (b7) r0 = 0 7: (95) exit At insn 1, the current implementation keeps 'r1' as a frame pointer, which caused later bpf_trace_printk helper call crash since frame pointer address is not valid any more. Note that at insn 4, the 'pointer vs. scalar' comparison is allowed for privileged prog run. To fix the problem with above insn 1, the fix in the patch adopts similar pattern to existing 'R1 = (u32) R2' handling. For unprivileged prog run, verification will fail with 'R sign-extension part of pointer'. For privileged prog run, the dst_reg 'r1' will be marked as an unknown scalar, so later 'bpf_trace_pointk' helper will complain since it expected certain pointers. 
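To see numerically why the destination of "r1 = (s16)r10" can no longer be
treated as a pointer, here is a plain C sketch of the MOVSX semantics (the
address below is made up): only the low 16 bits survive and the upper bits
are filled with the sign bit, so the frame pointer degenerates into a small
signed scalar.

  #include <stdio.h>
  #include <stdint.h>

  /* r1 = (s16)r10: keep the low 16 bits of the source, replicate bit 15. */
  static int64_t movsx_s16(uint64_t src)
  {
      return (int64_t)(int16_t)src;
  }

  int main(void)
  {
      uint64_t fake_fp = 0xffffc90000a1fe30ULL;   /* made-up stack address */
      int64_t dst = movsx_s16(fake_fp);

      printf("src      = 0x%016llx\n", (unsigned long long)fake_fp);
      printf("(s16)src = %lld (0x%016llx)\n",
             (long long)dst, (unsigned long long)dst);
      return 0;
  }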
Reported-by: syzbot+d61b595e9205573133b3@syzkaller.appspotmail.com Fixes: 8100928c8814 ("bpf: Support new sign-extension mov insns") Signed-off-by: Yonghong Song Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20230807175721.671696-1-yonghong.song@linux.dev Signed-off-by: Martin KaFai Lau --- kernel/bpf/verifier.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 132f25dab931..4ccca1f6c998 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -13165,17 +13165,26 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) dst_reg->subreg_def = DEF_NOT_SUBREG; } else { /* case: R1 = (s8, s16 s32)R2 */ - bool no_sext; + if (is_pointer_value(env, insn->src_reg)) { + verbose(env, + "R%d sign-extension part of pointer\n", + insn->src_reg); + return -EACCES; + } else if (src_reg->type == SCALAR_VALUE) { + bool no_sext; - no_sext = src_reg->umax_value < (1ULL << (insn->off - 1)); - if (no_sext && need_id) - src_reg->id = ++env->id_gen; - copy_register_state(dst_reg, src_reg); - if (!no_sext) - dst_reg->id = 0; - coerce_reg_to_size_sx(dst_reg, insn->off >> 3); - dst_reg->live |= REG_LIVE_WRITTEN; - dst_reg->subreg_def = DEF_NOT_SUBREG; + no_sext = src_reg->umax_value < (1ULL << (insn->off - 1)); + if (no_sext && need_id) + src_reg->id = ++env->id_gen; + copy_register_state(dst_reg, src_reg); + if (!no_sext) + dst_reg->id = 0; + coerce_reg_to_size_sx(dst_reg, insn->off >> 3); + dst_reg->live |= REG_LIVE_WRITTEN; + dst_reg->subreg_def = DEF_NOT_SUBREG; + } else { + mark_reg_unknown(env, regs, insn->dst_reg); + } } } else { /* R1 = (u32) R2 */ From a5c0a42bd3746091777642d0588102a3a42ac031 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 7 Aug 2023 10:57:26 -0700 Subject: [PATCH 11/19] selftests/bpf: Add a movsx selftest for sign-extension of R10 A movsx selftest is added for sign-extension of frame pointer R10. The verification fails for both privileged and unprivileged prog runs. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20230807175726.672394-1-yonghong.song@linux.dev Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/progs/verifier_movsx.c | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c index 9568089932d7..be6f69a6b659 100644 --- a/tools/testing/selftests/bpf/progs/verifier_movsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c @@ -198,6 +198,28 @@ l0_%=: \ : __clobber_all); } +SEC("socket") +__description("MOV64SX, S16, R10 Sign Extension") +__failure __msg("R1 type=scalar expected=fp, pkt, pkt_meta, map_key, map_value, mem, ringbuf_mem, buf, trusted_ptr_") +__failure_unpriv __msg_unpriv("R10 sign-extension part of pointer") +__naked void mov64sx_s16_r10(void) +{ + asm volatile (" \ + r1 = 553656332; \ + *(u32 *)(r10 - 8) = r1; \ + r1 = (s16)r10; \ + r1 += -8; \ + r2 = 3; \ + if r2 <= r1 goto l0_%=; \ +l0_%=: \ + call %[bpf_trace_printk]; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_trace_printk) + : __clobber_all); +} + #else SEC("socket") From a3c485a5d8d47af5d2d1a0e5c3b7a1ed223669f9 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Aug 2023 10:59:54 +0200 Subject: [PATCH 12/19] bpf: Add support for bpf_get_func_ip helper for uprobe program Adding support for bpf_get_func_ip helper for uprobe program to return probed address for both uprobe and return uprobe. 
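In BPF program terms the new behavior looks like the condensed sketch below
(the selftests added later in this series contain the full version;
"my_func" stands for any probed user-space function):

  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>

  char _license[] SEC("license") = "GPL";

  __u64 entry_ip, ret_ip;

  /* Entry uprobe: bpf_get_func_ip() returns the probed address. */
  SEC("uprobe//proc/self/exe:my_func")
  int BPF_UPROBE(on_entry)
  {
      entry_ip = bpf_get_func_ip(ctx);
      return 0;
  }

  /* Return uprobe: bpf_get_func_ip() returns the same probed address. */
  SEC("uretprobe//proc/self/exe:my_func")
  int BPF_URETPROBE(on_return)
  {
      ret_ip = bpf_get_func_ip(ctx);
      return 0;
  }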
We discussed this in [1] and agreed that uprobe can have special use of bpf_get_func_ip helper that differs from kprobe. The kprobe bpf_get_func_ip returns: - address of the function if probe is attach on function entry for both kprobe and return kprobe - 0 if the probe is not attach on function entry The uprobe bpf_get_func_ip returns: - address of the probe for both uprobe and return uprobe The reason for this semantic change is that kernel can't really tell if the probe user space address is function entry. The uprobe program is actually kprobe type program attached as uprobe. One of the consequences of this design is that uprobes do not have its own set of helpers, but share them with kprobes. As we need different functionality for bpf_get_func_ip helper for uprobe, I'm adding the bool value to the bpf_trace_run_ctx, so the helper can detect that it's executed in uprobe context and call specific code. The is_uprobe bool is set as true in bpf_prog_run_array_sleepable, which is currently used only for executing bpf programs in uprobe. Renaming bpf_prog_run_array_sleepable to bpf_prog_run_array_uprobe to address that it's only used for uprobes and that it sets the run_ctx.is_uprobe as suggested by Yafang Shao. Suggested-by: Andrii Nakryiko Tested-by: Alan Maguire [1] https://lore.kernel.org/bpf/CAEf4BzZ=xLVkG5eurEuvLU79wAMtwho7ReR+XJAgwhFF4M-7Cg@mail.gmail.com/ Signed-off-by: Jiri Olsa Tested-by: Viktor Malik Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20230807085956.2344866-2-jolsa@kernel.org Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 9 +++++++-- include/uapi/linux/bpf.h | 7 ++++++- kernel/trace/bpf_trace.c | 11 ++++++++++- kernel/trace/trace_probe.h | 5 +++++ kernel/trace/trace_uprobe.c | 7 +------ tools/include/uapi/linux/bpf.h | 7 ++++++- 6 files changed, 35 insertions(+), 11 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index abe75063630b..db3fe5a61b05 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1819,6 +1819,7 @@ struct bpf_cg_run_ctx { struct bpf_trace_run_ctx { struct bpf_run_ctx run_ctx; u64 bpf_cookie; + bool is_uprobe; }; struct bpf_tramp_run_ctx { @@ -1867,6 +1868,8 @@ bpf_prog_run_array(const struct bpf_prog_array *array, if (unlikely(!array)) return ret; + run_ctx.is_uprobe = false; + migrate_disable(); old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); item = &array->items[0]; @@ -1891,8 +1894,8 @@ bpf_prog_run_array(const struct bpf_prog_array *array, * rcu-protected dynamically sized maps. */ static __always_inline u32 -bpf_prog_run_array_sleepable(const struct bpf_prog_array __rcu *array_rcu, - const void *ctx, bpf_prog_run_fn run_prog) +bpf_prog_run_array_uprobe(const struct bpf_prog_array __rcu *array_rcu, + const void *ctx, bpf_prog_run_fn run_prog) { const struct bpf_prog_array_item *item; const struct bpf_prog *prog; @@ -1906,6 +1909,8 @@ bpf_prog_run_array_sleepable(const struct bpf_prog_array __rcu *array_rcu, rcu_read_lock_trace(); migrate_disable(); + run_ctx.is_uprobe = true; + array = rcu_dereference_check(array_rcu, rcu_read_lock_trace_held()); if (unlikely(!array)) goto out; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 70da85200695..d21deb46f49f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5086,9 +5086,14 @@ union bpf_attr { * u64 bpf_get_func_ip(void *ctx) * Description * Get address of the traced function (for tracing and kprobe programs). 
+ * + * When called for kprobe program attached as uprobe it returns + * probe address for both entry and return uprobe. + * * Return - * Address of the traced function. + * Address of the traced function for kprobe. * 0 for kprobes placed within the function (not at the entry). + * Address of the probe for uprobe and return uprobe. * * u64 bpf_get_attach_cookie(void *ctx) * Description diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index d6296d51a826..792445e1f3f0 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1055,7 +1055,16 @@ static unsigned long get_entry_ip(unsigned long fentry_ip) BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs) { - struct kprobe *kp = kprobe_running(); + struct bpf_trace_run_ctx *run_ctx __maybe_unused; + struct kprobe *kp; + +#ifdef CONFIG_UPROBES + run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx); + if (run_ctx->is_uprobe) + return ((struct uprobe_dispatch_data *)current->utask->vaddr)->bp_addr; +#endif + + kp = kprobe_running(); if (!kp || !(kp->flags & KPROBE_FLAG_ON_FUNC_ENTRY)) return 0; diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 01ea148723de..7dde806be91e 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -519,3 +519,8 @@ void __trace_probe_log_err(int offset, int err); #define trace_probe_log_err(offs, err) \ __trace_probe_log_err(offs, TP_ERR_##err) + +struct uprobe_dispatch_data { + struct trace_uprobe *tu; + unsigned long bp_addr; +}; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 555c223c3232..576b3bcb8ebd 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -88,11 +88,6 @@ static struct trace_uprobe *to_trace_uprobe(struct dyn_event *ev) static int register_uprobe_event(struct trace_uprobe *tu); static int unregister_uprobe_event(struct trace_uprobe *tu); -struct uprobe_dispatch_data { - struct trace_uprobe *tu; - unsigned long bp_addr; -}; - static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs); static int uretprobe_dispatcher(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs); @@ -1352,7 +1347,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, if (bpf_prog_array_valid(call)) { u32 ret; - ret = bpf_prog_run_array_sleepable(call->prog_array, regs, bpf_prog_run); + ret = bpf_prog_run_array_uprobe(call->prog_array, regs, bpf_prog_run); if (!ret) return; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 70da85200695..d21deb46f49f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5086,9 +5086,14 @@ union bpf_attr { * u64 bpf_get_func_ip(void *ctx) * Description * Get address of the traced function (for tracing and kprobe programs). + * + * When called for kprobe program attached as uprobe it returns + * probe address for both entry and return uprobe. + * * Return - * Address of the traced function. + * Address of the traced function for kprobe. * 0 for kprobes placed within the function (not at the entry). + * Address of the probe for uprobe and return uprobe. 
* * u64 bpf_get_attach_cookie(void *ctx) * Description From e43163ed1c0a655083a811404e422f4c045637eb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Aug 2023 10:59:55 +0200 Subject: [PATCH 13/19] selftests/bpf: Add bpf_get_func_ip tests for uprobe on function entry Adding get_func_ip tests for uprobe on function entry that validates that bpf_get_func_ip returns proper values from both uprobe and return uprobe. Tested-by: Alan Maguire Signed-off-by: Jiri Olsa Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20230807085956.2344866-3-jolsa@kernel.org Signed-off-by: Martin KaFai Lau --- .../bpf/prog_tests/get_func_ip_test.c | 11 ++++++++ .../selftests/bpf/progs/get_func_ip_test.c | 25 +++++++++++++++++-- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c index fede8ef58b5b..114cdbc04caf 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c +++ b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c @@ -1,6 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 #include #include "get_func_ip_test.skel.h" +#include "get_func_ip_uprobe_test.skel.h" + +static noinline void uprobe_trigger(void) +{ +} static void test_function_entry(void) { @@ -20,6 +25,8 @@ static void test_function_entry(void) if (!ASSERT_OK(err, "get_func_ip_test__attach")) goto cleanup; + skel->bss->uprobe_trigger = (unsigned long) uprobe_trigger; + prog_fd = bpf_program__fd(skel->progs.test1); err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); @@ -30,11 +37,15 @@ static void test_function_entry(void) ASSERT_OK(err, "test_run"); + uprobe_trigger(); + ASSERT_EQ(skel->bss->test1_result, 1, "test1_result"); ASSERT_EQ(skel->bss->test2_result, 1, "test2_result"); ASSERT_EQ(skel->bss->test3_result, 1, "test3_result"); ASSERT_EQ(skel->bss->test4_result, 1, "test4_result"); ASSERT_EQ(skel->bss->test5_result, 1, "test5_result"); + ASSERT_EQ(skel->bss->test7_result, 1, "test7_result"); + ASSERT_EQ(skel->bss->test8_result, 1, "test8_result"); cleanup: get_func_ip_test__destroy(skel); diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_test.c index 8559e698b40d..8956eb78a226 100644 --- a/tools/testing/selftests/bpf/progs/get_func_ip_test.c +++ b/tools/testing/selftests/bpf/progs/get_func_ip_test.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 -#include +#include "vmlinux.h" #include #include -#include char _license[] SEC("license") = "GPL"; @@ -83,3 +82,25 @@ int test6(struct pt_regs *ctx) test6_result = (const void *) addr == 0; return 0; } + +unsigned long uprobe_trigger; + +__u64 test7_result = 0; +SEC("uprobe//proc/self/exe:uprobe_trigger") +int BPF_UPROBE(test7) +{ + __u64 addr = bpf_get_func_ip(ctx); + + test7_result = (const void *) addr == (const void *) uprobe_trigger; + return 0; +} + +__u64 test8_result = 0; +SEC("uretprobe//proc/self/exe:uprobe_trigger") +int BPF_URETPROBE(test8, int ret) +{ + __u64 addr = bpf_get_func_ip(ctx); + + test8_result = (const void *) addr == (const void *) uprobe_trigger; + return 0; +} From 7febf573a58b55170782985c031dfaf805662297 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 7 Aug 2023 10:59:56 +0200 Subject: [PATCH 14/19] selftests/bpf: Add bpf_get_func_ip test for uprobe inside function Adding get_func_ip test for uprobe inside function that validates the get_func_ip helper returns correct probe address value. 
Tested-by: Alan Maguire Signed-off-by: Jiri Olsa Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20230807085956.2344866-4-jolsa@kernel.org Signed-off-by: Martin KaFai Lau --- .../bpf/prog_tests/get_func_ip_test.c | 46 +++++++++++++++++-- .../bpf/progs/get_func_ip_uprobe_test.c | 18 ++++++++ 2 files changed, 60 insertions(+), 4 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/get_func_ip_uprobe_test.c diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c index 114cdbc04caf..c40242dfa8fb 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c +++ b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c @@ -51,11 +51,17 @@ cleanup: get_func_ip_test__destroy(skel); } -/* test6 is x86_64 specific because of the instruction - * offset, disabling it for all other archs - */ #ifdef __x86_64__ -static void test_function_body(void) +extern void uprobe_trigger_body(void); +asm( +".globl uprobe_trigger_body\n" +".type uprobe_trigger_body, @function\n" +"uprobe_trigger_body:\n" +" nop\n" +" ret\n" +); + +static void test_function_body_kprobe(void) { struct get_func_ip_test *skel = NULL; LIBBPF_OPTS(bpf_test_run_opts, topts); @@ -67,6 +73,9 @@ static void test_function_body(void) if (!ASSERT_OK_PTR(skel, "get_func_ip_test__open")) return; + /* test6 is x86_64 specific and is disabled by default, + * enable it for body test. + */ bpf_program__set_autoload(skel->progs.test6, true); err = get_func_ip_test__load(skel); @@ -90,6 +99,35 @@ cleanup: bpf_link__destroy(link6); get_func_ip_test__destroy(skel); } + +static void test_function_body_uprobe(void) +{ + struct get_func_ip_uprobe_test *skel = NULL; + int err; + + skel = get_func_ip_uprobe_test__open_and_load(); + if (!ASSERT_OK_PTR(skel, "get_func_ip_uprobe_test__open_and_load")) + return; + + err = get_func_ip_uprobe_test__attach(skel); + if (!ASSERT_OK(err, "get_func_ip_test__attach")) + goto cleanup; + + skel->bss->uprobe_trigger_body = (unsigned long) uprobe_trigger_body; + + uprobe_trigger_body(); + + ASSERT_EQ(skel->bss->test1_result, 1, "test1_result"); + +cleanup: + get_func_ip_uprobe_test__destroy(skel); +} + +static void test_function_body(void) +{ + test_function_body_kprobe(); + test_function_body_uprobe(); +} #else #define test_function_body() #endif diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_uprobe_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_uprobe_test.c new file mode 100644 index 000000000000..052f8a4345a8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/get_func_ip_uprobe_test.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +unsigned long uprobe_trigger_body; + +__u64 test1_result = 0; +SEC("uprobe//proc/self/exe:uprobe_trigger_body+1") +int BPF_UPROBE(test1) +{ + __u64 addr = bpf_get_func_ip(ctx); + + test1_result = (const void *) addr == (const void *) uprobe_trigger_body + 1; + return 0; +} From e546a119801f732f5c5c620d59be2e85aa732a8b Mon Sep 17 00:00:00 2001 From: Will Hawkins Date: Tue, 8 Aug 2023 17:25:01 -0400 Subject: [PATCH 15/19] bpf, docs: Fix small typo and define semantics of sign extension Add additional precision on the semantics of the sign extension operations in BPF. In addition, fix a very minor typo. 
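The definition added below can be cross-checked with a few lines of C. This
is a reference sketch only, mirroring the glossary wording: copy the low X
bits of A, then fill the remaining Y - X bits with the most-significant bit
of A.

  #include <stdio.h>
  #include <stdint.h>

  /* Sign extend the X-bit number a to a Y-bit number (1 <= x <= y <= 64). */
  static uint64_t sign_extend(uint64_t a, unsigned int x, unsigned int y)
  {
      uint64_t mask_x = (x == 64) ? ~0ULL : (1ULL << x) - 1;
      uint64_t mask_y = (y == 64) ? ~0ULL : (1ULL << y) - 1;
      uint64_t b = a & mask_x;              /* copy the low X bits */

      if ((a >> (x - 1)) & 1)               /* most-significant bit of A */
          b |= mask_y & ~mask_x;            /* fill bits X..Y-1 with 1s */
      return b;
  }

  int main(void)
  {
      /* The example from the text: A = 10000110 -> B = 11111111 10000110 */
      printf("0x%02x -> 0x%04llx\n", 0x86,
             (unsigned long long)sign_extend(0x86, 8, 16));
      return 0;
  }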
Signed-off-by: Will Hawkins Acked-by: David Vernet Link: https://lore.kernel.org/r/20230808212503.197834-1-hawkinsw@obs.cr Signed-off-by: Martin KaFai Lau --- .../bpf/standardization/instruction-set.rst | 39 ++++++++++++++----- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/Documentation/bpf/standardization/instruction-set.rst b/Documentation/bpf/standardization/instruction-set.rst index 25be958130dc..4f73e9dc8d9e 100644 --- a/Documentation/bpf/standardization/instruction-set.rst +++ b/Documentation/bpf/standardization/instruction-set.rst @@ -76,6 +76,27 @@ Functions format and returns the equivalent number with the same bit width but opposite endianness. + +Definitions +----------- + +.. glossary:: + + Sign Extend + To `sign extend an` ``X`` `-bit number, A, to a` ``Y`` `-bit number, B ,` means to + + #. Copy all ``X`` bits from `A` to the lower ``X`` bits of `B`. + #. Set the value of the remaining ``Y`` - ``X`` bits of `B` to the value of + the most-significant bit of `A`. + +.. admonition:: Example + + Sign extend an 8-bit number ``A`` to a 16-bit number ``B`` on a big-endian platform: + :: + + A: 10000110 + B: 11111111 10000110 + Registers and calling convention ================================ @@ -234,7 +255,7 @@ BPF_SMOD 0x90 1 dst = (src != 0) ? (dst s% src) : dst BPF_XOR 0xa0 0 dst ^= src BPF_MOV 0xb0 0 dst = src BPF_MOVSX 0xb0 8/16/32 dst = (s8,s16,s32)src -BPF_ARSH 0xc0 0 sign extending dst >>= (src & mask) +BPF_ARSH 0xc0 0 :term:`sign extending` dst >>= (src & mask) BPF_END 0xd0 0 byte swap operations (see `Byte swap instructions`_ below) ========= ===== ======= ========================================================== @@ -266,22 +287,22 @@ where '(u32)' indicates that the upper 32 bits are zeroed. Note that most instructions have instruction offset of 0. Only three instructions (``BPF_SDIV``, ``BPF_SMOD``, ``BPF_MOVSX``) have a non-zero offset. -The devision and modulo operations support both unsigned and signed flavors. +The division and modulo operations support both unsigned and signed flavors. For unsigned operations (``BPF_DIV`` and ``BPF_MOD``), for ``BPF_ALU``, 'imm' is interpreted as a 32-bit unsigned value. For ``BPF_ALU64``, -'imm' is first sign extended from 32 to 64 bits, and then interpreted as -a 64-bit unsigned value. +'imm' is first :term:`sign extended` from 32 to 64 bits, and then +interpreted as a 64-bit unsigned value. For signed operations (``BPF_SDIV`` and ``BPF_SMOD``), for ``BPF_ALU``, 'imm' is interpreted as a 32-bit signed value. For ``BPF_ALU64``, 'imm' -is first sign extended from 32 to 64 bits, and then interpreted as a -64-bit signed value. +is first :term:`sign extended` from 32 to 64 bits, and then +interpreted as a 64-bit signed value. The ``BPF_MOVSX`` instruction does a move operation with sign extension. -``BPF_ALU | BPF_MOVSX`` sign extends 8-bit and 16-bit operands into 32 +``BPF_ALU | BPF_MOVSX`` :term:`sign extends` 8-bit and 16-bit operands into 32 bit operands, and zeroes the remaining upper 32 bits. -``BPF_ALU64 | BPF_MOVSX`` sign extends 8-bit, 16-bit, and 32-bit +``BPF_ALU64 | BPF_MOVSX`` :term:`sign extends` 8-bit, 16-bit, and 32-bit operands into 64 bit operands. 
Shift operations use a mask of 0x3F (63) for 64-bit operations and 0x1F (31) @@ -466,7 +487,7 @@ Where size is one of: ``BPF_B``, ``BPF_H``, ``BPF_W``, or ``BPF_DW`` and Sign-extension load operations ------------------------------ -The ``BPF_MEMSX`` mode modifier is used to encode sign-extension load +The ``BPF_MEMSX`` mode modifier is used to encode :term:`sign-extension` load instructions that transfer data between a register and memory. ``BPF_MEMSX | | BPF_LDX`` means:: From 96ead1e702902c21513354174942415eed0958f3 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Tue, 8 Aug 2023 09:28:58 -0700 Subject: [PATCH 16/19] selftests/bpf: remove duplicated functions The file cgroup_tcp_skb.c contains redundant implementations of the similar functions (create_server_sock_v6(), connect_client_server_v6() and get_sock_port_v6()) found in network_helpers.c. Let's eliminate these duplicated functions. Changes from v1: - Remove get_sock_port_v6() as well. v1: https://lore.kernel.org/all/20230807193840.567962-1-thinker.li@gmail.com/ Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20230808162858.326871-1-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/prog_tests/cgroup_tcp_skb.c | 93 ++++--------------- 1 file changed, 17 insertions(+), 76 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c b/tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c index d686ef19f705..a1542faf7873 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c @@ -9,6 +9,7 @@ #include "testing_helpers.h" #include "cgroup_tcp_skb.skel.h" #include "cgroup_tcp_skb.h" +#include "network_helpers.h" #define CGROUP_TCP_SKB_PATH "/test_cgroup_tcp_skb" @@ -58,80 +59,13 @@ static int create_client_sock_v6(void) return fd; } -static int create_server_sock_v6(void) -{ - struct sockaddr_in6 addr = { - .sin6_family = AF_INET6, - .sin6_port = htons(0), - .sin6_addr = IN6ADDR_LOOPBACK_INIT, - }; - int fd, err; - - fd = socket(AF_INET6, SOCK_STREAM, 0); - if (fd < 0) { - perror("socket"); - return -1; - } - - err = bind(fd, (struct sockaddr *)&addr, sizeof(addr)); - if (err < 0) { - perror("bind"); - return -1; - } - - err = listen(fd, 1); - if (err < 0) { - perror("listen"); - return -1; - } - - return fd; -} - -static int get_sock_port_v6(int fd) -{ - struct sockaddr_in6 addr; - socklen_t len; - int err; - - len = sizeof(addr); - err = getsockname(fd, (struct sockaddr *)&addr, &len); - if (err < 0) { - perror("getsockname"); - return -1; - } - - return ntohs(addr.sin6_port); -} - -static int connect_client_server_v6(int client_fd, int listen_fd) -{ - struct sockaddr_in6 addr = { - .sin6_family = AF_INET6, - .sin6_addr = IN6ADDR_LOOPBACK_INIT, - }; - int err, port; - - port = get_sock_port_v6(listen_fd); - if (port < 0) - return -1; - addr.sin6_port = htons(port); - - err = connect(client_fd, (struct sockaddr *)&addr, sizeof(addr)); - if (err < 0) { - perror("connect"); - return -1; - } - - return 0; -} - /* Connect to the server in a cgroup from the outside of the cgroup. 
*/ static int talk_to_cgroup(int *client_fd, int *listen_fd, int *service_fd, struct cgroup_tcp_skb *skel) { int err, cp; char buf[5]; + int port; /* Create client & server socket */ err = join_root_cgroup(); @@ -143,14 +77,17 @@ static int talk_to_cgroup(int *client_fd, int *listen_fd, int *service_fd, err = join_cgroup(CGROUP_TCP_SKB_PATH); if (!ASSERT_OK(err, "join_cgroup")) return -1; - *listen_fd = create_server_sock_v6(); + *listen_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); if (!ASSERT_GE(*listen_fd, 0, "listen_fd")) return -1; - skel->bss->g_sock_port = get_sock_port_v6(*listen_fd); + port = get_socket_local_port(*listen_fd); + if (!ASSERT_GE(port, 0, "get_socket_local_port")) + return -1; + skel->bss->g_sock_port = ntohs(port); /* Connect client to server */ - err = connect_client_server_v6(*client_fd, *listen_fd); - if (!ASSERT_OK(err, "connect_client_server_v6")) + err = connect_fd_to_fd(*client_fd, *listen_fd, 0); + if (!ASSERT_OK(err, "connect_fd_to_fd")) return -1; *service_fd = accept(*listen_fd, NULL, NULL); if (!ASSERT_GE(*service_fd, 0, "service_fd")) @@ -175,12 +112,13 @@ static int talk_to_outside(int *client_fd, int *listen_fd, int *service_fd, { int err, cp; char buf[5]; + int port; /* Create client & server socket */ err = join_root_cgroup(); if (!ASSERT_OK(err, "join_root_cgroup")) return -1; - *listen_fd = create_server_sock_v6(); + *listen_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); if (!ASSERT_GE(*listen_fd, 0, "listen_fd")) return -1; err = join_cgroup(CGROUP_TCP_SKB_PATH); @@ -192,11 +130,14 @@ static int talk_to_outside(int *client_fd, int *listen_fd, int *service_fd, err = join_root_cgroup(); if (!ASSERT_OK(err, "join_root_cgroup")) return -1; - skel->bss->g_sock_port = get_sock_port_v6(*listen_fd); + port = get_socket_local_port(*listen_fd); + if (!ASSERT_GE(port, 0, "get_socket_local_port")) + return -1; + skel->bss->g_sock_port = ntohs(port); /* Connect client to server */ - err = connect_client_server_v6(*client_fd, *listen_fd); - if (!ASSERT_OK(err, "connect_client_server_v6")) + err = connect_fd_to_fd(*client_fd, *listen_fd, 0); + if (!ASSERT_OK(err, "connect_fd_to_fd")) return -1; *service_fd = accept(*listen_fd, NULL, NULL); if (!ASSERT_GE(*service_fd, 0, "service_fd")) From 898f55f50a00f56dac1ef55f5478c10a7511d0a6 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 8 Aug 2023 19:27:55 +0300 Subject: [PATCH 17/19] selftests/bpf: relax expected log messages to allow emitting BPF_ST Update [1] to LLVM BPF backend seeks to enable generation of BPF_ST instruction when CPUv4 is selected. This affects expected log messages for the following selftests: - log_fixup/missing_map - spin_lock/lock_id_mapval_preserve - spin_lock/lock_id_innermapval_preserve Expected messages in these tests hard-code instruction numbers for BPF programs compiled from C. These instruction numbers change when BPF_ST is allowed because single BPF_ST instruction replaces a pair of BPF_MOV/BPF_STX instructions, e.g.: r1 = 42; *(u32 *)(r10 - 8) = r1; ---> *(u32 *)(r10 - 8) = 42; This commit updates expected log messages to avoid matching specific instruction numbers (program position still could be uniquely identified). 
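The effect of the relaxed patterns can be reproduced outside the harness with
plain POSIX regex calls, the same way the updated test does (the two log
lines below are shortened, made-up samples, not real verifier output):

  #include <regex.h>
  #include <stdio.h>

  int main(void)
  {
      const char *pattern = "[0-9]\\+: (bf) r1 = r0";
      const char *logs[] = {
          "8: (bf) r1 = r0          ; R0_w=map_value(id=1,...)",
          "7: (bf) r1 = r0          ; R0_w=map_value(id=1,...)",
      };
      regex_t re;
      int i;

      if (regcomp(&re, pattern, REG_NOSUB))
          return 1;
      for (i = 0; i < 2; i++)
          printf("\"%s\" -> %s\n", logs[i],
                 regexec(&re, logs[i], 0, NULL, 0) == 0 ? "match" : "no match");
      regfree(&re);
      return 0;
  }

Like the selftest, this relies on regcomp() without REG_EXTENDED, where \+ is
the GNU "one or more" operator for basic regular expressions.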
[1] https://reviews.llvm.org/D140804 "[BPF] support for BPF_ST instruction in codegen" Signed-off-by: Eduard Zingerman Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20230808162755.392606-1-eddyz87@gmail.com Signed-off-by: Martin KaFai Lau --- .../selftests/bpf/prog_tests/log_fixup.c | 2 +- .../selftests/bpf/prog_tests/spin_lock.c | 37 ++++++++++++++++--- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c index dba71d98a227..effd78b2a657 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_fixup.c +++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c @@ -124,7 +124,7 @@ static void missing_map(void) ASSERT_FALSE(bpf_map__autocreate(skel->maps.missing_map), "missing_map_autocreate"); ASSERT_HAS_SUBSTR(log_buf, - "8: \n" + ": \n" "BPF map 'missing_map' is referenced but wasn't created\n", "log_buf"); diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c index d9270bd3d920..f29c08d93beb 100644 --- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include @@ -19,12 +20,16 @@ static struct { "; R1_w=map_value(off=0,ks=4,vs=4,imm=0)\n2: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mapval_preserve", - "8: (bf) r1 = r0 ; R0_w=map_value(id=1,off=0,ks=4,vs=8,imm=0) " - "R1_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)\n9: (85) call bpf_this_cpu_ptr#154\n" + "[0-9]\\+: (bf) r1 = r0 ;" + " R0_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)" + " R1_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)\n" + "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_innermapval_preserve", - "13: (bf) r1 = r0 ; R0=map_value(id=2,off=0,ks=4,vs=8,imm=0) " - "R1_w=map_value(id=2,off=0,ks=4,vs=8,imm=0)\n14: (85) call bpf_this_cpu_ptr#154\n" + "[0-9]\\+: (bf) r1 = r0 ;" + " R0=map_value(id=2,off=0,ks=4,vs=8,imm=0)" + " R1_w=map_value(id=2,off=0,ks=4,vs=8,imm=0)\n" + "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" }, { "lock_id_mismatch_kptr_global", "bpf_spin_unlock of different lock" }, @@ -45,6 +50,24 @@ static struct { { "lock_id_mismatch_innermapval_mapval", "bpf_spin_unlock of different lock" }, }; +static int match_regex(const char *pattern, const char *string) +{ + int err, rc; + regex_t re; + + err = regcomp(&re, pattern, REG_NOSUB); + if (err) { + char errbuf[512]; + + regerror(err, &re, errbuf, sizeof(errbuf)); + PRINT_FAIL("Can't compile regex: %s\n", errbuf); + return -1; + } + rc = regexec(&re, string, 0, NULL, 0); + regfree(&re); + return rc == 0 ? 
1 : 0; +} + static void test_spin_lock_fail_prog(const char *prog_name, const char *err_msg) { LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf, @@ -74,7 +97,11 @@ static void test_spin_lock_fail_prog(const char *prog_name, const char *err_msg) goto end; } - if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) { + ret = match_regex(err_msg, log_buf); + if (!ASSERT_GE(ret, 0, "match_regex")) + goto end; + + if (!ASSERT_TRUE(ret, "no match for expected error message")) { fprintf(stderr, "Expected: %s\n", err_msg); fprintf(stderr, "Verifier: %s\n", log_buf); } From 526bc5ba19e8701a2b03fdbd2c01e684f0cf9010 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Tue, 8 Aug 2023 22:55:31 +0800 Subject: [PATCH 18/19] bpf: lru: Remove unused declaration bpf_lru_promote() Commit 3a08c2fd7634 ("bpf: LRU List") declared but never implemented this. Signed-off-by: Yue Haibing Link: https://lore.kernel.org/r/20230808145531.19692-1-yuehaibing@huawei.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/bpf_lru_list.h | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h index 8f3c8b2b4490..cbd8d3720c2b 100644 --- a/kernel/bpf/bpf_lru_list.h +++ b/kernel/bpf/bpf_lru_list.h @@ -75,6 +75,5 @@ void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset, void bpf_lru_destroy(struct bpf_lru *lru); struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru, u32 hash); void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node); -void bpf_lru_promote(struct bpf_lru *lru, struct bpf_lru_node *node); #endif From 2adbb7637fd1fcec93f4680ddb5ddbbd1a91aefb Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Tue, 8 Aug 2023 22:57:41 +0800 Subject: [PATCH 19/19] bpf: btf: Remove two unused function declarations Commit db559117828d ("bpf: Consolidate spin_lock, timer management into btf_record") removed the implementations but leave declarations. Signed-off-by: Yue Haibing Link: https://lore.kernel.org/r/20230808145741.33292-1-yuehaibing@huawei.com Signed-off-by: Martin KaFai Lau --- include/linux/btf.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/btf.h b/include/linux/btf.h index cac9f304e27a..df64cc642074 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -204,8 +204,6 @@ u32 btf_nr_types(const struct btf *btf); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); -int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); -int btf_find_timer(const struct btf *btf, const struct btf_type *t); struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t, u32 field_mask, u32 value_size); int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec);
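For completeness, a tiny illustration (made-up names, not kernel code) of why
declarations like the ones removed in the last two patches can linger
unnoticed: the compiler only needs the prototype, and the linker only needs a
definition once somebody actually calls the function.

  /* lru_demo.h (inlined here): declared, never defined anywhere. */
  struct lru_node;
  void lru_promote(struct lru_node *node);

  /* lru_demo.c */
  #include <stdio.h>

  int main(void)
  {
      /* No call to lru_promote(), so this builds and links cleanly.
       * Uncommenting the call would only fail at link time with an
       * undefined-reference error.
       */
      /* lru_promote(0); */
      printf("built without a definition of lru_promote()\n");
      return 0;
  }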