mirror of
https://github.com/Fishwaldo/Star64_linux.git
synced 2025-06-21 22:21:21 +00:00
bpf: implement getsockopt and setsockopt hooks
Implement new BPF_PROG_TYPE_CGROUP_SOCKOPT program type and BPF_CGROUP_{G,S}ETSOCKOPT cgroup hooks. BPF_CGROUP_SETSOCKOPT can modify user setsockopt arguments before passing them down to the kernel or bypass the kernel completely. BPF_CGROUP_GETSOCKOPT can inspect/modify getsockopt arguments that the kernel returns. Both hooks reuse existing PTR_TO_PACKET{,_END} infrastructure. The buffer memory is pre-allocated (because I don't think there is a precedent for working with __user memory from bpf). This might be slow to do for each {s,g}etsockopt call; that's why I've added __cgroup_bpf_prog_array_is_empty that exits early if there is nothing attached to a cgroup. Note, however, that there is a race between __cgroup_bpf_prog_array_is_empty and BPF_PROG_RUN_ARRAY where cgroup program layout might have changed; this should not be a problem because in general there is a race between multiple calls to {s,g}etsockopt and user adding/removing bpf progs from a cgroup. The return code of the BPF program is handled as follows: * 0: EPERM * 1: success, continue with next BPF program in the cgroup chain v9: * allow overwriting setsockopt arguments (Alexei Starovoitov): * use set_fs (same as kernel_setsockopt) * buffer is always kzalloc'd (no small on-stack buffer) v8: * use s32 for optlen (Andrii Nakryiko) v7: * return only 0 or 1 (Alexei Starovoitov) * always run all progs (Alexei Starovoitov) * use optval=0 as kernel bypass in setsockopt (Alexei Starovoitov) (decided to use optval=-1 instead, optval=0 might be a valid input) * call getsockopt hook after kernel handlers (Alexei Starovoitov) v6: * rework cgroup chaining; stop as soon as bpf program returns 0 or 2; see patch with the documentation for the details * drop Andrii's and Martin's Acked-by (not sure they are comfortable with the new state of things) v5: * skip copy_to_user() and put_user() when ret == 0 (Martin Lau) v4: * don't export bpf_sk_fullsock helper (Martin Lau) * size != sizeof(__u64) for uapi pointers (Martin Lau) * offsetof instead of bpf_ctx_range when checking ctx access (Martin Lau) v3: * typos in BPF_PROG_CGROUP_SOCKOPT_RUN_ARRAY comments (Andrii Nakryiko) * reverse christmas tree in BPF_PROG_CGROUP_SOCKOPT_RUN_ARRAY (Andrii Nakryiko) * use __bpf_md_ptr instead of __u32 for optval{,_end} (Martin Lau) * use BPF_FIELD_SIZEOF() for consistency (Martin Lau) * new CG_SOCKOPT_ACCESS macro to wrap repeated parts v2: * moved bpf_sockopt_kern fields around to remove a hole (Martin Lau) * aligned bpf_sockopt_kern->buf to 8 bytes (Martin Lau) * bpf_prog_array_is_empty instead of bpf_prog_array_length (Martin Lau) * added [0,2] return code check to verifier (Martin Lau) * dropped unused buf[64] from the stack (Martin Lau) * use PTR_TO_SOCKET for bpf_sockopt->sk (Martin Lau) * dropped bpf_target_off from ctx rewrites (Martin Lau) * use return code for kernel bypass (Martin Lau & Andrii Nakryiko) Cc: Andrii Nakryiko <andriin@fb.com> Cc: Martin Lau <kafai@fb.com> Signed-off-by: Stanislav Fomichev <sdf@google.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
parent
3b1c667e47
commit
0d01da6afc
11 changed files with 472 additions and 1 deletions
30
net/socket.c
30
net/socket.c
|
@ -2051,6 +2051,8 @@ SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
|
|||
static int __sys_setsockopt(int fd, int level, int optname,
|
||||
char __user *optval, int optlen)
|
||||
{
|
||||
mm_segment_t oldfs = get_fs();
|
||||
char *kernel_optval = NULL;
|
||||
int err, fput_needed;
|
||||
struct socket *sock;
|
||||
|
||||
|
@ -2063,6 +2065,22 @@ static int __sys_setsockopt(int fd, int level, int optname,
|
|||
if (err)
|
||||
goto out_put;
|
||||
|
||||
err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level,
|
||||
&optname, optval, &optlen,
|
||||
&kernel_optval);
|
||||
|
||||
if (err < 0) {
|
||||
goto out_put;
|
||||
} else if (err > 0) {
|
||||
err = 0;
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
if (kernel_optval) {
|
||||
set_fs(KERNEL_DS);
|
||||
optval = (char __user __force *)kernel_optval;
|
||||
}
|
||||
|
||||
if (level == SOL_SOCKET)
|
||||
err =
|
||||
sock_setsockopt(sock, level, optname, optval,
|
||||
|
@ -2071,6 +2089,11 @@ static int __sys_setsockopt(int fd, int level, int optname,
|
|||
err =
|
||||
sock->ops->setsockopt(sock, level, optname, optval,
|
||||
optlen);
|
||||
|
||||
if (kernel_optval) {
|
||||
set_fs(oldfs);
|
||||
kfree(kernel_optval);
|
||||
}
|
||||
out_put:
|
||||
fput_light(sock->file, fput_needed);
|
||||
}
|
||||
|
@ -2093,6 +2116,7 @@ static int __sys_getsockopt(int fd, int level, int optname,
|
|||
{
|
||||
int err, fput_needed;
|
||||
struct socket *sock;
|
||||
int max_optlen;
|
||||
|
||||
sock = sockfd_lookup_light(fd, &err, &fput_needed);
|
||||
if (sock != NULL) {
|
||||
|
@ -2100,6 +2124,8 @@ static int __sys_getsockopt(int fd, int level, int optname,
|
|||
if (err)
|
||||
goto out_put;
|
||||
|
||||
max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
|
||||
|
||||
if (level == SOL_SOCKET)
|
||||
err =
|
||||
sock_getsockopt(sock, level, optname, optval,
|
||||
|
@ -2108,6 +2134,10 @@ static int __sys_getsockopt(int fd, int level, int optname,
|
|||
err =
|
||||
sock->ops->getsockopt(sock, level, optname, optval,
|
||||
optlen);
|
||||
|
||||
err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
|
||||
optval, optlen,
|
||||
max_optlen, err);
|
||||
out_put:
|
||||
fput_light(sock->file, fput_needed);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue