bpf: Introduce helper bpf_get_task_stack()
Introduce helper bpf_get_task_stack(), which dumps the stack trace of a given task. This differs from bpf_get_stack(), which gets the stack trace of the current task. One potential use case of bpf_get_task_stack() is to call it from bpf_iter__task and dump all /proc/<pid>/stack to a seq_file.

bpf_get_task_stack() uses stack_trace_save_tsk() instead of get_perf_callchain() for the kernel stack. The benefit of this choice is that stack_trace_save_tsk() doesn't require changes in arch/. The downside is that stack_trace_save_tsk() dumps the stack trace into an unsigned long array, so for 32-bit systems it has to be translated to a u64 array.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200630062846.664389-3-songliubraving@fb.com
commit fa28dcb82a (parent d141b8bc57)
7 changed files with 153 additions and 7 deletions
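The bpf_iter__task use case called out in the commit message would look roughly like the sketch below: a task-iterator BPF program that prints each task's kernel stack to the iterator's seq_file, in the spirit of /proc/<pid>/stack. This is an illustrative sketch, not code from this commit; it assumes struct bpf_iter__task and the iter/task attach type from the BPF task-iterator work, and a vmlinux.h generated from kernel BTF.

/* Illustrative sketch (not part of this commit): dump each task's kernel
 * stack from an iter/task BPF program using the new helper.
 */
#include "vmlinux.h"                 /* struct bpf_iter__task, from kernel BTF */
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

#define MAX_STACK_TRACE_DEPTH 64
/* global buffer: too large for the 512-byte BPF stack */
unsigned long entries[MAX_STACK_TRACE_DEPTH];

SEC("iter/task")
int dump_task_stack(struct bpf_iter__task *ctx)
{
        static const char hdr_fmt[] = "pid: %u\n";
        static const char frame_fmt[] = "[<0>] %pB\n";
        struct seq_file *seq = ctx->meta->seq;
        struct task_struct *task = ctx->task;
        __u64 pid;
        long retlen;
        int i;

        if (!task)
                return 0;

        /* flags == 0: kernel stack, skip no frames */
        retlen = bpf_get_task_stack(task, entries, sizeof(entries), 0);
        if (retlen < 0)
                return 0;

        pid = task->tgid;
        bpf_seq_printf(seq, hdr_fmt, sizeof(hdr_fmt), &pid, sizeof(pid));

        for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) {
                __u64 addr;

                if (retlen <= i * sizeof(entries[0]))
                        break;
                addr = entries[i];
                bpf_seq_printf(seq, frame_fmt, sizeof(frame_fmt),
                               &addr, sizeof(addr));
        }
        return 0;
}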
include/linux/bpf.h
@@ -1627,6 +1627,7 @@ extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
 extern const struct bpf_func_proto bpf_get_current_comm_proto;
 extern const struct bpf_func_proto bpf_get_stackid_proto;
 extern const struct bpf_func_proto bpf_get_stack_proto;
+extern const struct bpf_func_proto bpf_get_task_stack_proto;
 extern const struct bpf_func_proto bpf_sock_map_update_proto;
 extern const struct bpf_func_proto bpf_sock_hash_update_proto;
 extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
include/uapi/linux/bpf.h
@@ -3285,6 +3285,39 @@ union bpf_attr {
  *		Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
  *	Return
  *		*sk* if casting is valid, or NULL otherwise.
+ *
+ * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags)
+ *	Description
+ *		Return a user or a kernel stack in bpf program provided buffer.
+ *		To achieve this, the helper needs *task*, which is a valid
+ *		pointer to struct task_struct. To store the stacktrace, the
+ *		bpf program provides *buf* with a nonnegative *size*.
+ *
+ *		The last argument, *flags*, holds the number of stack frames to
+ *		skip (from 0 to 255), masked with
+ *		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ *		the following flags:
+ *
+ *		**BPF_F_USER_STACK**
+ *			Collect a user space stack instead of a kernel stack.
+ *		**BPF_F_USER_BUILD_ID**
+ *			Collect buildid+offset instead of ips for user stack,
+ *			only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ *		**bpf_get_task_stack**\ () can collect up to
+ *		**PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ *		to sufficient large buffer size. Note that
+ *		this limit can be controlled with the **sysctl** program, and
+ *		that it should be manually increased in order to profile long
+ *		user stacks (such as stacks for Java programs). To do so, use:
+ *
+ *		::
+ *
+ *			# sysctl kernel.perf_event_max_stack=<new value>
+ *	Return
+ *		A non-negative value equal to or less than *size* on success,
+ *		or a negative error in case of failure.
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)		\
	FN(unspec),			\

@@ -3427,7 +3460,9 @@ union bpf_attr {
	FN(skc_to_tcp_sock),		\
	FN(skc_to_tcp_timewait_sock),	\
	FN(skc_to_tcp_request_sock),	\
-	FN(skc_to_udp6_sock),
+	FN(skc_to_udp6_sock),		\
+	FN(get_task_stack),		\
	/* */

 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
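To make the *flags* layout documented above concrete: the low byte selects how many frames to skip and the higher bits carry the flag bits. A small, hypothetical fragment follows (the constants are the existing UAPI flag definitions; `task` and the buffers are assumed to be in scope inside a BPF program):

/* Hypothetical fragment illustrating the flags layout documented above. */

/* Kernel stack, skipping the first 3 frames; the buffer holds u64
 * instruction pointers, so its size must be a multiple of 8.
 */
__u64 ips[32];
long n = bpf_get_task_stack(task, ips, sizeof(ips),
                            3 & BPF_F_SKIP_FIELD_MASK);

/* User stack as buildid+offset records; with BPF_F_USER_BUILD_ID the
 * buffer holds struct bpf_stack_build_id elements instead of addresses.
 */
struct bpf_stack_build_id ids[8];
long m = bpf_get_task_stack(task, ids, sizeof(ids),
                            BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);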
kernel/bpf/stackmap.c
@@ -348,6 +348,40 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 	}
 }

+static struct perf_callchain_entry *
+get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
+{
+	struct perf_callchain_entry *entry;
+	int rctx;
+
+	entry = get_callchain_entry(&rctx);
+
+	if (!entry)
+		return NULL;
+
+	entry->nr = init_nr +
+		stack_trace_save_tsk(task, (unsigned long *)(entry->ip + init_nr),
+				     sysctl_perf_event_max_stack - init_nr, 0);
+
+	/* stack_trace_save_tsk() works on unsigned long array, while
+	 * perf_callchain_entry uses u64 array. For 32-bit systems, it is
+	 * necessary to fix this mismatch.
+	 */
+	if (__BITS_PER_LONG != 64) {
+		unsigned long *from = (unsigned long *) entry->ip;
+		u64 *to = entry->ip;
+		int i;
+
+		/* copy data from the end to avoid using extra buffer */
+		for (i = entry->nr - 1; i >= (int)init_nr; i--)
+			to[i] = (u64)(from[i]);
+	}
+
+	put_callchain_entry(rctx);
+
+	return entry;
+}
+
 BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
 	   u64, flags)
 {

@@ -448,8 +482,8 @@ const struct bpf_func_proto bpf_get_stackid_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };

-BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
-	   u64, flags)
+static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
+			    void *buf, u32 size, u64 flags)
 {
 	u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
 	bool user_build_id = flags & BPF_F_USER_BUILD_ID;

@@ -471,13 +505,22 @@ BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
 	if (unlikely(size % elem_size))
 		goto clear;

+	/* cannot get valid user stack for task without user_mode regs */
+	if (task && user && !user_mode(regs))
+		goto err_fault;
+
 	num_elem = size / elem_size;
 	if (sysctl_perf_event_max_stack < num_elem)
 		init_nr = 0;
 	else
 		init_nr = sysctl_perf_event_max_stack - num_elem;
-	trace = get_perf_callchain(regs, init_nr, kernel, user,
-				   sysctl_perf_event_max_stack, false, false);
+
+	if (kernel && task)
+		trace = get_callchain_entry_for_task(task, init_nr);
+	else
+		trace = get_perf_callchain(regs, init_nr, kernel, user,
+					   sysctl_perf_event_max_stack,
+					   false, false);
 	if (unlikely(!trace))
 		goto err_fault;

@@ -505,6 +548,12 @@ clear:
 	return err;
 }

+BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
+	   u64, flags)
+{
+	return __bpf_get_stack(regs, NULL, buf, size, flags);
+}
+
 const struct bpf_func_proto bpf_get_stack_proto = {
 	.func		= bpf_get_stack,
 	.gpl_only	= true,

@@ -515,6 +564,26 @@ const struct bpf_func_proto bpf_get_stack_proto = {
 	.arg4_type	= ARG_ANYTHING,
 };

+BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
+	   u32, size, u64, flags)
+{
+	struct pt_regs *regs = task_pt_regs(task);
+
+	return __bpf_get_stack(regs, task, buf, size, flags);
+}
+
+static int bpf_get_task_stack_btf_ids[5];
+const struct bpf_func_proto bpf_get_task_stack_proto = {
+	.func		= bpf_get_task_stack,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_ANYTHING,
+	.btf_id		= bpf_get_task_stack_btf_ids,
+};
+
 /* Called from eBPF program */
 static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
 {
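The downward loop in get_callchain_entry_for_task() above is what lets a 32-bit kernel widen the unsigned long entries to u64 in place: each 8-byte write only lands on slots whose 4-byte source values have already been consumed. A standalone userspace illustration of the same trick (not kernel code; 32-bit longs are simulated with uint32_t, and the type-punning mirrors the kernel, which builds with -fno-strict-aliasing):

/* widen.c - illustrate the "copy from the end" in-place widening used by
 * get_callchain_entry_for_task() on 32-bit kernels.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t buf[4];
        uint32_t *from = (uint32_t *)buf;   /* stand-in for 32-bit unsigned long */
        uint64_t *to = buf;
        int nr = 4, i;

        /* pretend stack_trace_save_tsk() packed nr 32-bit entries at the front */
        for (i = 0; i < nr; i++)
                from[i] = 0x1000 + i;

        /* copy data from the end to avoid using an extra buffer: to[i] covers
         * bytes 8*i..8*i+7, overlapping only from[2*i] and from[2*i+1], both at
         * index >= i and therefore already read when walking downward.
         */
        for (i = nr - 1; i >= 0; i--)
                to[i] = (uint64_t)from[i];

        for (i = 0; i < nr; i++)
                printf("%#llx\n", (unsigned long long)to[i]);  /* 0x1000..0x1003 */
        return 0;
}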
kernel/bpf/verifier.c
@@ -4864,7 +4864,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 	if (err)
 		return err;

-	if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
+	if ((func_id == BPF_FUNC_get_stack ||
+	     func_id == BPF_FUNC_get_task_stack) &&
+	    !env->prog->has_callchain_buf) {
 		const char *err_str;

 #ifdef CONFIG_PERF_EVENTS
kernel/trace/bpf_trace.c
@@ -1137,6 +1137,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_ringbuf_query_proto;
 	case BPF_FUNC_jiffies64:
 		return &bpf_jiffies64_proto;
+	case BPF_FUNC_get_task_stack:
+		return &bpf_get_task_stack_proto;
 	default:
 		return NULL;
 	}
scripts/bpf_helpers_doc.py
@@ -426,6 +426,7 @@ class PrinterHelpers(Printer):
             'struct tcp_timewait_sock',
             'struct tcp_request_sock',
             'struct udp6_sock',
+            'struct task_struct',

             'struct __sk_buff',
             'struct sk_msg_md',

@@ -468,6 +469,7 @@ class PrinterHelpers(Printer):
             'struct tcp_timewait_sock',
             'struct tcp_request_sock',
             'struct udp6_sock',
+            'struct task_struct',
     }
     mapped_types = {
             'u8': '__u8',
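For context, scripts/bpf_helpers_doc.py generates bpf_helper_defs.h from the helper documentation in the UAPI header; adding 'struct task_struct' to its known-types sets lets it emit a forward declaration so the generated prototype compiles. Roughly, the emitted code would look like the sketch below (illustrative, not copied from a generated file; the helper id corresponds to the new entry's position in __BPF_FUNC_MAPPER):

/* Approximate shape of what bpf_helpers_doc.py emits into bpf_helper_defs.h
 * for the new helper.
 */
struct task_struct;     /* forward declaration enabled by the known-types change */

static long (*bpf_get_task_stack)(struct task_struct *task, void *buf,
                                  __u32 size, __u64 flags) = (void *) 141;
                                  /* 141: id from the __BPF_FUNC_MAPPER position */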
tools/include/uapi/linux/bpf.h
The tools copy of the UAPI header receives the same two hunks as include/uapi/linux/bpf.h above, keeping the two headers in sync: the bpf_get_task_stack() documentation block added to the helper list (@@ -3285,6 +3285,39 @@) and FN(get_task_stack) appended to __BPF_FUNC_MAPPER (@@ -3427,7 +3460,9 @@).