From a5285ad9e30fd90b88a11adcab97bd4c3ffe44eb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 16 Nov 2013 02:02:09 +0100 Subject: [PATCH 1/8] perf tools: Tag thread comm as overriden The problem is that when a thread overrides its default ":%pid" comm, we forget to tag the thread comm as overriden. Hence, this overriden comm is not inherited on future forks. Fix it. Signed-off-by: Frederic Weisbecker Tested-by: David Ahern Acked-by: Namhyung Kim Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20131116010207.GA18855@localhost.localdomain Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index cd8e2f592719..49eaf1d7d89d 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -70,14 +70,13 @@ int thread__set_comm(struct thread *thread, const char *str, u64 timestamp) /* Override latest entry if it had no specific time coverage */ if (!curr->start) { comm__override(curr, str, timestamp); - return 0; + } else { + new = comm__new(str, timestamp); + if (!new) + return -ENOMEM; + list_add(&new->list, &thread->comm_list); } - new = comm__new(str, timestamp); - if (!new) - return -ENOMEM; - - list_add(&new->list, &thread->comm_list); thread->comm_set = true; return 0; From 210e812f036736aeda097d9a6ef84b1f2b334bae Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Nov 2013 11:20:43 +0900 Subject: [PATCH 2/8] perf header: Fix bogus group name When processing event group descriptor in perf file header, we reuse an allocated group name but forgot to prevent it from freeing. Reported-by: Stephane Eranian Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1384741244-7271-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 369c03648f88..559c516f1cec 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2078,8 +2078,10 @@ static int process_group_desc(struct perf_file_section *section __maybe_unused, if (evsel->idx == (int) desc[i].leader_idx) { evsel->leader = evsel; /* {anon_group} is a dummy name */ - if (strcmp(desc[i].name, "{anon_group}")) + if (strcmp(desc[i].name, "{anon_group}")) { evsel->group_name = desc[i].name; + desc[i].name = NULL; + } evsel->nr_members = desc[i].nr_members; if (i >= nr_groups || nr > 0) { From 50a2740b839ece03b305facd3fc07cdc3b74247c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Nov 2013 11:20:44 +0900 Subject: [PATCH 3/8] perf header: Fix possible memory leaks in process_group_desc() After processing all group descriptors or encountering an error, it frees all descriptors. However, current logic can leak memory since it might not traverse all descriptors. Note that the 'i' can have different value than nr_groups when an error occurred and it's safe to call free(desc[i].name) for every desc since we already make it NULL when it's reused for group names. Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1384741244-7271-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 559c516f1cec..1cd035708931 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2107,7 +2107,7 @@ static int process_group_desc(struct perf_file_section *section __maybe_unused, ret = 0; out_free: - while ((int) --i >= 0) + for (i = 0; i < nr_groups; i++) free(desc[i].name); free(desc); From eff2c92f86c2ac2a0eab3749d58be39592293c3a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 18 Nov 2013 14:23:14 -0500 Subject: [PATCH 4/8] tools lib traceevent: Fix use of multiple options in processing field Jiri Olsa reported that the scsi_dispatch_cmd_done event failed to parse with: Error: expected type 5 but read 4 Error: expected type 5 but read 4 The problem is with this part of the print_fmt: __print_symbolic(((REC->result) >> 24) & 0xff, ... The __print_symbolic() helper function's first parameter is the field to use to determine what symbol to print based on the value of the result. The parser can handle one operation, but it can not handle multiple operations ('>>' and '&'). Add code to process all operations for the field argument for __print_symbolic() as well as __print_flags(). Reported-by: Jiri Olsa Signed-off-by: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20131118142314.27ca334b@gandalf.local.home Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 0362d575de7d..8a5b65df6efb 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -1606,6 +1606,24 @@ process_arg(struct event_format *event, struct print_arg *arg, char **tok) static enum event_type process_op(struct event_format *event, struct print_arg *arg, char **tok); +/* + * For __print_symbolic() and __print_flags, we need to completely + * evaluate the first argument, which defines what to print next. + */ +static enum event_type +process_field_arg(struct event_format *event, struct print_arg *arg, char **tok) +{ + enum event_type type; + + type = process_arg(event, arg, tok); + + while (type == EVENT_OP) { + type = process_op(event, arg, tok); + } + + return type; +} + static enum event_type process_cond(struct event_format *event, struct print_arg *top, char **tok) { @@ -2371,7 +2389,7 @@ process_flags(struct event_format *event, struct print_arg *arg, char **tok) goto out_free; } - type = process_arg(event, field, &token); + type = process_field_arg(event, field, &token); /* Handle operations in the first argument */ while (type == EVENT_OP) @@ -2424,7 +2442,8 @@ process_symbols(struct event_format *event, struct print_arg *arg, char **tok) goto out_free; } - type = process_arg(event, field, &token); + type = process_field_arg(event, field, &token); + if (test_type_token(type, token, EVENT_DELIM, ",")) goto out_free_field; From d5b5f391d434c5cc8bcb1ab2d759738797b85f52 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Nov 2013 16:23:04 +0100 Subject: [PATCH 5/8] ftrace, perf: Avoid infinite event generation loop Vince's perf-trinity fuzzer found yet another 'interesting' problem. When we sample the irq_work_exit tracepoint with period==1 (or PERF_SAMPLE_PERIOD) and we add an fasync SIGNAL handler we create an infinite event generation loop: ,-> | irq_work_exit() -> | trace_irq_work_exit() -> | ... | __perf_event_overflow() -> (due to fasync) | irq_work_queue() -> (irq_work_list must be empty) '--------- arch_irq_work_raise() Similar things can happen due to regular poll() wakeups if we exceed the ring-buffer wakeup watermark, or have an event_limit. To avoid this, dis-allow sampling this particular tracepoint. In order to achieve this, create a special perf_perm function pointer for each event and call this (when set) on trying to create a tracepoint perf event. [ roasted: use expr... to allow for ',' in your expression ] Reported-by: Vince Weaver Tested-by: Vince Weaver Signed-off-by: Peter Zijlstra Cc: Steven Rostedt Cc: Dave Jones Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20131114152304.GC5364@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/include/asm/trace/irq_vectors.h | 11 +++++++++++ include/linux/ftrace_event.h | 16 ++++++++++++++++ include/linux/tracepoint.h | 4 ++++ include/trace/ftrace.h | 7 +++++++ kernel/trace/trace_event_perf.c | 6 ++++++ 5 files changed, 44 insertions(+) diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 2874df24e7a4..4cab890007a7 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -71,6 +71,17 @@ DEFINE_IRQ_VECTOR_EVENT(x86_platform_ipi); */ DEFINE_IRQ_VECTOR_EVENT(irq_work); +/* + * We must dis-allow sampling irq_work_exit() because perf event sampling + * itself can cause irq_work, which would lead to an infinite loop; + * + * 1) irq_work_exit happens + * 2) generates perf sample + * 3) generates irq_work + * 4) goto 1 + */ +TRACE_EVENT_PERF_PERM(irq_work_exit, is_sampling_event(p_event) ? -EPERM : 0); + /* * call_function - called when entering/exiting a call function interrupt * vector handler diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 9abbe630c456..8c9b7a1c4138 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -248,6 +248,9 @@ struct ftrace_event_call { #ifdef CONFIG_PERF_EVENTS int perf_refcount; struct hlist_head __percpu *perf_events; + + int (*perf_perm)(struct ftrace_event_call *, + struct perf_event *); #endif }; @@ -317,6 +320,19 @@ struct ftrace_event_file { } \ early_initcall(trace_init_flags_##name); +#define __TRACE_EVENT_PERF_PERM(name, expr...) \ + static int perf_perm_##name(struct ftrace_event_call *tp_event, \ + struct perf_event *p_event) \ + { \ + return ({ expr; }); \ + } \ + static int __init trace_init_perf_perm_##name(void) \ + { \ + event_##name.perf_perm = &perf_perm_##name; \ + return 0; \ + } \ + early_initcall(trace_init_perf_perm_##name); + #define PERF_MAX_TRACE_SIZE 2048 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index ebeab360d851..f16dc0a40049 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -267,6 +267,8 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT_FLAGS(event, flag) +#define TRACE_EVENT_PERF_PERM(event, expr...) + #endif /* DECLARE_TRACE */ #ifndef TRACE_EVENT @@ -399,4 +401,6 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT_FLAGS(event, flag) +#define TRACE_EVENT_PERF_PERM(event, expr...) + #endif /* ifdef TRACE_EVENT (see note above) */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 52594b20179e..6b852f60f8ae 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -90,6 +90,10 @@ #define TRACE_EVENT_FLAGS(name, value) \ __TRACE_EVENT_FLAGS(name, value) +#undef TRACE_EVENT_PERF_PERM +#define TRACE_EVENT_PERF_PERM(name, expr...) \ + __TRACE_EVENT_PERF_PERM(name, expr) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) @@ -140,6 +144,9 @@ #undef TRACE_EVENT_FLAGS #define TRACE_EVENT_FLAGS(event, flag) +#undef TRACE_EVENT_PERF_PERM +#define TRACE_EVENT_PERF_PERM(event, expr...) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 78e27e3b52ac..630889f68b1d 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -24,6 +24,12 @@ static int total_ref_count; static int perf_trace_event_perm(struct ftrace_event_call *tp_event, struct perf_event *p_event) { + if (tp_event->perf_perm) { + int ret = tp_event->perf_perm(tp_event, p_event); + if (ret) + return ret; + } + /* The ftrace function trace is allowed only for root. */ if (ftrace_event_is_function(tp_event) && perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) From 06db0b21712f878b808480ef31097637013bbf0f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Sep 2013 13:14:47 +0200 Subject: [PATCH 6/8] perf: Remove fragile swevent hlist optimization Currently we only allocate a single cpu hashtable for per-cpu swevents; do away with this optimization for it is fragile in the face of things like perf_pmu_migrate_context(). The easiest thing is to make sure all CPUs are consistent wrt state. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20130913111447.GN31370@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index d724e7757cd1..72348dc192c1 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5680,11 +5680,6 @@ static void swevent_hlist_put(struct perf_event *event) { int cpu; - if (event->cpu != -1) { - swevent_hlist_put_cpu(event, event->cpu); - return; - } - for_each_possible_cpu(cpu) swevent_hlist_put_cpu(event, cpu); } @@ -5718,9 +5713,6 @@ static int swevent_hlist_get(struct perf_event *event) int err; int cpu, failed_cpu; - if (event->cpu != -1) - return swevent_hlist_get_cpu(event, event->cpu); - get_online_cpus(); for_each_possible_cpu(cpu) { err = swevent_hlist_get_cpu(event, cpu); From 0022cedd4a7d8a87841351e2b018bb6794cf2e67 Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Fri, 15 Nov 2013 12:39:45 -0500 Subject: [PATCH 7/8] perf/trace: Properly use u64 to hold event_id The 64-bit attr.config value for perf trace events was being copied into an "int" before doing a comparison, meaning the top 32 bits were being truncated. As far as I can tell this didn't cause any errors, but it did mean it was possible to create valid aliases for all the tracepoint ids which I don't think was intended. (For example, 0xffffffff00000018 and 0x18 both enable the same tracepoint). Signed-off-by: Vince Weaver Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1311151236100.11932@vincent-weaver-1.um.maine.edu Signed-off-by: Ingo Molnar --- kernel/trace/trace_event_perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 630889f68b1d..e854f420e033 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -179,7 +179,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event, int perf_trace_init(struct perf_event *p_event) { struct ftrace_event_call *tp_event; - int event_id = p_event->attr.config; + u64 event_id = p_event->attr.config; int ret = -EINVAL; mutex_lock(&event_mutex); From 6b5fa0ba4f85a8499287aefaf3f1375450c40c6d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 19 Nov 2013 16:14:51 -0300 Subject: [PATCH 8/8] tools lib traceevent: Fix conversion of pointer to integer of different size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc complaint on 32-bit system: /home/acme/git/linux/tools/lib/traceevent/event-parse.c: In function ‘eval_num_arg’: /home/acme/git/linux/tools/lib/traceevent/event-parse.c:3468:9: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] This is because the eval_num_arg returns everything as an 'unsigned long long', so it converts a void pointer to a wider integer, fix it by converting the void pointer to an integer of the same size, 'unsigned long', before casting it to 'unsigned long long'. Acked-by: Steven Rostedt Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-yllx4aqcg06v5n4vjpwiiuld@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 8a5b65df6efb..217c82ee3665 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3465,7 +3465,7 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg * is in the bottom half of the 32 bit field. */ offset &= 0xffff; - val = (unsigned long long)(data + offset); + val = (unsigned long long)((unsigned long)data + offset); break; default: /* not sure what to do there */ return 0;