Mirror of https://github.com/Fishwaldo/Star64_linux.git (synced 2025-06-23 07:01:23 +00:00)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:

 "Kernel improvements:

   - watchdog driver improvements by Li Zefan
   - Power7 CPI stack events related improvements by Sukadev Bhattiprolu
   - event multiplexing via hrtimers and other improvements by Stephane Eranian
   - kernel stack use optimization by Andrew Hunter
   - AMD IOMMU uncore PMU support by Suravee Suthikulpanit
   - NMI handling rate-limits by Dave Hansen
   - various hw_breakpoint fixes by Oleg Nesterov
   - hw_breakpoint overflow period sampling and related signal handling fixes by Jiri Olsa
   - Intel Haswell PMU support by Andi Kleen

  Tooling improvements:

   - Reset SIGTERM handler in workload child process, fix from David Ahern.
   - Makefile reorganization, prep work for Kconfig patches, from Jiri Olsa.
   - Add automated make test suite, from Jiri Olsa.
   - Add --percent-limit option to 'top' and 'report', from Namhyung Kim.
   - Sorting improvements, from Namhyung Kim.
   - Expand definition of sysfs format attribute, from Michael Ellerman.

  Tooling fixes:

   - 'perf tests' fixes from Jiri Olsa.
   - Make Power7 CPI stack events available in sysfs, from Sukadev Bhattiprolu.
   - Handle death by SIGTERM in 'perf record', fix from David Ahern.
   - Fix printing of perf_event_paranoid message, from David Ahern.
   - Handle realloc failures in 'perf kvm', from David Ahern.
   - Fix divide by 0 in variance, from David Ahern.
   - Save parent pid in thread struct, from David Ahern.
   - Handle JITed code in shared memory, from Andi Kleen.
   - Fixes for 'perf diff', from Jiri Olsa.
   - Remove some unused struct members, from Jiri Olsa.
   - Add missing liblk.a dependency for python/perf.so, fix from Jiri Olsa.
   - Respect CROSS_COMPILE in liblk.a, from Rabin Vincent.
   - No need to do locking when adding hists in perf report, only 'top' needs that, from Namhyung Kim.
   - Fix alignment of symbol column in the hists browser (top, report) when -v is given, from Namhyung Kim.
   - Fix 'perf top' -E option behavior, from Namhyung Kim.
   - Fix bug in isupper() and islower(), from Sukadev Bhattiprolu.
   - Fix compile errors in bp_signal 'perf test', from Sukadev Bhattiprolu.

  ... and more things"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (102 commits)
  perf/x86: Disable PEBS-LL in intel_pmu_pebs_disable()
  perf/x86: Fix shared register mutual exclusion enforcement
  perf/x86/intel: Support full width counting
  x86: Add NMI duration tracepoints
  perf: Drop sample rate when sampling is too slow
  x86: Warn when NMI handlers take large amounts of time
  hw_breakpoint: Introduce "struct bp_cpuinfo"
  hw_breakpoint: Simplify *register_wide_hw_breakpoint()
  hw_breakpoint: Introduce cpumask_of_bp()
  hw_breakpoint: Simplify the "weight" usage in toggle_bp_slot() paths
  hw_breakpoint: Simplify list/idx mess in toggle_bp_slot() paths
  perf/x86/intel: Add mem-loads/stores support for Haswell
  perf/x86/intel: Support Haswell/v4 LBR format
  perf/x86/intel: Move NMI clearing to end of PMI handler
  perf/x86/intel: Add Haswell PEBS support
  perf/x86/intel: Add simple Haswell PMU support
  perf/x86/intel: Add Haswell PEBS record support
  perf/x86/intel: Fix sparse warning
  perf/x86/amd: AMD IOMMU Performance Counter PERF uncore PMU implementation
  perf/x86/amd: Add IOMMU Performance Counter resource management
  ...
commit f0bb4c0ab0

71 changed files with 2949 additions and 1055 deletions
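The sample-rate throttling pulled in here ("perf: Drop sample rate when sampling is too slow") is driven by two sysctls, kernel.perf_event_max_sample_rate and the new kernel.perf_cpu_time_max_percent (see the kernel/sysctl.c hunk at the end of the diff). As a minimal sketch, not part of the commit, the userspace program below reads both knobs and recomputes the per-sample time budget the same way update_perf_cpu_limits() does; the /proc paths match the sysctl names, everything else is illustrative.

/* Illustrative only: recompute the per-sample budget that the kernel
 * derives in update_perf_cpu_limits() from the two sysctls above. */
#include <stdio.h>

static long read_sysctl(const char *path)
{
    FILE *f = fopen(path, "r");
    long val = -1;

    if (!f)
        return -1;
    if (fscanf(f, "%ld", &val) != 1)
        val = -1;
    fclose(f);
    return val;
}

int main(void)
{
    long rate = read_sysctl("/proc/sys/kernel/perf_event_max_sample_rate");
    long pct  = read_sysctl("/proc/sys/kernel/perf_cpu_time_max_percent");

    if (rate <= 0 || pct < 0) {
        fprintf(stderr, "sysctls not available on this kernel\n");
        return 1;
    }

    /* perf_sample_period_ns = NSEC_PER_SEC / sample_rate */
    long long period_ns  = 1000000000LL / rate;
    /* allowed ns per sample ~ period_ns * max_percent / 100 */
    long long allowed_ns = period_ns * pct / 100;

    printf("max sample rate          : %ld Hz\n", rate);
    printf("sample period            : %lld ns\n", period_ns);
    printf("approx. allowed ns/sample: %lld\n", allowed_ns);
    return 0;
}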
@@ -165,10 +165,28 @@ int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free'
/*
 * max perf event sample rate
 */
#define DEFAULT_MAX_SAMPLE_RATE 100000
int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE;
static int max_samples_per_tick __read_mostly =
    DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
#define DEFAULT_MAX_SAMPLE_RATE 100000
#define DEFAULT_SAMPLE_PERIOD_NS (NSEC_PER_SEC / DEFAULT_MAX_SAMPLE_RATE)
#define DEFAULT_CPU_TIME_MAX_PERCENT 25

int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE;

static int max_samples_per_tick __read_mostly = DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
static int perf_sample_period_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS;

static atomic_t perf_sample_allowed_ns __read_mostly =
    ATOMIC_INIT( DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100);

void update_perf_cpu_limits(void)
{
    u64 tmp = perf_sample_period_ns;

    tmp *= sysctl_perf_cpu_time_max_percent;
    tmp = do_div(tmp, 100);
    atomic_set(&perf_sample_allowed_ns, tmp);
}

static int perf_rotate_context(struct perf_cpu_context *cpuctx);

int perf_proc_update_handler(struct ctl_table *table, int write,
        void __user *buffer, size_t *lenp,

@@ -180,10 +198,78 @@ int perf_proc_update_handler(struct ctl_table *table, int write,
    return ret;

    max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
    perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
    update_perf_cpu_limits();

    return 0;
}

int sysctl_perf_cpu_time_max_percent __read_mostly = DEFAULT_CPU_TIME_MAX_PERCENT;

int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
        void __user *buffer, size_t *lenp,
        loff_t *ppos)
{
    int ret = proc_dointvec(table, write, buffer, lenp, ppos);

    if (ret || !write)
        return ret;

    update_perf_cpu_limits();

    return 0;
}

/*
 * perf samples are done in some very critical code paths (NMIs).
 * If they take too much CPU time, the system can lock up and not
 * get any real work done. This will drop the sample rate when
 * we detect that events are taking too long.
 */
#define NR_ACCUMULATED_SAMPLES 128
DEFINE_PER_CPU(u64, running_sample_length);

void perf_sample_event_took(u64 sample_len_ns)
{
    u64 avg_local_sample_len;
    u64 local_samples_len = __get_cpu_var(running_sample_length);

    if (atomic_read(&perf_sample_allowed_ns) == 0)
        return;

    /* decay the counter by 1 average sample */
    local_samples_len = __get_cpu_var(running_sample_length);
    local_samples_len -= local_samples_len/NR_ACCUMULATED_SAMPLES;
    local_samples_len += sample_len_ns;
    __get_cpu_var(running_sample_length) = local_samples_len;

    /*
     * note: this will be biased artifically low until we have
     * seen NR_ACCUMULATED_SAMPLES. Doing it this way keeps us
     * from having to maintain a count.
     */
    avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES;

    if (avg_local_sample_len <= atomic_read(&perf_sample_allowed_ns))
        return;

    if (max_samples_per_tick <= 1)
        return;

    max_samples_per_tick = DIV_ROUND_UP(max_samples_per_tick, 2);
    sysctl_perf_event_sample_rate = max_samples_per_tick * HZ;
    perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;

    printk_ratelimited(KERN_WARNING
            "perf samples too long (%lld > %d), lowering "
            "kernel.perf_event_max_sample_rate to %d\n",
            avg_local_sample_len,
            atomic_read(&perf_sample_allowed_ns),
            sysctl_perf_event_sample_rate);

    update_perf_cpu_limits();
}

static atomic64_t perf_event_id;

static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
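
The hunk above keeps a per-CPU decayed running total of sample times: each call subtracts one "slot" worth of the total and adds the new sample, so total/NR_ACCUMULATED_SAMPLES behaves like a recent-average sample length without any explicit counter. A self-contained model of just that arithmetic follows; the names are made up for illustration and this is not kernel code.

/* Standalone model of the decaying accumulator used by perf_sample_event_took().
 * The average converges toward the steady-state sample length from below,
 * matching the "biased artifically low" note in the diff. */
#include <stdio.h>

#define NR_ACCUMULATED_SAMPLES 128

static unsigned long long running_len; /* per-CPU in the kernel; one copy here */

static unsigned long long record_sample(unsigned long long sample_len_ns)
{
    running_len -= running_len / NR_ACCUMULATED_SAMPLES; /* decay by one slot */
    running_len += sample_len_ns;                        /* add the new sample */
    return running_len / NR_ACCUMULATED_SAMPLES;         /* current average */
}

int main(void)
{
    /* feed a constant 5000ns sample; the average converges toward 5000 */
    for (int i = 0; i < 1024; i++) {
        unsigned long long avg = record_sample(5000);
        if ((i & 127) == 0)
            printf("after %4d samples: avg ~ %llu ns\n", i + 1, avg);
    }
    return 0;
}
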
@@ -655,6 +741,106 @@ perf_cgroup_mark_enabled(struct perf_event *event,
}
#endif

/*
 * set default to be dependent on timer tick just
 * like original code
 */
#define PERF_CPU_HRTIMER (1000 / HZ)
/*
 * function must be called with interrupts disbled
 */
static enum hrtimer_restart perf_cpu_hrtimer_handler(struct hrtimer *hr)
{
    struct perf_cpu_context *cpuctx;
    enum hrtimer_restart ret = HRTIMER_NORESTART;
    int rotations = 0;

    WARN_ON(!irqs_disabled());

    cpuctx = container_of(hr, struct perf_cpu_context, hrtimer);

    rotations = perf_rotate_context(cpuctx);

    /*
     * arm timer if needed
     */
    if (rotations) {
        hrtimer_forward_now(hr, cpuctx->hrtimer_interval);
        ret = HRTIMER_RESTART;
    }

    return ret;
}

/* CPU is going down */
void perf_cpu_hrtimer_cancel(int cpu)
{
    struct perf_cpu_context *cpuctx;
    struct pmu *pmu;
    unsigned long flags;

    if (WARN_ON(cpu != smp_processor_id()))
        return;

    local_irq_save(flags);

    rcu_read_lock();

    list_for_each_entry_rcu(pmu, &pmus, entry) {
        cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);

        if (pmu->task_ctx_nr == perf_sw_context)
            continue;

        hrtimer_cancel(&cpuctx->hrtimer);
    }

    rcu_read_unlock();

    local_irq_restore(flags);
}

static void __perf_cpu_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
{
    struct hrtimer *hr = &cpuctx->hrtimer;
    struct pmu *pmu = cpuctx->ctx.pmu;
    int timer;

    /* no multiplexing needed for SW PMU */
    if (pmu->task_ctx_nr == perf_sw_context)
        return;

    /*
     * check default is sane, if not set then force to
     * default interval (1/tick)
     */
    timer = pmu->hrtimer_interval_ms;
    if (timer < 1)
        timer = pmu->hrtimer_interval_ms = PERF_CPU_HRTIMER;

    cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);

    hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
    hr->function = perf_cpu_hrtimer_handler;
}

static void perf_cpu_hrtimer_restart(struct perf_cpu_context *cpuctx)
{
    struct hrtimer *hr = &cpuctx->hrtimer;
    struct pmu *pmu = cpuctx->ctx.pmu;

    /* not for SW PMU */
    if (pmu->task_ctx_nr == perf_sw_context)
        return;

    if (hrtimer_active(hr))
        return;

    if (!hrtimer_callback_running(hr))
        __hrtimer_start_range_ns(hr, cpuctx->hrtimer_interval,
                                 0, HRTIMER_MODE_REL_PINNED, 0);
}

void perf_pmu_disable(struct pmu *pmu)
{
    int *count = this_cpu_ptr(pmu->pmu_disable_count);

@@ -1503,6 +1689,7 @@ group_sched_in(struct perf_event *group_event,

    if (event_sched_in(group_event, cpuctx, ctx)) {
        pmu->cancel_txn(pmu);
        perf_cpu_hrtimer_restart(cpuctx);
        return -EAGAIN;
    }

@@ -1549,6 +1736,8 @@ group_error:

    pmu->cancel_txn(pmu);

    perf_cpu_hrtimer_restart(cpuctx);

    return -EAGAIN;
}

@@ -1804,8 +1993,10 @@ static int __perf_event_enable(void *info)
     * If this event can't go on and it's part of a
     * group, then the whole group has to come off.
     */
    if (leader != event)
    if (leader != event) {
        group_sched_out(leader, cpuctx, ctx);
        perf_cpu_hrtimer_restart(cpuctx);
    }
    if (leader->attr.pinned) {
        update_group_times(leader);
        leader->state = PERF_EVENT_STATE_ERROR;

@@ -2552,7 +2743,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
 * because they're strictly cpu affine and rotate_start is called with IRQs
 * disabled, while rotate_context is called from IRQ context.
 */
static void perf_rotate_context(struct perf_cpu_context *cpuctx)
static int perf_rotate_context(struct perf_cpu_context *cpuctx)
{
    struct perf_event_context *ctx = NULL;
    int rotate = 0, remove = 1;

@@ -2591,6 +2782,8 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
done:
    if (remove)
        list_del_init(&cpuctx->rotation_list);

    return rotate;
}

#ifdef CONFIG_NO_HZ_FULL

@@ -2622,10 +2815,6 @@ void perf_event_task_tick(void)
        ctx = cpuctx->task_ctx;
        if (ctx)
            perf_adjust_freq_unthr_context(ctx, throttled);

        if (cpuctx->jiffies_interval == 1 ||
                !(jiffies % cpuctx->jiffies_interval))
            perf_rotate_context(cpuctx);
    }
}

@@ -5036,7 +5225,7 @@ static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
 * sign as trigger.
 */

static u64 perf_swevent_set_period(struct perf_event *event)
u64 perf_swevent_set_period(struct perf_event *event)
{
    struct hw_perf_event *hwc = &event->hw;
    u64 period = hwc->last_period;

@@ -5979,9 +6168,56 @@ type_show(struct device *dev, struct device_attribute *attr, char *page)
    return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->type);
}

static ssize_t
perf_event_mux_interval_ms_show(struct device *dev,
                struct device_attribute *attr,
                char *page)
{
    struct pmu *pmu = dev_get_drvdata(dev);

    return snprintf(page, PAGE_SIZE-1, "%d\n", pmu->hrtimer_interval_ms);
}

static ssize_t
perf_event_mux_interval_ms_store(struct device *dev,
                struct device_attribute *attr,
                const char *buf, size_t count)
{
    struct pmu *pmu = dev_get_drvdata(dev);
    int timer, cpu, ret;

    ret = kstrtoint(buf, 0, &timer);
    if (ret)
        return ret;

    if (timer < 1)
        return -EINVAL;

    /* same value, noting to do */
    if (timer == pmu->hrtimer_interval_ms)
        return count;

    pmu->hrtimer_interval_ms = timer;

    /* update all cpuctx for this PMU */
    for_each_possible_cpu(cpu) {
        struct perf_cpu_context *cpuctx;
        cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
        cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);

        if (hrtimer_active(&cpuctx->hrtimer))
            hrtimer_forward_now(&cpuctx->hrtimer, cpuctx->hrtimer_interval);
    }

    return count;
}

#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)

static struct device_attribute pmu_dev_attrs[] = {
    __ATTR_RO(type),
    __ATTR_NULL,
    __ATTR_RO(type),
    __ATTR_RW(perf_event_mux_interval_ms),
    __ATTR_NULL,
};

static int pmu_bus_running;
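
The show/store handlers above back a writable per-PMU sysfs attribute for the hrtimer multiplexing interval. A hedged usage sketch follows, not part of the commit: the path below assumes the common location of PMU device attributes (/sys/bus/event_source/devices/cpu/perf_event_mux_interval_ms) and should be verified on the target system; writing needs root, and values below 1 are rejected with -EINVAL by the store handler.

/* Illustrative helper: read and update one PMU's multiplexing interval
 * via the sysfs attribute added in this merge. Path is an assumption. */
#include <stdio.h>

#define MUX_ATTR "/sys/bus/event_source/devices/cpu/perf_event_mux_interval_ms"

int main(void)
{
    char buf[64];
    FILE *f = fopen(MUX_ATTR, "r");

    if (!f) {
        perror(MUX_ATTR);
        return 1;
    }
    if (fgets(buf, sizeof(buf), f))
        printf("current interval: %s", buf);
    fclose(f);

    f = fopen(MUX_ATTR, "w");
    if (!f) {
        perror("open for write (need root?)");
        return 1;
    }
    fprintf(f, "4\n"); /* e.g. rotate multiplexed events every 4 ms */
    fclose(f);
    return 0;
}
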
@@ -6027,7 +6263,7 @@ free_dev:
static struct lock_class_key cpuctx_mutex;
static struct lock_class_key cpuctx_lock;

int perf_pmu_register(struct pmu *pmu, char *name, int type)
int perf_pmu_register(struct pmu *pmu, const char *name, int type)
{
    int cpu, ret;

@@ -6076,7 +6312,9 @@ skip_type:
        lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
        cpuctx->ctx.type = cpu_context;
        cpuctx->ctx.pmu = pmu;
        cpuctx->jiffies_interval = 1;

        __perf_cpu_hrtimer_init(cpuctx, cpu);

        INIT_LIST_HEAD(&cpuctx->rotation_list);
        cpuctx->unique_pmu = pmu;
    }

@@ -6402,11 +6640,6 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
        if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL))
            return -EINVAL;

        /* kernel level capture: check permissions */
        if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM)
            && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
            return -EACCES;

        /* propagate priv level, when not set for branch */
        if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) {

@@ -6424,6 +6657,10 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
             */
            attr->branch_sample_type = mask;
        }
        /* privileged levels capture (kernel, hv): check permissions */
        if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM)
            && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
            return -EACCES;
    }

    if (attr->sample_type & PERF_SAMPLE_REGS_USER) {

@@ -7476,7 +7713,6 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
    case CPU_DOWN_PREPARE:
        perf_event_exit_cpu(cpu);
        break;

    default:
        break;
    }
@@ -46,23 +46,26 @@
#include <linux/smp.h>

#include <linux/hw_breakpoint.h>


/*
 * Constraints data
 */
struct bp_cpuinfo {
    /* Number of pinned cpu breakpoints in a cpu */
    unsigned int cpu_pinned;
    /* tsk_pinned[n] is the number of tasks having n+1 breakpoints */
    unsigned int *tsk_pinned;
    /* Number of non-pinned cpu/task breakpoints in a cpu */
    unsigned int flexible; /* XXX: placeholder, see fetch_this_slot() */
};

/* Number of pinned cpu breakpoints in a cpu */
static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]);

/* Number of pinned task breakpoints in a cpu */
static DEFINE_PER_CPU(unsigned int *, nr_task_bp_pinned[TYPE_MAX]);

/* Number of non-pinned cpu/task breakpoints in a cpu */
static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);

static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
static int nr_slots[TYPE_MAX];

static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
{
    return per_cpu_ptr(bp_cpuinfo + type, cpu);
}

/* Keep track of the breakpoints attached to tasks */
static LIST_HEAD(bp_task_head);

@@ -96,8 +99,8 @@ static inline enum bp_type_idx find_slot_idx(struct perf_event *bp)
 */
static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
{
    unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
    int i;
    unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);

    for (i = nr_slots[type] - 1; i >= 0; i--) {
        if (tsk_pinned[i] > 0)

@@ -127,6 +130,13 @@ static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
    return count;
}

static const struct cpumask *cpumask_of_bp(struct perf_event *bp)
{
    if (bp->cpu >= 0)
        return cpumask_of(bp->cpu);
    return cpu_possible_mask;
}

/*
 * Report the number of pinned/un-pinned breakpoints we have in
 * a given cpu (cpu > -1) or in all of them (cpu = -1).

@@ -135,25 +145,15 @@ static void
fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
            enum bp_type_idx type)
{
    int cpu = bp->cpu;
    struct task_struct *tsk = bp->hw.bp_target;
    const struct cpumask *cpumask = cpumask_of_bp(bp);
    int cpu;

    if (cpu >= 0) {
        slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu);
        if (!tsk)
            slots->pinned += max_task_bp_pinned(cpu, type);
        else
            slots->pinned += task_bp_pinned(cpu, bp, type);
        slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
    for_each_cpu(cpu, cpumask) {
        struct bp_cpuinfo *info = get_bp_info(cpu, type);
        int nr;

        return;
    }

    for_each_possible_cpu(cpu) {
        unsigned int nr;

        nr = per_cpu(nr_cpu_bp_pinned[type], cpu);
        if (!tsk)
        nr = info->cpu_pinned;
        if (!bp->hw.bp_target)
            nr += max_task_bp_pinned(cpu, type);
        else
            nr += task_bp_pinned(cpu, bp, type);

@@ -161,8 +161,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
        if (nr > slots->pinned)
            slots->pinned = nr;

        nr = per_cpu(nr_bp_flexible[type], cpu);

        nr = info->flexible;
        if (nr > slots->flexible)
            slots->flexible = nr;
    }

@@ -182,29 +181,19 @@ fetch_this_slot(struct bp_busy_slots *slots, int weight)
/*
 * Add a pinned breakpoint for the given task in our constraint table
 */
static void toggle_bp_task_slot(struct perf_event *bp, int cpu, bool enable,
static void toggle_bp_task_slot(struct perf_event *bp, int cpu,
                enum bp_type_idx type, int weight)
{
    unsigned int *tsk_pinned;
    int old_count = 0;
    int old_idx = 0;
    int idx = 0;
    unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
    int old_idx, new_idx;

    old_count = task_bp_pinned(cpu, bp, type);
    old_idx = old_count - 1;
    idx = old_idx + weight;
    old_idx = task_bp_pinned(cpu, bp, type) - 1;
    new_idx = old_idx + weight;

    /* tsk_pinned[n] is the number of tasks having n breakpoints */
    tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
    if (enable) {
        tsk_pinned[idx]++;
        if (old_count > 0)
            tsk_pinned[old_idx]--;
    } else {
        tsk_pinned[idx]--;
        if (old_count > 0)
            tsk_pinned[old_idx]++;
    }
    if (old_idx >= 0)
        tsk_pinned[old_idx]--;
    if (new_idx >= 0)
        tsk_pinned[new_idx]++;
}

/*
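
The rewritten toggle_bp_task_slot() above treats tsk_pinned[] as a histogram: bucket n counts tasks that currently pin n+1 breakpoints on a CPU, and applying a signed weight simply moves a task from bucket old_idx to new_idx. A self-contained model of that bookkeeping, with hypothetical names and no kernel dependencies:

/* Model of the tsk_pinned[] histogram update in the toggle_bp_slot() paths.
 * task_pinned: breakpoints the task already pins on this CPU;
 * weight: positive when enabling, negative when disabling. */
#include <stdio.h>

#define NR_SLOTS 4

static unsigned int tsk_pinned[NR_SLOTS];

static void toggle_task_slot(int task_pinned, int weight)
{
    int old_idx = task_pinned - 1;
    int new_idx = old_idx + weight;

    if (old_idx >= 0)
        tsk_pinned[old_idx]--;
    if (new_idx >= 0)
        tsk_pinned[new_idx]++;
}

int main(void)
{
    toggle_task_slot(0, 1);  /* task gains its first breakpoint -> bucket 0 */
    toggle_task_slot(1, 1);  /* same task gains a second one    -> bucket 1 */
    toggle_task_slot(2, -1); /* one breakpoint removed          -> back to bucket 0 */

    for (int i = 0; i < NR_SLOTS; i++)
        printf("tasks with %d breakpoint(s): %u\n", i + 1, tsk_pinned[i]);
    return 0;
}
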
@@ -214,33 +203,26 @@ static void
toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
           int weight)
{
    int cpu = bp->cpu;
    struct task_struct *tsk = bp->hw.bp_target;
    const struct cpumask *cpumask = cpumask_of_bp(bp);
    int cpu;

    if (!enable)
        weight = -weight;

    /* Pinned counter cpu profiling */
    if (!tsk) {

        if (enable)
            per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
        else
            per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
    if (!bp->hw.bp_target) {
        get_bp_info(bp->cpu, type)->cpu_pinned += weight;
        return;
    }

    /* Pinned counter task profiling */

    if (!enable)
        list_del(&bp->hw.bp_list);

    if (cpu >= 0) {
        toggle_bp_task_slot(bp, cpu, enable, type, weight);
    } else {
        for_each_possible_cpu(cpu)
            toggle_bp_task_slot(bp, cpu, enable, type, weight);
    }
    for_each_cpu(cpu, cpumask)
        toggle_bp_task_slot(bp, cpu, type, weight);

    if (enable)
        list_add_tail(&bp->hw.bp_list, &bp_task_head);
    else
        list_del(&bp->hw.bp_list);
}

/*

@@ -261,8 +243,8 @@ __weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
 *
 *   - If attached to a single cpu, check:
 *
 *       (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
 *           + max(per_cpu(nr_task_bp_pinned, cpu)))) < HBP_NUM
 *       (per_cpu(info->flexible, cpu) || (per_cpu(info->cpu_pinned, cpu)
 *           + max(per_cpu(info->tsk_pinned, cpu)))) < HBP_NUM
 *
 *       -> If there are already non-pinned counters in this cpu, it means
 *          there is already a free slot for them.

@@ -272,8 +254,8 @@ __weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
 *
 *   - If attached to every cpus, check:
 *
 *       (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
 *           + max(per_cpu(nr_task_bp_pinned, *)))) < HBP_NUM
 *       (per_cpu(info->flexible, *) || (max(per_cpu(info->cpu_pinned, *))
 *           + max(per_cpu(info->tsk_pinned, *)))) < HBP_NUM
 *
 *       -> This is roughly the same, except we check the number of per cpu
 *          bp for every cpu and we keep the max one. Same for the per tasks

@@ -284,16 +266,16 @@ __weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
 *
 *   - If attached to a single cpu, check:
 *
 *       ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
 *            + max(per_cpu(nr_task_bp_pinned, cpu))) < HBP_NUM
 *       ((per_cpu(info->flexible, cpu) > 1) + per_cpu(info->cpu_pinned, cpu)
 *            + max(per_cpu(info->tsk_pinned, cpu))) < HBP_NUM
 *
 *       -> Same checks as before. But now the nr_bp_flexible, if any, must keep
 *       -> Same checks as before. But now the info->flexible, if any, must keep
 *          one register at least (or they will never be fed).
 *
 *   - If attached to every cpus, check:
 *
 *       ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
 *            + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
 *       ((per_cpu(info->flexible, *) > 1) + max(per_cpu(info->cpu_pinned, *))
 *            + max(per_cpu(info->tsk_pinned, *))) < HBP_NUM
 */
static int __reserve_bp_slot(struct perf_event *bp)
{

@@ -518,8 +500,8 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
                perf_overflow_handler_t triggered,
                void *context)
{
    struct perf_event * __percpu *cpu_events, **pevent, *bp;
    long err;
    struct perf_event * __percpu *cpu_events, *bp;
    long err = 0;
    int cpu;

    cpu_events = alloc_percpu(typeof(*cpu_events));

@@ -528,31 +510,21 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,

    get_online_cpus();
    for_each_online_cpu(cpu) {
        pevent = per_cpu_ptr(cpu_events, cpu);
        bp = perf_event_create_kernel_counter(attr, cpu, NULL,
                              triggered, context);

        *pevent = bp;

        if (IS_ERR(bp)) {
            err = PTR_ERR(bp);
            goto fail;
        }
    }
    put_online_cpus();

    return cpu_events;

fail:
    for_each_online_cpu(cpu) {
        pevent = per_cpu_ptr(cpu_events, cpu);
        if (IS_ERR(*pevent))
            break;
        unregister_hw_breakpoint(*pevent);
    }

        per_cpu(*cpu_events, cpu) = bp;
    }
    put_online_cpus();

    free_percpu(cpu_events);
    if (likely(!err))
        return cpu_events;

    unregister_wide_hw_breakpoint(cpu_events);
    return (void __percpu __force *)ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);

@@ -564,12 +536,10 @@ EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
{
    int cpu;
    struct perf_event **pevent;

    for_each_possible_cpu(cpu) {
        pevent = per_cpu_ptr(cpu_events, cpu);
        unregister_hw_breakpoint(*pevent);
    }
    for_each_possible_cpu(cpu)
        unregister_hw_breakpoint(per_cpu(*cpu_events, cpu));

    free_percpu(cpu_events);
}
EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);

@@ -612,6 +582,11 @@ static int hw_breakpoint_add(struct perf_event *bp, int flags)
    if (!(flags & PERF_EF_START))
        bp->hw.state = PERF_HES_STOPPED;

    if (is_sampling_event(bp)) {
        bp->hw.last_period = bp->hw.sample_period;
        perf_swevent_set_period(bp);
    }

    return arch_install_hw_breakpoint(bp);
}

@@ -650,7 +625,6 @@ static struct pmu perf_breakpoint = {

int __init init_hw_breakpoint(void)
{
    unsigned int **task_bp_pinned;
    int cpu, err_cpu;
    int i;

@@ -659,10 +633,11 @@ int __init init_hw_breakpoint(void)

    for_each_possible_cpu(cpu) {
        for (i = 0; i < TYPE_MAX; i++) {
            task_bp_pinned = &per_cpu(nr_task_bp_pinned[i], cpu);
            *task_bp_pinned = kzalloc(sizeof(int) * nr_slots[i],
                          GFP_KERNEL);
            if (!*task_bp_pinned)
            struct bp_cpuinfo *info = get_bp_info(cpu, i);

            info->tsk_pinned = kcalloc(nr_slots[i], sizeof(int),
                           GFP_KERNEL);
            if (!info->tsk_pinned)
                goto err_alloc;
        }
    }

@@ -676,7 +651,7 @@ int __init init_hw_breakpoint(void)
err_alloc:
    for_each_possible_cpu(err_cpu) {
        for (i = 0; i < TYPE_MAX; i++)
            kfree(per_cpu(nr_task_bp_pinned[i], err_cpu));
            kfree(get_bp_info(err_cpu, i)->tsk_pinned);
        if (err_cpu == cpu)
            break;
    }
@@ -120,7 +120,6 @@ extern int blk_iopoll_enabled;
/* Constants used for minimum and maximum */
#ifdef CONFIG_LOCKUP_DETECTOR
static int sixty = 60;
static int neg_one = -1;
#endif

static int zero;

@@ -814,7 +813,7 @@ static struct ctl_table kern_table[] = {
        .maxlen = sizeof(int),
        .mode = 0644,
        .proc_handler = proc_dowatchdog,
        .extra1 = &neg_one,
        .extra1 = &zero,
        .extra2 = &sixty,
    },
    {

@@ -1044,6 +1043,15 @@ static struct ctl_table kern_table[] = {
        .mode = 0644,
        .proc_handler = perf_proc_update_handler,
    },
    {
        .procname = "perf_cpu_time_max_percent",
        .data = &sysctl_perf_cpu_time_max_percent,
        .maxlen = sizeof(sysctl_perf_cpu_time_max_percent),
        .mode = 0644,
        .proc_handler = perf_cpu_time_max_percent_handler,
        .extra1 = &zero,
        .extra2 = &one_hundred,
    },
#endif
#ifdef CONFIG_KMEMCHECK
    {