Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar:
 "Six kernel side fixes: three related to NMI handling on AMD systems,
  a race fix, a kexec initialization fix and a PEBS sampling fix"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/core: Fix perf_event_disable_inatomic() race
  x86/perf/amd: Remove need to check "running" bit in NMI handler
  x86/perf/amd: Resolve NMI latency issues for active PMCs
  x86/perf/amd: Resolve race condition when disabling PMC
  perf/x86/intel: Initialize TFA MSR
  perf/x86/intel: Fix handling of wakeup_events for multi-entry PEBS
commit 73fdb2c908
5 changed files with 190 additions and 27 deletions
@@ -3,10 +3,14 @@
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/delay.h>
 #include <asm/apicdef.h>
+#include <asm/nmi.h>
 
 #include "../perf_event.h"
 
+static DEFINE_PER_CPU(unsigned int, perf_nmi_counter);
+
 static __initconst const u64 amd_hw_cache_event_ids
                                 [PERF_COUNT_HW_CACHE_MAX]
                                 [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -429,6 +433,132 @@ static void amd_pmu_cpu_dead(int cpu)
         }
 }
 
+/*
+ * When a PMC counter overflows, an NMI is used to process the event and
+ * reset the counter. NMI latency can result in the counter being updated
+ * before the NMI can run, which can result in what appear to be spurious
+ * NMIs. This function is intended to wait for the NMI to run and reset
+ * the counter to avoid possible unhandled NMI messages.
+ */
+#define OVERFLOW_WAIT_COUNT        50
+
+static void amd_pmu_wait_on_overflow(int idx)
+{
+        unsigned int i;
+        u64 counter;
+
+        /*
+         * Wait for the counter to be reset if it has overflowed. This loop
+         * should exit very, very quickly, but just in case, don't wait
+         * forever...
+         */
+        for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
+                rdmsrl(x86_pmu_event_addr(idx), counter);
+                if (counter & (1ULL << (x86_pmu.cntval_bits - 1)))
+                        break;
+
+                /* Might be in IRQ context, so can't sleep */
+                udelay(1);
+        }
+}
+
+static void amd_pmu_disable_all(void)
+{
+        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+        int idx;
+
+        x86_pmu_disable_all();
+
+        /*
+         * This shouldn't be called from NMI context, but add a safeguard here
+         * to return, since if we're in NMI context we can't wait for an NMI
+         * to reset an overflowed counter value.
+         */
+        if (in_nmi())
+                return;
+
+        /*
+         * Check each counter for overflow and wait for it to be reset by the
+         * NMI if it has overflowed. This relies on the fact that all active
+         * counters are always enabled when this function is called and
+         * ARCH_PERFMON_EVENTSEL_INT is always set.
+         */
+        for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+                if (!test_bit(idx, cpuc->active_mask))
+                        continue;
+
+                amd_pmu_wait_on_overflow(idx);
+        }
+}
+
+static void amd_pmu_disable_event(struct perf_event *event)
+{
+        x86_pmu_disable_event(event);
+
+        /*
+         * This can be called from NMI context (via x86_pmu_stop). The counter
+         * may have overflowed, but either way, we'll never see it get reset
+         * by the NMI if we're already in the NMI. And the NMI latency support
+         * below will take care of any pending NMI that might have been
+         * generated by the overflow.
+         */
+        if (in_nmi())
+                return;
+
+        amd_pmu_wait_on_overflow(event->hw.idx);
+}
+
+/*
+ * Because of NMI latency, if multiple PMC counters are active or other sources
+ * of NMIs are received, the perf NMI handler can handle one or more overflowed
+ * PMC counters outside of the NMI associated with the PMC overflow. If the NMI
+ * doesn't arrive at the LAPIC in time to become a pending NMI, then the kernel
+ * back-to-back NMI support won't be active. This PMC handler needs to take into
+ * account that this can occur, otherwise this could result in unknown NMI
+ * messages being issued. Examples of this is PMC overflow while in the NMI
+ * handler when multiple PMCs are active or PMC overflow while handling some
+ * other source of an NMI.
+ *
+ * Attempt to mitigate this by using the number of active PMCs to determine
+ * whether to return NMI_HANDLED if the perf NMI handler did not handle/reset
+ * any PMCs. The per-CPU perf_nmi_counter variable is set to a minimum of the
+ * number of active PMCs or 2. The value of 2 is used in case an NMI does not
+ * arrive at the LAPIC in time to be collapsed into an already pending NMI.
+ */
+static int amd_pmu_handle_irq(struct pt_regs *regs)
+{
+        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+        int active, handled;
+
+        /*
+         * Obtain the active count before calling x86_pmu_handle_irq() since
+         * it is possible that x86_pmu_handle_irq() may make a counter
+         * inactive (through x86_pmu_stop).
+         */
+        active = __bitmap_weight(cpuc->active_mask, X86_PMC_IDX_MAX);
+
+        /* Process any counter overflows */
+        handled = x86_pmu_handle_irq(regs);
+
+        /*
+         * If a counter was handled, record the number of possible remaining
+         * NMIs that can occur.
+         */
+        if (handled) {
+                this_cpu_write(perf_nmi_counter,
+                               min_t(unsigned int, 2, active));
+
+                return handled;
+        }
+
+        if (!this_cpu_read(perf_nmi_counter))
+                return NMI_DONE;
+
+        this_cpu_dec(perf_nmi_counter);
+
+        return NMI_HANDLED;
+}
+
 static struct event_constraint *
 amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
                           struct perf_event *event)
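The comment block above describes the mitigation in prose; the stand-alone sketch below (plain user-space C with hypothetical names, not kernel code) models the same bookkeeping: after the handler reaps at least one overflow it banks min(active, 2) "expected" NMIs, and each later NMI that finds nothing to do consumes one credit instead of being reported as an unknown NMI.

/*
 * Conceptual model only (hypothetical names, user-space C) of the
 * perf_nmi_counter scheme implemented by amd_pmu_handle_irq() above.
 */
#include <stdio.h>

#define NMI_DONE    0
#define NMI_HANDLED 1

static unsigned int nmi_credit;    /* stand-in for the per-CPU counter */

static int model_handle_nmi(int overflows_handled, int active_pmcs)
{
    if (overflows_handled) {
        /* Bank the number of late NMIs this burst may still produce. */
        nmi_credit = active_pmcs < 2 ? active_pmcs : 2;
        return NMI_HANDLED;
    }

    if (!nmi_credit)
        return NMI_DONE;    /* genuinely unknown NMI */

    nmi_credit--;           /* swallow one expected late NMI */
    return NMI_HANDLED;
}

int main(void)
{
    /* Two PMCs overflow back to back while four are active. */
    printf("%d\n", model_handle_nmi(2, 4));  /* 1: real work, credit = 2 */
    printf("%d\n", model_handle_nmi(0, 4));  /* 1: claimed,   credit = 1 */
    printf("%d\n", model_handle_nmi(0, 4));  /* 1: claimed,   credit = 0 */
    printf("%d\n", model_handle_nmi(0, 4));  /* 0: reported as unknown   */
    return 0;
}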
@@ -621,11 +751,11 @@ static ssize_t amd_event_sysfs_show(char *page, u64 config)
 
 static __initconst const struct x86_pmu amd_pmu = {
         .name                   = "AMD",
-        .handle_irq             = x86_pmu_handle_irq,
-        .disable_all            = x86_pmu_disable_all,
+        .handle_irq             = amd_pmu_handle_irq,
+        .disable_all            = amd_pmu_disable_all,
         .enable_all             = x86_pmu_enable_all,
         .enable                 = x86_pmu_enable_event,
-        .disable                = x86_pmu_disable_event,
+        .disable                = amd_pmu_disable_event,
         .hw_config              = amd_pmu_hw_config,
         .schedule_events        = x86_schedule_events,
         .eventsel               = MSR_K7_EVNTSEL0,
@@ -732,7 +862,7 @@ void amd_pmu_enable_virt(void)
         cpuc->perf_ctr_virt_mask = 0;
 
         /* Reload all events */
-        x86_pmu_disable_all();
+        amd_pmu_disable_all();
         x86_pmu_enable_all(0);
 }
 EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
@@ -750,7 +880,7 @@ void amd_pmu_disable_virt(void)
         cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
 
         /* Reload all events */
-        x86_pmu_disable_all();
+        amd_pmu_disable_all();
         x86_pmu_enable_all(0);
 }
 EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);
@@ -1349,8 +1349,9 @@ void x86_pmu_stop(struct perf_event *event, int flags)
         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
         struct hw_perf_event *hwc = &event->hw;
 
-        if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
+        if (test_bit(hwc->idx, cpuc->active_mask)) {
                 x86_pmu.disable(event);
+                __clear_bit(hwc->idx, cpuc->active_mask);
                 cpuc->events[hwc->idx] = NULL;
                 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
                 hwc->state |= PERF_HES_STOPPED;
@@ -1447,16 +1448,8 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
         apic_write(APIC_LVTPC, APIC_DM_NMI);
 
         for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-                if (!test_bit(idx, cpuc->active_mask)) {
-                        /*
-                         * Though we deactivated the counter some cpus
-                         * might still deliver spurious interrupts still
-                         * in flight. Catch them:
-                         */
-                        if (__test_and_clear_bit(idx, cpuc->running))
-                                handled++;
+                if (!test_bit(idx, cpuc->active_mask))
                         continue;
-                }
 
                 event = cpuc->events[idx];
 
@@ -3185,7 +3185,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
                 return ret;
 
         if (event->attr.precise_ip) {
-                if (!event->attr.freq) {
+                if (!(event->attr.freq || event->attr.wakeup_events)) {
                         event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
                         if (!(event->attr.sample_type &
                               ~intel_pmu_large_pebs_flags(event)))
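Before this change, a precise (PEBS) sampling event was steered into the auto-reload / multi-entry PEBS path even when it asked for wakeup_events, so the reader was not woken per sample as requested; the extra condition keeps such events out of that path. For reference, a minimal user-space sketch of the attribute combination the fix targets (illustrative only; assumes a PEBS-capable Intel CPU and a perf_event_paranoid setting that permits the call):

/* Open a precise sampling event that also wants a wakeup per sample. */
#define _GNU_SOURCE
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
    struct perf_event_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.size          = sizeof(attr);
    attr.type          = PERF_TYPE_HARDWARE;
    attr.config        = PERF_COUNT_HW_CPU_CYCLES;
    attr.sample_period = 100000;
    attr.sample_type   = PERF_SAMPLE_IP;
    attr.precise_ip    = 2;        /* request PEBS-based samples */
    attr.wakeup_events = 1;        /* wake the reader on every sample */
    attr.disabled      = 1;

    /* Monitor the calling thread on any CPU. */
    int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    if (fd < 0) {
        perror("perf_event_open");
        return 1;
    }

    /* ... mmap the ring buffer, enable the event, poll(fd) for wakeups ... */
    close(fd);
    return 0;
}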
@@ -3575,6 +3575,12 @@ static void intel_pmu_cpu_starting(int cpu)
 
         cpuc->lbr_sel = NULL;
 
+        if (x86_pmu.flags & PMU_FL_TFA) {
+                WARN_ON_ONCE(cpuc->tfa_shadow);
+                cpuc->tfa_shadow = ~0ULL;
+                intel_set_tfa(cpuc, false);
+        }
+
         if (x86_pmu.version > 1)
                 flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
 
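Per the pull message, this hunk is the kexec initialization fix: cpuc->tfa_shadow is a software copy used to skip redundant writes of the TFA MSR, so if firmware or a previous kernel (e.g. across kexec) left the MSR set while the freshly booted kernel's shadow is still zero, the first intel_set_tfa(cpuc, false) would be skipped. Seeding the shadow with ~0ULL forces that first write. A tiny stand-alone model of the shadow-cached-write pattern (hypothetical names, not the kernel code):

/* Conceptual model only of a shadow-cached register write. */
#include <stdio.h>
#include <stdint.h>

static uint64_t hw_msr = 1;   /* stale value left behind, e.g. across kexec */
static uint64_t shadow;       /* software copy used to skip redundant writes */

static void set_force_abort(uint64_t val)
{
    if (shadow == val)        /* cached-write shortcut */
        return;
    shadow = val;
    hw_msr = val;             /* stand-in for the real MSR write */
}

int main(void)
{
    /* Shadow starts at 0, so clearing to 0 is skipped and hw_msr stays stale. */
    set_force_abort(0);
    printf("no sentinel:   hw_msr = %llu\n", (unsigned long long)hw_msr);

    /* The fix: seed the shadow with an impossible value to force the write. */
    hw_msr = 1;
    shadow = ~0ULL;
    set_force_abort(0);
    printf("with sentinel: hw_msr = %llu\n", (unsigned long long)hw_msr);
    return 0;
}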
@@ -2009,8 +2009,8 @@ event_sched_out(struct perf_event *event,
         event->pmu->del(event, 0);
         event->oncpu = -1;
 
-        if (event->pending_disable) {
-                event->pending_disable = 0;
+        if (READ_ONCE(event->pending_disable) >= 0) {
+                WRITE_ONCE(event->pending_disable, -1);
                 state = PERF_EVENT_STATE_OFF;
         }
         perf_event_set_state(event, state);
@@ -2198,7 +2198,8 @@ EXPORT_SYMBOL_GPL(perf_event_disable);
 
 void perf_event_disable_inatomic(struct perf_event *event)
 {
-        event->pending_disable = 1;
+        WRITE_ONCE(event->pending_disable, smp_processor_id());
+        /* can fail, see perf_pending_event_disable() */
         irq_work_queue(&event->pending);
 }
 
@@ -5810,10 +5811,45 @@ void perf_event_wakeup(struct perf_event *event)
         }
 }
 
+static void perf_pending_event_disable(struct perf_event *event)
+{
+        int cpu = READ_ONCE(event->pending_disable);
+
+        if (cpu < 0)
+                return;
+
+        if (cpu == smp_processor_id()) {
+                WRITE_ONCE(event->pending_disable, -1);
+                perf_event_disable_local(event);
+                return;
+        }
+
+        /*
+         *  CPU-A                        CPU-B
+         *
+         *  perf_event_disable_inatomic()
+         *    @pending_disable = CPU-A;
+         *    irq_work_queue();
+         *
+         *  sched-out
+         *    @pending_disable = -1;
+         *
+         *                               sched-in
+         *                               perf_event_disable_inatomic()
+         *                                 @pending_disable = CPU-B;
+         *                                 irq_work_queue(); // FAILS
+         *
+         *  irq_work_run()
+         *    perf_pending_event()
+         *
+         *  But the event runs on CPU-B and wants disabling there.
+         */
+        irq_work_queue_on(&event->pending, cpu);
+}
+
 static void perf_pending_event(struct irq_work *entry)
 {
-        struct perf_event *event = container_of(entry,
-                        struct perf_event, pending);
+        struct perf_event *event = container_of(entry, struct perf_event, pending);
         int rctx;
 
         rctx = perf_swevent_get_recursion_context();
@@ -5822,10 +5858,7 @@ static void perf_pending_event(struct irq_work *entry)
          * and we won't recurse 'further'.
          */
 
-        if (event->pending_disable) {
-                event->pending_disable = 0;
-                perf_event_disable_local(event);
-        }
+        perf_pending_event_disable(event);
 
         if (event->pending_wakeup) {
                 event->pending_wakeup = 0;
@@ -10236,6 +10269,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
 
         init_waitqueue_head(&event->waitq);
+        event->pending_disable = -1;
         init_irq_work(&event->pending, perf_pending_event);
 
         mutex_init(&event->mmap_mutex);
@@ -392,7 +392,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
          * store that will be enabled on successful return
          */
         if (!handle->size) { /* A, matches D */
-                event->pending_disable = 1;
+                event->pending_disable = smp_processor_id();
                 perf_output_wakeup(handle);
                 local_set(&rb->aux_nest, 0);
                 goto err_put;
@@ -480,7 +480,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
 
         if (wakeup) {
                 if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)
-                        handle->event->pending_disable = 1;
+                        handle->event->pending_disable = smp_processor_id();
                 perf_output_wakeup(handle);
         }
 