Merge branch 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull irq core changes from Ingo Molnar:
 "The biggest changes are the IRQ-work and printk changes from Frederic
  Weisbecker, which prepare the code for 'full dynticks' (the ability to
  stop or slow down the periodic tick arbitrarily, not just in idle time
  as today):

   - Don't stop tick with irq works pending.  This fix is generally
     useful and concerns archs that can't raise self IPIs.

   - Flush irq works before CPU offlining.

   - Introduce "lazy" irq works that can wait for the next tick to be
     executed, unless it's stopped.

   - Implement klogd wake up using irq work.  This removes the ad-hoc
     printk_tick()/printk_needs_cpu() hooks and make it working even in
     dynticks mode.

   - Cleanups and fixes."

* 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  genirq: Export enable/disable_percpu_irq()
  arch Kconfig: Remove references to IRQ_PER_CPU
  irq_work: Remove return value from the irq_work_queue() function
  genirq: Avoid deadlock in spurious handling
  printk: Wake up klogd using irq_work
  irq_work: Make self-IPIs optable
  irq_work: Warn if there's still work on cpu_down
  irq_work: Flush work on CPU_DYING
  irq_work: Don't stop the tick with pending works
  nohz: Add API to check tick state
  irq_work: Remove CONFIG_HAVE_IRQ_WORK
  irq_work: Fix racy check on work pending flag
  irq_work: Fix racy IRQ_WORK_BUSY flag setting
This commit is contained in:
Linus Torvalds 2013-02-19 17:47:58 -08:00
commit b7133a9a10
25 changed files with 177 additions and 108 deletions

View file

@ -1524,6 +1524,7 @@ void enable_percpu_irq(unsigned int irq, unsigned int type)
out:
irq_put_desc_unlock(desc, flags);
}
EXPORT_SYMBOL_GPL(enable_percpu_irq);
void disable_percpu_irq(unsigned int irq)
{
@ -1537,6 +1538,7 @@ void disable_percpu_irq(unsigned int irq)
irq_percpu_disable(desc, cpu);
irq_put_desc_unlock(desc, flags);
}
EXPORT_SYMBOL_GPL(disable_percpu_irq);
/*
* Internal function to unregister a percpu irqaction.

View file

@ -80,13 +80,11 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force)
/*
* All handlers must agree on IRQF_SHARED, so we test just the
* first. Check for action->next as well.
* first.
*/
action = desc->action;
if (!action || !(action->flags & IRQF_SHARED) ||
(action->flags & __IRQF_TIMER) ||
(action->handler(irq, action->dev_id) == IRQ_HANDLED) ||
!action->next)
(action->flags & __IRQF_TIMER))
goto out;
/* Already running on another processor */
@ -104,6 +102,7 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force)
do {
if (handle_irq_event(desc) == IRQ_HANDLED)
ret = IRQ_HANDLED;
/* Make sure that there is still a valid action */
action = desc->action;
} while ((desc->istate & IRQS_PENDING) && action);
desc->istate &= ~IRQS_POLL_INPROGRESS;

View file

@ -12,37 +12,36 @@
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/irqflags.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <asm/processor.h>
/*
* An entry can be in one of four states:
*
* free NULL, 0 -> {claimed} : free to be used
* claimed NULL, 3 -> {pending} : claimed to be enqueued
* pending next, 3 -> {busy} : queued, pending callback
* busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed
*/
#define IRQ_WORK_PENDING 1UL
#define IRQ_WORK_BUSY 2UL
#define IRQ_WORK_FLAGS 3UL
static DEFINE_PER_CPU(struct llist_head, irq_work_list);
static DEFINE_PER_CPU(int, irq_work_raised);
/*
* Claim the entry so that no one else will poke at it.
*/
static bool irq_work_claim(struct irq_work *work)
{
unsigned long flags, nflags;
unsigned long flags, oflags, nflags;
/*
* Start with our best wish as a premise but only trust any
* flag value after cmpxchg() result.
*/
flags = work->flags & ~IRQ_WORK_PENDING;
for (;;) {
flags = work->flags;
if (flags & IRQ_WORK_PENDING)
return false;
nflags = flags | IRQ_WORK_FLAGS;
if (cmpxchg(&work->flags, flags, nflags) == flags)
oflags = cmpxchg(&work->flags, flags, nflags);
if (oflags == flags)
break;
if (oflags & IRQ_WORK_PENDING)
return false;
flags = oflags;
cpu_relax();
}
@ -57,57 +56,69 @@ void __weak arch_irq_work_raise(void)
}
/*
* Queue the entry and raise the IPI if needed.
*/
static void __irq_work_queue(struct irq_work *work)
{
bool empty;
preempt_disable();
empty = llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
/* The list was empty, raise self-interrupt to start processing. */
if (empty)
arch_irq_work_raise();
preempt_enable();
}
/*
* Enqueue the irq_work @entry, returns true on success, failure when the
* @entry was already enqueued by someone else.
* Enqueue the irq_work @entry unless it's already pending
* somewhere.
*
* Can be re-enqueued while the callback is still in progress.
*/
bool irq_work_queue(struct irq_work *work)
void irq_work_queue(struct irq_work *work)
{
if (!irq_work_claim(work)) {
/*
* Already enqueued, can't do!
*/
return false;
/* Only queue if not already pending */
if (!irq_work_claim(work))
return;
/* Queue the entry and raise the IPI if needed. */
preempt_disable();
llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
/*
* If the work is not "lazy" or the tick is stopped, raise the irq
* work interrupt (if supported by the arch), otherwise, just wait
* for the next tick.
*/
if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) {
if (!this_cpu_cmpxchg(irq_work_raised, 0, 1))
arch_irq_work_raise();
}
__irq_work_queue(work);
return true;
preempt_enable();
}
EXPORT_SYMBOL_GPL(irq_work_queue);
/*
* Run the irq_work entries on this cpu. Requires to be ran from hardirq
* context with local IRQs disabled.
*/
void irq_work_run(void)
bool irq_work_needs_cpu(void)
{
struct llist_head *this_list;
this_list = &__get_cpu_var(irq_work_list);
if (llist_empty(this_list))
return false;
/* All work should have been flushed before going offline */
WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
return true;
}
static void __irq_work_run(void)
{
unsigned long flags;
struct irq_work *work;
struct llist_head *this_list;
struct llist_node *llnode;
/*
* Reset the "raised" state right before we check the list because
* an NMI may enqueue after we find the list empty from the runner.
*/
__this_cpu_write(irq_work_raised, 0);
barrier();
this_list = &__get_cpu_var(irq_work_list);
if (llist_empty(this_list))
return;
BUG_ON(!in_irq());
BUG_ON(!irqs_disabled());
llnode = llist_del_all(this_list);
@ -119,16 +130,31 @@ void irq_work_run(void)
/*
* Clear the PENDING bit, after this point the @work
* can be re-used.
* Make it immediately visible so that other CPUs trying
* to claim that work don't rely on us to handle their data
* while we are in the middle of the func.
*/
work->flags = IRQ_WORK_BUSY;
flags = work->flags & ~IRQ_WORK_PENDING;
xchg(&work->flags, flags);
work->func(work);
/*
* Clear the BUSY bit and return to the free state if
* no-one else claimed it meanwhile.
*/
(void)cmpxchg(&work->flags, IRQ_WORK_BUSY, 0);
(void)cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
}
}
/*
* Run the irq_work entries on this cpu. Requires to be ran from hardirq
* context with local IRQs disabled.
*/
void irq_work_run(void)
{
BUG_ON(!in_irq());
__irq_work_run();
}
EXPORT_SYMBOL_GPL(irq_work_run);
/*
@ -143,3 +169,35 @@ void irq_work_sync(struct irq_work *work)
cpu_relax();
}
EXPORT_SYMBOL_GPL(irq_work_sync);
#ifdef CONFIG_HOTPLUG_CPU
static int irq_work_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
long cpu = (long)hcpu;
switch (action) {
case CPU_DYING:
/* Called from stop_machine */
if (WARN_ON_ONCE(cpu != smp_processor_id()))
break;
__irq_work_run();
break;
default:
break;
}
return NOTIFY_OK;
}
static struct notifier_block cpu_notify;
static __init int irq_work_init_cpu_notifier(void)
{
cpu_notify.notifier_call = irq_work_cpu_notify;
cpu_notify.priority = 0;
register_cpu_notifier(&cpu_notify);
return 0;
}
device_initcall(irq_work_init_cpu_notifier);
#endif /* CONFIG_HOTPLUG_CPU */

View file

@ -42,6 +42,7 @@
#include <linux/notifier.h>
#include <linux/rculist.h>
#include <linux/poll.h>
#include <linux/irq_work.h>
#include <asm/uaccess.h>
@ -1959,30 +1960,32 @@ int is_console_locked(void)
static DEFINE_PER_CPU(int, printk_pending);
static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf);
void printk_tick(void)
static void wake_up_klogd_work_func(struct irq_work *irq_work)
{
if (__this_cpu_read(printk_pending)) {
int pending = __this_cpu_xchg(printk_pending, 0);
if (pending & PRINTK_PENDING_SCHED) {
char *buf = __get_cpu_var(printk_sched_buf);
printk(KERN_WARNING "[sched_delayed] %s", buf);
}
if (pending & PRINTK_PENDING_WAKEUP)
wake_up_interruptible(&log_wait);
int pending = __this_cpu_xchg(printk_pending, 0);
if (pending & PRINTK_PENDING_SCHED) {
char *buf = __get_cpu_var(printk_sched_buf);
printk(KERN_WARNING "[sched_delayed] %s", buf);
}
if (pending & PRINTK_PENDING_WAKEUP)
wake_up_interruptible(&log_wait);
}
int printk_needs_cpu(int cpu)
{
if (cpu_is_offline(cpu))
printk_tick();
return __this_cpu_read(printk_pending);
}
static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
.func = wake_up_klogd_work_func,
.flags = IRQ_WORK_LAZY,
};
void wake_up_klogd(void)
{
if (waitqueue_active(&log_wait))
preempt_disable();
if (waitqueue_active(&log_wait)) {
this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
}
preempt_enable();
}
static void console_cont_flush(char *text, size_t size)
@ -2462,6 +2465,7 @@ int printk_sched(const char *fmt, ...)
va_end(args);
__this_cpu_or(printk_pending, PRINTK_PENDING_SCHED);
irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
local_irq_restore(flags);
return r;

View file

@ -20,6 +20,7 @@
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/irq_work.h>
#include <asm/irq_regs.h>
@ -28,7 +29,7 @@
/*
* Per cpu nohz control structure
*/
static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
/*
* The time, when the last jiffy update happened. Protected by jiffies_lock.
@ -331,8 +332,8 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
time_delta = timekeeping_max_deferment();
} while (read_seqretry(&jiffies_lock, seq));
if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
arch_needs_cpu(cpu)) {
if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) ||
arch_needs_cpu(cpu) || irq_work_needs_cpu()) {
next_jiffies = last_jiffies + 1;
delta_jiffies = 1;
} else {

View file

@ -1351,7 +1351,6 @@ void update_process_times(int user_tick)
account_process_tick(p, user_tick);
run_local_timers();
rcu_check_callbacks(cpu, user_tick);
printk_tick();
#ifdef CONFIG_IRQ_WORK
if (in_irq())
irq_work_run();