Merge branch 'rcu-tasks.2014.09.10a' into HEAD

rcu-tasks.2014.09.10a: Add RCU-tasks flavor of RCU.
Paul E. McKenney 2014-09-16 10:10:44 -07:00
commit 96b4672703
24 changed files with 613 additions and 93 deletions
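For orientation before the hunks below: call_rcu_tasks() is used like call_rcu(), except that the grace period waits until every task has passed through a voluntary context switch, user-space execution, or the idle loop. A minimal caller sketch (struct foo_tramp and its helpers are made up for illustration; they are not part of this commit):

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo_tramp {			/* hypothetical trampoline object */
	struct rcu_head rh;
	char insns[64];
};

static void foo_tramp_free_cb(struct rcu_head *rhp)
{
	kfree(container_of(rhp, struct foo_tramp, rh));
}

static void foo_tramp_retire(struct foo_tramp *tp)
{
	/* Free only once no task can still be executing in tp->insns. */
	call_rcu_tasks(&tp->rh, foo_tramp_free_cb);
}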

kernel/rcu/rcutorture.c

@@ -612,6 +612,52 @@ static struct rcu_torture_ops sched_ops = {
.name = "sched"
};
#ifdef CONFIG_TASKS_RCU
/*
* Definitions for RCU-tasks torture testing.
*/
static int tasks_torture_read_lock(void)
{
return 0;
}
static void tasks_torture_read_unlock(int idx)
{
}
static void rcu_tasks_torture_deferred_free(struct rcu_torture *p)
{
call_rcu_tasks(&p->rtort_rcu, rcu_torture_cb);
}
static struct rcu_torture_ops tasks_ops = {
.ttype = RCU_TASKS_FLAVOR,
.init = rcu_sync_torture_init,
.readlock = tasks_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = tasks_torture_read_unlock,
.completed = rcu_no_completed,
.deferred_free = rcu_tasks_torture_deferred_free,
.sync = synchronize_rcu_tasks,
.exp_sync = synchronize_rcu_tasks,
.call = call_rcu_tasks,
.cb_barrier = rcu_barrier_tasks,
.fqs = NULL,
.stats = NULL,
.irq_capable = 1,
.name = "tasks"
};
#define RCUTORTURE_TASKS_OPS &tasks_ops,
#else /* #ifdef CONFIG_TASKS_RCU */
#define RCUTORTURE_TASKS_OPS
#endif /* #else #ifdef CONFIG_TASKS_RCU */
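The trailing comma in the non-empty RCUTORTURE_TASKS_OPS definition is what lets the macro sit at the tail of the torture_ops[] initializer in rcu_torture_init() below; the two expansions look like this:

/* CONFIG_TASKS_RCU=y: */
static struct rcu_torture_ops *torture_ops[] = {
	&rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops,
	&tasks_ops,
};
/* CONFIG_TASKS_RCU=n: the macro expands to nothing, leaving no stray comma: */
static struct rcu_torture_ops *torture_ops[] = {
	&rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops,
};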
/*
* RCU torture priority-boost testing. Runs one real-time thread per
* CPU for moderate bursts, repeatedly registering RCU callbacks and
@@ -678,7 +724,7 @@ static int rcu_torture_boost(void *arg)
}
call_rcu_time = jiffies;
}
-cond_resched();
+cond_resched_rcu_qs();
stutter_wait("rcu_torture_boost");
if (torture_must_stop())
goto checkwait;
@@ -1082,7 +1128,7 @@ rcu_torture_reader(void *arg)
__this_cpu_inc(rcu_torture_batch[completed]);
preempt_enable();
cur_ops->readunlock(idx);
-cond_resched();
+cond_resched_rcu_qs();
stutter_wait("rcu_torture_reader");
} while (!torture_must_stop());
if (irqreader && cur_ops->irq_capable) {
@@ -1344,7 +1390,8 @@ static int rcu_torture_barrier_cbs(void *arg)
if (atomic_dec_and_test(&barrier_cbs_count))
wake_up(&barrier_wq);
} while (!torture_must_stop());
-cur_ops->cb_barrier();
+if (cur_ops->cb_barrier != NULL)
+	cur_ops->cb_barrier();
destroy_rcu_head_on_stack(&rcu);
torture_kthread_stopping("rcu_torture_barrier_cbs");
return 0;
@@ -1585,6 +1632,7 @@ rcu_torture_init(void)
int firsterr = 0;
static struct rcu_torture_ops *torture_ops[] = {
&rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops,
+RCUTORTURE_TASKS_OPS
};
if (!torture_init_begin(torture_type, verbose, &rcutorture_runnable))
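The cond_resched_rcu_qs() substituted for cond_resched() throughout this commit is defined in include/linux/rcupdate.h, which is changed by this series but not among the hunks shown here; its shape is approximately:

/* Approximate definition, from elsewhere in this series: report a
 * voluntary context switch to RCU-tasks, then offer to reschedule. */
#define cond_resched_rcu_qs() \
do { \
	rcu_note_voluntary_context_switch(current); \
	cond_resched(); \
} while (0)

/* ...where the note is a lockless clearing of the holdout flag: */
#define rcu_note_voluntary_context_switch(t) \
do { \
	if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \
		ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \
} while (0)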

kernel/rcu/tiny.c

@@ -72,7 +72,7 @@ static void rcu_idle_enter_common(long long newval)
current->pid, current->comm,
idle->pid, idle->comm); /* must be idle task! */
}
-rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */
+rcu_sched_qs(); /* implies rcu_bh_inc() */
barrier();
rcu_dynticks_nesting = newval;
}
@@ -217,7 +217,7 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
* are at it, given that any rcu quiescent state is also an rcu_bh
* quiescent state. Use "+" instead of "||" to defeat short circuiting.
*/
-void rcu_sched_qs(int cpu)
+void rcu_sched_qs(void)
{
unsigned long flags;
@@ -231,7 +231,7 @@ void rcu_sched_qs(int cpu)
/*
* Record an rcu_bh quiescent state.
*/
-void rcu_bh_qs(int cpu)
+void rcu_bh_qs(void)
{
unsigned long flags;
@@ -251,9 +251,11 @@ void rcu_check_callbacks(int cpu, int user)
{
RCU_TRACE(check_cpu_stalls());
if (user || rcu_is_cpu_rrupt_from_idle())
-rcu_sched_qs(cpu);
+rcu_sched_qs();
else if (!in_softirq())
-rcu_bh_qs(cpu);
+rcu_bh_qs();
+if (user)
+	rcu_note_voluntary_context_switch(current);
}
/*

kernel/rcu/tree.c

@@ -197,22 +197,24 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
* one since the start of the grace period, this just sets a flag.
* The caller must have disabled preemption.
*/
-void rcu_sched_qs(int cpu)
+void rcu_sched_qs(void)
{
-struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
-if (rdp->passed_quiesce == 0)
-trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs"));
-rdp->passed_quiesce = 1;
+if (!__this_cpu_read(rcu_sched_data.passed_quiesce)) {
+trace_rcu_grace_period(TPS("rcu_sched"),
+__this_cpu_read(rcu_sched_data.gpnum),
+TPS("cpuqs"));
+__this_cpu_write(rcu_sched_data.passed_quiesce, 1);
+}
}
-void rcu_bh_qs(int cpu)
+void rcu_bh_qs(void)
{
-struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
-if (rdp->passed_quiesce == 0)
-trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs"));
-rdp->passed_quiesce = 1;
+if (!__this_cpu_read(rcu_bh_data.passed_quiesce)) {
+trace_rcu_grace_period(TPS("rcu_bh"),
+__this_cpu_read(rcu_bh_data.gpnum),
+TPS("cpuqs"));
+__this_cpu_write(rcu_bh_data.passed_quiesce, 1);
+}
}
static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
@@ -287,7 +289,7 @@ static void rcu_momentary_dyntick_idle(void)
void rcu_note_context_switch(int cpu)
{
trace_rcu_utilization(TPS("Start context switch"));
-rcu_sched_qs(cpu);
+rcu_sched_qs();
rcu_preempt_note_context_switch(cpu);
if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
rcu_momentary_dyntick_idle();
@@ -535,6 +537,7 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
atomic_inc(&rdtp->dynticks);
smp_mb__after_atomic(); /* Force ordering with next sojourn. */
WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+rcu_dynticks_task_enter();
/*
* It is illegal to enter an extended quiescent state while
@@ -651,6 +654,7 @@ void rcu_irq_exit(void)
static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
int user)
{
+rcu_dynticks_task_exit();
smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */
atomic_inc(&rdtp->dynticks);
/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
@@ -1656,7 +1660,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
rnp->level, rnp->grplo,
rnp->grphi, rnp->qsmask);
raw_spin_unlock_irq(&rnp->lock);
-cond_resched();
+cond_resched_rcu_qs();
}
mutex_unlock(&rsp->onoff_mutex);
@@ -1746,7 +1750,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
/* smp_mb() provided by prior unlock-lock pair. */
nocb += rcu_future_gp_cleanup(rsp, rnp);
raw_spin_unlock_irq(&rnp->lock);
-cond_resched();
+cond_resched_rcu_qs();
}
rnp = rcu_get_root(rsp);
raw_spin_lock_irq(&rnp->lock);
@@ -1795,7 +1799,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
/* Locking provides needed memory barrier. */
if (rcu_gp_init(rsp))
break;
-cond_resched();
+cond_resched_rcu_qs();
WARN_ON(signal_pending(current));
trace_rcu_grace_period(rsp->name,
ACCESS_ONCE(rsp->gpnum),
@@ -1838,10 +1842,10 @@
trace_rcu_grace_period(rsp->name,
ACCESS_ONCE(rsp->gpnum),
TPS("fqsend"));
-cond_resched();
+cond_resched_rcu_qs();
} else {
/* Deal with stray signal. */
-cond_resched();
+cond_resched_rcu_qs();
WARN_ON(signal_pending(current));
trace_rcu_grace_period(rsp->name,
ACCESS_ONCE(rsp->gpnum),
@@ -2401,8 +2405,8 @@ void rcu_check_callbacks(int cpu, int user)
* at least not while the corresponding CPU is online.
*/
-rcu_sched_qs(cpu);
-rcu_bh_qs(cpu);
+rcu_sched_qs();
+rcu_bh_qs();
} else if (!in_softirq()) {
@@ -2413,11 +2417,13 @@ void rcu_check_callbacks(int cpu, int user)
* critical section, so note it.
*/
-rcu_bh_qs(cpu);
+rcu_bh_qs();
}
rcu_preempt_check_callbacks(cpu);
if (rcu_pending(cpu))
invoke_rcu_core();
+if (user)
+rcu_note_voluntary_context_switch(current);
trace_rcu_utilization(TPS("End scheduler-tick"));
}
@@ -2440,7 +2446,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
struct rcu_node *rnp;
rcu_for_each_leaf_node(rsp, rnp) {
-cond_resched();
+cond_resched_rcu_qs();
mask = 0;
raw_spin_lock_irqsave(&rnp->lock, flags);
smp_mb__after_unlock_lock();

View file

@@ -615,6 +615,8 @@ static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
static void rcu_bind_gp_kthread(void);
static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
static bool rcu_nohz_full_cpu(struct rcu_state *rsp);
+static void rcu_dynticks_task_enter(void);
+static void rcu_dynticks_task_exit(void);
#endif /* #ifndef RCU_TREE_NONCORE */

kernel/rcu/tree_plugin.h

@@ -128,18 +128,19 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
* not in a quiescent state. There might be any number of tasks blocked
* while in an RCU read-side critical section.
*
- * Unlike the other rcu_*_qs() functions, callers to this function
- * must disable irqs in order to protect the assignment to
- * ->rcu_read_unlock_special.
+ * As with the other rcu_*_qs() functions, callers to this function
+ * must disable preemption.
*/
-static void rcu_preempt_qs(int cpu)
+static void rcu_preempt_qs(void)
{
-struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
-if (rdp->passed_quiesce == 0)
-trace_rcu_grace_period(TPS("rcu_preempt"), rdp->gpnum, TPS("cpuqs"));
-rdp->passed_quiesce = 1;
-current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
+if (!__this_cpu_read(rcu_preempt_data.passed_quiesce)) {
+trace_rcu_grace_period(TPS("rcu_preempt"),
+__this_cpu_read(rcu_preempt_data.gpnum),
+TPS("cpuqs"));
+__this_cpu_write(rcu_preempt_data.passed_quiesce, 1);
+barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */
+current->rcu_read_unlock_special.b.need_qs = false;
+}
}
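The .b.need_qs, .b.blocked, and .s accesses in these hunks use the union rcu_special that replaces the old RCU_READ_UNLOCK_NEED_QS/RCU_READ_UNLOCK_BLOCKED bitmask. The union is added to include/linux/sched.h elsewhere in this series (not among the hunks shown); approximately:

union rcu_special {
	struct {
		bool blocked;	/* blocked in an RCU read-side section? */
		bool need_qs;	/* RCU core wants a quiescent state? */
	} b;			/* individual bits */
	short s;		/* whole word, for "anything set?" tests */
};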
/*
@@ -163,14 +164,14 @@ static void rcu_preempt_note_context_switch(int cpu)
struct rcu_node *rnp;
if (t->rcu_read_lock_nesting > 0 &&
-(t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
+!t->rcu_read_unlock_special.b.blocked) {
/* Possibly blocking in an RCU read-side critical section. */
rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
rnp = rdp->mynode;
raw_spin_lock_irqsave(&rnp->lock, flags);
smp_mb__after_unlock_lock();
-t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
+t->rcu_read_unlock_special.b.blocked = true;
t->rcu_blocked_node = rnp;
/*
@@ -212,7 +213,7 @@ static void rcu_preempt_note_context_switch(int cpu)
: rnp->gpnum + 1);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
} else if (t->rcu_read_lock_nesting < 0 &&
-t->rcu_read_unlock_special) {
+t->rcu_read_unlock_special.s) {
/*
* Complete exit from RCU read-side critical section on
@@ -230,9 +231,7 @@ static void rcu_preempt_note_context_switch(int cpu)
* grace period, then the fact that the task has been enqueued
* means that we continue to block the current grace period.
*/
-local_irq_save(flags);
-rcu_preempt_qs(cpu);
-local_irq_restore(flags);
+rcu_preempt_qs();
}
/*
@@ -313,7 +312,7 @@ void rcu_read_unlock_special(struct task_struct *t)
bool drop_boost_mutex = false;
#endif /* #ifdef CONFIG_RCU_BOOST */
struct rcu_node *rnp;
-int special;
+union rcu_special special;
/* NMI handlers cannot block and cannot safely manipulate state. */
if (in_nmi())
@@ -323,12 +322,13 @@ void rcu_read_unlock_special(struct task_struct *t)
/*
* If RCU core is waiting for this CPU to exit critical section,
- * let it know that we have done so.
+ * let it know that we have done so. Because irqs are disabled,
+ * t->rcu_read_unlock_special cannot change.
*/
special = t->rcu_read_unlock_special;
-if (special & RCU_READ_UNLOCK_NEED_QS) {
-rcu_preempt_qs(smp_processor_id());
-if (!t->rcu_read_unlock_special) {
+if (special.b.need_qs) {
+rcu_preempt_qs();
+if (!t->rcu_read_unlock_special.s) {
local_irq_restore(flags);
return;
}
@@ -341,8 +341,8 @@ void rcu_read_unlock_special(struct task_struct *t)
}
/* Clean up if blocked during RCU read-side critical section. */
if (special & RCU_READ_UNLOCK_BLOCKED) {
t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
if (special.b.blocked) {
t->rcu_read_unlock_special.b.blocked = false;
/*
* Remove this task from the list it blocked on. The
@@ -626,12 +626,13 @@ static void rcu_preempt_check_callbacks(int cpu)
struct task_struct *t = current;
if (t->rcu_read_lock_nesting == 0) {
-rcu_preempt_qs(cpu);
+rcu_preempt_qs();
return;
}
if (t->rcu_read_lock_nesting > 0 &&
-per_cpu(rcu_preempt_data, cpu).qs_pending)
-t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
+per_cpu(rcu_preempt_data, cpu).qs_pending &&
+!per_cpu(rcu_preempt_data, cpu).passed_quiesce)
+t->rcu_read_unlock_special.b.need_qs = true;
}
#ifdef CONFIG_RCU_BOOST
@@ -915,7 +916,7 @@ void exit_rcu(void)
return;
t->rcu_read_lock_nesting = 1;
barrier();
-t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED;
+t->rcu_read_unlock_special.b.blocked = true;
__rcu_read_unlock();
}
@@ -1816,7 +1817,7 @@ static int rcu_oom_notify(struct notifier_block *self,
get_online_cpus();
for_each_online_cpu(cpu) {
smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
-cond_resched();
+cond_resched_rcu_qs();
}
put_online_cpus();
@@ -3162,3 +3163,19 @@ static void rcu_bind_gp_kthread(void)
housekeeping_affine(current);
#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
}
/* Record the current task on dyntick-idle entry. */
static void rcu_dynticks_task_enter(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id();
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
}
/* Record no current task on dyntick-idle exit. */
static void rcu_dynticks_task_exit(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
ACCESS_ONCE(current->rcu_tasks_idle_cpu) = -1;
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
}

kernel/rcu/update.c

@@ -47,6 +47,8 @@
#include <linux/hardirq.h>
#include <linux/delay.h>
#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/tick.h>
#define CREATE_TRACE_POINTS
@@ -91,7 +93,7 @@ void __rcu_read_unlock(void)
barrier(); /* critical section before exit code. */
t->rcu_read_lock_nesting = INT_MIN;
barrier(); /* assign before ->rcu_read_unlock_special load */
-if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
+if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special.s)))
rcu_read_unlock_special(t);
barrier(); /* ->rcu_read_unlock_special load before assign */
t->rcu_read_lock_nesting = 0;
@@ -379,3 +381,312 @@ static int __init check_cpu_stall_init(void)
early_initcall(check_cpu_stall_init);
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
#ifdef CONFIG_TASKS_RCU
/*
* Simple variant of RCU whose quiescent states are voluntary context switch,
* user-space execution, and idle. As such, grace periods can take one good
* long time. There are no read-side primitives similar to rcu_read_lock()
* and rcu_read_unlock() because this implementation is intended to get
* the system into a safe state for some of the manipulations involved in
* tracing and the like. Finally, this implementation does not support
* high call_rcu_tasks() rates from multiple CPUs. If this is required,
* per-CPU callback lists will be needed.
*/
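Because the read-side markers are implicit, an RCU-tasks reader is simply any stretch of kernel execution between voluntary context switches; a made-up illustration (no such function exists in this commit):

/* Illustration only: involuntary preemption does not end an
 * RCU-tasks reader, but a voluntary quiescent state does. */
static void foo_traced(void)		/* hypothetical traced function */
{
	do_foo_work();			/* made-up helper; preemption here leaves the reader open */
	cond_resched_rcu_qs();		/* voluntary QS: the reader ends here */
}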
/* Global list of callbacks and associated lock. */
static struct rcu_head *rcu_tasks_cbs_head;
static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
static DECLARE_WAIT_QUEUE_HEAD(rcu_tasks_cbs_wq);
static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
/* Track exiting tasks in order to allow them to be waited for. */
DEFINE_SRCU(tasks_rcu_exit_srcu);
/* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */
static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 10;
module_param(rcu_task_stall_timeout, int, 0644);
static void rcu_spawn_tasks_kthread(void);
/*
* Post an RCU-tasks callback. First call must be from process context
* after the scheduler is fully operational.
*/
void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp))
{
unsigned long flags;
bool needwake;
rhp->next = NULL;
rhp->func = func;
raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
needwake = !rcu_tasks_cbs_head;
*rcu_tasks_cbs_tail = rhp;
rcu_tasks_cbs_tail = &rhp->next;
raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
if (needwake) {
rcu_spawn_tasks_kthread();
wake_up(&rcu_tasks_cbs_wq);
}
}
EXPORT_SYMBOL_GPL(call_rcu_tasks);
/**
* synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
*
* Control will return to the caller some time after a full rcu-tasks
* grace period has elapsed, in other words after all currently
* executing rcu-tasks read-side critical sections have elapsed. These
* read-side critical sections are delimited by calls to schedule(),
* cond_resched_rcu_qs(), idle execution, userspace execution, calls
* to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
*
* This is a very specialized primitive, intended only for a few uses in
* tracing and other situations requiring manipulation of function
* preambles and profiling hooks. The synchronize_rcu_tasks() function
* is not (yet) intended for heavy use from multiple CPUs.
*
* Note that this guarantee implies further memory-ordering guarantees.
* On systems with more than one CPU, when synchronize_rcu_tasks() returns,
* each CPU is guaranteed to have executed a full memory barrier since the
* end of its last RCU-tasks read-side critical section whose beginning
* preceded the call to synchronize_rcu_tasks(). In addition, each CPU
* having an RCU-tasks read-side critical section that extends beyond
* the return from synchronize_rcu_tasks() is guaranteed to have executed
* a full memory barrier after the beginning of synchronize_rcu_tasks()
* and before the beginning of that RCU-tasks read-side critical section.
* Note that these guarantees include CPUs that are offline, idle, or
* executing in user mode, as well as CPUs that are executing in the kernel.
*
* Furthermore, if CPU A invoked synchronize_rcu_tasks(), which returned
* to its caller on CPU B, then both CPU A and CPU B are guaranteed
* to have executed a full memory barrier during the execution of
* synchronize_rcu_tasks() -- even if CPU A and CPU B are the same CPU
* (but again only if the system has more than one CPU).
*/
void synchronize_rcu_tasks(void)
{
/* Complain if the scheduler has not started. */
rcu_lockdep_assert(!rcu_scheduler_active,
"synchronize_rcu_tasks called too soon");
/* Wait for the grace period. */
wait_rcu_gp(call_rcu_tasks);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);
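A hypothetical synchronous caller matching the tracing use case sketched in the comment above (foo_site and its helpers are invented names, not part of this commit):

/* Illustration only: unhook a trampoline, wait, then free it. */
static void foo_unpatch(struct foo_site *site)
{
	foo_unpublish_tramp(site);	/* made-up: no new entries into the code */
	synchronize_rcu_tasks();	/* every task has left the old code */
	kfree(site->tramp);		/* now safe to reclaim */
}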
/**
* rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
*
* Although the current implementation is guaranteed to wait, it is not
* obligated to, for example, if there are no pending callbacks.
*/
void rcu_barrier_tasks(void)
{
/* There is only one callback queue, so this is easy. ;-) */
synchronize_rcu_tasks();
}
EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
/* See if tasks are still holding out, complain if so. */
static void check_holdout_task(struct task_struct *t,
bool needreport, bool *firstreport)
{
int cpu;
if (!ACCESS_ONCE(t->rcu_tasks_holdout) ||
t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) ||
!ACCESS_ONCE(t->on_rq) ||
(IS_ENABLED(CONFIG_NO_HZ_FULL) &&
!is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
ACCESS_ONCE(t->rcu_tasks_holdout) = false;
list_del_init(&t->rcu_tasks_holdout_list);
put_task_struct(t);
return;
}
if (!needreport)
return;
if (*firstreport) {
pr_err("INFO: rcu_tasks detected stalls on tasks:\n");
*firstreport = false;
}
cpu = task_cpu(t);
pr_alert("%p: %c%c nvcsw: %lu/%lu holdout: %d idle_cpu: %d/%d\n",
t, ".I"[is_idle_task(t)],
"N."[cpu < 0 || !tick_nohz_full_cpu(cpu)],
t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
t->rcu_tasks_idle_cpu, cpu);
sched_show_task(t);
}
/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
static int __noreturn rcu_tasks_kthread(void *arg)
{
unsigned long flags;
struct task_struct *g, *t;
unsigned long lastreport;
struct rcu_head *list;
struct rcu_head *next;
LIST_HEAD(rcu_tasks_holdouts);
/* FIXME: Add housekeeping affinity. */
/*
* Each pass through the following loop makes one check for
* newly arrived callbacks, and, if there are some, waits for
* one RCU-tasks grace period and then invokes the callbacks.
* This loop is terminated by the system going down. ;-)
*/
for (;;) {
/* Pick up any new callbacks. */
raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
list = rcu_tasks_cbs_head;
rcu_tasks_cbs_head = NULL;
rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
/* If there were none, wait a bit and start over. */
if (!list) {
wait_event_interruptible(rcu_tasks_cbs_wq,
rcu_tasks_cbs_head);
if (!rcu_tasks_cbs_head) {
WARN_ON(signal_pending(current));
schedule_timeout_interruptible(HZ/10);
}
continue;
}
/*
* Wait for all pre-existing t->on_rq and t->nvcsw
* transitions to complete. Invoking synchronize_sched()
* suffices because all these transitions occur with
* interrupts disabled. Without this synchronize_sched(),
* a read-side critical section that started before the
* grace period might be incorrectly seen as having started
* after the grace period.
*
* This synchronize_sched() also dispenses with the
* need for a memory barrier on the first store to
* ->rcu_tasks_holdout, as it forces the store to happen
* after the beginning of the grace period.
*/
synchronize_sched();
/*
* There were callbacks, so we need to wait for an
* RCU-tasks grace period. Start off by scanning
* the task list for tasks that are not already
* voluntarily blocked. Mark these tasks and make
* a list of them in rcu_tasks_holdouts.
*/
rcu_read_lock();
for_each_process_thread(g, t) {
if (t != current && ACCESS_ONCE(t->on_rq) &&
!is_idle_task(t)) {
get_task_struct(t);
t->rcu_tasks_nvcsw = ACCESS_ONCE(t->nvcsw);
ACCESS_ONCE(t->rcu_tasks_holdout) = true;
list_add(&t->rcu_tasks_holdout_list,
&rcu_tasks_holdouts);
}
}
rcu_read_unlock();
/*
* Wait for tasks that are in the process of exiting.
* This does only part of the job, ensuring that all
* tasks that were previously exiting reach the point
* where they have disabled preemption, allowing the
* later synchronize_sched() to finish the job.
*/
synchronize_srcu(&tasks_rcu_exit_srcu);
/*
* Each pass through the following loop scans the list
* of holdout tasks, removing any that are no longer
* holdouts. When the list is empty, we are done.
*/
lastreport = jiffies;
while (!list_empty(&rcu_tasks_holdouts)) {
bool firstreport;
bool needreport;
int rtst;
struct task_struct *t1;
schedule_timeout_interruptible(HZ);
rtst = ACCESS_ONCE(rcu_task_stall_timeout);
needreport = rtst > 0 &&
time_after(jiffies, lastreport + rtst);
if (needreport)
lastreport = jiffies;
firstreport = true;
WARN_ON(signal_pending(current));
list_for_each_entry_safe(t, t1, &rcu_tasks_holdouts,
rcu_tasks_holdout_list) {
check_holdout_task(t, needreport, &firstreport);
cond_resched();
}
}
/*
* Because ->on_rq and ->nvcsw are not guaranteed
* to have full memory barriers prior to them in the
* schedule() path, memory reordering on other CPUs could
* cause their RCU-tasks read-side critical sections to
* extend past the end of the grace period. However,
* because these ->nvcsw updates are carried out with
* interrupts disabled, we can use synchronize_sched()
* to force the needed ordering on all such CPUs.
*
* This synchronize_sched() also confines all
* ->rcu_tasks_holdout accesses to be within the grace
* period, avoiding the need for memory barriers for
* ->rcu_tasks_holdout accesses.
*
* In addition, this synchronize_sched() waits for exiting
* tasks to complete their final preempt_disable() region
* of execution, cleaning up after the synchronize_srcu()
* above.
*/
synchronize_sched();
/* Invoke the callbacks. */
while (list) {
next = list->next;
local_bh_disable();
list->func(list);
local_bh_enable();
list = next;
cond_resched();
}
schedule_timeout_uninterruptible(HZ/10);
}
}
/* Spawn rcu_tasks_kthread() at first call to call_rcu_tasks(). */
static void rcu_spawn_tasks_kthread(void)
{
static DEFINE_MUTEX(rcu_tasks_kthread_mutex);
static struct task_struct *rcu_tasks_kthread_ptr;
struct task_struct *t;
if (ACCESS_ONCE(rcu_tasks_kthread_ptr)) {
smp_mb(); /* Ensure caller sees full kthread. */
return;
}
mutex_lock(&rcu_tasks_kthread_mutex);
if (rcu_tasks_kthread_ptr) {
mutex_unlock(&rcu_tasks_kthread_mutex);
return;
}
t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread");
BUG_ON(IS_ERR(t));
smp_mb(); /* Ensure others see full kthread. */
ACCESS_ONCE(rcu_tasks_kthread_ptr) = t;
mutex_unlock(&rcu_tasks_kthread_mutex);
}
#endif /* #ifdef CONFIG_TASKS_RCU */
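Finally, a hypothetical module-exit path showing where rcu_barrier_tasks() fits (helper name invented):

/* Illustration only, not part of this commit. */
static void __exit foo_exit(void)
{
	foo_stop_posting_callbacks();	/* made-up: quiesce call_rcu_tasks() users */
	rcu_barrier_tasks();		/* wait for already-queued callbacks */
}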