Merge branches 'doc.2019.12.10a', 'exp.2019.12.09a', 'fixes.2020.01.24a', 'kfree_rcu.2020.01.24a', 'list.2020.01.10a', 'preempt.2020.01.24a' and 'torture.2019.12.09a' into HEAD

doc.2019.12.10a: Documentations updates
exp.2019.12.09a: Expedited grace-period updates
fixes.2020.01.24a: Miscellaneous fixes
kfree_rcu.2020.01.24a: Batch kfree_rcu() work
list.2020.01.10a: RCU-protected-list updates
preempt.2020.01.24a: Preemptible RCU updates
torture.2019.12.09a: Torture-test updates
This commit is contained in:
Paul E. McKenney 2020-01-24 10:37:27 -08:00
39 changed files with 1070 additions and 575 deletions

View file

@ -220,7 +220,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
* blocked tasks.
*/
if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD)) {
rnp->gp_tasks = &t->rcu_node_entry;
WRITE_ONCE(rnp->gp_tasks, &t->rcu_node_entry);
WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq);
}
if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
@ -290,8 +290,8 @@ void rcu_note_context_switch(bool preempt)
trace_rcu_utilization(TPS("Start context switch"));
lockdep_assert_irqs_disabled();
WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0);
if (t->rcu_read_lock_nesting > 0 &&
WARN_ON_ONCE(!preempt && rcu_preempt_depth() > 0);
if (rcu_preempt_depth() > 0 &&
!t->rcu_read_unlock_special.b.blocked) {
/* Possibly blocking in an RCU read-side critical section. */
@ -340,7 +340,7 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
*/
static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
{
return rnp->gp_tasks != NULL;
return READ_ONCE(rnp->gp_tasks) != NULL;
}
/* Bias and limit values for ->rcu_read_lock_nesting. */
@ -348,6 +348,21 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
#define RCU_NEST_NMAX (-INT_MAX / 2)
#define RCU_NEST_PMAX (INT_MAX / 2)
static void rcu_preempt_read_enter(void)
{
current->rcu_read_lock_nesting++;
}
static void rcu_preempt_read_exit(void)
{
current->rcu_read_lock_nesting--;
}
static void rcu_preempt_depth_set(int val)
{
current->rcu_read_lock_nesting = val;
}
/*
* Preemptible RCU implementation for rcu_read_lock().
* Just increment ->rcu_read_lock_nesting, shared state will be updated
@ -355,9 +370,9 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
*/
void __rcu_read_lock(void)
{
current->rcu_read_lock_nesting++;
rcu_preempt_read_enter();
if (IS_ENABLED(CONFIG_PROVE_LOCKING))
WARN_ON_ONCE(current->rcu_read_lock_nesting > RCU_NEST_PMAX);
WARN_ON_ONCE(rcu_preempt_depth() > RCU_NEST_PMAX);
barrier(); /* critical section after entry code. */
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);
@ -373,19 +388,19 @@ void __rcu_read_unlock(void)
{
struct task_struct *t = current;
if (t->rcu_read_lock_nesting != 1) {
--t->rcu_read_lock_nesting;
if (rcu_preempt_depth() != 1) {
rcu_preempt_read_exit();
} else {
barrier(); /* critical section before exit code. */
t->rcu_read_lock_nesting = -RCU_NEST_BIAS;
rcu_preempt_depth_set(-RCU_NEST_BIAS);
barrier(); /* assign before ->rcu_read_unlock_special load */
if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
rcu_read_unlock_special(t);
barrier(); /* ->rcu_read_unlock_special load before assign */
t->rcu_read_lock_nesting = 0;
rcu_preempt_depth_set(0);
}
if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
int rrln = t->rcu_read_lock_nesting;
int rrln = rcu_preempt_depth();
WARN_ON_ONCE(rrln < 0 && rrln > RCU_NEST_NMAX);
}
@ -444,15 +459,9 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
local_irq_restore(flags);
return;
}
t->rcu_read_unlock_special.b.deferred_qs = false;
if (special.b.need_qs) {
t->rcu_read_unlock_special.s = 0;
if (special.b.need_qs)
rcu_qs();
t->rcu_read_unlock_special.b.need_qs = false;
if (!t->rcu_read_unlock_special.s && !rdp->exp_deferred_qs) {
local_irq_restore(flags);
return;
}
}
/*
* Respond to a request by an expedited grace period for a
@ -460,17 +469,11 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
* tasks are handled when removing the task from the
* blocked-tasks list below.
*/
if (rdp->exp_deferred_qs) {
if (rdp->exp_deferred_qs)
rcu_report_exp_rdp(rdp);
if (!t->rcu_read_unlock_special.s) {
local_irq_restore(flags);
return;
}
}
/* Clean up if blocked during RCU read-side critical section. */
if (special.b.blocked) {
t->rcu_read_unlock_special.b.blocked = false;
/*
* Remove this task from the list it blocked on. The task
@ -485,7 +488,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq &&
(!empty_norm || rnp->qsmask));
empty_exp = sync_rcu_preempt_exp_done(rnp);
empty_exp = sync_rcu_exp_done(rnp);
smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
np = rcu_next_node_entry(t, rnp);
list_del_init(&t->rcu_node_entry);
@ -493,7 +496,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
trace_rcu_unlock_preempted_task(TPS("rcu_preempt"),
rnp->gp_seq, t->pid);
if (&t->rcu_node_entry == rnp->gp_tasks)
rnp->gp_tasks = np;
WRITE_ONCE(rnp->gp_tasks, np);
if (&t->rcu_node_entry == rnp->exp_tasks)
rnp->exp_tasks = np;
if (IS_ENABLED(CONFIG_RCU_BOOST)) {
@ -509,7 +512,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
* Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
* so we must take a snapshot of the expedited state.
*/
empty_exp_now = sync_rcu_preempt_exp_done(rnp);
empty_exp_now = sync_rcu_exp_done(rnp);
if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) {
trace_rcu_quiescent_state_report(TPS("preempt_rcu"),
rnp->gp_seq,
@ -551,7 +554,7 @@ static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
return (__this_cpu_read(rcu_data.exp_deferred_qs) ||
READ_ONCE(t->rcu_read_unlock_special.s)) &&
t->rcu_read_lock_nesting <= 0;
rcu_preempt_depth() <= 0;
}
/*
@ -564,16 +567,16 @@ static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
static void rcu_preempt_deferred_qs(struct task_struct *t)
{
unsigned long flags;
bool couldrecurse = t->rcu_read_lock_nesting >= 0;
bool couldrecurse = rcu_preempt_depth() >= 0;
if (!rcu_preempt_need_deferred_qs(t))
return;
if (couldrecurse)
t->rcu_read_lock_nesting -= RCU_NEST_BIAS;
rcu_preempt_depth_set(rcu_preempt_depth() - RCU_NEST_BIAS);
local_irq_save(flags);
rcu_preempt_deferred_qs_irqrestore(t, flags);
if (couldrecurse)
t->rcu_read_lock_nesting += RCU_NEST_BIAS;
rcu_preempt_depth_set(rcu_preempt_depth() + RCU_NEST_BIAS);
}
/*
@ -610,9 +613,8 @@ static void rcu_read_unlock_special(struct task_struct *t)
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
struct rcu_node *rnp = rdp->mynode;
t->rcu_read_unlock_special.b.exp_hint = false;
exp = (t->rcu_blocked_node && t->rcu_blocked_node->exp_tasks) ||
(rdp->grpmask & rnp->expmask) ||
(rdp->grpmask & READ_ONCE(rnp->expmask)) ||
tick_nohz_full_cpu(rdp->cpu);
// Need to defer quiescent state until everything is enabled.
if (irqs_were_disabled && use_softirq &&
@ -640,7 +642,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
local_irq_restore(flags);
return;
}
WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, false);
rcu_preempt_deferred_qs_irqrestore(t, flags);
}
@ -648,8 +649,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
* Check that the list of blocked tasks for the newly completed grace
* period is in fact empty. It is a serious bug to complete a grace
* period that still has RCU readers blocked! This function must be
* invoked -before- updating this rnp's ->gp_seq, and the rnp's ->lock
* must be held by the caller.
* invoked -before- updating this rnp's ->gp_seq.
*
* Also, if there are blocked tasks on the list, they automatically
* block the newly created grace period, so set up ->gp_tasks accordingly.
@ -659,11 +659,12 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
struct task_struct *t;
RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n");
raw_lockdep_assert_held_rcu_node(rnp);
if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)))
dump_blkd_tasks(rnp, 10);
if (rcu_preempt_has_tasks(rnp) &&
(rnp->qsmaskinit || rnp->wait_blkd_tasks)) {
rnp->gp_tasks = rnp->blkd_tasks.next;
WRITE_ONCE(rnp->gp_tasks, rnp->blkd_tasks.next);
t = container_of(rnp->gp_tasks, struct task_struct,
rcu_node_entry);
trace_rcu_unlock_preempted_task(TPS("rcu_preempt-GPS"),
@ -686,7 +687,7 @@ static void rcu_flavor_sched_clock_irq(int user)
if (user || rcu_is_cpu_rrupt_from_idle()) {
rcu_note_voluntary_context_switch(current);
}
if (t->rcu_read_lock_nesting > 0 ||
if (rcu_preempt_depth() > 0 ||
(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {
/* No QS, force context switch if deferred. */
if (rcu_preempt_need_deferred_qs(t)) {
@ -696,13 +697,13 @@ static void rcu_flavor_sched_clock_irq(int user)
} else if (rcu_preempt_need_deferred_qs(t)) {
rcu_preempt_deferred_qs(t); /* Report deferred QS. */
return;
} else if (!t->rcu_read_lock_nesting) {
} else if (!rcu_preempt_depth()) {
rcu_qs(); /* Report immediate QS. */
return;
}
/* If GP is oldish, ask for help from rcu_read_unlock_special(). */
if (t->rcu_read_lock_nesting > 0 &&
if (rcu_preempt_depth() > 0 &&
__this_cpu_read(rcu_data.core_needs_qs) &&
__this_cpu_read(rcu_data.cpu_no_qs.b.norm) &&
!t->rcu_read_unlock_special.b.need_qs &&
@ -723,11 +724,11 @@ void exit_rcu(void)
struct task_struct *t = current;
if (unlikely(!list_empty(&current->rcu_node_entry))) {
t->rcu_read_lock_nesting = 1;
rcu_preempt_depth_set(1);
barrier();
WRITE_ONCE(t->rcu_read_unlock_special.b.blocked, true);
} else if (unlikely(t->rcu_read_lock_nesting)) {
t->rcu_read_lock_nesting = 1;
} else if (unlikely(rcu_preempt_depth())) {
rcu_preempt_depth_set(1);
} else {
return;
}
@ -757,7 +758,8 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n",
__func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext);
pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n",
__func__, rnp->gp_tasks, rnp->boost_tasks, rnp->exp_tasks);
__func__, READ_ONCE(rnp->gp_tasks), rnp->boost_tasks,
rnp->exp_tasks);
pr_info("%s: ->blkd_tasks", __func__);
i = 0;
list_for_each(lhp, &rnp->blkd_tasks) {
@ -788,7 +790,7 @@ static void __init rcu_bootup_announce(void)
}
/*
* Note a quiescent state for PREEMPT=n. Because we do not need to know
* Note a quiescent state for PREEMPTION=n. Because we do not need to know
* how many quiescent states passed, just if there was at least one since
* the start of the grace period, this just sets a flag. The caller must
* have disabled preemption.
@ -838,7 +840,7 @@ void rcu_all_qs(void)
EXPORT_SYMBOL_GPL(rcu_all_qs);
/*
* Note a PREEMPT=n context switch. The caller must have disabled interrupts.
* Note a PREEMPTION=n context switch. The caller must have disabled interrupts.
*/
void rcu_note_context_switch(bool preempt)
{
@ -1262,10 +1264,9 @@ static void rcu_prepare_for_idle(void)
/*
* This code is invoked when a CPU goes idle, at which point we want
* to have the CPU do everything required for RCU so that it can enter
* the energy-efficient dyntick-idle mode. This is handled by a
* state machine implemented by rcu_prepare_for_idle() below.
* the energy-efficient dyntick-idle mode.
*
* The following three proprocessor symbols control this state machine:
* The following preprocessor symbol controls this:
*
* RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
* to sleep in dyntick-idle mode with RCU callbacks pending. This
@ -1274,21 +1275,15 @@ static void rcu_prepare_for_idle(void)
* number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
* system. And if you are -that- concerned about energy efficiency,
* just power the system down and be done with it!
* RCU_IDLE_LAZY_GP_DELAY gives the number of jiffies that a CPU is
* permitted to sleep in dyntick-idle mode with only lazy RCU
* callbacks pending. Setting this too high can OOM your system.
*
* The values below work well in practice. If future workloads require
* The value below works well in practice. If future workloads require
* adjustment, they can be converted into kernel config parameters, though
* making the state machine smarter might be a better option.
*/
#define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */
#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */
static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
module_param(rcu_idle_gp_delay, int, 0644);
static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
module_param(rcu_idle_lazy_gp_delay, int, 0644);
/*
* Try to advance callbacks on the current CPU, but only if it has been
@ -1327,8 +1322,7 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
/*
* Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
* to invoke. If the CPU has callbacks, try to advance them. Tell the
* caller to set the timeout based on whether or not there are non-lazy
* callbacks.
* caller about what to set the timeout.
*
* The caller must have disabled interrupts.
*/
@ -1354,25 +1348,18 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt)
}
rdp->last_accelerate = jiffies;
/* Request timer delay depending on laziness, and round. */
rdp->all_lazy = !rcu_segcblist_n_nonlazy_cbs(&rdp->cblist);
if (rdp->all_lazy) {
dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies;
} else {
dj = round_up(rcu_idle_gp_delay + jiffies,
rcu_idle_gp_delay) - jiffies;
}
/* Request timer and round. */
dj = round_up(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay) - jiffies;
*nextevt = basemono + dj * TICK_NSEC;
return 0;
}
/*
* Prepare a CPU for idle from an RCU perspective. The first major task
* is to sense whether nohz mode has been enabled or disabled via sysfs.
* The second major task is to check to see if a non-lazy callback has
* arrived at a CPU that previously had only lazy callbacks. The third
* major task is to accelerate (that is, assign grace-period numbers to)
* any recently arrived callbacks.
* Prepare a CPU for idle from an RCU perspective. The first major task is to
* sense whether nohz mode has been enabled or disabled via sysfs. The second
* major task is to accelerate (that is, assign grace-period numbers to) any
* recently arrived callbacks.
*
* The caller must have disabled interrupts.
*/
@ -1398,17 +1385,6 @@ static void rcu_prepare_for_idle(void)
if (!tne)
return;
/*
* If a non-lazy callback arrived at a CPU having only lazy
* callbacks, invoke RCU core for the side-effect of recalculating
* idle duration on re-entry to idle.
*/
if (rdp->all_lazy && rcu_segcblist_n_nonlazy_cbs(&rdp->cblist)) {
rdp->all_lazy = false;
invoke_rcu_core();
return;
}
/*
* If we have not yet accelerated this jiffy, accelerate all
* callbacks on this CPU.
@ -2321,6 +2297,8 @@ static void __init rcu_organize_nocb_kthreads(void)
{
int cpu;
bool firsttime = true;
bool gotnocbs = false;
bool gotnocbscbs = true;
int ls = rcu_nocb_gp_stride;
int nl = 0; /* Next GP kthread. */
struct rcu_data *rdp;
@ -2343,21 +2321,31 @@ static void __init rcu_organize_nocb_kthreads(void)
rdp = per_cpu_ptr(&rcu_data, cpu);
if (rdp->cpu >= nl) {
/* New GP kthread, set up for CBs & next GP. */
gotnocbs = true;
nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls;
rdp->nocb_gp_rdp = rdp;
rdp_gp = rdp;
if (!firsttime && dump_tree)
pr_cont("\n");
firsttime = false;
pr_alert("%s: No-CB GP kthread CPU %d:", __func__, cpu);
if (dump_tree) {
if (!firsttime)
pr_cont("%s\n", gotnocbscbs
? "" : " (self only)");
gotnocbscbs = false;
firsttime = false;
pr_alert("%s: No-CB GP kthread CPU %d:",
__func__, cpu);
}
} else {
/* Another CB kthread, link to previous GP kthread. */
gotnocbscbs = true;
rdp->nocb_gp_rdp = rdp_gp;
rdp_prev->nocb_next_cb_rdp = rdp;
pr_alert(" %d", cpu);
if (dump_tree)
pr_cont(" %d", cpu);
}
rdp_prev = rdp;
}
if (gotnocbs && dump_tree)
pr_cont("%s\n", gotnocbscbs ? "" : " (self only)");
}
/*