rcu: Parallelize expedited grace-period initialization

The latency of RCU expedited grace periods grows with increasing numbers
of CPUs, eventually failing to be all that expedited.  Much of the growth
in latency is in the initialization phase, so this commit uses workqueues
to carry out this initialization concurrently on a rcu_node-by-rcu_node
basis.

This change makes use of a new rcu_par_gp_wq because flushing a work
item from another work item running from the same workqueue can result
in deadlock.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Nicholas Piggin <npiggin@gmail.com>
This commit is contained in:
Paul E. McKenney 2018-02-01 22:05:38 -08:00
parent 60cc43fc88
commit 25f3d7effa
4 changed files with 123 additions and 81 deletions

View file

@ -4168,6 +4168,7 @@ static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp)
}
struct workqueue_struct *rcu_gp_wq;
struct workqueue_struct *rcu_par_gp_wq;
void __init rcu_init(void)
{
@ -4199,6 +4200,8 @@ void __init rcu_init(void)
/* Create workqueue for expedited GPs and for Tree SRCU. */
rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
WARN_ON(!rcu_gp_wq);
rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
WARN_ON(!rcu_par_gp_wq);
}
#include "tree_exp.h"