diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 568cfbc24bc0..fe53976e088e 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -150,9 +150,25 @@ struct workqueue_attrs {
 
 	/**
	 * @cpumask: allowed CPUs
+	 *
+	 * Work items in this workqueue are affine to these CPUs and not allowed
+	 * to execute on other CPUs. A pool serving a workqueue must have the
+	 * same @cpumask.
	 */
	cpumask_var_t cpumask;
 
+	/**
+	 * @__pod_cpumask: internal attribute used to create per-pod pools
+	 *
+	 * Internal use only.
+	 *
+	 * Per-pod unbound worker pools are used to improve locality. Always a
+	 * subset of ->cpumask. A workqueue can be associated with multiple
+	 * worker pools with disjoint @__pod_cpumask's. Whether the enforcement
+	 * of a pool's @__pod_cpumask is strict depends on @affn_strict.
+	 */
+	cpumask_var_t __pod_cpumask;
+
	/*
	 * Below fields aren't properties of a worker_pool. They only modify how
	 * :c:func:`apply_workqueue_attrs` select pools and thus don't
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e941fa052a2b..e61b4291bec8 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -366,7 +366,6 @@ static bool wq_online;	/* can kworkers be created yet? */
 
 /* buf for wq_update_unbound_pod_attrs(), protected by CPU hotplug exclusion */
 static struct workqueue_attrs *wq_update_pod_attrs_buf;
-static cpumask_var_t wq_update_pod_cpumask_buf;
 
 static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
 static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */
@@ -2050,6 +2049,11 @@ static struct worker *alloc_worker(int node)
	return worker;
 }
 
+static cpumask_t *pool_allowed_cpus(struct worker_pool *pool)
+{
+	return pool->attrs->__pod_cpumask;
+}
+
 /**
  * worker_attach_to_pool() - attach a worker to a pool
  * @worker: worker to be attached
@@ -2075,7 +2079,7 @@ static void worker_attach_to_pool(struct worker *worker,
		kthread_set_per_cpu(worker->task, pool->cpu);
 
	if (worker->rescue_wq)
-		set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
+		set_cpus_allowed_ptr(worker->task, pool_allowed_cpus(pool));
 
	list_add_tail(&worker->node, &pool->workers);
	worker->pool = pool;
@@ -2167,7 +2171,7 @@ static struct worker *create_worker(struct worker_pool *pool)
	}
 
	set_user_nice(worker->task, pool->attrs->nice);
-	kthread_bind_mask(worker->task, pool->attrs->cpumask);
+	kthread_bind_mask(worker->task, pool_allowed_cpus(pool));
 
	/* successful, attach the worker to the pool */
	worker_attach_to_pool(worker, pool);
@@ -3672,6 +3676,7 @@ void free_workqueue_attrs(struct workqueue_attrs *attrs)
 {
	if (attrs) {
		free_cpumask_var(attrs->cpumask);
+		free_cpumask_var(attrs->__pod_cpumask);
		kfree(attrs);
	}
 }
@@ -3693,6 +3698,8 @@ struct workqueue_attrs *alloc_workqueue_attrs(void)
		goto fail;
	if (!alloc_cpumask_var(&attrs->cpumask, GFP_KERNEL))
		goto fail;
+	if (!alloc_cpumask_var(&attrs->__pod_cpumask, GFP_KERNEL))
+		goto fail;
 
	cpumask_copy(attrs->cpumask, cpu_possible_mask);
	attrs->affn_scope = wq_affn_dfl;
@@ -3707,6 +3714,7 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
 {
	to->nice = from->nice;
	cpumask_copy(to->cpumask, from->cpumask);
+	cpumask_copy(to->__pod_cpumask, from->__pod_cpumask);
 
	/*
	 * Unlike hash and equality test, copying shouldn't ignore wq-only
@@ -3735,6 +3743,8 @@ static u32 wqattrs_hash(const struct workqueue_attrs *attrs)
	hash = jhash_1word(attrs->nice, hash);
	hash = jhash(cpumask_bits(attrs->cpumask),
		     BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
+	hash = jhash(cpumask_bits(attrs->__pod_cpumask),
+		     BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
	return hash;
 }
 
@@ -3746,6 +3756,8 @@ static bool wqattrs_equal(const struct workqueue_attrs *a,
		return false;
	if (!cpumask_equal(a->cpumask, b->cpumask))
		return false;
+	if (!cpumask_equal(a->__pod_cpumask, b->__pod_cpumask))
+		return false;
	return true;
 }
 
@@ -3998,9 +4010,9 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
		}
	}
-	/* If cpumask is contained inside a NUMA pod, that's our NUMA node */
+	/* If __pod_cpumask is contained inside a NUMA pod, that's our node */
	for (pod = 0; pod < pt->nr_pods; pod++) {
-		if (cpumask_subset(attrs->cpumask, pt->pod_cpus[pod])) {
+		if (cpumask_subset(attrs->__pod_cpumask, pt->pod_cpus[pod])) {
			node = pt->pod_node[pod];
			break;
		}
	}
@@ -4190,11 +4202,10 @@ static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
  * @attrs: the wq_attrs of the default pwq of the target workqueue
  * @cpu: the target CPU
  * @cpu_going_down: if >= 0, the CPU to consider as offline
- * @cpumask: outarg, the resulting cpumask
  *
  * Calculate the cpumask a workqueue with @attrs should use on @pod. If
  * @cpu_going_down is >= 0, that cpu is considered offline during calculation.
- * The result is stored in @cpumask.
+ * The result is stored in @attrs->__pod_cpumask.
  *
  * If pod affinity is not enabled, @attrs->cpumask is always used. If enabled
  * and @pod has online CPUs requested by @attrs, the returned cpumask is the
@@ -4202,27 +4213,27 @@ static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
  *
  * The caller is responsible for ensuring that the cpumask of @pod stays stable.
  */
-static void wq_calc_pod_cpumask(const struct workqueue_attrs *attrs, int cpu,
-				int cpu_going_down, cpumask_t *cpumask)
+static void wq_calc_pod_cpumask(struct workqueue_attrs *attrs, int cpu,
+				int cpu_going_down)
 {
	const struct wq_pod_type *pt = wqattrs_pod_type(attrs);
	int pod = pt->cpu_pod[cpu];
 
	/* does @pod have any online CPUs @attrs wants? */
-	cpumask_and(cpumask, pt->pod_cpus[pod], attrs->cpumask);
-	cpumask_and(cpumask, cpumask, cpu_online_mask);
+	cpumask_and(attrs->__pod_cpumask, pt->pod_cpus[pod], attrs->cpumask);
+	cpumask_and(attrs->__pod_cpumask, attrs->__pod_cpumask, cpu_online_mask);
	if (cpu_going_down >= 0)
-		cpumask_clear_cpu(cpu_going_down, cpumask);
+		cpumask_clear_cpu(cpu_going_down, attrs->__pod_cpumask);
 
-	if (cpumask_empty(cpumask)) {
-		cpumask_copy(cpumask, attrs->cpumask);
+	if (cpumask_empty(attrs->__pod_cpumask)) {
+		cpumask_copy(attrs->__pod_cpumask, attrs->cpumask);
		return;
	}
 
	/* yeap, return possible CPUs in @pod that @attrs wants */
-	cpumask_and(cpumask, attrs->cpumask, pt->pod_cpus[pod]);
+	cpumask_and(attrs->__pod_cpumask, attrs->cpumask, pt->pod_cpus[pod]);
 
-	if (cpumask_empty(cpumask))
+	if (cpumask_empty(attrs->__pod_cpumask))
		pr_warn_once("WARNING: workqueue cpumask: online intersect > "
			     "possible intersect\n");
 }
@@ -4276,7 +4287,7 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
			const cpumask_var_t unbound_cpumask)
 {
	struct apply_wqattrs_ctx *ctx;
-	struct workqueue_attrs *new_attrs, *tmp_attrs;
+	struct workqueue_attrs *new_attrs;
	int cpu;
 
	lockdep_assert_held(&wq_pool_mutex);
@@ -4288,8 +4299,7 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
	ctx = kzalloc(struct_size(ctx, pwq_tbl, nr_cpu_ids), GFP_KERNEL);
 
	new_attrs = alloc_workqueue_attrs();
-	tmp_attrs = alloc_workqueue_attrs();
-	if (!ctx || !new_attrs || !tmp_attrs)
+	if (!ctx || !new_attrs)
		goto out_free;
 
	/*
@@ -4299,23 +4309,18 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
	 */
	copy_workqueue_attrs(new_attrs, attrs);
	wqattrs_actualize_cpumask(new_attrs, unbound_cpumask);
+	cpumask_copy(new_attrs->__pod_cpumask, new_attrs->cpumask);
	ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
	if (!ctx->dfl_pwq)
		goto out_free;
 
-	/*
-	 * We may create multiple pwqs with differing cpumasks. Make a copy of
-	 * @new_attrs which will be modified and used to obtain pools.
-	 */
-	copy_workqueue_attrs(tmp_attrs, new_attrs);
-
	for_each_possible_cpu(cpu) {
		if (new_attrs->ordered) {
			ctx->dfl_pwq->refcnt++;
			ctx->pwq_tbl[cpu] = ctx->dfl_pwq;
		} else {
-			wq_calc_pod_cpumask(new_attrs, cpu, -1, tmp_attrs->cpumask);
-			ctx->pwq_tbl[cpu] = alloc_unbound_pwq(wq, tmp_attrs);
+			wq_calc_pod_cpumask(new_attrs, cpu, -1);
+			ctx->pwq_tbl[cpu] = alloc_unbound_pwq(wq, new_attrs);
			if (!ctx->pwq_tbl[cpu])
				goto out_free;
		}
@@ -4324,14 +4329,13 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
	/* save the user configured attrs and sanitize it. */
	copy_workqueue_attrs(new_attrs, attrs);
	cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
+	cpumask_copy(new_attrs->__pod_cpumask, new_attrs->cpumask);
	ctx->attrs = new_attrs;
 
	ctx->wq = wq;
-	free_workqueue_attrs(tmp_attrs);
	return ctx;
 
 out_free:
-	free_workqueue_attrs(tmp_attrs);
	free_workqueue_attrs(new_attrs);
	apply_wqattrs_cleanup(ctx);
	return ERR_PTR(-ENOMEM);
@@ -4459,7 +4463,6 @@ static void wq_update_pod(struct workqueue_struct *wq, int cpu,
	int off_cpu = online ? -1 : hotplug_cpu;
	struct pool_workqueue *old_pwq = NULL, *pwq;
	struct workqueue_attrs *target_attrs;
-	cpumask_t *cpumask;
 
	lockdep_assert_held(&wq_pool_mutex);
 
@@ -4472,20 +4475,18 @@ static void wq_update_pod(struct workqueue_struct *wq, int cpu,
	 * CPU hotplug exclusion.
	 */
	target_attrs = wq_update_pod_attrs_buf;
-	cpumask = wq_update_pod_cpumask_buf;
 
	copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
	wqattrs_actualize_cpumask(target_attrs, wq_unbound_cpumask);
 
	/* nothing to do if the target cpumask matches the current pwq */
-	wq_calc_pod_cpumask(target_attrs, cpu, off_cpu, cpumask);
+	wq_calc_pod_cpumask(target_attrs, cpu, off_cpu);
	pwq = rcu_dereference_protected(*per_cpu_ptr(wq->cpu_pwq, cpu),
					lockdep_is_held(&wq_pool_mutex));
-	if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
+	if (wqattrs_equal(target_attrs, pwq->pool->attrs))
		return;
 
	/* create a new pwq */
-	cpumask_copy(target_attrs->cpumask, cpumask);
	pwq = alloc_unbound_pwq(wq, target_attrs);
	if (!pwq) {
		pr_warn("workqueue: allocation failed while updating CPU pod affinity of \"%s\"\n",
@@ -5409,7 +5410,7 @@ static void rebind_workers(struct worker_pool *pool)
	for_each_pool_worker(worker, pool) {
		kthread_set_per_cpu(worker->task, pool->cpu);
		WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
-						  pool->attrs->cpumask) < 0);
+						  pool_allowed_cpus(pool)) < 0);
	}
 
	raw_spin_lock_irq(&pool->lock);
@@ -6424,8 +6425,6 @@ void __init workqueue_init_early(void)
	wq_update_pod_attrs_buf = alloc_workqueue_attrs();
	BUG_ON(!wq_update_pod_attrs_buf);
 
-	BUG_ON(!alloc_cpumask_var(&wq_update_pod_cpumask_buf, GFP_KERNEL));
-
	/* initialize WQ_AFFN_SYSTEM pods */
	pt->pod_cpus = kcalloc(1, sizeof(pt->pod_cpus[0]), GFP_KERNEL);
	pt->pod_node = kcalloc(1, sizeof(pt->pod_node[0]), GFP_KERNEL);
@@ -6451,6 +6450,7 @@ void __init workqueue_init_early(void)
			BUG_ON(init_worker_pool(pool));
			pool->cpu = cpu;
			cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu));
+			cpumask_copy(pool->attrs->__pod_cpumask, cpumask_of(cpu));
			pool->attrs->nice = std_nice[i++];
			pool->node = cpu_to_node(cpu);
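
For readers following the control flow: after this patch, wq_calc_pod_cpumask() writes its
result into attrs->__pod_cpumask instead of a caller-supplied outarg, which is what lets
apply_wqattrs_prepare() drop tmp_attrs and wq_update_pod() drop its scratch cpumask buffer.
Below is a minimal userspace sketch of that selection logic, modeling cpumasks as 64-bit
words (so it assumes <= 64 CPUs); the struct and function names are illustrative stand-ins,
not kernel API.

/* Userspace sketch (not kernel code) of the wq_calc_pod_cpumask() logic above. */
#include <stdint.h>
#include <stdio.h>

struct fake_attrs {
	uint64_t cpumask;	/* CPUs the workqueue is allowed to use */
	uint64_t __pod_cpumask;	/* result: CPUs backing this pod's pool */
};

/* Mirrors the in-place convention: the result lands in attrs->__pod_cpumask. */
static void calc_pod_cpumask(struct fake_attrs *attrs, uint64_t pod_cpus,
			     uint64_t online, int cpu_going_down)
{
	/* does the pod have any online CPUs the attrs want? */
	attrs->__pod_cpumask = pod_cpus & attrs->cpumask & online;
	if (cpu_going_down >= 0)
		attrs->__pod_cpumask &= ~(1ULL << cpu_going_down);

	/* no usable online CPU in the pod: fall back to the full cpumask */
	if (!attrs->__pod_cpumask) {
		attrs->__pod_cpumask = attrs->cpumask;
		return;
	}

	/* otherwise, use all possible CPUs in the pod that the attrs want */
	attrs->__pod_cpumask = attrs->cpumask & pod_cpus;
}

int main(void)
{
	struct fake_attrs attrs = { .cpumask = 0xf0 };	/* CPUs 4-7 */

	/* pod covers CPUs 4-5, all 8 CPUs online, CPU 5 going down */
	calc_pod_cpumask(&attrs, 0x30, 0xff, 5);

	/* prints 0x30: CPU 4 is still online, so the whole pod is kept */
	printf("__pod_cpumask = %#llx\n",
	       (unsigned long long)attrs.__pod_cpumask);
	return 0;
}

Because wqattrs_hash() and wqattrs_equal() now also cover __pod_cpumask, two attrs that
differ only in their pod mask map to different pools, which is what allows wq_update_pod()
above to compare whole attrs via wqattrs_equal() rather than a standalone cpumask.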