diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1f8bfc952015..e1093c443ff9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6896,8 +6896,21 @@ static void balance_push(struct rq *rq)
 	 * Both the cpu-hotplug and stop task are in this case and are
 	 * required to complete the hotplug process.
 	 */
-	if (is_per_cpu_kthread(push_task))
+	if (is_per_cpu_kthread(push_task)) {
+		/*
+		 * If this is the idle task on the outgoing CPU try to wake
+		 * up the hotplug control thread which might wait for the
+		 * last task to vanish. The rcuwait_active() check is
+		 * accurate here because the waiter is pinned on this CPU
+		 * and obviously can't be running in parallel.
+		 */
+		if (!rq->nr_running && rcuwait_active(&rq->hotplug_wait)) {
+			raw_spin_unlock(&rq->lock);
+			rcuwait_wake_up(&rq->hotplug_wait);
+			raw_spin_lock(&rq->lock);
+		}
 		return;
+	}
 
 	get_task_struct(push_task);
 	/*
@@ -6928,6 +6941,20 @@ static void balance_push_set(int cpu, bool on)
 	rq_unlock_irqrestore(rq, &rf);
 }
 
+/*
+ * Invoked from a CPU's hotplug control thread after the CPU has been marked
+ * inactive. All tasks which are not per-CPU kernel threads are either
+ * pushed off this CPU now via balance_push() or placed on a different CPU
+ * during wakeup. Wait until the CPU is quiescent (rq->nr_running == 1).
+ */
+static void balance_hotplug_wait(void)
+{
+	struct rq *rq = this_rq();
+
+	rcuwait_wait_event(&rq->hotplug_wait, rq->nr_running == 1,
+			   TASK_UNINTERRUPTIBLE);
+}
+
 #else
 
 static inline void balance_push(struct rq *rq)
@@ -6938,6 +6965,10 @@ static inline void balance_push_set(int cpu, bool on)
 {
 }
 
+static inline void balance_hotplug_wait(void)
+{
+}
+
 #endif /* CONFIG_HOTPLUG_CPU */
 
 void set_rq_online(struct rq *rq)
@@ -7092,6 +7123,10 @@ int sched_cpu_deactivate(unsigned int cpu)
 		return ret;
 	}
 	sched_domains_numa_masks_clear(cpu);
+
+	/* Wait for all non-per-CPU kernel threads to vanish. */
+	balance_hotplug_wait();
+
 	return 0;
 }
 
@@ -7332,6 +7367,9 @@ void __init sched_init(void)
 
 		rq_csd_init(rq, &rq->nohz_csd, nohz_csd_func);
 #endif
+#ifdef CONFIG_HOTPLUG_CPU
+		rcuwait_init(&rq->hotplug_wait);
+#endif
 #endif /* CONFIG_SMP */
 		hrtick_rq_init(rq);
 		atomic_set(&rq->nr_iowait, 0);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a71ac84acc1e..c6f707a6d9d4 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1004,6 +1004,10 @@ struct rq {
 
 	/* This is used to determine avg_idle's max value */
 	u64			max_idle_balance_cost;
+
+#ifdef CONFIG_HOTPLUG_CPU
+	struct rcuwait		hotplug_wait;
+#endif
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING