mirror of
https://github.com/Fishwaldo/linux-bl808.git
synced 2025-06-17 20:25:19 +00:00
blk-mq: drain I/O when all CPUs in a hctx are offline
Most blk-mq drivers depend on managed IRQ auto-affinity to set up queue mapping. Thomas mentioned the following point[1]: "That was the constraint of managed interrupts from the very beginning: The driver/subsystem has to quiesce the interrupt line and the associated queue _before_ it gets shutdown in CPU unplug and not fiddle with it until it's restarted by the core when the CPU is plugged in again." However, the current blk-mq implementation doesn't quiesce the hw queue before the last CPU in the hctx is shut down. Even worse, CPUHP_BLK_MQ_DEAD is a cpuhp state handled after the CPU is down, so there isn't any chance to quiesce the hctx before shutting down the CPU. Add a new CPUHP_AP_BLK_MQ_ONLINE state to stop allocating from blk-mq hctxs where the last CPU goes away, and wait for completion of in-flight requests. This guarantees that there is no in-flight I/O before shutting down the managed IRQ. Add a BLK_MQ_F_STACKING flag and set it for dm-rq and loop, so we don't need to wait for completion of in-flight requests from these drivers, which avoids a potential deadlock. It is safe to do this for stacking drivers, as those do not use interrupts at all and their I/O completions are triggered by the underlying devices' I/O completion. [1] https://lore.kernel.org/linux-block/alpine.DEB.2.21.1904051331270.1802@nanos.tec.linutronix.de/ [hch: different retry mechanism, merged two patches, minor cleanups] Signed-off-by: Ming Lei <ming.lei@redhat.com> Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Hannes Reinecke <hare@suse.de> Reviewed-by: Daniel Wagner <dwagner@suse.de> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
602380d28e
commit
bf0beec060
7 changed files with 133 additions and 4 deletions
|
@ -140,6 +140,8 @@ struct blk_mq_hw_ctx {
|
|||
*/
|
||||
atomic_t nr_active;
|
||||
|
||||
/** @cpuhp_online: List to store request if CPU is going to die */
|
||||
struct hlist_node cpuhp_online;
|
||||
/** @cpuhp_dead: List to store request if some CPU die. */
|
||||
struct hlist_node cpuhp_dead;
|
||||
/** @kobj: Kernel object for sysfs. */
|
||||
|
@ -391,6 +393,11 @@ struct blk_mq_ops {
|
|||
enum {
|
||||
BLK_MQ_F_SHOULD_MERGE = 1 << 0,
|
||||
BLK_MQ_F_TAG_SHARED = 1 << 1,
|
||||
/*
|
||||
* Set when this device requires underlying blk-mq device for
|
||||
* completing IO:
|
||||
*/
|
||||
BLK_MQ_F_STACKING = 1 << 2,
|
||||
BLK_MQ_F_BLOCKING = 1 << 5,
|
||||
BLK_MQ_F_NO_SCHED = 1 << 6,
|
||||
BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
|
||||
|
@ -400,6 +407,9 @@ enum {
|
|||
BLK_MQ_S_TAG_ACTIVE = 1,
|
||||
BLK_MQ_S_SCHED_RESTART = 2,
|
||||
|
||||
/* hw queue is inactive after all its CPUs become offline */
|
||||
BLK_MQ_S_INACTIVE = 3,
|
||||
|
||||
BLK_MQ_MAX_DEPTH = 10240,
|
||||
|
||||
BLK_MQ_CPU_WORK_BATCH = 8,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue