block-6.1-2022-11-05

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmNmdGEQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgpvLpD/9pL9SLpoUAnSvYAzaJC0dJhFHzQhmQgA55
 qxwgC4NDyxYgvsLuHVVoR5qRSNQO37nKkgoeHsqSX56UTloQnggurg0Cr94VJcjC
 seT1++Dl6BPz1M9h/UFS2cwm26GC+cQmsLIQoACSi0lNEOLytPP/emq6Vuqz0udx
 ah1ACXebiHe07A8Kvpt7orHlpM/dKH0/4g5/7h0E5RWrC9yg1WEOHPjd/MQ5amy0
 9YkhtqM5OQfNsVY0DcRbgRPr115xSi/L6No3Q6pMAVqzM7ZRk3iD039be7Sooqn8
 sl54gZB3AWGzgrFnJLjKCcQg4qg/wyYhZXEuV2JdzYeXCBK6RMcV0I2hP6vWP7Au
 dqlw5khvQOwx32qYNlXHU7g/ve5qY7hblIHbyqtKjQicIQ8LP18Ek1QWQcywiK4E
 hyYJ/3gYRjVqigyw32++cMSRLbLktiY38+J7NxujIj6J1aOYosCA5kIxTSa11tLG
 VGeXny5CS5l0zrl3irGBRI1Qi33T0hnbmf99v+MndFhRfsYAF8tKwuJyI+d+rJvj
 S8grDzsmlzwe1INXEbnMEg+SsHOPe5On0bzYIYX9Oi0BSsZf1i4u7SdDb9tu2Tiw
 WSJyYBNGCsl7wFSoLmY75j1OvWY/iXYPKqZ8bt9STQbO9vL+VHksFzcnnkvzrBG6
 Zs1uD17jwQ==
 =JQlF
 -----END PGP SIGNATURE-----

Merge tag 'block-6.1-2022-11-05' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

 - Fixes for the ublk driver (Ming)

 - Fixes for error handling memory leaks (Chen Jun, Chen Zhongjin)

 - Explicitly clear the last request in a chain when the plug is
   flushed, as it may have already been issued (Al)

* tag 'block-6.1-2022-11-05' of git://git.kernel.dk/linux:
  block: blk_add_rq_to_plug(): clear stale 'last' after flush
  blk-mq: Fix kmemleak in blk_mq_init_allocated_queue
  block: Fix possible memory leak for rq_wb on add_disk failure
  ublk_drv: add ublk_queue_cmd() for cleanup
  ublk_drv: avoid to touch io_uring cmd in blk_mq io path
  ublk_drv: comment on ublk_driver entry of Kconfig
  ublk_drv: return flag of UBLK_F_URING_CMD_COMP_IN_TASK in case of module

commit 4869f5750a
Author: Linus Torvalds
Date:   2022-11-05 09:02:28 -07:00

4 changed files with 77 additions and 50 deletions

diff --git a/block/blk-mq.c b/block/blk-mq.c

@@ -1262,6 +1262,7 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
                   (!blk_queue_nomerges(rq->q) &&
                    blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
                blk_mq_flush_plug_list(plug, false);
+               last = NULL;
                trace_block_plug(rq->q);
        }
 
@@ -4193,9 +4194,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
        return 0;
 
 err_hctxs:
-       xa_destroy(&q->hctx_table);
-       q->nr_hw_queues = 0;
-       blk_mq_sysfs_deinit(q);
+       blk_mq_release(q);
 err_poll:
        blk_stat_free_callback(q->poll_cb);
        q->poll_cb = NULL;
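
The blk_add_rq_to_plug() change above guards against a classic stale-pointer pattern: 'last' is peeked from the plug list before a possible flush, and the flush hands those requests off (after which they may complete and be freed), so the cached pointer must be cleared before any later use in the same function. A minimal userspace sketch of that pattern, with hypothetical names rather than the kernel's, looks like this:

/* Sketch of the "stale cached request" pattern fixed above (illustrative only). */
#include <stdio.h>
#include <stdlib.h>

struct req { struct req *next; int bytes; };
struct plug { struct req *head; int count; };

static void flush(struct plug *p)               /* issues and frees everything queued */
{
        while (p->head) {
                struct req *r = p->head;
                p->head = r->next;
                free(r);
        }
        p->count = 0;
}

static void add_req(struct plug *p, struct req *rq, int flush_threshold)
{
        struct req *last = p->head;             /* cached before a possible flush */

        if (last && last->bytes >= flush_threshold) {
                flush(p);
                last = NULL;                    /* the fix: drop the now-stale pointer */
        }
        rq->next = p->head;
        p->head = rq;
        p->count++;

        /* without the 'last = NULL' above, this would read freed memory */
        if (last && last->bytes > rq->bytes)
                printf("previous request was larger\n");
}

int main(void)
{
        struct plug p = { 0 };

        for (int i = 0; i < 4; i++) {
                struct req *rq = calloc(1, sizeof(*rq));
                rq->bytes = 4096 * (i + 1);
                add_req(&p, rq, 8192);
        }
        flush(&p);
        return 0;
}

Building the sketch with cc -fsanitize=address and deleting the 'last = NULL;' line makes the use-after-free that the kernel fix prevents visible at runtime.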

diff --git a/block/genhd.c b/block/genhd.c

@@ -527,6 +527,7 @@ out_unregister_bdi:
        bdi_unregister(disk->bdi);
 out_unregister_queue:
        blk_unregister_queue(disk);
+       rq_qos_exit(disk->queue);
 out_put_slave_dir:
        kobject_put(disk->slave_dir);
 out_put_holder_dir:

diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig

@@ -408,6 +408,12 @@ config BLK_DEV_UBLK
          definition isn't finalized yet, and might change according to future
          requirement, so mark is as experimental now.
 
+         Say Y if you want to get better performance because task_work_add()
+         can be used in IO path for replacing io_uring cmd, which will become
+         shared between IO tasks and ubq daemon, meantime task_work_add() can
+         can handle batch more effectively, but task_work_add() isn't exported
+         for module, so ublk has to be built to kernel.
+
 source "drivers/block/rnbd/Kconfig"
 
 endif # BLK_DEV

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c

@@ -57,11 +57,14 @@
 #define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD)
 
 struct ublk_rq_data {
-       struct callback_head work;
+       union {
+               struct callback_head work;
+               struct llist_node node;
+       };
 };
 
 struct ublk_uring_cmd_pdu {
-       struct request *req;
+       struct ublk_queue *ubq;
 };
 
 /*
@@ -119,6 +122,8 @@ struct ublk_queue {
        struct task_struct      *ubq_daemon;
        char *io_cmd_buf;
 
+       struct llist_head       io_cmds;
+
        unsigned long io_addr;  /* mapped vm address */
        unsigned int max_io_sz;
        bool force_abort;
@@ -764,8 +769,12 @@ static inline void __ublk_rq_task_work(struct request *req)
 static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd)
 {
        struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+       struct ublk_queue *ubq = pdu->ubq;
+       struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
+       struct ublk_rq_data *data;
 
-       __ublk_rq_task_work(pdu->req);
+       llist_for_each_entry(data, io_cmds, node)
+               __ublk_rq_task_work(blk_mq_rq_from_pdu(data));
 }
 
 static void ublk_rq_task_work_fn(struct callback_head *work)
@@ -777,6 +786,54 @@ static void ublk_rq_task_work_fn(struct callback_head *work)
        __ublk_rq_task_work(req);
 }
 
+static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq)
+{
+       struct ublk_io *io = &ubq->ios[rq->tag];
+
+       /*
+        * If the check pass, we know that this is a re-issued request aborted
+        * previously in monitor_work because the ubq_daemon(cmd's task) is
+        * PF_EXITING. We cannot call io_uring_cmd_complete_in_task() anymore
+        * because this ioucmd's io_uring context may be freed now if no inflight
+        * ioucmd exists. Otherwise we may cause null-deref in ctx->fallback_work.
+        *
+        * Note: monitor_work sets UBLK_IO_FLAG_ABORTED and ends this request(releasing
+        * the tag). Then the request is re-started(allocating the tag) and we are here.
+        * Since releasing/allocating a tag implies smp_mb(), finding UBLK_IO_FLAG_ABORTED
+        * guarantees that here is a re-issued request aborted previously.
+        */
+       if (unlikely(io->flags & UBLK_IO_FLAG_ABORTED)) {
+               struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
+               struct ublk_rq_data *data;
+
+               llist_for_each_entry(data, io_cmds, node)
+                       __ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data));
+       } else {
+               struct io_uring_cmd *cmd = io->cmd;
+               struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+
+               pdu->ubq = ubq;
+               io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
+       }
+}
+
+static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq,
+               bool last)
+{
+       struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
+
+       if (ublk_can_use_task_work(ubq)) {
+               enum task_work_notify_mode notify_mode = last ?
+                       TWA_SIGNAL_NO_IPI : TWA_NONE;
+
+               if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode))
+                       __ublk_abort_rq(ubq, rq);
+       } else {
+               if (llist_add(&data->node, &ubq->io_cmds))
+                       ublk_submit_cmd(ubq, rq);
+       }
+}
+
 static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
                const struct blk_mq_queue_data *bd)
 {
@@ -788,6 +845,7 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
        res = ublk_setup_iod(ubq, rq);
        if (unlikely(res != BLK_STS_OK))
                return BLK_STS_IOERR;
+
        /* With recovery feature enabled, force_abort is set in
         * ublk_stop_dev() before calling del_gendisk(). We have to
         * abort all requeued and new rqs here to let del_gendisk()
@@ -803,41 +861,11 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
        blk_mq_start_request(bd->rq);
 
        if (unlikely(ubq_daemon_is_dying(ubq))) {
- fail:
                __ublk_abort_rq(ubq, rq);
                return BLK_STS_OK;
        }
 
-       if (ublk_can_use_task_work(ubq)) {
-               struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
-               enum task_work_notify_mode notify_mode = bd->last ?
-                       TWA_SIGNAL_NO_IPI : TWA_NONE;
-
-               if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode))
-                       goto fail;
-       } else {
-               struct ublk_io *io = &ubq->ios[rq->tag];
-               struct io_uring_cmd *cmd = io->cmd;
-               struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
-
-               /*
-                * If the check pass, we know that this is a re-issued request aborted
-                * previously in monitor_work because the ubq_daemon(cmd's task) is
-                * PF_EXITING. We cannot call io_uring_cmd_complete_in_task() anymore
-                * because this ioucmd's io_uring context may be freed now if no inflight
-                * ioucmd exists. Otherwise we may cause null-deref in ctx->fallback_work.
-                *
-                * Note: monitor_work sets UBLK_IO_FLAG_ABORTED and ends this request(releasing
-                * the tag). Then the request is re-started(allocating the tag) and we are here.
-                * Since releasing/allocating a tag implies smp_mb(), finding UBLK_IO_FLAG_ABORTED
-                * guarantees that here is a re-issued request aborted previously.
-                */
-               if ((io->flags & UBLK_IO_FLAG_ABORTED))
-                       goto fail;
-
-               pdu->req = rq;
-               io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
-       }
+       ublk_queue_cmd(ubq, rq, bd->last);
 
        return BLK_STS_OK;
 }
@@ -1164,22 +1192,12 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
 }
 
 static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
-               int tag, struct io_uring_cmd *cmd)
+               int tag)
 {
        struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
        struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag);
 
-       if (ublk_can_use_task_work(ubq)) {
-               struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
-
-               /* should not fail since we call it just in ubq->ubq_daemon */
-               task_work_add(ubq->ubq_daemon, &data->work, TWA_SIGNAL_NO_IPI);
-       } else {
-               struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
-
-               pdu->req = req;
-               io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
-       }
+       ublk_queue_cmd(ubq, req, true);
 }
 
 static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
@@ -1267,7 +1285,7 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
                io->addr = ub_cmd->addr;
                io->cmd = cmd;
                io->flags |= UBLK_IO_FLAG_ACTIVE;
-               ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag, cmd);
+               ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag);
                break;
        default:
                goto out;
@@ -1658,6 +1676,9 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
         */
        ub->dev_info.flags &= UBLK_F_ALL;
 
+       if (!IS_BUILTIN(CONFIG_BLK_DEV_UBLK))
+               ub->dev_info.flags |= UBLK_F_URING_CMD_COMP_IN_TASK;
+
        /* We are not ready to support zero copy */
        ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;
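
The ublk changes above replace per-request io_uring cmd handling in the blk-mq path with a lock-free list: ublk_queue_cmd() pushes the request's pdu onto ubq->io_cmds with llist_add(), and only the caller that finds the list empty schedules the task-work callback, which drains the whole list with llist_del_all() and handles it as a batch. A userspace analogue of that pattern, using C11 atomics in place of the kernel's llist helpers (names here are illustrative, not the driver's):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct node { struct node *next; int tag; };

static _Atomic(struct node *) io_cmds = NULL;

/* Push onto the lock-free list; return true if it was empty (like llist_add()). */
static bool list_add(struct node *n)
{
        struct node *first = atomic_load(&io_cmds);

        do {
                n->next = first;
        } while (!atomic_compare_exchange_weak(&io_cmds, &first, n));
        return first == NULL;
}

/* Detach the whole list in one shot (like llist_del_all()). */
static struct node *list_del_all(void)
{
        return atomic_exchange(&io_cmds, NULL);
}

static void drain_batch(void)                   /* stands in for the task-work callback */
{
        struct node *n = list_del_all();

        while (n) {
                struct node *next = n->next;

                printf("handling tag %d\n", n->tag);
                free(n);
                n = next;
        }
}

int main(void)
{
        for (int i = 0; i < 3; i++) {
                struct node *n = calloc(1, sizeof(*n));

                n->tag = i;
                if (list_add(n))                /* only the "first" producer schedules the drain */
                        printf("scheduling consumer for a new batch\n");
        }
        drain_batch();                          /* consumer runs once and sees all queued requests */
        return 0;
}

The point of the empty-to-non-empty check is that producers racing on an already-populated list only pay for the push; at most one scheduling of the consumer is outstanding per batch.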