From 5b5e20f421c0b6d437b3dec13e53674161998d56 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 17 Aug 2015 10:31:47 +0800 Subject: [PATCH 01/24] block: loop: set QUEUE_FLAG_NOMERGES for request queue of loop It doesn't make sense to enable merge because the I/O submitted to backing file is handled page by page. Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/loop.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f9889b6bc02c..eee751032cad 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1595,6 +1595,12 @@ static int loop_add(struct loop_device **l, int i) } lo->lo_queue->queuedata = lo; + /* + * It doesn't make sense to enable merge because the I/O + * submitted to backing file is handled page by page. + */ + queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); + INIT_LIST_HEAD(&lo->write_cmd_head); INIT_WORK(&lo->write_work, loop_queue_write_work); From e03a3d7a94e2485b6e2fa3fb630b9b3a30b65718 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 17 Aug 2015 10:31:48 +0800 Subject: [PATCH 02/24] block: loop: use kthread_work The following patch will use dio/aio to submit IO to the backing file, so it no longer needs to schedule IO concurrently from work; use kthread_work to decrease context switch cost a lot. For the non-AIO case, a single thread has been used for a long time, and it was just converted to work in v4.0, which has already caused a performance regression for fedora live booting. In discussion[1], even though submitting I/O via work concurrently can improve random read IO throughput, it might meanwhile hurt sequential read IO performance, so it is better to restore the single thread behaviour. For the following AIO support, it is better to use multi hw-queue with per-hwq kthread than the current work approach, supposing there is such a high performance requirement for loop. 
[1] http://marc.info/?t=143082678400002&r=1&w=2 Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/loop.c | 79 ++++++++++++++------------------------------ drivers/block/loop.h | 10 +++--- 2 files changed, 28 insertions(+), 61 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index eee751032cad..1875aadb31b0 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -688,6 +688,23 @@ static void loop_config_discard(struct loop_device *lo) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); } +static void loop_unprepare_queue(struct loop_device *lo) +{ + flush_kthread_worker(&lo->worker); + kthread_stop(lo->worker_task); +} + +static int loop_prepare_queue(struct loop_device *lo) +{ + init_kthread_worker(&lo->worker); + lo->worker_task = kthread_run(kthread_worker_fn, + &lo->worker, "loop%d", lo->lo_number); + if (IS_ERR(lo->worker_task)) + return -ENOMEM; + set_user_nice(lo->worker_task, MIN_NICE); + return 0; +} + static int loop_set_fd(struct loop_device *lo, fmode_t mode, struct block_device *bdev, unsigned int arg) { @@ -745,11 +762,8 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, size = get_loop_size(lo, file); if ((loff_t)(sector_t)size != size) goto out_putf; - error = -ENOMEM; - lo->wq = alloc_workqueue("kloopd%d", - WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 16, - lo->lo_number); - if (!lo->wq) + error = loop_prepare_queue(lo); + if (error) goto out_putf; error = 0; @@ -903,8 +917,7 @@ static int loop_clr_fd(struct loop_device *lo) lo->lo_flags = 0; if (!part_shift) lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; - destroy_workqueue(lo->wq); - lo->wq = NULL; + loop_unprepare_queue(lo); mutex_unlock(&lo->lo_ctl_mutex); /* * Need not hold lo_ctl_mutex to fput backing file. 
@@ -1461,23 +1474,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, if (lo->lo_state != Lo_bound) return -EIO; - if (cmd->rq->cmd_flags & REQ_WRITE) { - struct loop_device *lo = cmd->rq->q->queuedata; - bool need_sched = true; - - spin_lock_irq(&lo->lo_lock); - if (lo->write_started) - need_sched = false; - else - lo->write_started = true; - list_add_tail(&cmd->list, &lo->write_cmd_head); - spin_unlock_irq(&lo->lo_lock); - - if (need_sched) - queue_work(lo->wq, &lo->write_work); - } else { - queue_work(lo->wq, &cmd->read_work); - } + queue_kthread_work(&lo->worker, &cmd->work); return BLK_MQ_RQ_QUEUE_OK; } @@ -1499,35 +1496,10 @@ static void loop_handle_cmd(struct loop_cmd *cmd) blk_mq_complete_request(cmd->rq); } -static void loop_queue_write_work(struct work_struct *work) -{ - struct loop_device *lo = - container_of(work, struct loop_device, write_work); - LIST_HEAD(cmd_list); - - spin_lock_irq(&lo->lo_lock); - repeat: - list_splice_init(&lo->write_cmd_head, &cmd_list); - spin_unlock_irq(&lo->lo_lock); - - while (!list_empty(&cmd_list)) { - struct loop_cmd *cmd = list_first_entry(&cmd_list, - struct loop_cmd, list); - list_del_init(&cmd->list); - loop_handle_cmd(cmd); - } - - spin_lock_irq(&lo->lo_lock); - if (!list_empty(&lo->write_cmd_head)) - goto repeat; - lo->write_started = false; - spin_unlock_irq(&lo->lo_lock); -} - -static void loop_queue_read_work(struct work_struct *work) +static void loop_queue_work(struct kthread_work *work) { struct loop_cmd *cmd = - container_of(work, struct loop_cmd, read_work); + container_of(work, struct loop_cmd, work); loop_handle_cmd(cmd); } @@ -1539,7 +1511,7 @@ static int loop_init_request(void *data, struct request *rq, struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); cmd->rq = rq; - INIT_WORK(&cmd->read_work, loop_queue_read_work); + init_kthread_work(&cmd->work, loop_queue_work); return 0; } @@ -1601,9 +1573,6 @@ static int loop_add(struct loop_device **l, int i) */ queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, 
lo->lo_queue); - INIT_LIST_HEAD(&lo->write_cmd_head); - INIT_WORK(&lo->write_work, loop_queue_write_work); - disk = lo->lo_disk = alloc_disk(1 << part_shift); if (!disk) goto out_free_queue; diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 25e8997ed246..b6c7d21a453a 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include /* Possible states of device */ @@ -54,12 +54,10 @@ struct loop_device { gfp_t old_gfp_mask; spinlock_t lo_lock; - struct workqueue_struct *wq; - struct list_head write_cmd_head; - struct work_struct write_work; - bool write_started; int lo_state; struct mutex lo_ctl_mutex; + struct kthread_worker worker; + struct task_struct *worker_task; struct request_queue *lo_queue; struct blk_mq_tag_set tag_set; @@ -67,7 +65,7 @@ struct loop_device { }; struct loop_cmd { - struct work_struct read_work; + struct kthread_work work; struct request *rq; struct list_head list; }; From 2e5ab5f379f96a6207c45be40c357ebb1beb8ef3 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 17 Aug 2015 10:31:49 +0800 Subject: [PATCH 03/24] block: loop: prepare for supporting direct IO This patch provides one interface for enabling direct IO from user space: - userspace(such as losetup) can pass 'file' which is opened/fcntl as O_DIRECT Also __loop_update_dio() is introduced to check if direct I/O can be used on current loop setting. The last big change is to introduce LO_FLAGS_DIRECT_IO flag for userspace to know if direct IO is used to access backing file. 
Cc: linux-api@vger.kernel.org Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/loop.c | 77 +++++++++++++++++++++++++++++++++++++++ drivers/block/loop.h | 1 + include/uapi/linux/loop.h | 1 + 3 files changed, 79 insertions(+) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 1875aadb31b0..06cad7edc0d3 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -164,6 +164,62 @@ static loff_t get_loop_size(struct loop_device *lo, struct file *file) return get_size(lo->lo_offset, lo->lo_sizelimit, file); } +static void __loop_update_dio(struct loop_device *lo, bool dio) +{ + struct file *file = lo->lo_backing_file; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + unsigned short sb_bsize = 0; + unsigned dio_align = 0; + bool use_dio; + + if (inode->i_sb->s_bdev) { + sb_bsize = bdev_logical_block_size(inode->i_sb->s_bdev); + dio_align = sb_bsize - 1; + } + + /* + * We support direct I/O only if lo_offset is aligned with the + * logical I/O size of backing device, and the logical block + * size of loop is bigger than the backing device's and the loop + * needn't transform transfer. 
+ * + * TODO: the above condition may be loosed in the future, and + * direct I/O may be switched runtime at that time because most + * of requests in sane appplications should be PAGE_SIZE algined + */ + if (dio) { + if (queue_logical_block_size(lo->lo_queue) >= sb_bsize && + !(lo->lo_offset & dio_align) && + mapping->a_ops->direct_IO && + !lo->transfer) + use_dio = true; + else + use_dio = false; + } else { + use_dio = false; + } + + if (lo->use_dio == use_dio) + return; + + /* flush dirty pages before changing direct IO */ + vfs_fsync(file, 0); + + /* + * The flag of LO_FLAGS_DIRECT_IO is handled similarly with + * LO_FLAGS_READ_ONLY, both are set from kernel, and losetup + * will get updated by ioctl(LOOP_GET_STATUS) + */ + blk_mq_freeze_queue(lo->lo_queue); + lo->use_dio = use_dio; + if (use_dio) + lo->lo_flags |= LO_FLAGS_DIRECT_IO; + else + lo->lo_flags &= ~LO_FLAGS_DIRECT_IO; + blk_mq_unfreeze_queue(lo->lo_queue); +} + static int figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) { @@ -421,6 +477,12 @@ struct switch_request { struct completion wait; }; +static inline void loop_update_dio(struct loop_device *lo) +{ + __loop_update_dio(lo, io_is_direct(lo->lo_backing_file) | + lo->use_dio); +} + /* * Do the actual switch; called from the BIO completion routine */ @@ -441,6 +503,7 @@ static void do_loop_switch(struct loop_device *lo, struct switch_request *p) mapping->host->i_bdev->bd_block_size : PAGE_SIZE; lo->old_gfp_mask = mapping_gfp_mask(mapping); mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); + loop_update_dio(lo); } /* @@ -627,11 +690,19 @@ static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf) return sprintf(buf, "%s\n", partscan ? "1" : "0"); } +static ssize_t loop_attr_dio_show(struct loop_device *lo, char *buf) +{ + int dio = (lo->lo_flags & LO_FLAGS_DIRECT_IO); + + return sprintf(buf, "%s\n", dio ? 
"1" : "0"); +} + LOOP_ATTR_RO(backing_file); LOOP_ATTR_RO(offset); LOOP_ATTR_RO(sizelimit); LOOP_ATTR_RO(autoclear); LOOP_ATTR_RO(partscan); +LOOP_ATTR_RO(dio); static struct attribute *loop_attrs[] = { &loop_attr_backing_file.attr, @@ -639,6 +710,7 @@ static struct attribute *loop_attrs[] = { &loop_attr_sizelimit.attr, &loop_attr_autoclear.attr, &loop_attr_partscan.attr, + &loop_attr_dio.attr, NULL, }; @@ -770,6 +842,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); + lo->use_dio = false; lo->lo_blocksize = lo_blocksize; lo->lo_device = bdev; lo->lo_flags = lo_flags; @@ -783,6 +856,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) blk_queue_flush(lo->lo_queue, REQ_FLUSH); + loop_update_dio(lo); set_capacity(lo->lo_disk, size); bd_set_size(bdev, size << 9); loop_sysfs_init(lo); @@ -1001,6 +1075,9 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) lo->lo_key_owner = uid; } + /* update dio if lo_offset or transfer is changed */ + __loop_update_dio(lo, lo->use_dio); + return 0; } diff --git a/drivers/block/loop.h b/drivers/block/loop.h index b6c7d21a453a..d1de2217c09a 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -58,6 +58,7 @@ struct loop_device { struct mutex lo_ctl_mutex; struct kthread_worker worker; struct task_struct *worker_task; + bool use_dio; struct request_queue *lo_queue; struct blk_mq_tag_set tag_set; diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h index e0cecd2eabdc..949851ce681f 100644 --- a/include/uapi/linux/loop.h +++ b/include/uapi/linux/loop.h @@ -21,6 +21,7 @@ enum { LO_FLAGS_READ_ONLY = 1, LO_FLAGS_AUTOCLEAR = 4, LO_FLAGS_PARTSCAN = 8, + LO_FLAGS_DIRECT_IO = 16, }; #include /* for __kernel_old_dev_t */ From ab1cb278bc7027663adbfb0b81404f8398437e11 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 17 Aug 2015 10:31:50 +0800 Subject: [PATCH 
04/24] block: loop: introduce ioctl command of LOOP_SET_DIRECT_IO If loop block is mounted via 'mount -o loop', it isn't easy to pass file descriptor opened as O_DIRECT, so this patch introduces a new command to support direct IO for this case. Cc: linux-api@vger.kernel.org Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/loop.c | 19 +++++++++++++++++++ include/uapi/linux/loop.h | 1 + 2 files changed, 20 insertions(+) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 06cad7edc0d3..75db3b98ec2b 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1228,6 +1228,20 @@ static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev) return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); } +static int loop_set_dio(struct loop_device *lo, unsigned long arg) +{ + int error = -ENXIO; + if (lo->lo_state != Lo_bound) + goto out; + + __loop_update_dio(lo, !!arg); + if (lo->use_dio == !!arg) + return 0; + error = -EINVAL; + out: + return error; +} + static int lo_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { @@ -1271,6 +1285,11 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) err = loop_set_capacity(lo, bdev); break; + case LOOP_SET_DIRECT_IO: + err = -EPERM; + if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) + err = loop_set_dio(lo, arg); + break; default: err = lo->ioctl ? 
lo->ioctl(lo, cmd, arg) : -EINVAL; } diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h index 949851ce681f..c8125ec1f4f2 100644 --- a/include/uapi/linux/loop.h +++ b/include/uapi/linux/loop.h @@ -87,6 +87,7 @@ struct loop_info64 { #define LOOP_GET_STATUS64 0x4C05 #define LOOP_CHANGE_FD 0x4C06 #define LOOP_SET_CAPACITY 0x4C07 +#define LOOP_SET_DIRECT_IO 0x4C08 /* /dev/loop-control interface */ #define LOOP_CTL_ADD 0x4C80 From bc07c10a3603a5ab3ef01ba42b3d41f9ac63d1b6 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 17 Aug 2015 10:31:51 +0800 Subject: [PATCH 05/24] block: loop: support DIO & AIO There are at least 3 advantages to use direct I/O and AIO on read/write loop's backing file: 1) double cache can be avoided, then memory usage gets decreased a lot 2) not like user space direct I/O, there isn't cost of pinning pages 3) avoid context switch for obtaining good throughput - in buffered file read, random I/O top throughput is often obtained only if they are submitted concurrently from lots of tasks; but for sequential I/O, most of times they can be hit from page cache, so concurrent submissions often introduce unnecessary context switch and can't improve throughput much. There was such discussion[1] to use non-blocking I/O to improve the problem for application. - with direct I/O and AIO, concurrent submissions can be avoided and random read throughput can't be affected meantime xfstests(-g auto, ext4) is basically passed when running with direct I/O(aio), one exception is generic/232, but it failed in loop buffered I/O(4.2-rc6-next-20150814) too. Follows the fio test result for performance purpose: 4 jobs fio test inside ext4 file system over loop block 1) How to run - KVM: 4 VCPUs, 2G RAM - linux kernel: 4.2-rc6-next-20150814(base) with the patchset - the loop block is over one image on SSD. 
- linux psync, 4 jobs, size 1500M, ext4 over loop block - test result: IOPS from fio output 2) Throughput(IOPS) becomes a bit better with direct I/O(aio) ------------------------------------------------------------- test cases |randread |read |randwrite |write | ------------------------------------------------------------- base |8015 |113811 |67442 |106978 ------------------------------------------------------------- base+loop aio |8136 |125040 |67811 |111376 ------------------------------------------------------------- - somehow, it should be caused by more page cache available for application or one extra page copy is avoided in case of direct I/O 3) context switch - context switch decreased by ~50% with loop direct I/O(aio) compared with loop buffered I/O(4.2-rc6-next-20150814) 4) memory usage from /proc/meminfo ------------------------------------------------------------- | Buffers | Cached ------------------------------------------------------------- base | > 760MB | ~950MB ------------------------------------------------------------- base+loop direct I/O(aio) | < 5MB | ~1.6GB ------------------------------------------------------------- - so there are much more page caches available for application with direct I/O [1] https://lwn.net/Articles/612483/ Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/loop.c | 98 ++++++++++++++++++++++++++++++++++++++++++-- drivers/block/loop.h | 2 + 2 files changed, 97 insertions(+), 3 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 75db3b98ec2b..23376084a5cb 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -445,6 +445,90 @@ static int lo_req_flush(struct loop_device *lo, struct request *rq) return ret; } +static inline void handle_partial_read(struct loop_cmd *cmd, long bytes) +{ + if (bytes < 0 || (cmd->rq->cmd_flags & REQ_WRITE)) + return; + + if (unlikely(bytes < blk_rq_bytes(cmd->rq))) { + struct bio *bio = cmd->rq->bio; + + 
bio_advance(bio, bytes); + zero_fill_bio(bio); + } +} + +static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2) +{ + struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb); + struct request *rq = cmd->rq; + + handle_partial_read(cmd, ret); + + if (ret > 0) + ret = 0; + else if (ret < 0) + ret = -EIO; + + rq->errors = ret; + blk_mq_complete_request(rq); +} + +static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, + loff_t pos, bool rw) +{ + struct iov_iter iter; + struct bio_vec *bvec; + struct bio *bio = cmd->rq->bio; + struct file *file = lo->lo_backing_file; + int ret; + + /* nomerge for loop request queue */ + WARN_ON(cmd->rq->bio != cmd->rq->biotail); + + bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); + iov_iter_bvec(&iter, ITER_BVEC | rw, bvec, + bio_segments(bio), blk_rq_bytes(cmd->rq)); + + cmd->iocb.ki_pos = pos; + cmd->iocb.ki_filp = file; + cmd->iocb.ki_complete = lo_rw_aio_complete; + cmd->iocb.ki_flags = IOCB_DIRECT; + + if (rw == WRITE) + ret = file->f_op->write_iter(&cmd->iocb, &iter); + else + ret = file->f_op->read_iter(&cmd->iocb, &iter); + + if (ret != -EIOCBQUEUED) + cmd->iocb.ki_complete(&cmd->iocb, ret, 0); + return 0; +} + + +static inline int lo_rw_simple(struct loop_device *lo, + struct request *rq, loff_t pos, bool rw) +{ + struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); + + if (cmd->use_aio) + return lo_rw_aio(lo, cmd, pos, rw); + + /* + * lo_write_simple and lo_read_simple should have been covered + * by io submit style function like lo_rw_aio(), one blocker + * is that lo_read_simple() need to call flush_dcache_page after + * the page is written from kernel, and it isn't easy to handle + * this in io submit style function which submits all segments + * of the req at one time. And direct read IO doesn't need to + * run flush_dcache_page(). 
+ */ + if (rw == WRITE) + return lo_write_simple(lo, rq, pos); + else + return lo_read_simple(lo, rq, pos); +} + static int do_req_filebacked(struct loop_device *lo, struct request *rq) { loff_t pos; @@ -460,13 +544,13 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq) else if (lo->transfer) ret = lo_write_transfer(lo, rq, pos); else - ret = lo_write_simple(lo, rq, pos); + ret = lo_rw_simple(lo, rq, pos, WRITE); } else { if (lo->transfer) ret = lo_read_transfer(lo, rq, pos); else - ret = lo_read_simple(lo, rq, pos); + ret = lo_rw_simple(lo, rq, pos, READ); } return ret; @@ -1570,6 +1654,12 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, if (lo->lo_state != Lo_bound) return -EIO; + if (lo->use_dio && !(cmd->rq->cmd_flags & (REQ_FLUSH | + REQ_DISCARD))) + cmd->use_aio = true; + else + cmd->use_aio = false; + queue_kthread_work(&lo->worker, &cmd->work); return BLK_MQ_RQ_QUEUE_OK; @@ -1589,7 +1679,9 @@ static void loop_handle_cmd(struct loop_cmd *cmd) failed: if (ret) cmd->rq->errors = -EIO; - blk_mq_complete_request(cmd->rq); + /* complete non-aio request */ + if (!cmd->use_aio || ret) + blk_mq_complete_request(cmd->rq); } static void loop_queue_work(struct kthread_work *work) diff --git a/drivers/block/loop.h b/drivers/block/loop.h index d1de2217c09a..fb2237c73e61 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -69,6 +69,8 @@ struct loop_cmd { struct kthread_work work; struct request *rq; struct list_head list; + bool use_aio; /* use AIO interface to handle I/O */ + struct kiocb iocb; }; /* Support for loadable transfer modules */ From f7e7868b4743f1cc5e59e6e0ddd3ccf9cfe53a1b Mon Sep 17 00:00:00 2001 From: Georgios Toptsidis Date: Fri, 25 Sep 2015 10:50:08 +0300 Subject: [PATCH 06/24] cdrom: Random writing support for BD-RE media Recently, i bought a blu-ray writer and noticed that while cdrecord worked perfectly, random writing didn't work on rewritable bd-re media. 
For example, dd if=/dev/zero of=/dev/sr0 bs=32768 count=2 gave the usual "read-only file system" message. After checking if the problem lies with my burner or firmware, i grep-ed the kernel source for EROFS. One of the results was in the cdrom driver. I tried to follow the function chain and ended in the cdrom_is_dvd_rw function where writing is permitted only for DVD-RAM and DVD+RW media. I added a new case label for 0x43 which is the profile name of BD-RE and now it works correctly for BD-RE too. Maybe there is a better way of implementing this, like a new function checking for blu-ray support and called from cdrom_open_write like it happens for mrw and dvdram media, but adding the case label worked. Thank you for your time. Signed-off-by: Jens Axboe --- drivers/cdrom/cdrom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 5d28a45d2960..c206ccda899b 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -885,6 +885,7 @@ static int cdrom_is_dvd_rw(struct cdrom_device_info *cdi) switch (cdi->mmc3_profile) { case 0x12: /* DVD-RAM */ case 0x1A: /* DVD+RW */ + case 0x43: /* BD-RE */ return 0; default: return 1; From 188c3568f814fea965947ed24739987ba9c5a87e Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 1 Oct 2015 17:14:10 -0600 Subject: [PATCH 07/24] NVMe: Reference count open namespaces Dynamic namespace attachment means the namespace may be removed at any time, so the namespace reference count can not be tied to the device reference count. This fixes a NULL dereference if an opened namespace is detached from a controller. 
Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 29 ++++++++++++++++++++--------- include/linux/nvme.h | 1 + 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 6f04771f1019..b02ae3d759d7 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1943,6 +1943,18 @@ static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, #define nvme_compat_ioctl NULL #endif +static void nvme_free_ns(struct kref *kref) +{ + struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref); + + spin_lock(&dev_list_lock); + ns->disk->private_data = NULL; + spin_unlock(&dev_list_lock); + + put_disk(ns->disk); + kfree(ns); +} + static int nvme_open(struct block_device *bdev, fmode_t mode) { int ret = 0; @@ -1952,21 +1964,25 @@ static int nvme_open(struct block_device *bdev, fmode_t mode) ns = bdev->bd_disk->private_data; if (!ns) ret = -ENXIO; - else if (!kref_get_unless_zero(&ns->dev->kref)) + else if (!kref_get_unless_zero(&ns->kref)) ret = -ENXIO; + else if (!kref_get_unless_zero(&ns->dev->kref)) { + kref_put(&ns->kref, nvme_free_ns); + ret = -ENXIO; + } spin_unlock(&dev_list_lock); return ret; } static void nvme_free_dev(struct kref *kref); - static void nvme_release(struct gendisk *disk, fmode_t mode) { struct nvme_ns *ns = disk->private_data; struct nvme_dev *dev = ns->dev; kref_put(&dev->kref, nvme_free_dev); + kref_put(&ns->kref, nvme_free_ns); } static int nvme_getgeo(struct block_device *bd, struct hd_geometry *geo) @@ -2126,6 +2142,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid) if (!disk) goto out_free_queue; + kref_init(&ns->kref); ns->ns_id = nsid; ns->disk = disk; ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */ @@ -2360,13 +2377,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) static void nvme_free_namespace(struct nvme_ns *ns) { list_del(&ns->list); 
- - spin_lock(&dev_list_lock); - ns->disk->private_data = NULL; - spin_unlock(&dev_list_lock); - - put_disk(ns->disk); - kfree(ns); + kref_put(&ns->kref, nvme_free_ns); } static int ns_cmp(void *priv, struct list_head *a, struct list_head *b) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index b5812c395351..992b9c118678 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -135,6 +135,7 @@ struct nvme_ns { struct nvme_dev *dev; struct request_queue *queue; struct gendisk *disk; + struct kref kref; unsigned ns_id; int lba_shift; From 5105aa555c1c681ae281ea0d6108efd0a5d8a5e8 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 2 Oct 2015 10:37:28 -0600 Subject: [PATCH 08/24] NVMe: Namespace removal simplifications This liberates namespace removal from the device, allowing gendisk references to be closed independent of the nvme controller reference count. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 42 ++++++++++----------------------------- 1 file changed, 10 insertions(+), 32 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index b02ae3d759d7..904b54fcbbcd 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1943,6 +1943,7 @@ static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, #define nvme_compat_ioctl NULL #endif +static void nvme_free_dev(struct kref *kref); static void nvme_free_ns(struct kref *kref) { struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref); @@ -1951,6 +1952,7 @@ static void nvme_free_ns(struct kref *kref) ns->disk->private_data = NULL; spin_unlock(&dev_list_lock); + kref_put(&ns->dev->kref, nvme_free_dev); put_disk(ns->disk); kfree(ns); } @@ -1966,22 +1968,14 @@ static int nvme_open(struct block_device *bdev, fmode_t mode) ret = -ENXIO; else if (!kref_get_unless_zero(&ns->kref)) ret = -ENXIO; - else if (!kref_get_unless_zero(&ns->dev->kref)) { - kref_put(&ns->kref, 
nvme_free_ns); - ret = -ENXIO; - } spin_unlock(&dev_list_lock); return ret; } -static void nvme_free_dev(struct kref *kref); static void nvme_release(struct gendisk *disk, fmode_t mode) { struct nvme_ns *ns = disk->private_data; - struct nvme_dev *dev = ns->dev; - - kref_put(&dev->kref, nvme_free_dev); kref_put(&ns->kref, nvme_free_ns); } @@ -2179,6 +2173,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid) if (nvme_revalidate_disk(ns->disk)) goto out_free_disk; + kref_get(&dev->kref); add_disk(ns->disk); if (ns->ms) { struct block_device *bd = bdget_disk(ns->disk, 0); @@ -2374,12 +2369,6 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) return result; } -static void nvme_free_namespace(struct nvme_ns *ns) -{ - list_del(&ns->list); - kref_put(&ns->kref, nvme_free_ns); -} - static int ns_cmp(void *priv, struct list_head *a, struct list_head *b) { struct nvme_ns *nsa = container_of(a, struct nvme_ns, list); @@ -2421,7 +2410,9 @@ static void nvme_ns_remove(struct nvme_ns *ns) if (kill || !blk_queue_dying(ns->queue)) { blk_mq_abort_requeue_list(ns->queue); blk_cleanup_queue(ns->queue); - } + } + list_del_init(&ns->list); + kref_put(&ns->kref, nvme_free_ns); } static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn) @@ -2432,18 +2423,14 @@ static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn) for (i = 1; i <= nn; i++) { ns = nvme_find_ns(dev, i); if (ns) { - if (revalidate_disk(ns->disk)) { + if (revalidate_disk(ns->disk)) nvme_ns_remove(ns); - nvme_free_namespace(ns); - } } else nvme_alloc_ns(dev, i); } list_for_each_entry_safe(ns, next, &dev->namespaces, list) { - if (ns->ns_id > nn) { + if (ns->ns_id > nn) nvme_ns_remove(ns); - nvme_free_namespace(ns); - } } list_sort(NULL, &dev->namespaces, ns_cmp); } @@ -2833,9 +2820,9 @@ static void nvme_dev_shutdown(struct nvme_dev *dev) static void nvme_dev_remove(struct nvme_dev *dev) { - struct nvme_ns *ns; + struct nvme_ns *ns, *next; - list_for_each_entry(ns, &dev->namespaces, 
list) + list_for_each_entry_safe(ns, next, &dev->namespaces, list) nvme_ns_remove(ns); } @@ -2891,21 +2878,12 @@ static void nvme_release_instance(struct nvme_dev *dev) spin_unlock(&dev_list_lock); } -static void nvme_free_namespaces(struct nvme_dev *dev) -{ - struct nvme_ns *ns, *next; - - list_for_each_entry_safe(ns, next, &dev->namespaces, list) - nvme_free_namespace(ns); -} - static void nvme_free_dev(struct kref *kref) { struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref); put_device(dev->dev); put_device(dev->device); - nvme_free_namespaces(dev); nvme_release_instance(dev); if (dev->tagset.tags) blk_mq_free_tag_set(&dev->tagset); From 0a7385ad69f0f210c5cfbfd334b42423a6e05e5a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 2 Oct 2015 10:37:29 -0600 Subject: [PATCH 09/24] NVMe: Simplify device resume on io queue failure Releasing IO queues and disks was done in a work queue outside the controller resume context to delete namespaces if the controller failed after a resume from suspend. This is unnecessary since we can resume a device asynchronously. This patch makes resume use probe_work so it can directly remove namespaces if the device is manageable but not IO capable. Since the deleting disks was the only reason we had the convoluted "reset_workfn", this patch removes that unnecessary indirection. 
Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 34 ++++++---------------------------- include/linux/nvme.h | 1 - 2 files changed, 6 insertions(+), 29 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 904b54fcbbcd..bf35846558c8 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1285,7 +1285,6 @@ static void nvme_abort_req(struct request *req) list_del_init(&dev->node); dev_warn(dev->dev, "I/O %d QID %d timeout, reset controller\n", req->tag, nvmeq->qid); - dev->reset_workfn = nvme_reset_failed_dev; queue_work(nvme_workq, &dev->reset_work); out: spin_unlock_irqrestore(&dev_list_lock, flags); @@ -2089,7 +2088,6 @@ static int nvme_kthread(void *data) dev_warn(dev->dev, "Failed status: %x, reset controller\n", readl(&dev->bar->csts)); - dev->reset_workfn = nvme_reset_failed_dev; queue_work(nvme_workq, &dev->reset_work); continue; } @@ -3025,14 +3023,6 @@ static int nvme_remove_dead_ctrl(void *arg) return 0; } -static void nvme_remove_disks(struct work_struct *ws) -{ - struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work); - - nvme_free_queues(dev, 1); - nvme_dev_remove(dev); -} - static int nvme_dev_resume(struct nvme_dev *dev) { int ret; @@ -3041,10 +3031,9 @@ static int nvme_dev_resume(struct nvme_dev *dev) if (ret) return ret; if (dev->online_queues < 2) { - spin_lock(&dev_list_lock); - dev->reset_workfn = nvme_remove_disks; - queue_work(nvme_workq, &dev->reset_work); - spin_unlock(&dev_list_lock); + dev_warn(dev->dev, "IO queues not created\n"); + nvme_free_queues(dev, 1); + nvme_dev_remove(dev); } else { nvme_unfreeze_queues(dev); nvme_dev_add(dev); @@ -3091,12 +3080,6 @@ static void nvme_reset_failed_dev(struct work_struct *ws) nvme_dev_reset(dev); } -static void nvme_reset_workfn(struct work_struct *work) -{ - struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work); - dev->reset_workfn(work); -} - static int 
nvme_reset(struct nvme_dev *dev) { int ret = -EBUSY; @@ -3106,7 +3089,6 @@ static int nvme_reset(struct nvme_dev *dev) spin_lock(&dev_list_lock); if (!work_pending(&dev->reset_work)) { - dev->reset_workfn = nvme_reset_failed_dev; queue_work(nvme_workq, &dev->reset_work); ret = 0; } @@ -3159,8 +3141,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto free; INIT_LIST_HEAD(&dev->namespaces); - dev->reset_workfn = nvme_reset_failed_dev; - INIT_WORK(&dev->reset_work, nvme_reset_workfn); + INIT_WORK(&dev->reset_work, nvme_reset_failed_dev); dev->dev = get_device(&pdev->dev); pci_set_drvdata(pdev, dev); result = nvme_set_instance(dev); @@ -3223,7 +3204,7 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) if (prepare) nvme_dev_shutdown(dev); else - nvme_dev_resume(dev); + schedule_work(&dev->probe_work); } static void nvme_shutdown(struct pci_dev *pdev) @@ -3277,10 +3258,7 @@ static int nvme_resume(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct nvme_dev *ndev = pci_get_drvdata(pdev); - if (nvme_dev_resume(ndev) && !work_busy(&ndev->reset_work)) { - ndev->reset_workfn = nvme_reset_failed_dev; - queue_work(nvme_workq, &ndev->reset_work); - } + schedule_work(&ndev->probe_work); return 0; } #endif diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 992b9c118678..7725b4c8b718 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -104,7 +104,6 @@ struct nvme_dev { struct list_head namespaces; struct kref kref; struct device *device; - work_func_t reset_workfn; struct work_struct reset_work; struct work_struct probe_work; struct work_struct scan_work; From 201cf1ecdfe5ea2774cbb21d4214c98ec8b418de Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Oct 2015 18:48:36 +0200 Subject: [PATCH 10/24] nvme: delete dev from dev_list in nvme_reset Device resets need to delete the device from the device list before kicking off the reset and re-probe, otherwise we get the device added to the 
list twice. nvme_reset is the only site missing this deletion at the moment, and this patch adds it. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index bf35846558c8..be35b1d18854 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -3089,6 +3089,7 @@ static int nvme_reset(struct nvme_dev *dev) spin_lock(&dev_list_lock); if (!work_pending(&dev->reset_work)) { + list_del_init(&dev->node); queue_work(nvme_workq, &dev->reset_work); ret = 0; } From 77b50d9e15e113fdb871218aa0f2e3bed12ee731 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Oct 2015 17:41:18 +0200 Subject: [PATCH 11/24] nvme: merge nvme_dev_reset into nvme_reset_failed_dev And give the resulting function a more descriptive name. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index be35b1d18854..509ad4baccd8 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -84,7 +84,6 @@ static wait_queue_head_t nvme_kthread_wait; static struct class *nvme_class; -static void nvme_reset_failed_dev(struct work_struct *ws); static int nvme_reset(struct nvme_dev *dev); static int nvme_process_cq(struct nvme_queue *nvmeq); @@ -3053,8 +3052,9 @@ static void nvme_dead_ctrl(struct nvme_dev *dev) } } -static void nvme_dev_reset(struct nvme_dev *dev) +static void nvme_reset_work(struct work_struct *ws) { + struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work); bool in_probe = work_busy(&dev->probe_work); nvme_dev_shutdown(dev); @@ -3074,12 +3074,6 @@ static void nvme_dev_reset(struct nvme_dev *dev) schedule_work(&dev->probe_work); } -static void nvme_reset_failed_dev(struct work_struct *ws) -{ - 
struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work); - nvme_dev_reset(dev); -} - static int nvme_reset(struct nvme_dev *dev) { int ret = -EBUSY; @@ -3142,7 +3136,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto free; INIT_LIST_HEAD(&dev->namespaces); - INIT_WORK(&dev->reset_work, nvme_reset_failed_dev); + INIT_WORK(&dev->reset_work, nvme_reset_work); dev->dev = get_device(&pdev->dev); pci_set_drvdata(pdev, dev); result = nvme_set_instance(dev); From 90667892c5a78b47080359883a569a260e9e87ed Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Oct 2015 18:49:23 +0200 Subject: [PATCH 12/24] nvme: factor reset code into a common helper Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 48 +++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 509ad4baccd8..e03a95bd4ee4 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -84,6 +84,7 @@ static wait_queue_head_t nvme_kthread_wait; static struct class *nvme_class; +static int __nvme_reset(struct nvme_dev *dev); static int nvme_reset(struct nvme_dev *dev); static int nvme_process_cq(struct nvme_queue *nvmeq); @@ -1276,17 +1277,13 @@ static void nvme_abort_req(struct request *req) struct nvme_command cmd; if (!nvmeq->qid || cmd_rq->aborted) { - unsigned long flags; - - spin_lock_irqsave(&dev_list_lock, flags); - if (work_busy(&dev->reset_work)) - goto out; - list_del_init(&dev->node); - dev_warn(dev->dev, "I/O %d QID %d timeout, reset controller\n", - req->tag, nvmeq->qid); - queue_work(nvme_workq, &dev->reset_work); - out: - spin_unlock_irqrestore(&dev_list_lock, flags); + spin_lock(&dev_list_lock); + if (!__nvme_reset(dev)) { + dev_warn(dev->dev, + "I/O %d QID %d timeout, reset controller\n", + req->tag, nvmeq->qid); + } + spin_unlock(&dev_list_lock); return; } @@ 
-2081,13 +2078,11 @@ static int nvme_kthread(void *data) if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) || csts & NVME_CSTS_CFS) { - if (work_busy(&dev->reset_work)) - continue; - list_del_init(&dev->node); - dev_warn(dev->dev, - "Failed status: %x, reset controller\n", - readl(&dev->bar->csts)); - queue_work(nvme_workq, &dev->reset_work); + if (!__nvme_reset(dev)) { + dev_warn(dev->dev, + "Failed status: %x, reset controller\n", + readl(&dev->bar->csts)); + } continue; } for (i = 0; i < dev->queue_count; i++) { @@ -3074,19 +3069,24 @@ static void nvme_reset_work(struct work_struct *ws) schedule_work(&dev->probe_work); } +static int __nvme_reset(struct nvme_dev *dev) +{ + if (work_pending(&dev->reset_work)) + return -EBUSY; + list_del_init(&dev->node); + queue_work(nvme_workq, &dev->reset_work); + return 0; +} + static int nvme_reset(struct nvme_dev *dev) { - int ret = -EBUSY; + int ret; if (!dev->admin_q || blk_queue_dying(dev->admin_q)) return -ENODEV; spin_lock(&dev_list_lock); - if (!work_pending(&dev->reset_work)) { - list_del_init(&dev->node); - queue_work(nvme_workq, &dev->reset_work); - ret = 0; - } + ret = __nvme_reset(dev); spin_unlock(&dev_list_lock); if (!ret) { From 3cf519b5a8d4d067e3de19736283c9414402d3a2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Oct 2015 09:49:23 +0200 Subject: [PATCH 13/24] nvme: merge nvme_dev_start, nvme_dev_resume and nvme_async_probe And give the resulting function a sensible name. This keeps all the error handling in a single place and will allow for further improvements to it. 
Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 53 +++++++++++++++------------------------ 1 file changed, 20 insertions(+), 33 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index e03a95bd4ee4..61cfff34c3b8 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -87,6 +87,7 @@ static struct class *nvme_class; static int __nvme_reset(struct nvme_dev *dev); static int nvme_reset(struct nvme_dev *dev); static int nvme_process_cq(struct nvme_queue *nvmeq); +static void nvme_dead_ctrl(struct nvme_dev *dev); struct async_cmd_info { struct kthread_work work; @@ -2949,14 +2950,15 @@ static const struct file_operations nvme_dev_fops = { .compat_ioctl = nvme_dev_ioctl, }; -static int nvme_dev_start(struct nvme_dev *dev) +static void nvme_probe_work(struct work_struct *work) { - int result; + struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work); bool start_thread = false; + int result; result = nvme_dev_map(dev); if (result) - return result; + goto out; result = nvme_configure_admin_queue(dev); if (result) @@ -2991,7 +2993,17 @@ static int nvme_dev_start(struct nvme_dev *dev) goto free_tags; dev->event_limit = 1; - return result; + + if (dev->online_queues < 2) { + dev_warn(dev->dev, "IO queues not created\n"); + nvme_free_queues(dev, 1); + nvme_dev_remove(dev); + } else { + nvme_unfreeze_queues(dev); + nvme_dev_add(dev); + } + + return; free_tags: nvme_dev_remove_admin(dev); @@ -3003,7 +3015,9 @@ static int nvme_dev_start(struct nvme_dev *dev) nvme_dev_list_remove(dev); unmap: nvme_dev_unmap(dev); - return result; + out: + if (!work_busy(&dev->reset_work)) + nvme_dead_ctrl(dev); } static int nvme_remove_dead_ctrl(void *arg) @@ -3017,24 +3031,6 @@ static int nvme_remove_dead_ctrl(void *arg) return 0; } -static int nvme_dev_resume(struct nvme_dev *dev) -{ - int ret; - - ret = nvme_dev_start(dev); - if (ret) - return ret; - if 
(dev->online_queues < 2) { - dev_warn(dev->dev, "IO queues not created\n"); - nvme_free_queues(dev, 1); - nvme_dev_remove(dev); - } else { - nvme_unfreeze_queues(dev); - nvme_dev_add(dev); - } - return 0; -} - static void nvme_dead_ctrl(struct nvme_dev *dev) { dev_warn(dev->dev, "Device failed to resume\n"); @@ -3113,7 +3109,6 @@ static ssize_t nvme_sysfs_reset(struct device *dev, } static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset); -static void nvme_async_probe(struct work_struct *work); static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int node, result = -ENOMEM; @@ -3164,7 +3159,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) INIT_LIST_HEAD(&dev->node); INIT_WORK(&dev->scan_work, nvme_dev_scan); - INIT_WORK(&dev->probe_work, nvme_async_probe); + INIT_WORK(&dev->probe_work, nvme_probe_work); schedule_work(&dev->probe_work); return 0; @@ -3184,14 +3179,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) return result; } -static void nvme_async_probe(struct work_struct *work) -{ - struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work); - - if (nvme_dev_resume(dev) && !work_busy(&dev->reset_work)) - nvme_dead_ctrl(dev); -} - static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) { struct nvme_dev *dev = pci_get_drvdata(pdev); From 2659e57b906562bb020fb093b0c1b670b9700314 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Oct 2015 18:51:31 +0200 Subject: [PATCH 14/24] nvme: properly handle partially initialized queues in nvme_create_io_queues This avoids having to clean up later in a seemingly unrelated place. 
Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 61cfff34c3b8..01a6d1b2d7e5 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -2189,6 +2189,13 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid) kfree(ns); } +/* + * Create I/O queues. Failing to create an I/O queue is not an issue, + * we can continue with less than the desired amount of queues, and + * even a controller without I/O queues can still be used to issue + * admin commands. This might be useful to upgrade a buggy firmware + * for example. + */ static void nvme_create_io_queues(struct nvme_dev *dev) { unsigned i; @@ -2198,8 +2205,10 @@ static void nvme_create_io_queues(struct nvme_dev *dev) break; for (i = dev->online_queues; i <= dev->queue_count - 1; i++) - if (nvme_create_queue(dev->queues[i], i)) + if (nvme_create_queue(dev->queues[i], i)) { + nvme_free_queues(dev, i); break; + } } static int set_queue_count(struct nvme_dev *dev, int count) @@ -2994,9 +3003,12 @@ static void nvme_probe_work(struct work_struct *work) dev->event_limit = 1; + /* + * Keep the controller around but remove all namespaces if we don't have + * any working I/O queue. + */ if (dev->online_queues < 2) { dev_warn(dev->dev, "IO queues not created\n"); - nvme_free_queues(dev, 1); nvme_dev_remove(dev); } else { nvme_unfreeze_queues(dev); From f11bb3e244c4b14e2d0a3b9d7e41895752997170 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Oct 2015 15:46:41 +0200 Subject: [PATCH 15/24] nvme: add a local nvme.h header Add a new drivers/block/nvme.h which contains all the driver internal interface. 
Signed-off-by: Christoph Hellwig Acked-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 3 +- drivers/block/nvme-scsi.c | 2 +- drivers/block/nvme.h | 133 ++++++++++++++++++++++++++++++++++++++ include/linux/nvme.h | 114 -------------------------------- 4 files changed, 136 insertions(+), 116 deletions(-) create mode 100644 drivers/block/nvme.h diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 01a6d1b2d7e5..a20f66a44b96 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -12,7 +12,6 @@ * more details. */ -#include #include #include #include @@ -43,6 +42,8 @@ #include #include +#include "nvme.h" + #define NVME_MINORS (1U << MINORBITS) #define NVME_Q_DEPTH 1024 #define NVME_AQ_DEPTH 256 diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index e5a63f06fb0f..c3d8d3887a31 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c @@ -17,7 +17,6 @@ * each command is translated. */ -#include #include #include #include @@ -45,6 +44,7 @@ #include #include +#include "nvme.h" static int sg_version_num = 30534; /* 2 digits for each component */ diff --git a/drivers/block/nvme.h b/drivers/block/nvme.h new file mode 100644 index 000000000000..c1f41bf3c0f2 --- /dev/null +++ b/drivers/block/nvme.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2011-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#ifndef _NVME_H +#define _NVME_H + +#include +#include +#include +#include + +extern unsigned char nvme_io_timeout; +#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) + +/* + * Represents an NVM Express device. Each nvme_dev is a PCI function. + */ +struct nvme_dev { + struct list_head node; + struct nvme_queue **queues; + struct request_queue *admin_q; + struct blk_mq_tag_set tagset; + struct blk_mq_tag_set admin_tagset; + u32 __iomem *dbs; + struct device *dev; + struct dma_pool *prp_page_pool; + struct dma_pool *prp_small_pool; + int instance; + unsigned queue_count; + unsigned online_queues; + unsigned max_qid; + int q_depth; + u32 db_stride; + u32 ctrl_config; + struct msix_entry *entry; + struct nvme_bar __iomem *bar; + struct list_head namespaces; + struct kref kref; + struct device *device; + struct work_struct reset_work; + struct work_struct probe_work; + struct work_struct scan_work; + char name[12]; + char serial[20]; + char model[40]; + char firmware_rev[8]; + bool subsystem; + u32 max_hw_sectors; + u32 stripe_size; + u32 page_size; + void __iomem *cmb; + dma_addr_t cmb_dma_addr; + u64 cmb_size; + u32 cmbsz; + u16 oncs; + u16 abort_limit; + u8 event_limit; + u8 vwc; +}; + +/* + * An NVM Express namespace is equivalent to a SCSI LUN + */ +struct nvme_ns { + struct list_head list; + + struct nvme_dev *dev; + struct request_queue *queue; + struct gendisk *disk; + struct kref kref; + + unsigned ns_id; + int lba_shift; + u16 ms; + bool ext; + u8 pi_type; + u64 mode_select_num_blocks; + u32 mode_select_block_len; +}; + +/* + * The nvme_iod describes the data in an I/O, including the list of PRP + * entries. You can't see it in this data structure because C doesn't let + * me express that. Use nvme_alloc_iod to ensure there's enough space + * allocated to store the PRP list. + */ +struct nvme_iod { + unsigned long private; /* For the use of the submitter of the I/O */ + int npages; /* In the PRP list. 
0 means small pool in use */ + int offset; /* Of PRP list */ + int nents; /* Used in scatterlist */ + int length; /* Of data, in bytes */ + dma_addr_t first_dma; + struct scatterlist meta_sg[1]; /* metadata requires single contiguous buffer */ + struct scatterlist sg[0]; +}; + +static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) +{ + return (sector >> (ns->lba_shift - 9)); +} + +int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buf, unsigned bufflen); +int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buffer, void __user *ubuffer, unsigned bufflen, + u32 *result, unsigned timeout); +int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id); +int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid, + struct nvme_id_ns **id); +int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log); +int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, + dma_addr_t dma_addr, u32 *result); +int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, + dma_addr_t dma_addr, u32 *result); + +struct sg_io_hdr; + +int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr); +int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg); +int nvme_sg_get_version_num(int __user *ip); + +#endif /* _NVME_H */ diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 7725b4c8b718..364cb9adbbbc 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -16,9 +16,6 @@ #define _LINUX_NVME_H #include -#include -#include -#include struct nvme_bar { __u64 cap; /* Controller Capabilities */ @@ -76,115 +73,4 @@ enum { NVME_CSTS_SHST_MASK = 3 << 2, }; -extern unsigned char nvme_io_timeout; -#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) - -/* - * Represents an NVM Express device. Each nvme_dev is a PCI function. 
- */ -struct nvme_dev { - struct list_head node; - struct nvme_queue **queues; - struct request_queue *admin_q; - struct blk_mq_tag_set tagset; - struct blk_mq_tag_set admin_tagset; - u32 __iomem *dbs; - struct device *dev; - struct dma_pool *prp_page_pool; - struct dma_pool *prp_small_pool; - int instance; - unsigned queue_count; - unsigned online_queues; - unsigned max_qid; - int q_depth; - u32 db_stride; - u32 ctrl_config; - struct msix_entry *entry; - struct nvme_bar __iomem *bar; - struct list_head namespaces; - struct kref kref; - struct device *device; - struct work_struct reset_work; - struct work_struct probe_work; - struct work_struct scan_work; - char name[12]; - char serial[20]; - char model[40]; - char firmware_rev[8]; - bool subsystem; - u32 max_hw_sectors; - u32 stripe_size; - u32 page_size; - void __iomem *cmb; - dma_addr_t cmb_dma_addr; - u64 cmb_size; - u32 cmbsz; - u16 oncs; - u16 abort_limit; - u8 event_limit; - u8 vwc; -}; - -/* - * An NVM Express namespace is equivalent to a SCSI LUN - */ -struct nvme_ns { - struct list_head list; - - struct nvme_dev *dev; - struct request_queue *queue; - struct gendisk *disk; - struct kref kref; - - unsigned ns_id; - int lba_shift; - u16 ms; - bool ext; - u8 pi_type; - u64 mode_select_num_blocks; - u32 mode_select_block_len; -}; - -/* - * The nvme_iod describes the data in an I/O, including the list of PRP - * entries. You can't see it in this data structure because C doesn't let - * me express that. Use nvme_alloc_iod to ensure there's enough space - * allocated to store the PRP list. - */ -struct nvme_iod { - unsigned long private; /* For the use of the submitter of the I/O */ - int npages; /* In the PRP list. 
0 means small pool in use */ - int offset; /* Of PRP list */ - int nents; /* Used in scatterlist */ - int length; /* Of data, in bytes */ - dma_addr_t first_dma; - struct scatterlist meta_sg[1]; /* metadata requires single contiguous buffer */ - struct scatterlist sg[0]; -}; - -static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) -{ - return (sector >> (ns->lba_shift - 9)); -} - -int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, - void *buf, unsigned bufflen); -int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, - void *buffer, void __user *ubuffer, unsigned bufflen, - u32 *result, unsigned timeout); -int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id); -int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid, - struct nvme_id_ns **id); -int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log); -int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, - dma_addr_t dma_addr, u32 *result); -int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, - dma_addr_t dma_addr, u32 *result); - -struct sg_io_hdr; - -int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr); -int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg); -int nvme_sg_get_version_num(int __user *ip); - #endif /* _LINUX_NVME_H */ From 9d99a8dda154f38307d43d9c9aa504bd3703d596 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Oct 2015 15:25:49 +0200 Subject: [PATCH 16/24] nvme: move hardware structures out of the uapi version of nvme.h Currently all NVMe command and completion structures are exposed to userspace through the uapi version of nvme.h. They are not an ABI between the kernel and userspace, and will change in C-incompatible way for future versions of the spec. Move them to the kernel version of the file and rename the uapi header to nvme_ioctl.h so that userspace can easily detect the presence of the new clean header. 
Nvme-cli already carries a local copy of the header, so it won't be affected by this move. Signed-off-by: Christoph Hellwig Acked-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 1 + include/linux/nvme.h | 526 +++++++++++++++++++++++++++- include/uapi/linux/nvme.h | 589 -------------------------------- include/uapi/linux/nvme_ioctl.h | 65 ++++ 4 files changed, 590 insertions(+), 591 deletions(-) delete mode 100644 include/uapi/linux/nvme.h create mode 100644 include/uapi/linux/nvme_ioctl.h diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index a20f66a44b96..a526696d684d 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -42,6 +42,7 @@ #include #include +#include #include "nvme.h" #define NVME_MINORS (1U << MINORBITS) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 364cb9adbbbc..91a805437876 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -15,8 +15,6 @@ #ifndef _LINUX_NVME_H #define _LINUX_NVME_H -#include - struct nvme_bar { __u64 cap; /* Controller Capabilities */ __u32 vs; /* Version */ @@ -73,4 +71,528 @@ enum { NVME_CSTS_SHST_MASK = 3 << 2, }; +struct nvme_id_power_state { + __le16 max_power; /* centiwatts */ + __u8 rsvd2; + __u8 flags; + __le32 entry_lat; /* microseconds */ + __le32 exit_lat; /* microseconds */ + __u8 read_tput; + __u8 read_lat; + __u8 write_tput; + __u8 write_lat; + __le16 idle_power; + __u8 idle_scale; + __u8 rsvd19; + __le16 active_power; + __u8 active_work_scale; + __u8 rsvd23[9]; +}; + +enum { + NVME_PS_FLAGS_MAX_POWER_SCALE = 1 << 0, + NVME_PS_FLAGS_NON_OP_STATE = 1 << 1, +}; + +struct nvme_id_ctrl { + __le16 vid; + __le16 ssvid; + char sn[20]; + char mn[40]; + char fr[8]; + __u8 rab; + __u8 ieee[3]; + __u8 mic; + __u8 mdts; + __u16 cntlid; + __u32 ver; + __u8 rsvd84[172]; + __le16 oacs; + __u8 acl; + __u8 aerl; + __u8 frmw; + __u8 lpa; + __u8 elpe; + __u8 npss; + __u8 avscc; + __u8 apsta; + __le16 wctemp; + __le16 cctemp; + __u8 
rsvd270[242]; + __u8 sqes; + __u8 cqes; + __u8 rsvd514[2]; + __le32 nn; + __le16 oncs; + __le16 fuses; + __u8 fna; + __u8 vwc; + __le16 awun; + __le16 awupf; + __u8 nvscc; + __u8 rsvd531; + __le16 acwu; + __u8 rsvd534[2]; + __le32 sgls; + __u8 rsvd540[1508]; + struct nvme_id_power_state psd[32]; + __u8 vs[1024]; +}; + +enum { + NVME_CTRL_ONCS_COMPARE = 1 << 0, + NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, + NVME_CTRL_ONCS_DSM = 1 << 2, + NVME_CTRL_VWC_PRESENT = 1 << 0, +}; + +struct nvme_lbaf { + __le16 ms; + __u8 ds; + __u8 rp; +}; + +struct nvme_id_ns { + __le64 nsze; + __le64 ncap; + __le64 nuse; + __u8 nsfeat; + __u8 nlbaf; + __u8 flbas; + __u8 mc; + __u8 dpc; + __u8 dps; + __u8 nmic; + __u8 rescap; + __u8 fpi; + __u8 rsvd33; + __le16 nawun; + __le16 nawupf; + __le16 nacwu; + __le16 nabsn; + __le16 nabo; + __le16 nabspf; + __u16 rsvd46; + __le64 nvmcap[2]; + __u8 rsvd64[40]; + __u8 nguid[16]; + __u8 eui64[8]; + struct nvme_lbaf lbaf[16]; + __u8 rsvd192[192]; + __u8 vs[3712]; +}; + +enum { + NVME_NS_FEAT_THIN = 1 << 0, + NVME_NS_FLBAS_LBA_MASK = 0xf, + NVME_NS_FLBAS_META_EXT = 0x10, + NVME_LBAF_RP_BEST = 0, + NVME_LBAF_RP_BETTER = 1, + NVME_LBAF_RP_GOOD = 2, + NVME_LBAF_RP_DEGRADED = 3, + NVME_NS_DPC_PI_LAST = 1 << 4, + NVME_NS_DPC_PI_FIRST = 1 << 3, + NVME_NS_DPC_PI_TYPE3 = 1 << 2, + NVME_NS_DPC_PI_TYPE2 = 1 << 1, + NVME_NS_DPC_PI_TYPE1 = 1 << 0, + NVME_NS_DPS_PI_FIRST = 1 << 3, + NVME_NS_DPS_PI_MASK = 0x7, + NVME_NS_DPS_PI_TYPE1 = 1, + NVME_NS_DPS_PI_TYPE2 = 2, + NVME_NS_DPS_PI_TYPE3 = 3, +}; + +struct nvme_smart_log { + __u8 critical_warning; + __u8 temperature[2]; + __u8 avail_spare; + __u8 spare_thresh; + __u8 percent_used; + __u8 rsvd6[26]; + __u8 data_units_read[16]; + __u8 data_units_written[16]; + __u8 host_reads[16]; + __u8 host_writes[16]; + __u8 ctrl_busy_time[16]; + __u8 power_cycles[16]; + __u8 power_on_hours[16]; + __u8 unsafe_shutdowns[16]; + __u8 media_errors[16]; + __u8 num_err_log_entries[16]; + __le32 warning_temp_time; + __le32 
critical_comp_time; + __le16 temp_sensor[8]; + __u8 rsvd216[296]; +}; + +enum { + NVME_SMART_CRIT_SPARE = 1 << 0, + NVME_SMART_CRIT_TEMPERATURE = 1 << 1, + NVME_SMART_CRIT_RELIABILITY = 1 << 2, + NVME_SMART_CRIT_MEDIA = 1 << 3, + NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4, +}; + +enum { + NVME_AER_NOTICE_NS_CHANGED = 0x0002, +}; + +struct nvme_lba_range_type { + __u8 type; + __u8 attributes; + __u8 rsvd2[14]; + __u64 slba; + __u64 nlb; + __u8 guid[16]; + __u8 rsvd48[16]; +}; + +enum { + NVME_LBART_TYPE_FS = 0x01, + NVME_LBART_TYPE_RAID = 0x02, + NVME_LBART_TYPE_CACHE = 0x03, + NVME_LBART_TYPE_SWAP = 0x04, + + NVME_LBART_ATTRIB_TEMP = 1 << 0, + NVME_LBART_ATTRIB_HIDE = 1 << 1, +}; + +struct nvme_reservation_status { + __le32 gen; + __u8 rtype; + __u8 regctl[2]; + __u8 resv5[2]; + __u8 ptpls; + __u8 resv10[13]; + struct { + __le16 cntlid; + __u8 rcsts; + __u8 resv3[5]; + __le64 hostid; + __le64 rkey; + } regctl_ds[]; +}; + +/* I/O commands */ + +enum nvme_opcode { + nvme_cmd_flush = 0x00, + nvme_cmd_write = 0x01, + nvme_cmd_read = 0x02, + nvme_cmd_write_uncor = 0x04, + nvme_cmd_compare = 0x05, + nvme_cmd_write_zeroes = 0x08, + nvme_cmd_dsm = 0x09, + nvme_cmd_resv_register = 0x0d, + nvme_cmd_resv_report = 0x0e, + nvme_cmd_resv_acquire = 0x11, + nvme_cmd_resv_release = 0x15, +}; + +struct nvme_common_command { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __le32 cdw2[2]; + __le64 metadata; + __le64 prp1; + __le64 prp2; + __le32 cdw10[6]; +}; + +struct nvme_rw_command { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2; + __le64 metadata; + __le64 prp1; + __le64 prp2; + __le64 slba; + __le16 length; + __le16 control; + __le32 dsmgmt; + __le32 reftag; + __le16 apptag; + __le16 appmask; +}; + +enum { + NVME_RW_LR = 1 << 15, + NVME_RW_FUA = 1 << 14, + NVME_RW_DSM_FREQ_UNSPEC = 0, + NVME_RW_DSM_FREQ_TYPICAL = 1, + NVME_RW_DSM_FREQ_RARE = 2, + NVME_RW_DSM_FREQ_READS = 3, + NVME_RW_DSM_FREQ_WRITES = 4, + NVME_RW_DSM_FREQ_RW = 5, + 
NVME_RW_DSM_FREQ_ONCE = 6, + NVME_RW_DSM_FREQ_PREFETCH = 7, + NVME_RW_DSM_FREQ_TEMP = 8, + NVME_RW_DSM_LATENCY_NONE = 0 << 4, + NVME_RW_DSM_LATENCY_IDLE = 1 << 4, + NVME_RW_DSM_LATENCY_NORM = 2 << 4, + NVME_RW_DSM_LATENCY_LOW = 3 << 4, + NVME_RW_DSM_SEQ_REQ = 1 << 6, + NVME_RW_DSM_COMPRESSED = 1 << 7, + NVME_RW_PRINFO_PRCHK_REF = 1 << 10, + NVME_RW_PRINFO_PRCHK_APP = 1 << 11, + NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, + NVME_RW_PRINFO_PRACT = 1 << 13, +}; + +struct nvme_dsm_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + __le64 prp1; + __le64 prp2; + __le32 nr; + __le32 attributes; + __u32 rsvd12[4]; +}; + +enum { + NVME_DSMGMT_IDR = 1 << 0, + NVME_DSMGMT_IDW = 1 << 1, + NVME_DSMGMT_AD = 1 << 2, +}; + +struct nvme_dsm_range { + __le32 cattr; + __le32 nlb; + __le64 slba; +}; + +/* Admin commands */ + +enum nvme_admin_opcode { + nvme_admin_delete_sq = 0x00, + nvme_admin_create_sq = 0x01, + nvme_admin_get_log_page = 0x02, + nvme_admin_delete_cq = 0x04, + nvme_admin_create_cq = 0x05, + nvme_admin_identify = 0x06, + nvme_admin_abort_cmd = 0x08, + nvme_admin_set_features = 0x09, + nvme_admin_get_features = 0x0a, + nvme_admin_async_event = 0x0c, + nvme_admin_activate_fw = 0x10, + nvme_admin_download_fw = 0x11, + nvme_admin_format_nvm = 0x80, + nvme_admin_security_send = 0x81, + nvme_admin_security_recv = 0x82, +}; + +enum { + NVME_QUEUE_PHYS_CONTIG = (1 << 0), + NVME_CQ_IRQ_ENABLED = (1 << 1), + NVME_SQ_PRIO_URGENT = (0 << 1), + NVME_SQ_PRIO_HIGH = (1 << 1), + NVME_SQ_PRIO_MEDIUM = (2 << 1), + NVME_SQ_PRIO_LOW = (3 << 1), + NVME_FEAT_ARBITRATION = 0x01, + NVME_FEAT_POWER_MGMT = 0x02, + NVME_FEAT_LBA_RANGE = 0x03, + NVME_FEAT_TEMP_THRESH = 0x04, + NVME_FEAT_ERR_RECOVERY = 0x05, + NVME_FEAT_VOLATILE_WC = 0x06, + NVME_FEAT_NUM_QUEUES = 0x07, + NVME_FEAT_IRQ_COALESCE = 0x08, + NVME_FEAT_IRQ_CONFIG = 0x09, + NVME_FEAT_WRITE_ATOMIC = 0x0a, + NVME_FEAT_ASYNC_EVENT = 0x0b, + NVME_FEAT_AUTO_PST = 0x0c, + NVME_FEAT_SW_PROGRESS = 0x80, + 
NVME_FEAT_HOST_ID = 0x81, + NVME_FEAT_RESV_MASK = 0x82, + NVME_FEAT_RESV_PERSIST = 0x83, + NVME_LOG_ERROR = 0x01, + NVME_LOG_SMART = 0x02, + NVME_LOG_FW_SLOT = 0x03, + NVME_LOG_RESERVATION = 0x80, + NVME_FWACT_REPL = (0 << 3), + NVME_FWACT_REPL_ACTV = (1 << 3), + NVME_FWACT_ACTV = (2 << 3), +}; + +struct nvme_identify { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + __le64 prp1; + __le64 prp2; + __le32 cns; + __u32 rsvd11[5]; +}; + +struct nvme_features { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + __le64 prp1; + __le64 prp2; + __le32 fid; + __le32 dword11; + __u32 rsvd12[4]; +}; + +struct nvme_create_cq { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[5]; + __le64 prp1; + __u64 rsvd8; + __le16 cqid; + __le16 qsize; + __le16 cq_flags; + __le16 irq_vector; + __u32 rsvd12[4]; +}; + +struct nvme_create_sq { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[5]; + __le64 prp1; + __u64 rsvd8; + __le16 sqid; + __le16 qsize; + __le16 sq_flags; + __le16 cqid; + __u32 rsvd12[4]; +}; + +struct nvme_delete_queue { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[9]; + __le16 qid; + __u16 rsvd10; + __u32 rsvd11[5]; +}; + +struct nvme_abort_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[9]; + __le16 sqid; + __u16 cid; + __u32 rsvd11[5]; +}; + +struct nvme_download_firmware { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[5]; + __le64 prp1; + __le64 prp2; + __le32 numd; + __le32 offset; + __u32 rsvd12[4]; +}; + +struct nvme_format_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[4]; + __le32 cdw10; + __u32 rsvd11[5]; +}; + +struct nvme_command { + union { + struct nvme_common_command common; + struct nvme_rw_command rw; + struct nvme_identify identify; + struct nvme_features features; + struct nvme_create_cq create_cq; + struct nvme_create_sq create_sq; + struct nvme_delete_queue 
delete_queue; + struct nvme_download_firmware dlfw; + struct nvme_format_cmd format; + struct nvme_dsm_cmd dsm; + struct nvme_abort_cmd abort; + }; +}; + +enum { + NVME_SC_SUCCESS = 0x0, + NVME_SC_INVALID_OPCODE = 0x1, + NVME_SC_INVALID_FIELD = 0x2, + NVME_SC_CMDID_CONFLICT = 0x3, + NVME_SC_DATA_XFER_ERROR = 0x4, + NVME_SC_POWER_LOSS = 0x5, + NVME_SC_INTERNAL = 0x6, + NVME_SC_ABORT_REQ = 0x7, + NVME_SC_ABORT_QUEUE = 0x8, + NVME_SC_FUSED_FAIL = 0x9, + NVME_SC_FUSED_MISSING = 0xa, + NVME_SC_INVALID_NS = 0xb, + NVME_SC_CMD_SEQ_ERROR = 0xc, + NVME_SC_SGL_INVALID_LAST = 0xd, + NVME_SC_SGL_INVALID_COUNT = 0xe, + NVME_SC_SGL_INVALID_DATA = 0xf, + NVME_SC_SGL_INVALID_METADATA = 0x10, + NVME_SC_SGL_INVALID_TYPE = 0x11, + NVME_SC_LBA_RANGE = 0x80, + NVME_SC_CAP_EXCEEDED = 0x81, + NVME_SC_NS_NOT_READY = 0x82, + NVME_SC_RESERVATION_CONFLICT = 0x83, + NVME_SC_CQ_INVALID = 0x100, + NVME_SC_QID_INVALID = 0x101, + NVME_SC_QUEUE_SIZE = 0x102, + NVME_SC_ABORT_LIMIT = 0x103, + NVME_SC_ABORT_MISSING = 0x104, + NVME_SC_ASYNC_LIMIT = 0x105, + NVME_SC_FIRMWARE_SLOT = 0x106, + NVME_SC_FIRMWARE_IMAGE = 0x107, + NVME_SC_INVALID_VECTOR = 0x108, + NVME_SC_INVALID_LOG_PAGE = 0x109, + NVME_SC_INVALID_FORMAT = 0x10a, + NVME_SC_FIRMWARE_NEEDS_RESET = 0x10b, + NVME_SC_INVALID_QUEUE = 0x10c, + NVME_SC_FEATURE_NOT_SAVEABLE = 0x10d, + NVME_SC_FEATURE_NOT_CHANGEABLE = 0x10e, + NVME_SC_FEATURE_NOT_PER_NS = 0x10f, + NVME_SC_FW_NEEDS_RESET_SUBSYS = 0x110, + NVME_SC_BAD_ATTRIBUTES = 0x180, + NVME_SC_INVALID_PI = 0x181, + NVME_SC_READ_ONLY = 0x182, + NVME_SC_WRITE_FAULT = 0x280, + NVME_SC_READ_ERROR = 0x281, + NVME_SC_GUARD_CHECK = 0x282, + NVME_SC_APPTAG_CHECK = 0x283, + NVME_SC_REFTAG_CHECK = 0x284, + NVME_SC_COMPARE_FAILED = 0x285, + NVME_SC_ACCESS_DENIED = 0x286, + NVME_SC_DNR = 0x4000, +}; + +struct nvme_completion { + __le32 result; /* Used by admin commands to return data */ + __u32 rsvd; + __le16 sq_head; /* how much of this queue may be reclaimed */ + __le16 sq_id; /* submission queue that 
generated this entry */ + __u16 command_id; /* of the command which completed */ + __le16 status; /* did the command fail, and if so, why? */ +}; + +#define NVME_VS(major, minor) (((major) << 16) | ((minor) << 8)) + #endif /* _LINUX_NVME_H */ diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h deleted file mode 100644 index 8864194a4151..000000000000 --- a/include/uapi/linux/nvme.h +++ /dev/null @@ -1,589 +0,0 @@ -/* - * Definitions for the NVM Express interface - * Copyright (c) 2011-2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ - -#ifndef _UAPI_LINUX_NVME_H -#define _UAPI_LINUX_NVME_H - -#include <linux/types.h> - -struct nvme_id_power_state { - __le16 max_power; /* centiwatts */ - __u8 rsvd2; - __u8 flags; - __le32 entry_lat; /* microseconds */ - __le32 exit_lat; /* microseconds */ - __u8 read_tput; - __u8 read_lat; - __u8 write_tput; - __u8 write_lat; - __le16 idle_power; - __u8 idle_scale; - __u8 rsvd19; - __le16 active_power; - __u8 active_work_scale; - __u8 rsvd23[9]; -}; - -enum { - NVME_PS_FLAGS_MAX_POWER_SCALE = 1 << 0, - NVME_PS_FLAGS_NON_OP_STATE = 1 << 1, -}; - -struct nvme_id_ctrl { - __le16 vid; - __le16 ssvid; - char sn[20]; - char mn[40]; - char fr[8]; - __u8 rab; - __u8 ieee[3]; - __u8 mic; - __u8 mdts; - __u16 cntlid; - __u32 ver; - __u8 rsvd84[172]; - __le16 oacs; - __u8 acl; - __u8 aerl; - __u8 frmw; - __u8 lpa; - __u8 elpe; - __u8 npss; - __u8 avscc; - __u8 apsta; - __le16 wctemp; - __le16 cctemp; - __u8 rsvd270[242]; - __u8 sqes; - __u8 cqes; - __u8 rsvd514[2]; - __le32 nn; - __le16 oncs; - __le16 fuses; - 
__u8 fna; - __u8 vwc; - __le16 awun; - __le16 awupf; - __u8 nvscc; - __u8 rsvd531; - __le16 acwu; - __u8 rsvd534[2]; - __le32 sgls; - __u8 rsvd540[1508]; - struct nvme_id_power_state psd[32]; - __u8 vs[1024]; -}; - -enum { - NVME_CTRL_ONCS_COMPARE = 1 << 0, - NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, - NVME_CTRL_ONCS_DSM = 1 << 2, - NVME_CTRL_VWC_PRESENT = 1 << 0, -}; - -struct nvme_lbaf { - __le16 ms; - __u8 ds; - __u8 rp; -}; - -struct nvme_id_ns { - __le64 nsze; - __le64 ncap; - __le64 nuse; - __u8 nsfeat; - __u8 nlbaf; - __u8 flbas; - __u8 mc; - __u8 dpc; - __u8 dps; - __u8 nmic; - __u8 rescap; - __u8 fpi; - __u8 rsvd33; - __le16 nawun; - __le16 nawupf; - __le16 nacwu; - __le16 nabsn; - __le16 nabo; - __le16 nabspf; - __u16 rsvd46; - __le64 nvmcap[2]; - __u8 rsvd64[40]; - __u8 nguid[16]; - __u8 eui64[8]; - struct nvme_lbaf lbaf[16]; - __u8 rsvd192[192]; - __u8 vs[3712]; -}; - -enum { - NVME_NS_FEAT_THIN = 1 << 0, - NVME_NS_FLBAS_LBA_MASK = 0xf, - NVME_NS_FLBAS_META_EXT = 0x10, - NVME_LBAF_RP_BEST = 0, - NVME_LBAF_RP_BETTER = 1, - NVME_LBAF_RP_GOOD = 2, - NVME_LBAF_RP_DEGRADED = 3, - NVME_NS_DPC_PI_LAST = 1 << 4, - NVME_NS_DPC_PI_FIRST = 1 << 3, - NVME_NS_DPC_PI_TYPE3 = 1 << 2, - NVME_NS_DPC_PI_TYPE2 = 1 << 1, - NVME_NS_DPC_PI_TYPE1 = 1 << 0, - NVME_NS_DPS_PI_FIRST = 1 << 3, - NVME_NS_DPS_PI_MASK = 0x7, - NVME_NS_DPS_PI_TYPE1 = 1, - NVME_NS_DPS_PI_TYPE2 = 2, - NVME_NS_DPS_PI_TYPE3 = 3, -}; - -struct nvme_smart_log { - __u8 critical_warning; - __u8 temperature[2]; - __u8 avail_spare; - __u8 spare_thresh; - __u8 percent_used; - __u8 rsvd6[26]; - __u8 data_units_read[16]; - __u8 data_units_written[16]; - __u8 host_reads[16]; - __u8 host_writes[16]; - __u8 ctrl_busy_time[16]; - __u8 power_cycles[16]; - __u8 power_on_hours[16]; - __u8 unsafe_shutdowns[16]; - __u8 media_errors[16]; - __u8 num_err_log_entries[16]; - __le32 warning_temp_time; - __le32 critical_comp_time; - __le16 temp_sensor[8]; - __u8 rsvd216[296]; -}; - -enum { - NVME_SMART_CRIT_SPARE = 1 << 0, - 
NVME_SMART_CRIT_TEMPERATURE = 1 << 1, - NVME_SMART_CRIT_RELIABILITY = 1 << 2, - NVME_SMART_CRIT_MEDIA = 1 << 3, - NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4, -}; - -enum { - NVME_AER_NOTICE_NS_CHANGED = 0x0002, -}; - -struct nvme_lba_range_type { - __u8 type; - __u8 attributes; - __u8 rsvd2[14]; - __u64 slba; - __u64 nlb; - __u8 guid[16]; - __u8 rsvd48[16]; -}; - -enum { - NVME_LBART_TYPE_FS = 0x01, - NVME_LBART_TYPE_RAID = 0x02, - NVME_LBART_TYPE_CACHE = 0x03, - NVME_LBART_TYPE_SWAP = 0x04, - - NVME_LBART_ATTRIB_TEMP = 1 << 0, - NVME_LBART_ATTRIB_HIDE = 1 << 1, -}; - -struct nvme_reservation_status { - __le32 gen; - __u8 rtype; - __u8 regctl[2]; - __u8 resv5[2]; - __u8 ptpls; - __u8 resv10[13]; - struct { - __le16 cntlid; - __u8 rcsts; - __u8 resv3[5]; - __le64 hostid; - __le64 rkey; - } regctl_ds[]; -}; - -/* I/O commands */ - -enum nvme_opcode { - nvme_cmd_flush = 0x00, - nvme_cmd_write = 0x01, - nvme_cmd_read = 0x02, - nvme_cmd_write_uncor = 0x04, - nvme_cmd_compare = 0x05, - nvme_cmd_write_zeroes = 0x08, - nvme_cmd_dsm = 0x09, - nvme_cmd_resv_register = 0x0d, - nvme_cmd_resv_report = 0x0e, - nvme_cmd_resv_acquire = 0x11, - nvme_cmd_resv_release = 0x15, -}; - -struct nvme_common_command { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __le32 cdw2[2]; - __le64 metadata; - __le64 prp1; - __le64 prp2; - __le32 cdw10[6]; -}; - -struct nvme_rw_command { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd2; - __le64 metadata; - __le64 prp1; - __le64 prp2; - __le64 slba; - __le16 length; - __le16 control; - __le32 dsmgmt; - __le32 reftag; - __le16 apptag; - __le16 appmask; -}; - -enum { - NVME_RW_LR = 1 << 15, - NVME_RW_FUA = 1 << 14, - NVME_RW_DSM_FREQ_UNSPEC = 0, - NVME_RW_DSM_FREQ_TYPICAL = 1, - NVME_RW_DSM_FREQ_RARE = 2, - NVME_RW_DSM_FREQ_READS = 3, - NVME_RW_DSM_FREQ_WRITES = 4, - NVME_RW_DSM_FREQ_RW = 5, - NVME_RW_DSM_FREQ_ONCE = 6, - NVME_RW_DSM_FREQ_PREFETCH = 7, - NVME_RW_DSM_FREQ_TEMP = 8, - NVME_RW_DSM_LATENCY_NONE 
= 0 << 4, - NVME_RW_DSM_LATENCY_IDLE = 1 << 4, - NVME_RW_DSM_LATENCY_NORM = 2 << 4, - NVME_RW_DSM_LATENCY_LOW = 3 << 4, - NVME_RW_DSM_SEQ_REQ = 1 << 6, - NVME_RW_DSM_COMPRESSED = 1 << 7, - NVME_RW_PRINFO_PRCHK_REF = 1 << 10, - NVME_RW_PRINFO_PRCHK_APP = 1 << 11, - NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, - NVME_RW_PRINFO_PRACT = 1 << 13, -}; - -struct nvme_dsm_cmd { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd2[2]; - __le64 prp1; - __le64 prp2; - __le32 nr; - __le32 attributes; - __u32 rsvd12[4]; -}; - -enum { - NVME_DSMGMT_IDR = 1 << 0, - NVME_DSMGMT_IDW = 1 << 1, - NVME_DSMGMT_AD = 1 << 2, -}; - -struct nvme_dsm_range { - __le32 cattr; - __le32 nlb; - __le64 slba; -}; - -/* Admin commands */ - -enum nvme_admin_opcode { - nvme_admin_delete_sq = 0x00, - nvme_admin_create_sq = 0x01, - nvme_admin_get_log_page = 0x02, - nvme_admin_delete_cq = 0x04, - nvme_admin_create_cq = 0x05, - nvme_admin_identify = 0x06, - nvme_admin_abort_cmd = 0x08, - nvme_admin_set_features = 0x09, - nvme_admin_get_features = 0x0a, - nvme_admin_async_event = 0x0c, - nvme_admin_activate_fw = 0x10, - nvme_admin_download_fw = 0x11, - nvme_admin_format_nvm = 0x80, - nvme_admin_security_send = 0x81, - nvme_admin_security_recv = 0x82, -}; - -enum { - NVME_QUEUE_PHYS_CONTIG = (1 << 0), - NVME_CQ_IRQ_ENABLED = (1 << 1), - NVME_SQ_PRIO_URGENT = (0 << 1), - NVME_SQ_PRIO_HIGH = (1 << 1), - NVME_SQ_PRIO_MEDIUM = (2 << 1), - NVME_SQ_PRIO_LOW = (3 << 1), - NVME_FEAT_ARBITRATION = 0x01, - NVME_FEAT_POWER_MGMT = 0x02, - NVME_FEAT_LBA_RANGE = 0x03, - NVME_FEAT_TEMP_THRESH = 0x04, - NVME_FEAT_ERR_RECOVERY = 0x05, - NVME_FEAT_VOLATILE_WC = 0x06, - NVME_FEAT_NUM_QUEUES = 0x07, - NVME_FEAT_IRQ_COALESCE = 0x08, - NVME_FEAT_IRQ_CONFIG = 0x09, - NVME_FEAT_WRITE_ATOMIC = 0x0a, - NVME_FEAT_ASYNC_EVENT = 0x0b, - NVME_FEAT_AUTO_PST = 0x0c, - NVME_FEAT_SW_PROGRESS = 0x80, - NVME_FEAT_HOST_ID = 0x81, - NVME_FEAT_RESV_MASK = 0x82, - NVME_FEAT_RESV_PERSIST = 0x83, - NVME_LOG_ERROR = 0x01, - 
NVME_LOG_SMART = 0x02, - NVME_LOG_FW_SLOT = 0x03, - NVME_LOG_RESERVATION = 0x80, - NVME_FWACT_REPL = (0 << 3), - NVME_FWACT_REPL_ACTV = (1 << 3), - NVME_FWACT_ACTV = (2 << 3), -}; - -struct nvme_identify { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd2[2]; - __le64 prp1; - __le64 prp2; - __le32 cns; - __u32 rsvd11[5]; -}; - -struct nvme_features { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd2[2]; - __le64 prp1; - __le64 prp2; - __le32 fid; - __le32 dword11; - __u32 rsvd12[4]; -}; - -struct nvme_create_cq { - __u8 opcode; - __u8 flags; - __u16 command_id; - __u32 rsvd1[5]; - __le64 prp1; - __u64 rsvd8; - __le16 cqid; - __le16 qsize; - __le16 cq_flags; - __le16 irq_vector; - __u32 rsvd12[4]; -}; - -struct nvme_create_sq { - __u8 opcode; - __u8 flags; - __u16 command_id; - __u32 rsvd1[5]; - __le64 prp1; - __u64 rsvd8; - __le16 sqid; - __le16 qsize; - __le16 sq_flags; - __le16 cqid; - __u32 rsvd12[4]; -}; - -struct nvme_delete_queue { - __u8 opcode; - __u8 flags; - __u16 command_id; - __u32 rsvd1[9]; - __le16 qid; - __u16 rsvd10; - __u32 rsvd11[5]; -}; - -struct nvme_abort_cmd { - __u8 opcode; - __u8 flags; - __u16 command_id; - __u32 rsvd1[9]; - __le16 sqid; - __u16 cid; - __u32 rsvd11[5]; -}; - -struct nvme_download_firmware { - __u8 opcode; - __u8 flags; - __u16 command_id; - __u32 rsvd1[5]; - __le64 prp1; - __le64 prp2; - __le32 numd; - __le32 offset; - __u32 rsvd12[4]; -}; - -struct nvme_format_cmd { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd2[4]; - __le32 cdw10; - __u32 rsvd11[5]; -}; - -struct nvme_command { - union { - struct nvme_common_command common; - struct nvme_rw_command rw; - struct nvme_identify identify; - struct nvme_features features; - struct nvme_create_cq create_cq; - struct nvme_create_sq create_sq; - struct nvme_delete_queue delete_queue; - struct nvme_download_firmware dlfw; - struct nvme_format_cmd format; - struct nvme_dsm_cmd dsm; - struct 
nvme_abort_cmd abort; - }; -}; - -enum { - NVME_SC_SUCCESS = 0x0, - NVME_SC_INVALID_OPCODE = 0x1, - NVME_SC_INVALID_FIELD = 0x2, - NVME_SC_CMDID_CONFLICT = 0x3, - NVME_SC_DATA_XFER_ERROR = 0x4, - NVME_SC_POWER_LOSS = 0x5, - NVME_SC_INTERNAL = 0x6, - NVME_SC_ABORT_REQ = 0x7, - NVME_SC_ABORT_QUEUE = 0x8, - NVME_SC_FUSED_FAIL = 0x9, - NVME_SC_FUSED_MISSING = 0xa, - NVME_SC_INVALID_NS = 0xb, - NVME_SC_CMD_SEQ_ERROR = 0xc, - NVME_SC_SGL_INVALID_LAST = 0xd, - NVME_SC_SGL_INVALID_COUNT = 0xe, - NVME_SC_SGL_INVALID_DATA = 0xf, - NVME_SC_SGL_INVALID_METADATA = 0x10, - NVME_SC_SGL_INVALID_TYPE = 0x11, - NVME_SC_LBA_RANGE = 0x80, - NVME_SC_CAP_EXCEEDED = 0x81, - NVME_SC_NS_NOT_READY = 0x82, - NVME_SC_RESERVATION_CONFLICT = 0x83, - NVME_SC_CQ_INVALID = 0x100, - NVME_SC_QID_INVALID = 0x101, - NVME_SC_QUEUE_SIZE = 0x102, - NVME_SC_ABORT_LIMIT = 0x103, - NVME_SC_ABORT_MISSING = 0x104, - NVME_SC_ASYNC_LIMIT = 0x105, - NVME_SC_FIRMWARE_SLOT = 0x106, - NVME_SC_FIRMWARE_IMAGE = 0x107, - NVME_SC_INVALID_VECTOR = 0x108, - NVME_SC_INVALID_LOG_PAGE = 0x109, - NVME_SC_INVALID_FORMAT = 0x10a, - NVME_SC_FIRMWARE_NEEDS_RESET = 0x10b, - NVME_SC_INVALID_QUEUE = 0x10c, - NVME_SC_FEATURE_NOT_SAVEABLE = 0x10d, - NVME_SC_FEATURE_NOT_CHANGEABLE = 0x10e, - NVME_SC_FEATURE_NOT_PER_NS = 0x10f, - NVME_SC_FW_NEEDS_RESET_SUBSYS = 0x110, - NVME_SC_BAD_ATTRIBUTES = 0x180, - NVME_SC_INVALID_PI = 0x181, - NVME_SC_READ_ONLY = 0x182, - NVME_SC_WRITE_FAULT = 0x280, - NVME_SC_READ_ERROR = 0x281, - NVME_SC_GUARD_CHECK = 0x282, - NVME_SC_APPTAG_CHECK = 0x283, - NVME_SC_REFTAG_CHECK = 0x284, - NVME_SC_COMPARE_FAILED = 0x285, - NVME_SC_ACCESS_DENIED = 0x286, - NVME_SC_DNR = 0x4000, -}; - -struct nvme_completion { - __le32 result; /* Used by admin commands to return data */ - __u32 rsvd; - __le16 sq_head; /* how much of this queue may be reclaimed */ - __le16 sq_id; /* submission queue that generated this entry */ - __u16 command_id; /* of the command which completed */ - __le16 status; /* did the command fail, and 
if so, why? */ -}; - -struct nvme_user_io { - __u8 opcode; - __u8 flags; - __u16 control; - __u16 nblocks; - __u16 rsvd; - __u64 metadata; - __u64 addr; - __u64 slba; - __u32 dsmgmt; - __u32 reftag; - __u16 apptag; - __u16 appmask; -}; - -struct nvme_passthru_cmd { - __u8 opcode; - __u8 flags; - __u16 rsvd1; - __u32 nsid; - __u32 cdw2; - __u32 cdw3; - __u64 metadata; - __u64 addr; - __u32 metadata_len; - __u32 data_len; - __u32 cdw10; - __u32 cdw11; - __u32 cdw12; - __u32 cdw13; - __u32 cdw14; - __u32 cdw15; - __u32 timeout_ms; - __u32 result; -}; - -#define NVME_VS(major, minor) (((major) << 16) | ((minor) << 8)) - -#define nvme_admin_cmd nvme_passthru_cmd - -#define NVME_IOCTL_ID _IO('N', 0x40) -#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd) -#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io) -#define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd) -#define NVME_IOCTL_RESET _IO('N', 0x44) -#define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45) - -#endif /* _UAPI_LINUX_NVME_H */ diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h new file mode 100644 index 000000000000..c4b2a3f90829 --- /dev/null +++ b/include/uapi/linux/nvme_ioctl.h @@ -0,0 +1,65 @@ +/* + * Definitions for the NVM Express ioctl interface + * Copyright (c) 2011-2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ */ + +#ifndef _UAPI_LINUX_NVME_IOCTL_H +#define _UAPI_LINUX_NVME_IOCTL_H + +#include <linux/types.h> + +struct nvme_user_io { + __u8 opcode; + __u8 flags; + __u16 control; + __u16 nblocks; + __u16 rsvd; + __u64 metadata; + __u64 addr; + __u64 slba; + __u32 dsmgmt; + __u32 reftag; + __u16 apptag; + __u16 appmask; +}; + +struct nvme_passthru_cmd { + __u8 opcode; + __u8 flags; + __u16 rsvd1; + __u32 nsid; + __u32 cdw2; + __u32 cdw3; + __u64 metadata; + __u64 addr; + __u32 metadata_len; + __u32 data_len; + __u32 cdw10; + __u32 cdw11; + __u32 cdw12; + __u32 cdw13; + __u32 cdw14; + __u32 cdw15; + __u32 timeout_ms; + __u32 result; +}; + +#define nvme_admin_cmd nvme_passthru_cmd + +#define NVME_IOCTL_ID _IO('N', 0x40) +#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd) +#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io) +#define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd) +#define NVME_IOCTL_RESET _IO('N', 0x44) +#define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45) + +#endif /* _UAPI_LINUX_NVME_IOCTL_H */ From 08c69640cfcbdcc7aaed31c05bbfaf03bb60611c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Oct 2015 15:27:16 +0200 Subject: [PATCH 17/24] nvme.h: add missing nvme_id_ctrl endianess annotations Signed-off-by: Christoph Hellwig Acked-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/nvme.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 91a805437876..9668d3571497 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -104,8 +104,8 @@ struct nvme_id_ctrl { __u8 ieee[3]; __u8 mic; __u8 mdts; - __u16 cntlid; - __u32 ver; + __le16 cntlid; + __le32 ver; __u8 rsvd84[172]; __le16 oacs; __u8 acl; From 57dacad5f2288e3de91f99b29f07b4a2793446d2 Mon Sep 17 00:00:00 2001 From: Jay Sternberg Date: Fri, 9 Oct 2015 18:17:06 +0200 Subject: [PATCH 18/24] nvme: move to a new drivers/nvme/host directory This patch moves the NVMe driver from drivers/block/ to 
its own new drivers/nvme/host/ directory. This is in preparation for splitting the current monolithic driver up and adding support for the upcoming NVMe over Fabrics standard. The drivers/nvme/host/ path was chosen to leave space for an NVMe target implementation in addition to this host side driver. Signed-off-by: Jay Sternberg [hch: rebased, renamed core.c to pci.c, slight tweaks] Signed-off-by: Christoph Hellwig Acked-by: Keith Busch Signed-off-by: Jens Axboe --- MAINTAINERS | 2 +- drivers/Kconfig | 2 ++ drivers/Makefile | 1 + drivers/block/Kconfig | 11 ----------- drivers/block/Makefile | 2 -- drivers/nvme/Kconfig | 1 + drivers/nvme/Makefile | 2 ++ drivers/nvme/host/Kconfig | 10 ++++++++++ drivers/nvme/host/Makefile | 4 ++++ drivers/{block => nvme/host}/nvme.h | 0 drivers/{block/nvme-core.c => nvme/host/pci.c} | 0 drivers/{block/nvme-scsi.c => nvme/host/scsi.c} | 0 12 files changed, 21 insertions(+), 14 deletions(-) create mode 100644 drivers/nvme/Kconfig create mode 100644 drivers/nvme/Makefile create mode 100644 drivers/nvme/host/Kconfig create mode 100644 drivers/nvme/host/Makefile rename drivers/{block => nvme/host}/nvme.h (100%) rename drivers/{block/nvme-core.c => nvme/host/pci.c} (100%) rename drivers/{block/nvme-scsi.c => nvme/host/scsi.c} (100%) diff --git a/MAINTAINERS b/MAINTAINERS index 797236befd27..d104ec95a5b5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7448,7 +7448,7 @@ M: Matthew Wilcox L: linux-nvme@lists.infradead.org T: git git://git.infradead.org/users/willy/linux-nvme.git S: Supported -F: drivers/block/nvme* +F: drivers/nvme/host/ F: include/linux/nvme.h NVMEM FRAMEWORK diff --git a/drivers/Kconfig b/drivers/Kconfig index 46b4a8e0f859..e69ec82ac80a 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -18,6 +18,8 @@ source "drivers/pnp/Kconfig" source "drivers/block/Kconfig" +source "drivers/nvme/Kconfig" + # misc before ide - BLK_DEV_SGIIOC4 depends on SGI_IOC4 source "drivers/misc/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 
b250b36b54f2..42f9dd5f07c8 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -70,6 +70,7 @@ obj-$(CONFIG_NUBUS) += nubus/ obj-y += macintosh/ obj-$(CONFIG_IDE) += ide/ obj-$(CONFIG_SCSI) += scsi/ +obj-y += nvme/ obj-$(CONFIG_ATA) += ata/ obj-$(CONFIG_TARGET_CORE) += target/ obj-$(CONFIG_MTD) += mtd/ diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 1b8094d4d7af..29819e719afa 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -310,17 +310,6 @@ config BLK_DEV_NBD If unsure, say N. -config BLK_DEV_NVME - tristate "NVM Express block device" - depends on PCI - ---help--- - The NVM Express driver is for solid state drives directly - connected to the PCI or PCI Express bus. If you know you - don't have one of these, it is safe to answer N. - - To compile this driver as a module, choose M here: the - module will be called nvme. - config BLK_DEV_SKD tristate "STEC S1120 Block Driver" depends on PCI diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 02b688d1438d..671329023ec2 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -22,7 +22,6 @@ obj-$(CONFIG_XILINX_SYSACE) += xsysace.o obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o obj-$(CONFIG_MG_DISK) += mg_disk.o obj-$(CONFIG_SUNVDC) += sunvdc.o -obj-$(CONFIG_BLK_DEV_NVME) += nvme.o obj-$(CONFIG_BLK_DEV_SKD) += skd.o obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o @@ -44,6 +43,5 @@ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o obj-$(CONFIG_ZRAM) += zram/ -nvme-y := nvme-core.o nvme-scsi.o skd-y := skd_main.o swim_mod-y := swim.o swim_asm.o diff --git a/drivers/nvme/Kconfig b/drivers/nvme/Kconfig new file mode 100644 index 000000000000..a39d9431eaec --- /dev/null +++ b/drivers/nvme/Kconfig @@ -0,0 +1 @@ +source "drivers/nvme/host/Kconfig" diff --git a/drivers/nvme/Makefile b/drivers/nvme/Makefile new file mode 100644 index 000000000000..9421e829d2a9 --- /dev/null +++ b/drivers/nvme/Makefile @@ -0,0 +1,2 @@ + +obj-y += host/ diff --git 
a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig new file mode 100644 index 000000000000..0089f78b4071 --- /dev/null +++ b/drivers/nvme/host/Kconfig @@ -0,0 +1,10 @@ +config BLK_DEV_NVME + tristate "NVM Express block device" + depends on PCI + ---help--- + The NVM Express driver is for solid state drives directly + connected to the PCI or PCI Express bus. If you know you + don't have one of these, it is safe to answer N. + + To compile this driver as a module, choose M here: the + module will be called nvme. diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile new file mode 100644 index 000000000000..cfb6679ec245 --- /dev/null +++ b/drivers/nvme/host/Makefile @@ -0,0 +1,4 @@ + +obj-$(CONFIG_BLK_DEV_NVME) += nvme.o + +nvme-y += pci.o scsi.o diff --git a/drivers/block/nvme.h b/drivers/nvme/host/nvme.h similarity index 100% rename from drivers/block/nvme.h rename to drivers/nvme/host/nvme.h diff --git a/drivers/block/nvme-core.c b/drivers/nvme/host/pci.c similarity index 100% rename from drivers/block/nvme-core.c rename to drivers/nvme/host/pci.c diff --git a/drivers/block/nvme-scsi.c b/drivers/nvme/host/scsi.c similarity index 100% rename from drivers/block/nvme-scsi.c rename to drivers/nvme/host/scsi.c From 2812dfe370516ef958b5c9e2eca1b2f002236d2d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Oct 2015 18:19:20 +0200 Subject: [PATCH 19/24] =?UTF-8?q?nvme:=20include=20=20?= =?UTF-8?q?in=20?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The buildbot complains about this even if it doesn't generate a build warning. 
But it's an easy fix, so here we go: Reported-by: kbuild test robot Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 9668d3571497..3af5f454c04a 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -15,6 +15,8 @@ #ifndef _LINUX_NVME_H #define _LINUX_NVME_H +#include <linux/types.h> + struct nvme_bar { __u64 cap; /* Controller Capabilities */ __u32 vs; /* Version */ From 11feb18f4edb1423ed6091908c45de7ade30d5b7 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 12 Oct 2015 11:37:38 -0600 Subject: [PATCH 20/24] NVMe: Add explicit block config dependency The nvme driver was moved from drivers/block, losing our implicit dependency on CONFIG_BLOCK. This makes it an explicit driver dependency. Reported-by: Jim Davis Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 0089f78b4071..002a94abdbc4 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -1,6 +1,6 @@ config BLK_DEV_NVME tristate "NVM Express block device" - depends on PCI + depends on PCI && BLOCK ---help--- The NVM Express driver is for solid state drives directly connected to the PCI or PCI Express bus. 
If you know you From 3d42e67fe5ebc1e5c3aae9b1037e38ec99a362cc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 6 Oct 2015 22:29:48 +0200 Subject: [PATCH 21/24] nvme: fix 32-bit build warning Compiling the nvme driver on 32-bit warns about a cast from a __u64 variable to a pointer: drivers/block/nvme-core.c: In function 'nvme_submit_io': drivers/block/nvme-core.c:1847:4: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] (void __user *)io.addr, length, NULL, 0); The cast here is intentional and safe, so we can shut up the gcc warning by adding an intermediate cast to 'uintptr_t'. I had previously submitted a patch to fix this problem in the nvme driver, but it was accepted on the same day that two new warnings got added. For clarification, I also change the third instance of this cast to use uintptr_t instead of unsigned long now. Signed-off-by: Arnd Bergmann Fixes: d29ec8241c10e ("nvme: submit internal commands through the block layer") Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a526696d684d..ad58ee3c3b57 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1802,7 +1802,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) length = (io.nblocks + 1) << ns->lba_shift; meta_len = (io.nblocks + 1) * ns->ms; - metadata = (void __user *)(unsigned long)io.metadata; + metadata = (void __user *)(uintptr_t)io.metadata; write = io.opcode & 1; if (ns->ext) { @@ -1842,7 +1842,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) c.rw.metadata = cpu_to_le64(meta_dma); status = __nvme_submit_sync_cmd(ns->queue, &c, NULL, - (void __user *)io.addr, length, NULL, 0); + (void __user *)(uintptr_t)io.addr, length, NULL, 0); unmap: if (meta) { if (status == NVME_SC_SUCCESS && !write) { @@ -1884,7 +1884,7 @@ 
static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns, timeout = msecs_to_jiffies(cmd.timeout_ms); status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c, - NULL, (void __user *)cmd.addr, cmd.data_len, + NULL, (void __user *)(uintptr_t)cmd.addr, cmd.data_len, &cmd.result, timeout); if (status >= 0) { if (put_user(cmd.result, &ucmd->result)) From 1951feae88c5a39105a704188ccf910faf1d0c50 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 Oct 2015 21:23:39 +0200 Subject: [PATCH 22/24] nvme: use an integer value to Linux errno values Use a separate integer variable to hold the signed Linux errno values we pass back to the block layer. Note that for pass through commands those might still be NVMe values, but those fit into the int as well. Fixes: f4829a9b7a61: ("blk-mq: fix racy updates of rq->errors") Reported-by: Dan Carpenter Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index ad58ee3c3b57..f73c574d59f5 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -606,8 +606,8 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, struct nvme_iod *iod = ctx; struct request *req = iod_get_private(iod); struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req); - u16 status = le16_to_cpup(&cqe->status) >> 1; + int error; if (unlikely(status)) { if (!(status & NVME_SC_DNR || blk_noretry_request(req)) @@ -624,9 +624,11 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, if (req->cmd_type == REQ_TYPE_DRV_PRIV) { if (cmd_rq->ctx == CMD_CTX_CANCELLED) - status = -EINTR; + error = -EINTR; + else + error = status; } else { - status = nvme_error_status(status); + error = nvme_error_status(status); } } @@ -638,7 +640,7 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, if (cmd_rq->aborted) dev_warn(nvmeq->dev->dev, "completing 
aborted command with status:%04x\n", - status); + error); if (iod->nents) { dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents, @@ -652,7 +654,7 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, } nvme_free_iod(nvmeq->dev, iod); - blk_mq_complete_request(req, status); + blk_mq_complete_request(req, error); } /* length is in bytes. gfp flags indicates whether we may sleep. */ From ef658fc2a6809b42dd7002229fd174a9a1645707 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 15 Oct 2015 09:49:57 -0600 Subject: [PATCH 23/24] NVMe: initialize error to '0' Reported-by: Keith Busch Fixes: 1951feae88c5 ("nvme: use an integer value to Linux errno values") Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f73c574d59f5..22d83752ae87 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -607,7 +607,7 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, struct request *req = iod_get_private(iod); struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req); u16 status = le16_to_cpup(&cqe->status) >> 1; - int error; + int error = 0; if (unlikely(status)) { if (!(status & NVME_SC_DNR || blk_noretry_request(req)) From b3975e94f5688691f487ea00126dabe8f5bee3af Mon Sep 17 00:00:00 2001 From: Jay Freyensee Date: Wed, 28 Oct 2015 16:37:51 +0900 Subject: [PATCH 24/24] Update target repo for nvme patch contributions Per http://www.nvmexpress.org/resources/linux-driver-information/, the old nvme git repo is stale. Updating MAINTAINERS to the Supported target currently used by the community. Signed-off-by: Jay Freyensee Updated by me to add Keith as the maintainer, me as the co-maintainer. 
Signed-off-by: Jens Axboe --- MAINTAINERS | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index d104ec95a5b5..f1d5a59432fc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7444,9 +7444,11 @@ F: drivers/video/fbdev/riva/ F: drivers/video/fbdev/nvidia/ NVM EXPRESS DRIVER -M: Matthew Wilcox +M: Keith Busch +M: Jens Axboe L: linux-nvme@lists.infradead.org -T: git git://git.infradead.org/users/willy/linux-nvme.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git +W: https://kernel.googlesource.com/pub/scm/linux/kernel/git/axboe/linux-block/ S: Supported F: drivers/nvme/host/ F: include/linux/nvme.h