mirror of
https://github.com/Fishwaldo/Star64_linux.git
synced 2025-07-23 23:32:14 +00:00
Create a configfs attribute in each nvme-fabrics namespace to enable P2P memory use. The attribute may be enabled (with a boolean) or a specific P2P device may be given (with the device's PCI name). When enabled, the namespace will ensure the underlying block device supports P2P and is compatible with any specified P2P device. If no device was specified it will ensure there is compatible P2P memory somewhere in the system. Enabling a namespace with P2P memory will fail with EINVAL (and an appropriate dmesg error) if any of these conditions are not met. Once a controller is set up on a specific port, the P2P device to use for each namespace will be found and stored in a radix tree by namespace ID. When memory is allocated for a request, the tree is used to look up the P2P device to allocate memory against. If no device is in the tree (because no appropriate device was found), or if allocation of P2P memory fails, fall back to using regular memory. Signed-off-by: Stephen Bates <sbates@raithlin.com> Signed-off-by: Steve Wise <swise@opengridcomputing.com> [hch: partial rewrite of the initial code] Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Logan Gunthorpe <logang@deltatee.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
251 lines
6 KiB
C
251 lines
6 KiB
C
/*
 * NVMe I/O command implementation.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
#include <linux/blkdev.h>
|
|
#include <linux/module.h>
|
|
#include "nvmet.h"
|
|
|
|
int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
|
|
{
|
|
int ret;
|
|
|
|
ns->bdev = blkdev_get_by_path(ns->device_path,
|
|
FMODE_READ | FMODE_WRITE, NULL);
|
|
if (IS_ERR(ns->bdev)) {
|
|
ret = PTR_ERR(ns->bdev);
|
|
if (ret != -ENOTBLK) {
|
|
pr_err("failed to open block device %s: (%ld)\n",
|
|
ns->device_path, PTR_ERR(ns->bdev));
|
|
}
|
|
ns->bdev = NULL;
|
|
return ret;
|
|
}
|
|
ns->size = i_size_read(ns->bdev->bd_inode);
|
|
ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
|
|
return 0;
|
|
}
|
|
|
|
void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
|
|
{
|
|
if (ns->bdev) {
|
|
blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
|
|
ns->bdev = NULL;
|
|
}
|
|
}
|
|
|
|
static void nvmet_bio_done(struct bio *bio)
|
|
{
|
|
struct nvmet_req *req = bio->bi_private;
|
|
|
|
nvmet_req_complete(req,
|
|
bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
|
|
|
|
if (bio != &req->b.inline_bio)
|
|
bio_put(bio);
|
|
}
|
|
|
|
static void nvmet_bdev_execute_rw(struct nvmet_req *req)
|
|
{
|
|
int sg_cnt = req->sg_cnt;
|
|
struct bio *bio = &req->b.inline_bio;
|
|
struct scatterlist *sg;
|
|
sector_t sector;
|
|
blk_qc_t cookie;
|
|
int op, op_flags = 0, i;
|
|
|
|
if (!req->sg_cnt) {
|
|
nvmet_req_complete(req, 0);
|
|
return;
|
|
}
|
|
|
|
if (req->cmd->rw.opcode == nvme_cmd_write) {
|
|
op = REQ_OP_WRITE;
|
|
op_flags = REQ_SYNC | REQ_IDLE;
|
|
if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
|
|
op_flags |= REQ_FUA;
|
|
} else {
|
|
op = REQ_OP_READ;
|
|
}
|
|
|
|
if (is_pci_p2pdma_page(sg_page(req->sg)))
|
|
op_flags |= REQ_NOMERGE;
|
|
|
|
sector = le64_to_cpu(req->cmd->rw.slba);
|
|
sector <<= (req->ns->blksize_shift - 9);
|
|
|
|
bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
|
|
bio_set_dev(bio, req->ns->bdev);
|
|
bio->bi_iter.bi_sector = sector;
|
|
bio->bi_private = req;
|
|
bio->bi_end_io = nvmet_bio_done;
|
|
bio_set_op_attrs(bio, op, op_flags);
|
|
|
|
for_each_sg(req->sg, sg, req->sg_cnt, i) {
|
|
while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
|
|
!= sg->length) {
|
|
struct bio *prev = bio;
|
|
|
|
bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
|
|
bio_set_dev(bio, req->ns->bdev);
|
|
bio->bi_iter.bi_sector = sector;
|
|
bio_set_op_attrs(bio, op, op_flags);
|
|
|
|
bio_chain(bio, prev);
|
|
submit_bio(prev);
|
|
}
|
|
|
|
sector += sg->length >> 9;
|
|
sg_cnt--;
|
|
}
|
|
|
|
cookie = submit_bio(bio);
|
|
|
|
blk_poll(bdev_get_queue(req->ns->bdev), cookie);
|
|
}
|
|
|
|
static void nvmet_bdev_execute_flush(struct nvmet_req *req)
|
|
{
|
|
struct bio *bio = &req->b.inline_bio;
|
|
|
|
bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
|
|
bio_set_dev(bio, req->ns->bdev);
|
|
bio->bi_private = req;
|
|
bio->bi_end_io = nvmet_bio_done;
|
|
bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
|
|
|
|
submit_bio(bio);
|
|
}
|
|
|
|
u16 nvmet_bdev_flush(struct nvmet_req *req)
|
|
{
|
|
if (blkdev_issue_flush(req->ns->bdev, GFP_KERNEL, NULL))
|
|
return NVME_SC_INTERNAL | NVME_SC_DNR;
|
|
return 0;
|
|
}
|
|
|
|
static u16 nvmet_bdev_discard_range(struct nvmet_ns *ns,
|
|
struct nvme_dsm_range *range, struct bio **bio)
|
|
{
|
|
int ret;
|
|
|
|
ret = __blkdev_issue_discard(ns->bdev,
|
|
le64_to_cpu(range->slba) << (ns->blksize_shift - 9),
|
|
le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
|
|
GFP_KERNEL, 0, bio);
|
|
if (ret && ret != -EOPNOTSUPP)
|
|
return NVME_SC_INTERNAL | NVME_SC_DNR;
|
|
return 0;
|
|
}
|
|
|
|
static void nvmet_bdev_execute_discard(struct nvmet_req *req)
|
|
{
|
|
struct nvme_dsm_range range;
|
|
struct bio *bio = NULL;
|
|
int i;
|
|
u16 status;
|
|
|
|
for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
|
|
status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
|
|
sizeof(range));
|
|
if (status)
|
|
break;
|
|
|
|
status = nvmet_bdev_discard_range(req->ns, &range, &bio);
|
|
if (status)
|
|
break;
|
|
}
|
|
|
|
if (bio) {
|
|
bio->bi_private = req;
|
|
bio->bi_end_io = nvmet_bio_done;
|
|
if (status) {
|
|
bio->bi_status = BLK_STS_IOERR;
|
|
bio_endio(bio);
|
|
} else {
|
|
submit_bio(bio);
|
|
}
|
|
} else {
|
|
nvmet_req_complete(req, status);
|
|
}
|
|
}
|
|
|
|
static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
|
|
{
|
|
switch (le32_to_cpu(req->cmd->dsm.attributes)) {
|
|
case NVME_DSMGMT_AD:
|
|
nvmet_bdev_execute_discard(req);
|
|
return;
|
|
case NVME_DSMGMT_IDR:
|
|
case NVME_DSMGMT_IDW:
|
|
default:
|
|
/* Not supported yet */
|
|
nvmet_req_complete(req, 0);
|
|
return;
|
|
}
|
|
}
|
|
|
|
static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
|
|
{
|
|
struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
|
|
struct bio *bio = NULL;
|
|
u16 status = NVME_SC_SUCCESS;
|
|
sector_t sector;
|
|
sector_t nr_sector;
|
|
|
|
sector = le64_to_cpu(write_zeroes->slba) <<
|
|
(req->ns->blksize_shift - 9);
|
|
nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
|
|
(req->ns->blksize_shift - 9));
|
|
|
|
if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
|
|
GFP_KERNEL, &bio, 0))
|
|
status = NVME_SC_INTERNAL | NVME_SC_DNR;
|
|
|
|
if (bio) {
|
|
bio->bi_private = req;
|
|
bio->bi_end_io = nvmet_bio_done;
|
|
submit_bio(bio);
|
|
} else {
|
|
nvmet_req_complete(req, status);
|
|
}
|
|
}
|
|
|
|
u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
|
|
{
|
|
struct nvme_command *cmd = req->cmd;
|
|
|
|
switch (cmd->common.opcode) {
|
|
case nvme_cmd_read:
|
|
case nvme_cmd_write:
|
|
req->execute = nvmet_bdev_execute_rw;
|
|
req->data_len = nvmet_rw_len(req);
|
|
return 0;
|
|
case nvme_cmd_flush:
|
|
req->execute = nvmet_bdev_execute_flush;
|
|
req->data_len = 0;
|
|
return 0;
|
|
case nvme_cmd_dsm:
|
|
req->execute = nvmet_bdev_execute_dsm;
|
|
req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
|
|
sizeof(struct nvme_dsm_range);
|
|
return 0;
|
|
case nvme_cmd_write_zeroes:
|
|
req->execute = nvmet_bdev_execute_write_zeroes;
|
|
return 0;
|
|
default:
|
|
pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
|
|
req->sq->qid);
|
|
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
|
|
}
|
|
}
|