Skip to content

Commit

Permalink
nvme: use bio_integrity_map_user
Browse files Browse the repository at this point in the history
Map user metadata buffers directly. Now that the bio tracks the
metadata, nvme doesn't need special metadata handling and tracking with
callbacks and additional fields in the pdu.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Link: https://lore.kernel.org/r/20231130215309.2923568-3-kbusch@meta.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
  • Loading branch information
keithbusch authored and axboe committed Dec 2, 2023
1 parent 492c5d4 commit d6aacee
Showing 1 changed file with 29 additions and 168 deletions.
197 changes: 29 additions & 168 deletions drivers/nvme/host/ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,58 +96,6 @@ static void __user *nvme_to_user_ptr(uintptr_t ptrval)
return (void __user *)ptrval;
}

/*
 * Bounce user metadata through a kernel buffer and attach that buffer to
 * the request's bio as an integrity payload.
 *
 * @req:  request whose bio (req->bio) receives the integrity payload
 * @ubuf: user-space metadata buffer
 * @len:  number of metadata bytes
 * @seed: value stored in the payload's bip_iter.bi_sector
 *
 * For REQ_OP_DRV_OUT requests the kernel buffer is filled from @ubuf;
 * for every other operation it is zeroed.
 *
 * Returns the kernel buffer on success, with REQ_INTEGRITY set in
 * req->cmd_flags. The buffer is not freed here on success;
 * nvme_finish_user_metadata() is the function that kfree()s it.
 * On failure returns an ERR_PTR: -ENOMEM if kmalloc() fails or the
 * payload does not accept all @len bytes, -EFAULT if the copy from user
 * space fails, or the error reported by bio_integrity_alloc().
 */
static void *nvme_add_user_metadata(struct request *req, void __user *ubuf,
unsigned len, u32 seed)
{
struct bio_integrity_payload *bip;
int ret = -ENOMEM;
void *buf;
struct bio *bio = req->bio;

buf = kmalloc(len, GFP_KERNEL);
if (!buf)
goto out;

/* Only outbound commands carry user-supplied metadata to copy in. */
if (req_op(req) == REQ_OP_DRV_OUT) {
ret = -EFAULT;
if (copy_from_user(buf, ubuf, len))
goto out_free_meta;
} else {
/* Zero the buffer so uninitialised kmalloc() contents are never exposed. */
memset(buf, 0, len);
}

bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
if (IS_ERR(bip)) {
ret = PTR_ERR(bip);
goto out_free_meta;
}

bip->bip_iter.bi_sector = seed;
/* Anything other than the full length being added is treated as failure. */
ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
offset_in_page(buf));
if (ret != len) {
ret = -ENOMEM;
goto out_free_meta;
}

req->cmd_flags |= REQ_INTEGRITY;
return buf;
out_free_meta:
kfree(buf);
out:
return ERR_PTR(ret);
}

/*
 * Complete the metadata bounce for a finished request and release the
 * kernel buffer.
 *
 * @req:  the completed request
 * @ubuf: user-space metadata buffer to copy back into
 * @meta: kernel metadata buffer; always kfree()d here
 * @len:  number of metadata bytes
 * @ret:  status of the command so far
 *
 * The metadata is copied back to @ubuf only when @ret is 0 and the
 * request is REQ_OP_DRV_IN. Returns -EFAULT if that copy fails,
 * otherwise returns @ret unchanged.
 */
static int nvme_finish_user_metadata(struct request *req, void __user *ubuf,
void *meta, unsigned len, int ret)
{
if (!ret && req_op(req) == REQ_OP_DRV_IN &&
copy_to_user(ubuf, meta, len))
ret = -EFAULT;
/* The buffer is released on every path, including when @ret is non-zero. */
kfree(meta);
return ret;
}

static struct request *nvme_alloc_user_request(struct request_queue *q,
struct nvme_command *cmd, blk_opf_t rq_flags,
blk_mq_req_flags_t blk_flags)
Expand All @@ -164,14 +112,12 @@ static struct request *nvme_alloc_user_request(struct request_queue *q,

static int nvme_map_user_request(struct request *req, u64 ubuffer,
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
u32 meta_seed, void **metap, struct io_uring_cmd *ioucmd,
unsigned int flags)
u32 meta_seed, struct io_uring_cmd *ioucmd, unsigned int flags)
{
struct request_queue *q = req->q;
struct nvme_ns *ns = q->queuedata;
struct block_device *bdev = ns ? ns->disk->part0 : NULL;
struct bio *bio = NULL;
void *meta = NULL;
int ret;

if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
Expand All @@ -193,18 +139,17 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,

if (ret)
goto out;

bio = req->bio;
if (bdev)
if (bdev) {
bio_set_dev(bio, bdev);

if (bdev && meta_buffer && meta_len) {
meta = nvme_add_user_metadata(req, meta_buffer, meta_len,
meta_seed);
if (IS_ERR(meta)) {
ret = PTR_ERR(meta);
goto out_unmap;
if (meta_buffer && meta_len) {
ret = bio_integrity_map_user(bio, meta_buffer, meta_len,
meta_seed);
if (ret)
goto out_unmap;
req->cmd_flags |= REQ_INTEGRITY;
}
*metap = meta;
}

return ret;
Expand All @@ -225,7 +170,6 @@ static int nvme_submit_user_cmd(struct request_queue *q,
struct nvme_ns *ns = q->queuedata;
struct nvme_ctrl *ctrl;
struct request *req;
void *meta = NULL;
struct bio *bio;
u32 effects;
int ret;
Expand All @@ -237,7 +181,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
req->timeout = timeout;
if (ubuffer && bufflen) {
ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
meta_len, meta_seed, &meta, NULL, flags);
meta_len, meta_seed, NULL, flags);
if (ret)
return ret;
}
Expand All @@ -249,9 +193,6 @@ static int nvme_submit_user_cmd(struct request_queue *q,
ret = nvme_execute_rq(req, false);
if (result)
*result = le64_to_cpu(nvme_req(req)->result.u64);
if (meta)
ret = nvme_finish_user_metadata(req, meta_buffer, meta,
meta_len, ret);
if (bio)
blk_rq_unmap_user(bio);
blk_mq_free_request(req);
Expand Down Expand Up @@ -446,19 +387,10 @@ struct nvme_uring_data {
* Expect build errors if this grows larger than that.
*/
struct nvme_uring_cmd_pdu {
union {
struct bio *bio;
struct request *req;
};
u32 meta_len;
u32 nvme_status;
union {
struct {
void *meta; /* kernel-resident buffer */
void __user *meta_buffer;
};
u64 result;
} u;
struct request *req;
struct bio *bio;
u64 result;
int status;
};

static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
Expand All @@ -467,40 +399,14 @@ static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu;
}

/*
 * Completion callback for io_uring passthrough commands that carry
 * metadata. It is invoked by nvme_uring_cmd_end_io_meta(), either
 * directly or through io_uring_cmd_do_in_task_lazy().
 *
 * Takes the request from pdu->req, derives the status and result from
 * it, finishes the metadata bounce when pdu->meta_len is non-zero,
 * unmaps the user data if the request has a bio, frees the request and
 * finally completes the io_uring command.
 */
static void nvme_uring_task_meta_cb(struct io_uring_cmd *ioucmd,
unsigned issue_flags)
{
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
struct request *req = pdu->req;
int status;
u64 result;

/* A cancelled request is reported as -EINTR instead of its NVMe status. */
if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
status = -EINTR;
else
status = nvme_req(req)->status;

result = le64_to_cpu(nvme_req(req)->result.u64);

/* May replace status with -EFAULT; it also frees pdu->u.meta. */
if (pdu->meta_len)
status = nvme_finish_user_metadata(req, pdu->u.meta_buffer,
pdu->u.meta, pdu->meta_len, status);
if (req->bio)
blk_rq_unmap_user(req->bio);
blk_mq_free_request(req);

io_uring_cmd_done(ioucmd, status, result, issue_flags);
}

static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd,
unsigned issue_flags)
{
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);

if (pdu->bio)
blk_rq_unmap_user(pdu->bio);

io_uring_cmd_done(ioucmd, pdu->nvme_status, pdu->u.result, issue_flags);
io_uring_cmd_done(ioucmd, pdu->status, pdu->result, issue_flags);
}

static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
Expand All @@ -509,53 +415,24 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
struct io_uring_cmd *ioucmd = req->end_io_data;
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);

req->bio = pdu->bio;
if (nvme_req(req)->flags & NVME_REQ_CANCELLED) {
pdu->nvme_status = -EINTR;
} else {
pdu->nvme_status = nvme_req(req)->status;
if (!pdu->nvme_status)
pdu->nvme_status = blk_status_to_errno(err);
}
pdu->u.result = le64_to_cpu(nvme_req(req)->result.u64);
if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
pdu->status = -EINTR;
else
pdu->status = nvme_req(req)->status;
pdu->result = le64_to_cpu(nvme_req(req)->result.u64);

/*
* For iopoll, complete it directly.
* Otherwise, move the completion to task work.
*/
if (blk_rq_is_poll(req)) {
WRITE_ONCE(ioucmd->cookie, NULL);
if (blk_rq_is_poll(req))
nvme_uring_task_cb(ioucmd, IO_URING_F_UNLOCKED);
} else {
else
io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
}

return RQ_END_IO_FREE;
}

/*
 * end_io handler for io_uring passthrough requests that carry metadata.
 *
 * Restores req->bio from the pdu, records the request in the pdu and
 * hands completion to nvme_uring_task_meta_cb(), which is the function
 * that calls blk_mq_free_request(). Always returns RQ_END_IO_NONE.
 * NOTE(review): RQ_END_IO_NONE presumably tells the block layer not to
 * free the request itself - confirm against the blk-mq documentation.
 * @err is not used by this handler.
 */
static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req,
blk_status_t err)
{
struct io_uring_cmd *ioucmd = req->end_io_data;
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);

/*
 * Order matters: in the struct layout that declares bio and req inside
 * one anonymous union, storing pdu->req overwrites pdu->bio, so the bio
 * must be copied back into the request first.
 */
req->bio = pdu->bio;
pdu->req = req;

/*
 * For iopoll, complete it directly.
 * Otherwise, move the completion to task work.
 */
if (blk_rq_is_poll(req)) {
WRITE_ONCE(ioucmd->cookie, NULL);
nvme_uring_task_meta_cb(ioucmd, IO_URING_F_UNLOCKED);
} else {
io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_meta_cb);
}

return RQ_END_IO_NONE;
}

static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec)
{
Expand All @@ -567,7 +444,6 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct request *req;
blk_opf_t rq_flags = REQ_ALLOC_CACHE;
blk_mq_req_flags_t blk_flags = 0;
void *meta = NULL;
int ret;

c.common.opcode = READ_ONCE(cmd->opcode);
Expand Down Expand Up @@ -615,27 +491,16 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
if (d.addr && d.data_len) {
ret = nvme_map_user_request(req, d.addr,
d.data_len, nvme_to_user_ptr(d.metadata),
d.metadata_len, 0, &meta, ioucmd, vec);
d.metadata_len, 0, ioucmd, vec);
if (ret)
return ret;
}

if (blk_rq_is_poll(req)) {
ioucmd->flags |= IORING_URING_CMD_POLLED;
WRITE_ONCE(ioucmd->cookie, req);
}

/* to free bio on completion, as req->bio will be null at that time */
pdu->bio = req->bio;
pdu->meta_len = d.metadata_len;
pdu->req = req;
req->end_io_data = ioucmd;
if (pdu->meta_len) {
pdu->u.meta = meta;
pdu->u.meta_buffer = nvme_to_user_ptr(d.metadata);
req->end_io = nvme_uring_cmd_end_io_meta;
} else {
req->end_io = nvme_uring_cmd_end_io;
}
req->end_io = nvme_uring_cmd_end_io;
blk_execute_rq_nowait(req, false);
return -EIOCBQUEUED;
}
Expand Down Expand Up @@ -786,16 +651,12 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
struct io_comp_batch *iob,
unsigned int poll_flags)
{
struct request *req;
int ret = 0;

if (!(ioucmd->flags & IORING_URING_CMD_POLLED))
return 0;
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
struct request *req = pdu->req;

req = READ_ONCE(ioucmd->cookie);
if (req && blk_rq_is_poll(req))
ret = blk_rq_poll(req, iob, poll_flags);
return ret;
return blk_rq_poll(req, iob, poll_flags);
return 0;
}
#ifdef CONFIG_NVME_MULTIPATH
static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
Expand Down

0 comments on commit d6aacee

Please sign in to comment.