Skip to content

Commit 9408d8a

Browse files
keithbusch authored and axboe committed
nvme: improved uring polling
Drivers can poll requests directly, so use that. We just need to ensure the driver's request was allocated from a polled hctx, so a special driver flag is added to struct io_uring_cmd. This allows unshared and multipath namespaces to use the same polling callback, and multipath is guaranteed to get the same queue as the command was submitted on. Previously multipath polling might check a different path and poll the wrong info. The other bonus is we don't need a bio payload in order to poll, allowing commands like 'flush' and 'write zeroes' to be submitted on the same high priority queue as read and write commands. Finally, using the request based polling skips the unnecessary bio overhead. Signed-off-by: Keith Busch <kbusch@kernel.org> Reviewed-by: Sagi Grimberg <sagi@grimberg.me> Reviewed-by: Christoph Hellwig <hch@lst.de> Link: https://lore.kernel.org/r/20230612190343.2087040-3-kbusch@meta.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent f6c80cf commit 9408d8a

4 files changed

Lines changed: 22 additions & 54 deletions

File tree

drivers/nvme/host/ioctl.c

Lines changed: 19 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -505,7 +505,6 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
505505
{
506506
struct io_uring_cmd *ioucmd = req->end_io_data;
507507
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
508-
void *cookie = READ_ONCE(ioucmd->cookie);
509508

510509
req->bio = pdu->bio;
511510
if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
@@ -518,10 +517,12 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
518517
* For iopoll, complete it directly.
519518
* Otherwise, move the completion to task work.
520519
*/
521-
if (cookie != NULL && blk_rq_is_poll(req))
520+
if (blk_rq_is_poll(req)) {
521+
WRITE_ONCE(ioucmd->cookie, NULL);
522522
nvme_uring_task_cb(ioucmd, IO_URING_F_UNLOCKED);
523-
else
523+
} else {
524524
io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
525+
}
525526

526527
return RQ_END_IO_FREE;
527528
}
@@ -531,7 +532,6 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req,
531532
{
532533
struct io_uring_cmd *ioucmd = req->end_io_data;
533534
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
534-
void *cookie = READ_ONCE(ioucmd->cookie);
535535

536536
req->bio = pdu->bio;
537537
pdu->req = req;
@@ -540,10 +540,12 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req,
540540
* For iopoll, complete it directly.
541541
* Otherwise, move the completion to task work.
542542
*/
543-
if (cookie != NULL && blk_rq_is_poll(req))
543+
if (blk_rq_is_poll(req)) {
544+
WRITE_ONCE(ioucmd->cookie, NULL);
544545
nvme_uring_task_meta_cb(ioucmd, IO_URING_F_UNLOCKED);
545-
else
546+
} else {
546547
io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_meta_cb);
548+
}
547549

548550
return RQ_END_IO_NONE;
549551
}
@@ -599,7 +601,6 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
599601
if (issue_flags & IO_URING_F_IOPOLL)
600602
rq_flags |= REQ_POLLED;
601603

602-
retry:
603604
req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags);
604605
if (IS_ERR(req))
605606
return PTR_ERR(req);
@@ -613,17 +614,11 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
613614
return ret;
614615
}
615616

616-
if (issue_flags & IO_URING_F_IOPOLL && rq_flags & REQ_POLLED) {
617-
if (unlikely(!req->bio)) {
618-
/* we can't poll this, so alloc regular req instead */
619-
blk_mq_free_request(req);
620-
rq_flags &= ~REQ_POLLED;
621-
goto retry;
622-
} else {
623-
WRITE_ONCE(ioucmd->cookie, req->bio);
624-
req->bio->bi_opf |= REQ_POLLED;
625-
}
617+
if (blk_rq_is_poll(req)) {
618+
ioucmd->flags |= IORING_URING_CMD_POLLED;
619+
WRITE_ONCE(ioucmd->cookie, req);
626620
}
621+
627622
/* to free bio on completion, as req->bio will be null at that time */
628623
pdu->bio = req->bio;
629624
pdu->meta_len = d.metadata_len;
@@ -785,18 +780,16 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
785780
struct io_comp_batch *iob,
786781
unsigned int poll_flags)
787782
{
788-
struct bio *bio;
783+
struct request *req;
789784
int ret = 0;
790-
struct nvme_ns *ns;
791-
struct request_queue *q;
785+
786+
if (!(ioucmd->flags & IORING_URING_CMD_POLLED))
787+
return 0;
792788

793789
rcu_read_lock();
794-
bio = READ_ONCE(ioucmd->cookie);
795-
ns = container_of(file_inode(ioucmd->file)->i_cdev,
796-
struct nvme_ns, cdev);
797-
q = ns->queue;
798-
if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && bio && bio->bi_bdev)
799-
ret = bio_poll(bio, iob, poll_flags);
790+
req = READ_ONCE(ioucmd->cookie);
791+
if (req && blk_rq_is_poll(req))
792+
ret = blk_rq_poll(req, iob, poll_flags);
800793
rcu_read_unlock();
801794
return ret;
802795
}
@@ -890,31 +883,6 @@ int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
890883
srcu_read_unlock(&head->srcu, srcu_idx);
891884
return ret;
892885
}
893-
894-
int nvme_ns_head_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
895-
struct io_comp_batch *iob,
896-
unsigned int poll_flags)
897-
{
898-
struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
899-
struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev);
900-
int srcu_idx = srcu_read_lock(&head->srcu);
901-
struct nvme_ns *ns = nvme_find_path(head);
902-
struct bio *bio;
903-
int ret = 0;
904-
struct request_queue *q;
905-
906-
if (ns) {
907-
rcu_read_lock();
908-
bio = READ_ONCE(ioucmd->cookie);
909-
q = ns->queue;
910-
if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && bio
911-
&& bio->bi_bdev)
912-
ret = bio_poll(bio, iob, poll_flags);
913-
rcu_read_unlock();
914-
}
915-
srcu_read_unlock(&head->srcu, srcu_idx);
916-
return ret;
917-
}
918886
#endif /* CONFIG_NVME_MULTIPATH */
919887

920888
int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)

drivers/nvme/host/multipath.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,7 @@ static const struct file_operations nvme_ns_head_chr_fops = {
470470
.unlocked_ioctl = nvme_ns_head_chr_ioctl,
471471
.compat_ioctl = compat_ptr_ioctl,
472472
.uring_cmd = nvme_ns_head_chr_uring_cmd,
473-
.uring_cmd_iopoll = nvme_ns_head_chr_uring_cmd_iopoll,
473+
.uring_cmd_iopoll = nvme_ns_chr_uring_cmd_iopoll,
474474
};
475475

476476
static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)

drivers/nvme/host/nvme.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -854,8 +854,6 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd,
854854
unsigned long arg);
855855
int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
856856
struct io_comp_batch *iob, unsigned int poll_flags);
857-
int nvme_ns_head_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
858-
struct io_comp_batch *iob, unsigned int poll_flags);
859857
int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd,
860858
unsigned int issue_flags);
861859
int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,

include/uapi/linux/io_uring.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,8 +244,10 @@ enum io_uring_op {
244244
* sqe->uring_cmd_flags
245245
* IORING_URING_CMD_FIXED use registered buffer; pass this flag
246246
* along with setting sqe->buf_index.
247+
* IORING_URING_CMD_POLLED driver use only
247248
*/
248249
#define IORING_URING_CMD_FIXED (1U << 0)
250+
#define IORING_URING_CMD_POLLED (1U << 31)
249251

250252

251253
/*

0 commit comments

Comments (0)