Skip to content

Commit 3c7d76d

Browse files
committed
io_uring: IOPOLL polling improvements
io_uring manages issued and pending IOPOLL read/write requests in a singly linked list. One downside of that is that individual items cannot easily be removed from that list, and as a result, io_uring will only complete a completed request N in that list if 0..N-1 are also complete. For homogeneous IO this isn't necessarily an issue, but if different devices are involved in polling in the same ring, or if disparate IO from the same device is being polled for, this can defer completion of some requests unnecessarily. Move to a doubly linked list for iopoll completions instead, making it possible to easily complete whatever requests were successfully polled as done. Co-developed-by: Fengnan Chang <fengnanchang@gmail.com> Link: https://lore.kernel.org/io-uring/20251210085501.84261-1-changfengnan@bytedance.com/ Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent f8f9c1f commit 3c7d76d

6 files changed

Lines changed: 32 additions & 58 deletions

File tree

include/linux/io_uring_types.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,7 @@ struct io_ring_ctx {
316316
* manipulate the list, hence no extra locking is needed there.
317317
*/
318318
bool poll_multi_queue;
319-
struct io_wq_work_list iopoll_list;
319+
struct list_head iopoll_list;
320320

321321
struct io_file_table file_table;
322322
struct io_rsrc_data buf_table;
@@ -708,7 +708,16 @@ struct io_kiocb {
708708

709709
atomic_t refs;
710710
bool cancel_seq_set;
711-
struct io_task_work io_task_work;
711+
712+
/*
713+
* IOPOLL doesn't use task_work, so use the ->iopoll_node list
714+
* entry to manage pending iopoll requests.
715+
*/
716+
union {
717+
struct io_task_work io_task_work;
718+
struct list_head iopoll_node;
719+
};
720+
712721
union {
713722
/*
714723
* for polled requests, i.e. IORING_OP_POLL_ADD and async armed

io_uring/cancel.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -534,7 +534,7 @@ __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
534534
/* SQPOLL thread does its own polling */
535535
if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) ||
536536
is_sqpoll_thread) {
537-
while (!wq_list_empty(&ctx->iopoll_list)) {
537+
while (!list_empty(&ctx->iopoll_list)) {
538538
io_iopoll_try_reap_events(ctx);
539539
ret = true;
540540
cond_resched();

io_uring/io_uring.c

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
334334
init_waitqueue_head(&ctx->poll_wq);
335335
spin_lock_init(&ctx->completion_lock);
336336
raw_spin_lock_init(&ctx->timeout_lock);
337-
INIT_WQ_LIST(&ctx->iopoll_list);
337+
INIT_LIST_HEAD(&ctx->iopoll_list);
338338
INIT_LIST_HEAD(&ctx->defer_list);
339339
INIT_LIST_HEAD(&ctx->timeout_list);
340340
INIT_LIST_HEAD(&ctx->ltimeout_list);
@@ -1561,7 +1561,7 @@ __cold void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
15611561
return;
15621562

15631563
mutex_lock(&ctx->uring_lock);
1564-
while (!wq_list_empty(&ctx->iopoll_list)) {
1564+
while (!list_empty(&ctx->iopoll_list)) {
15651565
/* let it sleep and repeat later if can't complete a request */
15661566
if (io_do_iopoll(ctx, true) == 0)
15671567
break;
@@ -1626,21 +1626,18 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned int min_events)
16261626
* forever, while the workqueue is stuck trying to acquire the
16271627
* very same mutex.
16281628
*/
1629-
if (wq_list_empty(&ctx->iopoll_list) ||
1630-
io_task_work_pending(ctx)) {
1629+
if (list_empty(&ctx->iopoll_list) || io_task_work_pending(ctx)) {
16311630
u32 tail = ctx->cached_cq_tail;
16321631

16331632
(void) io_run_local_work_locked(ctx, min_events);
16341633

1635-
if (task_work_pending(current) ||
1636-
wq_list_empty(&ctx->iopoll_list)) {
1634+
if (task_work_pending(current) || list_empty(&ctx->iopoll_list)) {
16371635
mutex_unlock(&ctx->uring_lock);
16381636
io_run_task_work();
16391637
mutex_lock(&ctx->uring_lock);
16401638
}
16411639
/* some requests don't go through iopoll_list */
1642-
if (tail != ctx->cached_cq_tail ||
1643-
wq_list_empty(&ctx->iopoll_list))
1640+
if (tail != ctx->cached_cq_tail || list_empty(&ctx->iopoll_list))
16441641
break;
16451642
}
16461643
ret = io_do_iopoll(ctx, !min_events);
@@ -1683,25 +1680,17 @@ static void io_iopoll_req_issued(struct io_kiocb *req, unsigned int issue_flags)
16831680
* how we do polling eventually, not spinning if we're on potentially
16841681
* different devices.
16851682
*/
1686-
if (wq_list_empty(&ctx->iopoll_list)) {
1683+
if (list_empty(&ctx->iopoll_list)) {
16871684
ctx->poll_multi_queue = false;
16881685
} else if (!ctx->poll_multi_queue) {
16891686
struct io_kiocb *list_req;
16901687

1691-
list_req = container_of(ctx->iopoll_list.first, struct io_kiocb,
1692-
comp_list);
1688+
list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb, iopoll_node);
16931689
if (list_req->file != req->file)
16941690
ctx->poll_multi_queue = true;
16951691
}
16961692

1697-
/*
1698-
* For fast devices, IO may have already completed. If it has, add
1699-
* it to the front so we find it first.
1700-
*/
1701-
if (READ_ONCE(req->iopoll_completed))
1702-
wq_list_add_head(&req->comp_list, &ctx->iopoll_list);
1703-
else
1704-
wq_list_add_tail(&req->comp_list, &ctx->iopoll_list);
1693+
list_add_tail(&req->iopoll_node, &ctx->iopoll_list);
17051694

17061695
if (unlikely(needs_lock)) {
17071696
/*

io_uring/rw.c

Lines changed: 8 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1315,9 +1315,9 @@ static int io_uring_hybrid_poll(struct io_kiocb *req,
13151315

13161316
int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
13171317
{
1318-
struct io_wq_work_node *pos, *start, *prev;
13191318
unsigned int poll_flags = 0;
13201319
DEFINE_IO_COMP_BATCH(iob);
1320+
struct io_kiocb *req, *tmp;
13211321
int nr_events = 0;
13221322

13231323
/*
@@ -1327,8 +1327,7 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
13271327
if (ctx->poll_multi_queue || force_nonspin)
13281328
poll_flags |= BLK_POLL_ONESHOT;
13291329

1330-
wq_list_for_each(pos, start, &ctx->iopoll_list) {
1331-
struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list);
1330+
list_for_each_entry(req, &ctx->iopoll_list, iopoll_node) {
13321331
int ret;
13331332

13341333
/*
@@ -1357,31 +1356,20 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
13571356

13581357
if (!rq_list_empty(&iob.req_list))
13591358
iob.complete(&iob);
1360-
else if (!pos)
1361-
return 0;
1362-
1363-
prev = start;
1364-
wq_list_for_each_resume(pos, prev) {
1365-
struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list);
13661359

1360+
list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, iopoll_node) {
13671361
/* order with io_complete_rw_iopoll(), e.g. ->result updates */
13681362
if (!smp_load_acquire(&req->iopoll_completed))
1369-
break;
1363+
continue;
1364+
list_del(&req->iopoll_node);
1365+
wq_list_add_tail(&req->comp_list, &ctx->submit_state.compl_reqs);
13701366
nr_events++;
13711367
req->cqe.flags = io_put_kbuf(req, req->cqe.res, NULL);
13721368
if (req->opcode != IORING_OP_URING_CMD)
13731369
io_req_rw_cleanup(req, 0);
13741370
}
1375-
if (unlikely(!nr_events))
1376-
return 0;
1377-
1378-
pos = start ? start->next : ctx->iopoll_list.first;
1379-
wq_list_cut(&ctx->iopoll_list, prev, start);
1380-
1381-
if (WARN_ON_ONCE(!wq_list_empty(&ctx->submit_state.compl_reqs)))
1382-
return 0;
1383-
ctx->submit_state.compl_reqs.first = pos;
1384-
__io_submit_flush_completions(ctx);
1371+
if (nr_events)
1372+
__io_submit_flush_completions(ctx);
13851373
return nr_events;
13861374
}
13871375

io_uring/slist.h

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,6 @@
99
#define wq_list_for_each(pos, prv, head) \
1010
for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next)
1111

12-
#define wq_list_for_each_resume(pos, prv) \
13-
for (; pos; prv = pos, pos = (pos)->next)
14-
1512
#define wq_list_empty(list) (READ_ONCE((list)->first) == NULL)
1613

1714
#define INIT_WQ_LIST(list) do { \
@@ -43,15 +40,6 @@ static inline void wq_list_add_tail(struct io_wq_work_node *node,
4340
}
4441
}
4542

46-
static inline void wq_list_add_head(struct io_wq_work_node *node,
47-
struct io_wq_work_list *list)
48-
{
49-
node->next = list->first;
50-
if (!node->next)
51-
list->last = node;
52-
WRITE_ONCE(list->first, node);
53-
}
54-
5543
static inline void wq_list_cut(struct io_wq_work_list *list,
5644
struct io_wq_work_node *last,
5745
struct io_wq_work_node *prev)

io_uring/sqpoll.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, struct io_sq_data *sqd,
212212
if (cap_entries && to_submit > IORING_SQPOLL_CAP_ENTRIES_VALUE)
213213
to_submit = IORING_SQPOLL_CAP_ENTRIES_VALUE;
214214

215-
if (to_submit || !wq_list_empty(&ctx->iopoll_list)) {
215+
if (to_submit || !list_empty(&ctx->iopoll_list)) {
216216
const struct cred *creds = NULL;
217217

218218
io_sq_start_worktime(ist);
@@ -221,7 +221,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, struct io_sq_data *sqd,
221221
creds = override_creds(ctx->sq_creds);
222222

223223
mutex_lock(&ctx->uring_lock);
224-
if (!wq_list_empty(&ctx->iopoll_list))
224+
if (!list_empty(&ctx->iopoll_list))
225225
io_do_iopoll(ctx, true);
226226

227227
/*
@@ -344,7 +344,7 @@ static int io_sq_thread(void *data)
344344
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
345345
int ret = __io_sq_thread(ctx, sqd, cap_entries, &ist);
346346

347-
if (!sqt_spin && (ret > 0 || !wq_list_empty(&ctx->iopoll_list)))
347+
if (!sqt_spin && (ret > 0 || !list_empty(&ctx->iopoll_list)))
348348
sqt_spin = true;
349349
}
350350
if (io_sq_tw(&retry_list, IORING_TW_CAP_ENTRIES_VALUE))
@@ -379,7 +379,7 @@ static int io_sq_thread(void *data)
379379
atomic_or(IORING_SQ_NEED_WAKEUP,
380380
&ctx->rings->sq_flags);
381381
if ((ctx->flags & IORING_SETUP_IOPOLL) &&
382-
!wq_list_empty(&ctx->iopoll_list)) {
382+
!list_empty(&ctx->iopoll_list)) {
383383
needs_sched = false;
384384
break;
385385
}

0 commit comments

Comments
 (0)