
Commit 595e522

io_uring/poll: don't enable lazy wake for POLLEXCLUSIVE
There are a few quirks around using lazy wake for poll unconditionally, and one of them is related to EPOLLEXCLUSIVE. Those may trigger exclusive wakeups, which wake a limited number of entries in the wait queue. If that wake number is less than the number of entries someone is waiting for (and that someone is also using DEFER_TASKRUN), then we can get stuck waiting for more entries while we should be processing the ones we already got.

If we're doing exclusive poll waits, flag the request as not being compatible with lazy wakeups.

Reported-by: Pavel Begunkov <asml.silence@gmail.com>
Fixes: 6ce4a93 ("io_uring/poll: use IOU_F_TWQ_LAZY_WAKE for wakeups")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent: 705318a
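
As a rough userspace illustration of the stall the message describes (not part of the commit; the setup is hypothetical and error handling is omitted), consider a DEFER_TASKRUN ring with two exclusive poll waiters armed on one eventfd:

    /* build: gcc repro.c -luring  (liburing with DEFER_TASKRUN support assumed) */
    #include <liburing.h>
    #include <sys/eventfd.h>
    #include <sys/epoll.h>
    #include <stdint.h>
    #include <unistd.h>

    int main(void)
    {
        struct io_uring_params p = {
            /* DEFER_TASKRUN is what makes the lazy-wake accounting matter */
            .flags = IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN,
        };
        struct io_uring ring;
        int efd = eventfd(0, 0);
        uint64_t val = 1;

        io_uring_queue_init_params(8, &ring, &p);

        /* two exclusive poll waiters on the same file */
        for (int i = 0; i < 2; i++) {
            struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
            io_uring_prep_poll_add(sqe, efd, EPOLLIN | EPOLLEXCLUSIVE);
        }
        io_uring_submit(&ring);

        /* one event: an exclusive wakeup wakes only one of the waiters */
        write(efd, &val, sizeof(val));

        /*
         * One piece of task work is queued, but we wait for two CQEs.
         * With lazy wake, 1 < 2 meant the task was never woken to process
         * the item it already had; the fix makes this wakeup eager so the
         * first completion gets posted.
         */
        io_uring_submit_and_wait(&ring, 2);
        return 0;
    }

One write triggers a single exclusive wakeup, so only one poll entry queues task work; the fix ensures that single item still wakes the task instead of being deferred until a wait target that may never be reached.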

2 files changed: 20 additions & 3 deletions

include/linux/io_uring_types.h

Lines changed: 3 additions & 0 deletions
@@ -434,6 +434,7 @@ enum {
 	/* keep async read/write and isreg together and in order */
 	REQ_F_SUPPORT_NOWAIT_BIT,
 	REQ_F_ISREG_BIT,
+	REQ_F_POLL_NO_LAZY_BIT,
 
 	/* not a real bit, just to check we're not overflowing the space */
 	__REQ_F_LAST_BIT,
@@ -501,6 +502,8 @@ enum {
 	REQ_F_CLEAR_POLLIN = BIT(REQ_F_CLEAR_POLLIN_BIT),
 	/* hashed into ->cancel_hash_locked, protected by ->uring_lock */
 	REQ_F_HASH_LOCKED = BIT(REQ_F_HASH_LOCKED_BIT),
+	/* don't use lazy poll wake for this request */
+	REQ_F_POLL_NO_LAZY = BIT(REQ_F_POLL_NO_LAZY_BIT),
 };
 
 typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
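
The new flag follows io_uring's usual pattern: a bit index in one enum, and a mask built from it with BIT() in a second enum. A standalone sketch of how such a flag is set and tested (BIT() below is a stand-in for the kernel macro from linux/bits.h):

    #include <stdio.h>

    #define BIT(nr) (1U << (nr))   /* stand-in for the kernel's BIT() */

    enum {
        REQ_F_SUPPORT_NOWAIT_BIT,
        REQ_F_ISREG_BIT,
        REQ_F_POLL_NO_LAZY_BIT,
        __REQ_F_LAST_BIT,           /* not a real bit, bounds check only */
    };

    enum {
        REQ_F_POLL_NO_LAZY = BIT(REQ_F_POLL_NO_LAZY_BIT),
    };

    int main(void)
    {
        unsigned flags = 0;

        flags |= REQ_F_POLL_NO_LAZY;        /* as __io_queue_proc() does */
        if (flags & REQ_F_POLL_NO_LAZY)     /* as __io_poll_execute() checks */
            printf("lazy wake disabled\n");
        return 0;
    }

Keeping indices and masks in separate enums lets the flags field stay a plain integer while __REQ_F_LAST_BIT guards against running out of bits.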

io_uring/poll.c

Lines changed: 17 additions & 3 deletions
@@ -366,11 +366,16 @@ void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
 
 static void __io_poll_execute(struct io_kiocb *req, int mask)
 {
+	unsigned flags = 0;
+
 	io_req_set_res(req, mask, 0);
 	req->io_task_work.func = io_poll_task_func;
 
 	trace_io_uring_task_add(req, mask);
-	__io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE);
+
+	if (!(req->flags & REQ_F_POLL_NO_LAZY))
+		flags = IOU_F_TWQ_LAZY_WAKE;
+	__io_req_task_work_add(req, flags);
 }
 
 static inline void io_poll_execute(struct io_kiocb *req, int res)
@@ -526,10 +531,19 @@ static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
 	poll->head = head;
 	poll->wait.private = (void *) wqe_private;
 
-	if (poll->events & EPOLLEXCLUSIVE)
+	if (poll->events & EPOLLEXCLUSIVE) {
+		/*
+		 * Exclusive waits may only wake a limited amount of entries
+		 * rather than all of them, this may interfere with lazy
+		 * wake if someone does wait(events > 1). Ensure we don't do
+		 * lazy wake for those, as we need to process each one as they
+		 * come in.
+		 */
+		req->flags |= REQ_F_POLL_NO_LAZY;
 		add_wait_queue_exclusive(head, &poll->wait);
-	else
+	} else {
 		add_wait_queue(head, &poll->wait);
+	}
 }
 
 static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
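
For background, the "limited number of entries" behavior comes from the kernel's wait-queue wakeup loop, which stops once nr_exclusive entries marked WQ_FLAG_EXCLUSIVE have been successfully woken (non-exclusive entries are always woken). A simplified standalone model of that loop (locking and real list handling omitted; not the actual kernel code):

    #include <stdio.h>

    #define WQ_FLAG_EXCLUSIVE 0x01

    struct wait_entry {
        unsigned flags;
        int (*func)(struct wait_entry *entry);  /* nonzero: waiter woken */
    };

    /*
     * Modeled on __wake_up_common(): wake every entry in order, but stop
     * after nr_exclusive exclusive entries report a successful wakeup.
     */
    static void wake_up_common(struct wait_entry *entries, int n, int nr_exclusive)
    {
        for (int i = 0; i < n; i++) {
            struct wait_entry *curr = &entries[i];
            int ret = curr->func(curr);

            if (ret && (curr->flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
                break;
        }
    }

    static int wake_fn(struct wait_entry *e)
    {
        printf("woken (exclusive=%d)\n", !!(e->flags & WQ_FLAG_EXCLUSIVE));
        return 1;
    }

    int main(void)
    {
        /* exclusive waiters queue at the tail, after non-exclusive ones */
        struct wait_entry q[3] = {
            { 0, wake_fn },
            { WQ_FLAG_EXCLUSIVE, wake_fn },
            { WQ_FLAG_EXCLUSIVE, wake_fn },
        };

        wake_up_common(q, 3, 1);    /* second exclusive waiter stays asleep */
        return 0;
    }

With nr_exclusive == 1, a single event wakes a single EPOLLEXCLUSIVE waiter, which is exactly why the corresponding task-work add must not be lazy: each wakeup has to be processed as it comes in.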
