@@ -1713,9 +1713,9 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
 }
 
 /* Returns true if there are no backlogged entries after the flush */
-static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
-				     struct task_struct *tsk,
-				     struct files_struct *files)
+static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
+				       struct task_struct *tsk,
+				       struct files_struct *files)
 {
 	struct io_rings *rings = ctx->rings;
 	struct io_kiocb *req, *tmp;
@@ -1768,6 +1768,20 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
 	return all_flushed;
 }
 
+static void io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
+				     struct task_struct *tsk,
+				     struct files_struct *files)
+{
+	if (test_bit(0, &ctx->cq_check_overflow)) {
+		/* iopoll syncs against uring_lock, not completion_lock */
+		if (ctx->flags & IORING_SETUP_IOPOLL)
+			mutex_lock(&ctx->uring_lock);
+		__io_cqring_overflow_flush(ctx, force, tsk, files);
+		if (ctx->flags & IORING_SETUP_IOPOLL)
+			mutex_unlock(&ctx->uring_lock);
+	}
+}
+
 static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
 {
 	struct io_ring_ctx *ctx = req->ctx;
@@ -2314,20 +2328,8 @@ static void io_double_put_req(struct io_kiocb *req)
 	io_free_req(req);
 }
 
-static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush)
+static unsigned io_cqring_events(struct io_ring_ctx *ctx)
 {
-	if (test_bit(0, &ctx->cq_check_overflow)) {
-		/*
-		 * noflush == true is from the waitqueue handler, just ensure
-		 * we wake up the task, and the next invocation will flush the
-		 * entries. We cannot safely to it from here.
-		 */
-		if (noflush)
-			return -1U;
-
-		io_cqring_overflow_flush(ctx, false, NULL, NULL);
-	}
-
 	/* See comment at the top of this file */
 	smp_rmb();
 	return __io_cqring_events(ctx);
@@ -2552,7 +2554,9 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
 		 * If we do, we can potentially be spinning for commands that
 		 * already triggered a CQE (eg in error).
 		 */
-		if (io_cqring_events(ctx, false))
+		if (test_bit(0, &ctx->cq_check_overflow))
+			__io_cqring_overflow_flush(ctx, false, NULL, NULL);
+		if (io_cqring_events(ctx))
 			break;
 
 		/*
@@ -6827,7 +6831,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 
 	/* if we have a backlog and couldn't flush it all, return BUSY */
 	if (test_bit(0, &ctx->sq_check_overflow)) {
-		if (!io_cqring_overflow_flush(ctx, false, NULL, NULL))
+		if (!__io_cqring_overflow_flush(ctx, false, NULL, NULL))
 			return -EBUSY;
 	}
 
@@ -7090,7 +7094,7 @@ struct io_wait_queue {
 	unsigned nr_timeouts;
 };
 
-static inline bool io_should_wake(struct io_wait_queue *iowq, bool noflush)
+static inline bool io_should_wake(struct io_wait_queue *iowq)
 {
 	struct io_ring_ctx *ctx = iowq->ctx;
 
@@ -7099,7 +7103,7 @@ static inline bool io_should_wake(struct io_wait_queue *iowq, bool noflush)
 	 * started waiting. For timeouts, we always want to return to userspace,
 	 * regardless of event count.
 	 */
-	return io_cqring_events(ctx, noflush) >= iowq->to_wait ||
+	return io_cqring_events(ctx) >= iowq->to_wait ||
 		atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
 }
 
@@ -7109,11 +7113,13 @@ static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
 	struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue,
 							wq);
 
-	/* use noflush == true, as we can't safely rely on locking context */
-	if (!io_should_wake(iowq, true))
-		return -1;
-
-	return autoremove_wake_function(curr, mode, wake_flags, key);
+	/*
+	 * Cannot safely flush overflowed CQEs from here, ensure we wake up
+	 * the task, and the next invocation will do it.
+	 */
+	if (io_should_wake(iowq) || test_bit(0, &iowq->ctx->cq_check_overflow))
+		return autoremove_wake_function(curr, mode, wake_flags, key);
+	return -1;
 }
 
 static int io_run_task_work_sig(void)
@@ -7150,7 +7156,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 	int ret = 0;
 
 	do {
-		if (io_cqring_events(ctx, false) >= min_events)
+		io_cqring_overflow_flush(ctx, false, NULL, NULL);
+		if (io_cqring_events(ctx) >= min_events)
 			return 0;
 		if (!io_run_task_work())
 			break;
@@ -7178,6 +7185,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 	iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
 	trace_io_uring_cqring_wait(ctx, min_events);
 	do {
+		io_cqring_overflow_flush(ctx, false, NULL, NULL);
 		prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
 						TASK_INTERRUPTIBLE);
 		/* make sure we run task_work before checking for signals */
@@ -7186,8 +7194,10 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			continue;
 		else if (ret < 0)
 			break;
-		if (io_should_wake(&iowq, false))
+		if (io_should_wake(&iowq))
 			break;
+		if (test_bit(0, &ctx->cq_check_overflow))
+			continue;
 		if (uts) {
 			timeout = schedule_timeout(timeout);
 			if (timeout == 0) {
@@ -8625,7 +8635,8 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
 	smp_rmb();
 	if (!io_sqring_full(ctx))
 		mask |= EPOLLOUT | EPOLLWRNORM;
-	if (io_cqring_events(ctx, false))
+	io_cqring_overflow_flush(ctx, false, NULL, NULL);
+	if (io_cqring_events(ctx))
 		mask |= EPOLLIN | EPOLLRDNORM;
 
 	return mask;
@@ -8683,7 +8694,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 	/* if force is set, the ring is going away. always drop after that */
 	ctx->cq_overflow_flushed = 1;
 	if (ctx->rings)
-		io_cqring_overflow_flush(ctx, true, NULL, NULL);
+		__io_cqring_overflow_flush(ctx, true, NULL, NULL);
 	mutex_unlock(&ctx->uring_lock);
 
 	io_kill_timeouts(ctx, NULL, NULL);
@@ -8857,9 +8868,7 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
 	}
 
 	io_cancel_defer_files(ctx, task, files);
-	io_ring_submit_lock(ctx, (ctx->flags & IORING_SETUP_IOPOLL));
 	io_cqring_overflow_flush(ctx, true, task, files);
-	io_ring_submit_unlock(ctx, (ctx->flags & IORING_SETUP_IOPOLL));
 
 	if (!files)
 		__io_uring_cancel_task_requests(ctx, task);
@@ -9195,13 +9204,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 	 */
 	ret = 0;
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
-		if (!list_empty_careful(&ctx->cq_overflow_list)) {
-			bool needs_lock = ctx->flags & IORING_SETUP_IOPOLL;
+		io_cqring_overflow_flush(ctx, false, NULL, NULL);
 
-			io_ring_submit_lock(ctx, needs_lock);
-			io_cqring_overflow_flush(ctx, false, NULL, NULL);
-			io_ring_submit_unlock(ctx, needs_lock);
-		}
 		if (flags & IORING_ENTER_SQ_WAKEUP)
 			wake_up(&ctx->sq_data->wait);
 		if (flags & IORING_ENTER_SQ_WAIT)