Skip to content

Commit 46dc07d

Browse files
axboe authored and gregkh committed
io_uring: ensure ctx->rings is stable for task work flags manipulation
Commit 9618908 upstream.

If DEFER_TASKRUN | SETUP_TASKRUN is used and task work is added while the ring is being resized, it's possible for the OR'ing of IORING_SQ_TASKRUN to happen in the small window of swapping into the new rings and the old rings being freed.

Prevent this by adding a 2nd ->rings pointer, ->rings_rcu, which is protected by RCU. The task work flags manipulation is inside RCU already, and if the resize ring freeing is done post an RCU synchronize, then there's no need to add locking to the fast path of task work additions.

Note: this is only done for DEFER_TASKRUN, as that's the only setup mode that supports ring resizing. If this ever changes, then they too need to use the io_ctx_mark_taskrun() helper.

Link: https://lore.kernel.org/io-uring/20260309062759.482210-1-naup96721@gmail.com/
Cc: stable@vger.kernel.org
Fixes: 79cfe9e ("io_uring/register: add IORING_REGISTER_RESIZE_RINGS")
Reported-by: Hao-Yu Yang <naup96721@gmail.com>
Suggested-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent a24f1d8 commit 46dc07d

3 files changed

Lines changed: 35 additions & 2 deletions

File tree

include/linux/io_uring_types.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -371,6 +371,7 @@ struct io_ring_ctx {
371371
* regularly bounce b/w CPUs.
372372
*/
373373
struct {
374+
struct io_rings __rcu *rings_rcu;
374375
struct llist_head work_llist;
375376
struct llist_head retry_llist;
376377
unsigned long check_cq;

io_uring/io_uring.c

Lines changed: 22 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1202,6 +1202,21 @@ void tctx_task_work(struct callback_head *cb)
12021202
WARN_ON_ONCE(ret);
12031203
}
12041204

1205+
/*
1206+
* Sets IORING_SQ_TASKRUN in the sq_flags shared with userspace, using the
1207+
* RCU protected rings pointer to be safe against concurrent ring resizing.
1208+
*/
1209+
static void io_ctx_mark_taskrun(struct io_ring_ctx *ctx)
1210+
{
1211+
lockdep_assert_in_rcu_read_lock();
1212+
1213+
if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) {
1214+
struct io_rings *rings = rcu_dereference(ctx->rings_rcu);
1215+
1216+
atomic_or(IORING_SQ_TASKRUN, &rings->sq_flags);
1217+
}
1218+
}
1219+
12051220
static void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
12061221
{
12071222
struct io_ring_ctx *ctx = req->ctx;
@@ -1256,8 +1271,7 @@ static void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
12561271
*/
12571272

12581273
if (!head) {
1259-
if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
1260-
atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
1274+
io_ctx_mark_taskrun(ctx);
12611275
if (ctx->has_evfd)
12621276
io_eventfd_signal(ctx, false);
12631277
}
@@ -1281,6 +1295,10 @@ static void io_req_normal_work_add(struct io_kiocb *req)
12811295
if (!llist_add(&req->io_task_work.node, &tctx->task_list))
12821296
return;
12831297

1298+
/*
1299+
* Doesn't need to use ->rings_rcu, as resizing isn't supported for
1300+
* !DEFER_TASKRUN.
1301+
*/
12841302
if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
12851303
atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
12861304

@@ -2760,6 +2778,7 @@ static void io_rings_free(struct io_ring_ctx *ctx)
27602778
io_free_region(ctx->user, &ctx->sq_region);
27612779
io_free_region(ctx->user, &ctx->ring_region);
27622780
ctx->rings = NULL;
2781+
RCU_INIT_POINTER(ctx->rings_rcu, NULL);
27632782
ctx->sq_sqes = NULL;
27642783
}
27652784

@@ -3389,6 +3408,7 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
33893408
if (ret)
33903409
return ret;
33913410
ctx->rings = rings = io_region_get_ptr(&ctx->ring_region);
3411+
rcu_assign_pointer(ctx->rings_rcu, rings);
33923412
if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
33933413
ctx->sq_array = (u32 *)((char *)rings + rl->sq_array_offset);
33943414

io_uring/register.c

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -545,7 +545,15 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
545545
ctx->sq_entries = p->sq_entries;
546546
ctx->cq_entries = p->cq_entries;
547547

548+
/*
549+
* Just mark any flag we may have missed and that the application
550+
* should act on unconditionally. Worst case it'll be an extra
551+
* syscall.
552+
*/
553+
atomic_or(IORING_SQ_TASKRUN | IORING_SQ_NEED_WAKEUP, &n.rings->sq_flags);
548554
ctx->rings = n.rings;
555+
rcu_assign_pointer(ctx->rings_rcu, n.rings);
556+
549557
ctx->sq_sqes = n.sq_sqes;
550558
swap_old(ctx, o, n, ring_region);
551559
swap_old(ctx, o, n, sq_region);
@@ -554,6 +562,10 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
554562
out:
555563
spin_unlock(&ctx->completion_lock);
556564
mutex_unlock(&ctx->mmap_lock);
565+
566+
/* Wait for concurrent io_ctx_mark_taskrun() */
567+
if (to_free == &o)
568+
synchronize_rcu_expedited();
557569
io_register_free_rings(ctx, to_free);
558570

559571
if (ctx->sq_data)

0 commit comments

Comments (0)