Skip to content

Commit 5247c03

Browse files
isilence authored and axboe committed
io_uring: introduce non-circular SQ
Outside of SQPOLL, normally SQ entries are consumed by the time the submission syscall returns. For those cases we don't need a circular buffer and the head/tail tracking, instead the kernel can assume that entries always start from the beginning of the SQ at index 0. This patch introduces a setup flag doing exactly that. It's simpler and helps to keep SQEs hot in cache. The feature is optional and enabled by setting IORING_SETUP_SQ_REWIND. The flag is rejected if passed together with SQPOLL as it'd require waiting for SQ before each submission. It also requires IORING_SETUP_NO_SQARRAY; working with the SQ array could be supported, but it's unlikely there will be users, so leave more space for future optimisations. Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 0105b05 commit 5247c03

3 files changed

Lines changed: 36 additions & 8 deletions

File tree

include/uapi/linux/io_uring.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,18 @@ enum io_uring_sqe_flags_bit {
237237
*/
238238
#define IORING_SETUP_SQE_MIXED (1U << 19)
239239

240+
/*
241+
* When set, io_uring ignores SQ head and tail and fetches SQEs to submit
242+
* starting from index 0 instead of from the index stored in the head pointer.
243+
* IOW, the user should place all SQEs at the beginning of the SQ memory
244+
* before issuing a submission syscall.
245+
*
246+
* It requires IORING_SETUP_NO_SQARRAY and is incompatible with
247+
* IORING_SETUP_SQPOLL. The user must also never change the SQ head and tail
248+
* values and keep them set to 0. Any other value is undefined behaviour.
249+
*/
250+
#define IORING_SETUP_SQ_REWIND (1U << 20)
251+
240252
enum io_uring_op {
241253
IORING_OP_NOP,
242254
IORING_OP_READV,

io_uring/io_uring.c

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1945,12 +1945,16 @@ static void io_commit_sqring(struct io_ring_ctx *ctx)
19451945
{
19461946
struct io_rings *rings = ctx->rings;
19471947

1948-
/*
1949-
* Ensure any loads from the SQEs are done at this point,
1950-
* since once we write the new head, the application could
1951-
* write new data to them.
1952-
*/
1953-
smp_store_release(&rings->sq.head, ctx->cached_sq_head);
1948+
if (ctx->flags & IORING_SETUP_SQ_REWIND) {
1949+
ctx->cached_sq_head = 0;
1950+
} else {
1951+
/*
1952+
* Ensure any loads from the SQEs are done at this point,
1953+
* since once we write the new head, the application could
1954+
* write new data to them.
1955+
*/
1956+
smp_store_release(&rings->sq.head, ctx->cached_sq_head);
1957+
}
19541958
}
19551959

19561960
/*
@@ -1996,10 +2000,15 @@ static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
19962000
int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
19972001
__must_hold(&ctx->uring_lock)
19982002
{
1999-
unsigned int entries = io_sqring_entries(ctx);
2003+
unsigned int entries;
20002004
unsigned int left;
20012005
int ret;
20022006

2007+
if (ctx->flags & IORING_SETUP_SQ_REWIND)
2008+
entries = ctx->sq_entries;
2009+
else
2010+
entries = io_sqring_entries(ctx);
2011+
20032012
entries = min(nr, entries);
20042013
if (unlikely(!entries))
20052014
return 0;
@@ -2728,6 +2737,12 @@ static int io_uring_sanitise_params(struct io_uring_params *p)
27282737
if (flags & ~IORING_SETUP_FLAGS)
27292738
return -EINVAL;
27302739

2740+
if (flags & IORING_SETUP_SQ_REWIND) {
2741+
if ((flags & IORING_SETUP_SQPOLL) ||
2742+
!(flags & IORING_SETUP_NO_SQARRAY))
2743+
return -EINVAL;
2744+
}
2745+
27312746
/* There is no way to mmap rings without a real fd */
27322747
if ((flags & IORING_SETUP_REGISTERED_FD_ONLY) &&
27332748
!(flags & IORING_SETUP_NO_MMAP))

io_uring/io_uring.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ struct io_ctx_config {
6969
IORING_SETUP_NO_SQARRAY |\
7070
IORING_SETUP_HYBRID_IOPOLL |\
7171
IORING_SETUP_CQE_MIXED |\
72-
IORING_SETUP_SQE_MIXED)
72+
IORING_SETUP_SQE_MIXED |\
73+
IORING_SETUP_SQ_REWIND)
7374

7475
#define IORING_ENTER_FLAGS (IORING_ENTER_GETEVENTS |\
7576
IORING_ENTER_SQ_WAKEUP |\

0 commit comments

Comments
 (0)