Skip to content

Commit dbc7d45

Browse files
committed
io_uring: manage provided buffers strictly ordered
Workloads using provided buffers benefit from using and returning buffers in the right order, and so do TLBs for that matter. Manage the internal buffer list in a straight list, rather than use the head buffer as the insertion node. Use a hashed list for the buffer group IDs instead of xarray; the overhead is much lower this way. xarray provides internal locking and other trickery that is handy for some use cases, but io_uring already locks internally for the buffer manipulation and needs none of that. This is good for about a 2% reduction in overhead, a combination of the improved management and the fact that the workload has an easier time bundling back provided buffers. Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 9aa8dfd commit dbc7d45

1 file changed

Lines changed: 92 additions & 62 deletions

File tree

fs/io_uring.c

Lines changed: 92 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,12 @@ struct io_rsrc_data {
264264
bool quiesce;
265265
};
266266

267+
/*
 * One group of provided buffers. Groups are looked up by bgid via a
 * hashed list on the ring ctx (see io_buffer_get_list()); the buffers
 * themselves are kept strictly ordered on buf_list, consumed from the
 * head and returned/appended at the tail.
 */
struct io_buffer_list {
268+
/* link into the ctx->io_buffers[] hash bucket for this bgid */
struct list_head list;
269+
/* the provided io_buffer entries of this group, in order */
struct list_head buf_list;
270+
/* buffer group ID this list serves */
__u16 bgid;
271+
};
272+
267273
struct io_buffer {
268274
struct list_head list;
269275
__u64 addr;
@@ -334,6 +340,8 @@ struct io_ev_fd {
334340
struct rcu_head rcu;
335341
};
336342

343+
#define IO_BUFFERS_HASH_BITS 5
344+
337345
struct io_ring_ctx {
338346
/* const or read-mostly hot data */
339347
struct {
@@ -386,7 +394,7 @@ struct io_ring_ctx {
386394
struct list_head timeout_list;
387395
struct list_head ltimeout_list;
388396
struct list_head cq_overflow_list;
389-
struct xarray io_buffers;
397+
struct list_head *io_buffers;
390398
struct list_head io_buffers_cache;
391399
struct list_head apoll_cache;
392400
struct xarray personalities;
@@ -1361,32 +1369,34 @@ static inline unsigned int io_put_kbuf(struct io_kiocb *req,
13611369
return cflags;
13621370
}
13631371

1372+
/*
 * Find the io_buffer_list for @bgid by walking the hash bucket it maps
 * to. Returns NULL if no group with that ID has been registered.
 * Caller must hold ctx->uring_lock (the hashed lists are not otherwise
 * protected).
 */
static struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
1373+
unsigned int bgid)
1374+
{
1375+
struct list_head *hash_list;
1376+
struct io_buffer_list *bl;
1377+
1378+
hash_list = &ctx->io_buffers[hash_32(bgid, IO_BUFFERS_HASH_BITS)];
1379+
list_for_each_entry(bl, hash_list, list)
1380+
/* bgid == -1U acts as a wildcard and matches the first group
 * found in the bucket — NOTE(review): no caller passing -1U is
 * visible in this hunk; confirm against the full file. */
if (bl->bgid == bgid || bgid == -1U)
1381+
return bl;
1382+
1383+
return NULL;
1384+
}
1385+
13641386
/*
 * Return a selected provided buffer to its group so it can be handed
 * out again. No-op unless the request actually selected a buffer
 * (REQ_F_BUFFER_SELECTED). Must be called with ctx->uring_lock held.
 */
static void io_kbuf_recycle(struct io_kiocb *req)
13651387
{
13661388
struct io_ring_ctx *ctx = req->ctx;
1367-
struct io_buffer *head, *buf;
1389+
struct io_buffer_list *bl;
1390+
struct io_buffer *buf;
13681391

13691392
if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
13701393
return;
13711394

13721395
lockdep_assert_held(&ctx->uring_lock);
13731396

13741397
buf = req->kbuf;
1375-
1376-
head = xa_load(&ctx->io_buffers, buf->bgid);
1377-
if (head) {
1378-
list_add(&buf->list, &head->list);
1379-
} else {
1380-
int ret;
1381-
1382-
INIT_LIST_HEAD(&buf->list);
1383-
1384-
/* if we fail, just leave buffer attached */
1385-
ret = xa_insert(&ctx->io_buffers, buf->bgid, buf, GFP_KERNEL);
1386-
if (unlikely(ret < 0))
1387-
return;
1388-
}
1389-
1398+
/* NOTE(review): bl is used without a NULL check — relies on the group
 * list still existing since this buffer was selected from it, and on
 * groups only being freed at ctx teardown; confirm in full file. */
bl = io_buffer_get_list(ctx, buf->bgid);
1399+
/* push onto the head so the buffer is reused next, keeping ordering */
list_add(&buf->list, &bl->buf_list);
13901400
req->flags &= ~REQ_F_BUFFER_SELECTED;
13911401
req->kbuf = NULL;
13921402
}
@@ -1501,7 +1511,7 @@ static __cold void io_fallback_req_func(struct work_struct *work)
15011511
static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
15021512
{
15031513
struct io_ring_ctx *ctx;
1504-
int hash_bits;
1514+
int i, hash_bits;
15051515

15061516
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
15071517
if (!ctx)
@@ -1528,6 +1538,13 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
15281538
/* set invalid range, so io_import_fixed() fails meeting it */
15291539
ctx->dummy_ubuf->ubuf = -1UL;
15301540

1541+
ctx->io_buffers = kcalloc(1U << IO_BUFFERS_HASH_BITS,
1542+
sizeof(struct list_head), GFP_KERNEL);
1543+
if (!ctx->io_buffers)
1544+
goto err;
1545+
for (i = 0; i < (1U << IO_BUFFERS_HASH_BITS); i++)
1546+
INIT_LIST_HEAD(&ctx->io_buffers[i]);
1547+
15311548
if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
15321549
PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
15331550
goto err;
@@ -1539,7 +1556,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
15391556
INIT_LIST_HEAD(&ctx->io_buffers_cache);
15401557
INIT_LIST_HEAD(&ctx->apoll_cache);
15411558
init_completion(&ctx->ref_comp);
1542-
xa_init_flags(&ctx->io_buffers, XA_FLAGS_ALLOC1);
15431559
xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
15441560
mutex_init(&ctx->uring_lock);
15451561
init_waitqueue_head(&ctx->cq_wait);
@@ -1568,6 +1584,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
15681584
err:
15691585
kfree(ctx->dummy_ubuf);
15701586
kfree(ctx->cancel_hash);
1587+
kfree(ctx->io_buffers);
15711588
kfree(ctx);
15721589
return NULL;
15731590
}
@@ -3351,30 +3368,36 @@ static void io_ring_submit_lock(struct io_ring_ctx *ctx, bool needs_lock)
33513368
mutex_lock(&ctx->uring_lock);
33523369
}
33533370

3371+
/*
 * Register a new (empty) buffer group @bl under @bgid: initialize its
 * buffer list and hook it into the ctx hash bucket so subsequent
 * io_buffer_get_list() lookups find it. Caller holds ctx->uring_lock.
 */
static void io_buffer_add_list(struct io_ring_ctx *ctx,
3372+
struct io_buffer_list *bl, unsigned int bgid)
3373+
{
3374+
struct list_head *list;
3375+
3376+
/* same hash function as the lookup side in io_buffer_get_list() */
list = &ctx->io_buffers[hash_32(bgid, IO_BUFFERS_HASH_BITS)];
3377+
INIT_LIST_HEAD(&bl->buf_list);
3378+
bl->bgid = bgid;
3379+
list_add(&bl->list, list);
3380+
}
3381+
33543382
static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
33553383
int bgid, unsigned int issue_flags)
33563384
{
33573385
struct io_buffer *kbuf = req->kbuf;
3358-
struct io_buffer *head;
33593386
bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
3387+
struct io_ring_ctx *ctx = req->ctx;
3388+
struct io_buffer_list *bl;
33603389

33613390
if (req->flags & REQ_F_BUFFER_SELECTED)
33623391
return kbuf;
33633392

3364-
io_ring_submit_lock(req->ctx, needs_lock);
3393+
io_ring_submit_lock(ctx, needs_lock);
33653394

3366-
lockdep_assert_held(&req->ctx->uring_lock);
3395+
lockdep_assert_held(&ctx->uring_lock);
33673396

3368-
head = xa_load(&req->ctx->io_buffers, bgid);
3369-
if (head) {
3370-
if (!list_empty(&head->list)) {
3371-
kbuf = list_last_entry(&head->list, struct io_buffer,
3372-
list);
3373-
list_del(&kbuf->list);
3374-
} else {
3375-
kbuf = head;
3376-
xa_erase(&req->ctx->io_buffers, bgid);
3377-
}
3397+
bl = io_buffer_get_list(ctx, bgid);
3398+
if (bl && !list_empty(&bl->buf_list)) {
3399+
kbuf = list_first_entry(&bl->buf_list, struct io_buffer, list);
3400+
list_del(&kbuf->list);
33783401
if (*len > kbuf->len)
33793402
*len = kbuf->len;
33803403
req->flags |= REQ_F_BUFFER_SELECTED;
@@ -4669,8 +4692,8 @@ static int io_remove_buffers_prep(struct io_kiocb *req,
46694692
return 0;
46704693
}
46714694

4672-
static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
4673-
int bgid, unsigned nbufs)
4695+
static int __io_remove_buffers(struct io_ring_ctx *ctx,
4696+
struct io_buffer_list *bl, unsigned nbufs)
46744697
{
46754698
unsigned i = 0;
46764699

@@ -4679,17 +4702,16 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
46794702
return 0;
46804703

46814704
/* the head kbuf is the list itself */
4682-
while (!list_empty(&buf->list)) {
4705+
while (!list_empty(&bl->buf_list)) {
46834706
struct io_buffer *nxt;
46844707

4685-
nxt = list_first_entry(&buf->list, struct io_buffer, list);
4708+
nxt = list_first_entry(&bl->buf_list, struct io_buffer, list);
46864709
list_del(&nxt->list);
46874710
if (++i == nbufs)
46884711
return i;
46894712
cond_resched();
46904713
}
46914714
i++;
4692-
xa_erase(&ctx->io_buffers, bgid);
46934715

46944716
return i;
46954717
}
@@ -4698,7 +4720,7 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
46984720
{
46994721
struct io_provide_buf *p = &req->pbuf;
47004722
struct io_ring_ctx *ctx = req->ctx;
4701-
struct io_buffer *head;
4723+
struct io_buffer_list *bl;
47024724
int ret = 0;
47034725
bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
47044726

@@ -4707,9 +4729,9 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
47074729
lockdep_assert_held(&ctx->uring_lock);
47084730

47094731
ret = -ENOENT;
4710-
head = xa_load(&ctx->io_buffers, p->bgid);
4711-
if (head)
4712-
ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs);
4732+
bl = io_buffer_get_list(ctx, p->bgid);
4733+
if (bl)
4734+
ret = __io_remove_buffers(ctx, bl, p->nbufs);
47134735
if (ret < 0)
47144736
req_set_fail(req);
47154737

@@ -4798,7 +4820,7 @@ static int io_refill_buffer_cache(struct io_ring_ctx *ctx)
47984820
}
47994821

48004822
static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
4801-
struct io_buffer **head)
4823+
struct io_buffer_list *bl)
48024824
{
48034825
struct io_buffer *buf;
48044826
u64 addr = pbuf->addr;
@@ -4810,45 +4832,43 @@ static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
48104832
break;
48114833
buf = list_first_entry(&ctx->io_buffers_cache, struct io_buffer,
48124834
list);
4813-
list_del(&buf->list);
4835+
list_move_tail(&buf->list, &bl->buf_list);
48144836
buf->addr = addr;
48154837
buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
48164838
buf->bid = bid;
48174839
buf->bgid = pbuf->bgid;
48184840
addr += pbuf->len;
48194841
bid++;
4820-
if (!*head) {
4821-
INIT_LIST_HEAD(&buf->list);
4822-
*head = buf;
4823-
} else {
4824-
list_add_tail(&buf->list, &(*head)->list);
4825-
}
48264842
cond_resched();
48274843
}
48284844

4829-
return i ? i : -ENOMEM;
4845+
return i ? 0 : -ENOMEM;
48304846
}
48314847

48324848
static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
48334849
{
48344850
struct io_provide_buf *p = &req->pbuf;
48354851
struct io_ring_ctx *ctx = req->ctx;
4836-
struct io_buffer *head, *list;
4852+
struct io_buffer_list *bl;
48374853
int ret = 0;
48384854
bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
48394855

48404856
io_ring_submit_lock(ctx, needs_lock);
48414857

48424858
lockdep_assert_held(&ctx->uring_lock);
48434859

4844-
list = head = xa_load(&ctx->io_buffers, p->bgid);
4845-
4846-
ret = io_add_buffers(ctx, p, &head);
4847-
if (ret >= 0 && !list) {
4848-
ret = xa_insert(&ctx->io_buffers, p->bgid, head, GFP_KERNEL);
4849-
if (ret < 0)
4850-
__io_remove_buffers(ctx, head, p->bgid, -1U);
4860+
bl = io_buffer_get_list(ctx, p->bgid);
4861+
if (unlikely(!bl)) {
4862+
bl = kmalloc(sizeof(*bl), GFP_KERNEL);
4863+
if (!bl) {
4864+
ret = -ENOMEM;
4865+
goto err;
4866+
}
4867+
io_buffer_add_list(ctx, bl, p->bgid);
48514868
}
4869+
4870+
ret = io_add_buffers(ctx, p, bl);
4871+
err:
48524872
if (ret < 0)
48534873
req_set_fail(req);
48544874
/* complete before unlock, IOPOLL may need the lock */
@@ -9936,11 +9956,20 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx)
99369956

99379957
static void io_destroy_buffers(struct io_ring_ctx *ctx)
99389958
{
9939-
struct io_buffer *buf;
9940-
unsigned long index;
9959+
int i;
9960+
9961+
for (i = 0; i < (1U << IO_BUFFERS_HASH_BITS); i++) {
9962+
struct list_head *list = &ctx->io_buffers[i];
99419963

9942-
xa_for_each(&ctx->io_buffers, index, buf)
9943-
__io_remove_buffers(ctx, buf, index, -1U);
9964+
while (!list_empty(list)) {
9965+
struct io_buffer_list *bl;
9966+
9967+
bl = list_first_entry(list, struct io_buffer_list, list);
9968+
__io_remove_buffers(ctx, bl, -1U);
9969+
list_del(&bl->list);
9970+
kfree(bl);
9971+
}
9972+
}
99449973

99459974
while (!list_empty(&ctx->io_buffers_pages)) {
99469975
struct page *page;
@@ -10049,6 +10078,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
1004910078
io_free_napi_list(ctx);
1005010079
kfree(ctx->cancel_hash);
1005110080
kfree(ctx->dummy_ubuf);
10081+
kfree(ctx->io_buffers);
1005210082
kfree(ctx);
1005310083
}
1005410084

0 commit comments

Comments (0)