Skip to content

Commit cf96310

Browse files
spikeh authored and axboe committed
io_uring/zcrx: add io_zcrx_area
Add io_zcrx_area that represents a region of userspace memory that is used for zero copy. During ifq registration, userspace passes in the uaddr and len of userspace memory, which is then pinned by the kernel. Each net_iov is mapped to one of these pages.

The freelist is a spinlock protected list that keeps track of all the net_iovs/pages that aren't used.

For now, there is only one area per ifq and area registration happens implicitly as part of ifq registration. There is no API for adding/removing areas yet. The struct for area registration is there for future extensibility once we support multiple areas and TCP devmem.

Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: David Wei <dw@davidwei.uk>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/20250215000947.789731-3-dw@davidwei.uk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 6f37787 commit cf96310

5 files changed

Lines changed: 114 additions & 3 deletions

File tree

include/uapi/linux/io_uring.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -981,6 +981,15 @@ struct io_uring_zcrx_offsets {
981981
__u64 __resv[2];
982982
};
983983

984+
/*
 * Userspace description of one zero-copy receive memory area. It is read
 * through the area_ptr of the ifq registration argument and copied back to
 * userspace after a successful registration.
 */
struct io_uring_zcrx_area_reg {
985+
/* start of the user memory region; must be page aligned */
__u64 addr;
986+
/* length of the region in bytes; must be a multiple of the page size */
__u64 len;
987+
/* must be zero on input; filled in by the kernel (area id shifted by IORING_ZCRX_AREA_SHIFT) */
__u64 rq_area_token;
988+
/* no flags are accepted yet; must be zero */
__u32 flags;
989+
/* reserved, must be zero */
__u32 __resv1;
990+
/* reserved, must be zero */
__u64 __resv2[2];
991+
};
992+
984993
/*
985994
* Argument for IORING_REGISTER_ZCRX_IFQ
986995
*/

io_uring/rsrc.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
7777
return 0;
7878
}
7979

80-
static int io_buffer_validate(struct iovec *iov)
80+
int io_buffer_validate(struct iovec *iov)
8181
{
8282
unsigned long tmp, acct_len = iov->iov_len + (PAGE_SIZE - 1);
8383

io_uring/rsrc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg,
6868
unsigned size, unsigned type);
6969
int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
7070
unsigned int size, unsigned int type);
71+
int io_buffer_validate(struct iovec *iov);
7172

7273
bool io_check_coalesce_buffer(struct page **page_array, int nr_pages,
7374
struct io_imu_folio_data *data);

io_uring/zcrx.c

Lines changed: 87 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "kbuf.h"
1111
#include "memmap.h"
1212
#include "zcrx.h"
13+
#include "rsrc.h"
1314

1415
#define IO_RQ_MAX_ENTRIES 32768
1516

@@ -44,6 +45,79 @@ static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
4445
ifq->rqes = NULL;
4546
}
4647

48+
/*
 * Tear down an area: free the freelist and the niov array, unpin and free
 * the user page array if one was pinned, then free the area itself.
 * Also used on the error path of io_zcrx_create_area() for a partially
 * initialised area.
 */
static void io_zcrx_free_area(struct io_zcrx_area *area)
49+
{
50+
kvfree(area->freelist);
51+
kvfree(area->nia.niovs);
52+
/* pages is NULL when pinning never succeeded, so there is nothing to unpin */
if (area->pages) {
53+
/* num_niovs doubles as the page count (set to nr_pages at creation) */
unpin_user_pages(area->pages, area->nia.num_niovs);
54+
kvfree(area->pages);
55+
}
56+
kfree(area);
57+
}
58+
59+
/*
 * Create the zero-copy area described by @area_reg for @ifq.
 *
 * Validates the registration struct, pins the user pages, allocates one
 * niov and one freelist slot per pinned page, and marks every slot free.
 *
 * @ifq:      interface queue that will own the area
 * @res:      on success, set to the newly created area
 * @area_reg: kernel copy of the user registration struct; rq_area_token
 *            is written on success
 *
 * Returns 0 on success. Returns -EINVAL for non-zero flags/token/reserved
 * fields or unaligned addr/len, the io_buffer_validate() error, the
 * io_pin_pages() error, or -ENOMEM on allocation failure.
 */
static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
60+
struct io_zcrx_area **res,
61+
struct io_uring_zcrx_area_reg *area_reg)
62+
{
63+
struct io_zcrx_area *area;
64+
int i, ret, nr_pages;
65+
struct iovec iov;
66+
67+
/* no flags are defined yet, and the token is an output-only field */
if (area_reg->flags || area_reg->rq_area_token)
68+
return -EINVAL;
69+
/* reserved fields must be zero so they can be given meaning later */
if (area_reg->__resv1 || area_reg->__resv2[0] || area_reg->__resv2[1])
70+
return -EINVAL;
71+
/* both the start address and the length must be page aligned */
if (area_reg->addr & ~PAGE_MASK || area_reg->len & ~PAGE_MASK)
72+
return -EINVAL;
73+
74+
/* wrap the range in an iovec so the shared rsrc validation helper can check it */
iov.iov_base = u64_to_user_ptr(area_reg->addr);
75+
iov.iov_len = area_reg->len;
76+
ret = io_buffer_validate(&iov);
77+
if (ret)
78+
return ret;
79+
80+
/* default error for the allocation failures below */
ret = -ENOMEM;
81+
/* zero-allocated, so fields not yet set are NULL if the error path frees the area */
area = kzalloc(sizeof(*area), GFP_KERNEL);
82+
if (!area)
83+
goto err;
84+
85+
area->pages = io_pin_pages((unsigned long)area_reg->addr, area_reg->len,
86+
&nr_pages);
87+
if (IS_ERR(area->pages)) {
88+
ret = PTR_ERR(area->pages);
89+
/* clear the error pointer so io_zcrx_free_area() does not try to unpin it */
area->pages = NULL;
90+
goto err;
91+
}
92+
/* one niov per pinned page */
area->nia.num_niovs = nr_pages;
93+
94+
area->nia.niovs = kvmalloc_array(nr_pages, sizeof(area->nia.niovs[0]),
95+
GFP_KERNEL | __GFP_ZERO);
96+
if (!area->nia.niovs)
97+
goto err;
98+
99+
area->freelist = kvmalloc_array(nr_pages, sizeof(area->freelist[0]),
100+
GFP_KERNEL | __GFP_ZERO);
101+
if (!area->freelist)
102+
goto err;
103+
104+
/* every index starts out on the freelist */
for (i = 0; i < nr_pages; i++)
105+
area->freelist[i] = i;
106+
107+
area->free_count = nr_pages;
108+
area->ifq = ifq;
109+
/* we're only supporting one area per ifq for now */
110+
area->area_id = 0;
111+
/* report the area id back to the caller, encoded in the token */
area_reg->rq_area_token = (u64)area->area_id << IORING_ZCRX_AREA_SHIFT;
112+
spin_lock_init(&area->freelist_lock);
113+
*res = area;
114+
return 0;
115+
err:
116+
/* area is NULL when the kzalloc() above failed */
if (area)
117+
io_zcrx_free_area(area);
118+
return ret;
119+
}
120+
47121
static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx)
48122
{
49123
struct io_zcrx_ifq *ifq;
@@ -59,13 +133,17 @@ static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx)
59133

60134
/*
 * Free an ifq: release its area if one was created, call
 * io_free_rbuf_ring() for the ring, then free the ifq itself.
 */
static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
61135
{
136+
/* area is only set once io_zcrx_create_area() has succeeded */
if (ifq->area)
137+
io_zcrx_free_area(ifq->area);
138+
62139
io_free_rbuf_ring(ifq);
63140
kfree(ifq);
64141
}
65142

66143
int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
67144
struct io_uring_zcrx_ifq_reg __user *arg)
68145
{
146+
struct io_uring_zcrx_area_reg area;
69147
struct io_uring_zcrx_ifq_reg reg;
70148
struct io_uring_region_desc rd;
71149
struct io_zcrx_ifq *ifq;
@@ -99,7 +177,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
99177
}
100178
reg.rq_entries = roundup_pow_of_two(reg.rq_entries);
101179

102-
if (!reg.area_ptr)
180+
if (copy_from_user(&area, u64_to_user_ptr(reg.area_ptr), sizeof(area)))
103181
return -EFAULT;
104182

105183
ifq = io_zcrx_ifq_alloc(ctx);
@@ -110,6 +188,10 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
110188
if (ret)
111189
goto err;
112190

191+
ret = io_zcrx_create_area(ifq, &ifq->area, &area);
192+
if (ret)
193+
goto err;
194+
113195
ifq->rq_entries = reg.rq_entries;
114196
ifq->if_rxq = reg.if_rxq;
115197

@@ -122,7 +204,10 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
122204
ret = -EFAULT;
123205
goto err;
124206
}
125-
207+
if (copy_to_user(u64_to_user_ptr(reg.area_ptr), &area, sizeof(area))) {
208+
ret = -EFAULT;
209+
goto err;
210+
}
126211
ctx->ifq = ifq;
127212
return 0;
128213
err:

io_uring/zcrx.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,25 @@
33
#define IOU_ZC_RX_H
44

55
#include <linux/io_uring_types.h>
6+
#include <net/page_pool/types.h>
7+
8+
/*
 * A region of pinned userspace memory used for zero-copy receive.
 * There is currently one area per ifq.
 */
struct io_zcrx_area {
9+
/* niov array; num_niovs equals the number of pinned pages */
struct net_iov_area nia;
10+
/* owning interface queue */
struct io_zcrx_ifq *ifq;
11+
12+
/* always 0 for now, since only one area per ifq is supported */
u16 area_id;
13+
/* pinned user pages backing the area; NULL if nothing is pinned */
struct page **pages;
14+
15+
/* freelist: indices of the niovs/pages that are currently unused */
16+
/* lock for the freelist (described as spinlock protected in the commit message) */
spinlock_t freelist_lock ____cacheline_aligned_in_smp;
17+
/* set to the number of pinned pages at creation, when every index is free */
u32 free_count;
18+
/* array with one slot per page, initialised to 0..nr_pages-1 */
u32 *freelist;
19+
};
620

721
struct io_zcrx_ifq {
822
struct io_ring_ctx *ctx;
23+
struct io_zcrx_area *area;
24+
925
struct io_uring *rq_ring;
1026
struct io_uring_zcrx_rqe *rqes;
1127
u32 rq_entries;

0 commit comments

Comments
 (0)