Skip to content

Commit 475eb39

Browse files
isilence authored and axboe committed
io_uring/zcrx: add sync refill queue flushing
Add a zcrx interface via IORING_REGISTER_ZCRX_CTRL that forces the kernel to flush / consume entries from the refill queue. Just as with the IORING_REGISTER_ZCRX_REFILL attempt, the motivation is to address cases where the refill queue becomes full, and the user can't return buffers and needs to stash them. It's still a slow path, and the user should size the refill queue appropriately, but it should be helpful for handling temporary traffic spikes and other unpredictable conditions. The interface is simpler compared to ZCRX_REFILL as it doesn't need temporary refill entry arrays and gives natural batching, whereas ZCRX_REFILL requires even more user logic to be somewhat efficient. Also, add a structure for the operation. It's not currently used but can serve for future improvements like limiting the number of buffers to process, etc. Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent d663976 commit 475eb39

2 files changed

Lines changed: 80 additions & 4 deletions

File tree

include/uapi/linux/io_uring.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1082,13 +1082,21 @@ struct io_uring_zcrx_ifq_reg {
10821082
};
10831083

10841084
/* Operations accepted by IORING_REGISTER_ZCRX_CTRL via struct zcrx_ctrl. */
enum zcrx_ctrl_op {
	/* Force the kernel to consume entries from the refill queue. */
	ZCRX_CTRL_FLUSH_RQ,

	__ZCRX_CTRL_LAST,
};

/*
 * Argument block for ZCRX_CTRL_FLUSH_RQ. Currently carries no fields;
 * the reserved space exists for future extensions (e.g. limiting the
 * number of buffers to process) and must be zeroed by userspace.
 */
struct zcrx_ctrl_flush_rq {
	__u64	__resv[6];
};

struct zcrx_ctrl {
	__u32	zcrx_id;
	__u32	op; /* see enum zcrx_ctrl_op */
	/* must be zeroed; reserved for future use */
	__u64	__resv[2];

	/* per-op argument area; interpreted according to 'op' */
	struct zcrx_ctrl_flush_rq	zc_flush;
};
10931101

10941102
#ifdef __cplusplus

io_uring/zcrx.c

Lines changed: 71 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -941,6 +941,71 @@ static const struct memory_provider_ops io_uring_pp_zc_ops = {
941941
.uninstall = io_pp_uninstall,
942942
};
943943

944+
/*
 * Consume up to @nr entries from the zcrx refill ring under rq_lock and
 * store the corresponding netmems into @netmem_array. Stops early on the
 * first entry that fails validation in io_parse_rqe().
 *
 * Returns the number of netmems actually written to @netmem_array.
 */
static unsigned zcrx_parse_rq(netmem_ref *netmem_array, unsigned nr,
			      struct io_zcrx_ifq *zcrx)
{
	/* rq_entries is a power of two, so this mask wraps ring indices */
	unsigned int mask = zcrx->rq_entries - 1;
	unsigned int i;

	/* scoped lock: rq_lock is held for the rest of this function */
	guard(spinlock_bh)(&zcrx->rq_lock);

	/* don't read past what the producer has published */
	nr = min(nr, io_zcrx_rqring_entries(zcrx));
	for (i = 0; i < nr; i++) {
		struct io_uring_zcrx_rqe *rqe = io_zcrx_get_rqe(zcrx, mask);
		struct net_iov *niov;

		/* bail on a malformed entry; entries before it are kept */
		if (!io_parse_rqe(rqe, zcrx, &niov))
			break;
		netmem_array[i] = net_iov_to_netmem(niov);
	}

	/*
	 * Publish the new head so userspace sees the consumed entries;
	 * release ordering pairs with the userspace producer's reads.
	 * NOTE(review): this publishes cached_rq_head even after an early
	 * break — presumably io_zcrx_get_rqe() advanced it per entry read;
	 * confirm against its definition.
	 */
	smp_store_release(&zcrx->rq_ring->head, zcrx->cached_rq_head);
	return i;
}
965+
966+
#define ZCRX_FLUSH_BATCH 32
967+
968+
static void zcrx_return_buffers(netmem_ref *netmems, unsigned nr)
969+
{
970+
unsigned i;
971+
972+
for (i = 0; i < nr; i++) {
973+
netmem_ref netmem = netmems[i];
974+
struct net_iov *niov = netmem_to_net_iov(netmem);
975+
976+
if (!io_zcrx_put_niov_uref(niov))
977+
continue;
978+
if (!page_pool_unref_and_test(netmem))
979+
continue;
980+
io_zcrx_return_niov(niov);
981+
}
982+
}
983+
984+
/*
 * ZCRX_CTRL_FLUSH_RQ handler: drain the refill queue in batches of
 * ZCRX_FLUSH_BATCH, returning each batch of buffers as it goes.
 *
 * @ctx:  ring context (currently unused, kept for interface symmetry)
 * @zcrx: the interface queue whose refill ring is flushed
 * @ctrl: user-supplied control block; its zc_flush reserved area must
 *        be zero
 *
 * Returns 0 on success, -EINVAL if reserved fields are non-zero.
 */
static int zcrx_flush_rq(struct io_ring_ctx *ctx, struct io_zcrx_ifq *zcrx,
			 struct zcrx_ctrl *ctrl)
{
	struct zcrx_ctrl_flush_rq *frq = &ctrl->zc_flush;
	netmem_ref netmems[ZCRX_FLUSH_BATCH];
	unsigned total = 0;
	unsigned nr;

	/* reject non-zero reserved bits so they can be repurposed later */
	if (!mem_is_zero(&frq->__resv, sizeof(frq->__resv)))
		return -EINVAL;

	do {
		nr = zcrx_parse_rq(netmems, ZCRX_FLUSH_BATCH, zcrx);

		zcrx_return_buffers(netmems, nr);
		total += nr;

		/* this is a slow path; stop early if the task is dying */
		if (fatal_signal_pending(current))
			break;
		cond_resched();
		/*
		 * A short batch means the ring ran dry (or hit a bad entry);
		 * the total cap bounds the work to one full ring's worth.
		 */
	} while (nr == ZCRX_FLUSH_BATCH && total < zcrx->rq_entries);

	return 0;
}
1008+
9441009
int io_zcrx_ctrl(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
9451010
{
9461011
struct zcrx_ctrl ctrl;
@@ -956,10 +1021,13 @@ int io_zcrx_ctrl(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
9561021
zcrx = xa_load(&ctx->zcrx_ctxs, ctrl.zcrx_id);
9571022
if (!zcrx)
9581023
return -ENXIO;
959-
if (ctrl.op >= __ZCRX_CTRL_LAST)
960-
return -EOPNOTSUPP;
9611024

962-
return -EINVAL;
1025+
switch (ctrl.op) {
1026+
case ZCRX_CTRL_FLUSH_RQ:
1027+
return zcrx_flush_rq(ctx, zcrx, &ctrl);
1028+
}
1029+
1030+
return -EOPNOTSUPP;
9631031
}
9641032

9651033
static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,

0 commit comments

Comments
 (0)