Skip to content

Commit 0abcfd8

Browse files
committed
Merge tag 'for-6.19/io_uring-20251201' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull io_uring updates from Jens Axboe: - Unify how task_work cancelations are detected, placing it in the task_work running state rather than needing to check the task state - Series cleaning up and moving the cancelation code to where it belongs, in cancel.c - Cleanup of waitid and futex argument handling - Add support for mixed sized SQEs. 6.18 added support for mixed sized CQEs, improving flexibility and efficiency of workloads that need big CQEs. This adds similar support for SQEs, where the occasional need for a 128b SQE doesn't necessitate having all SQEs be 128b in size - Introduce zcrx and SQ/CQ layout queries. The former returns what zcrx features are available. And both return the ring size information to help with allocation size calculation for user provided rings like IORING_SETUP_NO_MMAP and IORING_MEM_REGION_TYPE_USER - Zcrx updates for 6.19. It includes a bunch of small patches, IORING_REGISTER_ZCRX_CTRL and RQ flushing and David's work on sharing zcrx between multiple io_uring instances - Series cleaning up ring initializations, notably deduplicating ring size and offset calculations. 
It also moves most of the checking before doing any allocations, making the code simpler - Add support for getsockname and getpeername, which is mostly a trivial hookup after a bit of refactoring on the networking side - Various fixes and cleanups * tag 'for-6.19/io_uring-20251201' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux: (68 commits) io_uring: Introduce getsockname io_uring cmd socket: Split out a getsockname helper for io_uring socket: Unify getsockname and getpeername implementation io_uring/query: drop unused io_handle_query_entry() ctx arg io_uring/kbuf: remove obsolete buf_nr_pages and update comments io_uring/register: use correct location for io_rings_layout io_uring/zcrx: share an ifq between rings io_uring/zcrx: add io_fill_zcrx_offsets() io_uring/zcrx: export zcrx via a file io_uring/zcrx: move io_zcrx_scrub() and dependencies up io_uring/zcrx: count zcrx users io_uring/zcrx: add sync refill queue flushing io_uring/zcrx: introduce IORING_REGISTER_ZCRX_CTRL io_uring/zcrx: elide passing msg flags io_uring/zcrx: use folio_nr_pages() instead of shift operation io_uring/zcrx: convert to use netmem_desc io_uring/query: introduce rings info query io_uring/query: introduce zcrx query io_uring: move cq/sq user offset init around io_uring: pre-calculate scq layout ...
2 parents 8f7aa3d + 5d24321 commit 0abcfd8

46 files changed

Lines changed: 1299 additions & 856 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

block/ioctl.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -769,14 +769,16 @@ struct blk_iou_cmd {
769769
bool nowait;
770770
};
771771

772-
static void blk_cmd_complete(struct io_uring_cmd *cmd, unsigned int issue_flags)
772+
static void blk_cmd_complete(struct io_tw_req tw_req, io_tw_token_t tw)
773773
{
774+
struct io_uring_cmd *cmd = io_uring_cmd_from_tw(tw_req);
774775
struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd);
775776

776777
if (bic->res == -EAGAIN && bic->nowait)
777778
io_uring_cmd_issue_blocking(cmd);
778779
else
779-
io_uring_cmd_done(cmd, bic->res, issue_flags);
780+
io_uring_cmd_done(cmd, bic->res,
781+
IO_URING_CMD_TASK_WORK_ISSUE_FLAGS);
780782
}
781783

782784
static void bio_cmd_bio_end_io(struct bio *bio)

drivers/block/ublk_drv.c

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1302,10 +1302,9 @@ static bool ublk_start_io(const struct ublk_queue *ubq, struct request *req,
13021302
return true;
13031303
}
13041304

1305-
static void ublk_dispatch_req(struct ublk_queue *ubq,
1306-
struct request *req,
1307-
unsigned int issue_flags)
1305+
static void ublk_dispatch_req(struct ublk_queue *ubq, struct request *req)
13081306
{
1307+
unsigned int issue_flags = IO_URING_CMD_TASK_WORK_ISSUE_FLAGS;
13091308
int tag = req->tag;
13101309
struct ublk_io *io = &ubq->ios[tag];
13111310

@@ -1348,13 +1347,13 @@ static void ublk_dispatch_req(struct ublk_queue *ubq,
13481347
ublk_complete_io_cmd(io, req, UBLK_IO_RES_OK, issue_flags);
13491348
}
13501349

1351-
static void ublk_cmd_tw_cb(struct io_uring_cmd *cmd,
1352-
unsigned int issue_flags)
1350+
static void ublk_cmd_tw_cb(struct io_tw_req tw_req, io_tw_token_t tw)
13531351
{
1352+
struct io_uring_cmd *cmd = io_uring_cmd_from_tw(tw_req);
13541353
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
13551354
struct ublk_queue *ubq = pdu->ubq;
13561355

1357-
ublk_dispatch_req(ubq, pdu->req, issue_flags);
1356+
ublk_dispatch_req(ubq, pdu->req);
13581357
}
13591358

13601359
static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
@@ -1366,17 +1365,17 @@ static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
13661365
io_uring_cmd_complete_in_task(cmd, ublk_cmd_tw_cb);
13671366
}
13681367

1369-
static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd,
1370-
unsigned int issue_flags)
1368+
static void ublk_cmd_list_tw_cb(struct io_tw_req tw_req, io_tw_token_t tw)
13711369
{
1370+
struct io_uring_cmd *cmd = io_uring_cmd_from_tw(tw_req);
13721371
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
13731372
struct request *rq = pdu->req_list;
13741373
struct request *next;
13751374

13761375
do {
13771376
next = rq->rq_next;
13781377
rq->rq_next = NULL;
1379-
ublk_dispatch_req(rq->mq_hctx->driver_data, rq, issue_flags);
1378+
ublk_dispatch_req(rq->mq_hctx->driver_data, rq);
13801379
rq = next;
13811380
} while (rq);
13821381
}
@@ -2523,9 +2522,10 @@ static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
25232522
return NULL;
25242523
}
25252524

2526-
static void ublk_ch_uring_cmd_cb(struct io_uring_cmd *cmd,
2527-
unsigned int issue_flags)
2525+
static void ublk_ch_uring_cmd_cb(struct io_tw_req tw_req, io_tw_token_t tw)
25282526
{
2527+
unsigned int issue_flags = IO_URING_CMD_TASK_WORK_ISSUE_FLAGS;
2528+
struct io_uring_cmd *cmd = io_uring_cmd_from_tw(tw_req);
25292529
int ret = ublk_ch_uring_cmd_local(cmd, issue_flags);
25302530

25312531
if (ret != -EIOCBQUEUED)

drivers/nvme/host/ioctl.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -398,14 +398,15 @@ static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu(
398398
return io_uring_cmd_to_pdu(ioucmd, struct nvme_uring_cmd_pdu);
399399
}
400400

401-
static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd,
402-
unsigned issue_flags)
401+
static void nvme_uring_task_cb(struct io_tw_req tw_req, io_tw_token_t tw)
403402
{
403+
struct io_uring_cmd *ioucmd = io_uring_cmd_from_tw(tw_req);
404404
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
405405

406406
if (pdu->bio)
407407
blk_rq_unmap_user(pdu->bio);
408-
io_uring_cmd_done32(ioucmd, pdu->status, pdu->result, issue_flags);
408+
io_uring_cmd_done32(ioucmd, pdu->status, pdu->result,
409+
IO_URING_CMD_TASK_WORK_ISSUE_FLAGS);
409410
}
410411

411412
static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,

fs/btrfs/ioctl.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4632,8 +4632,9 @@ struct io_btrfs_cmd {
46324632
struct btrfs_uring_priv *priv;
46334633
};
46344634

4635-
static void btrfs_uring_read_finished(struct io_uring_cmd *cmd, unsigned int issue_flags)
4635+
static void btrfs_uring_read_finished(struct io_tw_req tw_req, io_tw_token_t tw)
46364636
{
4637+
struct io_uring_cmd *cmd = io_uring_cmd_from_tw(tw_req);
46374638
struct io_btrfs_cmd *bc = io_uring_cmd_to_pdu(cmd, struct io_btrfs_cmd);
46384639
struct btrfs_uring_priv *priv = bc->priv;
46394640
struct btrfs_inode *inode = BTRFS_I(file_inode(priv->iocb.ki_filp));
@@ -4678,7 +4679,7 @@ static void btrfs_uring_read_finished(struct io_uring_cmd *cmd, unsigned int iss
46784679
btrfs_unlock_extent(io_tree, priv->start, priv->lockend, &priv->cached_state);
46794680
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
46804681

4681-
io_uring_cmd_done(cmd, ret, issue_flags);
4682+
io_uring_cmd_done(cmd, ret, IO_URING_CMD_TASK_WORK_ISSUE_FLAGS);
46824683
add_rchar(current, ret);
46834684

46844685
for (index = 0; index < priv->nr_pages; index++)

fs/fuse/dev_uring.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1209,14 +1209,15 @@ static void fuse_uring_send(struct fuse_ring_ent *ent, struct io_uring_cmd *cmd,
12091209
* User buffers are not mapped yet - the application does not have permission
12101210
* to write to it - this has to be executed in ring task context.
12111211
*/
1212-
static void fuse_uring_send_in_task(struct io_uring_cmd *cmd,
1213-
unsigned int issue_flags)
1212+
static void fuse_uring_send_in_task(struct io_tw_req tw_req, io_tw_token_t tw)
12141213
{
1214+
unsigned int issue_flags = IO_URING_CMD_TASK_WORK_ISSUE_FLAGS;
1215+
struct io_uring_cmd *cmd = io_uring_cmd_from_tw(tw_req);
12151216
struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
12161217
struct fuse_ring_queue *queue = ent->queue;
12171218
int err;
12181219

1219-
if (!(issue_flags & IO_URING_F_TASK_DEAD)) {
1220+
if (!tw.cancel) {
12201221
err = fuse_uring_prepare_send(ent, ent->fuse_req);
12211222
if (err) {
12221223
fuse_uring_next_fuse_req(ent, queue, issue_flags);

include/linux/io_uring/cmd.h

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,13 @@
1111
/* io_uring_cmd is being issued again */
1212
#define IORING_URING_CMD_REISSUE (1U << 31)
1313

14-
typedef void (*io_uring_cmd_tw_t)(struct io_uring_cmd *cmd,
15-
unsigned issue_flags);
16-
1714
struct io_uring_cmd {
1815
struct file *file;
1916
const struct io_uring_sqe *sqe;
20-
/* callback to defer completions to task context */
21-
io_uring_cmd_tw_t task_work_cb;
2217
u32 cmd_op;
2318
u32 flags;
2419
u8 pdu[32]; /* available inline for free use */
20+
u8 unused[8];
2521
};
2622

2723
static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
@@ -60,7 +56,7 @@ void __io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret, u64 res2,
6056
unsigned issue_flags, bool is_cqe32);
6157

6258
void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
63-
io_uring_cmd_tw_t task_work_cb,
59+
io_req_tw_func_t task_work_cb,
6460
unsigned flags);
6561

6662
/*
@@ -109,7 +105,7 @@ static inline void __io_uring_cmd_done(struct io_uring_cmd *cmd, s32 ret,
109105
{
110106
}
111107
static inline void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
112-
io_uring_cmd_tw_t task_work_cb, unsigned flags)
108+
io_req_tw_func_t task_work_cb, unsigned flags)
113109
{
114110
}
115111
static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
@@ -132,15 +128,23 @@ static inline bool io_uring_mshot_cmd_post_cqe(struct io_uring_cmd *ioucmd,
132128
}
133129
#endif
134130

131+
static inline struct io_uring_cmd *io_uring_cmd_from_tw(struct io_tw_req tw_req)
132+
{
133+
return io_kiocb_to_cmd(tw_req.req, struct io_uring_cmd);
134+
}
135+
136+
/* task_work executor checks the deferred list completion */
137+
#define IO_URING_CMD_TASK_WORK_ISSUE_FLAGS IO_URING_F_COMPLETE_DEFER
138+
135139
/* users must follow the IOU_F_TWQ_LAZY_WAKE semantics */
136140
static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
137-
io_uring_cmd_tw_t task_work_cb)
141+
io_req_tw_func_t task_work_cb)
138142
{
139143
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, IOU_F_TWQ_LAZY_WAKE);
140144
}
141145

142146
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
143-
io_uring_cmd_tw_t task_work_cb)
147+
io_req_tw_func_t task_work_cb)
144148
{
145149
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0);
146150
}

include/linux/io_uring_types.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ enum io_uring_cmd_flags {
3939
/* set when uring wants to cancel a previously issued command */
4040
IO_URING_F_CANCEL = (1 << 11),
4141
IO_URING_F_COMPAT = (1 << 12),
42-
IO_URING_F_TASK_DEAD = (1 << 13),
4342
};
4443

4544
struct io_wq_work_node {
@@ -328,8 +327,8 @@ struct io_ring_ctx {
328327

329328
/*
330329
* Modifications are protected by ->uring_lock and ->mmap_lock.
331-
* The flags, buf_pages and buf_nr_pages fields should be stable
332-
* once published.
330+
* The buffer list's io mapped region should be stable once
331+
* published.
333332
*/
334333
struct xarray io_bl_xa;
335334

@@ -474,6 +473,7 @@ struct io_ring_ctx {
474473
* ONLY core io_uring.c should instantiate this struct.
475474
*/
476475
struct io_tw_state {
476+
bool cancel;
477477
};
478478
/* Alias to use in code that doesn't instantiate struct io_tw_state */
479479
typedef struct io_tw_state io_tw_token_t;
@@ -614,7 +614,11 @@ enum {
614614
REQ_F_SQE_COPIED = IO_REQ_FLAG(REQ_F_SQE_COPIED_BIT),
615615
};
616616

617-
typedef void (*io_req_tw_func_t)(struct io_kiocb *req, io_tw_token_t tw);
617+
struct io_tw_req {
618+
struct io_kiocb *req;
619+
};
620+
621+
typedef void (*io_req_tw_func_t)(struct io_tw_req tw_req, io_tw_token_t tw);
618622

619623
struct io_task_work {
620624
struct llist_node node;

include/linux/netdevice.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3417,6 +3417,7 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex);
34173417
struct net_device *__dev_get_by_index(struct net *net, int ifindex);
34183418
struct net_device *netdev_get_by_index(struct net *net, int ifindex,
34193419
netdevice_tracker *tracker, gfp_t gfp);
3420+
struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex);
34203421
struct net_device *netdev_get_by_name(struct net *net, const char *name,
34213422
netdevice_tracker *tracker, gfp_t gfp);
34223423
struct net_device *netdev_get_by_flags_rcu(struct net *net, netdevice_tracker *tracker,

include/linux/socket.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -468,10 +468,10 @@ extern int __sys_connect(int fd, struct sockaddr __user *uservaddr,
468468
int addrlen);
469469
extern int __sys_listen(int fd, int backlog);
470470
extern int __sys_listen_socket(struct socket *sock, int backlog);
471+
extern int do_getsockname(struct socket *sock, int peer,
472+
struct sockaddr __user *usockaddr, int __user *usockaddr_len);
471473
extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
472-
int __user *usockaddr_len);
473-
extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
474-
int __user *usockaddr_len);
474+
int __user *usockaddr_len, int peer);
475475
extern int __sys_socketpair(int family, int type, int protocol,
476476
int __user *usockvec);
477477
extern int __sys_shutdown_sock(struct socket *sock, int how);

include/uapi/linux/io_uring.h

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,12 @@ enum io_uring_sqe_flags_bit {
231231
*/
232232
#define IORING_SETUP_CQE_MIXED (1U << 18)
233233

234+
/*
235+
* Allow both 64b and 128b SQEs. If a 128b SQE is posted, it will have
236+
* a 128b opcode.
237+
*/
238+
#define IORING_SETUP_SQE_MIXED (1U << 19)
239+
234240
enum io_uring_op {
235241
IORING_OP_NOP,
236242
IORING_OP_READV,
@@ -295,6 +301,8 @@ enum io_uring_op {
295301
IORING_OP_READV_FIXED,
296302
IORING_OP_WRITEV_FIXED,
297303
IORING_OP_PIPE,
304+
IORING_OP_NOP128,
305+
IORING_OP_URING_CMD128,
298306

299307
/* this goes last, obviously */
300308
IORING_OP_LAST,
@@ -689,6 +697,9 @@ enum io_uring_register_op {
689697
/* query various aspects of io_uring, see linux/io_uring/query.h */
690698
IORING_REGISTER_QUERY = 35,
691699

700+
/* auxiliary zcrx configuration, see enum zcrx_ctrl_op */
701+
IORING_REGISTER_ZCRX_CTRL = 36,
702+
692703
/* this goes last */
693704
IORING_REGISTER_LAST,
694705

@@ -998,6 +1009,7 @@ enum io_uring_socket_op {
9981009
SOCKET_URING_OP_GETSOCKOPT,
9991010
SOCKET_URING_OP_SETSOCKOPT,
10001011
SOCKET_URING_OP_TX_TIMESTAMP,
1012+
SOCKET_URING_OP_GETSOCKNAME,
10011013
};
10021014

10031015
/*
@@ -1052,6 +1064,10 @@ struct io_uring_zcrx_area_reg {
10521064
__u64 __resv2[2];
10531065
};
10541066

1067+
enum zcrx_reg_flags {
1068+
ZCRX_REG_IMPORT = 1,
1069+
};
1070+
10551071
/*
10561072
* Argument for IORING_REGISTER_ZCRX_IFQ
10571073
*/
@@ -1070,6 +1086,33 @@ struct io_uring_zcrx_ifq_reg {
10701086
__u64 __resv[3];
10711087
};
10721088

1089+
enum zcrx_ctrl_op {
1090+
ZCRX_CTRL_FLUSH_RQ,
1091+
ZCRX_CTRL_EXPORT,
1092+
1093+
__ZCRX_CTRL_LAST,
1094+
};
1095+
1096+
struct zcrx_ctrl_flush_rq {
1097+
__u64 __resv[6];
1098+
};
1099+
1100+
struct zcrx_ctrl_export {
1101+
__u32 zcrx_fd;
1102+
__u32 __resv1[11];
1103+
};
1104+
1105+
struct zcrx_ctrl {
1106+
__u32 zcrx_id;
1107+
__u32 op; /* see enum zcrx_ctrl_op */
1108+
__u64 __resv[2];
1109+
1110+
union {
1111+
struct zcrx_ctrl_export zc_export;
1112+
struct zcrx_ctrl_flush_rq zc_flush;
1113+
};
1114+
};
1115+
10731116
#ifdef __cplusplus
10741117
}
10751118
#endif

0 commit comments

Comments
 (0)