@@ -75,6 +75,7 @@ struct ublk_rq_data {
 
 struct ublk_uring_cmd_pdu {
	struct ublk_queue *ubq;
+	u16 tag;
 };
 
 /*
@@ -141,14 +142,13 @@ struct ublk_queue {
	unsigned int max_io_sz;
	bool force_abort;
	bool timeout;
+	bool canceling;
	unsigned short nr_io_ready;	/* how many ios setup */
	spinlock_t cancel_lock;
	struct ublk_device *dev;
	struct ublk_io ios[];
 };
 
-#define UBLK_DAEMON_MONITOR_PERIOD	(5 * HZ)
-
 struct ublk_device {
	struct gendisk		*ub_disk;
 
@@ -179,11 +179,6 @@ struct ublk_device {
	unsigned int		nr_queues_ready;
	unsigned int		nr_privileged_daemon;
 
-	/*
-	 * Our ubq->daemon may be killed without any notification, so
-	 * monitor each queue's daemon periodically
-	 */
-	struct delayed_work	monitor_work;
	struct work_struct	quiesce_work;
	struct work_struct	stop_work;
 };
@@ -194,10 +189,11 @@ struct ublk_params_header {
	__u32	types;
 };
 
+static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq);
+
 static inline unsigned int ublk_req_build_flags(struct request *req);
 static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq,
						   int tag);
-
 static inline bool ublk_dev_is_user_copy(const struct ublk_device *ub)
 {
	return ub->dev_info.flags & UBLK_F_USER_COPY;
@@ -1123,8 +1119,6 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
		blk_mq_requeue_request(rq, false);
	else
		blk_mq_end_request(rq, BLK_STS_IOERR);
-
-	mod_delayed_work(system_wq, &ubq->dev->monitor_work, 0);
 }
 
 static inline void __ublk_rq_task_work(struct request *req,
@@ -1245,30 +1239,28 @@ static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
	io = &ubq->ios[rq->tag];
	/*
	 * If the check pass, we know that this is a re-issued request aborted
-	 * previously in monitor_work because the ubq_daemon(cmd's task) is
+	 * previously in cancel fn because the ubq_daemon(cmd's task) is
	 * PF_EXITING. We cannot call io_uring_cmd_complete_in_task() anymore
	 * because this ioucmd's io_uring context may be freed now if no inflight
	 * ioucmd exists. Otherwise we may cause null-deref in ctx->fallback_work.
	 *
-	 * Note: monitor_work sets UBLK_IO_FLAG_ABORTED and ends this request(releasing
+	 * Note: cancel fn sets UBLK_IO_FLAG_ABORTED and ends this request(releasing
	 * the tag). Then the request is re-started(allocating the tag) and we are here.
	 * Since releasing/allocating a tag implies smp_mb(), finding UBLK_IO_FLAG_ABORTED
	 * guarantees that here is a re-issued request aborted previously.
	 */
	if (unlikely(io->flags & UBLK_IO_FLAG_ABORTED)) {
		ublk_abort_io_cmds(ubq);
	} else {
-		struct io_uring_cmd *cmd = io->cmd;
-		struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
-
-		pdu->ubq = ubq;
-		io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
+		io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
	}
 }
 
 static enum blk_eh_timer_return ublk_timeout(struct request *rq)
 {
	struct ublk_queue *ubq = rq->mq_hctx->driver_data;
+	unsigned int nr_inflight = 0;
+	int i;
 
	if (ubq->flags & UBLK_F_UNPRIVILEGED_DEV) {
		if (!ubq->timeout) {
@@ -1279,6 +1271,29 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq)
		return BLK_EH_DONE;
	}
 
+	if (!ubq_daemon_is_dying(ubq))
+		return BLK_EH_RESET_TIMER;
+
+	for (i = 0; i < ubq->q_depth; i++) {
+		struct ublk_io *io = &ubq->ios[i];
+
+		if (!(io->flags & UBLK_IO_FLAG_ACTIVE))
+			nr_inflight++;
+	}
+
+	/* cancelable uring_cmd can't help us if all commands are in-flight */
+	if (nr_inflight == ubq->q_depth) {
+		struct ublk_device *ub = ubq->dev;
+
+		if (ublk_abort_requests(ub, ubq)) {
+			if (ublk_can_use_recovery(ub))
+				schedule_work(&ub->quiesce_work);
+			else
+				schedule_work(&ub->stop_work);
+		}
+		return BLK_EH_DONE;
+	}
+
	return BLK_EH_RESET_TIMER;
 }
 
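The timeout path above is the safety net for the one case the cancel fn can never cover: a command whose UBLK_IO_FLAG_ACTIVE bit is set is parked in io_uring waiting for I/O, so "in-flight" here means the flag is clear; if every slot is in-flight, no cancelable uring_cmd remains for io_uring to call back on, and the abort has to be driven from the timeout handler instead. For reference, the dying test used above is ublk's existing helper (unchanged by this patch), essentially:

	/* existing ublk helper, shown for reference; not part of this diff */
	static inline bool ubq_daemon_is_dying(struct ublk_queue *ubq)
	{
		/* the daemon task is exiting, e.g. it has been killed */
		return ubq->ubq_daemon->flags & PF_EXITING;
	}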
@@ -1308,7 +1323,7 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
 
	blk_mq_start_request(bd->rq);
 
-	if (unlikely(ubq_daemon_is_dying(ubq))) {
+	if (unlikely(ubq->canceling)) {
		__ublk_abort_rq(ubq, rq);
		return BLK_STS_OK;
	}
@@ -1416,9 +1431,9 @@ static void ublk_commit_completion(struct ublk_device *ub,
 }
 
 /*
- * When ->ubq_daemon is exiting, either new request is ended immediately,
- * or any queued io command is drained, so it is safe to abort queue
- * lockless
+ * Called from ubq_daemon context via the cancel fn, while the ublk blk-mq
+ * queue is quiesced; we run exclusively with respect to both blk-mq and
+ * the ubq_daemon context, so everything is serialized.
  */
 static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
 {
@@ -1441,10 +1456,17 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
	}
 }
 
-static bool ublk_abort_requests(struct ublk_device *ub)
+static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq)
 {
	struct gendisk *disk;
-	int i;
+
+	spin_lock(&ubq->cancel_lock);
+	if (ubq->canceling) {
+		spin_unlock(&ubq->cancel_lock);
+		return false;
+	}
+	ubq->canceling = true;
+	spin_unlock(&ubq->cancel_lock);
 
	spin_lock(&ub->lock);
	disk = ub->ub_disk;
@@ -1458,53 +1480,69 @@ static bool ublk_abort_requests(struct ublk_device *ub)
 
	/* Now we are serialized with ublk_queue_rq() */
	blk_mq_quiesce_queue(disk->queue);
-	for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
-		struct ublk_queue *ubq = ublk_get_queue(ub, i);
-
-		if (ubq_daemon_is_dying(ubq)) {
-			/* abort queue is for making forward progress */
-			ublk_abort_queue(ub, ubq);
-		}
-	}
+	/* abort queue is for making forward progress */
+	ublk_abort_queue(ub, ubq);
	blk_mq_unquiesce_queue(disk->queue);
	put_device(disk_to_dev(disk));
 
	return true;
 }
 
-static void ublk_daemon_monitor_work(struct work_struct *work)
+static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io,
+		unsigned int issue_flags)
 {
-	struct ublk_device *ub =
-		container_of(work, struct ublk_device, monitor_work.work);
-	int i;
+	bool done;
 
-	for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
-		struct ublk_queue *ubq = ublk_get_queue(ub, i);
+	if (!(io->flags & UBLK_IO_FLAG_ACTIVE))
+		return;
 
-		if (ubq_daemon_is_dying(ubq))
-			goto found;
-	}
-	return;
+	spin_lock(&ubq->cancel_lock);
+	done = !!(io->flags & UBLK_IO_FLAG_CANCELED);
+	if (!done)
+		io->flags |= UBLK_IO_FLAG_CANCELED;
+	spin_unlock(&ubq->cancel_lock);
+
+	if (!done)
+		io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0, issue_flags);
+}
+
+/*
+ * The ublk char device won't be closed when calling cancel fn, so both
+ * ublk device and queue are guaranteed to be live
+ */
+static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
+		unsigned int issue_flags)
+{
+	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+	struct ublk_queue *ubq = pdu->ubq;
+	struct task_struct *task;
+	struct ublk_device *ub;
+	bool need_schedule;
+	struct ublk_io *io;
 
-found:
-	if (!ublk_abort_requests(ub))
+	if (WARN_ON_ONCE(!ubq))
		return;
 
-	if (ublk_can_use_recovery(ub))
-		schedule_work(&ub->quiesce_work);
-	else
-		schedule_work(&ub->stop_work);
+	if (WARN_ON_ONCE(pdu->tag >= ubq->q_depth))
+		return;
 
-	/*
-	 * We can't schedule monitor work after ub's state is not UBLK_S_DEV_LIVE.
-	 * after ublk_remove() or __ublk_quiesce_dev() is started.
-	 *
-	 * No need ub->mutex, monitor work are canceled after state is marked
-	 * as not LIVE, so new state is observed reliably.
-	 */
-	if (ub->dev_info.state == UBLK_S_DEV_LIVE)
-		schedule_delayed_work(&ub->monitor_work,
-				UBLK_DAEMON_MONITOR_PERIOD);
+	task = io_uring_cmd_get_task(cmd);
+	if (WARN_ON_ONCE(task && task != ubq->ubq_daemon))
+		return;
+
+	ub = ubq->dev;
+	need_schedule = ublk_abort_requests(ub, ubq);
+
+	io = &ubq->ios[pdu->tag];
+	WARN_ON_ONCE(io->cmd != cmd);
+	ublk_cancel_cmd(ubq, io, issue_flags);
+
+	if (need_schedule) {
+		if (ublk_can_use_recovery(ub))
+			schedule_work(&ub->quiesce_work);
+		else
+			schedule_work(&ub->stop_work);
+	}
 }
 
 static inline bool ublk_queue_ready(struct ublk_queue *ubq)
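ublk_uring_cmd_cancel_fn() is reached through io_uring's cancelable uring_cmd mechanism: once a command has been marked cancelable, io_uring re-issues it to the driver's ->uring_cmd() handler with IO_URING_F_CANCEL set at cancel/teardown time (see the ublk_ch_uring_cmd() hunk further below). A minimal sketch of that general pattern, using a hypothetical demo_uring_cmd() handler rather than ublk's actual code:

	/* sketch of the cancelable uring_cmd pattern; demo_uring_cmd is hypothetical */
	static int demo_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
	{
		if (issue_flags & IO_URING_F_CANCEL) {
			/* io_uring is canceling: complete the parked command now */
			io_uring_cmd_done(cmd, -ECANCELED, 0, issue_flags);
			return 0;
		}

		/* normal issue path: keep the command in flight and arm cancellation */
		io_uring_cmd_mark_cancelable(cmd, issue_flags);
		return -EIOCBQUEUED;
	}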
@@ -1516,24 +1554,8 @@ static void ublk_cancel_queue(struct ublk_queue *ubq)
 {
	int i;
 
-	for (i = 0; i < ubq->q_depth; i++) {
-		struct ublk_io *io = &ubq->ios[i];
-
-		if (io->flags & UBLK_IO_FLAG_ACTIVE) {
-			bool done;
-
-			spin_lock(&ubq->cancel_lock);
-			done = !!(io->flags & UBLK_IO_FLAG_CANCELED);
-			if (!done)
-				io->flags |= UBLK_IO_FLAG_CANCELED;
-			spin_unlock(&ubq->cancel_lock);
-
-			if (!done)
-				io_uring_cmd_done(io->cmd,
-						UBLK_IO_RES_ABORT, 0,
-						IO_URING_F_UNLOCKED);
-		}
-	}
+	for (i = 0; i < ubq->q_depth; i++)
+		ublk_cancel_cmd(ubq, &ubq->ios[i], IO_URING_F_UNLOCKED);
 }
 
 /* Cancel all pending commands, must be called after del_gendisk() returns */
@@ -1580,15 +1602,6 @@ static void __ublk_quiesce_dev(struct ublk_device *ub)
	blk_mq_quiesce_queue(ub->ub_disk->queue);
	ublk_wait_tagset_rqs_idle(ub);
	ub->dev_info.state = UBLK_S_DEV_QUIESCED;
-	/* we are going to release task_struct of ubq_daemon and resets
-	 * ->ubq_daemon to NULL. So in monitor_work, check on ubq_daemon causes UAF.
-	 * Besides, monitor_work is not necessary in QUIESCED state since we have
-	 * already scheduled quiesce_work and quiesced all ubqs.
-	 *
-	 * Do not let monitor_work schedule itself if state it QUIESCED. And we cancel
-	 * it here and re-schedule it in END_USER_RECOVERY to avoid UAF.
-	 */
-	cancel_delayed_work_sync(&ub->monitor_work);
 }
 
 static void ublk_quiesce_work_fn(struct work_struct *work)
@@ -1651,7 +1664,6 @@ static void ublk_stop_dev(struct ublk_device *ub)
  unlock:
	mutex_unlock(&ub->mutex);
	ublk_cancel_dev(ub);
-	cancel_delayed_work_sync(&ub->monitor_work);
 }
 
 /* device can only be started after all IOs are ready */
@@ -1702,6 +1714,21 @@ static inline void ublk_fill_io_cmd(struct ublk_io *io,
	io->addr = buf_addr;
 }
 
+static inline void ublk_prep_cancel(struct io_uring_cmd *cmd,
+				    unsigned int issue_flags,
+				    struct ublk_queue *ubq, unsigned int tag)
+{
+	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+
+	/*
+	 * Safe to refer to @ubq since the ublk_queue won't go away until
+	 * all of its commands are completed
+	 */
+	pdu->ubq = ubq;
+	pdu->tag = tag;
+	io_uring_cmd_mark_cancelable(cmd, issue_flags);
+}
+
 static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
			       unsigned int issue_flags,
			       const struct ublksrv_io_cmd *ub_cmd)
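ublk_prep_cancel() records everything the cancel fn will later need — the owning queue and the command's tag — in the per-command pdu. The pdu accessor is ublk's existing helper (not shown in this diff); at the time of this change it was essentially a cast over the uring_cmd's private data area:

	/* existing accessor, shown for reference; not part of this diff */
	static inline struct ublk_uring_cmd_pdu *ublk_get_uring_cmd_pdu(
			struct io_uring_cmd *ioucmd)
	{
		return (struct ublk_uring_cmd_pdu *)&ioucmd->pdu;
	}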
@@ -1817,6 +1844,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
	default:
		goto out;
	}
+	ublk_prep_cancel(cmd, issue_flags, ubq, tag);
	return -EIOCBQUEUED;
 
  out:
@@ -1884,6 +1912,11 @@ static void ublk_ch_uring_cmd_cb(struct io_uring_cmd *cmd,
 
 static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 {
+	if (unlikely(issue_flags & IO_URING_F_CANCEL)) {
+		ublk_uring_cmd_cancel_fn(cmd, issue_flags);
+		return 0;
+	}
+
	/* well-implemented server won't run into unlocked */
	if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
		io_uring_cmd_complete_in_task(cmd, ublk_ch_uring_cmd_cb);
@@ -2215,8 +2248,6 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
	if (wait_for_completion_interruptible(&ub->completion) != 0)
		return -EINTR;
 
-	schedule_delayed_work(&ub->monitor_work, UBLK_DAEMON_MONITOR_PERIOD);
-
	mutex_lock(&ub->mutex);
	if (ub->dev_info.state == UBLK_S_DEV_LIVE ||
	    test_bit(UB_STATE_USED, &ub->state)) {
@@ -2393,7 +2424,6 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
	spin_lock_init(&ub->lock);
	INIT_WORK(&ub->quiesce_work, ublk_quiesce_work_fn);
	INIT_WORK(&ub->stop_work, ublk_stop_work_fn);
-	INIT_DELAYED_WORK(&ub->monitor_work, ublk_daemon_monitor_work);
 
	ret = ublk_alloc_dev_number(ub, header->dev_id);
	if (ret < 0)
@@ -2647,6 +2677,7 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
	/* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */
	ubq->ubq_daemon = NULL;
	ubq->timeout = false;
+	ubq->canceling = false;
 
	for (i = 0; i < ubq->q_depth; i++) {
		struct ublk_io *io = &ubq->ios[i];
@@ -2732,7 +2763,6 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
			__func__, header->dev_id);
	blk_mq_kick_requeue_list(ub->ub_disk->queue);
	ub->dev_info.state = UBLK_S_DEV_LIVE;
-	schedule_delayed_work(&ub->monitor_work, UBLK_DAEMON_MONITOR_PERIOD);
	ret = 0;
  out_unlock:
	mutex_unlock(&ub->mutex);