@@ -40,6 +40,13 @@
 #define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE"
 #define DM_COOKIE_LENGTH 24
 
+/*
+ * For REQ_POLLED fs bio, this flag is set if we link mapped underlying
+ * dm_io into one list, and reuse bio->bi_private as the list head. Before
+ * ending this fs bio, we will recover its ->bi_private.
+ */
+#define REQ_DM_POLL_LIST	REQ_DRV
+
 static const char *_name = DM_NAME;
 
 static unsigned int major = 0;
@@ -73,6 +80,7 @@ struct clone_info {
 	struct dm_io *io;
 	sector_t sector;
 	unsigned sector_count;
+	bool submit_as_polled;
 };
 
 #define DM_TARGET_IO_BIO_OFFSET (offsetof(struct dm_target_io, clone))
@@ -599,6 +607,9 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
 		if (!clone)
 			return NULL;
 
+		/* REQ_DM_POLL_LIST shouldn't be inherited */
+		clone->bi_opf &= ~REQ_DM_POLL_LIST;
+
 		tio = clone_to_tio(clone);
 		tio->inside_dm_io = false;
 	}
@@ -888,8 +899,15 @@ void dm_io_dec_pending(struct dm_io *io, blk_status_t error)
 		if (unlikely(wq_has_sleeper(&md->wait)))
 			wake_up(&md->wait);
 
-		if (io_error == BLK_STS_DM_REQUEUE)
+		if (io_error == BLK_STS_DM_REQUEUE) {
+			/*
+			 * Upper layer won't help us poll split bio, io->orig_bio
+			 * may only reflect a subset of the pre-split original,
+			 * so clear REQ_POLLED in case of requeue
+			 */
+			bio->bi_opf &= ~REQ_POLLED;
 			return;
+		}
 
 		if (bio_is_flush_with_data(bio)) {
 			/*
@@ -1440,6 +1458,47 @@ static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti,
 	return true;
 }
 
+/*
+ * Reuse ->bi_private as hlist head for storing all dm_io instances
+ * associated with this bio, and this bio's bi_private needs to be
+ * stored in dm_io->data before the reuse.
+ *
+ * bio->bi_private is owned by fs or upper layer, so block layer won't
+ * touch it after splitting. Meantime it won't be changed by anyone after
+ * bio is submitted. So this reuse is safe.
+ */
+static inline struct hlist_head *dm_get_bio_hlist_head(struct bio *bio)
+{
+	return (struct hlist_head *)&bio->bi_private;
+}
+
+static void dm_queue_poll_io(struct bio *bio, struct dm_io *io)
+{
+	struct hlist_head *head = dm_get_bio_hlist_head(bio);
+
+	if (!(bio->bi_opf & REQ_DM_POLL_LIST)) {
+		bio->bi_opf |= REQ_DM_POLL_LIST;
+		/*
+		 * Save .bi_private into dm_io, so that we can reuse
+		 * .bi_private as hlist head for storing dm_io list
+		 */
+		io->data = bio->bi_private;
+
+		INIT_HLIST_HEAD(head);
+
+		/* tell block layer to poll for completion */
+		bio->bi_cookie = ~BLK_QC_T_NONE;
+	} else {
+		/*
+		 * bio recursed due to split, reuse original poll list,
+		 * and save bio->bi_private too.
+		 */
+		io->data = hlist_entry(head->first, struct dm_io, node)->data;
+	}
+
+	hlist_add_head(&io->node, head);
+}
+
 /*
  * Select the correct strategy for processing a non-flush bio.
  */
@@ -1457,6 +1516,12 @@ static int __split_and_process_bio(struct clone_info *ci)
 	if (__process_abnormal_io(ci, ti, &r))
 		return r;
 
+	/*
+	 * Only support bio polling for normal IO, and the target io is
+	 * exactly inside the dm_io instance (verified in dm_poll_dm_io)
+	 */
+	ci->submit_as_polled = ci->bio->bi_opf & REQ_POLLED;
+
 	len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count);
 	clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO);
 	__map_bio(clone);
@@ -1473,6 +1538,7 @@ static void init_clone_info(struct clone_info *ci, struct mapped_device *md,
 	ci->map = map;
 	ci->io = alloc_io(md, bio);
 	ci->bio = bio;
+	ci->submit_as_polled = false;
 	ci->sector = bio->bi_iter.bi_sector;
 	ci->sector_count = bio_sectors(bio);
 
@@ -1522,8 +1588,17 @@ static void dm_split_and_process_bio(struct mapped_device *md,
 	if (ci.io->start_io_acct)
 		dm_start_io_acct(ci.io, NULL);
 
-	/* drop the extra reference count */
-	dm_io_dec_pending(ci.io, errno_to_blk_status(error));
+	/*
+	 * Drop the extra reference count for non-POLLED bio, and hold one
+	 * reference for POLLED bio, which will be released in dm_poll_bio
+	 *
+	 * Add every dm_io instance into the hlist_head which is stored in
+	 * bio->bi_private, so that dm_poll_bio can poll them all.
+	 */
+	if (error || !ci.submit_as_polled)
+		dm_io_dec_pending(ci.io, errno_to_blk_status(error));
+	else
+		dm_queue_poll_io(bio, ci.io);
 }
 
 static void dm_submit_bio(struct bio *bio)
@@ -1558,6 +1633,67 @@ static void dm_submit_bio(struct bio *bio)
 	dm_put_live_table(md, srcu_idx);
 }
 
+static bool dm_poll_dm_io(struct dm_io *io, struct io_comp_batch *iob,
+			  unsigned int flags)
+{
+	WARN_ON_ONCE(!io->tio.inside_dm_io);
+
+	/* don't poll if the mapped io is done */
+	if (atomic_read(&io->io_count) > 1)
+		bio_poll(&io->tio.clone, iob, flags);
+
+	/* bio_poll holds the last reference */
+	return atomic_read(&io->io_count) == 1;
+}
+
+static int dm_poll_bio(struct bio *bio, struct io_comp_batch *iob,
+		       unsigned int flags)
+{
+	struct hlist_head *head = dm_get_bio_hlist_head(bio);
+	struct hlist_head tmp = HLIST_HEAD_INIT;
+	struct hlist_node *next;
+	struct dm_io *io;
+
+	/* Only poll normal bio which was marked as REQ_DM_POLL_LIST */
+	if (!(bio->bi_opf & REQ_DM_POLL_LIST))
+		return 0;
+
+	WARN_ON_ONCE(hlist_empty(head));
+
+	hlist_move_list(head, &tmp);
+
+	/*
+	 * Restore .bi_private before possibly completing dm_io.
+	 *
+	 * bio_poll() is only possible once @bio has been completely
+	 * submitted via submit_bio_noacct()'s depth-first submission.
+	 * So there is no dm_queue_poll_io() race associated with
+	 * clearing REQ_DM_POLL_LIST here.
+	 */
+	bio->bi_opf &= ~REQ_DM_POLL_LIST;
+	bio->bi_private = hlist_entry(tmp.first, struct dm_io, node)->data;
+
+	hlist_for_each_entry_safe(io, next, &tmp, node) {
+		if (dm_poll_dm_io(io, iob, flags)) {
+			hlist_del_init(&io->node);
+			/*
+			 * clone_endio() has already occurred, so passing
+			 * error as 0 here doesn't override io->status
+			 */
+			dm_io_dec_pending(io, 0);
+		}
+	}
+
+	/* Not done? */
+	if (!hlist_empty(&tmp)) {
+		bio->bi_opf |= REQ_DM_POLL_LIST;
+		/* Reset bio->bi_private to dm_io list head */
+		hlist_move_list(&tmp, head);
+		return 0;
+	}
+	return 1;
+}
+
 /*-----------------------------------------------------------------
  * An IDR is used to keep track of allocated minor numbers.
  *---------------------------------------------------------------*/
@@ -2983,6 +3119,7 @@ static const struct pr_ops dm_pr_ops = {
 
 static const struct block_device_operations dm_blk_dops = {
 	.submit_bio = dm_submit_bio,
+	.poll_bio = dm_poll_bio,
 	.open = dm_blk_open,
 	.release = dm_blk_close,
 	.ioctl = dm_blk_ioctl,
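
Note: the core trick in this patch is that bio->bi_private is pointer-sized storage owned by the submitter, so DM can borrow it as an hlist_head while the bio is being polled, as long as the original value is saved (in dm_io->data) and restored before completion. Below is a minimal userspace sketch of that pattern under those assumptions; it is illustrative only, not kernel code, and every name in it (fake_bio, sub_io, FAKE_POLL_LIST, and so on) is invented for the example.

```c
/* Userspace sketch of "borrow a submitter-owned pointer field as a list head". */
#include <assert.h>
#include <stddef.h>

struct fake_bio {
	void *bi_private;	/* owned by the submitter, like bio->bi_private */
	unsigned int opf;	/* flag word, like bio->bi_opf */
};

#define FAKE_POLL_LIST 0x1	/* stands in for REQ_DM_POLL_LIST */

struct sub_io {
	void *saved_private;	/* plays the role of dm_io->data */
	struct sub_io *next;
	int done;
};

/* Reinterpret the pointer field's storage as the list head (cf. dm_get_bio_hlist_head). */
static struct sub_io **head_of(struct fake_bio *bio)
{
	return (struct sub_io **)&bio->bi_private;
}

/* Queue one sub-I/O record (cf. dm_queue_poll_io). */
static void queue_sub_io(struct fake_bio *bio, struct sub_io *io)
{
	struct sub_io **head = head_of(bio);

	if (!(bio->opf & FAKE_POLL_LIST)) {
		bio->opf |= FAKE_POLL_LIST;
		io->saved_private = bio->bi_private;	/* save the submitter's pointer */
		*head = NULL;				/* field now acts as a list head */
	} else {
		io->saved_private = (*head)->saved_private;
	}
	io->next = *head;
	*head = io;
}

/* Poll: once all sub-I/Os are done, restore bi_private (cf. dm_poll_bio). */
static int poll_fake_bio(struct fake_bio *bio)
{
	struct sub_io **head = head_of(bio), *io;

	if (!(bio->opf & FAKE_POLL_LIST))
		return 0;
	for (io = *head; io; io = io->next)
		if (!io->done)
			return 0;			/* still in flight */
	bio->opf &= ~FAKE_POLL_LIST;
	bio->bi_private = (*head)->saved_private;	/* give the field back */
	return 1;
}

int main(void)
{
	static char cookie;
	struct fake_bio bio = { .bi_private = &cookie };
	struct sub_io a = {0}, b = {0};

	queue_sub_io(&bio, &a);
	queue_sub_io(&bio, &b);
	assert(poll_fake_bio(&bio) == 0);
	a.done = b.done = 1;
	assert(poll_fake_bio(&bio) == 1);
	assert(bio.bi_private == &cookie);	/* original pointer restored */
	return 0;
}
```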
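A second point worth spelling out is the completion rule: dm_split_and_process_bio() keeps one extra reference on each polled dm_io, so dm_poll_dm_io() keeps calling bio_poll() on the clone while io_count is still above 1, and treats io_count == 1 (only the poller's reference left) as done, after which dm_poll_bio() drops that last reference with dm_io_dec_pending(io, 0). A rough standalone sketch of that refcount convention, using C11 atomics and invented names (fake_io, fake_submit, fake_poll), follows.

```c
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

struct fake_io {
	atomic_int io_count;	/* plays the role of dm_io->io_count */
};

/* The splitter takes two references: one for the in-flight clone and one
 * extra that only the poller drops (cf. dm_split_and_process_bio). */
static void fake_submit(struct fake_io *io)
{
	atomic_init(&io->io_count, 2);
}

/* Completion of the underlying clone drops its reference
 * (cf. clone_endio() -> dm_io_dec_pending()). */
static void fake_clone_done(struct fake_io *io)
{
	atomic_fetch_sub(&io->io_count, 1);
}

/* Poll step (cf. dm_poll_dm_io()): while the clone still holds a reference,
 * keep polling; once only the poller's reference remains, report done so the
 * caller can drop that final reference. */
static bool fake_poll(struct fake_io *io)
{
	return atomic_load(&io->io_count) == 1;
}

int main(void)
{
	struct fake_io io;

	fake_submit(&io);
	assert(!fake_poll(&io));	/* clone still in flight */
	fake_clone_done(&io);
	assert(fake_poll(&io));		/* only the extra reference is left */
	return 0;
}
```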