Skip to content

Commit b1f8ccd

Browse files
committed
Merge tag 'for-5.18/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer: - Significant refactoring and fixing of how DM core does bio-based IO accounting with focus on fixing wildly inaccurate IO stats for dm-crypt (and other DM targets that defer bio submission in their own workqueues). End result is proper IO accounting, made possible by targets being updated to use the new dm_submit_bio_remap() interface. - Add hipri bio polling support (REQ_POLLED) to bio-based DM. - Reduce dm_io and dm_target_io structs so that a single dm_io (which contains dm_target_io and first clone bio) weighs in at 256 bytes. For reference the bio struct is 128 bytes. - Various other small cleanups, fixes or improvements in DM core and targets. - Update MAINTAINERS with my kernel.org email address to allow distinction between my "upstream" and "Red" Hats. * tag 'for-5.18/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (46 commits) dm: consolidate spinlocks in dm_io struct dm: reduce size of dm_io and dm_target_io structs dm: switch dm_target_io booleans over to proper flags dm: switch dm_io booleans over to proper flags dm: update email address in MAINTAINERS dm: return void from __send_empty_flush dm: factor out dm_io_complete dm cache: use dm_submit_bio_remap dm: simplify dm_sumbit_bio_remap interface dm thin: use dm_submit_bio_remap dm: add WARN_ON_ONCE to dm_submit_bio_remap dm: support bio polling block: add ->poll_bio to block_device_operations dm mpath: use DMINFO instead of printk with KERN_INFO dm: stop using bdevname dm-zoned: remove the ->name field in struct dmz_dev dm: remove unnecessary local variables in __bind dm: requeue IO if mapping table not yet available dm io: remove stale comment block for dm_io() dm thin metadata: remove unused dm_thin_remove_block and __remove ...
2 parents 2dacc1e + 4d7bca1 commit b1f8ccd

27 files changed

Lines changed: 685 additions & 361 deletions

MAINTAINERS

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5605,7 +5605,7 @@ F: include/linux/devm-helpers.h
56055605

56065606
DEVICE-MAPPER (LVM)
56075607
M: Alasdair Kergon <agk@redhat.com>
5608-
M: Mike Snitzer <snitzer@redhat.com>
5608+
M: Mike Snitzer <snitzer@kernel.org>
56095609
M: dm-devel@redhat.com
56105610
L: dm-devel@redhat.com
56115611
S: Maintained

block/blk-core.c

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -688,7 +688,7 @@ static void __submit_bio(struct bio *bio)
688688
*
689689
* bio_list_on_stack[0] contains bios submitted by the current ->submit_bio.
690690
* bio_list_on_stack[1] contains bios that were submitted before the current
691-
* ->submit_bio_bio, but that haven't been processed yet.
691+
* ->submit_bio, but that haven't been processed yet.
692692
*/
693693
static void __submit_bio_noacct(struct bio *bio)
694694
{
@@ -955,7 +955,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
955955
{
956956
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
957957
blk_qc_t cookie = READ_ONCE(bio->bi_cookie);
958-
int ret;
958+
int ret = 0;
959959

960960
if (cookie == BLK_QC_T_NONE ||
961961
!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
@@ -965,10 +965,14 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
965965

966966
if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT))
967967
return 0;
968-
if (WARN_ON_ONCE(!queue_is_mq(q)))
969-
ret = 0; /* not yet implemented, should not happen */
970-
else
968+
if (queue_is_mq(q)) {
971969
ret = blk_mq_poll(q, cookie, iob, flags);
970+
} else {
971+
struct gendisk *disk = q->disk;
972+
973+
if (disk && disk->fops->poll_bio)
974+
ret = disk->fops->poll_bio(bio, iob, flags);
975+
}
972976
blk_queue_exit(q);
973977
return ret;
974978
}

block/genhd.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,10 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
412412
struct device *ddev = disk_to_dev(disk);
413413
int ret;
414414

415+
/* Only makes sense for bio-based to set ->poll_bio */
416+
if (queue_is_mq(disk->queue) && disk->fops->poll_bio)
417+
return -EINVAL;
418+
415419
/*
416420
* The disk queue should now be all set with enough information about
417421
* the device for the elevator code to pick an adequate default

drivers/md/dm-cache-policy-smq.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1026,7 +1026,9 @@ static unsigned default_promote_level(struct smq_policy *mq)
10261026
* This scheme reminds me of a graph of entropy vs probability of a
10271027
* binary variable.
10281028
*/
1029-
static unsigned table[] = {1, 1, 1, 2, 4, 6, 7, 8, 7, 6, 4, 4, 3, 3, 2, 2, 1};
1029+
static const unsigned int table[] = {
1030+
1, 1, 1, 2, 4, 6, 7, 8, 7, 6, 4, 4, 3, 3, 2, 2, 1
1031+
};
10301032

10311033
unsigned hits = mq->cache_stats.hits;
10321034
unsigned misses = mq->cache_stats.misses;

drivers/md/dm-cache-target.c

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -803,7 +803,7 @@ static void accounted_complete(struct cache *cache, struct bio *bio)
803803
static void accounted_request(struct cache *cache, struct bio *bio)
804804
{
805805
accounted_begin(cache, bio);
806-
submit_bio_noacct(bio);
806+
dm_submit_bio_remap(bio, NULL);
807807
}
808808

809809
static void issue_op(struct bio *bio, void *context)
@@ -1708,7 +1708,7 @@ static bool process_bio(struct cache *cache, struct bio *bio)
17081708
bool commit_needed;
17091709

17101710
if (map_bio(cache, bio, get_bio_block(cache, bio), &commit_needed) == DM_MAPIO_REMAPPED)
1711-
submit_bio_noacct(bio);
1711+
dm_submit_bio_remap(bio, NULL);
17121712

17131713
return commit_needed;
17141714
}
@@ -1774,7 +1774,7 @@ static bool process_discard_bio(struct cache *cache, struct bio *bio)
17741774

17751775
if (cache->features.discard_passdown) {
17761776
remap_to_origin(cache, bio);
1777-
submit_bio_noacct(bio);
1777+
dm_submit_bio_remap(bio, NULL);
17781778
} else
17791779
bio_endio(bio);
17801780

@@ -2015,7 +2015,6 @@ static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
20152015
{
20162016
int r;
20172017
sector_t metadata_dev_size;
2018-
char b[BDEVNAME_SIZE];
20192018

20202019
if (!at_least_one_arg(as, error))
20212020
return -EINVAL;
@@ -2029,8 +2028,8 @@ static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
20292028

20302029
metadata_dev_size = get_dev_size(ca->metadata_dev);
20312030
if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
2032-
DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
2033-
bdevname(ca->metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS);
2031+
DMWARN("Metadata device %pg is larger than %u sectors: excess space will not be used.",
2032+
ca->metadata_dev->bdev, THIN_METADATA_MAX_SECTORS);
20342033

20352034
return 0;
20362035
}
@@ -2357,6 +2356,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
23572356

23582357
cache->ti = ca->ti;
23592358
ti->private = cache;
2359+
ti->accounts_remapped_io = true;
23602360
ti->num_flush_bios = 2;
23612361
ti->flush_supported = true;
23622362

@@ -3345,7 +3345,6 @@ static void disable_passdown_if_not_supported(struct cache *cache)
33453345
struct block_device *origin_bdev = cache->origin_dev->bdev;
33463346
struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
33473347
const char *reason = NULL;
3348-
char buf[BDEVNAME_SIZE];
33493348

33503349
if (!cache->features.discard_passdown)
33513350
return;
@@ -3357,8 +3356,8 @@ static void disable_passdown_if_not_supported(struct cache *cache)
33573356
reason = "max discard sectors smaller than a block";
33583357

33593358
if (reason) {
3360-
DMWARN("Origin device (%s) %s: Disabling discard passdown.",
3361-
bdevname(origin_bdev, buf), reason);
3359+
DMWARN("Origin device (%pg) %s: Disabling discard passdown.",
3360+
origin_bdev, reason);
33623361
cache->features.discard_passdown = false;
33633362
}
33643363
}

drivers/md/dm-clone-target.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1682,7 +1682,6 @@ static int parse_metadata_dev(struct clone *clone, struct dm_arg_set *as, char *
16821682
{
16831683
int r;
16841684
sector_t metadata_dev_size;
1685-
char b[BDEVNAME_SIZE];
16861685

16871686
r = dm_get_device(clone->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
16881687
&clone->metadata_dev);
@@ -1693,8 +1692,8 @@ static int parse_metadata_dev(struct clone *clone, struct dm_arg_set *as, char *
16931692

16941693
metadata_dev_size = get_dev_size(clone->metadata_dev);
16951694
if (metadata_dev_size > DM_CLONE_METADATA_MAX_SECTORS_WARNING)
1696-
DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
1697-
bdevname(clone->metadata_dev->bdev, b), DM_CLONE_METADATA_MAX_SECTORS);
1695+
DMWARN("Metadata device %pg is larger than %u sectors: excess space will not be used.",
1696+
clone->metadata_dev->bdev, DM_CLONE_METADATA_MAX_SECTORS);
16981697

16991698
return 0;
17001699
}
@@ -2033,7 +2032,6 @@ static void disable_passdown_if_not_supported(struct clone *clone)
20332032
struct block_device *dest_dev = clone->dest_dev->bdev;
20342033
struct queue_limits *dest_limits = &bdev_get_queue(dest_dev)->limits;
20352034
const char *reason = NULL;
2036-
char buf[BDEVNAME_SIZE];
20372035

20382036
if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags))
20392037
return;
@@ -2044,8 +2042,8 @@ static void disable_passdown_if_not_supported(struct clone *clone)
20442042
reason = "max discard sectors smaller than a region";
20452043

20462044
if (reason) {
2047-
DMWARN("Destination device (%s) %s: Disabling discard passdown.",
2048-
bdevname(dest_dev, buf), reason);
2045+
DMWARN("Destination device (%pg) %s: Disabling discard passdown.",
2046+
dest_dev, reason);
20492047
clear_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags);
20502048
}
20512049
}

drivers/md/dm-core.h

Lines changed: 71 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,21 @@ struct mapped_device {
6464
struct gendisk *disk;
6565
struct dax_device *dax_dev;
6666

67+
wait_queue_head_t wait;
68+
unsigned long __percpu *pending_io;
69+
70+
/* forced geometry settings */
71+
struct hd_geometry geometry;
72+
73+
/*
74+
* Processing queue (flush)
75+
*/
76+
struct workqueue_struct *wq;
77+
6778
/*
6879
* A list of ios that arrived while we were suspended.
6980
*/
7081
struct work_struct work;
71-
wait_queue_head_t wait;
7282
spinlock_t deferred_lock;
7383
struct bio_list deferred;
7484

@@ -83,36 +93,28 @@ struct mapped_device {
8393
struct list_head uevent_list;
8494
spinlock_t uevent_lock; /* Protect access to uevent_list */
8595

96+
/* for blk-mq request-based DM support */
97+
bool init_tio_pdu:1;
98+
struct blk_mq_tag_set *tag_set;
99+
100+
struct dm_stats stats;
101+
86102
/* the number of internal suspends */
87103
unsigned internal_suspend_count;
88104

105+
int swap_bios;
106+
struct semaphore swap_bios_semaphore;
107+
struct mutex swap_bios_lock;
108+
89109
/*
90110
* io objects are allocated from here.
91111
*/
92112
struct bio_set io_bs;
93113
struct bio_set bs;
94114

95-
/*
96-
* Processing queue (flush)
97-
*/
98-
struct workqueue_struct *wq;
99-
100-
/* forced geometry settings */
101-
struct hd_geometry geometry;
102-
103115
/* kobject and completion */
104116
struct dm_kobject_holder kobj_holder;
105117

106-
int swap_bios;
107-
struct semaphore swap_bios_semaphore;
108-
struct mutex swap_bios_lock;
109-
110-
struct dm_stats stats;
111-
112-
/* for blk-mq request-based DM support */
113-
struct blk_mq_tag_set *tag_set;
114-
bool init_tio_pdu:1;
115-
116118
struct srcu_struct io_barrier;
117119

118120
#ifdef CONFIG_BLK_DEV_ZONED
@@ -206,35 +208,76 @@ struct dm_table {
206208
/*
207209
* One of these is allocated per clone bio.
208210
*/
209-
#define DM_TIO_MAGIC 7282014
211+
#define DM_TIO_MAGIC 28714
210212
struct dm_target_io {
211-
unsigned int magic;
213+
unsigned short magic;
214+
unsigned short flags;
215+
unsigned int target_bio_nr;
212216
struct dm_io *io;
213217
struct dm_target *ti;
214-
unsigned int target_bio_nr;
215218
unsigned int *len_ptr;
216-
bool inside_dm_io;
219+
sector_t old_sector;
217220
struct bio clone;
218221
};
219222

223+
/*
224+
* dm_target_io flags
225+
*/
226+
enum {
227+
DM_TIO_INSIDE_DM_IO,
228+
DM_TIO_IS_DUPLICATE_BIO
229+
};
230+
231+
static inline bool dm_tio_flagged(struct dm_target_io *tio, unsigned int bit)
232+
{
233+
return (tio->flags & (1U << bit)) != 0;
234+
}
235+
236+
static inline void dm_tio_set_flag(struct dm_target_io *tio, unsigned int bit)
237+
{
238+
tio->flags |= (1U << bit);
239+
}
240+
220241
/*
221242
* One of these is allocated per original bio.
222243
* It contains the first clone used for that original.
223244
*/
224-
#define DM_IO_MAGIC 5191977
245+
#define DM_IO_MAGIC 19577
225246
struct dm_io {
226-
unsigned int magic;
227-
struct mapped_device *md;
228-
blk_status_t status;
247+
unsigned short magic;
248+
unsigned short flags;
229249
atomic_t io_count;
250+
struct mapped_device *md;
230251
struct bio *orig_bio;
252+
blk_status_t status;
253+
spinlock_t lock;
231254
unsigned long start_time;
232-
spinlock_t endio_lock;
255+
void *data;
256+
struct hlist_node node;
257+
struct task_struct *map_task;
233258
struct dm_stats_aux stats_aux;
234259
/* last member of dm_target_io is 'struct bio' */
235260
struct dm_target_io tio;
236261
};
237262

263+
/*
264+
* dm_io flags
265+
*/
266+
enum {
267+
DM_IO_START_ACCT,
268+
DM_IO_ACCOUNTED
269+
};
270+
271+
static inline bool dm_io_flagged(struct dm_io *io, unsigned int bit)
272+
{
273+
return (io->flags & (1U << bit)) != 0;
274+
}
275+
276+
static inline void dm_io_set_flag(struct dm_io *io, unsigned int bit)
277+
{
278+
io->flags |= (1U << bit);
279+
}
280+
238281
static inline void dm_io_inc_pending(struct dm_io *io)
239282
{
240283
atomic_inc(&io->io_count);

0 commit comments

Comments
 (0)