Skip to content

Commit 935a20d

Browse files
bvanassche authored and axboe committed
block: Remove queue freezing from several sysfs store callbacks
Freezing the request queue from inside sysfs store callbacks may cause a
deadlock in combination with the dm-multipath driver and the
queue_if_no_path option. Additionally, freezing the request queue slows
down system boot on systems where sysfs attributes are set synchronously.

Fix this by removing the blk_mq_freeze_queue() / blk_mq_unfreeze_queue()
calls from the store callbacks that do not strictly need these callbacks.
Add the __data_racy annotation to request_queue.rq_timeout to suppress
KCSAN data race reports about the rq_timeout reads.

This patch may cause a small delay in applying the new settings.

For all the attributes affected by this patch, I/O will complete
correctly whether the old or the new value of the attribute is used.

This patch affects the following sysfs attributes:
* io_poll_delay
* io_timeout
* nomerges
* read_ahead_kb
* rq_affinity

Here is an example of a deadlock triggered by running test srp/002
if this patch is not applied:

task:multipathd
Call Trace:
 <TASK>
 __schedule+0x8c1/0x1bf0
 schedule+0xdd/0x270
 schedule_preempt_disabled+0x1c/0x30
 __mutex_lock+0xb89/0x1650
 mutex_lock_nested+0x1f/0x30
 dm_table_set_restrictions+0x823/0xdf0
 __bind+0x166/0x590
 dm_swap_table+0x2a7/0x490
 do_resume+0x1b1/0x610
 dev_suspend+0x55/0x1a0
 ctl_ioctl+0x3a5/0x7e0
 dm_ctl_ioctl+0x12/0x20
 __x64_sys_ioctl+0x127/0x1a0
 x64_sys_call+0xe2b/0x17d0
 do_syscall_64+0x96/0x3a0
 entry_SYSCALL_64_after_hwframe+0x4b/0x53
 </TASK>

task:(udev-worker)
Call Trace:
 <TASK>
 __schedule+0x8c1/0x1bf0
 schedule+0xdd/0x270
 blk_mq_freeze_queue_wait+0xf2/0x140
 blk_mq_freeze_queue_nomemsave+0x23/0x30
 queue_ra_store+0x14e/0x290
 queue_attr_store+0x23e/0x2c0
 sysfs_kf_write+0xde/0x140
 kernfs_fop_write_iter+0x3b2/0x630
 vfs_write+0x4fd/0x1390
 ksys_write+0xfd/0x230
 __x64_sys_write+0x76/0xc0
 x64_sys_call+0x276/0x17d0
 do_syscall_64+0x96/0x3a0
 entry_SYSCALL_64_after_hwframe+0x4b/0x53
 </TASK>

Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Nilay Shroff <nilay@linux.ibm.com>
Cc: Martin Wilck <mwilck@suse.com>
Cc: Benjamin Marzinski <bmarzins@redhat.com>
Cc: stable@vger.kernel.org
Fixes: af28141 ("block: freeze the queue in queue_attr_store")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 42adb2d commit 935a20d

2 files changed

Lines changed: 9 additions & 19 deletions

File tree

block/blk-sysfs.c

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -143,21 +143,22 @@ queue_ra_store(struct gendisk *disk, const char *page, size_t count)
143143
{
144144
unsigned long ra_kb;
145145
ssize_t ret;
146-
unsigned int memflags;
147146
struct request_queue *q = disk->queue;
148147

149148
ret = queue_var_store(&ra_kb, page, count);
150149
if (ret < 0)
151150
return ret;
152151
/*
153-
* ->ra_pages is protected by ->limits_lock because it is usually
154-
* calculated from the queue limits by queue_limits_commit_update.
152+
* The ->ra_pages change below is protected by ->limits_lock because it
153+
* is usually calculated from the queue limits by
154+
* queue_limits_commit_update().
155+
*
156+
* bdi->ra_pages reads are not serialized against bdi->ra_pages writes.
157+
* Use WRITE_ONCE() to write bdi->ra_pages once.
155158
*/
156159
mutex_lock(&q->limits_lock);
157-
memflags = blk_mq_freeze_queue(q);
158-
disk->bdi->ra_pages = ra_kb >> (PAGE_SHIFT - 10);
160+
WRITE_ONCE(disk->bdi->ra_pages, ra_kb >> (PAGE_SHIFT - 10));
159161
mutex_unlock(&q->limits_lock);
160-
blk_mq_unfreeze_queue(q, memflags);
161162

162163
return ret;
163164
}
@@ -375,21 +376,18 @@ static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page,
375376
size_t count)
376377
{
377378
unsigned long nm;
378-
unsigned int memflags;
379379
struct request_queue *q = disk->queue;
380380
ssize_t ret = queue_var_store(&nm, page, count);
381381

382382
if (ret < 0)
383383
return ret;
384384

385-
memflags = blk_mq_freeze_queue(q);
386385
blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
387386
blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
388387
if (nm == 2)
389388
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
390389
else if (nm)
391390
blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
392-
blk_mq_unfreeze_queue(q, memflags);
393391

394392
return ret;
395393
}
@@ -409,7 +407,6 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
409407
#ifdef CONFIG_SMP
410408
struct request_queue *q = disk->queue;
411409
unsigned long val;
412-
unsigned int memflags;
413410

414411
ret = queue_var_store(&val, page, count);
415412
if (ret < 0)
@@ -421,7 +418,6 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
421418
* are accessed individually using atomic test_bit operation. So we
422419
* don't grab any lock while updating these flags.
423420
*/
424-
memflags = blk_mq_freeze_queue(q);
425421
if (val == 2) {
426422
blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
427423
blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
@@ -432,7 +428,6 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
432428
blk_queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
433429
blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
434430
}
435-
blk_mq_unfreeze_queue(q, memflags);
436431
#endif
437432
return ret;
438433
}
@@ -446,11 +441,9 @@ static ssize_t queue_poll_delay_store(struct gendisk *disk, const char *page,
446441
static ssize_t queue_poll_store(struct gendisk *disk, const char *page,
447442
size_t count)
448443
{
449-
unsigned int memflags;
450444
ssize_t ret = count;
451445
struct request_queue *q = disk->queue;
452446

453-
memflags = blk_mq_freeze_queue(q);
454447
if (!(q->limits.features & BLK_FEAT_POLL)) {
455448
ret = -EINVAL;
456449
goto out;
@@ -459,7 +452,6 @@ static ssize_t queue_poll_store(struct gendisk *disk, const char *page,
459452
pr_info_ratelimited("writes to the poll attribute are ignored.\n");
460453
pr_info_ratelimited("please use driver specific parameters instead.\n");
461454
out:
462-
blk_mq_unfreeze_queue(q, memflags);
463455
return ret;
464456
}
465457

@@ -472,17 +464,15 @@ static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page)
472464
static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
473465
size_t count)
474466
{
475-
unsigned int val, memflags;
467+
unsigned int val;
476468
int err;
477469
struct request_queue *q = disk->queue;
478470

479471
err = kstrtou32(page, 10, &val);
480472
if (err || val == 0)
481473
return -EINVAL;
482474

483-
memflags = blk_mq_freeze_queue(q);
484475
blk_queue_rq_timeout(q, msecs_to_jiffies(val));
485-
blk_mq_unfreeze_queue(q, memflags);
486476

487477
return count;
488478
}

include/linux/blkdev.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -495,7 +495,7 @@ struct request_queue {
495495
*/
496496
unsigned long queue_flags;
497497

498-
unsigned int rq_timeout;
498+
unsigned int __data_racy rq_timeout;
499499

500500
unsigned int queue_depth;
501501

0 commit comments

Comments
 (0)