
Commit fde0269

damien-lemoal authored and axboe committed
block: mq-deadline: Remove support for zone write locking
With the block layer generic plugging of write operations for zoned block devices, mq-deadline, or any other scheduler, can only ever see at most one write operation per zone at any time. There are thus no sequentiality requirements for these writes, and so no need to tightly control the dispatching of write requests using zone write locking.

Remove all the code that implements this control in the mq-deadline scheduler and stop advertising support for the ELEVATOR_F_ZBD_SEQ_WRITE elevator feature.

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Hans Holmberg <hans.holmberg@wdc.com>
Tested-by: Dennis Maisenbacher <dennis.maisenbacher@wdc.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240408014128.205141-22-dlemoal@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 9b3c08b commit fde0269

1 file changed, 6 additions and 170 deletions
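The reasoning in the commit message boils down to a simple invariant: with zone write plugging, the block layer lets at most one write per zone reach the scheduler, holding any further writes for that zone back until the in-flight one completes. Below is a minimal, self-contained user-space sketch of that invariant; it is not kernel code, and all names in it (zone_plug, zone_submit, zone_complete) are hypothetical.

/*
 * Minimal user-space sketch (not kernel code) of the invariant the commit
 * relies on: with zone write plugging, at most one write per zone is ever
 * outstanding below the plug, so an I/O scheduler such as mq-deadline never
 * has to serialize writes to the same zone itself.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_ZONES 4

struct zone_plug {
        bool write_in_flight;   /* at most one write below this plug */
        int  plugged_writes;    /* writes held back until completion */
};

static struct zone_plug plugs[NR_ZONES];

/* Submit a write to @zone: pass it down only if the zone is idle. */
static void zone_submit(int zone)
{
        struct zone_plug *zp = &plugs[zone];

        if (zp->write_in_flight) {
                /* Held in the plug; the scheduler does not see this write yet. */
                zp->plugged_writes++;
                printf("zone %d: write plugged (%d waiting)\n",
                       zone, zp->plugged_writes);
                return;
        }
        zp->write_in_flight = true;
        printf("zone %d: write dispatched to scheduler\n", zone);
}

/* Completion of the in-flight write unplugs the next one, if any. */
static void zone_complete(int zone)
{
        struct zone_plug *zp = &plugs[zone];

        zp->write_in_flight = false;
        if (zp->plugged_writes) {
                zp->plugged_writes--;
                zp->write_in_flight = true;
                printf("zone %d: next plugged write dispatched\n", zone);
        }
}

int main(void)
{
        /* Two back-to-back writes to zone 1: only one is visible at a time. */
        zone_submit(1);
        zone_submit(1);
        zone_complete(1);
        zone_complete(1);
        return 0;
}

Under this invariant the scheduler never sees two writes for the same zone at once, so deadline_fifo_request() and deadline_next_request() can simply return the next FIFO or position-ordered request, which is what the simplified code in the diff below does.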

block/mq-deadline.c (6 additions, 170 deletions)

@@ -102,7 +102,6 @@ struct deadline_data {
 	int prio_aging_expire;
 
 	spinlock_t lock;
-	spinlock_t zone_lock;
 };
 
 /* Maps an I/O priority class to a deadline scheduler priority. */
@@ -157,8 +156,7 @@ deadline_latter_request(struct request *rq)
 }
 
 /*
- * Return the first request for which blk_rq_pos() >= @pos. For zoned devices,
- * return the first request after the start of the zone containing @pos.
+ * Return the first request for which blk_rq_pos() >= @pos.
  */
 static inline struct request *deadline_from_pos(struct dd_per_prio *per_prio,
 				enum dd_data_dir data_dir, sector_t pos)
@@ -170,14 +168,6 @@ static inline struct request *deadline_from_pos(struct dd_per_prio *per_prio,
 		return NULL;
 
 	rq = rb_entry_rq(node);
-	/*
-	 * A zoned write may have been requeued with a starting position that
-	 * is below that of the most recently dispatched request. Hence, for
-	 * zoned writes, start searching from the start of a zone.
-	 */
-	if (blk_rq_is_seq_zoned_write(rq))
-		pos = round_down(pos, rq->q->limits.chunk_sectors);
-
 	while (node) {
 		rq = rb_entry_rq(node);
 		if (blk_rq_pos(rq) >= pos) {
@@ -308,36 +298,6 @@ static inline bool deadline_check_fifo(struct dd_per_prio *per_prio,
 	return time_is_before_eq_jiffies((unsigned long)rq->fifo_time);
 }
 
-/*
- * Check if rq has a sequential request preceding it.
- */
-static bool deadline_is_seq_write(struct deadline_data *dd, struct request *rq)
-{
-	struct request *prev = deadline_earlier_request(rq);
-
-	if (!prev)
-		return false;
-
-	return blk_rq_pos(prev) + blk_rq_sectors(prev) == blk_rq_pos(rq);
-}
-
-/*
- * Skip all write requests that are sequential from @rq, even if we cross
- * a zone boundary.
- */
-static struct request *deadline_skip_seq_writes(struct deadline_data *dd,
-						struct request *rq)
-{
-	sector_t pos = blk_rq_pos(rq);
-
-	do {
-		pos += blk_rq_sectors(rq);
-		rq = deadline_latter_request(rq);
-	} while (rq && blk_rq_pos(rq) == pos);
-
-	return rq;
-}
-
 /*
  * For the specified data direction, return the next request to
  * dispatch using arrival ordered lists.
@@ -346,40 +306,10 @@ static struct request *
 deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 		      enum dd_data_dir data_dir)
 {
-	struct request *rq, *rb_rq, *next;
-	unsigned long flags;
-
 	if (list_empty(&per_prio->fifo_list[data_dir]))
 		return NULL;
 
-	rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
-	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
-		return rq;
-
-	/*
-	 * Look for a write request that can be dispatched, that is one with
-	 * an unlocked target zone. For some HDDs, breaking a sequential
-	 * write stream can lead to lower throughput, so make sure to preserve
-	 * sequential write streams, even if that stream crosses into the next
-	 * zones and these zones are unlocked.
-	 */
-	spin_lock_irqsave(&dd->zone_lock, flags);
-	list_for_each_entry_safe(rq, next, &per_prio->fifo_list[DD_WRITE],
-				 queuelist) {
-		/* Check whether a prior request exists for the same zone. */
-		rb_rq = deadline_from_pos(per_prio, data_dir, blk_rq_pos(rq));
-		if (rb_rq && blk_rq_pos(rb_rq) < blk_rq_pos(rq))
-			rq = rb_rq;
-		if (blk_req_can_dispatch_to_zone(rq) &&
-		    (blk_queue_nonrot(rq->q) ||
-		     !deadline_is_seq_write(dd, rq)))
-			goto out;
-	}
-	rq = NULL;
-out:
-	spin_unlock_irqrestore(&dd->zone_lock, flags);
-
-	return rq;
+	return rq_entry_fifo(per_prio->fifo_list[data_dir].next);
 }
 
 /*
@@ -390,36 +320,8 @@ static struct request *
 deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 		      enum dd_data_dir data_dir)
 {
-	struct request *rq;
-	unsigned long flags;
-
-	rq = deadline_from_pos(per_prio, data_dir,
-			       per_prio->latest_pos[data_dir]);
-	if (!rq)
-		return NULL;
-
-	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
-		return rq;
-
-	/*
-	 * Look for a write request that can be dispatched, that is one with
-	 * an unlocked target zone. For some HDDs, breaking a sequential
-	 * write stream can lead to lower throughput, so make sure to preserve
-	 * sequential write streams, even if that stream crosses into the next
-	 * zones and these zones are unlocked.
-	 */
-	spin_lock_irqsave(&dd->zone_lock, flags);
-	while (rq) {
-		if (blk_req_can_dispatch_to_zone(rq))
-			break;
-		if (blk_queue_nonrot(rq->q))
-			rq = deadline_latter_request(rq);
-		else
-			rq = deadline_skip_seq_writes(dd, rq);
-	}
-	spin_unlock_irqrestore(&dd->zone_lock, flags);
-
-	return rq;
+	return deadline_from_pos(per_prio, data_dir,
+				 per_prio->latest_pos[data_dir]);
 }
 
 /*
@@ -525,10 +427,6 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
 		rq = next_rq;
 	}
 
-	/*
-	 * For a zoned block device, if we only have writes queued and none of
-	 * them can be dispatched, rq will be NULL.
-	 */
 	if (!rq)
 		return NULL;
 
@@ -549,10 +447,6 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
 	prio = ioprio_class_to_prio[ioprio_class];
 	dd->per_prio[prio].latest_pos[data_dir] = blk_rq_pos(rq);
 	dd->per_prio[prio].stats.dispatched++;
-	/*
-	 * If the request needs its target zone locked, do it.
-	 */
-	blk_req_zone_write_lock(rq);
 	rq->rq_flags |= RQF_STARTED;
 	return rq;
 }
@@ -722,7 +616,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
 	dd->fifo_batch = fifo_batch;
 	dd->prio_aging_expire = prio_aging_expire;
 	spin_lock_init(&dd->lock);
-	spin_lock_init(&dd->zone_lock);
 
 	/* We dispatch from request queue wide instead of hw queue */
 	blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
@@ -804,12 +697,6 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 
 	lockdep_assert_held(&dd->lock);
 
-	/*
-	 * This may be a requeue of a write request that has locked its
-	 * target zone. If it is the case, this releases the zone lock.
-	 */
-	blk_req_zone_write_unlock(rq);
-
 	prio = ioprio_class_to_prio[ioprio_class];
 	per_prio = &dd->per_prio[prio];
 	if (!rq->elv.priv[0]) {
@@ -841,18 +728,6 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 		 */
 		rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
 		insert_before = &per_prio->fifo_list[data_dir];
-#ifdef CONFIG_BLK_DEV_ZONED
-		/*
-		 * Insert zoned writes such that requests are sorted by
-		 * position per zone.
-		 */
-		if (blk_rq_is_seq_zoned_write(rq)) {
-			struct request *rq2 = deadline_latter_request(rq);
-
-			if (rq2 && blk_rq_zone_no(rq2) == blk_rq_zone_no(rq))
-				insert_before = &rq2->queuelist;
-		}
-#endif
 		list_add_tail(&rq->queuelist, insert_before);
 	}
 }
@@ -887,33 +762,8 @@ static void dd_prepare_request(struct request *rq)
 	rq->elv.priv[0] = NULL;
 }
 
-static bool dd_has_write_work(struct blk_mq_hw_ctx *hctx)
-{
-	struct deadline_data *dd = hctx->queue->elevator->elevator_data;
-	enum dd_prio p;
-
-	for (p = 0; p <= DD_PRIO_MAX; p++)
-		if (!list_empty_careful(&dd->per_prio[p].fifo_list[DD_WRITE]))
-			return true;
-
-	return false;
-}
-
 /*
  * Callback from inside blk_mq_free_request().
- *
- * For zoned block devices, write unlock the target zone of
- * completed write requests. Do this while holding the zone lock
- * spinlock so that the zone is never unlocked while deadline_fifo_request()
- * or deadline_next_request() are executing. This function is called for
- * all requests, whether or not these requests complete successfully.
- *
- * For a zoned block device, __dd_dispatch_request() may have stopped
- * dispatching requests if all the queued requests are write requests directed
- * at zones that are already locked due to on-going write requests. To ensure
- * write request dispatch progress in this case, mark the queue as needing a
- * restart to ensure that the queue is run again after completion of the
- * request and zones being unlocked.
  */
 static void dd_finish_request(struct request *rq)
 {
@@ -928,21 +778,8 @@ static void dd_finish_request(struct request *rq)
 	 * called dd_insert_requests(). Skip requests that bypassed I/O
 	 * scheduling. See also blk_mq_request_bypass_insert().
 	 */
-	if (!rq->elv.priv[0])
-		return;
-
-	atomic_inc(&per_prio->stats.completed);
-
-	if (blk_queue_is_zoned(q)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&dd->zone_lock, flags);
-		blk_req_zone_write_unlock(rq);
-		spin_unlock_irqrestore(&dd->zone_lock, flags);
-
-		if (dd_has_write_work(rq->mq_hctx))
-			blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
-	}
+	if (rq->elv.priv[0])
+		atomic_inc(&per_prio->stats.completed);
 }
 
 static bool dd_has_work_for_prio(struct dd_per_prio *per_prio)
@@ -1266,7 +1103,6 @@ static struct elevator_type mq_deadline = {
 	.elevator_attrs = deadline_attrs,
 	.elevator_name = "mq-deadline",
 	.elevator_alias = "deadline",
-	.elevator_features = ELEVATOR_F_ZBD_SEQ_WRITE,
 	.elevator_owner = THIS_MODULE,
 };
 MODULE_ALIAS("mq-deadline-iosched");
