Skip to content

Commit 0ba93a9

Browse files
Ming Lei authored and axboe committed
loop: try to handle loop aio command via NOWAIT IO first
Try to handle loop aio command via NOWAIT IO first, then we can avoid to queue the aio command into workqueue. This is usually one big win in case that FS block mapping is stable, Mikulas verified [1] that this way improves IO perf by close to 5X in 12jobs sequential read/write test, in which FS block mapping is just stable. Fallback to workqueue in case of -EAGAIN. This way may bring a little cost from the 1st retry, but when running the following write test over loop/sparse_file, the actual effect on randwrite is obvious: ``` truncate -s 4G 1.img #1.img is created on XFS/virtio-scsi losetup -f 1.img --direct-io=on fio --direct=1 --bs=4k --runtime=40 --time_based --numjobs=1 --ioengine=libaio \ --iodepth=16 --group_reporting=1 --filename=/dev/loop0 -name=job --rw=$RW ``` - RW=randwrite: obvious IOPS drop observed - RW=write: a little drop(%5 - 10%) This perf drop on randwrite over sparse file will be addressed in the following patch. BLK_MQ_F_BLOCKING has to be set for calling into .read_iter() or .write_iter() which might sleep even though it is NOWAIT, and the only effect is that rcu read lock is replaced with srcu read lock. Link: https://lore.kernel.org/linux-block/a8e5c76a-231f-07d1-a394-847de930f638@redhat.com/ [1] Signed-off-by: Ming Lei <ming.lei@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent f4788ae commit 0ba93a9

1 file changed

Lines changed: 63 additions & 5 deletions

File tree

drivers/block/loop.c

Lines changed: 63 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ struct loop_cmd {
9090
#define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ)
9191
#define LOOP_DEFAULT_HW_Q_DEPTH 128
9292

93+
static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd);
94+
9395
static DEFINE_IDR(loop_index_idr);
9496
static DEFINE_MUTEX(loop_ctl_mutex);
9597
static DEFINE_MUTEX(loop_validate_mutex);
@@ -321,6 +323,15 @@ static void lo_rw_aio_do_completion(struct loop_cmd *cmd)
321323

322324
if (!atomic_dec_and_test(&cmd->ref))
323325
return;
326+
327+
/* -EAGAIN could be returned from bdev's ->ki_complete */
328+
if (cmd->ret == -EAGAIN) {
329+
struct loop_device *lo = rq->q->queuedata;
330+
331+
loop_queue_work(lo, cmd);
332+
return;
333+
}
334+
324335
kfree(cmd->bvec);
325336
cmd->bvec = NULL;
326337
if (req_op(rq) == REQ_OP_WRITE)
@@ -430,22 +441,51 @@ static int lo_submit_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
430441
return ret;
431442
}
432443

444+
static bool lo_backfile_support_nowait(const struct loop_device *lo)
445+
{
446+
return lo->lo_backing_file->f_mode & FMODE_NOWAIT;
447+
}
448+
433449
static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
434450
loff_t pos, int rw)
435451
{
436452
int nr_bvec = lo_cmd_nr_bvec(cmd);
437453
int ret;
438454

439-
ret = lo_rw_aio_prep(lo, cmd, nr_bvec, pos);
440-
if (unlikely(ret))
441-
return ret;
455+
/* prepared already if we have tried nowait */
456+
if (!cmd->use_aio || !lo_backfile_support_nowait(lo)) {
457+
ret = lo_rw_aio_prep(lo, cmd, nr_bvec, pos);
458+
if (unlikely(ret))
459+
goto fail;
460+
}
442461

462+
cmd->iocb.ki_flags &= ~IOCB_NOWAIT;
443463
ret = lo_submit_rw_aio(lo, cmd, nr_bvec, rw);
464+
fail:
444465
if (ret != -EIOCBQUEUED)
445466
lo_rw_aio_complete(&cmd->iocb, ret);
446467
return -EIOCBQUEUED;
447468
}
448469

470+
static int lo_rw_aio_nowait(struct loop_device *lo, struct loop_cmd *cmd,
471+
int rw)
472+
{
473+
struct request *rq = blk_mq_rq_from_pdu(cmd);
474+
loff_t pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset;
475+
int nr_bvec = lo_cmd_nr_bvec(cmd);
476+
int ret = lo_rw_aio_prep(lo, cmd, nr_bvec, pos);
477+
478+
if (unlikely(ret))
479+
goto fail;
480+
481+
cmd->iocb.ki_flags |= IOCB_NOWAIT;
482+
ret = lo_submit_rw_aio(lo, cmd, nr_bvec, rw);
483+
fail:
484+
if (ret != -EIOCBQUEUED && ret != -EAGAIN)
485+
lo_rw_aio_complete(&cmd->iocb, ret);
486+
return ret;
487+
}
488+
449489
static int do_req_filebacked(struct loop_device *lo, struct request *rq)
450490
{
451491
struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);
@@ -1907,6 +1947,7 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
19071947
struct request *rq = bd->rq;
19081948
struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq);
19091949
struct loop_device *lo = rq->q->queuedata;
1950+
int rw = 0;
19101951

19111952
blk_mq_start_request(rq);
19121953

@@ -1919,9 +1960,25 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
19191960
case REQ_OP_WRITE_ZEROES:
19201961
cmd->use_aio = false;
19211962
break;
1922-
default:
1963+
case REQ_OP_READ:
1964+
rw = ITER_DEST;
19231965
cmd->use_aio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
19241966
break;
1967+
case REQ_OP_WRITE:
1968+
rw = ITER_SOURCE;
1969+
cmd->use_aio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
1970+
break;
1971+
default:
1972+
return BLK_STS_IOERR;
1973+
}
1974+
1975+
/* try NOWAIT if the backing file supports the mode */
1976+
if (cmd->use_aio && lo_backfile_support_nowait(lo)) {
1977+
int res = lo_rw_aio_nowait(lo, cmd, rw);
1978+
1979+
if (res != -EAGAIN && res != -EOPNOTSUPP)
1980+
return BLK_STS_OK;
1981+
/* fallback to workqueue for handling aio */
19251982
}
19261983

19271984
loop_queue_work(lo, cmd);
@@ -2073,7 +2130,8 @@ static int loop_add(int i)
20732130
lo->tag_set.queue_depth = hw_queue_depth;
20742131
lo->tag_set.numa_node = NUMA_NO_NODE;
20752132
lo->tag_set.cmd_size = sizeof(struct loop_cmd);
2076-
lo->tag_set.flags = BLK_MQ_F_STACKING | BLK_MQ_F_NO_SCHED_BY_DEFAULT;
2133+
lo->tag_set.flags = BLK_MQ_F_STACKING | BLK_MQ_F_NO_SCHED_BY_DEFAULT |
2134+
BLK_MQ_F_BLOCKING;
20772135
lo->tag_set.driver_data = lo;
20782136

20792137
err = blk_mq_alloc_tag_set(&lo->tag_set);

0 commit comments

Comments
 (0)