Skip to content

Commit e523f2d

Browse files
Christoph Hellwig authored and brauner committed
iomap: optionally use ioends for direct I/O
struct iomap_ioend currently tracks outstanding buffered writes and has some really nice code in core iomap and XFS to merge contiguous I/Os and defer them to user context for completion in a very efficient way. For zoned writes we'll also need a per-bio user context completion to record the written blocks, and the infrastructure for that would look basically like the ioend handling for buffered I/O. So instead of reinventing the wheel, reuse the existing infrastructure. Signed-off-by: Christoph Hellwig <hch@lst.de> Link: https://lore.kernel.org/r/20250206064035.2323428-8-hch@lst.de Reviewed-by: "Darrick J. Wong" <djwong@kernel.org> Signed-off-by: Christian Brauner <brauner@kernel.org>
1 parent ae2f33a commit e523f2d

4 files changed

Lines changed: 55 additions & 6 deletions

File tree

fs/iomap/direct-io.c

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// SPDX-License-Identifier: GPL-2.0
22
/*
33
* Copyright (C) 2010 Red Hat, Inc.
4-
* Copyright (c) 2016-2021 Christoph Hellwig.
4+
* Copyright (c) 2016-2025 Christoph Hellwig.
55
*/
66
#include <linux/module.h>
77
#include <linux/compiler.h>
@@ -12,6 +12,7 @@
1212
#include <linux/backing-dev.h>
1313
#include <linux/uio.h>
1414
#include <linux/task_io_accounting_ops.h>
15+
#include "internal.h"
1516
#include "trace.h"
1617

1718
#include "../internal.h"
@@ -20,6 +21,7 @@
2021
* Private flags for iomap_dio, must not overlap with the public ones in
2122
* iomap.h:
2223
*/
24+
#define IOMAP_DIO_NO_INVALIDATE (1U << 25)
2325
#define IOMAP_DIO_CALLER_COMP (1U << 26)
2426
#define IOMAP_DIO_INLINE_COMP (1U << 27)
2527
#define IOMAP_DIO_WRITE_THROUGH (1U << 28)
@@ -119,7 +121,8 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
119121
* ->end_io() when necessary, otherwise a racing buffer read would cache
120122
* zeros from unwritten extents.
121123
*/
122-
if (!dio->error && dio->size && (dio->flags & IOMAP_DIO_WRITE))
124+
if (!dio->error && dio->size && (dio->flags & IOMAP_DIO_WRITE) &&
125+
!(dio->flags & IOMAP_DIO_NO_INVALIDATE))
123126
kiocb_invalidate_post_direct_write(iocb, dio->size);
124127

125128
inode_dio_end(file_inode(iocb->ki_filp));
@@ -241,6 +244,47 @@ void iomap_dio_bio_end_io(struct bio *bio)
241244
}
242245
EXPORT_SYMBOL_GPL(iomap_dio_bio_end_io);
243246

247+
/*
 * Completion handler for a direct I/O ioend; iomap_finish_ioend() calls this
 * instead of the buffered variant when IOMAP_IOEND_DIRECT is set.
 *
 * Passes any ioend->io_error on to iomap_dio_set_error(), drops one reference
 * on the owning iomap_dio and, if that was the last reference, finishes the
 * dio through iomap_dio_done().  Afterwards the bio is either handed to
 * bio_check_pages_dirty() (IOMAP_DIO_DIRTY was set) or its pages are released
 * with bio_release_pages() followed by bio_put().
 *
 * should_dirty and vec_count are sampled up front, before the reference drop
 * and the bio release below.
 * NOTE(review): presumably because neither the dio nor the bio may be touched
 * once those calls have run - confirm against iomap_dio_done().
 *
 * Returns the bi_vcnt of the ioend's bio as read on entry.
 */
u32 iomap_finish_ioend_direct(struct iomap_ioend *ioend)
248+
{
249+
struct iomap_dio *dio = ioend->io_bio.bi_private;
250+
bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
251+
u32 vec_count = ioend->io_bio.bi_vcnt;
252+
253+
if (ioend->io_error)
254+
iomap_dio_set_error(dio, ioend->io_error);
255+
256+
if (atomic_dec_and_test(&dio->ref)) {
257+
/*
258+
* Try to avoid another context switch for the completion given
259+
* that we are already called from the ioend completion
260+
* workqueue, but never invalidate pages from this thread to
261+
* avoid deadlocks with buffered I/O completions. Tough luck if
262+
* you hit the tiny race with someone dirtying the range now
263+
* between this check and the actual completion.
264+
*/
265+
if (!dio->iocb->ki_filp->f_mapping->nrpages) {
266+
dio->flags |= IOMAP_DIO_INLINE_COMP;
267+
dio->flags |= IOMAP_DIO_NO_INVALIDATE;
268+
}
269+
dio->flags &= ~IOMAP_DIO_CALLER_COMP;
270+
iomap_dio_done(dio);
271+
}
272+
273+
if (should_dirty) {
274+
bio_check_pages_dirty(&ioend->io_bio);
275+
} else {
276+
bio_release_pages(&ioend->io_bio, false);
277+
bio_put(&ioend->io_bio);
278+
}
279+
280+
/*
281+
* Return the number of bvecs completed as even direct I/O completions
282+
* do significant per-folio work and we'll still want to give up the
283+
* CPU after a lot of completions.
284+
*/
285+
return vec_count;
286+
}
287+
244288
static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
245289
loff_t pos, unsigned len)
246290
{

fs/iomap/internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,6 @@
55
#define IOEND_BATCH_SIZE 4096
66

77
u32 iomap_finish_ioend_buffered(struct iomap_ioend *ioend);
8+
u32 iomap_finish_ioend_direct(struct iomap_ioend *ioend);
89

910
#endif /* _IOMAP_INTERNAL_H */

fs/iomap/ioend.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ static u32 iomap_finish_ioend(struct iomap_ioend *ioend, int error)
4141

4242
if (!atomic_dec_and_test(&ioend->io_remaining))
4343
return 0;
44+
if (ioend->io_flags & IOMAP_IOEND_DIRECT)
45+
return iomap_finish_ioend_direct(ioend);
4446
return iomap_finish_ioend_buffered(ioend);
4547
}
4648

include/linux/iomap.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -343,20 +343,22 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno,
343343
#define IOMAP_IOEND_UNWRITTEN (1U << 1)
344344
/* don't merge into previous ioend */
345345
#define IOMAP_IOEND_BOUNDARY (1U << 2)
346+
/* is direct I/O */
347+
#define IOMAP_IOEND_DIRECT (1U << 3)
346348

347349
/*
348350
* Flags that if set on either ioend prevent the merge of two ioends.
349351
* (IOMAP_IOEND_BOUNDARY also prevents merges, but only one-way)
350352
*/
351353
#define IOMAP_IOEND_NOMERGE_FLAGS \
352-
(IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN)
354+
(IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT)
353355

354356
/*
355357
* Structure for writeback I/O completions.
356358
*
357-
* File systems implementing ->submit_ioend can split a bio generated
358-
* by iomap. In that case the parent ioend it was split from is recorded
359-
* in ioend->io_parent.
359+
* File systems implementing ->submit_ioend (for buffered I/O) or ->submit_io
360+
* (for direct I/O) can split a bio generated by iomap. In that case the parent
361+
* ioend it was split from is recorded in ioend->io_parent.
360362
*/
361363
struct iomap_ioend {
362364
struct list_head io_list; /* next ioend in chain */

0 commit comments

Comments
 (0)