Skip to content

Commit 5fcbd55

Browse files
Christoph Hellwig and brauner
authored and committed
iomap: split bios to zone append limits in the submission handlers
Provide helpers for file systems to split bios in the direct I/O and writeback I/O submission handlers. The split ioends are chained to the parent ioend so that only the parent ioend originally generated by the iomap layer will be processed after all the chained-off children have completed. This is based on the block layer bio chaining that has supported a similar mechanism for a long time. This follows btrfs' lead and doesn't try to build bios to hardware limits for zone append commands, but instead builds them as normal unconstrained bios and splits them to the hardware limits in the I/O submission handler. Signed-off-by: Christoph Hellwig <hch@lst.de> Link: https://lore.kernel.org/r/20250206064035.2323428-5-hch@lst.de Reviewed-by: "Darrick J. Wong" <djwong@kernel.org> Signed-off-by: Christian Brauner <brauner@kernel.org>
1 parent 034c29f commit 5fcbd55

4 files changed

Lines changed: 130 additions & 21 deletions

File tree

fs/iomap/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ iomap-y += trace.o \
1212
iter.o
1313
iomap-$(CONFIG_BLOCK) += buffered-io.o \
1414
direct-io.o \
15+
ioend.o \
1516
fiemap.o \
1617
seek.o
1718
iomap-$(CONFIG_SWAP) += swapfile.o

fs/iomap/buffered-io.c

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ struct iomap_folio_state {
4040
unsigned long state[];
4141
};
4242

43-
static struct bio_set iomap_ioend_bioset;
43+
struct bio_set iomap_ioend_bioset;
44+
EXPORT_SYMBOL_GPL(iomap_ioend_bioset);
4445

4546
static inline bool ifs_is_fully_uptodate(struct folio *folio,
4647
struct iomap_folio_state *ifs)
@@ -1539,15 +1540,15 @@ static void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
15391540
* ioend after this.
15401541
*/
15411542
static u32
1542-
iomap_finish_ioend(struct iomap_ioend *ioend, int error)
1543+
iomap_finish_ioend_buffered(struct iomap_ioend *ioend)
15431544
{
15441545
struct inode *inode = ioend->io_inode;
15451546
struct bio *bio = &ioend->io_bio;
15461547
struct folio_iter fi;
15471548
u32 folio_count = 0;
15481549

1549-
if (error) {
1550-
mapping_set_error(inode->i_mapping, error);
1550+
if (ioend->io_error) {
1551+
mapping_set_error(inode->i_mapping, ioend->io_error);
15511552
if (!bio_flagged(bio, BIO_QUIET)) {
15521553
pr_err_ratelimited(
15531554
"%s: writeback error on inode %lu, offset %lld, sector %llu",
@@ -1566,6 +1567,24 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
15661567
return folio_count;
15671568
}
15681569

1570+
/*
 * Account one completion against @ioend and finish it once nothing is left
 * outstanding.
 *
 * If @ioend has an io_parent, the reference on its embedded bio is dropped
 * and the completion is accounted against the parent instead.  A non-zero
 * @error is stored in io_error only if io_error is still 0, so the first
 * recorded error is kept.  Returns 0 while io_remaining has not reached zero;
 * otherwise returns whatever iomap_finish_ioend_buffered() returns.
 */
static u32
1571+
iomap_finish_ioend(struct iomap_ioend *ioend, int error)
1572+
{
1573+
if (ioend->io_parent) {
1574+
struct bio *bio = &ioend->io_bio;
1575+
1576+
ioend = ioend->io_parent;
1577+
bio_put(bio);
1578+
}
1579+
1580+
if (error)
1581+
cmpxchg(&ioend->io_error, 0, error);
1582+
1583+
if (!atomic_dec_and_test(&ioend->io_remaining))
1584+
return 0;
1585+
return iomap_finish_ioend_buffered(ioend);
1586+
}
1587+
15691588
/*
15701589
* Ioend completion routine for merged bios. This can only be called from task
15711590
* contexts as merged ioends can be of unbound length. Hence we have to break up
@@ -1667,8 +1686,10 @@ EXPORT_SYMBOL_GPL(iomap_sort_ioends);
16671686

16681687
static void iomap_writepage_end_bio(struct bio *bio)
16691688
{
1670-
iomap_finish_ioend(iomap_ioend_from_bio(bio),
1671-
blk_status_to_errno(bio->bi_status));
1689+
struct iomap_ioend *ioend = iomap_ioend_from_bio(bio);
1690+
1691+
ioend->io_error = blk_status_to_errno(bio->bi_status);
1692+
iomap_finish_ioend_buffered(ioend);
16721693
}
16731694

16741695
/*
@@ -1713,29 +1734,17 @@ static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc,
17131734
struct writeback_control *wbc, struct inode *inode, loff_t pos,
17141735
u16 ioend_flags)
17151736
{
1716-
struct iomap_ioend *ioend;
17171737
struct bio *bio;
17181738

17191739
bio = bio_alloc_bioset(wpc->iomap.bdev, BIO_MAX_VECS,
17201740
REQ_OP_WRITE | wbc_to_write_flags(wbc),
17211741
GFP_NOFS, &iomap_ioend_bioset);
17221742
bio->bi_iter.bi_sector = iomap_sector(&wpc->iomap, pos);
17231743
bio->bi_end_io = iomap_writepage_end_bio;
1724-
wbc_init_bio(wbc, bio);
17251744
bio->bi_write_hint = inode->i_write_hint;
1726-
1727-
ioend = iomap_ioend_from_bio(bio);
1728-
INIT_LIST_HEAD(&ioend->io_list);
1729-
ioend->io_flags = ioend_flags;
1730-
if (pos > wpc->iomap.offset)
1731-
wpc->iomap.flags &= ~IOMAP_F_BOUNDARY;
1732-
ioend->io_inode = inode;
1733-
ioend->io_size = 0;
1734-
ioend->io_offset = pos;
1735-
ioend->io_sector = bio->bi_iter.bi_sector;
1736-
1745+
wbc_init_bio(wbc, bio);
17371746
wpc->nr_folios = 0;
1738-
return ioend;
1747+
return iomap_init_ioend(inode, bio, pos, ioend_flags);
17391748
}
17401749

17411750
static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos,

fs/iomap/ioend.c

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* Copyright (c) 2024-2025 Christoph Hellwig.
4+
*/
5+
#include <linux/iomap.h>
6+
7+
/*
 * Initialise the ioend that iomap_ioend_from_bio() yields for @bio and
 * return it.
 *
 * The ioend starts with io_remaining set to 1, no error and no parent.
 * io_size and io_sector are copied from @bio->bi_iter, so the caller must
 * have set those up before calling.  @file_offset and @ioend_flags are
 * stored in io_offset and io_flags.
 */
struct iomap_ioend *iomap_init_ioend(struct inode *inode,
8+
struct bio *bio, loff_t file_offset, u16 ioend_flags)
9+
{
10+
struct iomap_ioend *ioend = iomap_ioend_from_bio(bio);
11+
12+
atomic_set(&ioend->io_remaining, 1);
13+
ioend->io_error = 0;
14+
ioend->io_parent = NULL;
15+
INIT_LIST_HEAD(&ioend->io_list);
16+
ioend->io_flags = ioend_flags;
17+
ioend->io_inode = inode;
18+
ioend->io_offset = file_offset;
19+
ioend->io_size = bio->bi_iter.bi_size;
20+
ioend->io_sector = bio->bi_iter.bi_sector;
21+
return ioend;
22+
}
23+
EXPORT_SYMBOL_GPL(iomap_init_ioend);
24+
25+
/*
26+
* Split up to the first @max_len bytes from @ioend if the ioend covers more
27+
* than @max_len bytes.
28+
*
29+
* If @is_append is set, the split will be based on the hardware limits for
30+
* REQ_OP_ZONE_APPEND commands and can be less than @max_len if the hardware
31+
* limits don't allow the entire @max_len length.
32+
*
33+
* The bio embedded into @ioend must be a REQ_OP_WRITE because the block layer
34+
* does not allow splitting REQ_OP_ZONE_APPEND bios. The file system has to
35+
* switch the operation after this call, but before submitting the bio.
 *
 * Return: NULL if no split was performed, an ERR_PTR() if computing or
 * performing the split failed, or otherwise the newly initialised ioend for
 * the split-off bio.  The returned ioend has io_parent set to @ioend and
 * takes over @ioend's previous io_offset and io_sector; @ioend is adjusted to
 * describe the remainder.
36+
*/
37+
struct iomap_ioend *iomap_split_ioend(struct iomap_ioend *ioend,
38+
unsigned int max_len, bool is_append)
39+
{
40+
struct bio *bio = &ioend->io_bio;
41+
struct iomap_ioend *split_ioend;
42+
unsigned int nr_segs;
43+
int sector_offset;
44+
struct bio *split;
45+
46+
if (is_append) {
47+
struct queue_limits *lim = bdev_limits(bio->bi_bdev);
48+
49+
/* NOTE(review): the shift is done in the type of max_zone_append_sectors - confirm it cannot overflow */
max_len = min(max_len,
50+
lim->max_zone_append_sectors << SECTOR_SHIFT);
51+
52+
sector_offset = bio_split_rw_at(bio, lim, &nr_segs, max_len);
53+
if (unlikely(sector_offset < 0))
54+
return ERR_PTR(sector_offset);
55+
if (!sector_offset)
56+
return NULL;
57+
} else {
58+
if (bio->bi_iter.bi_size <= max_len)
59+
return NULL;
60+
sector_offset = max_len >> SECTOR_SHIFT;
61+
}
62+
63+
/* ensure the split ioend is still block size aligned */
64+
sector_offset = ALIGN_DOWN(sector_offset << SECTOR_SHIFT,
65+
i_blocksize(ioend->io_inode)) >> SECTOR_SHIFT;
66+
67+
split = bio_split(bio, sector_offset, GFP_NOFS, &iomap_ioend_bioset);
68+
if (IS_ERR(split))
69+
return ERR_CAST(split);
/* the split bio completes through the same handler and context as the original */
70+
split->bi_private = bio->bi_private;
71+
split->bi_end_io = bio->bi_end_io;
72+
73+
split_ioend = iomap_init_ioend(ioend->io_inode, split, ioend->io_offset,
74+
ioend->io_flags);
75+
split_ioend->io_parent = ioend;
76+
77+
/* one more completion is now outstanding against the parent; advance it past the split-off bytes */
atomic_inc(&ioend->io_remaining);
78+
ioend->io_offset += split_ioend->io_size;
79+
ioend->io_size -= split_ioend->io_size;
80+
81+
/* for appends the parent's io_sector is deliberately left unchanged */
split_ioend->io_sector = ioend->io_sector;
82+
if (!is_append)
83+
ioend->io_sector += (split_ioend->io_size >> SECTOR_SHIFT);
84+
return split_ioend;
85+
}
86+
EXPORT_SYMBOL_GPL(iomap_split_ioend);

include/linux/iomap.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,12 +353,19 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno,
353353

354354
/*
355355
* Structure for writeback I/O completions.
356+
*
357+
* File systems implementing ->submit_ioend can split a bio generated
358+
* by iomap. In that case the parent ioend it was split from is recorded
359+
* in ioend->io_parent.
356360
*/
357361
struct iomap_ioend {
358362
struct list_head io_list; /* next ioend in chain */
359363
u16 io_flags; /* IOMAP_IOEND_* */
360364
struct inode *io_inode; /* file being written to */
361-
size_t io_size; /* size of data within eof */
365+
size_t io_size; /* size of the extent */
366+
atomic_t io_remaining; /* completion defer count */
367+
int io_error; /* stashed away status */
368+
struct iomap_ioend *io_parent; /* parent for completions */
362369
loff_t io_offset; /* offset in the file */
363370
sector_t io_sector; /* start sector of ioend */
364371
struct bio io_bio; /* MUST BE LAST! */
@@ -408,6 +415,10 @@ struct iomap_writepage_ctx {
408415
u32 nr_folios; /* folios added to the ioend */
409416
};
410417

418+
struct iomap_ioend *iomap_init_ioend(struct inode *inode, struct bio *bio,
419+
loff_t file_offset, u16 ioend_flags);
420+
struct iomap_ioend *iomap_split_ioend(struct iomap_ioend *ioend,
421+
unsigned int max_len, bool is_append);
411422
void iomap_finish_ioends(struct iomap_ioend *ioend, int error);
412423
void iomap_ioend_try_merge(struct iomap_ioend *ioend,
413424
struct list_head *more_ioends);
@@ -479,4 +490,6 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
479490
# define iomap_swapfile_activate(sis, swapfile, pagespan, ops) (-EIO)
480491
#endif /* CONFIG_SWAP */
481492

493+
extern struct bio_set iomap_ioend_bioset;
494+
482495
#endif /* LINUX_IOMAP_H */

0 commit comments

Comments
 (0)