Skip to content

Commit 001397f

Browse files
adam900710 authored and brauner committed
iomap: add IOMAP_DIO_FSBLOCK_ALIGNED flag
Btrfs requires all of its bios to be fs block aligned. Normally that is totally fine, but with the incoming support for block sizes larger than the page size (bs > ps), the requirement is no longer met for direct IOs, because iomap_dio_bio_iter() calls bio_iov_iter_get_pages() requiring only alignment to bdev_logical_block_size(). In the real world that value is either 512 or 4K, so on systems with 4K pages bio_iov_iter_get_pages() can break the bio at any page boundary, breaking btrfs' requirement for bs > ps cases. To address this problem, introduce a new public iomap dio flag, IOMAP_DIO_FSBLOCK_ALIGNED. When __iomap_dio_rw() is called with that new flag, iomap_dio::flags inherits it, and iomap_dio_bio_iter() takes the fs block size into account when calculating the alignment and passes that alignment to bio_iov_iter_get_pages(), respecting the fs block size requirement. The initial user of this flag will be btrfs, which needs to calculate the checksum for direct reads and thus requires the biovec to be fs block aligned for the incoming bs > ps support. Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: Pankaj Raghav <p.raghav@samsung.com> [hch: also align pos/len, incorporate the trace flags from Darrick] Signed-off-by: Christoph Hellwig <hch@lst.de> Link: https://patch.msgid.link/20251031131045.1613229-2-hch@lst.de Reviewed-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Christian Brauner <brauner@kernel.org>
1 parent 560507c commit 001397f

3 files changed

Lines changed: 27 additions & 5 deletions

File tree

fs/iomap/direct-io.c

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -336,8 +336,18 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
336336
int nr_pages, ret = 0;
337337
u64 copied = 0;
338338
size_t orig_count;
339+
unsigned int alignment;
339340

340-
if ((pos | length) & (bdev_logical_block_size(iomap->bdev) - 1))
341+
/*
342+
* File systems that write out of place and always allocate new blocks
343+
* need each bio to be block aligned as that's the unit of allocation.
344+
*/
345+
if (dio->flags & IOMAP_DIO_FSBLOCK_ALIGNED)
346+
alignment = fs_block_size;
347+
else
348+
alignment = bdev_logical_block_size(iomap->bdev);
349+
350+
if ((pos | length) & (alignment - 1))
341351
return -EINVAL;
342352

343353
if (dio->flags & IOMAP_DIO_WRITE) {
@@ -434,7 +444,7 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
434444
bio->bi_end_io = iomap_dio_bio_end_io;
435445

436446
ret = bio_iov_iter_get_pages(bio, dio->submit.iter,
437-
bdev_logical_block_size(iomap->bdev) - 1);
447+
alignment - 1);
438448
if (unlikely(ret)) {
439449
/*
440450
* We have to stop part way through an IO. We must fall
@@ -639,6 +649,9 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
639649
if (iocb->ki_flags & IOCB_NOWAIT)
640650
iomi.flags |= IOMAP_NOWAIT;
641651

652+
if (dio_flags & IOMAP_DIO_FSBLOCK_ALIGNED)
653+
dio->flags |= IOMAP_DIO_FSBLOCK_ALIGNED;
654+
642655
if (iov_iter_rw(iter) == READ) {
643656
/* reads can always complete inline */
644657
dio->flags |= IOMAP_DIO_INLINE_COMP;

fs/iomap/trace.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -122,9 +122,10 @@ DEFINE_RANGE_EVENT(iomap_zero_iter);
122122

123123

124124
#define IOMAP_DIO_STRINGS \
125-
{IOMAP_DIO_FORCE_WAIT, "DIO_FORCE_WAIT" }, \
126-
{IOMAP_DIO_OVERWRITE_ONLY, "DIO_OVERWRITE_ONLY" }, \
127-
{IOMAP_DIO_PARTIAL, "DIO_PARTIAL" }
125+
{IOMAP_DIO_FORCE_WAIT, "DIO_FORCE_WAIT" }, \
126+
{IOMAP_DIO_OVERWRITE_ONLY, "DIO_OVERWRITE_ONLY" }, \
127+
{IOMAP_DIO_PARTIAL, "DIO_PARTIAL" }, \
128+
{IOMAP_DIO_FSBLOCK_ALIGNED, "DIO_FSBLOCK_ALIGNED" }
128129

129130
DECLARE_EVENT_CLASS(iomap_class,
130131
TP_PROTO(struct inode *inode, struct iomap *iomap),

include/linux/iomap.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,14 @@ struct iomap_dio_ops {
553553
*/
554554
#define IOMAP_DIO_PARTIAL (1 << 2)
555555

556+
/*
557+
* Ensure each bio is aligned to fs block size.
558+
*
559+
* This is for filesystems which need to calculate/verify the checksum of each
560+
* fs block. Without this flag they may not be able to handle unaligned bios.
561+
*/
562+
#define IOMAP_DIO_FSBLOCK_ALIGNED (1 << 3)
563+
556564
ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
557565
const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
558566
unsigned int dio_flags, void *private, size_t done_before);

0 commit comments

Comments
 (0)