Skip to content

Commit 3304b3f

Browse files
committed
Merge tag 'vfs-7.0-rc1.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs iomap updates from Christian Brauner: - Erofs page cache sharing preliminaries: Plumb a void *private parameter through iomap_read_folio() and iomap_readahead() into iomap_iter->private, matching iomap DIO. Erofs uses this to replace a bogus kmap_to_page() call, as preparatory work for page cache sharing. - Fix for invalid folio access: Fix an invalid folio access when a folio without iomap_folio_state is fully submitted to the IO helper — the helper may call folio_end_read() at any time, so ctx->cur_folio must be invalidated after full submission. * tag 'vfs-7.0-rc1.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: iomap: fix invalid folio access after folio_end_read() erofs: hold read context in iomap_iter if needed iomap: stash iomap read ctx in the private field of iomap_iter
2 parents 157d3d6 + aa35dd5 commit 3304b3f

4 files changed

Lines changed: 83 additions & 53 deletions

File tree

fs/erofs/data.c

Lines changed: 46 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -267,13 +267,20 @@ void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty)
267267
folio_end_read(folio, !(v & BIT(EROFS_ONLINEFOLIO_EIO)));
268268
}
269269

270+
struct erofs_iomap_iter_ctx {
271+
struct page *page;
272+
void *base;
273+
};
274+
270275
static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
271276
unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
272277
{
273-
int ret;
278+
struct iomap_iter *iter = container_of(iomap, struct iomap_iter, iomap);
279+
struct erofs_iomap_iter_ctx *ctx = iter->private;
274280
struct super_block *sb = inode->i_sb;
275281
struct erofs_map_blocks map;
276282
struct erofs_map_dev mdev;
283+
int ret;
277284

278285
map.m_la = offset;
279286
map.m_llen = length;
@@ -284,7 +291,6 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
284291
iomap->offset = map.m_la;
285292
iomap->length = map.m_llen;
286293
iomap->flags = 0;
287-
iomap->private = NULL;
288294
iomap->addr = IOMAP_NULL_ADDR;
289295
if (!(map.m_flags & EROFS_MAP_MAPPED)) {
290296
iomap->type = IOMAP_HOLE;
@@ -310,16 +316,20 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
310316
}
311317

312318
if (map.m_flags & EROFS_MAP_META) {
313-
void *ptr;
314-
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
315-
316319
iomap->type = IOMAP_INLINE;
317-
ptr = erofs_read_metabuf(&buf, sb, map.m_pa,
318-
erofs_inode_in_metabox(inode));
319-
if (IS_ERR(ptr))
320-
return PTR_ERR(ptr);
321-
iomap->inline_data = ptr;
322-
iomap->private = buf.base;
320+
/* read context should read the inlined data */
321+
if (ctx) {
322+
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
323+
void *ptr;
324+
325+
ptr = erofs_read_metabuf(&buf, sb, map.m_pa,
326+
erofs_inode_in_metabox(inode));
327+
if (IS_ERR(ptr))
328+
return PTR_ERR(ptr);
329+
iomap->inline_data = ptr;
330+
ctx->page = buf.page;
331+
ctx->base = buf.base;
332+
}
323333
} else {
324334
iomap->type = IOMAP_MAPPED;
325335
}
@@ -329,18 +339,18 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
329339
static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
330340
ssize_t written, unsigned int flags, struct iomap *iomap)
331341
{
332-
void *ptr = iomap->private;
342+
struct iomap_iter *iter = container_of(iomap, struct iomap_iter, iomap);
343+
struct erofs_iomap_iter_ctx *ctx = iter->private;
333344

334-
if (ptr) {
345+
if (ctx && ctx->base) {
335346
struct erofs_buf buf = {
336-
.page = kmap_to_page(ptr),
337-
.base = ptr,
347+
.page = ctx->page,
348+
.base = ctx->base,
338349
};
339350

340351
DBG_BUGON(iomap->type != IOMAP_INLINE);
341352
erofs_put_metabuf(&buf);
342-
} else {
343-
DBG_BUGON(iomap->type == IOMAP_INLINE);
353+
ctx->base = NULL;
344354
}
345355
return written;
346356
}
@@ -370,18 +380,30 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
370380
*/
371381
static int erofs_read_folio(struct file *file, struct folio *folio)
372382
{
383+
struct iomap_read_folio_ctx read_ctx = {
384+
.ops = &iomap_bio_read_ops,
385+
.cur_folio = folio,
386+
};
387+
struct erofs_iomap_iter_ctx iter_ctx = {};
388+
373389
trace_erofs_read_folio(folio, true);
374390

375-
iomap_bio_read_folio(folio, &erofs_iomap_ops);
391+
iomap_read_folio(&erofs_iomap_ops, &read_ctx, &iter_ctx);
376392
return 0;
377393
}
378394

379395
static void erofs_readahead(struct readahead_control *rac)
380396
{
397+
struct iomap_read_folio_ctx read_ctx = {
398+
.ops = &iomap_bio_read_ops,
399+
.rac = rac,
400+
};
401+
struct erofs_iomap_iter_ctx iter_ctx = {};
402+
381403
trace_erofs_readahead(rac->mapping->host, readahead_index(rac),
382404
readahead_count(rac), true);
383405

384-
iomap_bio_readahead(rac, &erofs_iomap_ops);
406+
iomap_readahead(&erofs_iomap_ops, &read_ctx, &iter_ctx);
385407
}
386408

387409
static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
@@ -401,9 +423,12 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
401423
if (IS_DAX(inode))
402424
return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
403425
#endif
404-
if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_sb->s_bdev)
426+
if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_sb->s_bdev) {
427+
struct erofs_iomap_iter_ctx iter_ctx = {};
428+
405429
return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
406-
NULL, 0, NULL, 0);
430+
NULL, 0, &iter_ctx, 0);
431+
}
407432
return filemap_read(iocb, to, 0);
408433
}
409434

fs/fuse/file.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -979,7 +979,7 @@ static int fuse_read_folio(struct file *file, struct folio *folio)
979979
return -EIO;
980980
}
981981

982-
iomap_read_folio(&fuse_iomap_ops, &ctx);
982+
iomap_read_folio(&fuse_iomap_ops, &ctx, NULL);
983983
fuse_invalidate_atime(inode);
984984
return 0;
985985
}
@@ -1081,7 +1081,7 @@ static void fuse_readahead(struct readahead_control *rac)
10811081
if (fuse_is_bad(inode))
10821082
return;
10831083

1084-
iomap_readahead(&fuse_iomap_ops, &ctx);
1084+
iomap_readahead(&fuse_iomap_ops, &ctx, NULL);
10851085
}
10861086

10871087
static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to)

fs/iomap/buffered-io.c

Lines changed: 31 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -418,32 +418,26 @@ static void iomap_read_init(struct folio *folio)
418418
struct iomap_folio_state *ifs = folio->private;
419419

420420
if (ifs) {
421-
size_t len = folio_size(folio);
422-
423421
/*
424422
* ifs->read_bytes_pending is used to track how many bytes are
425423
* read in asynchronously by the IO helper. We need to track
426424
* this so that we can know when the IO helper has finished
427425
* reading in all the necessary ranges of the folio and can end
428426
* the read.
429427
*
430-
* Increase ->read_bytes_pending by the folio size to start, and
431-
* add a +1 bias. We'll subtract the bias and any uptodate /
432-
* zeroed ranges that did not require IO in iomap_read_end()
433-
* after we're done processing the folio.
428+
* Increase ->read_bytes_pending by the folio size to start.
429+
* We'll subtract any uptodate / zeroed ranges that did not
430+
* require IO in iomap_read_end() after we're done processing
431+
* the folio.
434432
*
435433
* We do this because otherwise, we would have to increment
436434
* ifs->read_bytes_pending every time a range in the folio needs
437435
* to be read in, which can get expensive since the spinlock
438436
* needs to be held whenever modifying ifs->read_bytes_pending.
439-
*
440-
* We add the bias to ensure the read has not been ended on the
441-
* folio when iomap_read_end() is called, even if the IO helper
442-
* has already finished reading in the entire folio.
443437
*/
444438
spin_lock_irq(&ifs->state_lock);
445439
WARN_ON_ONCE(ifs->read_bytes_pending != 0);
446-
ifs->read_bytes_pending = len + 1;
440+
ifs->read_bytes_pending = folio_size(folio);
447441
spin_unlock_irq(&ifs->state_lock);
448442
}
449443
}
@@ -474,11 +468,9 @@ static void iomap_read_end(struct folio *folio, size_t bytes_submitted)
474468

475469
/*
476470
* Subtract any bytes that were initially accounted to
477-
* read_bytes_pending but skipped for IO. The +1 accounts for
478-
* the bias we added in iomap_read_init().
471+
* read_bytes_pending but skipped for IO.
479472
*/
480-
ifs->read_bytes_pending -=
481-
(folio_size(folio) + 1 - bytes_submitted);
473+
ifs->read_bytes_pending -= folio_size(folio) - bytes_submitted;
482474

483475
/*
484476
* If !ifs->read_bytes_pending, this means all pending reads by
@@ -492,14 +484,16 @@ static void iomap_read_end(struct folio *folio, size_t bytes_submitted)
492484
spin_unlock_irq(&ifs->state_lock);
493485
if (end_read)
494486
folio_end_read(folio, uptodate);
495-
} else if (!bytes_submitted) {
487+
} else {
496488
/*
497-
* If there were no bytes submitted, this means we are
498-
* responsible for unlocking the folio here, since no IO helper
499-
* has taken ownership of it. If there were bytes submitted,
500-
* then the IO helper will end the read via
501-
* iomap_finish_folio_read().
489+
* If a folio without an ifs is submitted to the IO helper, the
490+
* read must be on the entire folio and the IO helper takes
491+
* ownership of the folio. This means we should only enter
492+
* iomap_read_end() for the !ifs case if no bytes were submitted
493+
* to the IO helper, in which case we are responsible for
494+
* unlocking the folio here.
502495
*/
496+
WARN_ON_ONCE(bytes_submitted);
503497
folio_unlock(folio);
504498
}
505499
}
@@ -511,6 +505,7 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
511505
loff_t pos = iter->pos;
512506
loff_t length = iomap_length(iter);
513507
struct folio *folio = ctx->cur_folio;
508+
size_t folio_len = folio_size(folio);
514509
size_t poff, plen;
515510
loff_t pos_diff;
516511
int ret;
@@ -524,8 +519,7 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
524519

525520
ifs_alloc(iter->inode, folio, iter->flags);
526521

527-
length = min_t(loff_t, length,
528-
folio_size(folio) - offset_in_folio(folio, pos));
522+
length = min_t(loff_t, length, folio_len - offset_in_folio(folio, pos));
529523
while (length) {
530524
iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff,
531525
&plen);
@@ -555,7 +549,15 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
555549
plen, ret, GFP_NOFS);
556550
if (ret)
557551
return ret;
552+
558553
*bytes_submitted += plen;
554+
/*
555+
* If the entire folio has been read in by the IO
556+
* helper, then the helper owns the folio and will end
557+
* the read on it.
558+
*/
559+
if (*bytes_submitted == folio_len)
560+
ctx->cur_folio = NULL;
559561
}
560562

561563
ret = iomap_iter_advance(iter, plen);
@@ -568,13 +570,14 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
568570
}
569571

570572
void iomap_read_folio(const struct iomap_ops *ops,
571-
struct iomap_read_folio_ctx *ctx)
573+
struct iomap_read_folio_ctx *ctx, void *private)
572574
{
573575
struct folio *folio = ctx->cur_folio;
574576
struct iomap_iter iter = {
575577
.inode = folio->mapping->host,
576578
.pos = folio_pos(folio),
577579
.len = folio_size(folio),
580+
.private = private,
578581
};
579582
size_t bytes_submitted = 0;
580583
int ret;
@@ -588,7 +591,8 @@ void iomap_read_folio(const struct iomap_ops *ops,
588591
if (ctx->ops->submit_read)
589592
ctx->ops->submit_read(ctx);
590593

591-
iomap_read_end(folio, bytes_submitted);
594+
if (ctx->cur_folio)
595+
iomap_read_end(ctx->cur_folio, bytes_submitted);
592596
}
593597
EXPORT_SYMBOL_GPL(iomap_read_folio);
594598

@@ -633,13 +637,14 @@ static int iomap_readahead_iter(struct iomap_iter *iter,
633637
* the filesystem to be reentered.
634638
*/
635639
void iomap_readahead(const struct iomap_ops *ops,
636-
struct iomap_read_folio_ctx *ctx)
640+
struct iomap_read_folio_ctx *ctx, void *private)
637641
{
638642
struct readahead_control *rac = ctx->rac;
639643
struct iomap_iter iter = {
640644
.inode = rac->mapping->host,
641645
.pos = readahead_pos(rac),
642646
.len = readahead_length(rac),
647+
.private = private,
643648
};
644649
size_t cur_bytes_submitted;
645650

include/linux/iomap.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -345,9 +345,9 @@ ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
345345
const struct iomap_ops *ops,
346346
const struct iomap_write_ops *write_ops, void *private);
347347
void iomap_read_folio(const struct iomap_ops *ops,
348-
struct iomap_read_folio_ctx *ctx);
348+
struct iomap_read_folio_ctx *ctx, void *private);
349349
void iomap_readahead(const struct iomap_ops *ops,
350-
struct iomap_read_folio_ctx *ctx);
350+
struct iomap_read_folio_ctx *ctx, void *private);
351351
bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
352352
struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len);
353353
bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags);
@@ -599,7 +599,7 @@ static inline void iomap_bio_read_folio(struct folio *folio,
599599
.cur_folio = folio,
600600
};
601601

602-
iomap_read_folio(ops, &ctx);
602+
iomap_read_folio(ops, &ctx, NULL);
603603
}
604604

605605
static inline void iomap_bio_readahead(struct readahead_control *rac,
@@ -610,7 +610,7 @@ static inline void iomap_bio_readahead(struct readahead_control *rac,
610610
.rac = rac,
611611
};
612612

613-
iomap_readahead(ops, &ctx);
613+
iomap_readahead(ops, &ctx, NULL);
614614
}
615615
#endif /* CONFIG_BLOCK */
616616

0 commit comments

Comments
 (0)