Skip to content

Commit 74e797d

Browse files
committed
mm: Provide a means of invalidation without using launder_folio
Implement a replacement for launder_folio. The key feature of invalidate_inode_pages2() is that it locks each folio individually, unmaps it to prevent mmap'd accesses interfering and calls the ->launder_folio() address_space op to flush it. This has problems: firstly, each folio is written individually as one or more small writes; secondly, adjacent folios cannot be added so easily into the laundry; thirdly, it's yet another op to implement. Instead, use the invalidate lock to cause anyone wanting to add a folio to the inode to wait, then unmap all the folios if we have mmaps, then, conditionally, use ->writepages() to flush any dirty data back and then discard all pages. The invalidate lock prevents ->read_iter(), ->write_iter() and faulting through mmap all from adding pages for the duration. This is then used from netfslib to handle the flushing in unbuffered and direct writes. Signed-off-by: David Howells <dhowells@redhat.com> cc: Matthew Wilcox <willy@infradead.org> cc: Miklos Szeredi <miklos@szeredi.hu> cc: Trond Myklebust <trond.myklebust@hammerspace.com> cc: Christoph Hellwig <hch@lst.de> cc: Andrew Morton <akpm@linux-foundation.org> cc: Alexander Viro <viro@zeniv.linux.org.uk> cc: Christian Brauner <brauner@kernel.org> cc: Jeff Layton <jlayton@kernel.org> cc: linux-mm@kvack.org cc: linux-fsdevel@vger.kernel.org cc: netfs@lists.linux.dev cc: v9fs@lists.linux.dev cc: linux-afs@lists.infradead.org cc: ceph-devel@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: linux-nfs@vger.kernel.org cc: devel@lists.orangefs.org
1 parent 120b878 commit 74e797d

3 files changed

Lines changed: 80 additions & 4 deletions

File tree

fs/netfs/direct_write.c

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -132,12 +132,14 @@ static ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov
132132
ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
133133
{
134134
struct file *file = iocb->ki_filp;
135-
struct inode *inode = file->f_mapping->host;
135+
struct address_space *mapping = file->f_mapping;
136+
struct inode *inode = mapping->host;
136137
struct netfs_inode *ictx = netfs_inode(inode);
137-
unsigned long long end;
138138
ssize_t ret;
139+
loff_t pos = iocb->ki_pos;
140+
unsigned long long end = pos + iov_iter_count(from) - 1;
139141

140-
_enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
142+
_enter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode));
141143

142144
if (!iov_iter_count(from))
143145
return 0;
@@ -157,7 +159,25 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
157159
ret = file_update_time(file);
158160
if (ret < 0)
159161
goto out;
160-
ret = kiocb_invalidate_pages(iocb, iov_iter_count(from));
162+
if (iocb->ki_flags & IOCB_NOWAIT) {
163+
/* We could block if there are any pages in the range. */
164+
ret = -EAGAIN;
165+
if (filemap_range_has_page(mapping, pos, end))
166+
if (filemap_invalidate_inode(inode, true, pos, end))
167+
goto out;
168+
} else {
169+
ret = filemap_write_and_wait_range(mapping, pos, end);
170+
if (ret < 0)
171+
goto out;
172+
}
173+
174+
/*
175+
* After a write we want buffered reads to be sure to go to disk to get
176+
* the new data. We invalidate clean cached page from the region we're
177+
* about to write. We do this *before* the write so that we can return
178+
* without clobbering -EIOCBQUEUED from ->direct_IO().
179+
*/
180+
ret = filemap_invalidate_inode(inode, true, pos, end);
161181
if (ret < 0)
162182
goto out;
163183
end = iocb->ki_pos + iov_iter_count(from);

include/linux/pagemap.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ int filemap_fdatawait_keep_errors(struct address_space *mapping);
4040
int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend);
4141
int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
4242
loff_t start_byte, loff_t end_byte);
43+
int filemap_invalidate_inode(struct inode *inode, bool flush,
44+
loff_t start, loff_t end);
4345

4446
static inline int filemap_fdatawait(struct address_space *mapping)
4547
{

mm/filemap.c

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4134,6 +4134,60 @@ bool filemap_release_folio(struct folio *folio, gfp_t gfp)
41344134
}
41354135
EXPORT_SYMBOL(filemap_release_folio);
41364136

4137+
/**
4138+
* filemap_invalidate_inode - Invalidate/forcibly write back a range of an inode's pagecache
4139+
* @inode: The inode to flush
4140+
* @flush: Set to write back rather than simply invalidate.
4141+
* @start: First byte to in range.
4142+
* @end: Last byte in range (inclusive), or LLONG_MAX for everything from start
4143+
* onwards.
4144+
*
4145+
* Invalidate all the folios on an inode that contribute to the specified
4146+
* range, possibly writing them back first. Whilst the operation is
4147+
* undertaken, the invalidate lock is held to prevent new folios from being
4148+
* installed.
4149+
*/
4150+
int filemap_invalidate_inode(struct inode *inode, bool flush,
4151+
loff_t start, loff_t end)
4152+
{
4153+
struct address_space *mapping = inode->i_mapping;
4154+
pgoff_t first = start >> PAGE_SHIFT;
4155+
pgoff_t last = end >> PAGE_SHIFT;
4156+
pgoff_t nr = end == LLONG_MAX ? ULONG_MAX : last - first + 1;
4157+
4158+
if (!mapping || !mapping->nrpages || end < start)
4159+
goto out;
4160+
4161+
/* Prevent new folios from being added to the inode. */
4162+
filemap_invalidate_lock(mapping);
4163+
4164+
if (!mapping->nrpages)
4165+
goto unlock;
4166+
4167+
unmap_mapping_pages(mapping, first, nr, false);
4168+
4169+
/* Write back the data if we're asked to. */
4170+
if (flush) {
4171+
struct writeback_control wbc = {
4172+
.sync_mode = WB_SYNC_ALL,
4173+
.nr_to_write = LONG_MAX,
4174+
.range_start = start,
4175+
.range_end = end,
4176+
};
4177+
4178+
filemap_fdatawrite_wbc(mapping, &wbc);
4179+
}
4180+
4181+
/* Wait for writeback to complete on all folios and discard. */
4182+
truncate_inode_pages_range(mapping, start, end);
4183+
4184+
unlock:
4185+
filemap_invalidate_unlock(mapping);
4186+
out:
4187+
return filemap_check_errors(mapping);
4188+
}
4189+
EXPORT_SYMBOL_GPL(filemap_invalidate_inode);
4190+
41374191
#ifdef CONFIG_CACHESTAT_SYSCALL
41384192
/**
41394193
* filemap_cachestat() - compute the page cache statistics of a mapping

0 commit comments

Comments
 (0)