Skip to content

Commit 58fd191

Browse files
LiBaokun96 authored and tytso committed
ext4: make data=journal support large block size
Currently, ext4_set_inode_mapping_order() does not set max folio order for files with the data journalling flag. For files that already have large folios enabled, ext4_inode_journal_mode() ignores the data journalling flag once max folio order is set.

This is not because data journalling cannot work with large folios, but because credit estimates will go through the roof if there are too many blocks per folio.

Since the real constraint is blocks-per-folio, to support data=journal under LBS, we now set max folio order to be equal to min folio order for files with the journalling flag. When LBS is disabled, the max folio order remains unset as before.

Therefore, before ext4_change_inode_journal_flag() switches the journalling mode, we call truncate_pagecache() to drop all page cache for that inode, and filemap_write_and_wait() is called unconditionally.

After that, once the journalling mode has been switched, we can safely reset the inode mapping order, and the mapping_large_folio_support() check in ext4_inode_journal_mode() can be removed.

Suggested-by: Jan Kara <jack@suse.cz>
Suggested-by: Dan Carpenter <dan.carpenter@linaro.org>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Reviewed-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
Message-ID: <20251121090654.631996-22-libaokun@huaweicloud.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
1 parent c00a629 commit 58fd191

2 files changed

Lines changed: 20 additions & 16 deletions

File tree

fs/ext4/ext4_jbd2.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@ int ext4_inode_journal_mode(struct inode *inode)
1616
ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE) ||
1717
test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
1818
(ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
19-
!test_opt(inode->i_sb, DELALLOC) &&
20-
!mapping_large_folio_support(inode->i_mapping))) {
19+
!test_opt(inode->i_sb, DELALLOC))) {
2120
/* We do not support data journalling for encrypted data */
2221
if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode))
2322
return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */

fs/ext4/inode.c

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5154,9 +5154,6 @@ static bool ext4_should_enable_large_folio(struct inode *inode)
51545154

51555155
if (!S_ISREG(inode->i_mode))
51565156
return false;
5157-
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
5158-
ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
5159-
return false;
51605157
if (ext4_has_feature_verity(sb))
51615158
return false;
51625159
if (ext4_has_feature_encrypt(sb))
@@ -5174,12 +5171,20 @@ static bool ext4_should_enable_large_folio(struct inode *inode)
51745171
umin(MAX_PAGECACHE_ORDER, (11 + (i)->i_blkbits - PAGE_SHIFT))
51755172
void ext4_set_inode_mapping_order(struct inode *inode)
51765173
{
5174+
u32 max_order;
5175+
51775176
if (!ext4_should_enable_large_folio(inode))
51785177
return;
51795178

5179+
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
5180+
ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
5181+
max_order = EXT4_SB(inode->i_sb)->s_min_folio_order;
5182+
else
5183+
max_order = EXT4_MAX_PAGECACHE_ORDER(inode);
5184+
51805185
mapping_set_folio_order_range(inode->i_mapping,
51815186
EXT4_SB(inode->i_sb)->s_min_folio_order,
5182-
EXT4_MAX_PAGECACHE_ORDER(inode));
5187+
max_order);
51835188
}
51845189

51855190
struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
@@ -6554,14 +6559,14 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
65546559
* dirty data which can be converted only after flushing the dirty
65556560
* data (and journalled aops don't know how to handle these cases).
65566561
*/
6557-
if (val) {
6558-
filemap_invalidate_lock(inode->i_mapping);
6559-
err = filemap_write_and_wait(inode->i_mapping);
6560-
if (err < 0) {
6561-
filemap_invalidate_unlock(inode->i_mapping);
6562-
return err;
6563-
}
6562+
filemap_invalidate_lock(inode->i_mapping);
6563+
err = filemap_write_and_wait(inode->i_mapping);
6564+
if (err < 0) {
6565+
filemap_invalidate_unlock(inode->i_mapping);
6566+
return err;
65646567
}
6568+
/* Before switch the inode journalling mode evict all the page cache. */
6569+
truncate_pagecache(inode, 0);
65656570

65666571
alloc_ctx = ext4_writepages_down_write(inode->i_sb);
65676572
jbd2_journal_lock_updates(journal);
@@ -6581,17 +6586,17 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
65816586
if (err < 0) {
65826587
jbd2_journal_unlock_updates(journal);
65836588
ext4_writepages_up_write(inode->i_sb, alloc_ctx);
6589+
filemap_invalidate_unlock(inode->i_mapping);
65846590
return err;
65856591
}
65866592
ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
65876593
}
65886594
ext4_set_aops(inode);
6595+
ext4_set_inode_mapping_order(inode);
65896596

65906597
jbd2_journal_unlock_updates(journal);
65916598
ext4_writepages_up_write(inode->i_sb, alloc_ctx);
6592-
6593-
if (val)
6594-
filemap_invalidate_unlock(inode->i_mapping);
6599+
filemap_invalidate_unlock(inode->i_mapping);
65956600

65966601
/* Finally we can mark the inode as dirty. */
65976602

0 commit comments

Comments
 (0)