Skip to content

Commit 76a9701

Browse files
committed
Merge tag 'erofs-for-6.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs
Pull erofs updates from Gao Xiang:

"We now support metadata compression. It can be useful for embedded use cases or archiving a large number of small files.

Additionally, readdir performance has been improved by enabling readahead (note that it was already common practice for ext3/4 non-dx and f2fs directories). We may consider further improvements later to align with ext4's s_inode_readahead_blks behavior for slow devices too.

The remaining commits are minor.

Summary:
- Add support for metadata compression
- Enable readahead for directories to improve readdir performance
- Minor fixes and cleanups"

* tag 'erofs-for-6.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: support to readahead dirent blocks in erofs_readdir()
  erofs: implement metadata compression
  erofs: add on-disk definition for metadata compression
  erofs: fix build error with CONFIG_EROFS_FS_ZIP_ACCEL=y
  erofs: remove ENOATTR definition
  erofs: refine erofs_iomap_begin()
  erofs: unify meta buffers in z_erofs_fill_inode()
  erofs: remove need_kmap in erofs_read_metabuf()
  erofs: do sanity check on m->type in z_erofs_load_compact_lcluster()
  erofs: get rid of {get,put}_page() for ztailpacking data
2 parents a11b4fa + df0ce6c commit 76a9701

16 files changed

Lines changed: 255 additions & 167 deletions

File tree

Documentation/ABI/testing/sysfs-fs-erofs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ Description: Shows all enabled kernel features.
55
Supported features:
66
zero_padding, compr_cfgs, big_pcluster, chunked_file,
77
device_table, compr_head2, sb_chksum, ztailpacking,
8-
dedupe, fragments.
8+
dedupe, fragments, 48bit, metabox.
99

1010
What: /sys/fs/erofs/<disk>/sync_decompress
1111
Date: November 2021
@@ -35,3 +35,11 @@ Description: Used to set or show hardware accelerators in effect
3535
and multiple accelerators are separated by '\n'.
3636
Supported accelerator(s): qat_deflate.
3737
Disable all accelerators with an empty string (echo > accel).
38+
39+
What: /sys/fs/erofs/<disk>/dir_ra_bytes
40+
Date: July 2025
41+
Contact: "Chao Yu" <chao@kernel.org>
42+
Description: Used to set or show readahead bytes during readdir(). By
43+
default, the value is 16384.
44+
45+
- 0: disable readahead.

fs/erofs/Kconfig

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,8 @@ config EROFS_FS_ZIP_ZSTD
147147
config EROFS_FS_ZIP_ACCEL
148148
bool "EROFS hardware decompression support"
149149
depends on EROFS_FS_ZIP
150+
select CRYPTO
151+
select CRYPTO_DEFLATE
150152
help
151153
Saying Y here includes hardware accelerator support for reading
152154
EROFS file systems containing compressed data. It gives better

fs/erofs/data.c

Lines changed: 38 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,18 @@ void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap)
4949
return buf->base + (offset & ~PAGE_MASK);
5050
}
5151

52-
void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
52+
int erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb,
53+
bool in_metabox)
5354
{
5455
struct erofs_sb_info *sbi = EROFS_SB(sb);
5556

5657
buf->file = NULL;
58+
if (in_metabox) {
59+
if (unlikely(!sbi->metabox_inode))
60+
return -EFSCORRUPTED;
61+
buf->mapping = sbi->metabox_inode->i_mapping;
62+
return 0;
63+
}
5764
buf->off = sbi->dif0.fsoff;
5865
if (erofs_is_fileio_mode(sbi)) {
5966
buf->file = sbi->dif0.file; /* some fs like FUSE needs it */
@@ -62,13 +69,18 @@ void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
6269
buf->mapping = sbi->dif0.fscache->inode->i_mapping;
6370
else
6471
buf->mapping = sb->s_bdev->bd_mapping;
72+
return 0;
6573
}
6674

6775
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
68-
erofs_off_t offset, bool need_kmap)
76+
erofs_off_t offset, bool in_metabox)
6977
{
70-
erofs_init_metabuf(buf, sb);
71-
return erofs_bread(buf, offset, need_kmap);
78+
int err;
79+
80+
err = erofs_init_metabuf(buf, sb, in_metabox);
81+
if (err)
82+
return ERR_PTR(err);
83+
return erofs_bread(buf, offset, true);
7284
}
7385

7486
int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
@@ -118,7 +130,7 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
118130
pos = ALIGN(erofs_iloc(inode) + vi->inode_isize +
119131
vi->xattr_isize, unit) + unit * chunknr;
120132

121-
idx = erofs_read_metabuf(&buf, sb, pos, true);
133+
idx = erofs_read_metabuf(&buf, sb, pos, erofs_inode_in_metabox(inode));
122134
if (IS_ERR(idx)) {
123135
err = PTR_ERR(idx);
124136
goto out;
@@ -264,51 +276,51 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
264276

265277
map.m_la = offset;
266278
map.m_llen = length;
267-
268279
ret = erofs_map_blocks(inode, &map);
269280
if (ret < 0)
270281
return ret;
271282

272-
mdev = (struct erofs_map_dev) {
273-
.m_deviceid = map.m_deviceid,
274-
.m_pa = map.m_pa,
275-
};
276-
ret = erofs_map_dev(sb, &mdev);
277-
if (ret)
278-
return ret;
279-
280283
iomap->offset = map.m_la;
281-
if (flags & IOMAP_DAX)
282-
iomap->dax_dev = mdev.m_dif->dax_dev;
283-
else
284-
iomap->bdev = mdev.m_bdev;
285284
iomap->length = map.m_llen;
286285
iomap->flags = 0;
287286
iomap->private = NULL;
288-
287+
iomap->addr = IOMAP_NULL_ADDR;
289288
if (!(map.m_flags & EROFS_MAP_MAPPED)) {
290289
iomap->type = IOMAP_HOLE;
291-
iomap->addr = IOMAP_NULL_ADDR;
292-
if (!iomap->length)
293-
iomap->length = length;
294290
return 0;
295291
}
296292

293+
if (!(map.m_flags & EROFS_MAP_META) || !erofs_inode_in_metabox(inode)) {
294+
mdev = (struct erofs_map_dev) {
295+
.m_deviceid = map.m_deviceid,
296+
.m_pa = map.m_pa,
297+
};
298+
ret = erofs_map_dev(sb, &mdev);
299+
if (ret)
300+
return ret;
301+
302+
if (flags & IOMAP_DAX)
303+
iomap->dax_dev = mdev.m_dif->dax_dev;
304+
else
305+
iomap->bdev = mdev.m_bdev;
306+
iomap->addr = mdev.m_dif->fsoff + mdev.m_pa;
307+
if (flags & IOMAP_DAX)
308+
iomap->addr += mdev.m_dif->dax_part_off;
309+
}
310+
297311
if (map.m_flags & EROFS_MAP_META) {
298312
void *ptr;
299313
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
300314

301315
iomap->type = IOMAP_INLINE;
302-
ptr = erofs_read_metabuf(&buf, sb, mdev.m_pa, true);
316+
ptr = erofs_read_metabuf(&buf, sb, map.m_pa,
317+
erofs_inode_in_metabox(inode));
303318
if (IS_ERR(ptr))
304319
return PTR_ERR(ptr);
305320
iomap->inline_data = ptr;
306321
iomap->private = buf.base;
307322
} else {
308323
iomap->type = IOMAP_MAPPED;
309-
iomap->addr = mdev.m_dif->fsoff + mdev.m_pa;
310-
if (flags & IOMAP_DAX)
311-
iomap->addr += mdev.m_dif->dax_part_off;
312324
}
313325
return 0;
314326
}

fs/erofs/decompressor.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,7 @@ int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb)
467467
return -EOPNOTSUPP;
468468
}
469469

470-
erofs_init_metabuf(&buf, sb);
470+
(void)erofs_init_metabuf(&buf, sb, false);
471471
offset = EROFS_SUPER_OFFSET + sbi->sb_size;
472472
alg = 0;
473473
for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) {

fs/erofs/dir.c

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx,
3434
}
3535

3636
if (!dir_emit(ctx, de_name, de_namelen,
37-
le64_to_cpu(de->nid), d_type))
37+
erofs_nid_to_ino64(EROFS_SB(dir->i_sb),
38+
le64_to_cpu(de->nid)), d_type))
3839
return 1;
3940
++de;
4041
ctx->pos += sizeof(struct erofs_dirent);
@@ -47,8 +48,12 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
4748
struct inode *dir = file_inode(f);
4849
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
4950
struct super_block *sb = dir->i_sb;
51+
struct file_ra_state *ra = &f->f_ra;
5052
unsigned long bsz = sb->s_blocksize;
5153
unsigned int ofs = erofs_blkoff(sb, ctx->pos);
54+
pgoff_t ra_pages = DIV_ROUND_UP_POW2(
55+
EROFS_I_SB(dir)->dir_ra_bytes, PAGE_SIZE);
56+
pgoff_t nr_pages = DIV_ROUND_UP_POW2(dir->i_size, PAGE_SIZE);
5257
int err = 0;
5358
bool initial = true;
5459

@@ -63,6 +68,16 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
6368
break;
6469
}
6570

71+
/* readahead blocks to enhance performance for large directories */
72+
if (ra_pages) {
73+
pgoff_t idx = DIV_ROUND_UP_POW2(ctx->pos, PAGE_SIZE);
74+
pgoff_t pages = min(nr_pages - idx, ra_pages);
75+
76+
if (pages > 1 && !ra_has_index(ra, idx))
77+
page_cache_sync_readahead(dir->i_mapping, ra,
78+
f, idx, pages);
79+
}
80+
6681
de = erofs_bread(&buf, dbstart, true);
6782
if (IS_ERR(de)) {
6883
erofs_err(sb, "failed to readdir of logical block %llu of nid %llu",

fs/erofs/erofs_fs.h

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001
1616
#define EROFS_FEATURE_COMPAT_MTIME 0x00000002
1717
#define EROFS_FEATURE_COMPAT_XATTR_FILTER 0x00000004
18+
#define EROFS_FEATURE_COMPAT_SHARED_EA_IN_METABOX 0x00000008
1819

1920
/*
2021
* Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should
@@ -31,8 +32,9 @@
3132
#define EROFS_FEATURE_INCOMPAT_DEDUPE 0x00000020
3233
#define EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES 0x00000040
3334
#define EROFS_FEATURE_INCOMPAT_48BIT 0x00000080
35+
#define EROFS_FEATURE_INCOMPAT_METABOX 0x00000100
3436
#define EROFS_ALL_FEATURE_INCOMPAT \
35-
((EROFS_FEATURE_INCOMPAT_48BIT << 1) - 1)
37+
((EROFS_FEATURE_INCOMPAT_METABOX << 1) - 1)
3638

3739
#define EROFS_SB_EXTSLOT_SIZE 16
3840

@@ -46,7 +48,7 @@ struct erofs_deviceslot {
4648
};
4749
#define EROFS_DEVT_SLOT_SIZE sizeof(struct erofs_deviceslot)
4850

49-
/* erofs on-disk super block (currently 128 bytes) */
51+
/* erofs on-disk super block (currently 144 bytes at maximum) */
5052
struct erofs_super_block {
5153
__le32 magic; /* file system magic number */
5254
__le32 checksum; /* crc32c to avoid unexpected on-disk overlap */
@@ -82,7 +84,9 @@ struct erofs_super_block {
8284
__u8 reserved[3];
8385
__le32 build_time; /* seconds added to epoch for mkfs time */
8486
__le64 rootnid_8b; /* (48BIT on) nid of root directory */
85-
__u8 reserved2[8];
87+
__le64 reserved2;
88+
__le64 metabox_nid; /* (METABOX on) nid of the metabox inode */
89+
__le64 reserved3; /* [align to extslot 1] */
8690
};
8791

8892
/*
@@ -267,6 +271,9 @@ struct erofs_inode_chunk_index {
267271
__le32 startblk_lo; /* starting block number of this chunk */
268272
};
269273

274+
#define EROFS_DIRENT_NID_METABOX_BIT 63
275+
#define EROFS_DIRENT_NID_MASK (BIT_ULL(EROFS_DIRENT_NID_METABOX_BIT) - 1)
276+
270277
/* dirent sorts in alphabet order, thus we can do binary search */
271278
struct erofs_dirent {
272279
__le64 nid; /* node number */
@@ -434,7 +441,7 @@ static inline void erofs_check_ondisk_layout_definitions(void)
434441
.h_clusterbits = 1 << Z_EROFS_FRAGMENT_INODE_BIT
435442
};
436443

437-
BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128);
444+
BUILD_BUG_ON(sizeof(struct erofs_super_block) != 144);
438445
BUILD_BUG_ON(sizeof(struct erofs_inode_compact) != 32);
439446
BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64);
440447
BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12);

fs/erofs/fileio.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio)
115115
void *src;
116116

117117
src = erofs_read_metabuf(&buf, inode->i_sb,
118-
map->m_pa + ofs, true);
118+
map->m_pa + ofs, erofs_inode_in_metabox(inode));
119119
if (IS_ERR(src)) {
120120
err = PTR_ERR(src);
121121
break;

fs/erofs/fscache.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,8 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
274274
size_t size = map.m_llen;
275275
void *src;
276276

277-
src = erofs_read_metabuf(&buf, sb, map.m_pa, true);
277+
src = erofs_read_metabuf(&buf, sb, map.m_pa,
278+
erofs_inode_in_metabox(inode));
278279
if (IS_ERR(src))
279280
return PTR_ERR(src);
280281

fs/erofs/inode.c

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ static int erofs_read_inode(struct inode *inode)
2929
struct super_block *sb = inode->i_sb;
3030
erofs_blk_t blkaddr = erofs_blknr(sb, erofs_iloc(inode));
3131
unsigned int ofs = erofs_blkoff(sb, erofs_iloc(inode));
32+
bool in_mbox = erofs_inode_in_metabox(inode);
3233
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
3334
struct erofs_sb_info *sbi = EROFS_SB(sb);
3435
erofs_blk_t addrmask = BIT_ULL(48) - 1;
@@ -39,10 +40,10 @@ static int erofs_read_inode(struct inode *inode)
3940
void *ptr;
4041
int err = 0;
4142

42-
ptr = erofs_read_metabuf(&buf, sb, erofs_pos(sb, blkaddr), true);
43+
ptr = erofs_read_metabuf(&buf, sb, erofs_pos(sb, blkaddr), in_mbox);
4344
if (IS_ERR(ptr)) {
4445
err = PTR_ERR(ptr);
45-
erofs_err(sb, "failed to get inode (nid: %llu) page, err %d",
46+
erofs_err(sb, "failed to read inode meta block (nid: %llu): %d",
4647
vi->nid, err);
4748
goto err_out;
4849
}
@@ -78,10 +79,10 @@ static int erofs_read_inode(struct inode *inode)
7879

7980
memcpy(&copied, dic, gotten);
8081
ptr = erofs_read_metabuf(&buf, sb,
81-
erofs_pos(sb, blkaddr + 1), true);
82+
erofs_pos(sb, blkaddr + 1), in_mbox);
8283
if (IS_ERR(ptr)) {
8384
err = PTR_ERR(ptr);
84-
erofs_err(sb, "failed to get inode payload block (nid: %llu), err %d",
85+
erofs_err(sb, "failed to read inode payload block (nid: %llu): %d",
8586
vi->nid, err);
8687
goto err_out;
8788
}
@@ -264,13 +265,13 @@ static int erofs_fill_inode(struct inode *inode)
264265
* ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
265266
* so that it will fit.
266267
*/
267-
static ino_t erofs_squash_ino(erofs_nid_t nid)
268+
static ino_t erofs_squash_ino(struct super_block *sb, erofs_nid_t nid)
268269
{
269-
ino_t ino = (ino_t)nid;
270+
u64 ino64 = erofs_nid_to_ino64(EROFS_SB(sb), nid);
270271

271272
if (sizeof(ino_t) < sizeof(erofs_nid_t))
272-
ino ^= nid >> (sizeof(erofs_nid_t) - sizeof(ino_t)) * 8;
273-
return ino;
273+
ino64 ^= ino64 >> (sizeof(erofs_nid_t) - sizeof(ino_t)) * 8;
274+
return (ino_t)ino64;
274275
}
275276

276277
static int erofs_iget5_eq(struct inode *inode, void *opaque)
@@ -282,7 +283,7 @@ static int erofs_iget5_set(struct inode *inode, void *opaque)
282283
{
283284
const erofs_nid_t nid = *(erofs_nid_t *)opaque;
284285

285-
inode->i_ino = erofs_squash_ino(nid);
286+
inode->i_ino = erofs_squash_ino(inode->i_sb, nid);
286287
EROFS_I(inode)->nid = nid;
287288
return 0;
288289
}
@@ -291,7 +292,7 @@ struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid)
291292
{
292293
struct inode *inode;
293294

294-
inode = iget5_locked(sb, erofs_squash_ino(nid), erofs_iget5_eq,
295+
inode = iget5_locked(sb, erofs_squash_ino(sb, nid), erofs_iget5_eq,
295296
erofs_iget5_set, &nid);
296297
if (!inode)
297298
return ERR_PTR(-ENOMEM);

0 commit comments

Comments
 (0)