Skip to content

Commit 61d325d

Browse files
committed
Merge tag 'erofs-for-6.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs
Pull erofs updates from Gao Xiang: "In this cycle, sub-page block support for uncompressed files is available. It's mainly used to enable original signing ('golden') 4k-block images on arm64 with 16/64k pages. In addition, end users could also use this feature to build a manifest to directly refer to golden tar data. Besides, long xattr name prefix support is also introduced in this cycle to avoid too many xattrs with the same prefix (e.g. overlayfs xattrs). It's useful for erofs + overlayfs combination (like Composefs model): the image size is reduced by ~14% and runtime performance is also slightly improved. Others are random fixes and cleanups as usual. Summary: - Add sub-page block size support for uncompressed files - Support flattened block device for multi-blob images to be attached into virtual machines (including cloud servers) and bare metals - Support long xattr name prefixes to optimize images with common xattr namespaces (e.g. files with overlayfs xattrs) use cases - Various minor cleanups & fixes" * tag 'erofs-for-6.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs: erofs: cleanup i_format-related stuffs erofs: sunset erofs_dbg() erofs: fix potential overflow calculating xattr_isize erofs: get rid of z_erofs_fill_inode() erofs: enable long extended attribute name prefixes erofs: handle long xattr name prefixes properly erofs: add helpers to load long xattr name prefixes erofs: introduce on-disk format for long xattr name prefixes erofs: move packed inode out of the compression part erofs: keep meta inode into erofs_buf erofs: initialize packed inode after root inode is assigned erofs: stop parsing non-compact HEAD index if clusterofs is invalid erofs: don't warn ztailpacking feature anymore erofs: simplify erofs_xattr_generic_get() erofs: rename init_inode_xattrs with erofs_ prefix erofs: move several xattr helpers into xattr.c erofs: tidy up EROFS on-disk naming erofs: support flattened block device for multi-blob images erofs: set block size to the on-disk block size erofs: avoid hardcoded blocksize for subpage block support
2 parents 97adb49 + 745ed7d commit 61d325d

16 files changed

Lines changed: 540 additions & 459 deletions

File tree

Documentation/filesystems/erofs.rst

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,8 +40,8 @@ Here are the main features of EROFS:
4040
- Support multiple devices to refer to external blobs, which can be used
4141
for container images;
4242

43-
- 4KiB block size and 32-bit block addresses for each device, therefore
44-
16TiB address space at most for now;
43+
- 32-bit block addresses for each device, therefore 16TiB address space at
44+
most with 4KiB block size for now;
4545

4646
- Two inode layouts for different requirements:
4747

fs/erofs/data.c

Lines changed: 47 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -27,11 +27,15 @@ void erofs_put_metabuf(struct erofs_buf *buf)
2727
buf->page = NULL;
2828
}
2929

30-
void *erofs_bread(struct erofs_buf *buf, struct inode *inode,
31-
erofs_blk_t blkaddr, enum erofs_kmap_type type)
30+
/*
31+
* Derive the block size from inode->i_blkbits to be compatible with
32+
* anonymous inodes in fscache mode.
33+
*/
34+
void *erofs_bread(struct erofs_buf *buf, erofs_blk_t blkaddr,
35+
enum erofs_kmap_type type)
3236
{
33-
struct address_space *const mapping = inode->i_mapping;
34-
erofs_off_t offset = blknr_to_addr(blkaddr);
37+
struct inode *inode = buf->inode;
38+
erofs_off_t offset = (erofs_off_t)blkaddr << inode->i_blkbits;
3539
pgoff_t index = offset >> PAGE_SHIFT;
3640
struct page *page = buf->page;
3741
struct folio *folio;
@@ -41,7 +45,7 @@ void *erofs_bread(struct erofs_buf *buf, struct inode *inode,
4145
erofs_put_metabuf(buf);
4246

4347
nofs_flag = memalloc_nofs_save();
44-
folio = read_cache_folio(mapping, index, NULL, NULL);
48+
folio = read_cache_folio(inode->i_mapping, index, NULL, NULL);
4549
memalloc_nofs_restore(nofs_flag);
4650
if (IS_ERR(folio))
4751
return folio;
@@ -63,14 +67,19 @@ void *erofs_bread(struct erofs_buf *buf, struct inode *inode,
6367
return buf->base + (offset & ~PAGE_MASK);
6468
}
6569

66-
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
67-
erofs_blk_t blkaddr, enum erofs_kmap_type type)
70+
void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
6871
{
6972
if (erofs_is_fscache_mode(sb))
70-
return erofs_bread(buf, EROFS_SB(sb)->s_fscache->inode,
71-
blkaddr, type);
73+
buf->inode = EROFS_SB(sb)->s_fscache->inode;
74+
else
75+
buf->inode = sb->s_bdev->bd_inode;
76+
}
7277

73-
return erofs_bread(buf, sb->s_bdev->bd_inode, blkaddr, type);
78+
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
79+
erofs_blk_t blkaddr, enum erofs_kmap_type type)
80+
{
81+
erofs_init_metabuf(buf, sb);
82+
return erofs_bread(buf, blkaddr, type);
7483
}
7584

7685
static int erofs_map_blocks_flatmode(struct inode *inode,
@@ -79,33 +88,32 @@ static int erofs_map_blocks_flatmode(struct inode *inode,
7988
erofs_blk_t nblocks, lastblk;
8089
u64 offset = map->m_la;
8190
struct erofs_inode *vi = EROFS_I(inode);
91+
struct super_block *sb = inode->i_sb;
8292
bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);
8393

84-
nblocks = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
94+
nblocks = erofs_iblks(inode);
8595
lastblk = nblocks - tailendpacking;
8696

8797
/* there is no hole in flatmode */
8898
map->m_flags = EROFS_MAP_MAPPED;
89-
if (offset < blknr_to_addr(lastblk)) {
90-
map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la;
91-
map->m_plen = blknr_to_addr(lastblk) - offset;
99+
if (offset < erofs_pos(sb, lastblk)) {
100+
map->m_pa = erofs_pos(sb, vi->raw_blkaddr) + map->m_la;
101+
map->m_plen = erofs_pos(sb, lastblk) - offset;
92102
} else if (tailendpacking) {
93103
map->m_pa = erofs_iloc(inode) + vi->inode_isize +
94-
vi->xattr_isize + erofs_blkoff(offset);
104+
vi->xattr_isize + erofs_blkoff(sb, offset);
95105
map->m_plen = inode->i_size - offset;
96106

97107
/* inline data should be located in the same meta block */
98-
if (erofs_blkoff(map->m_pa) + map->m_plen > EROFS_BLKSIZ) {
99-
erofs_err(inode->i_sb,
100-
"inline data cross block boundary @ nid %llu",
108+
if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) {
109+
erofs_err(sb, "inline data cross block boundary @ nid %llu",
101110
vi->nid);
102111
DBG_BUGON(1);
103112
return -EFSCORRUPTED;
104113
}
105114
map->m_flags |= EROFS_MAP_META;
106115
} else {
107-
erofs_err(inode->i_sb,
108-
"internal error @ nid: %llu (size %llu), m_la 0x%llx",
116+
erofs_err(sb, "internal error @ nid: %llu (size %llu), m_la 0x%llx",
109117
vi->nid, inode->i_size, map->m_la);
110118
DBG_BUGON(1);
111119
return -EIO;
@@ -148,37 +156,37 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
148156
pos = ALIGN(erofs_iloc(inode) + vi->inode_isize +
149157
vi->xattr_isize, unit) + unit * chunknr;
150158

151-
kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos), EROFS_KMAP);
159+
kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(sb, pos), EROFS_KMAP);
152160
if (IS_ERR(kaddr)) {
153161
err = PTR_ERR(kaddr);
154162
goto out;
155163
}
156164
map->m_la = chunknr << vi->chunkbits;
157165
map->m_plen = min_t(erofs_off_t, 1UL << vi->chunkbits,
158-
roundup(inode->i_size - map->m_la, EROFS_BLKSIZ));
166+
round_up(inode->i_size - map->m_la, sb->s_blocksize));
159167

160168
/* handle block map */
161169
if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) {
162-
__le32 *blkaddr = kaddr + erofs_blkoff(pos);
170+
__le32 *blkaddr = kaddr + erofs_blkoff(sb, pos);
163171

164172
if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) {
165173
map->m_flags = 0;
166174
} else {
167-
map->m_pa = blknr_to_addr(le32_to_cpu(*blkaddr));
175+
map->m_pa = erofs_pos(sb, le32_to_cpu(*blkaddr));
168176
map->m_flags = EROFS_MAP_MAPPED;
169177
}
170178
goto out_unlock;
171179
}
172180
/* parse chunk indexes */
173-
idx = kaddr + erofs_blkoff(pos);
181+
idx = kaddr + erofs_blkoff(sb, pos);
174182
switch (le32_to_cpu(idx->blkaddr)) {
175183
case EROFS_NULL_ADDR:
176184
map->m_flags = 0;
177185
break;
178186
default:
179187
map->m_deviceid = le16_to_cpu(idx->device_id) &
180188
EROFS_SB(sb)->device_id_mask;
181-
map->m_pa = blknr_to_addr(le32_to_cpu(idx->blkaddr));
189+
map->m_pa = erofs_pos(sb, le32_to_cpu(idx->blkaddr));
182190
map->m_flags = EROFS_MAP_MAPPED;
183191
break;
184192
}
@@ -197,7 +205,6 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
197205
struct erofs_device_info *dif;
198206
int id;
199207

200-
/* primary device by default */
201208
map->m_bdev = sb->s_bdev;
202209
map->m_daxdev = EROFS_SB(sb)->dax_dev;
203210
map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
@@ -210,20 +217,25 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
210217
up_read(&devs->rwsem);
211218
return -ENODEV;
212219
}
220+
if (devs->flatdev) {
221+
map->m_pa += erofs_pos(sb, dif->mapped_blkaddr);
222+
up_read(&devs->rwsem);
223+
return 0;
224+
}
213225
map->m_bdev = dif->bdev;
214226
map->m_daxdev = dif->dax_dev;
215227
map->m_dax_part_off = dif->dax_part_off;
216228
map->m_fscache = dif->fscache;
217229
up_read(&devs->rwsem);
218-
} else if (devs->extra_devices) {
230+
} else if (devs->extra_devices && !devs->flatdev) {
219231
down_read(&devs->rwsem);
220232
idr_for_each_entry(&devs->tree, dif, id) {
221233
erofs_off_t startoff, length;
222234

223235
if (!dif->mapped_blkaddr)
224236
continue;
225-
startoff = blknr_to_addr(dif->mapped_blkaddr);
226-
length = blknr_to_addr(dif->blocks);
237+
startoff = erofs_pos(sb, dif->mapped_blkaddr);
238+
length = erofs_pos(sb, dif->blocks);
227239

228240
if (map->m_pa >= startoff &&
229241
map->m_pa < startoff + length) {
@@ -244,6 +256,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
244256
unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
245257
{
246258
int ret;
259+
struct super_block *sb = inode->i_sb;
247260
struct erofs_map_blocks map;
248261
struct erofs_map_dev mdev;
249262

@@ -258,7 +271,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
258271
.m_deviceid = map.m_deviceid,
259272
.m_pa = map.m_pa,
260273
};
261-
ret = erofs_map_dev(inode->i_sb, &mdev);
274+
ret = erofs_map_dev(sb, &mdev);
262275
if (ret)
263276
return ret;
264277

@@ -284,11 +297,11 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
284297
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
285298

286299
iomap->type = IOMAP_INLINE;
287-
ptr = erofs_read_metabuf(&buf, inode->i_sb,
288-
erofs_blknr(mdev.m_pa), EROFS_KMAP);
300+
ptr = erofs_read_metabuf(&buf, sb,
301+
erofs_blknr(sb, mdev.m_pa), EROFS_KMAP);
289302
if (IS_ERR(ptr))
290303
return PTR_ERR(ptr);
291-
iomap->inline_data = ptr + erofs_blkoff(mdev.m_pa);
304+
iomap->inline_data = ptr + erofs_blkoff(sb, mdev.m_pa);
292305
iomap->private = buf.base;
293306
} else {
294307
iomap->type = IOMAP_MAPPED;

fs/erofs/decompressor.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ int z_erofs_load_lz4_config(struct super_block *sb,
4242
if (!sbi->lz4.max_pclusterblks) {
4343
sbi->lz4.max_pclusterblks = 1; /* reserved case */
4444
} else if (sbi->lz4.max_pclusterblks >
45-
Z_EROFS_PCLUSTER_MAX_SIZE / EROFS_BLKSIZ) {
45+
erofs_blknr(sb, Z_EROFS_PCLUSTER_MAX_SIZE)) {
4646
erofs_err(sb, "too large lz4 pclusterblks %u",
4747
sbi->lz4.max_pclusterblks);
4848
return -EINVAL;
@@ -221,13 +221,13 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
221221
support_0padding = true;
222222
ret = z_erofs_fixup_insize(rq, headpage + rq->pageofs_in,
223223
min_t(unsigned int, rq->inputsize,
224-
EROFS_BLKSIZ - rq->pageofs_in));
224+
rq->sb->s_blocksize - rq->pageofs_in));
225225
if (ret) {
226226
kunmap_atomic(headpage);
227227
return ret;
228228
}
229229
may_inplace = !((rq->pageofs_in + rq->inputsize) &
230-
(EROFS_BLKSIZ - 1));
230+
(rq->sb->s_blocksize - 1));
231231
}
232232

233233
inputmargin = rq->pageofs_in;

fs/erofs/decompressor_lzma.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -166,8 +166,8 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
166166
/* 1. get the exact LZMA compressed size */
167167
kin = kmap(*rq->in);
168168
err = z_erofs_fixup_insize(rq, kin + rq->pageofs_in,
169-
min_t(unsigned int, rq->inputsize,
170-
EROFS_BLKSIZ - rq->pageofs_in));
169+
min_t(unsigned int, rq->inputsize,
170+
rq->sb->s_blocksize - rq->pageofs_in));
171171
if (err) {
172172
kunmap(*rq->in);
173173
return err;

fs/erofs/dir.c

Lines changed: 12 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -50,44 +50,43 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
5050
{
5151
struct inode *dir = file_inode(f);
5252
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
53+
struct super_block *sb = dir->i_sb;
54+
unsigned long bsz = sb->s_blocksize;
5355
const size_t dirsize = i_size_read(dir);
54-
unsigned int i = ctx->pos / EROFS_BLKSIZ;
55-
unsigned int ofs = ctx->pos % EROFS_BLKSIZ;
56+
unsigned int i = erofs_blknr(sb, ctx->pos);
57+
unsigned int ofs = erofs_blkoff(sb, ctx->pos);
5658
int err = 0;
5759
bool initial = true;
5860

61+
buf.inode = dir;
5962
while (ctx->pos < dirsize) {
6063
struct erofs_dirent *de;
6164
unsigned int nameoff, maxsize;
6265

63-
de = erofs_bread(&buf, dir, i, EROFS_KMAP);
66+
de = erofs_bread(&buf, i, EROFS_KMAP);
6467
if (IS_ERR(de)) {
65-
erofs_err(dir->i_sb,
66-
"fail to readdir of logical block %u of nid %llu",
68+
erofs_err(sb, "fail to readdir of logical block %u of nid %llu",
6769
i, EROFS_I(dir)->nid);
6870
err = PTR_ERR(de);
6971
break;
7072
}
7173

7274
nameoff = le16_to_cpu(de->nameoff);
73-
if (nameoff < sizeof(struct erofs_dirent) ||
74-
nameoff >= EROFS_BLKSIZ) {
75-
erofs_err(dir->i_sb,
76-
"invalid de[0].nameoff %u @ nid %llu",
75+
if (nameoff < sizeof(struct erofs_dirent) || nameoff >= bsz) {
76+
erofs_err(sb, "invalid de[0].nameoff %u @ nid %llu",
7777
nameoff, EROFS_I(dir)->nid);
7878
err = -EFSCORRUPTED;
7979
break;
8080
}
8181

82-
maxsize = min_t(unsigned int,
83-
dirsize - ctx->pos + ofs, EROFS_BLKSIZ);
82+
maxsize = min_t(unsigned int, dirsize - ctx->pos + ofs, bsz);
8483

8584
/* search dirents at the arbitrary position */
8685
if (initial) {
8786
initial = false;
8887

8988
ofs = roundup(ofs, sizeof(struct erofs_dirent));
90-
ctx->pos = blknr_to_addr(i) + ofs;
89+
ctx->pos = erofs_pos(sb, i) + ofs;
9190
if (ofs >= nameoff)
9291
goto skip_this;
9392
}
@@ -97,7 +96,7 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
9796
if (err)
9897
break;
9998
skip_this:
100-
ctx->pos = blknr_to_addr(i) + maxsize;
99+
ctx->pos = erofs_pos(sb, i) + maxsize;
101100
++i;
102101
ofs = 0;
103102
}

0 commit comments

Comments
 (0)