Skip to content

Commit 34096ba

Browse files
Hongbo Li authored and hsiangkao committed
erofs: support unencoded inodes for page cache share
This patch adds inode page cache sharing functionality for unencoded files.

I conducted experiments in the container environment. Below is the memory usage for reading all files in two different minor versions of container images:

+-------------------+------------------+-------------+---------------+
|       Image       | Page Cache Share | Memory (MB) |    Memory     |
|                   |                  |             | Reduction (%) |
+-------------------+------------------+-------------+---------------+
|                   |        No        |     241     |       -       |
|       redis       +------------------+-------------+---------------+
|   7.2.4 & 7.2.5   |        Yes       |     163     |      33%      |
+-------------------+------------------+-------------+---------------+
|                   |        No        |     872     |       -       |
|     postgres      +------------------+-------------+---------------+
|    16.1 & 16.2    |        Yes       |     630     |      28%      |
+-------------------+------------------+-------------+---------------+
|                   |        No        |    2771     |       -       |
|    tensorflow     +------------------+-------------+---------------+
|  2.11.0 & 2.11.1  |        Yes       |    2340     |      16%      |
+-------------------+------------------+-------------+---------------+
|                   |        No        |     926     |       -       |
|       mysql       +------------------+-------------+---------------+
|  8.0.11 & 8.0.12  |        Yes       |     735     |      21%      |
+-------------------+------------------+-------------+---------------+
|                   |        No        |     390     |       -       |
|       nginx       +------------------+-------------+---------------+
|   7.2.4 & 7.2.5   |        Yes       |     219     |      44%      |
+-------------------+------------------+-------------+---------------+
|      tomcat       |        No        |     924     |       -       |
| 10.1.25 & 10.1.26 +------------------+-------------+---------------+
|                   |        Yes       |     474     |      49%      |
+-------------------+------------------+-------------+---------------+

Additionally, the table below shows the runtime memory usage of the container:

+-------------------+------------------+-------------+---------------+
|       Image       | Page Cache Share | Memory (MB) |    Memory     |
|                   |                  |             | Reduction (%) |
+-------------------+------------------+-------------+---------------+
|                   |        No        |     35      |       -       |
|       redis       +------------------+-------------+---------------+
|   7.2.4 & 7.2.5   |        Yes       |     28      |      20%      |
+-------------------+------------------+-------------+---------------+
|                   |        No        |     149     |       -       |
|     postgres      +------------------+-------------+---------------+
|    16.1 & 16.2    |        Yes       |     95      |      37%      |
+-------------------+------------------+-------------+---------------+
|                   |        No        |    1028     |       -       |
|    tensorflow     +------------------+-------------+---------------+
|  2.11.0 & 2.11.1  |        Yes       |     930     |      10%      |
+-------------------+------------------+-------------+---------------+
|                   |        No        |     155     |       -       |
|       mysql       +------------------+-------------+---------------+
|  8.0.11 & 8.0.12  |        Yes       |     132     |      15%      |
+-------------------+------------------+-------------+---------------+
|                   |        No        |     25      |       -       |
|       nginx       +------------------+-------------+---------------+
|   7.2.4 & 7.2.5   |        Yes       |     20      |      20%      |
+-------------------+------------------+-------------+---------------+
|      tomcat       |        No        |     186     |       -       |
| 10.1.25 & 10.1.26 +------------------+-------------+---------------+
|                   |        Yes       |     98      |      48%      |
+-------------------+------------------+-------------+---------------+

Co-developed-by: Hongzhen Luo <hongzhen@linux.alibaba.com>
Signed-off-by: Hongzhen Luo <hongzhen@linux.alibaba.com>
Signed-off-by: Hongbo Li <lihongbo22@huawei.com>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
1 parent 69368d2 commit 34096ba

5 files changed

Lines changed: 81 additions & 19 deletions

File tree

fs/erofs/data.c

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -269,21 +269,23 @@ void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty)
269269
struct erofs_iomap_iter_ctx {
270270
struct page *page;
271271
void *base;
272+
struct inode *realinode;
272273
};
273274

274275
static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
275276
unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
276277
{
277278
struct iomap_iter *iter = container_of(iomap, struct iomap_iter, iomap);
278279
struct erofs_iomap_iter_ctx *ctx = iter->private;
279-
struct super_block *sb = inode->i_sb;
280+
struct inode *realinode = ctx ? ctx->realinode : inode;
281+
struct super_block *sb = realinode->i_sb;
280282
struct erofs_map_blocks map;
281283
struct erofs_map_dev mdev;
282284
int ret;
283285

284286
map.m_la = offset;
285287
map.m_llen = length;
286-
ret = erofs_map_blocks(inode, &map);
288+
ret = erofs_map_blocks(realinode, &map);
287289
if (ret < 0)
288290
return ret;
289291

@@ -296,7 +298,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
296298
return 0;
297299
}
298300

299-
if (!(map.m_flags & EROFS_MAP_META) || !erofs_inode_in_metabox(inode)) {
301+
if (!(map.m_flags & EROFS_MAP_META) || !erofs_inode_in_metabox(realinode)) {
300302
mdev = (struct erofs_map_dev) {
301303
.m_deviceid = map.m_deviceid,
302304
.m_pa = map.m_pa,
@@ -322,7 +324,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
322324
void *ptr;
323325

324326
ptr = erofs_read_metabuf(&buf, sb, map.m_pa,
325-
erofs_inode_in_metabox(inode));
327+
erofs_inode_in_metabox(realinode));
326328
if (IS_ERR(ptr))
327329
return PTR_ERR(ptr);
328330
iomap->inline_data = ptr;
@@ -383,10 +385,15 @@ static int erofs_read_folio(struct file *file, struct folio *folio)
383385
.ops = &iomap_bio_read_ops,
384386
.cur_folio = folio,
385387
};
386-
struct erofs_iomap_iter_ctx iter_ctx = {};
388+
bool need_iput;
389+
struct erofs_iomap_iter_ctx iter_ctx = {
390+
.realinode = erofs_real_inode(folio_inode(folio), &need_iput),
391+
};
387392

388-
trace_erofs_read_folio(folio_inode(folio), folio, true);
393+
trace_erofs_read_folio(iter_ctx.realinode, folio, true);
389394
iomap_read_folio(&erofs_iomap_ops, &read_ctx, &iter_ctx);
395+
if (need_iput)
396+
iput(iter_ctx.realinode);
390397
return 0;
391398
}
392399

@@ -396,11 +403,16 @@ static void erofs_readahead(struct readahead_control *rac)
396403
.ops = &iomap_bio_read_ops,
397404
.rac = rac,
398405
};
399-
struct erofs_iomap_iter_ctx iter_ctx = {};
406+
bool need_iput;
407+
struct erofs_iomap_iter_ctx iter_ctx = {
408+
.realinode = erofs_real_inode(rac->mapping->host, &need_iput),
409+
};
400410

401-
trace_erofs_readahead(rac->mapping->host, readahead_index(rac),
411+
trace_erofs_readahead(iter_ctx.realinode, readahead_index(rac),
402412
readahead_count(rac), true);
403413
iomap_readahead(&erofs_iomap_ops, &read_ctx, &iter_ctx);
414+
if (need_iput)
415+
iput(iter_ctx.realinode);
404416
}
405417

406418
static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
@@ -421,7 +433,9 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
421433
return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
422434
#endif
423435
if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_sb->s_bdev) {
424-
struct erofs_iomap_iter_ctx iter_ctx = {};
436+
struct erofs_iomap_iter_ctx iter_ctx = {
437+
.realinode = inode,
438+
};
425439

426440
return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
427441
NULL, 0, &iter_ctx, 0);

fs/erofs/fileio.c

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,9 @@ void erofs_fileio_submit_bio(struct bio *bio)
8888
bio));
8989
}
9090

91-
static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio)
91+
static int erofs_fileio_scan_folio(struct erofs_fileio *io,
92+
struct inode *inode, struct folio *folio)
9293
{
93-
struct inode *inode = folio_inode(folio);
9494
struct erofs_map_blocks *map = &io->map;
9595
unsigned int cur = 0, end = folio_size(folio), len, attached = 0;
9696
loff_t pos = folio_pos(folio), ofs;
@@ -158,31 +158,38 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio)
158158

159159
static int erofs_fileio_read_folio(struct file *file, struct folio *folio)
160160
{
161+
bool need_iput;
162+
struct inode *realinode = erofs_real_inode(folio_inode(folio), &need_iput);
161163
struct erofs_fileio io = {};
162164
int err;
163165

164-
trace_erofs_read_folio(folio_inode(folio), folio, true);
165-
err = erofs_fileio_scan_folio(&io, folio);
166+
trace_erofs_read_folio(realinode, folio, true);
167+
err = erofs_fileio_scan_folio(&io, realinode, folio);
166168
erofs_fileio_rq_submit(io.rq);
169+
if (need_iput)
170+
iput(realinode);
167171
return err;
168172
}
169173

170174
static void erofs_fileio_readahead(struct readahead_control *rac)
171175
{
172-
struct inode *inode = rac->mapping->host;
176+
bool need_iput;
177+
struct inode *realinode = erofs_real_inode(rac->mapping->host, &need_iput);
173178
struct erofs_fileio io = {};
174179
struct folio *folio;
175180
int err;
176181

177-
trace_erofs_readahead(inode, readahead_index(rac),
182+
trace_erofs_readahead(realinode, readahead_index(rac),
178183
readahead_count(rac), true);
179184
while ((folio = readahead_folio(rac))) {
180-
err = erofs_fileio_scan_folio(&io, folio);
185+
err = erofs_fileio_scan_folio(&io, realinode, folio);
181186
if (err && err != -EINTR)
182-
erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu",
183-
folio->index, EROFS_I(inode)->nid);
187+
erofs_err(realinode->i_sb, "readahead error at folio %lu @ nid %llu",
188+
folio->index, EROFS_I(realinode)->nid);
184189
}
185190
erofs_fileio_rq_submit(io.rq);
191+
if (need_iput)
192+
iput(realinode);
186193
}
187194

188195
const struct address_space_operations erofs_fileio_aops = {

fs/erofs/inode.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,8 @@ static int erofs_fill_inode(struct inode *inode)
213213
switch (inode->i_mode & S_IFMT) {
214214
case S_IFREG:
215215
inode->i_op = &erofs_generic_iops;
216-
inode->i_fop = &erofs_file_fops;
216+
inode->i_fop = erofs_ishare_fill_inode(inode) ?
217+
&erofs_ishare_fops : &erofs_file_fops;
217218
break;
218219
case S_IFDIR:
219220
inode->i_op = &erofs_dir_iops;

fs/erofs/internal.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -584,11 +584,17 @@ int __init erofs_init_ishare(void);
584584
void erofs_exit_ishare(void);
585585
bool erofs_ishare_fill_inode(struct inode *inode);
586586
void erofs_ishare_free_inode(struct inode *inode);
587+
struct inode *erofs_real_inode(struct inode *inode, bool *need_iput);
587588
#else
588589
static inline int erofs_init_ishare(void) { return 0; }
589590
static inline void erofs_exit_ishare(void) {}
590591
static inline bool erofs_ishare_fill_inode(struct inode *inode) { return false; }
591592
static inline void erofs_ishare_free_inode(struct inode *inode) {}
593+
/*
 * Stub variant of erofs_real_inode(): hands @inode back unchanged and
 * clears *@need_iput, so the caller has no reference to drop afterwards.
 */
static inline struct inode *erofs_real_inode(struct inode *inode, bool *need_iput)
{
	*need_iput = false;
	return inode;
}
592598
#endif
593599

594600
long erofs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);

fs/erofs/ishare.c

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@
1111

1212
static struct vfsmount *erofs_ishare_mnt;
1313

14+
static inline bool erofs_is_ishare_inode(struct inode *inode)
15+
{
16+
/* assumed FS_ONDEMAND is excluded with FS_PAGE_CACHE_SHARE feature */
17+
return inode->i_sb->s_type == &erofs_anon_fs_type;
18+
}
19+
1420
static int erofs_ishare_iget5_eq(struct inode *inode, void *data)
1521
{
1622
struct erofs_inode_fingerprint *fp1 = &EROFS_I(inode)->fingerprint;
@@ -38,6 +44,8 @@ bool erofs_ishare_fill_inode(struct inode *inode)
3844
struct inode *sharedinode;
3945
unsigned long hash;
4046

47+
if (erofs_inode_is_data_compressed(vi->datalayout))
48+
return false;
4149
if (erofs_xattr_fill_inode_fingerprint(&fp, inode, sbi->domain_id))
4250
return false;
4351
hash = xxh32(fp.opaque, fp.size, 0);
@@ -155,6 +163,32 @@ const struct file_operations erofs_ishare_fops = {
155163
.splice_read = filemap_splice_read,
156164
};
157165

166+
/*
 * erofs_real_inode - pick the inode that reads should be mapped against
 * @inode:     inode owning the page cache being filled
 * @need_iput: set to true when the returned inode was pinned via igrab()
 *             and the caller has to release it with iput(); false otherwise
 *
 * When erofs_is_ishare_inode() is false for @inode, @inode itself is
 * returned.  Otherwise the entries on its ishare_list are walked under
 * ishare_lock and the first one igrab() manages to pin is returned.
 *
 * Return: the inode to use, or NULL (after DBG_BUGON fires) if no entry on
 * the list could be grabbed.
 */
struct inode *erofs_real_inode(struct inode *inode, bool *need_iput)
{
	struct erofs_inode *vi, *vi_share;
	/*
	 * Has to start as NULL: with an empty list the loop body never runs,
	 * and the check and return below would otherwise read an
	 * indeterminate pointer.
	 */
	struct inode *realinode = NULL;

	*need_iput = false;
	if (!erofs_is_ishare_inode(inode))
		return inode;

	vi_share = EROFS_I(inode);
	spin_lock(&vi_share->ishare_lock);
	/* fetch any one as real inode */
	DBG_BUGON(list_empty(&vi_share->ishare_list));
	list_for_each_entry(vi, &vi_share->ishare_list, ishare_list) {
		realinode = igrab(&vi->vfs_inode);
		if (realinode) {
			*need_iput = true;
			break;
		}
	}
	spin_unlock(&vi_share->ishare_lock);

	DBG_BUGON(!realinode);
	return realinode;
}
191+
158192
int __init erofs_init_ishare(void)
159193
{
160194
erofs_ishare_mnt = kern_mount(&erofs_anon_fs_type);

0 commit comments

Comments
 (0)