
Commit 1923516

erofs: support I/O submission for sub-page compressed blocks
Add a basic I/O submission path first to support sub-page blocks:
 - Temporary short-lived pages will be used entirely;
 - In-place I/O pages can be used partially, but compressed pages need
   to be able to be mapped in contiguous virtual memory.

As a start, currently cache decompression is explicitly disabled for
sub-page blocks, which will be supported in the future.

Reviewed-by: Yue Hu <huyue2@coolpad.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20231206091057.87027-2-hsiangkao@linux.alibaba.com
1 parent 3c12466 commit 1923516
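As a reading aid (not part of the commit), here is a minimal userspace sketch of the rounding that the new z_erofs_fill_bio_vec() applies to in-place pages so that only whole sub-page blocks are submitted. The helper round_up_to() and the sample values for the block size and for zbv->offset/zbv->end are invented for illustration.

/* Illustrative sketch only; mirrors the bv_offset/bv_len rounding in the patch. */
#include <stdio.h>

#define PAGE_SIZE	4096U

static unsigned int round_up_to(unsigned int v, unsigned int bs)
{
	return (v + bs - 1) / bs * bs;
}

int main(void)
{
	unsigned int bs = 2048;		/* assumed sub-page block size */
	int zbv_offset = -1024;		/* assumed sample value for zbv->offset */
	unsigned int zbv_end = 3000;	/* assumed sample value for zbv->end */
	unsigned int bv_offset = 0, bv_len = PAGE_SIZE;

	/* same rounding rule as the patch: trim to whole blocks of size bs */
	if (zbv_offset < 0)
		bv_offset = round_up_to(-zbv_offset, bs);
	bv_len = round_up_to(zbv_end, bs) - bv_offset;

	printf("bv_offset=%u bv_len=%u\n", bv_offset, bv_len);	/* 2048, 2048 */
	return 0;
}

With a 4KiB page and an assumed 2KiB block size, only the second 2KiB block of the page ends up in the bio, which is the sense in which in-place pages "can be used partially".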

1 file changed

Lines changed: 74 additions & 82 deletions

fs/erofs/zdata.c
@@ -1435,86 +1435,85 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
 		z_erofs_decompressqueue_work(&io->u.work);
 }
 
-static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
-					       unsigned int nr,
-					       struct page **pagepool,
-					       struct address_space *mc)
+static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
+				 struct z_erofs_decompress_frontend *f,
+				 struct z_erofs_pcluster *pcl,
+				 unsigned int nr,
+				 struct address_space *mc)
 {
-	const pgoff_t index = pcl->obj.index;
 	gfp_t gfp = mapping_gfp_mask(mc);
 	bool tocache = false;
-
+	struct z_erofs_bvec *zbv = pcl->compressed_bvecs + nr;
 	struct address_space *mapping;
-	struct page *oldpage, *page;
-	int justfound;
+	struct page *page, *oldpage;
+	int justfound, bs = i_blocksize(f->inode);
 
+	/* Except for inplace pages, the entire page can be used for I/Os */
+	bvec->bv_offset = 0;
+	bvec->bv_len = PAGE_SIZE;
 repeat:
-	page = READ_ONCE(pcl->compressed_bvecs[nr].page);
-	oldpage = page;
-
-	if (!page)
+	oldpage = READ_ONCE(zbv->page);
+	if (!oldpage)
 		goto out_allocpage;
 
-	justfound = (unsigned long)page & 1UL;
-	page = (struct page *)((unsigned long)page & ~1UL);
+	justfound = (unsigned long)oldpage & 1UL;
+	page = (struct page *)((unsigned long)oldpage & ~1UL);
+	bvec->bv_page = page;
 
+	DBG_BUGON(z_erofs_is_shortlived_page(page));
 	/*
-	 * preallocated cached pages, which is used to avoid direct reclaim
-	 * otherwise, it will go inplace I/O path instead.
+	 * Handle preallocated cached pages. We tried to allocate such pages
+	 * without triggering direct reclaim. If allocation failed, inplace
+	 * file-backed pages will be used instead.
 	 */
 	if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
-		WRITE_ONCE(pcl->compressed_bvecs[nr].page, page);
 		set_page_private(page, 0);
+		WRITE_ONCE(zbv->page, page);
 		tocache = true;
 		goto out_tocache;
 	}
-	mapping = READ_ONCE(page->mapping);
 
+	mapping = READ_ONCE(page->mapping);
 	/*
-	 * file-backed online pages in plcuster are all locked steady,
-	 * therefore it is impossible for `mapping' to be NULL.
+	 * File-backed pages for inplace I/Os are all locked steady,
+	 * therefore it is impossible for `mapping` to be NULL.
 	 */
-	if (mapping && mapping != mc)
-		/* ought to be unmanaged pages */
-		goto out;
-
-	/* directly return for shortlived page as well */
-	if (z_erofs_is_shortlived_page(page))
-		goto out;
+	if (mapping && mapping != mc) {
+		if (zbv->offset < 0)
+			bvec->bv_offset = round_up(-zbv->offset, bs);
+		bvec->bv_len = round_up(zbv->end, bs) - bvec->bv_offset;
+		return;
+	}
 
 	lock_page(page);
-
 	/* only true if page reclaim goes wrong, should never happen */
 	DBG_BUGON(justfound && PagePrivate(page));
 
-	/* the page is still in manage cache */
+	/* the cached page is still in managed cache */
 	if (page->mapping == mc) {
-		WRITE_ONCE(pcl->compressed_bvecs[nr].page, page);
-
+		WRITE_ONCE(zbv->page, page);
+		/*
+		 * The cached page is still available but without a valid
+		 * `->private` pcluster hint. Let's reconnect them.
+		 */
 		if (!PagePrivate(page)) {
-			/*
-			 * impossible to be !PagePrivate(page) for
-			 * the current restriction as well if
-			 * the page is already in compressed_bvecs[].
-			 */
 			DBG_BUGON(!justfound);
-
-			justfound = 0;
-			set_page_private(page, (unsigned long)pcl);
-			SetPagePrivate(page);
+			/* compressed_bvecs[] already takes a ref */
+			attach_page_private(page, pcl);
+			put_page(page);
 		}
 
-		/* no need to submit io if it is already up-to-date */
+		/* no need to submit if it is already up-to-date */
 		if (PageUptodate(page)) {
 			unlock_page(page);
-			page = NULL;
+			bvec->bv_page = NULL;
 		}
-		goto out;
+		return;
 	}
 
 	/*
-	 * the managed page has been truncated, it's unsafe to
-	 * reuse this one, let's allocate a new cache-managed page.
+	 * It has been truncated, so it's unsafe to reuse this one. Let's
+	 * allocate a new page for compressed data.
 	 */
 	DBG_BUGON(page->mapping);
 	DBG_BUGON(!justfound);
@@ -1523,25 +1522,23 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
 	unlock_page(page);
 	put_page(page);
 out_allocpage:
-	page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
-	if (oldpage != cmpxchg(&pcl->compressed_bvecs[nr].page,
-			       oldpage, page)) {
-		erofs_pagepool_add(pagepool, page);
+	page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
+	if (oldpage != cmpxchg(&zbv->page, oldpage, page)) {
+		erofs_pagepool_add(&f->pagepool, page);
 		cond_resched();
 		goto repeat;
 	}
+	bvec->bv_page = page;
 out_tocache:
-	if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
-		/* turn into temporary page if fails (1 ref) */
+	if (!tocache || bs != PAGE_SIZE ||
+	    add_to_page_cache_lru(page, mc, pcl->obj.index + nr, gfp)) {
+		/* turn into a temporary shortlived page (1 ref) */
 		set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
-		goto out;
+		return;
 	}
 	attach_page_private(page, pcl);
-	/* drop a refcount added by allocpage (then we have 2 refs here) */
+	/* drop a refcount added by allocpage (then 2 refs in total here) */
 	put_page(page);
-
-out: /* the only exit (for tracing and debugging) */
-	return page;
 }
 
 static struct z_erofs_decompressqueue *jobqueue_init(struct super_block *sb,
@@ -1596,7 +1593,7 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
 		qtail[JQ_BYPASS] = &pcl->next;
 }
 
-static void z_erofs_decompressqueue_endio(struct bio *bio)
+static void z_erofs_submissionqueue_endio(struct bio *bio)
 {
 	struct z_erofs_decompressqueue *q = bio->bi_private;
 	blk_status_t err = bio->bi_status;
@@ -1608,7 +1605,6 @@ static void z_erofs_decompressqueue_endio(struct bio *bio)
 
 		DBG_BUGON(PageUptodate(page));
 		DBG_BUGON(z_erofs_page_is_invalidated(page));
-
 		if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
 			if (!err)
 				SetPageUptodate(page);
@@ -1631,17 +1627,14 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
 	struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
 	z_erofs_next_pcluster_t owned_head = f->owned_head;
 	/* bio is NULL initially, so no need to initialize last_{index,bdev} */
-	pgoff_t last_index;
+	erofs_off_t last_pa;
 	struct block_device *last_bdev;
 	unsigned int nr_bios = 0;
 	struct bio *bio = NULL;
 	unsigned long pflags;
 	int memstall = 0;
 
-	/*
-	 * if managed cache is enabled, bypass jobqueue is needed,
-	 * no need to read from device for all pclusters in this queue.
-	 */
+	/* No need to read from device for pclusters in the bypass queue. */
 	q[JQ_BYPASS] = jobqueue_init(sb, fgq + JQ_BYPASS, NULL);
 	q[JQ_SUBMIT] = jobqueue_init(sb, fgq + JQ_SUBMIT, force_fg);
 
@@ -1654,7 +1647,8 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
 	do {
 		struct erofs_map_dev mdev;
 		struct z_erofs_pcluster *pcl;
-		pgoff_t cur, end;
+		erofs_off_t cur, end;
+		struct bio_vec bvec;
 		unsigned int i = 0;
 		bool bypass = true;
 
@@ -1673,18 +1667,14 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
 		};
 		(void)erofs_map_dev(sb, &mdev);
 
-		cur = erofs_blknr(sb, mdev.m_pa);
-		end = cur + pcl->pclusterpages;
-
+		cur = mdev.m_pa;
+		end = cur + (pcl->pclusterpages << PAGE_SHIFT);
 		do {
-			struct page *page;
-
-			page = pickup_page_for_submission(pcl, i++,
-							  &f->pagepool, mc);
-			if (!page)
+			z_erofs_fill_bio_vec(&bvec, f, pcl, i++, mc);
+			if (!bvec.bv_page)
 				continue;
 
-			if (bio && (cur != last_index + 1 ||
+			if (bio && (cur != last_pa ||
 				    last_bdev != mdev.m_bdev)) {
 submit_bio_retry:
 				submit_bio(bio);
@@ -1695,31 +1685,33 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
 				bio = NULL;
 			}
 
-			if (unlikely(PageWorkingset(page)) && !memstall) {
+			if (unlikely(PageWorkingset(bvec.bv_page)) &&
+			    !memstall) {
 				psi_memstall_enter(&pflags);
 				memstall = 1;
 			}
 
 			if (!bio) {
 				bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS,
 						REQ_OP_READ, GFP_NOIO);
-				bio->bi_end_io = z_erofs_decompressqueue_endio;
-
-				last_bdev = mdev.m_bdev;
-				bio->bi_iter.bi_sector = (sector_t)cur <<
-					(sb->s_blocksize_bits - 9);
+				bio->bi_end_io = z_erofs_submissionqueue_endio;
+				bio->bi_iter.bi_sector = cur >> 9;
 				bio->bi_private = q[JQ_SUBMIT];
 				if (readahead)
 					bio->bi_opf |= REQ_RAHEAD;
 				++nr_bios;
+				last_bdev = mdev.m_bdev;
 			}
 
-			if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
+			if (cur + bvec.bv_len > end)
+				bvec.bv_len = end - cur;
+			if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len,
+					  bvec.bv_offset))
 				goto submit_bio_retry;
 
-			last_index = cur;
+			last_pa = cur + bvec.bv_len;
 			bypass = false;
-		} while (++cur < end);
+		} while ((cur += bvec.bv_len) < end);
 
 		if (!bypass)
 			qtail[JQ_SUBMIT] = &pcl->next;
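Because the submission loop above now tracks physical byte addresses (last_pa) rather than page indices, adjacent extents still merge into one bio even when their lengths are not page-aligned. Below is a rough standalone sketch of that merge rule; struct extent and the sample addresses are invented for illustration, only the comparison against last_pa comes from the patch.

/* Illustrative sketch: a new bio is only needed when the next extent
 * does not start exactly at last_pa (the end of what was just queued). */
#include <stdbool.h>
#include <stdio.h>

struct extent { unsigned long long pa; unsigned int len; };

int main(void)
{
	/* assumed sample extents: the first two are physically contiguous */
	struct extent v[] = { {0x10000, 0x800}, {0x10800, 0x1000}, {0x20000, 0x800} };
	unsigned long long last_pa = 0;
	bool have_bio = false;

	for (int i = 0; i < 3; i++) {
		if (have_bio && v[i].pa != last_pa)
			printf("submit bio before extent %d\n", i);	/* prints for i == 2 */
		have_bio = true;
		last_pa = v[i].pa + v[i].len;	/* end of the extent just added */
	}
	printf("submit final bio\n");
	return 0;
}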
