@@ -44,8 +44,8 @@ struct z_erofs_pcluster {
 	/* A: point to next chained pcluster or TAILs */
 	struct z_erofs_pcluster *next;
 
-	/* I: start block address of this pcluster */
-	erofs_off_t index;
+	/* I: start physical position of this pcluster */
+	erofs_off_t pos;
 
 	/* L: the maximum decompression size of this round */
 	unsigned int length;
@@ -73,6 +73,9 @@ struct z_erofs_pcluster {
 	/* I: compression algorithm format */
 	unsigned char algorithmformat;
 
+	/* I: whether compressed data is in-lined or not */
+	bool from_meta;
+
 	/* L: whether partial decompression or not */
 	bool partial;
 
@@ -102,14 +105,9 @@ struct z_erofs_decompressqueue {
 	bool eio, sync;
 };
 
-static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl)
-{
-	return !pcl->index;
-}
-
 static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
 {
-	return PAGE_ALIGN(pcl->pclustersize) >> PAGE_SHIFT;
+	return PAGE_ALIGN(pcl->pageofs_in + pcl->pclustersize) >> PAGE_SHIFT;
 }
 
 static bool erofs_folio_is_managed(struct erofs_sb_info *sbi, struct folio *fo)
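The helper removed above relied on a zero `index` as the "inline" sentinel; now that the field holds a byte position, meta-stored pclusters are tracked by the explicit `from_meta` flag instead. A standalone before/after sketch of the check, using minimal stand-in structs rather than the kernel definitions:

    #include <stdbool.h>
    #include <stdio.h>

    /* stand-ins for illustration only, not the kernel structs */
    struct old_pcl { unsigned long index; };   /* index == 0 meant "inline" */
    struct new_pcl { long long pos; bool from_meta; };

    int main(void)
    {
        struct old_pcl o = { .index = 0 };
        struct new_pcl n = { .pos = 4096, .from_meta = true };

        printf("old: inline=%d\n", !o.index);     /* sentinel test */
        printf("new: inline=%d\n", n.from_meta);  /* explicit flag */
        return 0;
    }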
@@ -133,7 +131,7 @@ struct z_erofs_pcluster_slab {
 
 static struct z_erofs_pcluster_slab pcluster_pool[] __read_mostly = {
 	_PCLP(1), _PCLP(4), _PCLP(16), _PCLP(64), _PCLP(128),
-	_PCLP(Z_EROFS_PCLUSTER_MAX_PAGES)
+	_PCLP(Z_EROFS_PCLUSTER_MAX_PAGES + 1)
 };
 
 struct z_erofs_bvec_iter {
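Since a pcluster may now begin at a nonzero in-page offset, z_erofs_pclusterpages() counts `pageofs_in` too, so a maximal pcluster can spill into one extra page; that is why the largest slab grows to `Z_EROFS_PCLUSTER_MAX_PAGES + 1`. A quick userspace check of the arithmetic (PAGE_* macros redefined locally for illustration):

    #include <stdio.h>

    #define PAGE_SHIFT    12
    #define PAGE_SIZE     (1UL << PAGE_SHIFT)
    #define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

    int main(void)
    {
        unsigned long pclustersize = 4 * PAGE_SIZE;

        /* aligned start: exactly 4 pages */
        printf("%lu\n", PAGE_ALIGN(0 + pclustersize) >> PAGE_SHIFT);
        /* start 512 bytes into a page: the same bytes now span 5 pages */
        printf("%lu\n", PAGE_ALIGN(512 + pclustersize) >> PAGE_SHIFT);
        return 0;
    }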
@@ -267,7 +265,6 @@ static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int size)
 		pcl = kmem_cache_zalloc(pcs->slab, GFP_KERNEL);
 		if (!pcl)
 			return ERR_PTR(-ENOMEM);
-		pcl->pclustersize = size;
 		return pcl;
 	}
 	return ERR_PTR(-EINVAL);
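After this change, the `size` argument only selects a slab bucket (it now includes `pageofs_in`), while `pclustersize` itself is set by the caller from `m_plen`. A rough sketch of the first-fit bucket choice, with illustrative page counts standing in for `pcluster_pool[]`:

    #include <stdio.h>

    /* illustrative bucket sizes in pages; the last one corresponds to
       Z_EROFS_PCLUSTER_MAX_PAGES + 1 (exact value assumed here) */
    static const unsigned int buckets[] = { 1, 4, 16, 64, 128, 129 };

    int main(void)
    {
        unsigned int i, need = 5;  /* pages for pageofs_in + pclustersize */

        for (i = 0; i < sizeof(buckets) / sizeof(buckets[0]); i++)
            if (need <= buckets[i]) {
                printf("use the %u-page slab\n", buckets[i]);
                break;
            }
        return 0;
    }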
@@ -516,6 +513,7 @@ static void z_erofs_bind_cache(struct z_erofs_frontend *fe)
 	struct z_erofs_pcluster *pcl = fe->pcl;
 	unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
 	bool shouldalloc = z_erofs_should_alloc_cache(fe);
+	pgoff_t poff = pcl->pos >> PAGE_SHIFT;
 	bool may_bypass = true;
 	/* Optimistic allocation, as in-place I/O can be used as a fallback */
 	gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
@@ -532,7 +530,7 @@ static void z_erofs_bind_cache(struct z_erofs_frontend *fe)
 		if (READ_ONCE(pcl->compressed_bvecs[i].page))
 			continue;
 
-		folio = filemap_get_folio(mc, pcl->index + i);
+		folio = filemap_get_folio(mc, poff + i);
 		if (IS_ERR(folio)) {
 			may_bypass = false;
 			if (!shouldalloc)
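The managed-cache lookup now derives its page-cache index from the byte position (`pos >> PAGE_SHIFT`) instead of reusing the block number, which only coincides with a page index when the block size equals the page size. A small illustration with made-up numbers:

    #include <stdio.h>

    #define PAGE_SHIFT 12  /* 4 KiB pages, for illustration */

    int main(void)
    {
        unsigned long long blksz = 512, pos = 24 * blksz;  /* block 24 */

        printf("block number: %llu\n", pos / blksz);        /* 24 */
        printf("page index:   %llu\n", pos >> PAGE_SHIFT);  /* 3  */
        return 0;
    }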
@@ -575,7 +573,7 @@ static int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
 	struct folio *folio;
 	int i;
 
-	DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
+	DBG_BUGON(pcl->from_meta);
 	/* Each cached folio contains one page unless bs > ps is supported */
 	for (i = 0; i < pclusterpages; ++i) {
 		if (pcl->compressed_bvecs[i].page) {
@@ -607,7 +605,7 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
 	ret = false;
 	spin_lock(&pcl->lockref.lock);
 	if (pcl->lockref.count <= 0) {
-		DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
+		DBG_BUGON(pcl->from_meta);
 		for (; bvec < end; ++bvec) {
 			if (bvec->page && page_folio(bvec->page) == folio) {
 				bvec->page = NULL;
@@ -667,16 +665,20 @@ static int z_erofs_attach_page(struct z_erofs_frontend *fe,
 	int ret;
 
 	if (exclusive) {
-		/* give priority for inplaceio to use file pages first */
-		spin_lock(&pcl->lockref.lock);
-		while (fe->icur > 0) {
-			if (pcl->compressed_bvecs[--fe->icur].page)
-				continue;
-			pcl->compressed_bvecs[fe->icur] = *bvec;
+		/* Inplace I/O is limited to one page for uncompressed data */
+		if (pcl->algorithmformat < Z_EROFS_COMPRESSION_MAX ||
+		    fe->icur <= 1) {
+			/* Try to prioritize inplace I/O here */
+			spin_lock(&pcl->lockref.lock);
+			while (fe->icur > 0) {
+				if (pcl->compressed_bvecs[--fe->icur].page)
+					continue;
+				pcl->compressed_bvecs[fe->icur] = *bvec;
+				spin_unlock(&pcl->lockref.lock);
+				return 0;
+			}
 			spin_unlock(&pcl->lockref.lock);
-			return 0;
 		}
-		spin_unlock(&pcl->lockref.lock);
 
 		/* otherwise, check if it can be used as a bvpage */
 		if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED &&
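The rewritten branch only tries in-place I/O when the data is actually compressed, or when at most one compressed page remains for uncompressed (shifted/interlaced) formats. A hypothetical userspace mirror of the new gate (names assumed, not kernel code):

    #include <stdbool.h>
    #include <stdio.h>

    /* "compressed" stands in for
       pcl->algorithmformat < Z_EROFS_COMPRESSION_MAX */
    static bool may_try_inplace(bool compressed, unsigned int icur)
    {
        return compressed || icur <= 1;
    }

    int main(void)
    {
        printf("%d\n", may_try_inplace(true, 8));   /* 1 */
        printf("%d\n", may_try_inplace(false, 8));  /* 0 */
        printf("%d\n", may_try_inplace(false, 1));  /* 1 */
        return 0;
    }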
@@ -711,27 +713,25 @@ static int z_erofs_register_pcluster(struct z_erofs_frontend *fe)
 	struct erofs_map_blocks *map = &fe->map;
 	struct super_block *sb = fe->inode->i_sb;
 	struct erofs_sb_info *sbi = EROFS_SB(sb);
-	bool ztailpacking = map->m_flags & EROFS_MAP_META;
 	struct z_erofs_pcluster *pcl, *pre;
+	unsigned int pageofs_in;
 	int err;
 
-	if (!(map->m_flags & EROFS_MAP_ENCODED) ||
-	    (!ztailpacking && !erofs_blknr(sb, map->m_pa))) {
-		DBG_BUGON(1);
-		return -EFSCORRUPTED;
-	}
-
-	/* no available pcluster, let's allocate one */
-	pcl = z_erofs_alloc_pcluster(map->m_plen);
+	pageofs_in = erofs_blkoff(sb, map->m_pa);
+	pcl = z_erofs_alloc_pcluster(pageofs_in + map->m_plen);
 	if (IS_ERR(pcl))
 		return PTR_ERR(pcl);
 
 	lockref_init(&pcl->lockref); /* one ref for this request */
 	pcl->algorithmformat = map->m_algorithmformat;
+	pcl->pclustersize = map->m_plen;
+	pcl->pageofs_in = pageofs_in;
 	pcl->length = 0;
 	pcl->partial = true;
 	pcl->next = fe->head;
+	pcl->pos = map->m_pa;
 	pcl->pageofs_out = map->m_la & ~PAGE_MASK;
+	pcl->from_meta = map->m_flags & EROFS_MAP_META;
 	fe->mode = Z_EROFS_PCLUSTER_FOLLOWED;
 
 	/*
@@ -741,13 +741,10 @@ static int z_erofs_register_pcluster(struct z_erofs_frontend *fe)
 	mutex_init(&pcl->lock);
 	DBG_BUGON(!mutex_trylock(&pcl->lock));
 
-	if (ztailpacking) {
-		pcl->index = 0;		/* which indicates ztailpacking */
-	} else {
-		pcl->index = erofs_blknr(sb, map->m_pa);
+	if (!pcl->from_meta) {
 		while (1) {
 			xa_lock(&sbi->managed_pslots);
-			pre = __xa_cmpxchg(&sbi->managed_pslots, pcl->index,
+			pre = __xa_cmpxchg(&sbi->managed_pslots, pcl->pos,
					   NULL, pcl, GFP_KERNEL);
 			if (!pre || xa_is_err(pre) || z_erofs_get_pcluster(pre)) {
 				xa_unlock(&sbi->managed_pslots);
@@ -779,7 +776,6 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
 {
 	struct erofs_map_blocks *map = &fe->map;
 	struct super_block *sb = fe->inode->i_sb;
-	erofs_blk_t blknr = erofs_blknr(sb, map->m_pa);
 	struct z_erofs_pcluster *pcl = NULL;
 	int ret;
 
@@ -790,9 +786,9 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
 	if (!(map->m_flags & EROFS_MAP_META)) {
 		while (1) {
 			rcu_read_lock();
-			pcl = xa_load(&EROFS_SB(sb)->managed_pslots, blknr);
+			pcl = xa_load(&EROFS_SB(sb)->managed_pslots, map->m_pa);
 			if (!pcl || z_erofs_get_pcluster(pcl)) {
-				DBG_BUGON(pcl && blknr != pcl->index);
+				DBG_BUGON(pcl && map->m_pa != pcl->pos);
 				rcu_read_unlock();
 				break;
 			}
@@ -826,7 +822,7 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
 
 	z_erofs_bvec_iter_begin(&fe->biter, &fe->pcl->bvset,
 				Z_EROFS_INLINE_BVECS, fe->pcl->vcnt);
-	if (!z_erofs_is_inline_pcluster(fe->pcl)) {
+	if (!fe->pcl->from_meta) {
 		/* bind cache first when cached decompression is preferred */
 		z_erofs_bind_cache(fe);
 	} else {
@@ -871,7 +867,7 @@ static bool __erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
 	 * It's impossible to fail after the pcluster is freezed, but in order
 	 * to avoid some race conditions, add a DBG_BUGON to observe this.
 	 */
-	DBG_BUGON(__xa_erase(&sbi->managed_pslots, pcl->index) != pcl);
+	DBG_BUGON(__xa_erase(&sbi->managed_pslots, pcl->pos) != pcl);
 
 	lockref_mark_dead(&pcl->lockref);
 	return true;
@@ -1221,7 +1217,7 @@ static int z_erofs_parse_in_bvecs(struct z_erofs_backend *be, bool *overlapped)
 		}
 		be->compressed_pages[i] = page;
 
-		if (z_erofs_is_inline_pcluster(pcl) ||
+		if (pcl->from_meta ||
 		    erofs_folio_is_managed(EROFS_SB(be->sb), page_folio(page))) {
 			if (!PageUptodate(page))
 				err = -EIO;
@@ -1299,7 +1295,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
 	}, be->pagepool);
 
 	/* must handle all compressed pages before actual file pages */
-	if (z_erofs_is_inline_pcluster(pcl)) {
+	if (pcl->from_meta) {
 		page = pcl->compressed_bvecs[0].page;
 		WRITE_ONCE(pcl->compressed_bvecs[0].page, NULL);
 		put_page(page);
@@ -1359,7 +1355,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
 	WRITE_ONCE(pcl->next, NULL);
 	mutex_unlock(&pcl->lock);
 
-	if (z_erofs_is_inline_pcluster(pcl))
+	if (pcl->from_meta)
 		z_erofs_free_pcluster(pcl);
 	else
 		z_erofs_put_pcluster(sbi, pcl, try_free);
@@ -1540,7 +1536,7 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
 	folio = page_folio(page);
 out_tocache:
 	if (!tocache || bs != PAGE_SIZE ||
-	    filemap_add_folio(mc, folio, pcl->index + nr, gfp)) {
+	    filemap_add_folio(mc, folio, (pcl->pos >> PAGE_SHIFT) + nr, gfp)) {
 		/* turn into a temporary shortlived folio (1 ref) */
 		folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
 		return;
@@ -1657,19 +1653,20 @@ static void z_erofs_submit_queue(struct z_erofs_frontend *f,
 
 		pcl = next;
 		next = READ_ONCE(pcl->next);
-		if (z_erofs_is_inline_pcluster(pcl)) {
+		if (pcl->from_meta) {
 			z_erofs_move_to_bypass_queue(pcl, next, qtail);
 			continue;
 		}
 
 		/* no device id here, thus it will always succeed */
 		mdev = (struct erofs_map_dev) {
-			.m_pa = erofs_pos(sb, pcl->index),
+			.m_pa = round_down(pcl->pos, sb->s_blocksize),
 		};
 		(void)erofs_map_dev(sb, &mdev);
 
 		cur = mdev.m_pa;
-		end = cur + pcl->pclustersize;
+		end = round_up(cur + pcl->pageofs_in + pcl->pclustersize,
+			       sb->s_blocksize);
 		do {
 			bvec.bv_page = NULL;
 			if (bio && (cur != last_pa ||
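Submission now rounds the start position down to its containing block and rounds `pageofs_in + pclustersize` back up, so an unaligned pcluster still issues block-aligned I/O. Checking the span arithmetic with made-up values:

    #include <stdio.h>

    int main(void)
    {
        /* illustrative: 4 KiB blocks, a pcluster starting 512 bytes
           into block 3 and holding 6000 compressed bytes */
        unsigned long long bs = 4096, pos = 3 * bs + 512;
        unsigned long long pageofs_in = pos % bs, pclustersize = 6000;

        unsigned long long cur = pos - pos % bs;             /* round_down */
        unsigned long long end = cur + pageofs_in + pclustersize;

        end = (end + bs - 1) / bs * bs;                      /* round_up */
        printf("I/O spans [%llu, %llu), %llu blocks\n",
               cur, end, (end - cur) / bs);                  /* 2 blocks */
        return 0;
    }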