@@ -105,65 +105,72 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq,
105105 return kaddr ? 1 : 0 ;
106106}
107107
108- static void * z_erofs_lz4_handle_overlap (struct z_erofs_decompress_req * rq ,
108+ static void * z_erofs_lz4_handle_overlap (const struct z_erofs_decompress_req * rq ,
109109 void * inpage , void * out , unsigned int * inputmargin ,
110110 int * maptype , bool may_inplace )
111111{
112- unsigned int oend , omargin , total , i ;
112+ unsigned int oend , omargin , cnt , i ;
113113 struct page * * in ;
114- void * src , * tmp ;
115-
116- if (rq -> inplace_io ) {
117- oend = rq -> pageofs_out + rq -> outputsize ;
118- omargin = PAGE_ALIGN (oend ) - oend ;
119- if (rq -> partial_decoding || !may_inplace ||
120- omargin < LZ4_DECOMPRESS_INPLACE_MARGIN (rq -> inputsize ))
121- goto docopy ;
114+ void * src ;
122115
116+ /*
117+ * If in-place I/O isn't used, for example, the bounce compressed cache
118+ * can hold data for incomplete read requests. Just map the compressed
119+ * buffer as well and decompress directly.
120+ */
121+ if (!rq -> inplace_io ) {
122+ if (rq -> inpages <= 1 ) {
123+ * maptype = 0 ;
124+ return inpage ;
125+ }
126+ kunmap_local (inpage );
127+ src = erofs_vm_map_ram (rq -> in , rq -> inpages );
128+ if (!src )
129+ return ERR_PTR (- ENOMEM );
130+ * maptype = 1 ;
131+ return src ;
132+ }
133+ /*
134+ * Then, deal with in-place I/Os. The reasons why in-place I/O is useful
135+ * are: (1) It minimizes memory footprint during the I/O submission,
136+ * which is useful for slow storage (including network devices and
137+ low-end HDDs/eMMCs) but with a lot of inflight I/Os; (2) If in-place
138+ * decompression can also be applied, it will reuse the unique buffer so
139+ * that no extra CPU D-cache is polluted with temporary compressed data
140+ * for extreme performance.
141+ */
142+ oend = rq -> pageofs_out + rq -> outputsize ;
143+ omargin = PAGE_ALIGN (oend ) - oend ;
144+ if (!rq -> partial_decoding && may_inplace &&
145+ omargin >= LZ4_DECOMPRESS_INPLACE_MARGIN (rq -> inputsize )) {
123146 for (i = 0 ; i < rq -> inpages ; ++ i )
124147 if (rq -> out [rq -> outpages - rq -> inpages + i ] !=
125148 rq -> in [i ])
126- goto docopy ;
127- kunmap_local (inpage );
128- * maptype = 3 ;
129- return out + ((rq -> outpages - rq -> inpages ) << PAGE_SHIFT );
130- }
131-
132- if (rq -> inpages <= 1 ) {
133- * maptype = 0 ;
134- return inpage ;
149+ break ;
150+ if (i >= rq -> inpages ) {
151+ kunmap_local (inpage );
152+ * maptype = 3 ;
153+ return out + ((rq -> outpages - rq -> inpages ) << PAGE_SHIFT );
154+ }
135155 }
136- kunmap_local (inpage );
137- src = erofs_vm_map_ram (rq -> in , rq -> inpages );
138- if (!src )
139- return ERR_PTR (- ENOMEM );
140- * maptype = 1 ;
141- return src ;
142-
143- docopy :
144- /* Or copy compressed data which can be overlapped to per-CPU buffer */
145- in = rq -> in ;
156+ /*
157+ * If in-place decompression can't be applied, copy compressed data that
158+ * may potentially overlap during decompression to a per-CPU buffer.
159+ */
146160 src = z_erofs_get_gbuf (rq -> inpages );
147161 if (!src ) {
148162 DBG_BUGON (1 );
149163 kunmap_local (inpage );
150164 return ERR_PTR (- EFAULT );
151165 }
152166
153- tmp = src ;
154- total = rq -> inputsize ;
155- while (total ) {
156- unsigned int page_copycnt =
157- min_t (unsigned int , total , PAGE_SIZE - * inputmargin );
158-
167+ for (i = 0 , in = rq -> in ; i < rq -> inputsize ; i += cnt , ++ in ) {
168+ cnt = min_t (u32 , rq -> inputsize - i , PAGE_SIZE - * inputmargin );
159169 if (!inpage )
160170 inpage = kmap_local_page (* in );
161- memcpy (tmp , inpage + * inputmargin , page_copycnt );
171+ memcpy (src + i , inpage + * inputmargin , cnt );
162172 kunmap_local (inpage );
163173 inpage = NULL ;
164- tmp += page_copycnt ;
165- total -= page_copycnt ;
166- ++ in ;
167174 * inputmargin = 0 ;
168175 }
169176 * maptype = 2 ;