Skip to content

Commit 598162d

Browse files
author
Gao Xiang
committed
erofs: support decompress big pcluster for lz4 backend
Prior to big pcluster, there was only one compressed page so it'd be easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly as we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect whether inplace decompression is feasible or not (it's still quite easy for non big pclusters); apart from the inplace margin calculation, the inplace I/O page reusing order also needs to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first, and obviously it can be further optimized later since it has nothing to do with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
1 parent b86269f commit 598162d

2 files changed

Lines changed: 138 additions & 95 deletions

File tree

fs/erofs/decompressor.c

Lines changed: 123 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -120,95 +120,123 @@ static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
120120
return kaddr ? 1 : 0;
121121
}
122122

123-
static void *generic_copy_inplace_data(struct z_erofs_decompress_req *rq,
124-
u8 *src, unsigned int pageofs_in)
123+
static void *z_erofs_handle_inplace_io(struct z_erofs_decompress_req *rq,
124+
void *inpage, unsigned int *inputmargin, int *maptype,
125+
bool support_0padding)
125126
{
126-
/*
127-
* if in-place decompression is ongoing, those decompressed
128-
* pages should be copied in order to avoid being overlapped.
129-
*/
130-
struct page **in = rq->in;
131-
u8 *const tmp = erofs_get_pcpubuf(1);
132-
u8 *tmpp = tmp;
133-
unsigned int inlen = rq->inputsize - pageofs_in;
134-
unsigned int count = min_t(uint, inlen, PAGE_SIZE - pageofs_in);
135-
136-
while (tmpp < tmp + inlen) {
137-
if (!src)
138-
src = kmap_atomic(*in);
139-
memcpy(tmpp, src + pageofs_in, count);
140-
kunmap_atomic(src);
141-
src = NULL;
142-
tmpp += count;
143-
pageofs_in = 0;
144-
count = PAGE_SIZE;
127+
unsigned int nrpages_in, nrpages_out;
128+
unsigned int ofull, oend, inputsize, total, i, j;
129+
struct page **in;
130+
void *src, *tmp;
131+
132+
inputsize = rq->inputsize;
133+
nrpages_in = PAGE_ALIGN(inputsize) >> PAGE_SHIFT;
134+
oend = rq->pageofs_out + rq->outputsize;
135+
ofull = PAGE_ALIGN(oend);
136+
nrpages_out = ofull >> PAGE_SHIFT;
137+
138+
if (rq->inplace_io) {
139+
if (rq->partial_decoding || !support_0padding ||
140+
ofull - oend < LZ4_DECOMPRESS_INPLACE_MARGIN(inputsize))
141+
goto docopy;
142+
143+
for (i = 0; i < nrpages_in; ++i) {
144+
DBG_BUGON(rq->in[i] == NULL);
145+
for (j = 0; j < nrpages_out - nrpages_in + i; ++j)
146+
if (rq->out[j] == rq->in[i])
147+
goto docopy;
148+
}
149+
}
150+
151+
if (nrpages_in <= 1) {
152+
*maptype = 0;
153+
return inpage;
154+
}
155+
kunmap_atomic(inpage);
156+
might_sleep();
157+
src = erofs_vm_map_ram(rq->in, nrpages_in);
158+
if (!src)
159+
return ERR_PTR(-ENOMEM);
160+
*maptype = 1;
161+
return src;
162+
163+
docopy:
164+
/* Or copy compressed data which can be overlapped to per-CPU buffer */
165+
in = rq->in;
166+
src = erofs_get_pcpubuf(nrpages_in);
167+
if (!src) {
168+
DBG_BUGON(1);
169+
kunmap_atomic(inpage);
170+
return ERR_PTR(-EFAULT);
171+
}
172+
173+
tmp = src;
174+
total = rq->inputsize;
175+
while (total) {
176+
unsigned int page_copycnt =
177+
min_t(unsigned int, total, PAGE_SIZE - *inputmargin);
178+
179+
if (!inpage)
180+
inpage = kmap_atomic(*in);
181+
memcpy(tmp, inpage + *inputmargin, page_copycnt);
182+
kunmap_atomic(inpage);
183+
inpage = NULL;
184+
tmp += page_copycnt;
185+
total -= page_copycnt;
145186
++in;
187+
*inputmargin = 0;
146188
}
147-
return tmp;
189+
*maptype = 2;
190+
return src;
148191
}
149192

150193
static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
151194
{
152-
unsigned int inputmargin, inlen;
153-
u8 *src;
154-
bool copied, support_0padding;
155-
int ret;
156-
157-
if (rq->inputsize > PAGE_SIZE)
158-
return -EOPNOTSUPP;
195+
unsigned int inputmargin;
196+
u8 *headpage, *src;
197+
bool support_0padding;
198+
int ret, maptype;
159199

160-
src = kmap_atomic(*rq->in);
200+
DBG_BUGON(*rq->in == NULL);
201+
headpage = kmap_atomic(*rq->in);
161202
inputmargin = 0;
162203
support_0padding = false;
163204

164205
/* decompression inplace is only safe when 0padding is enabled */
165206
if (erofs_sb_has_lz4_0padding(EROFS_SB(rq->sb))) {
166207
support_0padding = true;
167208

168-
while (!src[inputmargin & ~PAGE_MASK])
209+
while (!headpage[inputmargin & ~PAGE_MASK])
169210
if (!(++inputmargin & ~PAGE_MASK))
170211
break;
171212

172213
if (inputmargin >= rq->inputsize) {
173-
kunmap_atomic(src);
214+
kunmap_atomic(headpage);
174215
return -EIO;
175216
}
176217
}
177218

178-
copied = false;
179-
inlen = rq->inputsize - inputmargin;
180-
if (rq->inplace_io) {
181-
const uint oend = (rq->pageofs_out +
182-
rq->outputsize) & ~PAGE_MASK;
183-
const uint nr = PAGE_ALIGN(rq->pageofs_out +
184-
rq->outputsize) >> PAGE_SHIFT;
185-
186-
if (rq->partial_decoding || !support_0padding ||
187-
rq->out[nr - 1] != rq->in[0] ||
188-
rq->inputsize - oend <
189-
LZ4_DECOMPRESS_INPLACE_MARGIN(inlen)) {
190-
src = generic_copy_inplace_data(rq, src, inputmargin);
191-
inputmargin = 0;
192-
copied = true;
193-
}
194-
}
219+
rq->inputsize -= inputmargin;
220+
src = z_erofs_handle_inplace_io(rq, headpage, &inputmargin, &maptype,
221+
support_0padding);
222+
if (IS_ERR(src))
223+
return PTR_ERR(src);
195224

196225
/* legacy format could compress extra data in a pcluster. */
197226
if (rq->partial_decoding || !support_0padding)
198227
ret = LZ4_decompress_safe_partial(src + inputmargin, out,
199-
inlen, rq->outputsize,
200-
rq->outputsize);
228+
rq->inputsize, rq->outputsize, rq->outputsize);
201229
else
202230
ret = LZ4_decompress_safe(src + inputmargin, out,
203-
inlen, rq->outputsize);
231+
rq->inputsize, rq->outputsize);
204232

205233
if (ret != rq->outputsize) {
206234
erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]",
207-
ret, inlen, inputmargin, rq->outputsize);
235+
ret, rq->inputsize, inputmargin, rq->outputsize);
208236

209237
WARN_ON(1);
210238
print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET,
211-
16, 1, src + inputmargin, inlen, true);
239+
16, 1, src + inputmargin, rq->inputsize, true);
212240
print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET,
213241
16, 1, out, rq->outputsize, true);
214242

@@ -217,10 +245,16 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
217245
ret = -EIO;
218246
}
219247

220-
if (copied)
221-
erofs_put_pcpubuf(src);
222-
else
248+
if (maptype == 0) {
223249
kunmap_atomic(src);
250+
} else if (maptype == 1) {
251+
vm_unmap_ram(src, PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT);
252+
} else if (maptype == 2) {
253+
erofs_put_pcpubuf(src);
254+
} else {
255+
DBG_BUGON(1);
256+
return -EFAULT;
257+
}
224258
return ret;
225259
}
226260

@@ -270,57 +304,51 @@ static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq,
270304
const struct z_erofs_decompressor *alg = decompressors + rq->alg;
271305
unsigned int dst_maptype;
272306
void *dst;
273-
int ret, i;
307+
int ret;
274308

275-
if (nrpages_out == 1 && !rq->inplace_io) {
276-
DBG_BUGON(!*rq->out);
277-
dst = kmap_atomic(*rq->out);
278-
dst_maptype = 0;
279-
goto dstmap_out;
280-
}
309+
/* two optimized fast paths only for non bigpcluster cases yet */
310+
if (rq->inputsize <= PAGE_SIZE) {
311+
if (nrpages_out == 1 && !rq->inplace_io) {
312+
DBG_BUGON(!*rq->out);
313+
dst = kmap_atomic(*rq->out);
314+
dst_maptype = 0;
315+
goto dstmap_out;
316+
}
281317

282-
/*
283-
* For the case of small output size (especially much less
284-
* than PAGE_SIZE), memcpy the decompressed data rather than
285-
* compressed data is preferred.
286-
*/
287-
if (rq->outputsize <= PAGE_SIZE * 7 / 8) {
288-
dst = erofs_get_pcpubuf(1);
289-
if (IS_ERR(dst))
290-
return PTR_ERR(dst);
291-
292-
rq->inplace_io = false;
293-
ret = alg->decompress(rq, dst);
294-
if (!ret)
295-
copy_from_pcpubuf(rq->out, dst, rq->pageofs_out,
296-
rq->outputsize);
297-
298-
erofs_put_pcpubuf(dst);
299-
return ret;
318+
/*
319+
* For the case of small output size (especially much less
320+
* than PAGE_SIZE), memcpy the decompressed data rather than
321+
* compressed data is preferred.
322+
*/
323+
if (rq->outputsize <= PAGE_SIZE * 7 / 8) {
324+
dst = erofs_get_pcpubuf(1);
325+
if (IS_ERR(dst))
326+
return PTR_ERR(dst);
327+
328+
rq->inplace_io = false;
329+
ret = alg->decompress(rq, dst);
330+
if (!ret)
331+
copy_from_pcpubuf(rq->out, dst, rq->pageofs_out,
332+
rq->outputsize);
333+
334+
erofs_put_pcpubuf(dst);
335+
return ret;
336+
}
300337
}
301338

339+
/* general decoding path which can be used for all cases */
302340
ret = alg->prepare_destpages(rq, pagepool);
303-
if (ret < 0) {
341+
if (ret < 0)
304342
return ret;
305-
} else if (ret) {
343+
if (ret) {
306344
dst = page_address(*rq->out);
307345
dst_maptype = 1;
308346
goto dstmap_out;
309347
}
310348

311-
i = 0;
312-
while (1) {
313-
dst = vm_map_ram(rq->out, nrpages_out, -1);
314-
315-
/* retry two more times (totally 3 times) */
316-
if (dst || ++i >= 3)
317-
break;
318-
vm_unmap_aliases();
319-
}
320-
349+
dst = erofs_vm_map_ram(rq->out, nrpages_out);
321350
if (!dst)
322351
return -ENOMEM;
323-
324352
dst_maptype = 2;
325353

326354
dstmap_out:

fs/erofs/internal.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,21 @@ int erofs_namei(struct inode *dir, struct qstr *name,
402402
/* dir.c */
403403
extern const struct file_operations erofs_dir_fops;
404404

405+
static inline void *erofs_vm_map_ram(struct page **pages, unsigned int count)
406+
{
407+
int retried = 0;
408+
409+
while (1) {
410+
void *p = vm_map_ram(pages, count, -1);
411+
412+
/* retry two more times (totally 3 times) */
413+
if (p || ++retried >= 3)
414+
return p;
415+
vm_unmap_aliases();
416+
}
417+
return NULL;
418+
}
419+
405420
/* pcpubuf.c */
406421
void *erofs_get_pcpubuf(unsigned int requiredpages);
407422
void erofs_put_pcpubuf(void *ptr);

0 commit comments

Comments
 (0)