Skip to content

Commit 74acb70

Browse files
author
Chandan Babu R
committed
Merge tag 'repair-refcount-scalability-6.9_2024-02-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.9-mergeC
xfs: reduce refcount repair memory usage The refcountbt repair code has serious memory usage problems when the block sharing factor of the filesystem is very high. This can happen if a deduplication tool has been run against the filesystem, or if the fs stores reflinked VM images that have been aging for a long time. Recall that the original reference counting algorithm walks the reverse mapping records of the filesystem to generate reference counts. For any given block in the AG, the rmap bag structure contains all the rmap records that cover that block; the refcount is the size of that bag. For online repair, the bag doesn't need the owner, offset, or state flag information, so it discards those. This halves the record size, but the bag structure still stores one excerpted record for each reverse mapping. If the sharing count is high, this will use a LOT of memory storing redundant records. In the extreme case, 100k mappings to the same piece of space will consume 100k*16 bytes = 1.6M of memory. For offline repair, the bag stores the owner values so that we know which inodes need to be marked as being reflink inodes. If a deduplication tool has been run and there are many blocks within a file pointing to the same physical space, this will still use a lot of memory to store redundant records. The solution to this problem is to deduplicate the bag records when possible by adding a reference count to the bag record, and changing the bag add function to detect an existing record to bump the refcount. In the above example, the 100k mappings will now use 24 bytes of memory. These lookups can be done efficiently with a btree, so we create a new refcount bag btree type (inside of online repair). This is why we refactored the btree code in the previous patchset. 
The btree conversion also dramatically reduces the runtime of the refcount generation algorithm, because the code to delete all bag records that end at a given agblock now only has to delete one record instead of (using the example above) 100k records. As an added benefit, record deletion now gives back the unused xfile space, which it did not do previously. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Chandan Babu R <chandanbabu@kernel.org> * tag 'repair-refcount-scalability-6.9_2024-02-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux: xfs: port refcount repair to the new refcount bag structure xfs: create refcount bag structure for btree repairs xfs: define an in-memory btree for storing refcount bag info during repairs
2 parents fd43925 + 7fbaab5 commit 74acb70

11 files changed

Lines changed: 872 additions & 108 deletions

File tree

fs/xfs/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,8 @@ xfs-y += $(addprefix scrub/, \
198198
inode_repair.o \
199199
newbt.o \
200200
nlinks_repair.o \
201+
rcbag_btree.o \
202+
rcbag.o \
201203
reap.o \
202204
refcount_repair.o \
203205
repair.o \

fs/xfs/scrub/rcbag.c

Lines changed: 307 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,307 @@
1+
// SPDX-License-Identifier: GPL-2.0-or-later
2+
/*
3+
* Copyright (c) 2022-2024 Oracle. All Rights Reserved.
4+
* Author: Darrick J. Wong <djwong@kernel.org>
5+
*/
6+
#include "xfs.h"
7+
#include "xfs_fs.h"
8+
#include "xfs_shared.h"
9+
#include "xfs_format.h"
10+
#include "xfs_log_format.h"
11+
#include "xfs_trans.h"
12+
#include "xfs_trans_resv.h"
13+
#include "xfs_mount.h"
14+
#include "xfs_defer.h"
15+
#include "xfs_btree.h"
16+
#include "xfs_buf_mem.h"
17+
#include "xfs_btree_mem.h"
18+
#include "xfs_error.h"
19+
#include "scrub/scrub.h"
20+
#include "scrub/rcbag_btree.h"
21+
#include "scrub/rcbag.h"
22+
#include "scrub/trace.h"
23+
24+
struct rcbag {
25+
struct xfs_mount *mp;
26+
struct xfbtree xfbtree;
27+
uint64_t nr_items;
28+
};
29+
30+
int
31+
rcbag_init(
32+
struct xfs_mount *mp,
33+
struct xfs_buftarg *btp,
34+
struct rcbag **bagp)
35+
{
36+
struct rcbag *bag;
37+
int error;
38+
39+
bag = kzalloc(sizeof(struct rcbag), XCHK_GFP_FLAGS);
40+
if (!bag)
41+
return -ENOMEM;
42+
43+
bag->nr_items = 0;
44+
bag->mp = mp;
45+
46+
error = rcbagbt_mem_init(mp, &bag->xfbtree, btp);
47+
if (error)
48+
goto out_bag;
49+
50+
*bagp = bag;
51+
return 0;
52+
53+
out_bag:
54+
kfree(bag);
55+
return error;
56+
}
57+
58+
void
59+
rcbag_free(
60+
struct rcbag **bagp)
61+
{
62+
struct rcbag *bag = *bagp;
63+
64+
xfbtree_destroy(&bag->xfbtree);
65+
kfree(bag);
66+
*bagp = NULL;
67+
}
68+
69+
/* Track an rmap in the refcount bag. */
70+
int
71+
rcbag_add(
72+
struct rcbag *bag,
73+
struct xfs_trans *tp,
74+
const struct xfs_rmap_irec *rmap)
75+
{
76+
struct rcbag_rec bagrec;
77+
struct xfs_mount *mp = bag->mp;
78+
struct xfs_btree_cur *cur;
79+
int has;
80+
int error;
81+
82+
cur = rcbagbt_mem_cursor(mp, tp, &bag->xfbtree);
83+
error = rcbagbt_lookup_eq(cur, rmap, &has);
84+
if (error)
85+
goto out_cur;
86+
87+
if (has) {
88+
error = rcbagbt_get_rec(cur, &bagrec, &has);
89+
if (error)
90+
goto out_cur;
91+
if (!has) {
92+
error = -EFSCORRUPTED;
93+
goto out_cur;
94+
}
95+
96+
bagrec.rbg_refcount++;
97+
error = rcbagbt_update(cur, &bagrec);
98+
if (error)
99+
goto out_cur;
100+
} else {
101+
bagrec.rbg_startblock = rmap->rm_startblock;
102+
bagrec.rbg_blockcount = rmap->rm_blockcount;
103+
bagrec.rbg_refcount = 1;
104+
105+
error = rcbagbt_insert(cur, &bagrec, &has);
106+
if (error)
107+
goto out_cur;
108+
if (!has) {
109+
error = -EFSCORRUPTED;
110+
goto out_cur;
111+
}
112+
}
113+
114+
xfs_btree_del_cursor(cur, 0);
115+
116+
error = xfbtree_trans_commit(&bag->xfbtree, tp);
117+
if (error)
118+
return error;
119+
120+
bag->nr_items++;
121+
return 0;
122+
123+
out_cur:
124+
xfs_btree_del_cursor(cur, error);
125+
xfbtree_trans_cancel(&bag->xfbtree, tp);
126+
return error;
127+
}
128+
129+
/* Return the number of records in the bag. */
130+
uint64_t
131+
rcbag_count(
132+
const struct rcbag *rcbag)
133+
{
134+
return rcbag->nr_items;
135+
}
136+
137+
static inline uint32_t rcbag_rec_next_bno(const struct rcbag_rec *r)
138+
{
139+
return r->rbg_startblock + r->rbg_blockcount;
140+
}
141+
142+
/*
143+
* Find the next block where the refcount changes, given the next rmap we
144+
* looked at and the ones we're already tracking.
145+
*/
146+
int
147+
rcbag_next_edge(
148+
struct rcbag *bag,
149+
struct xfs_trans *tp,
150+
const struct xfs_rmap_irec *next_rmap,
151+
bool next_valid,
152+
uint32_t *next_bnop)
153+
{
154+
struct rcbag_rec bagrec;
155+
struct xfs_mount *mp = bag->mp;
156+
struct xfs_btree_cur *cur;
157+
uint32_t next_bno = NULLAGBLOCK;
158+
int has;
159+
int error;
160+
161+
if (next_valid)
162+
next_bno = next_rmap->rm_startblock;
163+
164+
cur = rcbagbt_mem_cursor(mp, tp, &bag->xfbtree);
165+
error = xfs_btree_goto_left_edge(cur);
166+
if (error)
167+
goto out_cur;
168+
169+
while (true) {
170+
error = xfs_btree_increment(cur, 0, &has);
171+
if (error)
172+
goto out_cur;
173+
if (!has)
174+
break;
175+
176+
error = rcbagbt_get_rec(cur, &bagrec, &has);
177+
if (error)
178+
goto out_cur;
179+
if (!has) {
180+
error = -EFSCORRUPTED;
181+
goto out_cur;
182+
}
183+
184+
next_bno = min(next_bno, rcbag_rec_next_bno(&bagrec));
185+
}
186+
187+
/*
188+
* We should have found /something/ because either next_rrm is the next
189+
* interesting rmap to look at after emitting this refcount extent, or
190+
* there are other rmaps in rmap_bag contributing to the current
191+
* sharing count. But if something is seriously wrong, bail out.
192+
*/
193+
if (next_bno == NULLAGBLOCK) {
194+
error = -EFSCORRUPTED;
195+
goto out_cur;
196+
}
197+
198+
xfs_btree_del_cursor(cur, 0);
199+
200+
*next_bnop = next_bno;
201+
return 0;
202+
203+
out_cur:
204+
xfs_btree_del_cursor(cur, error);
205+
return error;
206+
}
207+
208+
/* Pop all refcount bag records that end at next_bno */
209+
int
210+
rcbag_remove_ending_at(
211+
struct rcbag *bag,
212+
struct xfs_trans *tp,
213+
uint32_t next_bno)
214+
{
215+
struct rcbag_rec bagrec;
216+
struct xfs_mount *mp = bag->mp;
217+
struct xfs_btree_cur *cur;
218+
int has;
219+
int error;
220+
221+
/* go to the right edge of the tree */
222+
cur = rcbagbt_mem_cursor(mp, tp, &bag->xfbtree);
223+
memset(&cur->bc_rec, 0xFF, sizeof(cur->bc_rec));
224+
error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, &has);
225+
if (error)
226+
goto out_cur;
227+
228+
while (true) {
229+
error = xfs_btree_decrement(cur, 0, &has);
230+
if (error)
231+
goto out_cur;
232+
if (!has)
233+
break;
234+
235+
error = rcbagbt_get_rec(cur, &bagrec, &has);
236+
if (error)
237+
goto out_cur;
238+
if (!has) {
239+
error = -EFSCORRUPTED;
240+
goto out_cur;
241+
}
242+
243+
if (rcbag_rec_next_bno(&bagrec) != next_bno)
244+
continue;
245+
246+
error = xfs_btree_delete(cur, &has);
247+
if (error)
248+
goto out_cur;
249+
if (!has) {
250+
error = -EFSCORRUPTED;
251+
goto out_cur;
252+
}
253+
254+
bag->nr_items -= bagrec.rbg_refcount;
255+
}
256+
257+
xfs_btree_del_cursor(cur, 0);
258+
return xfbtree_trans_commit(&bag->xfbtree, tp);
259+
out_cur:
260+
xfs_btree_del_cursor(cur, error);
261+
xfbtree_trans_cancel(&bag->xfbtree, tp);
262+
return error;
263+
}
264+
265+
/* Dump the rcbag. */
266+
void
267+
rcbag_dump(
268+
struct rcbag *bag,
269+
struct xfs_trans *tp)
270+
{
271+
struct rcbag_rec bagrec;
272+
struct xfs_mount *mp = bag->mp;
273+
struct xfs_btree_cur *cur;
274+
unsigned long long nr = 0;
275+
int has;
276+
int error;
277+
278+
cur = rcbagbt_mem_cursor(mp, tp, &bag->xfbtree);
279+
error = xfs_btree_goto_left_edge(cur);
280+
if (error)
281+
goto out_cur;
282+
283+
while (true) {
284+
error = xfs_btree_increment(cur, 0, &has);
285+
if (error)
286+
goto out_cur;
287+
if (!has)
288+
break;
289+
290+
error = rcbagbt_get_rec(cur, &bagrec, &has);
291+
if (error)
292+
goto out_cur;
293+
if (!has) {
294+
error = -EFSCORRUPTED;
295+
goto out_cur;
296+
}
297+
298+
xfs_err(bag->mp, "[%llu]: bno 0x%x fsbcount 0x%x refcount 0x%llx\n",
299+
nr++,
300+
(unsigned int)bagrec.rbg_startblock,
301+
(unsigned int)bagrec.rbg_blockcount,
302+
(unsigned long long)bagrec.rbg_refcount);
303+
}
304+
305+
out_cur:
306+
xfs_btree_del_cursor(cur, error);
307+
}

fs/xfs/scrub/rcbag.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// SPDX-License-Identifier: GPL-2.0-or-later
2+
/*
3+
* Copyright (c) 2022-2024 Oracle. All Rights Reserved.
4+
* Author: Darrick J. Wong <djwong@kernel.org>
5+
*/
6+
#ifndef __XFS_SCRUB_RCBAG_H__
#define __XFS_SCRUB_RCBAG_H__

/*
 * Forward declarations for every struct named in the prototypes below.
 * Without them a tag first seen inside a parameter list only has prototype
 * scope, so the header would depend on the includer's include order.
 */
struct xfs_mount;
struct rcbag;
struct xfs_buftarg;
struct xfs_trans;
struct xfs_rmap_irec;

/* Allocate an empty bag; on success *bagp is set and 0 is returned. */
int rcbag_init(struct xfs_mount *mp, struct xfs_buftarg *btp,
		struct rcbag **bagp);

/* Destroy the bag and reset *bagp to NULL. */
void rcbag_free(struct rcbag **bagp);

/* Track one more rmap in the bag; returns 0 or a negative errno. */
int rcbag_add(struct rcbag *bag, struct xfs_trans *tp,
		const struct xfs_rmap_irec *rmap);

/* Number of items currently tracked in the bag. */
uint64_t rcbag_count(const struct rcbag *bag);

/*
 * Find the next block where the refcount changes and store it in *next_bnop.
 * next_rmap is only consulted when next_valid is true.
 */
int rcbag_next_edge(struct rcbag *bag, struct xfs_trans *tp,
		const struct xfs_rmap_irec *next_rmap, bool next_valid,
		uint32_t *next_bnop);

/* Remove every bag record that ends at next_bno. */
int rcbag_remove_ending_at(struct rcbag *bag, struct xfs_trans *tp,
		uint32_t next_bno);

/* Log the contents of the bag. */
void rcbag_dump(struct rcbag *bag, struct xfs_trans *tp);

#endif /* __XFS_SCRUB_RCBAG_H__ */

0 commit comments

Comments
 (0)