Skip to content

Commit b9fcf89

Browse files
dchinnerdgchinner
authored andcommitted
Merge tag 'scrub-detect-mergeable-records-6.4_2023-04-11' of git://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into guilt/xfs-for-next
xfs: detect mergeable and overlapping btree records [v24.5] While I was doing differential fuzz analysis between xfs_scrub and xfs_repair, I noticed that xfs_repair was only partially effective at detecting btree records that can be merged, and xfs_scrub totally didn't notice at all. For every interval btree type except for the bmbt, there should never exist two adjacent records with adjacent keyspaces because the blockcount field is always large enough to span the entire keyspace of the domain. This is because the free space, rmap, and refcount btrees have a blockcount field large enough to store the maximum AG length, and there can never be an allocation larger than an AG. The bmbt is a different story due to its ondisk encoding where the blockcount is only 21 bits wide. Because AGs can span up to 2^31 blocks and the RT volume can span up to 2^52 blocks, a preallocation of 2^22 blocks will be expressed as two records of 2^21 length. We don't opportunistically combine records when doing bmbt operations, which is why the fsck tools have never complained about this scenario. Offline repair is partially effective at detecting mergeable records because I taught it to do that for the rmap and refcount btrees. This series enhances the free space, rmap, and refcount scrubbers to detect mergeable records. For the bmbt, it will flag the file as being eligible for an optimization to shrink the size of the data structure. The last patch in this set also enhances the rmap scrubber to detect records that overlap incorrectly. This check is done automatically for non-overlapping btree types, but we have to do it separately for the rmapbt because there are constraints on which allocation types are allowed to overlap. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Dave Chinner <david@fromorbit.com>
2 parents d808a8e + 1c1646a commit b9fcf89

3 files changed

Lines changed: 196 additions & 3 deletions

File tree

fs/xfs/scrub/alloc.c

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,12 @@ xchk_setup_ag_allocbt(
3131
}
3232

3333
/* Free space btree scrubber. */
34+
35+
struct xchk_alloc {
36+
/* Previous free space extent. */
37+
struct xfs_alloc_rec_incore prev;
38+
};
39+
3440
/*
3541
* Ensure there's a corresponding cntbt/bnobt record matching this
3642
* bnobt/cntbt record, respectively.
@@ -93,20 +99,40 @@ xchk_allocbt_xref(
9399
xchk_xref_is_not_cow_staging(sc, agbno, len);
94100
}
95101

102+
/* Flag failures for records that could be merged. */
103+
STATIC void
104+
xchk_allocbt_mergeable(
105+
struct xchk_btree *bs,
106+
struct xchk_alloc *ca,
107+
const struct xfs_alloc_rec_incore *irec)
108+
{
109+
if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
110+
return;
111+
112+
if (ca->prev.ar_blockcount > 0 &&
113+
ca->prev.ar_startblock + ca->prev.ar_blockcount == irec->ar_startblock &&
114+
ca->prev.ar_blockcount + irec->ar_blockcount < (uint32_t)~0U)
115+
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
116+
117+
memcpy(&ca->prev, irec, sizeof(*irec));
118+
}
119+
96120
/* Scrub a bnobt/cntbt record. */
97121
STATIC int
98122
xchk_allocbt_rec(
99123
struct xchk_btree *bs,
100124
const union xfs_btree_rec *rec)
101125
{
102126
struct xfs_alloc_rec_incore irec;
127+
struct xchk_alloc *ca = bs->private;
103128

104129
xfs_alloc_btrec_to_irec(rec, &irec);
105130
if (xfs_alloc_check_irec(bs->cur, &irec) != NULL) {
106131
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
107132
return 0;
108133
}
109134

135+
xchk_allocbt_mergeable(bs, ca, &irec);
110136
xchk_allocbt_xref(bs->sc, &irec);
111137

112138
return 0;
@@ -118,10 +144,11 @@ xchk_allocbt(
118144
struct xfs_scrub *sc,
119145
xfs_btnum_t which)
120146
{
147+
struct xchk_alloc ca = { };
121148
struct xfs_btree_cur *cur;
122149

123150
cur = which == XFS_BTNUM_BNO ? sc->sa.bno_cur : sc->sa.cnt_cur;
124-
return xchk_btree(sc, cur, xchk_allocbt_rec, &XFS_RMAP_OINFO_AG, NULL);
151+
return xchk_btree(sc, cur, xchk_allocbt_rec, &XFS_RMAP_OINFO_AG, &ca);
125152
}
126153

127154
int

fs/xfs/scrub/refcount.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,9 @@ xchk_refcountbt_xref(
333333
}
334334

335335
struct xchk_refcbt_records {
336+
/* Previous refcount record. */
337+
struct xfs_refcount_irec prev_rec;
338+
336339
/* The next AG block where we aren't expecting shared extents. */
337340
xfs_agblock_t next_unshared_agbno;
338341

@@ -390,6 +393,46 @@ xchk_refcountbt_xref_gaps(
390393
xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur);
391394
}
392395

396+
static inline bool
397+
xchk_refcount_mergeable(
398+
struct xchk_refcbt_records *rrc,
399+
const struct xfs_refcount_irec *r2)
400+
{
401+
const struct xfs_refcount_irec *r1 = &rrc->prev_rec;
402+
403+
/* Ignore if prev_rec is not yet initialized. */
404+
if (r1->rc_blockcount > 0)
405+
return false;
406+
407+
if (r1->rc_domain != r2->rc_domain)
408+
return false;
409+
if (r1->rc_startblock + r1->rc_blockcount != r2->rc_startblock)
410+
return false;
411+
if (r1->rc_refcount != r2->rc_refcount)
412+
return false;
413+
if ((unsigned long long)r1->rc_blockcount + r2->rc_blockcount >
414+
MAXREFCEXTLEN)
415+
return false;
416+
417+
return true;
418+
}
419+
420+
/* Flag failures for records that could be merged. */
421+
STATIC void
422+
xchk_refcountbt_check_mergeable(
423+
struct xchk_btree *bs,
424+
struct xchk_refcbt_records *rrc,
425+
const struct xfs_refcount_irec *irec)
426+
{
427+
if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
428+
return;
429+
430+
if (xchk_refcount_mergeable(rrc, irec))
431+
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
432+
433+
memcpy(&rrc->prev_rec, irec, sizeof(struct xfs_refcount_irec));
434+
}
435+
393436
/* Scrub a refcountbt record. */
394437
STATIC int
395438
xchk_refcountbt_rec(
@@ -414,6 +457,7 @@ xchk_refcountbt_rec(
414457
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
415458
rrc->prev_domain = irec.rc_domain;
416459

460+
xchk_refcountbt_check_mergeable(bs, rrc, &irec);
417461
xchk_refcountbt_xref(bs->sc, &irec);
418462

419463
/*

fs/xfs/scrub/rmap.c

Lines changed: 124 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,21 @@ xchk_setup_ag_rmapbt(
3232

3333
/* Reverse-mapping scrubber. */
3434

35+
struct xchk_rmap {
36+
/*
37+
* The furthest-reaching of the rmapbt records that we've already
38+
* processed. This enables us to detect overlapping records for space
39+
* allocations that cannot be shared.
40+
*/
41+
struct xfs_rmap_irec overlap_rec;
42+
43+
/*
44+
* The previous rmapbt record, so that we can check for two records
45+
* that could be one.
46+
*/
47+
struct xfs_rmap_irec prev_rec;
48+
};
49+
3550
/* Cross-reference a rmap against the refcount btree. */
3651
STATIC void
3752
xchk_rmapbt_xref_refc(
@@ -139,12 +154,108 @@ xchk_rmapbt_check_unwritten_in_keyflags(
139154
}
140155
}
141156

157+
static inline bool
158+
xchk_rmapbt_is_shareable(
159+
struct xfs_scrub *sc,
160+
const struct xfs_rmap_irec *irec)
161+
{
162+
if (!xfs_has_reflink(sc->mp))
163+
return false;
164+
if (XFS_RMAP_NON_INODE_OWNER(irec->rm_owner))
165+
return false;
166+
if (irec->rm_flags & (XFS_RMAP_BMBT_BLOCK | XFS_RMAP_ATTR_FORK |
167+
XFS_RMAP_UNWRITTEN))
168+
return false;
169+
return true;
170+
}
171+
172+
/* Flag failures for records that overlap but cannot. */
173+
STATIC void
174+
xchk_rmapbt_check_overlapping(
175+
struct xchk_btree *bs,
176+
struct xchk_rmap *cr,
177+
const struct xfs_rmap_irec *irec)
178+
{
179+
xfs_agblock_t pnext, inext;
180+
181+
if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
182+
return;
183+
184+
/* No previous record? */
185+
if (cr->overlap_rec.rm_blockcount == 0)
186+
goto set_prev;
187+
188+
/* Do overlap_rec and irec overlap? */
189+
pnext = cr->overlap_rec.rm_startblock + cr->overlap_rec.rm_blockcount;
190+
if (pnext <= irec->rm_startblock)
191+
goto set_prev;
192+
193+
/* Overlap is only allowed if both records are data fork mappings. */
194+
if (!xchk_rmapbt_is_shareable(bs->sc, &cr->overlap_rec) ||
195+
!xchk_rmapbt_is_shareable(bs->sc, irec))
196+
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
197+
198+
/* Save whichever rmap record extends furthest. */
199+
inext = irec->rm_startblock + irec->rm_blockcount;
200+
if (pnext > inext)
201+
return;
202+
203+
set_prev:
204+
memcpy(&cr->overlap_rec, irec, sizeof(struct xfs_rmap_irec));
205+
}
206+
207+
/* Decide if two reverse-mapping records can be merged. */
208+
static inline bool
209+
xchk_rmap_mergeable(
210+
struct xchk_rmap *cr,
211+
const struct xfs_rmap_irec *r2)
212+
{
213+
const struct xfs_rmap_irec *r1 = &cr->prev_rec;
214+
215+
/* Ignore if prev_rec is not yet initialized. */
216+
if (cr->prev_rec.rm_blockcount == 0)
217+
return false;
218+
219+
if (r1->rm_owner != r2->rm_owner)
220+
return false;
221+
if (r1->rm_startblock + r1->rm_blockcount != r2->rm_startblock)
222+
return false;
223+
if ((unsigned long long)r1->rm_blockcount + r2->rm_blockcount >
224+
XFS_RMAP_LEN_MAX)
225+
return false;
226+
if (XFS_RMAP_NON_INODE_OWNER(r2->rm_owner))
227+
return true;
228+
/* must be an inode owner below here */
229+
if (r1->rm_flags != r2->rm_flags)
230+
return false;
231+
if (r1->rm_flags & XFS_RMAP_BMBT_BLOCK)
232+
return true;
233+
return r1->rm_offset + r1->rm_blockcount == r2->rm_offset;
234+
}
235+
236+
/* Flag failures for records that could be merged. */
237+
STATIC void
238+
xchk_rmapbt_check_mergeable(
239+
struct xchk_btree *bs,
240+
struct xchk_rmap *cr,
241+
const struct xfs_rmap_irec *irec)
242+
{
243+
if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
244+
return;
245+
246+
if (xchk_rmap_mergeable(cr, irec))
247+
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
248+
249+
memcpy(&cr->prev_rec, irec, sizeof(struct xfs_rmap_irec));
250+
}
251+
142252
/* Scrub an rmapbt record. */
143253
STATIC int
144254
xchk_rmapbt_rec(
145255
struct xchk_btree *bs,
146256
const union xfs_btree_rec *rec)
147257
{
258+
struct xchk_rmap *cr = bs->private;
148259
struct xfs_rmap_irec irec;
149260

150261
if (xfs_rmap_btrec_to_irec(rec, &irec) != NULL ||
@@ -154,6 +265,8 @@ xchk_rmapbt_rec(
154265
}
155266

156267
xchk_rmapbt_check_unwritten_in_keyflags(bs);
268+
xchk_rmapbt_check_mergeable(bs, cr, &irec);
269+
xchk_rmapbt_check_overlapping(bs, cr, &irec);
157270
xchk_rmapbt_xref(bs->sc, &irec);
158271
return 0;
159272
}
@@ -163,8 +276,17 @@ int
163276
xchk_rmapbt(
164277
struct xfs_scrub *sc)
165278
{
166-
return xchk_btree(sc, sc->sa.rmap_cur, xchk_rmapbt_rec,
167-
&XFS_RMAP_OINFO_AG, NULL);
279+
struct xchk_rmap *cr;
280+
int error;
281+
282+
cr = kzalloc(sizeof(struct xchk_rmap), XCHK_GFP_FLAGS);
283+
if (!cr)
284+
return -ENOMEM;
285+
286+
error = xchk_btree(sc, sc->sa.rmap_cur, xchk_rmapbt_rec,
287+
&XFS_RMAP_OINFO_AG, cr);
288+
kfree(cr);
289+
return error;
168290
}
169291

170292
/* xref check that the extent is owned only by a given owner */

0 commit comments

Comments
 (0)