Skip to content

Commit 1ee7550

Browse files
dchinner authored and dgchinner committed
Merge tag 'rmap-btree-fix-key-handling-6.4_2023-04-11' of git://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into guilt/xfs-for-next
xfs: fix rmap btree key flag handling [v24.5]

This series fixes numerous flag handling bugs in the rmapbt key code. The most serious transgression is that key comparisons completely strip out all flag bits from rm_offset, including the ones that participate in record lookups. The second problem is that for years we've been letting the unwritten flag (which is an attribute of a specific record and not part of the record key) escape from leaf records into key records.

The solution to the second problem is to filter attribute flags when creating keys from records, and the solution to the first problem is to preserve *only* the flags used for key lookups. The ATTR and BMBT flags are a part of the lookup key, and the UNWRITTEN flag is a record attribute.

This has worked for years without generating user complaints because ATTR and BMBT extents cannot be shared, so key comparisons succeed solely on rm_startblock. Only file data fork extents can be shared, and those records never set any of the three flag bits, so comparisons that dig into rm_owner and rm_offset work just fine.

A filesystem written with an unpatched kernel and mounted on a patched kernel will work correctly because the ATTR/BMBT flags have been conveyed into keys correctly all along, and we still ignore the UNWRITTEN flag in any key record. This was what doomed my previous attempt to correct this problem in 2019.

A filesystem written with a patched kernel and mounted on an unpatched kernel will also work correctly because unpatched kernels ignore all flags.

With this patchset applied, the scrub code gains the ability to detect rmap btrees with incorrectly set attr and bmbt flags in the key records. After three years of testing, I haven't encountered any problems.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2 parents b764ea2 + 3838456 commit 1ee7550

4 files changed

Lines changed: 95 additions & 10 deletions

File tree

fs/xfs/libxfs/xfs_rmap_btree.c

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -156,14 +156,24 @@ xfs_rmapbt_get_maxrecs(
156156
return cur->bc_mp->m_rmap_mxr[level != 0];
157157
}
158158

159+
/*
160+
* Convert the ondisk record's offset field into the ondisk key's offset field.
161+
* Fork and bmbt are significant parts of the rmap record key, but written
162+
* status is merely a record attribute.
163+
*/
164+
static inline __be64 ondisk_rec_offset_to_key(const union xfs_btree_rec *rec)
165+
{
166+
return rec->rmap.rm_offset & ~cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN);
167+
}
168+
159169
STATIC void
160170
xfs_rmapbt_init_key_from_rec(
161171
union xfs_btree_key *key,
162172
const union xfs_btree_rec *rec)
163173
{
164174
key->rmap.rm_startblock = rec->rmap.rm_startblock;
165175
key->rmap.rm_owner = rec->rmap.rm_owner;
166-
key->rmap.rm_offset = rec->rmap.rm_offset;
176+
key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
167177
}
168178

169179
/*
@@ -186,7 +196,7 @@ xfs_rmapbt_init_high_key_from_rec(
186196
key->rmap.rm_startblock = rec->rmap.rm_startblock;
187197
be32_add_cpu(&key->rmap.rm_startblock, adj);
188198
key->rmap.rm_owner = rec->rmap.rm_owner;
189-
key->rmap.rm_offset = rec->rmap.rm_offset;
199+
key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
190200
if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
191201
XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
192202
return;
@@ -219,6 +229,16 @@ xfs_rmapbt_init_ptr_from_cur(
219229
ptr->s = agf->agf_roots[cur->bc_btnum];
220230
}
221231

232+
/*
233+
* Mask the appropriate parts of the ondisk key field for a key comparison.
234+
* Fork and bmbt are significant parts of the rmap record key, but written
235+
* status is merely a record attribute.
236+
*/
237+
static inline uint64_t offset_keymask(uint64_t offset)
238+
{
239+
return offset & ~XFS_RMAP_OFF_UNWRITTEN;
240+
}
241+
222242
STATIC int64_t
223243
xfs_rmapbt_key_diff(
224244
struct xfs_btree_cur *cur,
@@ -240,8 +260,8 @@ xfs_rmapbt_key_diff(
240260
else if (y > x)
241261
return -1;
242262

243-
x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset));
244-
y = rec->rm_offset;
263+
x = offset_keymask(be64_to_cpu(kp->rm_offset));
264+
y = offset_keymask(xfs_rmap_irec_offset_pack(rec));
245265
if (x > y)
246266
return 1;
247267
else if (y > x)
@@ -272,8 +292,8 @@ xfs_rmapbt_diff_two_keys(
272292
else if (y > x)
273293
return -1;
274294

275-
x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset));
276-
y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset));
295+
x = offset_keymask(be64_to_cpu(kp1->rm_offset));
296+
y = offset_keymask(be64_to_cpu(kp2->rm_offset));
277297
if (x > y)
278298
return 1;
279299
else if (y > x)
@@ -387,8 +407,8 @@ xfs_rmapbt_keys_inorder(
387407
return 1;
388408
else if (a > b)
389409
return 0;
390-
a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset));
391-
b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset));
410+
a = offset_keymask(be64_to_cpu(k1->rmap.rm_offset));
411+
b = offset_keymask(be64_to_cpu(k2->rmap.rm_offset));
392412
if (a <= b)
393413
return 1;
394414
return 0;
@@ -417,8 +437,8 @@ xfs_rmapbt_recs_inorder(
417437
return 1;
418438
else if (a > b)
419439
return 0;
420-
a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset));
421-
b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset));
440+
a = offset_keymask(be64_to_cpu(r1->rmap.rm_offset));
441+
b = offset_keymask(be64_to_cpu(r2->rmap.rm_offset));
422442
if (a <= b)
423443
return 1;
424444
return 0;

fs/xfs/scrub/btree.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,16 @@ xchk_btree_xref_set_corrupt(
119119
__return_address);
120120
}
121121

122+
void
123+
xchk_btree_set_preen(
124+
struct xfs_scrub *sc,
125+
struct xfs_btree_cur *cur,
126+
int level)
127+
{
128+
__xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_PREEN,
129+
__return_address);
130+
}
131+
122132
/*
123133
* Make sure this record is in order and doesn't stray outside of the parent
124134
* keys.

fs/xfs/scrub/btree.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ bool xchk_btree_xref_process_error(struct xfs_scrub *sc,
1919
/* Check for btree corruption. */
2020
void xchk_btree_set_corrupt(struct xfs_scrub *sc,
2121
struct xfs_btree_cur *cur, int level);
22+
void xchk_btree_set_preen(struct xfs_scrub *sc, struct xfs_btree_cur *cur,
23+
int level);
2224

2325
/* Check for btree xref discrepancies. */
2426
void xchk_btree_xref_set_corrupt(struct xfs_scrub *sc,

fs/xfs/scrub/rmap.c

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,58 @@ xchk_rmapbt_xref(
8787
xchk_rmapbt_xref_refc(sc, irec);
8888
}
8989

90+
/*
91+
* Check for bogus UNWRITTEN flags in the rmapbt node block keys.
92+
*
93+
* In reverse mapping records, the file mapping extent state
94+
* (XFS_RMAP_OFF_UNWRITTEN) is a record attribute, not a key field. It is not
95+
* involved in lookups in any way. In older kernels, the functions that
96+
* convert rmapbt records to keys forgot to filter out the extent state bit,
97+
* even though the key comparison functions have filtered the flag correctly.
98+
* If we spot an rmap key with the unwritten bit set in rm_offset, we should
99+
* mark the btree as needing optimization to rebuild the btree without those
100+
* flags.
101+
*/
102+
STATIC void
103+
xchk_rmapbt_check_unwritten_in_keyflags(
104+
struct xchk_btree *bs)
105+
{
106+
struct xfs_scrub *sc = bs->sc;
107+
struct xfs_btree_cur *cur = bs->cur;
108+
struct xfs_btree_block *keyblock;
109+
union xfs_btree_key *lkey, *hkey;
110+
__be64 badflag = cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN);
111+
unsigned int level;
112+
113+
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_PREEN)
114+
return;
115+
116+
for (level = 1; level < cur->bc_nlevels; level++) {
117+
struct xfs_buf *bp;
118+
unsigned int ptr;
119+
120+
/* Only check the first time we've seen this node block. */
121+
if (cur->bc_levels[level].ptr > 1)
122+
continue;
123+
124+
keyblock = xfs_btree_get_block(cur, level, &bp);
125+
for (ptr = 1; ptr <= be16_to_cpu(keyblock->bb_numrecs); ptr++) {
126+
lkey = xfs_btree_key_addr(cur, ptr, keyblock);
127+
128+
if (lkey->rmap.rm_offset & badflag) {
129+
xchk_btree_set_preen(sc, cur, level);
130+
break;
131+
}
132+
133+
hkey = xfs_btree_high_key_addr(cur, ptr, keyblock);
134+
if (hkey->rmap.rm_offset & badflag) {
135+
xchk_btree_set_preen(sc, cur, level);
136+
break;
137+
}
138+
}
139+
}
140+
}
141+
90142
/* Scrub an rmapbt record. */
91143
STATIC int
92144
xchk_rmapbt_rec(
@@ -101,6 +153,7 @@ xchk_rmapbt_rec(
101153
return 0;
102154
}
103155

156+
xchk_rmapbt_check_unwritten_in_keyflags(bs);
104157
xchk_rmapbt_xref(bs->sc, &irec);
105158
return 0;
106159
}

0 commit comments

Comments
 (0)