Skip to content

Commit 08c987d

Browse files
author
Darrick J. Wong
committed
xfs: fix rm_offset flag handling in rmap keys
Keys for extent interval records in the reverse mapping btree are supposed to be computed as follows: (physical block, owner, fork, is_btree, offset) This provides users the ability to look up a reverse mapping from a file block mapping record -- start with the physical block; then if there are multiple records for the same block, move on to the owner; then the inode fork type; and so on to the file offset. Unfortunately, the code that creates rmap lookup keys from rmap records forgot to mask off the record attribute flags, leading to ondisk keys that look like this: (physical block, owner, fork, is_btree, unwritten state, offset) Fortunately, this has all worked ok for the past six years because the key comparison functions incorrectly ignore the fork/bmbt/unwritten information that's encoded in the on-disk offset. This means that lookup comparisons are only done with: (physical block, owner, offset) Queries can (theoretically) return incorrect results because of this omission. On consistent filesystems this isn't an issue because xattr and bmbt blocks cannot be shared and hence the comparisons succeed purely on the contents of the rm_startblock field. For the one case where we support sharing (written data fork blocks) all flag bits are zero, so the omission in the comparison has no ill effects. Unfortunately, this bug prevents scrub from detecting incorrect fork and bmbt flag bits in the rmap btree, so we really do need to fix the compare code. Old filesystems with the unwritten bit erroneously set in the rmap key struct will work fine on new kernels since we still ignore the unwritten bit. New filesystems on older kernels will work fine since the old kernels never paid attention to the unwritten bit. A previous version of this patch forgot to keep the (un)written state flag masked during the comparison and caused a major regression in 5.9.x since unwritten extent conversion can update an rmap record without requiring key updates. Note that blocks cannot go directly from data fork to attr fork without being deallocated and reallocated, nor can they be added to or removed from a bmbt without a free/alloc cycle, so this should not cause any regressions. Found by fuzzing keys[1].attrfork = ones on xfs/371. Fixes: 4b8ed67 ("xfs: add rmap btree operations") Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Dave Chinner <dchinner@redhat.com>
1 parent de1a9ce commit 08c987d

1 file changed

Lines changed: 30 additions & 10 deletions

File tree

fs/xfs/libxfs/xfs_rmap_btree.c

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -156,14 +156,24 @@ xfs_rmapbt_get_maxrecs(
156156
return cur->bc_mp->m_rmap_mxr[level != 0];
157157
}
158158

159+
/*
160+
* Convert the ondisk record's offset field into the ondisk key's offset field.
161+
* Fork and bmbt are significant parts of the rmap record key, but written
162+
* status is merely a record attribute.
163+
*/
164+
static inline __be64 ondisk_rec_offset_to_key(const union xfs_btree_rec *rec)
165+
{
166+
return rec->rmap.rm_offset & ~cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN);
167+
}
168+
159169
STATIC void
160170
xfs_rmapbt_init_key_from_rec(
161171
union xfs_btree_key *key,
162172
const union xfs_btree_rec *rec)
163173
{
164174
key->rmap.rm_startblock = rec->rmap.rm_startblock;
165175
key->rmap.rm_owner = rec->rmap.rm_owner;
166-
key->rmap.rm_offset = rec->rmap.rm_offset;
176+
key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
167177
}
168178

169179
/*
@@ -186,7 +196,7 @@ xfs_rmapbt_init_high_key_from_rec(
186196
key->rmap.rm_startblock = rec->rmap.rm_startblock;
187197
be32_add_cpu(&key->rmap.rm_startblock, adj);
188198
key->rmap.rm_owner = rec->rmap.rm_owner;
189-
key->rmap.rm_offset = rec->rmap.rm_offset;
199+
key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
190200
if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
191201
XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
192202
return;
@@ -219,6 +229,16 @@ xfs_rmapbt_init_ptr_from_cur(
219229
ptr->s = agf->agf_roots[cur->bc_btnum];
220230
}
221231

232+
/*
233+
* Mask the appropriate parts of the ondisk key field for a key comparison.
234+
* Fork and bmbt are significant parts of the rmap record key, but written
235+
* status is merely a record attribute.
236+
*/
237+
static inline uint64_t offset_keymask(uint64_t offset)
238+
{
239+
return offset & ~XFS_RMAP_OFF_UNWRITTEN;
240+
}
241+
222242
STATIC int64_t
223243
xfs_rmapbt_key_diff(
224244
struct xfs_btree_cur *cur,
@@ -240,8 +260,8 @@ xfs_rmapbt_key_diff(
240260
else if (y > x)
241261
return -1;
242262

243-
x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset));
244-
y = rec->rm_offset;
263+
x = offset_keymask(be64_to_cpu(kp->rm_offset));
264+
y = offset_keymask(xfs_rmap_irec_offset_pack(rec));
245265
if (x > y)
246266
return 1;
247267
else if (y > x)
@@ -272,8 +292,8 @@ xfs_rmapbt_diff_two_keys(
272292
else if (y > x)
273293
return -1;
274294

275-
x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset));
276-
y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset));
295+
x = offset_keymask(be64_to_cpu(kp1->rm_offset));
296+
y = offset_keymask(be64_to_cpu(kp2->rm_offset));
277297
if (x > y)
278298
return 1;
279299
else if (y > x)
@@ -387,8 +407,8 @@ xfs_rmapbt_keys_inorder(
387407
return 1;
388408
else if (a > b)
389409
return 0;
390-
a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset));
391-
b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset));
410+
a = offset_keymask(be64_to_cpu(k1->rmap.rm_offset));
411+
b = offset_keymask(be64_to_cpu(k2->rmap.rm_offset));
392412
if (a <= b)
393413
return 1;
394414
return 0;
@@ -417,8 +437,8 @@ xfs_rmapbt_recs_inorder(
417437
return 1;
418438
else if (a > b)
419439
return 0;
420-
a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset));
421-
b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset));
440+
a = offset_keymask(be64_to_cpu(r1->rmap.rm_offset));
441+
b = offset_keymask(be64_to_cpu(r2->rmap.rm_offset));
422442
if (a <= b)
423443
return 1;
424444
return 0;

0 commit comments

Comments
 (0)