Skip to content

Commit 2744d7a

Browse files
committed
Merge tag 'attr-leaf-freemap-fixes-7.0_2026-01-25' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-7.0-merge
xfs: fix problems in the attr leaf freemap code [1/3] Running generic/753 for hours revealed data corruption problems in the attr leaf block space management code. Under certain circumstances, freemap entries are left with zero size but a nonzero offset. If that offset happens to be the same offset as the end of the entries array during an attr set operation, the leaf entry table expansion will push the freemap record offset upwards without checking for overlap with any other freemap entries. If there happened to be a second freemap entry overlapping with the newly allocated leaf entry space, then the next attr set operation might find that space and overwrite the leaf entry, thereby corrupting the leaf block. Fix this by zeroing the freemap offset any time we set the size to zero. If a subsequent attr set operation finds no space in the freemap, it will compact the block and regenerate the freemaps. With a bit of luck, this should all go splendidly. Signed-off-by: "Darrick J. Wong" <djwong@kernel.org> Signed-off-by: Carlos Maiolino <cem@kernel.org>
2 parents 04a6566 + bd3138e commit 2744d7a

3 files changed

Lines changed: 155 additions & 63 deletions

File tree

fs/xfs/libxfs/xfs_attr_leaf.c

Lines changed: 123 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,59 @@ STATIC void xfs_attr3_leaf_moveents(struct xfs_da_args *args,
7575
int move_count);
7676
STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
7777

78+
/* Compute the byte offset of the end of the leaf entry array. */
79+
static inline int
80+
xfs_attr_leaf_entries_end(
81+
unsigned int hdrcount,
82+
const struct xfs_attr_leafblock *leaf)
83+
{
84+
return hdrcount * sizeof(struct xfs_attr_leaf_entry) +
85+
xfs_attr3_leaf_hdr_size(leaf);
86+
}
87+
88+
static inline bool
89+
ichdr_freemaps_overlap(
90+
const struct xfs_attr3_icleaf_hdr *ichdr,
91+
unsigned int x,
92+
unsigned int y)
93+
{
94+
const unsigned int xend =
95+
ichdr->freemap[x].base + ichdr->freemap[x].size;
96+
const unsigned int yend =
97+
ichdr->freemap[y].base + ichdr->freemap[y].size;
98+
99+
/* empty slots do not overlap */
100+
if (!ichdr->freemap[x].size || !ichdr->freemap[y].size)
101+
return false;
102+
103+
return ichdr->freemap[x].base < yend && xend > ichdr->freemap[y].base;
104+
}
105+
106+
static inline xfs_failaddr_t
107+
xfs_attr_leaf_ichdr_freemaps_verify(
108+
const struct xfs_attr3_icleaf_hdr *ichdr,
109+
const struct xfs_attr_leafblock *leaf)
110+
{
111+
unsigned int entries_end =
112+
xfs_attr_leaf_entries_end(ichdr->count, leaf);
113+
int i;
114+
115+
if (ichdr_freemaps_overlap(ichdr, 0, 1))
116+
return __this_address;
117+
if (ichdr_freemaps_overlap(ichdr, 0, 2))
118+
return __this_address;
119+
if (ichdr_freemaps_overlap(ichdr, 1, 2))
120+
return __this_address;
121+
122+
for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
123+
if (ichdr->freemap[i].size > 0 &&
124+
ichdr->freemap[i].base < entries_end)
125+
return __this_address;
126+
}
127+
128+
return NULL;
129+
}
130+
78131
/*
79132
* attr3 block 'firstused' conversion helpers.
80133
*
@@ -218,6 +271,8 @@ xfs_attr3_leaf_hdr_to_disk(
218271
hdr3->freemap[i].base = cpu_to_be16(from->freemap[i].base);
219272
hdr3->freemap[i].size = cpu_to_be16(from->freemap[i].size);
220273
}
274+
275+
ASSERT(xfs_attr_leaf_ichdr_freemaps_verify(from, to) == NULL);
221276
return;
222277
}
223278
to->hdr.info.forw = cpu_to_be32(from->forw);
@@ -233,6 +288,8 @@ xfs_attr3_leaf_hdr_to_disk(
233288
to->hdr.freemap[i].base = cpu_to_be16(from->freemap[i].base);
234289
to->hdr.freemap[i].size = cpu_to_be16(from->freemap[i].size);
235290
}
291+
292+
ASSERT(xfs_attr_leaf_ichdr_freemaps_verify(from, to) == NULL);
236293
}
237294

238295
static xfs_failaddr_t
@@ -385,6 +442,10 @@ xfs_attr3_leaf_verify(
385442
return __this_address;
386443
}
387444

445+
fa = xfs_attr_leaf_ichdr_freemaps_verify(&ichdr, leaf);
446+
if (fa)
447+
return fa;
448+
388449
return NULL;
389450
}
390451

@@ -1409,8 +1470,7 @@ xfs_attr3_leaf_add(
14091470
* Search through freemap for first-fit on new name length.
14101471
* (may need to figure in size of entry struct too)
14111472
*/
1412-
tablesize = (ichdr.count + 1) * sizeof(xfs_attr_leaf_entry_t)
1413-
+ xfs_attr3_leaf_hdr_size(leaf);
1473+
tablesize = xfs_attr_leaf_entries_end(ichdr.count + 1, leaf);
14141474
for (sum = 0, i = XFS_ATTR_LEAF_MAPSIZE - 1; i >= 0; i--) {
14151475
if (tablesize > ichdr.firstused) {
14161476
sum += ichdr.freemap[i].size;
@@ -1476,6 +1536,7 @@ xfs_attr3_leaf_add_work(
14761536
struct xfs_attr_leaf_name_local *name_loc;
14771537
struct xfs_attr_leaf_name_remote *name_rmt;
14781538
struct xfs_mount *mp;
1539+
int old_end, new_end;
14791540
int tmp;
14801541
int i;
14811542

@@ -1568,17 +1629,48 @@ xfs_attr3_leaf_add_work(
15681629
if (be16_to_cpu(entry->nameidx) < ichdr->firstused)
15691630
ichdr->firstused = be16_to_cpu(entry->nameidx);
15701631

1571-
ASSERT(ichdr->firstused >= ichdr->count * sizeof(xfs_attr_leaf_entry_t)
1572-
+ xfs_attr3_leaf_hdr_size(leaf));
1573-
tmp = (ichdr->count - 1) * sizeof(xfs_attr_leaf_entry_t)
1574-
+ xfs_attr3_leaf_hdr_size(leaf);
1632+
new_end = xfs_attr_leaf_entries_end(ichdr->count, leaf);
1633+
old_end = new_end - sizeof(struct xfs_attr_leaf_entry);
1634+
1635+
ASSERT(ichdr->firstused >= new_end);
15751636

15761637
for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
1577-
if (ichdr->freemap[i].base == tmp) {
1578-
ichdr->freemap[i].base += sizeof(xfs_attr_leaf_entry_t);
1638+
int diff = 0;
1639+
1640+
if (ichdr->freemap[i].base == old_end) {
1641+
/*
1642+
* This freemap entry starts at the old end of the
1643+
* leaf entry array, so we need to adjust its base
1644+
* upward to accomodate the larger array.
1645+
*/
1646+
diff = sizeof(struct xfs_attr_leaf_entry);
1647+
} else if (ichdr->freemap[i].size > 0 &&
1648+
ichdr->freemap[i].base < new_end) {
1649+
/*
1650+
* This freemap entry starts in the space claimed by
1651+
* the new leaf entry. Adjust its base upward to
1652+
* reflect that.
1653+
*/
1654+
diff = new_end - ichdr->freemap[i].base;
1655+
}
1656+
1657+
if (diff) {
1658+
ichdr->freemap[i].base += diff;
15791659
ichdr->freemap[i].size -=
1580-
min_t(uint16_t, ichdr->freemap[i].size,
1581-
sizeof(xfs_attr_leaf_entry_t));
1660+
min_t(uint16_t, ichdr->freemap[i].size, diff);
1661+
}
1662+
1663+
/*
1664+
* Don't leave zero-length freemaps with nonzero base lying
1665+
* around, because we don't want the code in _remove that
1666+
* matches on base address to get confused and create
1667+
* overlapping freemaps. If we end up with no freemap entries
1668+
* then the next _add will compact the leaf block and
1669+
* regenerate the freemaps.
1670+
*/
1671+
if (ichdr->freemap[i].size == 0 && ichdr->freemap[i].base > 0) {
1672+
ichdr->freemap[i].base = 0;
1673+
ichdr->holes = 1;
15821674
}
15831675
}
15841676
ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index);
@@ -1623,6 +1715,10 @@ xfs_attr3_leaf_compact(
16231715
ichdr_dst->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_src);
16241716
ichdr_dst->freemap[0].size = ichdr_dst->firstused -
16251717
ichdr_dst->freemap[0].base;
1718+
ichdr_dst->freemap[1].base = 0;
1719+
ichdr_dst->freemap[2].base = 0;
1720+
ichdr_dst->freemap[1].size = 0;
1721+
ichdr_dst->freemap[2].size = 0;
16261722

16271723
/* write the header back to initialise the underlying buffer */
16281724
xfs_attr3_leaf_hdr_to_disk(args->geo, leaf_dst, ichdr_dst);
@@ -1774,8 +1870,8 @@ xfs_attr3_leaf_rebalance(
17741870
/*
17751871
* leaf2 is the destination, compact it if it looks tight.
17761872
*/
1777-
max = ichdr2.firstused - xfs_attr3_leaf_hdr_size(leaf1);
1778-
max -= ichdr2.count * sizeof(xfs_attr_leaf_entry_t);
1873+
max = ichdr2.firstused -
1874+
xfs_attr_leaf_entries_end(ichdr2.count, leaf1);
17791875
if (space > max)
17801876
xfs_attr3_leaf_compact(args, &ichdr2, blk2->bp);
17811877

@@ -1803,8 +1899,8 @@ xfs_attr3_leaf_rebalance(
18031899
/*
18041900
* leaf1 is the destination, compact it if it looks tight.
18051901
*/
1806-
max = ichdr1.firstused - xfs_attr3_leaf_hdr_size(leaf1);
1807-
max -= ichdr1.count * sizeof(xfs_attr_leaf_entry_t);
1902+
max = ichdr1.firstused -
1903+
xfs_attr_leaf_entries_end(ichdr1.count, leaf1);
18081904
if (space > max)
18091905
xfs_attr3_leaf_compact(args, &ichdr1, blk1->bp);
18101906

@@ -2010,9 +2106,7 @@ xfs_attr3_leaf_toosmall(
20102106
blk = &state->path.blk[ state->path.active-1 ];
20112107
leaf = blk->bp->b_addr;
20122108
xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr, leaf);
2013-
bytes = xfs_attr3_leaf_hdr_size(leaf) +
2014-
ichdr.count * sizeof(xfs_attr_leaf_entry_t) +
2015-
ichdr.usedbytes;
2109+
bytes = xfs_attr_leaf_entries_end(ichdr.count, leaf) + ichdr.usedbytes;
20162110
if (bytes > (state->args->geo->blksize >> 1)) {
20172111
*action = 0; /* blk over 50%, don't try to join */
20182112
return 0;
@@ -2070,9 +2164,8 @@ xfs_attr3_leaf_toosmall(
20702164
bytes = state->args->geo->blksize -
20712165
(state->args->geo->blksize >> 2) -
20722166
ichdr.usedbytes - ichdr2.usedbytes -
2073-
((ichdr.count + ichdr2.count) *
2074-
sizeof(xfs_attr_leaf_entry_t)) -
2075-
xfs_attr3_leaf_hdr_size(leaf);
2167+
xfs_attr_leaf_entries_end(ichdr.count + ichdr2.count,
2168+
leaf);
20762169

20772170
xfs_trans_brelse(state->args->trans, bp);
20782171
if (bytes >= 0)
@@ -2134,8 +2227,7 @@ xfs_attr3_leaf_remove(
21342227

21352228
ASSERT(ichdr.count > 0 && ichdr.count < args->geo->blksize / 8);
21362229
ASSERT(args->index >= 0 && args->index < ichdr.count);
2137-
ASSERT(ichdr.firstused >= ichdr.count * sizeof(*entry) +
2138-
xfs_attr3_leaf_hdr_size(leaf));
2230+
ASSERT(ichdr.firstused >= xfs_attr_leaf_entries_end(ichdr.count, leaf));
21392231

21402232
entry = &xfs_attr3_leaf_entryp(leaf)[args->index];
21412233

@@ -2148,8 +2240,7 @@ xfs_attr3_leaf_remove(
21482240
* find smallest free region in case we need to replace it,
21492241
* adjust any map that borders the entry table,
21502242
*/
2151-
tablesize = ichdr.count * sizeof(xfs_attr_leaf_entry_t)
2152-
+ xfs_attr3_leaf_hdr_size(leaf);
2243+
tablesize = xfs_attr_leaf_entries_end(ichdr.count, leaf);
21532244
tmp = ichdr.freemap[0].size;
21542245
before = after = -1;
21552246
smallest = XFS_ATTR_LEAF_MAPSIZE - 1;
@@ -2256,8 +2347,7 @@ xfs_attr3_leaf_remove(
22562347
* Check if leaf is less than 50% full, caller may want to
22572348
* "join" the leaf with a sibling if so.
22582349
*/
2259-
tmp = ichdr.usedbytes + xfs_attr3_leaf_hdr_size(leaf) +
2260-
ichdr.count * sizeof(xfs_attr_leaf_entry_t);
2350+
tmp = ichdr.usedbytes + xfs_attr_leaf_entries_end(ichdr.count, leaf);
22612351

22622352
return tmp < args->geo->magicpct; /* leaf is < 37% full */
22632353
}
@@ -2580,11 +2670,11 @@ xfs_attr3_leaf_moveents(
25802670
ichdr_s->magic == XFS_ATTR3_LEAF_MAGIC);
25812671
ASSERT(ichdr_s->magic == ichdr_d->magic);
25822672
ASSERT(ichdr_s->count > 0 && ichdr_s->count < args->geo->blksize / 8);
2583-
ASSERT(ichdr_s->firstused >= (ichdr_s->count * sizeof(*entry_s))
2584-
+ xfs_attr3_leaf_hdr_size(leaf_s));
2673+
ASSERT(ichdr_s->firstused >=
2674+
xfs_attr_leaf_entries_end(ichdr_s->count, leaf_s));
25852675
ASSERT(ichdr_d->count < args->geo->blksize / 8);
2586-
ASSERT(ichdr_d->firstused >= (ichdr_d->count * sizeof(*entry_d))
2587-
+ xfs_attr3_leaf_hdr_size(leaf_d));
2676+
ASSERT(ichdr_d->firstused >=
2677+
xfs_attr_leaf_entries_end(ichdr_d->count, leaf_d));
25882678

25892679
ASSERT(start_s < ichdr_s->count);
25902680
ASSERT(start_d <= ichdr_d->count);
@@ -2644,8 +2734,7 @@ xfs_attr3_leaf_moveents(
26442734
ichdr_d->usedbytes += tmp;
26452735
ichdr_s->count -= 1;
26462736
ichdr_d->count += 1;
2647-
tmp = ichdr_d->count * sizeof(xfs_attr_leaf_entry_t)
2648-
+ xfs_attr3_leaf_hdr_size(leaf_d);
2737+
tmp = xfs_attr_leaf_entries_end(ichdr_d->count, leaf_d);
26492738
ASSERT(ichdr_d->firstused >= tmp);
26502739
#ifdef GROT
26512740
}
@@ -2681,8 +2770,8 @@ xfs_attr3_leaf_moveents(
26812770
/*
26822771
* Fill in the freemap information
26832772
*/
2684-
ichdr_d->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_d);
2685-
ichdr_d->freemap[0].base += ichdr_d->count * sizeof(xfs_attr_leaf_entry_t);
2773+
ichdr_d->freemap[0].base =
2774+
xfs_attr_leaf_entries_end(ichdr_d->count, leaf_d);
26862775
ichdr_d->freemap[0].size = ichdr_d->firstused - ichdr_d->freemap[0].base;
26872776
ichdr_d->freemap[1].base = 0;
26882777
ichdr_d->freemap[2].base = 0;

fs/xfs/libxfs/xfs_da_format.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -746,7 +746,7 @@ struct xfs_attr3_leafblock {
746746
#define XFS_ATTR_LEAF_NAME_ALIGN ((uint)sizeof(xfs_dablk_t))
747747

748748
static inline int
749-
xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp)
749+
xfs_attr3_leaf_hdr_size(const struct xfs_attr_leafblock *leafp)
750750
{
751751
if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC))
752752
return sizeof(struct xfs_attr3_leaf_hdr);

fs/xfs/scrub/attr.c

Lines changed: 31 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -287,32 +287,6 @@ xchk_xattr_set_map(
287287
return ret;
288288
}
289289

290-
/*
291-
* Check the leaf freemap from the usage bitmap. Returns false if the
292-
* attr freemap has problems or points to used space.
293-
*/
294-
STATIC bool
295-
xchk_xattr_check_freemap(
296-
struct xfs_scrub *sc,
297-
struct xfs_attr3_icleaf_hdr *leafhdr)
298-
{
299-
struct xchk_xattr_buf *ab = sc->buf;
300-
unsigned int mapsize = sc->mp->m_attr_geo->blksize;
301-
int i;
302-
303-
/* Construct bitmap of freemap contents. */
304-
bitmap_zero(ab->freemap, mapsize);
305-
for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
306-
if (!xchk_xattr_set_map(sc, ab->freemap,
307-
leafhdr->freemap[i].base,
308-
leafhdr->freemap[i].size))
309-
return false;
310-
}
311-
312-
/* Look for bits that are set in freemap and are marked in use. */
313-
return !bitmap_intersects(ab->freemap, ab->usedmap, mapsize);
314-
}
315-
316290
/*
317291
* Check this leaf entry's relations to everything else.
318292
* Returns the number of bytes used for the name/value data.
@@ -364,7 +338,10 @@ xchk_xattr_entry(
364338
rentry = xfs_attr3_leaf_name_remote(leaf, idx);
365339
namesize = xfs_attr_leaf_entsize_remote(rentry->namelen);
366340
name_end = (char *)rentry + namesize;
367-
if (rentry->namelen == 0 || rentry->valueblk == 0)
341+
if (rentry->namelen == 0)
342+
xchk_da_set_corrupt(ds, level);
343+
if (rentry->valueblk == 0 &&
344+
!(ent->flags & XFS_ATTR_INCOMPLETE))
368345
xchk_da_set_corrupt(ds, level);
369346
}
370347
if (name_end > buf_end)
@@ -403,6 +380,7 @@ xchk_xattr_block(
403380

404381
*last_checked = blk->blkno;
405382
bitmap_zero(ab->usedmap, mp->m_attr_geo->blksize);
383+
bitmap_zero(ab->freemap, mp->m_attr_geo->blksize);
406384

407385
/* Check all the padding. */
408386
if (xfs_has_crc(ds->sc->mp)) {
@@ -449,6 +427,9 @@ xchk_xattr_block(
449427
if ((char *)&entries[leafhdr.count] > (char *)leaf + leafhdr.firstused)
450428
xchk_da_set_corrupt(ds, level);
451429

430+
if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
431+
goto out;
432+
452433
buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize;
453434
for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) {
454435
/* Mark the leaf entry itself. */
@@ -467,7 +448,29 @@ xchk_xattr_block(
467448
goto out;
468449
}
469450

470-
if (!xchk_xattr_check_freemap(ds->sc, &leafhdr))
451+
/* Construct bitmap of freemap contents. */
452+
for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
453+
if (!xchk_xattr_set_map(ds->sc, ab->freemap,
454+
leafhdr.freemap[i].base,
455+
leafhdr.freemap[i].size))
456+
xchk_da_set_corrupt(ds, level);
457+
458+
/*
459+
* freemap entries with zero length and nonzero base can cause
460+
* problems with older kernels, so we mark these for preening
461+
* even though there's no inconsistency.
462+
*/
463+
if (leafhdr.freemap[i].size == 0 &&
464+
leafhdr.freemap[i].base > 0)
465+
xchk_da_set_preen(ds, level);
466+
467+
if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
468+
goto out;
469+
}
470+
471+
/* Look for bits that are set in freemap and are marked in use. */
472+
if (bitmap_intersects(ab->freemap, ab->usedmap,
473+
mp->m_attr_geo->blksize))
471474
xchk_da_set_corrupt(ds, level);
472475

473476
if (leafhdr.usedbytes != usedbytes)

0 commit comments

Comments
 (0)