Skip to content

Commit 4787fc8

Browse files
author
Darrick J. Wong
committed
xfs: create a shadow rmap btree during rmap repair
Create an in-memory btree of rmap records instead of an array. This enables us to do live record collection instead of freezing the fs. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de>
1 parent 32080a9 commit 4787fc8

9 files changed

Lines changed: 377 additions & 84 deletions

File tree

fs/xfs/libxfs/xfs_rmap.c

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,16 @@ xfs_rmap_check_irec(
269269
return NULL;
270270
}
271271

272+
/*
 * Check an incore rmap record that was read through a btree cursor.
 *
 * The perag handed to xfs_rmap_check_irec() is taken from cur->bc_mem.pag
 * when the cursor uses the in-memory rmap btree ops, and from cur->bc_ag.pag
 * otherwise.  Returns whatever xfs_rmap_check_irec() returns.
 */
static inline xfs_failaddr_t
273+
xfs_rmap_check_btrec(
274+
struct xfs_btree_cur *cur,
275+
const struct xfs_rmap_irec *irec)
276+
{
/* In-memory cursors keep their perag pointer in the bc_mem member. */
277+
if (xfs_btree_is_mem_rmap(cur->bc_ops))
278+
return xfs_rmap_check_irec(cur->bc_mem.pag, irec);
279+
return xfs_rmap_check_irec(cur->bc_ag.pag, irec);
280+
}
281+
272282
static inline int
273283
xfs_rmap_complain_bad_rec(
274284
struct xfs_btree_cur *cur,
@@ -277,9 +287,13 @@ xfs_rmap_complain_bad_rec(
277287
{
278288
struct xfs_mount *mp = cur->bc_mp;
279289

280-
xfs_warn(mp,
281-
"Reverse Mapping BTree record corruption in AG %d detected at %pS!",
282-
cur->bc_ag.pag->pag_agno, fa);
290+
if (xfs_btree_is_mem_rmap(cur->bc_ops))
291+
xfs_warn(mp,
292+
"In-Memory Reverse Mapping BTree record corruption detected at %pS!", fa);
293+
else
294+
xfs_warn(mp,
295+
"Reverse Mapping BTree record corruption in AG %d detected at %pS!",
296+
cur->bc_ag.pag->pag_agno, fa);
283297
xfs_warn(mp,
284298
"Owner 0x%llx, flags 0x%x, start block 0x%x block count 0x%x",
285299
irec->rm_owner, irec->rm_flags, irec->rm_startblock,
@@ -307,7 +321,7 @@ xfs_rmap_get_rec(
307321

308322
fa = xfs_rmap_btrec_to_irec(rec, irec);
309323
if (!fa)
310-
fa = xfs_rmap_check_irec(cur->bc_ag.pag, irec);
324+
fa = xfs_rmap_check_btrec(cur, irec);
311325
if (fa)
312326
return xfs_rmap_complain_bad_rec(cur, fa, irec);
313327

@@ -2404,15 +2418,12 @@ xfs_rmap_map_raw(
24042418
{
24052419
struct xfs_owner_info oinfo;
24062420

2407-
oinfo.oi_owner = rmap->rm_owner;
2408-
oinfo.oi_offset = rmap->rm_offset;
2409-
oinfo.oi_flags = 0;
2410-
if (rmap->rm_flags & XFS_RMAP_ATTR_FORK)
2411-
oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK;
2412-
if (rmap->rm_flags & XFS_RMAP_BMBT_BLOCK)
2413-
oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK;
2421+
xfs_owner_info_pack(&oinfo, rmap->rm_owner, rmap->rm_offset,
2422+
rmap->rm_flags);
24142423

2415-
if (rmap->rm_flags || XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
2424+
if ((rmap->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK |
2425+
XFS_RMAP_UNWRITTEN)) ||
2426+
XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
24162427
return xfs_rmap_map(cur, rmap->rm_startblock,
24172428
rmap->rm_blockcount,
24182429
rmap->rm_flags & XFS_RMAP_UNWRITTEN,
@@ -2442,7 +2453,7 @@ xfs_rmap_query_range_helper(
24422453

24432454
fa = xfs_rmap_btrec_to_irec(rec, &irec);
24442455
if (!fa)
2445-
fa = xfs_rmap_check_irec(cur->bc_ag.pag, &irec);
2456+
fa = xfs_rmap_check_btrec(cur, &irec);
24462457
if (fa)
24472458
return xfs_rmap_complain_bad_rec(cur, fa, &irec);
24482459

fs/xfs/libxfs/xfs_rmap_btree.c

Lines changed: 149 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
#include "xfs_extent_busy.h"
2323
#include "xfs_ag.h"
2424
#include "xfs_ag_resv.h"
25+
#include "xfs_buf_mem.h"
26+
#include "xfs_btree_mem.h"
2527

2628
static struct kmem_cache *xfs_rmapbt_cur_cache;
2729

@@ -541,6 +543,151 @@ xfs_rmapbt_init_cursor(
541543
return cur;
542544
}
543545

546+
#ifdef CONFIG_XFS_BTREE_IN_MEM
547+
/*
 * Compute how many entries fit in @blocklen bytes of an in-memory rmap btree
 * block.
 *
 * For a leaf block the entry is one struct xfs_rmap_rec.  For a node block
 * the entry is two struct xfs_rmap_key plus one __be64-sized pointer.
 */
static inline unsigned int
548+
xfs_rmapbt_mem_block_maxrecs(
549+
unsigned int blocklen,
550+
bool leaf)
551+
{
552+
if (leaf)
553+
return blocklen / sizeof(struct xfs_rmap_rec);
/* Node entry: two keys and a 64-bit block pointer. */
554+
return blocklen /
555+
(2 * sizeof(struct xfs_rmap_key) + sizeof(__be64));
556+
}
557+
558+
/*
559+
* Validate an in-memory rmap btree block. Callers are allowed to generate an
560+
* in-memory btree even if the ondisk feature is not enabled.
561+
*/
562+
static xfs_failaddr_t
563+
xfs_rmapbt_mem_verify(
564+
struct xfs_buf *bp)
565+
{
566+
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
567+
xfs_failaddr_t fa;
568+
unsigned int level;
569+
unsigned int maxrecs;
570+
/* Reject the block outright if its magic number is not accepted. */
571+
if (!xfs_verify_magic(bp, block->bb_magic))
572+
return __this_address;
573+
/*
 * Check the long-format v5 block header.
 * NOTE(review): XFS_RMAP_OWN_UNKNOWN is passed as the owner argument;
 * presumably this means "do not check a specific owner" -- confirm
 * against xfs_btree_fsblock_v5hdr_verify().
 */
574+
fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
575+
if (fa)
576+
return fa;
577+
/* The block's level must be below the maximum rmap btree height. */
578+
level = be16_to_cpu(block->bb_level);
579+
if (level >= xfs_rmapbt_maxlevels_ondisk())
580+
return __this_address;
581+
/*
 * Usable space is the in-memory block size minus the long-format CRC
 * header; level 0 blocks are sized as leaves, all others as nodes.
 */
582+
maxrecs = xfs_rmapbt_mem_block_maxrecs(
583+
XFBNO_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN, level == 0);
584+
return xfs_btree_memblock_verify(bp, maxrecs);
585+
}
586+
587+
/*
 * Buffer read/write verifier for in-memory rmap btree blocks.  Runs the
 * structure verifier and, if it returns a failure address, reports the buffer
 * through xfs_verifier_error() with -EFSCORRUPTED.
 */
static void
588+
xfs_rmapbt_mem_rw_verify(
589+
struct xfs_buf *bp)
590+
{
591+
xfs_failaddr_t fa = xfs_rmapbt_mem_verify(bp);
592+
593+
if (fa)
594+
xfs_verifier_error(bp, -EFSCORRUPTED, fa);
595+
}
596+
597+
/* skip crc checks on in-memory btrees to save time */
598+
static const struct xfs_buf_ops xfs_rmapbt_mem_buf_ops = {
599+
.name = "xfs_rmapbt_mem",
/*
 * NOTE(review): only the second magic slot is populated; presumably the
 * first slot is for the non-CRC format, which rmap btrees do not have --
 * confirm against the xfs_buf_ops definition.
 */
600+
.magic = { 0, cpu_to_be32(XFS_RMAP_CRC_MAGIC) },
/* Reads and writes share one verifier; neither path checks a CRC. */
601+
.verify_read = xfs_rmapbt_mem_rw_verify,
602+
.verify_write = xfs_rmapbt_mem_rw_verify,
603+
.verify_struct = xfs_rmapbt_mem_verify,
604+
};
605+
606+
/*
 * Btree operations for the in-memory rmap btree.
 *
 * Cursor duplication, root updates, block allocation/freeing, record limits
 * and pointer initialisation are delegated to the xfbtree_* helpers.  Key and
 * record initialisation, comparison and ordering use the xfs_rmapbt_*
 * callbacks.
 */
const struct xfs_btree_ops xfs_rmapbt_mem_ops = {
607+
.name = "mem_rmap",
608+
.type = XFS_BTREE_TYPE_MEM,
609+
.geom_flags = XFS_BTGEO_OVERLAPPING,
610+
611+
.rec_len = sizeof(struct xfs_rmap_rec),
612+
/* Overlapping btree; 2 keys per pointer. */
613+
.key_len = 2 * sizeof(struct xfs_rmap_key),
614+
.ptr_len = XFS_BTREE_LONG_PTR_LEN,
615+
616+
.lru_refs = XFS_RMAP_BTREE_REF,
617+
.statoff = XFS_STATS_CALC_INDEX(xs_rmap_mem_2),
618+
/* Block and cursor management provided by the xfbtree layer. */
619+
.dup_cursor = xfbtree_dup_cursor,
620+
.set_root = xfbtree_set_root,
621+
.alloc_block = xfbtree_alloc_block,
622+
.free_block = xfbtree_free_block,
623+
.get_minrecs = xfbtree_get_minrecs,
624+
.get_maxrecs = xfbtree_get_maxrecs,
625+
.init_key_from_rec = xfs_rmapbt_init_key_from_rec,
626+
.init_high_key_from_rec = xfs_rmapbt_init_high_key_from_rec,
627+
.init_rec_from_cur = xfs_rmapbt_init_rec_from_cur,
628+
.init_ptr_from_cur = xfbtree_init_ptr_from_cur,
629+
.key_diff = xfs_rmapbt_key_diff,
/* Use the CRC-less in-memory buffer verifiers. */
630+
.buf_ops = &xfs_rmapbt_mem_buf_ops,
631+
.diff_two_keys = xfs_rmapbt_diff_two_keys,
632+
.keys_inorder = xfs_rmapbt_keys_inorder,
633+
.recs_inorder = xfs_rmapbt_recs_inorder,
634+
.keys_contiguous = xfs_rmapbt_keys_contiguous,
635+
};
636+
637+
/* Create a cursor for an in-memory btree. */
638+
struct xfs_btree_cur *
639+
xfs_rmapbt_mem_cursor(
640+
struct xfs_perag *pag,
641+
struct xfs_trans *tp,
642+
struct xfbtree *xfbt)
643+
{
644+
struct xfs_btree_cur *cur;
645+
struct xfs_mount *mp = pag->pag_mount;
646+
/*
 * The cursor is allocated from the same cache, and with the same maximum
 * height, as the rmap btree cursor code in this file uses.
 */
647+
cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rmapbt_mem_ops,
648+
xfs_rmapbt_maxlevels_ondisk(), xfs_rmapbt_cur_cache);
/* Attach the in-memory btree and start at its current height. */
649+
cur->bc_mem.xfbtree = xfbt;
650+
cur->bc_nlevels = xfbt->nlevels;
651+
/*
 * Take a perag reference for the cursor.
 * NOTE(review): presumably dropped when the cursor is deleted -- confirm
 * against the cursor teardown path.
 */
652+
cur->bc_mem.pag = xfs_perag_hold(pag);
653+
return cur;
654+
}
655+
656+
/* Create an in-memory rmap btree. */
657+
int
658+
xfs_rmapbt_mem_init(
659+
struct xfs_mount *mp,
660+
struct xfbtree *xfbt,
661+
struct xfs_buftarg *btp,
662+
xfs_agnumber_t agno)
663+
{
/* Record the AG number as the btree owner before initialising the tree. */
664+
xfbt->owner = agno;
/* Returns the result of xfbtree_init() using the in-memory rmap ops. */
665+
return xfbtree_init(mp, xfbt, btp, &xfs_rmapbt_mem_ops);
666+
}
667+
668+
/* Compute the max possible height for reverse mapping btrees in memory. */
669+
static unsigned int
670+
xfs_rmapbt_mem_maxlevels(void)
671+
{
672+
unsigned int minrecs[2];
673+
unsigned int blocklen;
674+
/* Usable bytes per in-memory block after the long-format CRC header. */
675+
blocklen = XFBNO_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN;
676+
/* Minimum fill is half the maximum: [0] for leaves, [1] for nodes. */
677+
minrecs[0] = xfs_rmapbt_mem_block_maxrecs(blocklen, true) / 2;
678+
minrecs[1] = xfs_rmapbt_mem_block_maxrecs(blocklen, false) / 2;
679+
680+
/*
681+
* How tall can an in-memory rmap btree become if we filled the entire
682+
* AG with rmap records?
683+
*/
684+
return xfs_btree_compute_maxlevels(minrecs,
685+
XFS_MAX_AG_BYTES / sizeof(struct xfs_rmap_rec));
686+
}
687+
#else
688+
# define xfs_rmapbt_mem_maxlevels() (0)
689+
#endif /* CONFIG_XFS_BTREE_IN_MEM */
690+
544691
/*
545692
* Install a new reverse mapping btree root. Caller is responsible for
546693
* invalidating and freeing the old btree blocks.
@@ -611,7 +758,8 @@ xfs_rmapbt_maxlevels_ondisk(void)
611758
* like if it consumes almost all the blocks in the AG due to maximal
612759
* sharing factor.
613760
*/
614-
return xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS);
761+
return max(xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS),
762+
xfs_rmapbt_mem_maxlevels());
615763
}
616764

617765
/* Compute the maximum height of an rmap btree. */

fs/xfs/libxfs/xfs_rmap_btree.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ struct xfs_buf;
1010
struct xfs_btree_cur;
1111
struct xfs_mount;
1212
struct xbtree_afakeroot;
13+
struct xfbtree;
1314

1415
/* rmaps only exist on crc enabled filesystems */
1516
#define XFS_RMAP_BLOCK_LEN XFS_BTREE_SBLOCK_CRC_LEN
@@ -62,4 +63,9 @@ unsigned int xfs_rmapbt_maxlevels_ondisk(void);
6263
int __init xfs_rmapbt_init_cur_cache(void);
6364
void xfs_rmapbt_destroy_cur_cache(void);
6465

66+
struct xfs_btree_cur *xfs_rmapbt_mem_cursor(struct xfs_perag *pag,
67+
struct xfs_trans *tp, struct xfbtree *xfbtree);
68+
int xfs_rmapbt_mem_init(struct xfs_mount *mp, struct xfbtree *xfbtree,
69+
struct xfs_buftarg *btp, xfs_agnumber_t agno);
70+
6571
#endif /* __XFS_RMAP_BTREE_H__ */

fs/xfs/libxfs/xfs_shared.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ extern const struct xfs_btree_ops xfs_finobt_ops;
5151
extern const struct xfs_btree_ops xfs_bmbt_ops;
5252
extern const struct xfs_btree_ops xfs_refcountbt_ops;
5353
extern const struct xfs_btree_ops xfs_rmapbt_ops;
54+
extern const struct xfs_btree_ops xfs_rmapbt_mem_ops;
5455

5556
static inline bool xfs_btree_is_bno(const struct xfs_btree_ops *ops)
5657
{
@@ -87,6 +88,15 @@ static inline bool xfs_btree_is_rmap(const struct xfs_btree_ops *ops)
8788
return ops == &xfs_rmapbt_ops;
8889
}
8990

91+
#ifdef CONFIG_XFS_BTREE_IN_MEM
92+
/* Return true if @ops is the in-memory rmap btree ops structure. */
static inline bool xfs_btree_is_mem_rmap(const struct xfs_btree_ops *ops)
93+
{
94+
return ops == &xfs_rmapbt_mem_ops;
95+
}
96+
#else
97+
# define xfs_btree_is_mem_rmap(...) (false)
98+
#endif
99+
90100
/* log size calculation functions */
91101
int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes);
92102
int xfs_log_calc_minimum_size(struct xfs_mount *);

fs/xfs/scrub/repair.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,14 @@
3131
#include "xfs_error.h"
3232
#include "xfs_reflink.h"
3333
#include "xfs_health.h"
34+
#include "xfs_buf_mem.h"
3435
#include "scrub/scrub.h"
3536
#include "scrub/common.h"
3637
#include "scrub/trace.h"
3738
#include "scrub/repair.h"
3839
#include "scrub/bitmap.h"
3940
#include "scrub/stats.h"
41+
#include "scrub/xfile.h"
4042

4143
/*
4244
* Attempt to repair some metadata, if the metadata is corrupt and userspace
@@ -1147,3 +1149,19 @@ xrep_metadata_inode_forks(
11471149

11481150
return 0;
11491151
}
1152+
1153+
/*
1154+
* Set up an in-memory buffer cache so that we can use the xfbtree. Allocating
1155+
* a shmem file might take locks, so we cannot be in transaction context. Park
1156+
* our resources in the scrub context and let the teardown function take care
1157+
* of them at the right time.
1158+
*/
1159+
int
1160+
xrep_setup_xfbtree(
1161+
struct xfs_scrub *sc,
1162+
const char *descr)
1163+
{
/* The scrub context must not have a transaction attached yet. */
1164+
ASSERT(sc->tp == NULL);
1165+
/* Store the new buffer target in sc->xmbtp; return xmbuf_alloc()'s result. */
1166+
return xmbuf_alloc(sc->mp, descr, &sc->xmbtp);
1167+
}

fs/xfs/scrub/repair.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ int xrep_ino_dqattach(struct xfs_scrub *sc);
8181
# define xrep_ino_dqattach(sc) (0)
8282
#endif /* CONFIG_XFS_QUOTA */
8383

84+
int xrep_setup_xfbtree(struct xfs_scrub *sc, const char *descr);
85+
8486
int xrep_ino_ensure_extent_count(struct xfs_scrub *sc, int whichfork,
8587
xfs_extnum_t nextents);
8688
int xrep_reset_perag_resv(struct xfs_scrub *sc);

0 commit comments

Comments
 (0)