1- // SPDX-License-Identifier: GPL-2.0+
1+ // SPDX-License-Identifier: GPL-2.0-or-later
22/*
33 * Copyright (C) 2019-2023 Oracle. All Rights Reserved.
44 * Author: Darrick J. Wong <djwong@kernel.org>
88#include "xfs_shared.h"
99#include "xfs_format.h"
1010#include "xfs_trans_resv.h"
11+ #include "xfs_log_format.h"
12+ #include "xfs_trans.h"
1113#include "xfs_mount.h"
1214#include "xfs_alloc.h"
1315#include "xfs_ialloc.h"
1618#include "xfs_ag.h"
1719#include "xfs_rtalloc.h"
1820#include "xfs_inode.h"
21+ #include "xfs_icache.h"
1922#include "scrub/scrub.h"
2023#include "scrub/common.h"
2124#include "scrub/trace.h"
@@ -53,6 +56,7 @@ struct xchk_fscounters {
5356 uint64_t frextents ;
5457 unsigned long long icount_min ;
5558 unsigned long long icount_max ;
59+ bool frozen ;
5660};
5761
5862/*
@@ -123,6 +127,82 @@ xchk_fscount_warmup(
123127 return error ;
124128}
125129
130+ static inline int
131+ xchk_fsfreeze (
132+ struct xfs_scrub * sc )
133+ {
134+ int error ;
135+
136+ error = freeze_super (sc -> mp -> m_super , FREEZE_HOLDER_KERNEL );
137+ trace_xchk_fsfreeze (sc , error );
138+ return error ;
139+ }
140+
141+ static inline int
142+ xchk_fsthaw (
143+ struct xfs_scrub * sc )
144+ {
145+ int error ;
146+
147+ /* This should always succeed, we have a kernel freeze */
148+ error = thaw_super (sc -> mp -> m_super , FREEZE_HOLDER_KERNEL );
149+ trace_xchk_fsthaw (sc , error );
150+ return error ;
151+ }
152+
153+ /*
154+ * We couldn't stabilize the filesystem long enough to sample all the variables
155+ * that comprise the summary counters and compare them to the percpu counters.
156+ * We need to disable all writer threads, which means taking the first two
157+ * freeze levels to put userspace to sleep, and the third freeze level to
158+ * prevent background threads from starting new transactions. Take one level
159+ * more to prevent other callers from unfreezing the filesystem while we run.
160+ */
161+ STATIC int
162+ xchk_fscounters_freeze (
163+ struct xfs_scrub * sc )
164+ {
165+ struct xchk_fscounters * fsc = sc -> buf ;
166+ int error = 0 ;
167+
168+ if (sc -> flags & XCHK_HAVE_FREEZE_PROT ) {
169+ sc -> flags &= ~XCHK_HAVE_FREEZE_PROT ;
170+ mnt_drop_write_file (sc -> file );
171+ }
172+
173+ /* Try to grab a kernel freeze. */
174+ while ((error = xchk_fsfreeze (sc )) == - EBUSY ) {
175+ if (xchk_should_terminate (sc , & error ))
176+ return error ;
177+
178+ delay (HZ / 10 );
179+ }
180+ if (error )
181+ return error ;
182+
183+ fsc -> frozen = true;
184+ return 0 ;
185+ }
186+
187+ /* Thaw the filesystem after checking or repairing fscounters. */
188+ STATIC void
189+ xchk_fscounters_cleanup (
190+ void * buf )
191+ {
192+ struct xchk_fscounters * fsc = buf ;
193+ struct xfs_scrub * sc = fsc -> sc ;
194+ int error ;
195+
196+ if (!fsc -> frozen )
197+ return ;
198+
199+ error = xchk_fsthaw (sc );
200+ if (error )
201+ xfs_emerg (sc -> mp , "still frozen after scrub, err=%d" , error );
202+ else
203+ fsc -> frozen = false;
204+ }
205+
126206int
127207xchk_setup_fscounters (
128208 struct xfs_scrub * sc )
@@ -140,6 +220,7 @@ xchk_setup_fscounters(
140220 sc -> buf = kzalloc (sizeof (struct xchk_fscounters ), XCHK_GFP_FLAGS );
141221 if (!sc -> buf )
142222 return - ENOMEM ;
223+ sc -> buf_cleanup = xchk_fscounters_cleanup ;
143224 fsc = sc -> buf ;
144225 fsc -> sc = sc ;
145226
@@ -150,7 +231,18 @@ xchk_setup_fscounters(
150231 if (error )
151232 return error ;
152233
153- return xchk_trans_alloc (sc , 0 );
234+ /*
235+ * Pause all writer activity in the filesystem while we're scrubbing to
236+ * reduce the likelihood of background perturbations to the counters
237+ * throwing off our calculations.
238+ */
239+ if (sc -> flags & XCHK_TRY_HARDER ) {
240+ error = xchk_fscounters_freeze (sc );
241+ if (error )
242+ return error ;
243+ }
244+
245+ return xfs_trans_alloc_empty (sc -> mp , & sc -> tp );
154246}
155247
156248/*
@@ -290,8 +382,7 @@ xchk_fscount_aggregate_agcounts(
290382 if (fsc -> ifree > fsc -> icount ) {
291383 if (tries -- )
292384 goto retry ;
293- xchk_set_incomplete (sc );
294- return 0 ;
385+ return - EDEADLOCK ;
295386 }
296387
297388 return 0 ;
@@ -367,6 +458,8 @@ xchk_fscount_count_frextents(
367458 * Otherwise, we /might/ have a problem. If the change in the summations is
368459 * more than we want to tolerate, the filesystem is probably busy and we should
369460 * just send back INCOMPLETE and see if userspace will try again.
461+ *
462+ * If we're repairing then we require an exact match.
370463 */
371464static inline bool
372465xchk_fscount_within_range (
@@ -396,21 +489,7 @@ xchk_fscount_within_range(
396489 if (expected >= min_value && expected <= max_value )
397490 return true;
398491
399- /*
400- * If the difference between the two summations is too large, the fs
401- * might just be busy and so we'll mark the scrub incomplete. Return
402- * true here so that we don't mark the counter corrupt.
403- *
404- * XXX: In the future when userspace can grant scrub permission to
405- * quiesce the filesystem to solve the outsized variance problem, this
406- * check should be moved up and the return code changed to signal to
407- * userspace that we need quiesce permission.
408- */
409- if (max_value - min_value >= XCHK_FSCOUNT_MIN_VARIANCE ) {
410- xchk_set_incomplete (sc );
411- return true;
412- }
413-
492+ /* Everything else is bad. */
414493 return false;
415494}
416495
@@ -422,6 +501,7 @@ xchk_fscounters(
422501 struct xfs_mount * mp = sc -> mp ;
423502 struct xchk_fscounters * fsc = sc -> buf ;
424503 int64_t icount , ifree , fdblocks , frextents ;
504+ bool try_again = false;
425505 int error ;
426506
427507 /* Snapshot the percpu counters. */
@@ -431,9 +511,26 @@ xchk_fscounters(
431511 frextents = percpu_counter_sum (& mp -> m_frextents );
432512
433513 /* No negative values, please! */
434- if (icount < 0 || ifree < 0 || fdblocks < 0 || frextents < 0 )
514+ if (icount < 0 || ifree < 0 )
435515 xchk_set_corrupt (sc );
436516
517+ /*
518+ * If the filesystem is not frozen, the counter summation calls above
519+ * can race with xfs_mod_freecounter, which subtracts a requested space
520+ * reservation from the counter and undoes the subtraction if that made
521+ * the counter go negative. Therefore, it's possible to see negative
522+ * values here, and we should only flag that as a corruption if we
523+ * froze the fs. This is much more likely to happen with frextents
524+ * since there are no reserved pools.
525+ */
526+ if (fdblocks < 0 || frextents < 0 ) {
527+ if (!fsc -> frozen )
528+ return - EDEADLOCK ;
529+
530+ xchk_set_corrupt (sc );
531+ return 0 ;
532+ }
533+
437534 /* See if icount is obviously wrong. */
438535 if (icount < fsc -> icount_min || icount > fsc -> icount_max )
439536 xchk_set_corrupt (sc );
@@ -446,12 +543,6 @@ xchk_fscounters(
446543 if (frextents > mp -> m_sb .sb_rextents )
447544 xchk_set_corrupt (sc );
448545
449- /*
450- * XXX: We can't quiesce percpu counter updates, so exit early.
451- * This can be re-enabled when we gain exclusive freeze functionality.
452- */
453- return 0 ;
454-
455546 /*
456547 * If ifree exceeds icount by more than the minimum variance then
457548 * something's probably wrong with the counters.
@@ -463,8 +554,6 @@ xchk_fscounters(
463554 error = xchk_fscount_aggregate_agcounts (sc , fsc );
464555 if (!xchk_process_error (sc , 0 , XFS_SB_BLOCK (mp ), & error ))
465556 return error ;
466- if (sc -> sm -> sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE )
467- return 0 ;
468557
469558 /* Count the free extents counter for rt volumes. */
470559 error = xchk_fscount_count_frextents (sc , fsc );
@@ -473,20 +562,45 @@ xchk_fscounters(
473562 if (sc -> sm -> sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE )
474563 return 0 ;
475564
476- /* Compare the in-core counters with whatever we counted. */
477- if (!xchk_fscount_within_range (sc , icount , & mp -> m_icount , fsc -> icount ))
478- xchk_set_corrupt (sc );
565+ /*
566+ * Compare the in-core counters with whatever we counted. If the fs is
567+ * frozen, we treat the discrepancy as a corruption because the freeze
568+ * should have stabilized the counter values. Otherwise, we need
569+ * userspace to call us back having granted us freeze permission.
570+ */
571+ if (!xchk_fscount_within_range (sc , icount , & mp -> m_icount ,
572+ fsc -> icount )) {
573+ if (fsc -> frozen )
574+ xchk_set_corrupt (sc );
575+ else
576+ try_again = true;
577+ }
479578
480- if (!xchk_fscount_within_range (sc , ifree , & mp -> m_ifree , fsc -> ifree ))
481- xchk_set_corrupt (sc );
579+ if (!xchk_fscount_within_range (sc , ifree , & mp -> m_ifree , fsc -> ifree )) {
580+ if (fsc -> frozen )
581+ xchk_set_corrupt (sc );
582+ else
583+ try_again = true;
584+ }
482585
483586 if (!xchk_fscount_within_range (sc , fdblocks , & mp -> m_fdblocks ,
484- fsc -> fdblocks ))
485- xchk_set_corrupt (sc );
587+ fsc -> fdblocks )) {
588+ if (fsc -> frozen )
589+ xchk_set_corrupt (sc );
590+ else
591+ try_again = true;
592+ }
486593
487594 if (!xchk_fscount_within_range (sc , frextents , & mp -> m_frextents ,
488- fsc -> frextents ))
489- xchk_set_corrupt (sc );
595+ fsc -> frextents )) {
596+ if (fsc -> frozen )
597+ xchk_set_corrupt (sc );
598+ else
599+ try_again = true;
600+ }
601+
602+ if (try_again )
603+ return - EDEADLOCK ;
490604
491605 return 0 ;
492606}
0 commit comments