Skip to content

Commit f107757

Browse files
author
Chandan Babu R
committed
Merge tag 'indirect-health-reporting-6.9_2024-02-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.9-mergeC
xfs: indirect health reporting

This series enables the XFS health reporting infrastructure to remember indirect health concerns when resources are scarce. For example, if a scrub notices that there's something wrong with an inode's metadata but memory reclaim needs to free the incore inode, we want to record in the perag data the fact that there was some inode somewhere with an error. The perag structures never go away.

The first two patches in this series set that up, and the third one provides a means for xfs_scrub to tell the kernel that it can forget the indirect problem report.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>

* tag 'indirect-health-reporting-6.9_2024-02-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux:
  xfs: update health status if we get a clean bill of health
  xfs: remember sick inodes that get inactivated
  xfs: add secondary and indirect classes to the health tracking system
2 parents 6fe1910 + a1f3e0c commit f107757

11 files changed

Lines changed: 196 additions & 18 deletions

File tree

fs/xfs/libxfs/xfs_fs.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,7 @@ struct xfs_ag_geometry {
294294
#define XFS_AG_GEOM_SICK_FINOBT (1 << 7) /* free inode index */
295295
#define XFS_AG_GEOM_SICK_RMAPBT (1 << 8) /* reverse mappings */
296296
#define XFS_AG_GEOM_SICK_REFCNTBT (1 << 9) /* reference counts */
297+
#define XFS_AG_GEOM_SICK_INODES (1 << 10) /* bad inodes were seen */
297298

298299
/*
299300
* Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT
@@ -713,9 +714,10 @@ struct xfs_scrub_metadata {
713714
#define XFS_SCRUB_TYPE_FSCOUNTERS 24 /* fs summary counters */
714715
#define XFS_SCRUB_TYPE_QUOTACHECK 25 /* quota counters */
715716
#define XFS_SCRUB_TYPE_NLINKS 26 /* inode link counts */
717+
#define XFS_SCRUB_TYPE_HEALTHY 27 /* everything checked out ok */
716718

717719
/* Number of scrub subcommands. */
718-
#define XFS_SCRUB_TYPE_NR 27
720+
#define XFS_SCRUB_TYPE_NR 28
719721

720722
/* i: Repair this metadata. */
721723
#define XFS_SCRUB_IFLAG_REPAIR (1u << 0)

fs/xfs/libxfs/xfs_health.h

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,19 @@
3131
* - !checked && sick => errors have been observed during normal operation,
3232
* but the metadata has not been checked thoroughly
3333
* - !checked && !sick => has not been examined since mount
34+
*
35+
* Evidence of health problems can be sorted into three basic categories:
36+
*
37+
* a) Primary evidence, which signals that something is defective within the
38+
* general grouping of metadata.
39+
*
40+
* b) Secondary evidence, which are side effects of primary problems but are
41+
* not themselves problems. These can be forgotten when the primary
42+
* health problems are addressed.
43+
*
44+
* c) Indirect evidence, which points to something being wrong in another
45+
* group, but we had to release resources and this is all that's left of
46+
* that state.
3447
*/
3548

3649
struct xfs_mount;
@@ -63,6 +76,7 @@ struct xfs_da_args;
6376
#define XFS_SICK_AG_FINOBT (1 << 7) /* free inode index */
6477
#define XFS_SICK_AG_RMAPBT (1 << 8) /* reverse mappings */
6578
#define XFS_SICK_AG_REFCNTBT (1 << 9) /* reference counts */
79+
#define XFS_SICK_AG_INODES (1 << 10) /* inactivated bad inodes */
6680

6781
/* Observable health issues for inode metadata. */
6882
#define XFS_SICK_INO_CORE (1 << 0) /* inode core */
@@ -79,6 +93,9 @@ struct xfs_da_args;
7993
#define XFS_SICK_INO_DIR_ZAPPED (1 << 10) /* directory erased */
8094
#define XFS_SICK_INO_SYMLINK_ZAPPED (1 << 11) /* symlink erased */
8195

96+
/* Don't propagate sick status to ag health summary during inactivation */
97+
#define XFS_SICK_INO_FORGET (1 << 12)
98+
8299
/* Primary evidence of health problems in a given group. */
83100
#define XFS_SICK_FS_PRIMARY (XFS_SICK_FS_COUNTERS | \
84101
XFS_SICK_FS_UQUOTA | \
@@ -115,6 +132,36 @@ struct xfs_da_args;
115132
XFS_SICK_INO_DIR_ZAPPED | \
116133
XFS_SICK_INO_SYMLINK_ZAPPED)
117134

135+
/* Secondary state related to (but not primary evidence of) health problems. */
136+
#define XFS_SICK_FS_SECONDARY (0)
137+
#define XFS_SICK_RT_SECONDARY (0)
138+
#define XFS_SICK_AG_SECONDARY (0)
139+
#define XFS_SICK_INO_SECONDARY (XFS_SICK_INO_FORGET)
140+
141+
/* Evidence of health problems elsewhere. */
142+
#define XFS_SICK_FS_INDIRECT (0)
143+
#define XFS_SICK_RT_INDIRECT (0)
144+
#define XFS_SICK_AG_INDIRECT (XFS_SICK_AG_INODES)
145+
#define XFS_SICK_INO_INDIRECT (0)
146+
147+
/* All health masks. */
148+
#define XFS_SICK_FS_ALL (XFS_SICK_FS_PRIMARY | \
149+
XFS_SICK_FS_SECONDARY | \
150+
XFS_SICK_FS_INDIRECT)
151+
152+
#define XFS_SICK_RT_ALL (XFS_SICK_RT_PRIMARY | \
153+
XFS_SICK_RT_SECONDARY | \
154+
XFS_SICK_RT_INDIRECT)
155+
156+
#define XFS_SICK_AG_ALL (XFS_SICK_AG_PRIMARY | \
157+
XFS_SICK_AG_SECONDARY | \
158+
XFS_SICK_AG_INDIRECT)
159+
160+
#define XFS_SICK_INO_ALL (XFS_SICK_INO_PRIMARY | \
161+
XFS_SICK_INO_SECONDARY | \
162+
XFS_SICK_INO_INDIRECT | \
163+
XFS_SICK_INO_ZAPPED)
164+
118165
/*
119166
* These functions must be provided by the xfs implementation. Function
120167
* behavior with respect to the first argument should be as follows:

fs/xfs/libxfs/xfs_inode_buf.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ xfs_imap_to_bp(
139139
imap->im_len, XBF_UNMAPPED, bpp, &xfs_inode_buf_ops);
140140
if (xfs_metadata_is_sick(error))
141141
xfs_agno_mark_sick(mp, xfs_daddr_to_agno(mp, imap->im_blkno),
142-
XFS_SICK_AG_INOBT);
142+
XFS_SICK_AG_INODES);
143143
return error;
144144
}
145145

fs/xfs/scrub/health.c

Lines changed: 77 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "xfs_health.h"
1515
#include "scrub/scrub.h"
1616
#include "scrub/health.h"
17+
#include "scrub/common.h"
1718

1819
/*
1920
* Scrub and In-Core Filesystem Health Assessments
@@ -149,6 +150,24 @@ xchk_file_looks_zapped(
149150
return xfs_inode_has_sickness(sc->ip, mask);
150151
}
151152

153+
/*
154+
* Scrub gave the filesystem a clean bill of health, so clear all the indirect
155+
* markers of past problems (at least for the fs and ags) so that we can be
156+
* healthy again.
157+
*/
158+
STATIC void
159+
xchk_mark_all_healthy(
160+
struct xfs_mount *mp)
161+
{
162+
struct xfs_perag *pag;
163+
xfs_agnumber_t agno;
164+
165+
xfs_fs_mark_healthy(mp, XFS_SICK_FS_INDIRECT);
166+
xfs_rt_mark_healthy(mp, XFS_SICK_RT_INDIRECT);
167+
for_each_perag(mp, agno, pag)
168+
xfs_ag_mark_healthy(pag, XFS_SICK_AG_INDIRECT);
169+
}
170+
152171
/*
153172
* Update filesystem health assessments based on what we found and did.
154173
*
@@ -166,6 +185,18 @@ xchk_update_health(
166185
struct xfs_perag *pag;
167186
bool bad;
168187

188+
/*
189+
* The HEALTHY scrub type is a request from userspace to clear all the
190+
* indirect flags after a clean scan of the entire filesystem. As such
191+
* there's no sick flag defined for it, so we branch here ahead of the
192+
* mask check.
193+
*/
194+
if (sc->sm->sm_type == XFS_SCRUB_TYPE_HEALTHY &&
195+
!(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
196+
xchk_mark_all_healthy(sc->mp);
197+
return;
198+
}
199+
169200
if (!sc->sick_mask)
170201
return;
171202

@@ -183,9 +214,19 @@ xchk_update_health(
183214
case XHG_INO:
184215
if (!sc->ip)
185216
return;
186-
if (bad)
187-
xfs_inode_mark_corrupt(sc->ip, sc->sick_mask);
188-
else
217+
if (bad) {
218+
unsigned int mask = sc->sick_mask;
219+
220+
/*
221+
* If we're coming in for repairs then we don't want
222+
* sickness flags to propagate to the incore health
223+
* status if the inode gets inactivated before we can
224+
* fix it.
225+
*/
226+
if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
227+
mask |= XFS_SICK_INO_FORGET;
228+
xfs_inode_mark_corrupt(sc->ip, mask);
229+
} else
189230
xfs_inode_mark_healthy(sc->ip, sc->sick_mask);
190231
break;
191232
case XHG_FS:
@@ -275,3 +316,36 @@ xchk_ag_btree_healthy_enough(
275316

276317
return true;
277318
}
319+
320+
/*
321+
* Quick scan to double-check that there isn't any evidence of lingering
322+
* primary health problems. If we're still clear, then the health update will
323+
* take care of clearing the indirect evidence.
324+
*/
325+
int
326+
xchk_health_record(
327+
struct xfs_scrub *sc)
328+
{
329+
struct xfs_mount *mp = sc->mp;
330+
struct xfs_perag *pag;
331+
xfs_agnumber_t agno;
332+
333+
unsigned int sick;
334+
unsigned int checked;
335+
336+
xfs_fs_measure_sickness(mp, &sick, &checked);
337+
if (sick & XFS_SICK_FS_PRIMARY)
338+
xchk_set_corrupt(sc);
339+
340+
xfs_rt_measure_sickness(mp, &sick, &checked);
341+
if (sick & XFS_SICK_RT_PRIMARY)
342+
xchk_set_corrupt(sc);
343+
344+
for_each_perag(mp, agno, pag) {
345+
xfs_ag_measure_sickness(pag, &sick, &checked);
346+
if (sick & XFS_SICK_AG_PRIMARY)
347+
xchk_set_corrupt(sc);
348+
}
349+
350+
return 0;
351+
}

fs/xfs/scrub/health.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,6 @@ bool xchk_ag_btree_healthy_enough(struct xfs_scrub *sc, struct xfs_perag *pag,
1212
xfs_btnum_t btnum);
1313
void xchk_mark_healthy_if_clean(struct xfs_scrub *sc, unsigned int mask);
1414
bool xchk_file_looks_zapped(struct xfs_scrub *sc, unsigned int mask);
15+
int xchk_health_record(struct xfs_scrub *sc);
1516

1617
#endif /* __XFS_SCRUB_HEALTH_H__ */

fs/xfs/scrub/repair.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "xfs_errortag.h"
3131
#include "xfs_error.h"
3232
#include "xfs_reflink.h"
33+
#include "xfs_health.h"
3334
#include "scrub/scrub.h"
3435
#include "scrub/common.h"
3536
#include "scrub/trace.h"

fs/xfs/scrub/scrub.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,12 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
378378
.scrub = xchk_nlinks,
379379
.repair = xrep_nlinks,
380380
},
381+
[XFS_SCRUB_TYPE_HEALTHY] = { /* fs healthy; clean all reminders */
382+
.type = ST_FS,
383+
.setup = xchk_setup_fs,
384+
.scrub = xchk_health_record,
385+
.repair = xrep_notsupported,
386+
},
381387
};
382388

383389
static int

fs/xfs/scrub/trace.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_PQUOTA);
6969
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS);
7070
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_QUOTACHECK);
7171
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_NLINKS);
72+
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_HEALTHY);
7273

7374
#define XFS_SCRUB_TYPE_STRINGS \
7475
{ XFS_SCRUB_TYPE_PROBE, "probe" }, \
@@ -97,7 +98,8 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_NLINKS);
9798
{ XFS_SCRUB_TYPE_PQUOTA, "prjquota" }, \
9899
{ XFS_SCRUB_TYPE_FSCOUNTERS, "fscounters" }, \
99100
{ XFS_SCRUB_TYPE_QUOTACHECK, "quotacheck" }, \
100-
{ XFS_SCRUB_TYPE_NLINKS, "nlinks" }
101+
{ XFS_SCRUB_TYPE_NLINKS, "nlinks" }, \
102+
{ XFS_SCRUB_TYPE_HEALTHY, "healthy" }
101103

102104
#define XFS_SCRUB_FLAG_STRINGS \
103105
{ XFS_SCRUB_IFLAG_REPAIR, "repair" }, \

0 commit comments

Comments
 (0)