Skip to content

Commit 5eb4cb1

Browse files
author
Darrick J. Wong
committed
xfs: convey metadata health events to the health monitor
Connect the filesystem metadata health event collection system to the health monitor so that xfs can send events to xfs_healer as it collects information. Signed-off-by: "Darrick J. Wong" <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de>
1 parent 25ca57f commit 5eb4cb1

6 files changed

Lines changed: 511 additions & 2 deletions

File tree

fs/xfs/libxfs/xfs_fs.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1008,6 +1008,12 @@ struct xfs_rtgroup_geometry {
10081008
/* affects the whole fs */
10091009
#define XFS_HEALTH_MONITOR_DOMAIN_MOUNT (0)
10101010

1011+
/* metadata health events */
1012+
#define XFS_HEALTH_MONITOR_DOMAIN_FS (1)
1013+
#define XFS_HEALTH_MONITOR_DOMAIN_AG (2)
1014+
#define XFS_HEALTH_MONITOR_DOMAIN_INODE (3)
1015+
#define XFS_HEALTH_MONITOR_DOMAIN_RTGROUP (4)
1016+
10111017
/* Health monitor event types */
10121018

10131019
/* status of the monitor itself */
@@ -1017,11 +1023,37 @@ struct xfs_rtgroup_geometry {
10171023
/* filesystem was unmounted */
10181024
#define XFS_HEALTH_MONITOR_TYPE_UNMOUNT (2)
10191025

1026+
/* metadata health events */
1027+
#define XFS_HEALTH_MONITOR_TYPE_SICK (3)
1028+
#define XFS_HEALTH_MONITOR_TYPE_CORRUPT (4)
1029+
#define XFS_HEALTH_MONITOR_TYPE_HEALTHY (5)
1030+
10201031
/*
* lost events
*
* Event payload; appears as member "lost" of the "e" union in
* struct xfs_health_monitor_event.
*/
10211032
struct xfs_health_monitor_lost {
10221033
/* presumably the number of events that were dropped -- confirm */
__u64 count;
10231034
};
10241035

1036+
/*
* fs/rt metadata
*
* Event payload; appears as member "fs" of the "e" union in
* struct xfs_health_monitor_event.
*/
1037+
struct xfs_health_monitor_fs {
1038+
/* XFS_FSOP_GEOM_SICK_* flags */
1039+
__u32 mask;
1040+
};
1041+
1042+
/*
* ag/rtgroup metadata
*
* Event payload; appears as member "group" of the "e" union in
* struct xfs_health_monitor_event.
*/
1043+
struct xfs_health_monitor_group {
1044+
/* XFS_{AG,RTGROUP}_SICK_* flags */
/*
* NOTE(review): the ioctl-side flag names used by the translation tables
* in xfs_health.c are XFS_AG_GEOM_SICK_* and XFS_RTGROUP_GEOM_SICK_*;
* the comment above likely means those -- confirm.
*/
1045+
__u32 mask;
1046+
/* presumably the AG or rtgroup number that the mask applies to -- confirm */
__u32 gno;
1047+
};
1048+
1049+
/*
* inode metadata
*
* Event payload; appears as member "inode" of the "e" union in
* struct xfs_health_monitor_event.
*/
1050+
struct xfs_health_monitor_inode {
1051+
/* XFS_BS_SICK_* flags */
1052+
__u32 mask;
1053+
/* presumably the inode generation number -- confirm */
__u32 gen;
1054+
/* presumably the inode number -- confirm */
__u64 ino;
1055+
};
1056+
10251057
struct xfs_health_monitor_event {
10261058
/* XFS_HEALTH_MONITOR_DOMAIN_* */
10271059
__u32 domain;
@@ -1039,6 +1071,9 @@ struct xfs_health_monitor_event {
10391071
*/
10401072
union {
10411073
struct xfs_health_monitor_lost lost;
1074+
struct xfs_health_monitor_fs fs;
1075+
struct xfs_health_monitor_group group;
1076+
struct xfs_health_monitor_inode inode;
10421077
} e;
10431078

10441079
/* zeroes */

fs/xfs/libxfs/xfs_health.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,4 +289,9 @@ void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bulkstat *bs);
289289
#define xfs_metadata_is_sick(error) \
290290
(unlikely((error) == -EFSCORRUPTED || (error) == -EFSBADCRC))
291291

292+
unsigned int xfs_healthmon_inode_mask(unsigned int sick_mask);
293+
unsigned int xfs_healthmon_rtgroup_mask(unsigned int sick_mask);
294+
unsigned int xfs_healthmon_perag_mask(unsigned int sick_mask);
295+
unsigned int xfs_healthmon_fs_mask(unsigned int sick_mask);
296+
292297
#endif /* __XFS_HEALTH_H__ */

fs/xfs/xfs_health.c

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,14 +108,19 @@ xfs_fs_mark_sick(
108108
struct xfs_mount *mp,
109109
unsigned int mask)
110110
{
111+
unsigned int old_mask;
112+
111113
ASSERT(!(mask & ~XFS_SICK_FS_ALL));
112114
trace_xfs_fs_mark_sick(mp, mask);
113115

114116
spin_lock(&mp->m_sb_lock);
117+
old_mask = mp->m_fs_sick;
115118
mp->m_fs_sick |= mask;
116119
spin_unlock(&mp->m_sb_lock);
117120

118121
fserror_report_metadata(mp->m_super, -EFSCORRUPTED, GFP_NOFS);
122+
if (mask)
123+
xfs_healthmon_report_fs(mp, XFS_HEALTHMON_SICK, old_mask, mask);
119124
}
120125

121126
/* Mark per-fs metadata as having been checked and found unhealthy by fsck. */
@@ -124,15 +129,21 @@ xfs_fs_mark_corrupt(
124129
struct xfs_mount *mp,
125130
unsigned int mask)
126131
{
132+
unsigned int old_mask;
133+
127134
ASSERT(!(mask & ~XFS_SICK_FS_ALL));
128135
trace_xfs_fs_mark_corrupt(mp, mask);
129136

130137
spin_lock(&mp->m_sb_lock);
138+
old_mask = mp->m_fs_sick;
131139
mp->m_fs_sick |= mask;
132140
mp->m_fs_checked |= mask;
133141
spin_unlock(&mp->m_sb_lock);
134142

135143
fserror_report_metadata(mp->m_super, -EFSCORRUPTED, GFP_NOFS);
144+
if (mask)
145+
xfs_healthmon_report_fs(mp, XFS_HEALTHMON_CORRUPT, old_mask,
146+
mask);
136147
}
137148

138149
/* Mark a per-fs metadata healed. */
@@ -141,15 +152,22 @@ xfs_fs_mark_healthy(
141152
struct xfs_mount *mp,
142153
unsigned int mask)
143154
{
155+
unsigned int old_mask;
156+
144157
ASSERT(!(mask & ~XFS_SICK_FS_ALL));
145158
trace_xfs_fs_mark_healthy(mp, mask);
146159

147160
spin_lock(&mp->m_sb_lock);
161+
old_mask = mp->m_fs_sick;
148162
mp->m_fs_sick &= ~mask;
149163
if (!(mp->m_fs_sick & XFS_SICK_FS_PRIMARY))
150164
mp->m_fs_sick &= ~XFS_SICK_FS_SECONDARY;
151165
mp->m_fs_checked |= mask;
152166
spin_unlock(&mp->m_sb_lock);
167+
168+
if (mask)
169+
xfs_healthmon_report_fs(mp, XFS_HEALTHMON_HEALTHY, old_mask,
170+
mask);
153171
}
154172

155173
/* Sample which per-fs metadata are unhealthy. */
@@ -199,14 +217,20 @@ xfs_group_mark_sick(
199217
struct xfs_group *xg,
200218
unsigned int mask)
201219
{
220+
unsigned int old_mask;
221+
202222
xfs_group_check_mask(xg, mask);
203223
trace_xfs_group_mark_sick(xg, mask);
204224

205225
spin_lock(&xg->xg_state_lock);
226+
old_mask = xg->xg_sick;
206227
xg->xg_sick |= mask;
207228
spin_unlock(&xg->xg_state_lock);
208229

209230
fserror_report_metadata(xg->xg_mount->m_super, -EFSCORRUPTED, GFP_NOFS);
231+
if (mask)
232+
xfs_healthmon_report_group(xg, XFS_HEALTHMON_SICK, old_mask,
233+
mask);
210234
}
211235

212236
/*
@@ -217,15 +241,21 @@ xfs_group_mark_corrupt(
217241
struct xfs_group *xg,
218242
unsigned int mask)
219243
{
244+
unsigned int old_mask;
245+
220246
xfs_group_check_mask(xg, mask);
221247
trace_xfs_group_mark_corrupt(xg, mask);
222248

223249
spin_lock(&xg->xg_state_lock);
250+
old_mask = xg->xg_sick;
224251
xg->xg_sick |= mask;
225252
xg->xg_checked |= mask;
226253
spin_unlock(&xg->xg_state_lock);
227254

228255
fserror_report_metadata(xg->xg_mount->m_super, -EFSCORRUPTED, GFP_NOFS);
256+
if (mask)
257+
xfs_healthmon_report_group(xg, XFS_HEALTHMON_CORRUPT, old_mask,
258+
mask);
229259
}
230260

231261
/*
@@ -236,15 +266,22 @@ xfs_group_mark_healthy(
236266
struct xfs_group *xg,
237267
unsigned int mask)
238268
{
269+
unsigned int old_mask;
270+
239271
xfs_group_check_mask(xg, mask);
240272
trace_xfs_group_mark_healthy(xg, mask);
241273

242274
spin_lock(&xg->xg_state_lock);
275+
old_mask = xg->xg_sick;
243276
xg->xg_sick &= ~mask;
244277
if (!(xg->xg_sick & XFS_SICK_AG_PRIMARY))
245278
xg->xg_sick &= ~XFS_SICK_AG_SECONDARY;
246279
xg->xg_checked |= mask;
247280
spin_unlock(&xg->xg_state_lock);
281+
282+
if (mask)
283+
xfs_healthmon_report_group(xg, XFS_HEALTHMON_HEALTHY, old_mask,
284+
mask);
248285
}
249286

250287
/* Sample which per-ag metadata are unhealthy. */
@@ -283,10 +320,13 @@ xfs_inode_mark_sick(
283320
struct xfs_inode *ip,
284321
unsigned int mask)
285322
{
323+
unsigned int old_mask;
324+
286325
ASSERT(!(mask & ~XFS_SICK_INO_ALL));
287326
trace_xfs_inode_mark_sick(ip, mask);
288327

289328
spin_lock(&ip->i_flags_lock);
329+
old_mask = ip->i_sick;
290330
ip->i_sick |= mask;
291331
spin_unlock(&ip->i_flags_lock);
292332

@@ -300,6 +340,9 @@ xfs_inode_mark_sick(
300340
spin_unlock(&VFS_I(ip)->i_lock);
301341

302342
fserror_report_file_metadata(VFS_I(ip), -EFSCORRUPTED, GFP_NOFS);
343+
if (mask)
344+
xfs_healthmon_report_inode(ip, XFS_HEALTHMON_SICK, old_mask,
345+
mask);
303346
}
304347

305348
/* Mark inode metadata as having been checked and found unhealthy by fsck. */
@@ -308,10 +351,13 @@ xfs_inode_mark_corrupt(
308351
struct xfs_inode *ip,
309352
unsigned int mask)
310353
{
354+
unsigned int old_mask;
355+
311356
ASSERT(!(mask & ~XFS_SICK_INO_ALL));
312357
trace_xfs_inode_mark_corrupt(ip, mask);
313358

314359
spin_lock(&ip->i_flags_lock);
360+
old_mask = ip->i_sick;
315361
ip->i_sick |= mask;
316362
ip->i_checked |= mask;
317363
spin_unlock(&ip->i_flags_lock);
@@ -326,6 +372,9 @@ xfs_inode_mark_corrupt(
326372
spin_unlock(&VFS_I(ip)->i_lock);
327373

328374
fserror_report_file_metadata(VFS_I(ip), -EFSCORRUPTED, GFP_NOFS);
375+
if (mask)
376+
xfs_healthmon_report_inode(ip, XFS_HEALTHMON_CORRUPT, old_mask,
377+
mask);
329378
}
330379

331380
/* Mark parts of an inode healed. */
@@ -334,15 +383,22 @@ xfs_inode_mark_healthy(
334383
struct xfs_inode *ip,
335384
unsigned int mask)
336385
{
386+
unsigned int old_mask;
387+
337388
ASSERT(!(mask & ~XFS_SICK_INO_ALL));
338389
trace_xfs_inode_mark_healthy(ip, mask);
339390

340391
spin_lock(&ip->i_flags_lock);
392+
old_mask = ip->i_sick;
341393
ip->i_sick &= ~mask;
342394
if (!(ip->i_sick & XFS_SICK_INO_PRIMARY))
343395
ip->i_sick &= ~XFS_SICK_INO_SECONDARY;
344396
ip->i_checked |= mask;
345397
spin_unlock(&ip->i_flags_lock);
398+
399+
if (mask)
400+
xfs_healthmon_report_inode(ip, XFS_HEALTHMON_HEALTHY, old_mask,
401+
mask);
346402
}
347403

348404
/* Sample which parts of an inode are unhealthy. */
@@ -422,6 +478,25 @@ xfs_fsop_geom_health(
422478
}
423479
}
424480

481+
/*
482+
* Translate XFS_SICK_FS_* into XFS_FSOP_GEOM_SICK_* except for the rt free
483+
* space codes, which are sent via the rtgroup events.
484+
*
* Returns the OR of m->ioctl_mask over every fs_map entry visited by
* for_each_sick_map() whose m->sick_mask shares at least one bit with
* @sick_mask; returns 0 if no entry matches.
*/
485+
unsigned int
486+
xfs_healthmon_fs_mask(
487+
unsigned int sick_mask)
488+
{
489+
const struct ioctl_sick_map *m;
490+
unsigned int ioctl_mask = 0;
491+
492+
for_each_sick_map(fs_map, m) {
493+
if (sick_mask & m->sick_mask)
494+
ioctl_mask |= m->ioctl_mask;
495+
}
496+
497+
return ioctl_mask;
498+
}
499+
425500
static const struct ioctl_sick_map ag_map[] = {
426501
{ XFS_SICK_AG_SB, XFS_AG_GEOM_SICK_SB },
427502
{ XFS_SICK_AG_AGF, XFS_AG_GEOM_SICK_AGF },
@@ -458,6 +533,22 @@ xfs_ag_geom_health(
458533
}
459534
}
460535

536+
/*
* Translate XFS_SICK_AG_* into XFS_AG_GEOM_SICK_*.
*
* Returns the OR of m->ioctl_mask over every ag_map entry visited by
* for_each_sick_map() whose m->sick_mask shares at least one bit with
* @sick_mask; returns 0 if no entry matches.
*/
537+
unsigned int
538+
xfs_healthmon_perag_mask(
539+
unsigned int sick_mask)
540+
{
541+
const struct ioctl_sick_map *m;
542+
unsigned int ioctl_mask = 0;
543+
544+
for_each_sick_map(ag_map, m) {
545+
if (sick_mask & m->sick_mask)
546+
ioctl_mask |= m->ioctl_mask;
547+
}
548+
549+
return ioctl_mask;
550+
}
551+
461552
static const struct ioctl_sick_map rtgroup_map[] = {
462553
{ XFS_SICK_RG_SUPER, XFS_RTGROUP_GEOM_SICK_SUPER },
463554
{ XFS_SICK_RG_BITMAP, XFS_RTGROUP_GEOM_SICK_BITMAP },
@@ -488,6 +579,22 @@ xfs_rtgroup_geom_health(
488579
}
489580
}
490581

582+
/*
* Translate XFS_SICK_RG_* into XFS_RTGROUP_GEOM_SICK_*.
*
* Returns the OR of m->ioctl_mask over every rtgroup_map entry visited by
* for_each_sick_map() whose m->sick_mask shares at least one bit with
* @sick_mask; returns 0 if no entry matches.
*/
583+
unsigned int
584+
xfs_healthmon_rtgroup_mask(
585+
unsigned int sick_mask)
586+
{
587+
const struct ioctl_sick_map *m;
588+
unsigned int ioctl_mask = 0;
589+
590+
for_each_sick_map(rtgroup_map, m) {
591+
if (sick_mask & m->sick_mask)
592+
ioctl_mask |= m->ioctl_mask;
593+
}
594+
595+
return ioctl_mask;
596+
}
597+
491598
static const struct ioctl_sick_map ino_map[] = {
492599
{ XFS_SICK_INO_CORE, XFS_BS_SICK_INODE },
493600
{ XFS_SICK_INO_BMBTD, XFS_BS_SICK_BMBTD },
@@ -526,6 +633,22 @@ xfs_bulkstat_health(
526633
}
527634
}
528635

636+
/*
* Translate XFS_SICK_INO_* into XFS_BS_SICK_*.
*
* Returns the OR of m->ioctl_mask over every ino_map entry visited by
* for_each_sick_map() whose m->sick_mask shares at least one bit with
* @sick_mask; returns 0 if no entry matches.
*/
637+
unsigned int
638+
xfs_healthmon_inode_mask(
639+
unsigned int sick_mask)
640+
{
641+
const struct ioctl_sick_map *m;
642+
unsigned int ioctl_mask = 0;
643+
644+
for_each_sick_map(ino_map, m) {
645+
if (sick_mask & m->sick_mask)
646+
ioctl_mask |= m->ioctl_mask;
647+
}
648+
649+
return ioctl_mask;
650+
}
651+
529652
/* Mark a block mapping sick. */
530653
void
531654
xfs_bmap_mark_sick(

0 commit comments

Comments
 (0)