Skip to content

Commit dfa8bad

Browse files
author
Darrick J. Wong
committed
xfs: convey file I/O errors to the health monitor
Connect the fserror reporting to the health monitor so that xfs can send events about file I/O errors to the xfs_healer daemon. These events are entirely informational because xfs cannot regenerate user data, so hopefully the fsnotify I/O error event gets noticed by the relevant management systems. Signed-off-by: "Darrick J. Wong" <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de>
1 parent e76e0e3 commit dfa8bad

6 files changed

Lines changed: 198 additions & 0 deletions

File tree

fs/xfs/libxfs/xfs_fs.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1019,6 +1019,9 @@ struct xfs_rtgroup_geometry {
10191019
#define XFS_HEALTH_MONITOR_DOMAIN_RTDEV (6)
10201020
#define XFS_HEALTH_MONITOR_DOMAIN_LOGDEV (7)
10211021

1022+
/* file range events */
1023+
#define XFS_HEALTH_MONITOR_DOMAIN_FILERANGE (8)
1024+
10221025
/* Health monitor event types */
10231026

10241027
/* status of the monitor itself */
@@ -1039,6 +1042,17 @@ struct xfs_rtgroup_geometry {
10391042
/* media errors */
10401043
#define XFS_HEALTH_MONITOR_TYPE_MEDIA_ERROR (7)
10411044

1045+
/* pagecache I/O to a file range failed */
1046+
#define XFS_HEALTH_MONITOR_TYPE_BUFREAD (8)
1047+
#define XFS_HEALTH_MONITOR_TYPE_BUFWRITE (9)
1048+
1049+
/* direct I/O to a file range failed */
1050+
#define XFS_HEALTH_MONITOR_TYPE_DIOREAD (10)
1051+
#define XFS_HEALTH_MONITOR_TYPE_DIOWRITE (11)
1052+
1053+
/* out of band media error reported for a file range */
1054+
#define XFS_HEALTH_MONITOR_TYPE_DATALOST (12)
1055+
10421056
/* lost events */
10431057
struct xfs_health_monitor_lost {
10441058
__u64 count;
@@ -1079,6 +1093,15 @@ struct xfs_health_monitor_shutdown {
10791093
__u32 reasons;
10801094
};
10811095

1096+
/* file range events */
1097+
struct xfs_health_monitor_filerange {
1098+
__u64 pos;
1099+
__u64 len;
1100+
__u64 ino;
1101+
__u32 gen;
1102+
__u32 error;
1103+
};
1104+
10821105
/* disk media errors */
10831106
struct xfs_health_monitor_media {
10841107
__u64 daddr;
@@ -1107,6 +1130,7 @@ struct xfs_health_monitor_event {
11071130
struct xfs_health_monitor_inode inode;
11081131
struct xfs_health_monitor_shutdown shutdown;
11091132
struct xfs_health_monitor_media media;
1133+
struct xfs_health_monitor_filerange filerange;
11101134
} e;
11111135

11121136
/* zeroes */

fs/xfs/xfs_healthmon.c

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,12 @@
2222
#include "xfs_healthmon.h"
2323
#include "xfs_fsops.h"
2424
#include "xfs_notify_failure.h"
25+
#include "xfs_file.h"
2526

2627
#include <linux/anon_inodes.h>
2728
#include <linux/eventpoll.h>
2829
#include <linux/poll.h>
30+
#include <linux/fserror.h>
2931

3032
/*
3133
* Live Health Monitoring
@@ -222,6 +224,27 @@ xfs_healthmon_merge_events(
222224
return true;
223225
}
224226
return false;
227+
228+
case XFS_HEALTHMON_BUFREAD:
229+
case XFS_HEALTHMON_BUFWRITE:
230+
case XFS_HEALTHMON_DIOREAD:
231+
case XFS_HEALTHMON_DIOWRITE:
232+
case XFS_HEALTHMON_DATALOST:
233+
/* logically adjacent file ranges can merge */
234+
if (existing->fino != new->fino || existing->fgen != new->fgen)
235+
return false;
236+
237+
if (existing->fpos + existing->flen == new->fpos) {
238+
existing->flen += new->flen;
239+
return true;
240+
}
241+
242+
if (new->fpos + new->flen == existing->fpos) {
243+
existing->fpos = new->fpos;
244+
existing->flen += new->flen;
245+
return true;
246+
}
247+
return false;
225248
}
226249

227250
return false;
@@ -578,6 +601,55 @@ xfs_healthmon_report_media(
578601
xfs_healthmon_put(hm);
579602
}
580603

604+
static inline enum xfs_healthmon_type file_ioerr_type(enum fserror_type action)
605+
{
606+
switch (action) {
607+
case FSERR_BUFFERED_READ:
608+
return XFS_HEALTHMON_BUFREAD;
609+
case FSERR_BUFFERED_WRITE:
610+
return XFS_HEALTHMON_BUFWRITE;
611+
case FSERR_DIRECTIO_READ:
612+
return XFS_HEALTHMON_DIOREAD;
613+
case FSERR_DIRECTIO_WRITE:
614+
return XFS_HEALTHMON_DIOWRITE;
615+
case FSERR_DATA_LOST:
616+
return XFS_HEALTHMON_DATALOST;
617+
case FSERR_METADATA:
618+
/* filtered out by xfs_fs_report_error */
619+
break;
620+
}
621+
622+
ASSERT(0);
623+
return -1;
624+
}
625+
626+
/* Add a file io error event to the reporting queue. */
627+
void
628+
xfs_healthmon_report_file_ioerror(
629+
struct xfs_inode *ip,
630+
const struct fserror_event *p)
631+
{
632+
struct xfs_healthmon_event event = {
633+
.type = file_ioerr_type(p->type),
634+
.domain = XFS_HEALTHMON_FILERANGE,
635+
.fino = ip->i_ino,
636+
.fgen = VFS_I(ip)->i_generation,
637+
.fpos = p->pos,
638+
.flen = p->len,
639+
/* send positive error number to userspace */
640+
.error = -p->error,
641+
};
642+
struct xfs_healthmon *hm = xfs_healthmon_get(ip->i_mount);
643+
644+
if (!hm)
645+
return;
646+
647+
trace_xfs_healthmon_report_file_ioerror(hm, p);
648+
649+
xfs_healthmon_push(hm, &event);
650+
xfs_healthmon_put(hm);
651+
}
652+
581653
static inline void
582654
xfs_healthmon_reset_outbuf(
583655
struct xfs_healthmon *hm)
@@ -633,6 +705,7 @@ static const unsigned int domain_map[] = {
633705
[XFS_HEALTHMON_DATADEV] = XFS_HEALTH_MONITOR_DOMAIN_DATADEV,
634706
[XFS_HEALTHMON_RTDEV] = XFS_HEALTH_MONITOR_DOMAIN_RTDEV,
635707
[XFS_HEALTHMON_LOGDEV] = XFS_HEALTH_MONITOR_DOMAIN_LOGDEV,
708+
[XFS_HEALTHMON_FILERANGE] = XFS_HEALTH_MONITOR_DOMAIN_FILERANGE,
636709
};
637710

638711
static const unsigned int type_map[] = {
@@ -644,6 +717,11 @@ static const unsigned int type_map[] = {
644717
[XFS_HEALTHMON_UNMOUNT] = XFS_HEALTH_MONITOR_TYPE_UNMOUNT,
645718
[XFS_HEALTHMON_SHUTDOWN] = XFS_HEALTH_MONITOR_TYPE_SHUTDOWN,
646719
[XFS_HEALTHMON_MEDIA_ERROR] = XFS_HEALTH_MONITOR_TYPE_MEDIA_ERROR,
720+
[XFS_HEALTHMON_BUFREAD] = XFS_HEALTH_MONITOR_TYPE_BUFREAD,
721+
[XFS_HEALTHMON_BUFWRITE] = XFS_HEALTH_MONITOR_TYPE_BUFWRITE,
722+
[XFS_HEALTHMON_DIOREAD] = XFS_HEALTH_MONITOR_TYPE_DIOREAD,
723+
[XFS_HEALTHMON_DIOWRITE] = XFS_HEALTH_MONITOR_TYPE_DIOWRITE,
724+
[XFS_HEALTHMON_DATALOST] = XFS_HEALTH_MONITOR_TYPE_DATALOST,
647725
};
648726

649727
/* Render event as a V0 structure */
@@ -701,6 +779,13 @@ xfs_healthmon_format_v0(
701779
hme.e.media.daddr = event->daddr;
702780
hme.e.media.bbcount = event->bbcount;
703781
break;
782+
case XFS_HEALTHMON_FILERANGE:
783+
hme.e.filerange.ino = event->fino;
784+
hme.e.filerange.gen = event->fgen;
785+
hme.e.filerange.pos = event->fpos;
786+
hme.e.filerange.len = event->flen;
787+
hme.e.filerange.error = abs(event->error);
788+
break;
704789
default:
705790
break;
706791
}

fs/xfs/xfs_healthmon.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,13 @@ enum xfs_healthmon_type {
8282

8383
/* media errors */
8484
XFS_HEALTHMON_MEDIA_ERROR,
85+
86+
/* file range events */
87+
XFS_HEALTHMON_BUFREAD,
88+
XFS_HEALTHMON_BUFWRITE,
89+
XFS_HEALTHMON_DIOREAD,
90+
XFS_HEALTHMON_DIOWRITE,
91+
XFS_HEALTHMON_DATALOST,
8592
};
8693

8794
enum xfs_healthmon_domain {
@@ -97,6 +104,9 @@ enum xfs_healthmon_domain {
97104
XFS_HEALTHMON_DATADEV,
98105
XFS_HEALTHMON_RTDEV,
99106
XFS_HEALTHMON_LOGDEV,
107+
108+
/* file range events */
109+
XFS_HEALTHMON_FILERANGE,
100110
};
101111

102112
struct xfs_healthmon_event {
@@ -139,6 +149,14 @@ struct xfs_healthmon_event {
139149
xfs_daddr_t daddr;
140150
uint64_t bbcount;
141151
};
152+
/* file range events */
153+
struct {
154+
xfs_ino_t fino;
155+
loff_t fpos;
156+
uint64_t flen;
157+
uint32_t fgen;
158+
int error;
159+
};
142160
};
143161
};
144162

@@ -157,6 +175,9 @@ void xfs_healthmon_report_shutdown(struct xfs_mount *mp, uint32_t flags);
157175
void xfs_healthmon_report_media(struct xfs_mount *mp, enum xfs_device fdev,
158176
xfs_daddr_t daddr, uint64_t bbcount);
159177

178+
void xfs_healthmon_report_file_ioerror(struct xfs_inode *ip,
179+
const struct fserror_event *p);
180+
160181
long xfs_ioc_health_monitor(struct file *file,
161182
struct xfs_health_monitor __user *arg);
162183

fs/xfs/xfs_super.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,14 @@
4747
#include "xfs_parent.h"
4848
#include "xfs_rtalloc.h"
4949
#include "xfs_zone_alloc.h"
50+
#include "xfs_healthmon.h"
5051
#include "scrub/stats.h"
5152
#include "scrub/rcbag_btree.h"
5253

5354
#include <linux/magic.h>
5455
#include <linux/fs_context.h>
5556
#include <linux/fs_parser.h>
57+
#include <linux/fserror.h>
5658

5759
static const struct super_operations xfs_super_operations;
5860

@@ -1301,6 +1303,15 @@ xfs_fs_show_stats(
13011303
return 0;
13021304
}
13031305

1306+
static void
1307+
xfs_fs_report_error(
1308+
const struct fserror_event *event)
1309+
{
1310+
/* healthmon already knows about non-inode and metadata errors */
1311+
if (event->inode && event->type != FSERR_METADATA)
1312+
xfs_healthmon_report_file_ioerror(XFS_I(event->inode), event);
1313+
}
1314+
13041315
static const struct super_operations xfs_super_operations = {
13051316
.alloc_inode = xfs_fs_alloc_inode,
13061317
.destroy_inode = xfs_fs_destroy_inode,
@@ -1317,6 +1328,7 @@ static const struct super_operations xfs_super_operations = {
13171328
.free_cached_objects = xfs_fs_free_cached_objects,
13181329
.shutdown = xfs_fs_shutdown,
13191330
.show_stats = xfs_fs_show_stats,
1331+
.report_error = xfs_fs_report_error,
13201332
};
13211333

13221334
static int

fs/xfs/xfs_trace.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@
5454
#include "xfs_health.h"
5555
#include "xfs_healthmon.h"
5656
#include "xfs_notify_failure.h"
57+
#include "xfs_file.h"
58+
#include <linux/fserror.h>
5759

5860
/*
5961
* We include this last to have the helpers above available for the trace

fs/xfs/xfs_trace.h

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ struct xfs_rtgroup;
105105
struct xfs_open_zone;
106106
struct xfs_healthmon_event;
107107
struct xfs_healthmon;
108+
struct fserror_event;
108109

109110
#define XFS_ATTR_FILTER_FLAGS \
110111
{ XFS_ATTR_ROOT, "ROOT" }, \
@@ -6092,6 +6093,12 @@ DECLARE_EVENT_CLASS(xfs_healthmon_event_class,
60926093
__entry->offset = event->daddr;
60936094
__entry->length = event->bbcount;
60946095
break;
6096+
case XFS_HEALTHMON_FILERANGE:
6097+
__entry->ino = event->fino;
6098+
__entry->gen = event->fgen;
6099+
__entry->offset = event->fpos;
6100+
__entry->length = event->flen;
6101+
break;
60956102
}
60966103
),
60976104
TP_printk("dev %d:%d type %s domain %s mask 0x%x ino 0x%llx gen 0x%x offset 0x%llx len 0x%llx group 0x%x lost %llu",
@@ -6266,6 +6273,53 @@ TRACE_EVENT(xfs_healthmon_report_media,
62666273
__entry->bbcount)
62676274
);
62686275

6276+
#define FS_ERROR_STRINGS \
6277+
{ FSERR_BUFFERED_READ, "buffered_read" }, \
6278+
{ FSERR_BUFFERED_WRITE, "buffered_write" }, \
6279+
{ FSERR_DIRECTIO_READ, "directio_read" }, \
6280+
{ FSERR_DIRECTIO_WRITE, "directio_write" }, \
6281+
{ FSERR_DATA_LOST, "data_lost" }, \
6282+
{ FSERR_METADATA, "metadata" }
6283+
6284+
TRACE_DEFINE_ENUM(FSERR_BUFFERED_READ);
6285+
TRACE_DEFINE_ENUM(FSERR_BUFFERED_WRITE);
6286+
TRACE_DEFINE_ENUM(FSERR_DIRECTIO_READ);
6287+
TRACE_DEFINE_ENUM(FSERR_DIRECTIO_WRITE);
6288+
TRACE_DEFINE_ENUM(FSERR_DATA_LOST);
6289+
TRACE_DEFINE_ENUM(FSERR_METADATA);
6290+
6291+
TRACE_EVENT(xfs_healthmon_report_file_ioerror,
6292+
TP_PROTO(const struct xfs_healthmon *hm,
6293+
const struct fserror_event *p),
6294+
TP_ARGS(hm, p),
6295+
TP_STRUCT__entry(
6296+
__field(dev_t, dev)
6297+
__field(unsigned int, type)
6298+
__field(unsigned long long, ino)
6299+
__field(unsigned int, gen)
6300+
__field(long long, pos)
6301+
__field(unsigned long long, len)
6302+
__field(int, error)
6303+
),
6304+
TP_fast_assign(
6305+
__entry->dev = hm->dev;
6306+
__entry->type = p->type;
6307+
__entry->ino = XFS_I(p->inode)->i_ino;
6308+
__entry->gen = p->inode->i_generation;
6309+
__entry->pos = p->pos;
6310+
__entry->len = p->len;
6311+
__entry->error = p->error;
6312+
),
6313+
TP_printk("dev %d:%d ino 0x%llx gen 0x%x op %s pos 0x%llx bytecount 0x%llx error %d",
6314+
MAJOR(__entry->dev), MINOR(__entry->dev),
6315+
__entry->ino,
6316+
__entry->gen,
6317+
__print_symbolic(__entry->type, FS_ERROR_STRINGS),
6318+
__entry->pos,
6319+
__entry->len,
6320+
__entry->error)
6321+
);
6322+
62696323
#endif /* _TRACE_XFS_H */
62706324

62716325
#undef TRACE_INCLUDE_PATH

0 commit comments

Comments
 (0)