Skip to content

Commit b8accfd

Browse files
author
Darrick J. Wong
committed
xfs: add media verification ioctl
Add a new privileged ioctl so that xfs_scrub can ask the kernel to verify the media of the devices backing an xfs filesystem, and have any resulting media errors reported to fsnotify and xfs_healer.

To accomplish this, the kernel allocates a folio sized between the base page size and 1MB, and issues read IOs to a gradually incrementing range of one of the storage devices underlying an xfs filesystem. If any error occurs, that raw error is reported to the calling process. If the error happens to be one of the ones that the kernel considers indicative of data loss, then it will also be reported to xfs_healthmon and fsnotify.

Driving the verification from the kernel enables xfs (and by extension xfs_scrub) to have precise control over the size and error handling of IOs that are issued to the underlying block device, and to emit notifications about problems to other relevant kernel subsystems immediately.

Note that the caller is also allowed to reduce the size of the IO and to ask for a relaxation period after each IO.

Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
1 parent 8b85dc4 commit b8accfd

6 files changed

Lines changed: 590 additions & 0 deletions

File tree

fs/xfs/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ xfs-y += xfs_aops.o \
106106
xfs_symlink.o \
107107
xfs_sysfs.o \
108108
xfs_trans.o \
109+
xfs_verify_media.o \
109110
xfs_xattr.o
110111

111112
# low-level transaction/log code

fs/xfs/libxfs/xfs_fs.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1160,6 +1160,34 @@ struct xfs_health_file_on_monitored_fs {
11601160
__u32 flags; /* zero for now */
11611161
};
11621162

1163+
/* Verify the media of the underlying devices */
1164+
/*
 * Argument structure passed to XFS_IOC_VERIFY_MEDIA (declared _IOWR, so it
 * is copied in both directions).  The per-field tags appear to mean:
 * "I" = input from the caller, "O" = output from the kernel, "IO" = both.
 * Layout: two __u32, two __u64, then four __u32 members, so every member
 * sits at its natural alignment without implicit padding.
 */
struct xfs_verify_media {
1165+
__u32 me_dev; /* I: XFS_DEV_{DATA,LOG,RT} */
1166+
__u32 me_flags; /* I: XFS_VERIFY_MEDIA_* */
1167+
1168+
/*
1169+
* IO: inclusive start of disk range to verify, in 512-byte blocks.
1170+
* Will be adjusted upwards as media verification succeeds.
1171+
*/
1172+
__u64 me_start_daddr;
1173+
1174+
/*
1175+
* IO: exclusive end of the disk range to verify, in 512-byte blocks.
1176+
* Can be adjusted downwards to match device size.
1177+
*/
1178+
__u64 me_end_daddr;
1179+
1180+
__u32 me_ioerror; /* O: I/O error (positive) */
1181+
__u32 me_max_io_size; /* I: maximum IO size in bytes */
1182+
1183+
__u32 me_rest_us; /* I: rest time between IOs, usecs */
1184+
__u32 me_pad; /* zero */
1185+
};
1186+
1187+
#define XFS_VERIFY_MEDIA_REPORT (1 << 0) /* report to fsnotify */
1188+
1189+
#define XFS_VERIFY_MEDIA_FLAGS (XFS_VERIFY_MEDIA_REPORT)
1190+
11631191
/*
11641192
* ioctl commands that are used by Linux filesystems
11651193
*/
@@ -1202,6 +1230,8 @@ struct xfs_health_file_on_monitored_fs {
12021230
#define XFS_IOC_HEALTH_MONITOR _IOW ('X', 68, struct xfs_health_monitor)
12031231
#define XFS_IOC_HEALTH_FD_ON_MONITORED_FS \
12041232
_IOW ('X', 69, struct xfs_health_file_on_monitored_fs)
1233+
#define XFS_IOC_VERIFY_MEDIA _IOWR('X', 70, struct xfs_verify_media)
1234+
12051235
/*
12061236
* ioctl commands that replace IRIX syssgi()'s
12071237
*/

fs/xfs/xfs_ioctl.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include "xfs_handle.h"
4343
#include "xfs_rtgroup.h"
4444
#include "xfs_healthmon.h"
45+
#include "xfs_verify_media.h"
4546

4647
#include <linux/mount.h>
4748
#include <linux/fileattr.h>
@@ -1422,6 +1423,8 @@ xfs_file_ioctl(
14221423

14231424
case XFS_IOC_HEALTH_MONITOR:
14241425
return xfs_ioc_health_monitor(filp, arg);
1426+
case XFS_IOC_VERIFY_MEDIA:
1427+
return xfs_ioc_verify_media(filp, arg);
14251428

14261429
default:
14271430
return -ENOTTY;

fs/xfs/xfs_trace.h

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6320,6 +6320,104 @@ TRACE_EVENT(xfs_healthmon_report_file_ioerror,
63206320
__entry->error)
63216321
);
63226322

6323+
/*
 * Tracepoint xfs_verify_media.
 *
 * Captures: the bt_dev of mp->m_ddev_targp (dev), the caller-supplied fdev,
 * the start/end daddr and flags copied from the xfs_verify_media request,
 * the daddr and bbcount arguments, and the size of the supplied folio
 * (bufsize, stored as unsigned int).
 * NOTE(review): presumably emitted for each verification read; confirm at
 * the call site, which is not visible here.
 */
TRACE_EVENT(xfs_verify_media,
6324+
TP_PROTO(const struct xfs_mount *mp, const struct xfs_verify_media *me,
6325+
dev_t fdev, xfs_daddr_t daddr, uint64_t bbcount,
6326+
const struct folio *folio),
6327+
TP_ARGS(mp, me, fdev, daddr, bbcount, folio),
6328+
TP_STRUCT__entry(
6329+
__field(dev_t, dev)
6330+
__field(dev_t, fdev)
6331+
__field(xfs_daddr_t, start_daddr)
6332+
__field(xfs_daddr_t, end_daddr)
6333+
__field(unsigned int, flags)
6334+
__field(xfs_daddr_t, daddr)
6335+
__field(uint64_t, bbcount)
6336+
__field(unsigned int, bufsize)
6337+
),
6338+
TP_fast_assign(
6339+
__entry->dev = mp->m_ddev_targp->bt_dev;
6340+
__entry->fdev = fdev;
6341+
__entry->start_daddr = me->me_start_daddr;
6342+
__entry->end_daddr = me->me_end_daddr;
6343+
__entry->flags = me->me_flags;
6344+
__entry->daddr = daddr;
6345+
__entry->bbcount = bbcount;
6346+
__entry->bufsize = folio_size(folio);
6347+
),
6348+
TP_printk("dev %d:%d fdev %d:%d start_daddr 0x%llx end_daddr 0x%llx flags 0x%x daddr 0x%llx bbcount 0x%llx bufsize 0x%x",
6349+
MAJOR(__entry->dev), MINOR(__entry->dev),
6350+
MAJOR(__entry->fdev), MINOR(__entry->fdev),
6351+
__entry->start_daddr,
6352+
__entry->end_daddr,
6353+
__entry->flags,
6354+
__entry->daddr,
6355+
__entry->bbcount,
6356+
__entry->bufsize)
6357+
);
6358+
6359+
/*
 * Tracepoint xfs_verify_media_end.
 *
 * Captures: the bt_dev of mp->m_ddev_targp (dev), the caller-supplied fdev,
 * and the start/end daddr plus me_ioerror as they stand in the
 * xfs_verify_media request when the event fires.  me_ioerror is a __u32 in
 * the request but is recorded in an int field and printed with %d.
 */
TRACE_EVENT(xfs_verify_media_end,
6360+
TP_PROTO(const struct xfs_mount *mp, const struct xfs_verify_media *me,
6361+
dev_t fdev),
6362+
TP_ARGS(mp, me, fdev),
6363+
TP_STRUCT__entry(
6364+
__field(dev_t, dev)
6365+
__field(dev_t, fdev)
6366+
__field(xfs_daddr_t, start_daddr)
6367+
__field(xfs_daddr_t, end_daddr)
6368+
__field(int, ioerror)
6369+
),
6370+
TP_fast_assign(
6371+
__entry->dev = mp->m_ddev_targp->bt_dev;
6372+
__entry->fdev = fdev;
6373+
__entry->start_daddr = me->me_start_daddr;
6374+
__entry->end_daddr = me->me_end_daddr;
6375+
__entry->ioerror = me->me_ioerror;
6376+
),
6377+
TP_printk("dev %d:%d fdev %d:%d start_daddr 0x%llx end_daddr 0x%llx ioerror %d",
6378+
MAJOR(__entry->dev), MINOR(__entry->dev),
6379+
MAJOR(__entry->fdev), MINOR(__entry->fdev),
6380+
__entry->start_daddr,
6381+
__entry->end_daddr,
6382+
__entry->ioerror)
6383+
);
6384+
6385+
/*
 * Tracepoint xfs_verify_media_error.
 *
 * Captures: the bt_dev of mp->m_ddev_targp (dev), the caller-supplied fdev,
 * the start/end daddr and flags copied from the xfs_verify_media request,
 * the daddr and bbcount arguments, and the blk_status_t argument after
 * conversion with blk_status_to_errno() (recorded as "error").
 */
TRACE_EVENT(xfs_verify_media_error,
6386+
TP_PROTO(const struct xfs_mount *mp, const struct xfs_verify_media *me,
6387+
dev_t fdev, xfs_daddr_t daddr, uint64_t bbcount,
6388+
blk_status_t status),
6389+
TP_ARGS(mp, me, fdev, daddr, bbcount, status),
6390+
TP_STRUCT__entry(
6391+
__field(dev_t, dev)
6392+
__field(dev_t, fdev)
6393+
__field(xfs_daddr_t, start_daddr)
6394+
__field(xfs_daddr_t, end_daddr)
6395+
__field(unsigned int, flags)
6396+
__field(xfs_daddr_t, daddr)
6397+
__field(uint64_t, bbcount)
6398+
__field(int, error)
6399+
),
6400+
TP_fast_assign(
6401+
__entry->dev = mp->m_ddev_targp->bt_dev;
6402+
__entry->fdev = fdev;
6403+
__entry->start_daddr = me->me_start_daddr;
6404+
__entry->end_daddr = me->me_end_daddr;
6405+
__entry->flags = me->me_flags;
6406+
__entry->daddr = daddr;
6407+
__entry->bbcount = bbcount;
6408+
__entry->error = blk_status_to_errno(status);
6409+
),
6410+
TP_printk("dev %d:%d fdev %d:%d start_daddr 0x%llx end_daddr 0x%llx flags 0x%x daddr 0x%llx bbcount 0x%llx error %d",
6411+
MAJOR(__entry->dev), MINOR(__entry->dev),
6412+
MAJOR(__entry->fdev), MINOR(__entry->fdev),
6413+
__entry->start_daddr,
6414+
__entry->end_daddr,
6415+
__entry->flags,
6416+
__entry->daddr,
6417+
__entry->bbcount,
6418+
__entry->error)
6419+
);
6420+
63236421
#endif /* _TRACE_XFS_H */
63246422

63256423
#undef TRACE_INCLUDE_PATH

0 commit comments

Comments
 (0)