Skip to content

Commit 8e3ef44

Browse files
author
Chandan Babu R
committed
Merge tag 'repair-inode-mode-6.9_2024-02-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.9-mergeC
xfs: repair inode mode by scanning dirs One missing piece of functionality in the inode record repair code is figuring out what to do with a file whose mode is so corrupt that we cannot tell the type of the file. Originally this was done by guessing the mode from the ondisk inode contents, but Christoph didn't like that because it read from data fork block 0, which could be user-controlled data. Therefore, I've replaced all that with a directory scanner that looks for any dirents that point to the file with the garbage mode. If so, the ftype in the dirent will tell us exactly what mode to set on the file. Since users cannot directly write to the ftype field of a dirent, this should be safe. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Chandan Babu R <chandanbabu@kernel.org> * tag 'repair-inode-mode-6.9_2024-02-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux: xfs: repair file modes by scanning for a dirent pointing to us xfs: create a macro for decoding ftypes in tracepoints xfs: create a predicate to determine if two xfs_names are the same xfs: create a static name for the dot entry too xfs: iscan batching should handle unallocated inodes too xfs: cache a bunch of inodes for repair scans xfs: stagger the starting AG of scrub iscans to reduce contention xfs: allow scrub to hook metadata updates in other writers xfs: implement live inode scan for scrub xfs: speed up xfs_iwalk_adjust_start a little bit
2 parents 4b2f459 + 5385f1a commit 8e3ef44

15 files changed

Lines changed: 1436 additions & 19 deletions

File tree

fs/xfs/Kconfig

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,11 +124,16 @@ config XFS_DRAIN_INTENTS
124124
bool
125125
select JUMP_LABEL if HAVE_ARCH_JUMP_LABEL
126126

127+
config XFS_LIVE_HOOKS
128+
bool
129+
select JUMP_LABEL if HAVE_ARCH_JUMP_LABEL
130+
127131
config XFS_ONLINE_SCRUB
128132
bool "XFS online metadata check support"
129133
default n
130134
depends on XFS_FS
131135
depends on TMPFS && SHMEM
136+
select XFS_LIVE_HOOKS
132137
select XFS_DRAIN_INTENTS
133138
help
134139
If you say Y here you will be able to check metadata on a

fs/xfs/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ xfs-$(CONFIG_FS_DAX) += xfs_notify_failure.o
136136
endif
137137

138138
xfs-$(CONFIG_XFS_DRAIN_INTENTS) += xfs_drain.o
139+
xfs-$(CONFIG_XFS_LIVE_HOOKS) += xfs_hooks.o
139140

140141
# online scrub/repair
141142
ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
@@ -158,6 +159,7 @@ xfs-y += $(addprefix scrub/, \
158159
health.o \
159160
ialloc.o \
160161
inode.o \
162+
iscan.o \
161163
parent.o \
162164
readdir.o \
163165
refcount.o \

fs/xfs/libxfs/xfs_da_format.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,17 @@ struct xfs_da3_intnode {
159159

160160
#define XFS_DIR3_FT_MAX 9
161161

162+
#define XFS_DIR3_FTYPE_STR \
163+
{ XFS_DIR3_FT_UNKNOWN, "unknown" }, \
164+
{ XFS_DIR3_FT_REG_FILE, "file" }, \
165+
{ XFS_DIR3_FT_DIR, "directory" }, \
166+
{ XFS_DIR3_FT_CHRDEV, "char" }, \
167+
{ XFS_DIR3_FT_BLKDEV, "block" }, \
168+
{ XFS_DIR3_FT_FIFO, "fifo" }, \
169+
{ XFS_DIR3_FT_SOCK, "sock" }, \
170+
{ XFS_DIR3_FT_SYMLINK, "symlink" }, \
171+
{ XFS_DIR3_FT_WHT, "whiteout" }
172+
162173
/*
163174
* Byte offset in data block and shortform entry.
164175
*/

fs/xfs/libxfs/xfs_dir2.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,12 @@ const struct xfs_name xfs_name_dotdot = {
2525
.type = XFS_DIR3_FT_DIR,
2626
};
2727

28+
const struct xfs_name xfs_name_dot = {
29+
.name = (const unsigned char *)".",
30+
.len = 1,
31+
.type = XFS_DIR3_FT_DIR,
32+
};
33+
2834
/*
2935
* Convert inode mode to directory entry filetype
3036
*/

fs/xfs/libxfs/xfs_dir2.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,19 @@ struct xfs_dir3_icfree_hdr;
2222
struct xfs_dir3_icleaf_hdr;
2323

2424
extern const struct xfs_name xfs_name_dotdot;
25+
extern const struct xfs_name xfs_name_dot;
26+
27+
static inline bool
28+
xfs_dir2_samename(
29+
const struct xfs_name *n1,
30+
const struct xfs_name *n2)
31+
{
32+
if (n1 == n2)
33+
return true;
34+
if (n1->len != n2->len)
35+
return false;
36+
return !memcmp(n1->name, n2->name, n1->len);
37+
}
2538

2639
/*
2740
* Convert inode mode to directory entry filetype

fs/xfs/scrub/dir.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,11 +93,11 @@ xchk_dir_actor(
9393
return -ECANCELED;
9494
}
9595

96-
if (!strncmp(".", name->name, name->len)) {
96+
if (xfs_dir2_samename(name, &xfs_name_dot)) {
9797
/* If this is "." then check that the inum matches the dir. */
9898
if (ino != dp->i_ino)
9999
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
100-
} else if (!strncmp("..", name->name, name->len)) {
100+
} else if (xfs_dir2_samename(name, &xfs_name_dotdot)) {
101101
/*
102102
* If this is ".." in the root inode, check that the inum
103103
* matches this dir.

fs/xfs/scrub/inode_repair.c

Lines changed: 230 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
#include "scrub/btree.h"
4444
#include "scrub/trace.h"
4545
#include "scrub/repair.h"
46+
#include "scrub/iscan.h"
47+
#include "scrub/readdir.h"
4648

4749
/*
4850
* Inode Record Repair
@@ -126,6 +128,10 @@ struct xrep_inode {
126128

127129
/* Must we remove all access from this file? */
128130
bool zap_acls;
131+
132+
/* Inode scanner to see if we can find the ftype from dirents */
133+
struct xchk_iscan ftype_iscan;
134+
uint8_t alleged_ftype;
129135
};
130136

131137
/*
@@ -227,26 +233,233 @@ xrep_dinode_header(
227233
dip->di_gen = cpu_to_be32(sc->sm->sm_gen);
228234
}
229235

230-
/* Turn di_mode into /something/ recognizable. */
231-
STATIC void
236+
/*
237+
* If this directory entry points to the scrub target inode, then the directory
238+
* we're scanning is the parent of the scrub target inode.
239+
*/
240+
STATIC int
241+
xrep_dinode_findmode_dirent(
242+
struct xfs_scrub *sc,
243+
struct xfs_inode *dp,
244+
xfs_dir2_dataptr_t dapos,
245+
const struct xfs_name *name,
246+
xfs_ino_t ino,
247+
void *priv)
248+
{
249+
struct xrep_inode *ri = priv;
250+
int error = 0;
251+
252+
if (xchk_should_terminate(ri->sc, &error))
253+
return error;
254+
255+
if (ino != sc->sm->sm_ino)
256+
return 0;
257+
258+
/* A garbage directory entry name means the directory is corrupt. */
259+
if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len))
260+
return -EFSCORRUPTED;
261+
262+
/* Don't pick up dot or dotdot entries; we only want child dirents. */
263+
if (xfs_dir2_samename(name, &xfs_name_dotdot) ||
264+
xfs_dir2_samename(name, &xfs_name_dot))
265+
return 0;
266+
267+
/*
268+
* Uhoh, more than one parent for this inode and they don't agree on
269+
* the file type?
270+
*/
271+
if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN &&
272+
ri->alleged_ftype != name->type) {
273+
trace_xrep_dinode_findmode_dirent_inval(ri->sc, dp, name->type,
274+
ri->alleged_ftype);
275+
return -EFSCORRUPTED;
276+
}
277+
278+
/* We found a potential parent; remember the ftype. */
279+
trace_xrep_dinode_findmode_dirent(ri->sc, dp, name->type);
280+
ri->alleged_ftype = name->type;
281+
return 0;
282+
}
283+
284+
/*
285+
* If this is a directory, walk the dirents looking for any that point to the
286+
* scrub target inode.
287+
*/
288+
STATIC int
289+
xrep_dinode_findmode_walk_directory(
290+
struct xrep_inode *ri,
291+
struct xfs_inode *dp)
292+
{
293+
struct xfs_scrub *sc = ri->sc;
294+
unsigned int lock_mode;
295+
int error = 0;
296+
297+
/*
298+
* Scan the directory to see if it contains an entry pointing to
299+
* the inode that we are repairing.
300+
*/
301+
lock_mode = xfs_ilock_data_map_shared(dp);
302+
303+
/*
304+
* If this directory is known to be sick, we cannot scan it reliably
305+
* and must abort.
306+
*/
307+
if (xfs_inode_has_sickness(dp, XFS_SICK_INO_CORE |
308+
XFS_SICK_INO_BMBTD |
309+
XFS_SICK_INO_DIR)) {
310+
error = -EFSCORRUPTED;
311+
goto out_unlock;
312+
}
313+
314+
/*
315+
* We cannot complete our parent pointer scan if a directory looks as
316+
* though it has been zapped by the inode record repair code.
317+
*/
318+
if (xchk_dir_looks_zapped(dp)) {
319+
error = -EBUSY;
320+
goto out_unlock;
321+
}
322+
323+
error = xchk_dir_walk(sc, dp, xrep_dinode_findmode_dirent, ri);
324+
if (error)
325+
goto out_unlock;
326+
327+
out_unlock:
328+
xfs_iunlock(dp, lock_mode);
329+
return error;
330+
}
331+
332+
/*
333+
* Try to find the mode of the inode being repaired by looking for directories
334+
* that point down to this file.
335+
*/
336+
STATIC int
337+
xrep_dinode_find_mode(
338+
struct xrep_inode *ri,
339+
uint16_t *mode)
340+
{
341+
struct xfs_scrub *sc = ri->sc;
342+
struct xfs_inode *dp;
343+
int error;
344+
345+
/* No ftype means we have no other metadata to consult. */
346+
if (!xfs_has_ftype(sc->mp)) {
347+
*mode = S_IFREG;
348+
return 0;
349+
}
350+
351+
/*
352+
* Scan all directories for parents that might point down to this
353+
* inode. Skip the inode being repaired during the scan since it
354+
* cannot be its own parent. Note that we still hold the AGI locked
355+
* so there's a real possibility that _iscan_iter can return EBUSY.
356+
*/
357+
xchk_iscan_start(sc, 5000, 100, &ri->ftype_iscan);
358+
ri->ftype_iscan.skip_ino = sc->sm->sm_ino;
359+
ri->alleged_ftype = XFS_DIR3_FT_UNKNOWN;
360+
while ((error = xchk_iscan_iter(&ri->ftype_iscan, &dp)) == 1) {
361+
if (S_ISDIR(VFS_I(dp)->i_mode))
362+
error = xrep_dinode_findmode_walk_directory(ri, dp);
363+
xchk_iscan_mark_visited(&ri->ftype_iscan, dp);
364+
xchk_irele(sc, dp);
365+
if (error < 0)
366+
break;
367+
if (xchk_should_terminate(sc, &error))
368+
break;
369+
}
370+
xchk_iscan_iter_finish(&ri->ftype_iscan);
371+
xchk_iscan_teardown(&ri->ftype_iscan);
372+
373+
if (error == -EBUSY) {
374+
if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN) {
375+
/*
376+
* If we got an EBUSY after finding at least one
377+
* dirent, that means the scan found an inode on the
378+
* inactivation list and could not open it. Accept the
379+
* alleged ftype and install a new mode below.
380+
*/
381+
error = 0;
382+
} else if (!(sc->flags & XCHK_TRY_HARDER)) {
383+
/*
384+
* Otherwise, retry the operation one time to see if
385+
* the reason for the delay is an inode from the same
386+
* cluster buffer waiting on the inactivation list.
387+
*/
388+
error = -EDEADLOCK;
389+
}
390+
}
391+
if (error)
392+
return error;
393+
394+
/*
395+
* Convert the discovered ftype into the file mode. If all else fails,
396+
* return S_IFREG.
397+
*/
398+
switch (ri->alleged_ftype) {
399+
case XFS_DIR3_FT_DIR:
400+
*mode = S_IFDIR;
401+
break;
402+
case XFS_DIR3_FT_WHT:
403+
case XFS_DIR3_FT_CHRDEV:
404+
*mode = S_IFCHR;
405+
break;
406+
case XFS_DIR3_FT_BLKDEV:
407+
*mode = S_IFBLK;
408+
break;
409+
case XFS_DIR3_FT_FIFO:
410+
*mode = S_IFIFO;
411+
break;
412+
case XFS_DIR3_FT_SOCK:
413+
*mode = S_IFSOCK;
414+
break;
415+
case XFS_DIR3_FT_SYMLINK:
416+
*mode = S_IFLNK;
417+
break;
418+
default:
419+
*mode = S_IFREG;
420+
break;
421+
}
422+
return 0;
423+
}
424+
425+
/* Turn di_mode into /something/ recognizable. Returns 0 or a negative errno. */
426+
STATIC int
232427
xrep_dinode_mode(
233428
struct xrep_inode *ri,
234429
struct xfs_dinode *dip)
235430
{
236431
struct xfs_scrub *sc = ri->sc;
237432
uint16_t mode = be16_to_cpu(dip->di_mode);
433+
int error;
238434

239435
trace_xrep_dinode_mode(sc, dip);
240436

241437
if (mode == 0 || xfs_mode_to_ftype(mode) != XFS_DIR3_FT_UNKNOWN)
242-
return;
438+
return 0;
439+
440+
/* Try to fix the mode. If we cannot, then leave everything alone. */
441+
error = xrep_dinode_find_mode(ri, &mode);
442+
switch (error) {
443+
case -EINTR:
444+
case -EBUSY:
445+
case -EDEADLOCK:
446+
/* temporary failure or fatal signal */
447+
return error;
448+
case 0:
449+
/* found mode */
450+
break;
451+
default:
452+
/* some other error, assume S_IFREG */
453+
mode = S_IFREG;
454+
break;
455+
}
243456

244457
/* bad mode, so we set it to a file that only root can read */
245-
mode = S_IFREG;
246458
dip->di_mode = cpu_to_be16(mode);
247459
dip->di_uid = 0;
248460
dip->di_gid = 0;
249461
ri->zap_acls = true;
462+
return 0;
250463
}
251464

252465
/* Fix any conflicting flags that the verifiers complain about. */
@@ -1107,12 +1320,15 @@ xrep_dinode_core(
11071320
/* Fix everything the verifier will complain about. */
11081321
dip = xfs_buf_offset(bp, ri->imap.im_boffset);
11091322
xrep_dinode_header(sc, dip);
1110-
xrep_dinode_mode(ri, dip);
1323+
iget_error = xrep_dinode_mode(ri, dip);
1324+
if (iget_error)
1325+
goto write;
11111326
xrep_dinode_flags(sc, dip, ri->rt_extents > 0);
11121327
xrep_dinode_size(ri, dip);
11131328
xrep_dinode_extsize_hints(sc, dip);
11141329
xrep_dinode_zap_forks(ri, dip);
11151330

1331+
write:
11161332
/* Write out the inode. */
11171333
trace_xrep_dinode_fixed(sc, dip);
11181334
xfs_dinode_calc_crc(sc->mp, dip);
@@ -1128,7 +1344,8 @@ xrep_dinode_core(
11281344
* accessing the inode. If iget fails, we still need to commit the
11291345
* changes.
11301346
*/
1131-
iget_error = xchk_iget(sc, ino, &sc->ip);
1347+
if (!iget_error)
1348+
iget_error = xchk_iget(sc, ino, &sc->ip);
11321349
if (!iget_error)
11331350
xchk_ilock(sc, XFS_IOLOCK_EXCL);
11341351

@@ -1496,6 +1713,13 @@ xrep_inode(
14961713
ASSERT(ri != NULL);
14971714

14981715
error = xrep_dinode_problems(ri);
1716+
if (error == -EBUSY) {
1717+
/*
1718+
* Directory scan to recover inode mode encountered a
1719+
* busy inode, so we did not continue repairing things.
1720+
*/
1721+
return 0;
1722+
}
14991723
if (error)
15001724
return error;
15011725

0 commit comments

Comments
 (0)