Skip to content

Commit 38bb131

Browse files
author
Darrick J. Wong
committed
xfs: retain the AGI when we can't iget an inode to scrub the core
xchk_get_inode is not quite the right function to be calling from the inode scrubber setup function. The common get_inode function either gets an inode and installs it in the scrub context, or it returns an error code explaining what happened. This is acceptable for most file scrubbers because it is not in their scope to fix corruptions in the inode core and fork areas that cause iget to fail. Dealing with these problems is within the scope of the inode scrubber, however. If iget fails with EFSCORRUPTED, we need to xchk_inode to flag that as corruption. Since we can't get our hands on an incore inode, we need to hold the AGI to prevent inode allocation activity so that nothing changes in the inode metadata. Looking ahead to the inode core repair patches, we will also need to hold the AGI buffer into xrep_inode so that we can make modifications to the xfs_dinode structure without any other thread swooping in to allocate or free the inode. Adapt the xchk_get_inode into xchk_setup_inode since this is a one-off use case where the error codes we check for are a little different, and the return state is much different from the common function. xchk_setup_inode prepares to check or repair an inode record, so it must continue the scrub operation even if the inode/inobt verifiers cause xfs_iget to return EFSCORRUPTED. This is done by attaching the locked AGI buffer to the scrub transaction and returning 0 to move on to the actual scrub. (Later, the online inode repair code will also want the xfs_imap structure so that it can reset the ondisk xfs_dinode structure.) xchk_get_inode retrieves an inode on behalf of a scrubber that operates on an incore inode -- data/attr/cow forks, directories, xattrs, symlinks, parent pointers, etc. If the inode/inobt verifiers fail and xfs_iget returns EFSCORRUPTED, we want to exit to userspace (because the caller should be fix the inode first) and drop everything we acquired along the way. A behavior common to both functions is that it's possible that xfs_scrub asked for a scrub-by-handle concurrent with the inode being freed or the passed-in inumber is invalid. In this case, we call xfs_imap to see if the inobt index thinks the inode is allocated, and return ENOENT ("nothing to check here") to userspace if this is not the case. The imap lookup is why both functions call xchk_iget_agi. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Dave Chinner <dchinner@redhat.com>
1 parent 46e0dd8 commit 38bb131

3 files changed

Lines changed: 156 additions & 24 deletions

File tree

fs/xfs/scrub/common.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -817,7 +817,7 @@ xchk_iget_agi(
817817
}
818818

819819
/* Install an inode that we opened by handle for scrubbing. */
820-
static int
820+
int
821821
xchk_install_handle_inode(
822822
struct xfs_scrub *sc,
823823
struct xfs_inode *ip)

fs/xfs/scrub/common.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ int xchk_iget(struct xfs_scrub *sc, xfs_ino_t inum, struct xfs_inode **ipp);
143143
int xchk_iget_agi(struct xfs_scrub *sc, xfs_ino_t inum,
144144
struct xfs_buf **agi_bpp, struct xfs_inode **ipp);
145145
void xchk_irele(struct xfs_scrub *sc, struct xfs_inode *ip);
146+
int xchk_install_handle_inode(struct xfs_scrub *sc, struct xfs_inode *ip);
146147

147148
/*
148149
* Don't bother cross-referencing if we already found corruption or cross

fs/xfs/scrub/inode.c

Lines changed: 154 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,57 +11,188 @@
1111
#include "xfs_mount.h"
1212
#include "xfs_btree.h"
1313
#include "xfs_log_format.h"
14+
#include "xfs_trans.h"
15+
#include "xfs_ag.h"
1416
#include "xfs_inode.h"
1517
#include "xfs_ialloc.h"
18+
#include "xfs_icache.h"
1619
#include "xfs_da_format.h"
1720
#include "xfs_reflink.h"
1821
#include "xfs_rmap.h"
1922
#include "xfs_bmap_util.h"
2023
#include "scrub/scrub.h"
2124
#include "scrub/common.h"
2225
#include "scrub/btree.h"
26+
#include "scrub/trace.h"
27+
28+
/* Prepare the attached inode for scrubbing. */
29+
static inline int
30+
xchk_prepare_iscrub(
31+
struct xfs_scrub *sc)
32+
{
33+
int error;
34+
35+
sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
36+
xfs_ilock(sc->ip, sc->ilock_flags);
37+
38+
error = xchk_trans_alloc(sc, 0);
39+
if (error)
40+
return error;
41+
42+
sc->ilock_flags |= XFS_ILOCK_EXCL;
43+
xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
44+
return 0;
45+
}
46+
47+
/* Install this scrub-by-handle inode and prepare it for scrubbing. */
48+
static inline int
49+
xchk_install_handle_iscrub(
50+
struct xfs_scrub *sc,
51+
struct xfs_inode *ip)
52+
{
53+
int error;
54+
55+
error = xchk_install_handle_inode(sc, ip);
56+
if (error)
57+
return error;
58+
59+
return xchk_prepare_iscrub(sc);
60+
}
2361

2462
/*
25-
* Grab total control of the inode metadata. It doesn't matter here if
26-
* the file data is still changing; exclusive access to the metadata is
27-
* the goal.
63+
* Grab total control of the inode metadata. In the best case, we grab the
64+
* incore inode and take all locks on it. If the incore inode cannot be
65+
* constructed due to corruption problems, lock the AGI so that we can single
66+
* step the loading process to fix everything that can go wrong.
2867
*/
2968
int
3069
xchk_setup_inode(
3170
struct xfs_scrub *sc)
3271
{
72+
struct xfs_imap imap;
73+
struct xfs_inode *ip;
74+
struct xfs_mount *mp = sc->mp;
75+
struct xfs_inode *ip_in = XFS_I(file_inode(sc->file));
76+
struct xfs_buf *agi_bp;
77+
struct xfs_perag *pag;
78+
xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, sc->sm->sm_ino);
3379
int error;
3480

3581
if (xchk_need_intent_drain(sc))
3682
xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
3783

84+
/* We want to scan the opened inode, so lock it and exit. */
85+
if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
86+
sc->ip = ip_in;
87+
return xchk_prepare_iscrub(sc);
88+
}
89+
90+
/* Reject internal metadata files and obviously bad inode numbers. */
91+
if (xfs_internal_inum(mp, sc->sm->sm_ino))
92+
return -ENOENT;
93+
if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino))
94+
return -ENOENT;
95+
96+
/* Try a regular untrusted iget. */
97+
error = xchk_iget(sc, sc->sm->sm_ino, &ip);
98+
if (!error)
99+
return xchk_install_handle_iscrub(sc, ip);
100+
if (error == -ENOENT)
101+
return error;
102+
if (error != -EFSCORRUPTED && error != -EFSBADCRC && error != -EINVAL)
103+
goto out_error;
104+
38105
/*
39-
* Try to get the inode. If the verifiers fail, we try again
40-
* in raw mode.
106+
* EINVAL with IGET_UNTRUSTED probably means one of several things:
107+
* userspace gave us an inode number that doesn't correspond to fs
108+
* space; the inode btree lacks a record for this inode; or there is
109+
* a record, and it says this inode is free.
110+
*
111+
* EFSCORRUPTED/EFSBADCRC could mean that the inode was mappable, but
112+
* some other metadata corruption (e.g. inode forks) prevented
113+
* instantiation of the incore inode. Or it could mean the inobt is
114+
* corrupt.
115+
*
116+
* We want to look up this inode in the inobt directly to distinguish
117+
* three different scenarios: (1) the inobt says the inode is free,
118+
* in which case there's nothing to do; (2) the inobt is corrupt so we
119+
* should flag the corruption and exit to userspace to let it fix the
120+
* inobt; and (3) the inobt says the inode is allocated, but loading it
121+
* failed due to corruption.
122+
*
123+
* Allocate a transaction and grab the AGI to prevent inobt activity in
124+
* this AG. Retry the iget in case someone allocated a new inode after
125+
* the first iget failed.
41126
*/
42-
error = xchk_iget_for_scrubbing(sc);
43-
switch (error) {
44-
case 0:
45-
break;
46-
case -EFSCORRUPTED:
47-
case -EFSBADCRC:
48-
return xchk_trans_alloc(sc, 0);
49-
default:
50-
return error;
127+
error = xchk_trans_alloc(sc, 0);
128+
if (error)
129+
goto out_error;
130+
131+
error = xchk_iget_agi(sc, sc->sm->sm_ino, &agi_bp, &ip);
132+
if (error == 0) {
133+
/* Actually got the incore inode, so install it and proceed. */
134+
xchk_trans_cancel(sc);
135+
return xchk_install_handle_iscrub(sc, ip);
136+
}
137+
if (error == -ENOENT)
138+
goto out_gone;
139+
if (error != -EFSCORRUPTED && error != -EFSBADCRC && error != -EINVAL)
140+
goto out_cancel;
141+
142+
/* Ensure that we have protected against inode allocation/freeing. */
143+
if (agi_bp == NULL) {
144+
ASSERT(agi_bp != NULL);
145+
error = -ECANCELED;
146+
goto out_cancel;
51147
}
52148

53-
/* Got the inode, lock it and we're ready to go. */
54-
sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
55-
xfs_ilock(sc->ip, sc->ilock_flags);
56-
error = xchk_trans_alloc(sc, 0);
149+
/*
150+
* Untrusted iget failed a second time. Let's try an inobt lookup.
151+
* If the inobt doesn't think this is an allocated inode then we'll
152+
* return ENOENT to signal that the check can be skipped.
153+
*
154+
* If the lookup signals corruption, we'll mark this inode corrupt and
155+
* exit to userspace. There's little chance of fixing anything until
156+
* the inobt is straightened out, but there's nothing we can do here.
157+
*
158+
* If the lookup encounters a runtime error, exit to userspace.
159+
*/
160+
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino));
161+
if (!pag) {
162+
error = -EFSCORRUPTED;
163+
goto out_cancel;
164+
}
165+
166+
error = xfs_imap(pag, sc->tp, sc->sm->sm_ino, &imap,
167+
XFS_IGET_UNTRUSTED);
168+
xfs_perag_put(pag);
169+
if (error == -EINVAL || error == -ENOENT)
170+
goto out_gone;
57171
if (error)
58-
goto out;
59-
sc->ilock_flags |= XFS_ILOCK_EXCL;
60-
xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
172+
goto out_cancel;
61173

62-
out:
63-
/* scrub teardown will unlock and release the inode for us */
174+
/*
175+
* The lookup succeeded. Chances are the ondisk inode is corrupt and
176+
* preventing iget from reading it. Retain the scrub transaction and
177+
* the AGI buffer to prevent anyone from allocating or freeing inodes.
178+
* This ensures that we preserve the inconsistency between the inobt
179+
* saying the inode is allocated and the icache being unable to load
180+
* the inode until we can flag the corruption in xchk_inode. The
181+
* scrub function has to note the corruption, since we're not really
182+
* supposed to do that from the setup function.
183+
*/
184+
return 0;
185+
186+
out_cancel:
187+
xchk_trans_cancel(sc);
188+
out_error:
189+
trace_xchk_op_error(sc, agno, XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
190+
error, __return_address);
64191
return error;
192+
out_gone:
193+
/* The file is gone, so there's nothing to check. */
194+
xchk_trans_cancel(sc);
195+
return -ENOENT;
65196
}
66197

67198
/* Inode core */

0 commit comments

Comments
 (0)