Skip to content

Commit 302436c

Browse files
author
Darrick J. Wong
committed
xfs: fix an inode lookup race in xchk_get_inode
In commit d658e, we tried to improve the robustness of xchk_get_inode in the face of EINVAL returns from iget by calling xfs_imap to see if the inobt itself thinks that the inode is allocated. Unfortunately, that commit didn't consider the possibility that the inode gets allocated after iget but before imap. In this case, the imap call will succeed, but we turn that into a corruption error and tell userspace the inode is corrupt. Avoid this false corruption report by grabbing the AGI header and retrying the iget before calling imap. If the iget succeeds, we can proceed with the usual scrub-by-handle code. Fix all the incorrect comments too, since unreadable/corrupt inodes no longer result in EINVAL returns. Fixes: d658e72 ("xfs: distinguish between corrupt inode and invalid inum in xfs_scrub_get_inode") Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Dave Chinner <dchinner@redhat.com>
1 parent a03297a commit 302436c

4 files changed

Lines changed: 205 additions & 46 deletions

File tree

fs/xfs/scrub/common.c

Lines changed: 192 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,14 @@ xchk_ag_init(
643643

644644
/* Per-scrubber setup functions */
645645

646+
void
647+
xchk_trans_cancel(
648+
struct xfs_scrub *sc)
649+
{
650+
xfs_trans_cancel(sc->tp);
651+
sc->tp = NULL;
652+
}
653+
646654
/*
647655
* Grab an empty transaction so that we can re-grab locked buffers if
648656
* one of our btrees turns out to be cyclic.
@@ -728,6 +736,101 @@ xchk_iget(
728736
return xfs_iget(sc->mp, sc->tp, inum, XFS_IGET_UNTRUSTED, 0, ipp);
729737
}
730738

739+
/*
740+
* Try to grab an inode in a manner that avoids races with physical inode
741+
* allocation. If we can't, return the locked AGI buffer so that the caller
742+
* can single-step the loading process to see where things went wrong.
743+
* Callers must have a valid scrub transaction.
744+
*
745+
* If the iget succeeds, return 0, a NULL AGI, and the inode.
746+
*
747+
* If the iget fails, return the error, the locked AGI, and a NULL inode. This
748+
* can include -EINVAL and -ENOENT for invalid inode numbers or inodes that are
749+
* no longer allocated; or any other corruption or runtime error.
750+
*
751+
* If the AGI read fails, return the error, a NULL AGI, and NULL inode.
752+
*
753+
* If a fatal signal is pending, return -EINTR, a NULL AGI, and a NULL inode.
754+
*/
755+
int
756+
xchk_iget_agi(
757+
struct xfs_scrub *sc,
758+
xfs_ino_t inum,
759+
struct xfs_buf **agi_bpp,
760+
struct xfs_inode **ipp)
761+
{
762+
struct xfs_mount *mp = sc->mp;
763+
struct xfs_trans *tp = sc->tp;
764+
struct xfs_perag *pag;
765+
int error;
766+
767+
ASSERT(sc->tp != NULL);
768+
769+
again:
770+
*agi_bpp = NULL;
771+
*ipp = NULL;
772+
error = 0;
773+
774+
if (xchk_should_terminate(sc, &error))
775+
return error;
776+
777+
/*
778+
* Attach the AGI buffer to the scrub transaction to avoid deadlocks
779+
* in the iget cache miss path.
780+
*/
781+
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
782+
error = xfs_ialloc_read_agi(pag, tp, agi_bpp);
783+
xfs_perag_put(pag);
784+
if (error)
785+
return error;
786+
787+
error = xfs_iget(mp, tp, inum,
788+
XFS_IGET_NORETRY | XFS_IGET_UNTRUSTED, 0, ipp);
789+
if (error == -EAGAIN) {
790+
/*
791+
* The inode may be in core but temporarily unavailable and may
792+
* require the AGI buffer before it can be returned. Drop the
793+
* AGI buffer and retry the lookup.
794+
*
795+
* Incore lookup will fail with EAGAIN on a cache hit if the
796+
* inode is queued to the inactivation list. The inactivation
797+
* worker may remove the inode from the unlinked list and hence
798+
* needs the AGI.
799+
*
800+
* Hence xchk_iget_agi() needs to drop the AGI lock on EAGAIN
801+
* to allow inodegc to make progress and move the inode to
802+
* IRECLAIMABLE state where xfs_iget will be able to return it
803+
* again if it can lock the inode.
804+
*/
805+
xfs_trans_brelse(tp, *agi_bpp);
806+
delay(1);
807+
goto again;
808+
}
809+
if (error)
810+
return error;
811+
812+
/* We got the inode, so we can release the AGI. */
813+
ASSERT(*ipp != NULL);
814+
xfs_trans_brelse(tp, *agi_bpp);
815+
*agi_bpp = NULL;
816+
return 0;
817+
}
818+
819+
/* Install an inode that we opened by handle for scrubbing. */
820+
static int
821+
xchk_install_handle_inode(
822+
struct xfs_scrub *sc,
823+
struct xfs_inode *ip)
824+
{
825+
if (VFS_I(ip)->i_generation != sc->sm->sm_gen) {
826+
xchk_irele(sc, ip);
827+
return -ENOENT;
828+
}
829+
830+
sc->ip = ip;
831+
return 0;
832+
}
833+
731834
/*
732835
* Given an inode and the scrub control structure, grab either the
733836
* inode referenced in the control structure or the inode passed in.
@@ -740,64 +843,112 @@ xchk_get_inode(
740843
struct xfs_imap imap;
741844
struct xfs_mount *mp = sc->mp;
742845
struct xfs_perag *pag;
846+
struct xfs_buf *agi_bp;
743847
struct xfs_inode *ip_in = XFS_I(file_inode(sc->file));
744848
struct xfs_inode *ip = NULL;
849+
xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, sc->sm->sm_ino);
745850
int error;
746851

852+
ASSERT(sc->tp == NULL);
853+
747854
/* We want to scan the inode we already had opened. */
748855
if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
749856
sc->ip = ip_in;
750857
return 0;
751858
}
752859

753-
/* Look up the inode, see if the generation number matches. */
860+
/* Reject internal metadata files and obviously bad inode numbers. */
754861
if (xfs_internal_inum(mp, sc->sm->sm_ino))
755862
return -ENOENT;
863+
if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino))
864+
return -ENOENT;
865+
866+
/* Try a regular untrusted iget. */
756867
error = xchk_iget(sc, sc->sm->sm_ino, &ip);
757-
switch (error) {
758-
case -ENOENT:
759-
/* Inode doesn't exist, just bail out. */
760-
return error;
761-
case 0:
762-
/* Got an inode, continue. */
763-
break;
764-
case -EINVAL:
765-
/*
766-
* -EINVAL with IGET_UNTRUSTED could mean one of several
767-
* things: userspace gave us an inode number that doesn't
768-
* correspond to fs space, or doesn't have an inobt entry;
769-
* or it could simply mean that the inode buffer failed the
770-
* read verifiers.
771-
*
772-
* Try just the inode mapping lookup -- if it succeeds, then
773-
* the inode buffer verifier failed and something needs fixing.
774-
* Otherwise, we really couldn't find it so tell userspace
775-
* that it no longer exists.
776-
*/
777-
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino));
778-
if (pag) {
779-
error = xfs_imap(pag, sc->tp, sc->sm->sm_ino, &imap,
780-
XFS_IGET_UNTRUSTED);
781-
xfs_perag_put(pag);
782-
if (error)
783-
return -ENOENT;
784-
}
785-
error = -EFSCORRUPTED;
786-
fallthrough;
787-
default:
788-
trace_xchk_op_error(sc,
789-
XFS_INO_TO_AGNO(mp, sc->sm->sm_ino),
790-
XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
791-
error, __return_address);
868+
if (!error)
869+
return xchk_install_handle_inode(sc, ip);
870+
if (error == -ENOENT)
792871
return error;
872+
if (error != -EINVAL)
873+
goto out_error;
874+
875+
/*
876+
* EINVAL with IGET_UNTRUSTED probably means one of several things:
877+
* userspace gave us an inode number that doesn't correspond to fs
878+
* space; the inode btree lacks a record for this inode; or there is a
879+
* record, and it says this inode is free.
880+
*
881+
* We want to look up this inode in the inobt to distinguish two
882+
* scenarios: (1) the inobt says the inode is free, in which case
883+
* there's nothing to do; and (2) the inobt says the inode is
884+
* allocated, but loading it failed due to corruption.
885+
*
886+
* Allocate a transaction and grab the AGI to prevent inobt activity
887+
* in this AG. Retry the iget in case someone allocated a new inode
888+
* after the first iget failed.
889+
*/
890+
error = xchk_trans_alloc(sc, 0);
891+
if (error)
892+
goto out_error;
893+
894+
error = xchk_iget_agi(sc, sc->sm->sm_ino, &agi_bp, &ip);
895+
if (error == 0) {
896+
/* Actually got the inode, so install it. */
897+
xchk_trans_cancel(sc);
898+
return xchk_install_handle_inode(sc, ip);
793899
}
794-
if (VFS_I(ip)->i_generation != sc->sm->sm_gen) {
795-
xchk_irele(sc, ip);
796-
return -ENOENT;
900+
if (error == -ENOENT)
901+
goto out_gone;
902+
if (error != -EINVAL)
903+
goto out_cancel;
904+
905+
/* Ensure that we have protected against inode allocation/freeing. */
906+
if (agi_bp == NULL) {
907+
ASSERT(agi_bp != NULL);
908+
error = -ECANCELED;
909+
goto out_cancel;
797910
}
798911

799-
sc->ip = ip;
800-
return 0;
912+
/*
913+
* Untrusted iget failed a second time. Let's try an inobt lookup.
914+
* If the inobt thinks that the inode neither can exist inside the
915+
* filesystem nor is allocated, return ENOENT to signal that the check
916+
* can be skipped.
917+
*
918+
* If the lookup returns corruption, we'll mark this inode corrupt and
919+
* exit to userspace. There's little chance of fixing anything until
920+
* the inobt is straightened out, but there's nothing we can do here.
921+
*
922+
* If the lookup encounters any other error, exit to userspace.
923+
*
924+
* If the lookup succeeds, something else must be very wrong in the fs
925+
* such that setting up the incore inode failed in some strange way.
926+
* Treat those as corruptions.
927+
*/
928+
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino));
929+
if (!pag) {
930+
error = -EFSCORRUPTED;
931+
goto out_cancel;
932+
}
933+
934+
error = xfs_imap(pag, sc->tp, sc->sm->sm_ino, &imap,
935+
XFS_IGET_UNTRUSTED);
936+
xfs_perag_put(pag);
937+
if (error == -EINVAL || error == -ENOENT)
938+
goto out_gone;
939+
if (!error)
940+
error = -EFSCORRUPTED;
941+
942+
out_cancel:
943+
xchk_trans_cancel(sc);
944+
out_error:
945+
trace_xchk_op_error(sc, agno, XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
946+
error, __return_address);
947+
return error;
948+
out_gone:
949+
/* The file is gone, so there's nothing to check. */
950+
xchk_trans_cancel(sc);
951+
return -ENOENT;
801952
}
802953

803954
/* Release an inode, possibly dropping it in the process. */

fs/xfs/scrub/common.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ xchk_should_terminate(
3232
}
3333

3434
int xchk_trans_alloc(struct xfs_scrub *sc, uint resblks);
35+
void xchk_trans_cancel(struct xfs_scrub *sc);
36+
3537
bool xchk_process_error(struct xfs_scrub *sc, xfs_agnumber_t agno,
3638
xfs_agblock_t bno, int *error);
3739
bool xchk_fblock_process_error(struct xfs_scrub *sc, int whichfork,
@@ -138,6 +140,8 @@ int xchk_setup_inode_contents(struct xfs_scrub *sc, unsigned int resblks);
138140
void xchk_buffer_recheck(struct xfs_scrub *sc, struct xfs_buf *bp);
139141

140142
int xchk_iget(struct xfs_scrub *sc, xfs_ino_t inum, struct xfs_inode **ipp);
143+
int xchk_iget_agi(struct xfs_scrub *sc, xfs_ino_t inum,
144+
struct xfs_buf **agi_bpp, struct xfs_inode **ipp);
141145
void xchk_irele(struct xfs_scrub *sc, struct xfs_inode *ip);
142146

143147
/*

fs/xfs/xfs_icache.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -767,7 +767,8 @@ xfs_iget(
767767
return 0;
768768

769769
out_error_or_again:
770-
if (!(flags & XFS_IGET_INCORE) && error == -EAGAIN) {
770+
if (!(flags & (XFS_IGET_INCORE | XFS_IGET_NORETRY)) &&
771+
error == -EAGAIN) {
771772
delay(1);
772773
goto again;
773774
}

fs/xfs/xfs_icache.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,13 @@ struct xfs_icwalk {
3434
/*
3535
* Flags for xfs_iget()
3636
*/
37-
#define XFS_IGET_CREATE 0x1
38-
#define XFS_IGET_UNTRUSTED 0x2
39-
#define XFS_IGET_DONTCACHE 0x4
40-
#define XFS_IGET_INCORE 0x8 /* don't read from disk or reinit */
37+
#define XFS_IGET_CREATE (1U << 0)
38+
#define XFS_IGET_UNTRUSTED (1U << 1)
39+
#define XFS_IGET_DONTCACHE (1U << 2)
40+
/* don't read from disk or reinit */
41+
#define XFS_IGET_INCORE (1U << 3)
42+
/* Return -EAGAIN immediately if the inode is unavailable. */
43+
#define XFS_IGET_NORETRY (1U << 4)
4144

4245
int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino,
4346
uint flags, uint lock_flags, xfs_inode_t **ipp);

0 commit comments

Comments
 (0)