Skip to content

Commit 1e79123

Browse files
dchinner authored and dgchinner committed
Merge tag 'scrub-iget-fixes-6.4_2023-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into guilt/xfs-for-next
xfs: fix iget/irele usage in online fsck [v24.5] This patchset fixes a handful of problems relating to how we get and release incore inodes in the online scrub code. The first patch fixes how we handle DONTCACHE -- our reasons for setting (or clearing) it depend entirely on the runtime environment at irele time. Hence we can refactor iget and irele to use our own wrappers that set that context appropriately. The second patch fixes a race between the iget call in the inode core scrubber and other writer threads that are allocating or freeing inodes in the same AG by changing the behavior of xchk_iget (and the inode core scrub setup function) to return either an incore inode or the AGI buffer so that we can be sure that the inode cannot disappear on us. The final patch elides MMAPLOCK from scrub paths when possible. It did not fit anywhere else. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Dave Chinner <david@fromorbit.com>
2 parents a446672 + 1fc7a05 commit 1e79123

9 files changed

Lines changed: 438 additions & 101 deletions

File tree

fs/xfs/scrub/bmap.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,12 @@ xchk_setup_inode_bmap(
3434
if (xchk_need_intent_drain(sc))
3535
xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
3636

37-
error = xchk_get_inode(sc);
37+
error = xchk_iget_for_scrubbing(sc);
3838
if (error)
3939
goto out;
4040

41-
sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
42-
xfs_ilock(sc->ip, sc->ilock_flags);
41+
sc->ilock_flags = XFS_IOLOCK_EXCL;
42+
xfs_ilock(sc->ip, XFS_IOLOCK_EXCL);
4343

4444
/*
4545
* We don't want any ephemeral data fork updates sitting around
@@ -50,6 +50,9 @@ xchk_setup_inode_bmap(
5050
sc->sm->sm_type == XFS_SCRUB_TYPE_BMBTD) {
5151
struct address_space *mapping = VFS_I(sc->ip)->i_mapping;
5252

53+
sc->ilock_flags |= XFS_MMAPLOCK_EXCL;
54+
xfs_ilock(sc->ip, XFS_MMAPLOCK_EXCL);
55+
5356
inode_dio_wait(VFS_I(sc->ip));
5457

5558
/*

fs/xfs/scrub/common.c

Lines changed: 251 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,14 @@ xchk_ag_init(
643643

644644
/* Per-scrubber setup functions */
645645

646+
void
647+
xchk_trans_cancel(
648+
struct xfs_scrub *sc)
649+
{
650+
xfs_trans_cancel(sc->tp);
651+
sc->tp = NULL;
652+
}
653+
646654
/*
647655
* Grab an empty transaction so that we can re-grab locked buffers if
648656
* one of our btrees turns out to be cyclic.
@@ -718,94 +726,288 @@ xchk_checkpoint_log(
718726
return 0;
719727
}
720728

729+
/* Verify that an inode is allocated ondisk, then return its cached inode. */
730+
int
731+
xchk_iget(
732+
struct xfs_scrub *sc,
733+
xfs_ino_t inum,
734+
struct xfs_inode **ipp)
735+
{
736+
return xfs_iget(sc->mp, sc->tp, inum, XFS_IGET_UNTRUSTED, 0, ipp);
737+
}
738+
739+
/*
740+
* Try to grab an inode in a manner that avoids races with physical inode
741+
* allocation. If we can't, return the locked AGI buffer so that the caller
742+
* can single-step the loading process to see where things went wrong.
743+
* Callers must have a valid scrub transaction.
744+
*
745+
* If the iget succeeds, return 0, a NULL AGI, and the inode.
746+
*
747+
* If the iget fails, return the error, the locked AGI, and a NULL inode. This
748+
* can include -EINVAL and -ENOENT for invalid inode numbers or inodes that are
749+
* no longer allocated; or any other corruption or runtime error.
750+
*
751+
* If the AGI read fails, return the error, a NULL AGI, and a NULL inode.
752+
*
753+
* If a fatal signal is pending, return -EINTR, a NULL AGI, and a NULL inode.
754+
*/
755+
int
756+
xchk_iget_agi(
757+
struct xfs_scrub *sc,
758+
xfs_ino_t inum,
759+
struct xfs_buf **agi_bpp,
760+
struct xfs_inode **ipp)
761+
{
762+
struct xfs_mount *mp = sc->mp;
763+
struct xfs_trans *tp = sc->tp;
764+
struct xfs_perag *pag;
765+
int error;
766+
767+
ASSERT(sc->tp != NULL);
768+
769+
again:
770+
*agi_bpp = NULL;
771+
*ipp = NULL;
772+
error = 0;
773+
774+
if (xchk_should_terminate(sc, &error))
775+
return error;
776+
777+
/*
778+
* Attach the AGI buffer to the scrub transaction to avoid deadlocks
779+
* in the iget cache miss path.
780+
*/
781+
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
782+
error = xfs_ialloc_read_agi(pag, tp, agi_bpp);
783+
xfs_perag_put(pag);
784+
if (error)
785+
return error;
786+
787+
error = xfs_iget(mp, tp, inum,
788+
XFS_IGET_NORETRY | XFS_IGET_UNTRUSTED, 0, ipp);
789+
if (error == -EAGAIN) {
790+
/*
791+
* The inode may be in core but temporarily unavailable and may
792+
* require the AGI buffer before it can be returned. Drop the
793+
* AGI buffer and retry the lookup.
794+
*
795+
* Incore lookup will fail with EAGAIN on a cache hit if the
796+
* inode is queued to the inactivation list. The inactivation
797+
* worker may remove the inode from the unlinked list and hence
798+
* needs the AGI.
799+
*
800+
* Hence xchk_iget_agi() needs to drop the AGI lock on EAGAIN
801+
* to allow inodegc to make progress and move the inode to
802+
* IRECLAIMABLE state where xfs_iget will be able to return it
803+
* again if it can lock the inode.
804+
*/
805+
xfs_trans_brelse(tp, *agi_bpp);
806+
delay(1);
807+
goto again;
808+
}
809+
if (error)
810+
return error;
811+
812+
/* We got the inode, so we can release the AGI. */
813+
ASSERT(*ipp != NULL);
814+
xfs_trans_brelse(tp, *agi_bpp);
815+
*agi_bpp = NULL;
816+
return 0;
817+
}
818+
819+
/* Install an inode that we opened by handle for scrubbing. */
820+
int
821+
xchk_install_handle_inode(
822+
struct xfs_scrub *sc,
823+
struct xfs_inode *ip)
824+
{
825+
if (VFS_I(ip)->i_generation != sc->sm->sm_gen) {
826+
xchk_irele(sc, ip);
827+
return -ENOENT;
828+
}
829+
830+
sc->ip = ip;
831+
return 0;
832+
}
833+
721834
/*
722-
* Given an inode and the scrub control structure, grab either the
723-
* inode referenced in the control structure or the inode passed in.
724-
* The inode is not locked.
835+
* In preparation to scrub metadata structures that hang off of an inode,
836+
* grab either the inode referenced in the scrub control structure or the
837+
* inode passed in. If the inumber does not reference an allocated inode
838+
* record, the function returns ENOENT to end the scrub early. The inode
839+
* is not locked.
725840
*/
726841
int
727-
xchk_get_inode(
842+
xchk_iget_for_scrubbing(
728843
struct xfs_scrub *sc)
729844
{
730845
struct xfs_imap imap;
731846
struct xfs_mount *mp = sc->mp;
732847
struct xfs_perag *pag;
848+
struct xfs_buf *agi_bp;
733849
struct xfs_inode *ip_in = XFS_I(file_inode(sc->file));
734850
struct xfs_inode *ip = NULL;
851+
xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, sc->sm->sm_ino);
735852
int error;
736853

854+
ASSERT(sc->tp == NULL);
855+
737856
/* We want to scan the inode we already had opened. */
738857
if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
739858
sc->ip = ip_in;
740859
return 0;
741860
}
742861

743-
/* Look up the inode, see if the generation number matches. */
862+
/* Reject internal metadata files and obviously bad inode numbers. */
744863
if (xfs_internal_inum(mp, sc->sm->sm_ino))
745864
return -ENOENT;
746-
error = xfs_iget(mp, NULL, sc->sm->sm_ino,
747-
XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip);
748-
switch (error) {
749-
case -ENOENT:
750-
/* Inode doesn't exist, just bail out. */
865+
if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino))
866+
return -ENOENT;
867+
868+
/* Try a regular untrusted iget. */
869+
error = xchk_iget(sc, sc->sm->sm_ino, &ip);
870+
if (!error)
871+
return xchk_install_handle_inode(sc, ip);
872+
if (error == -ENOENT)
751873
return error;
752-
case 0:
753-
/* Got an inode, continue. */
754-
break;
755-
case -EINVAL:
874+
if (error != -EINVAL)
875+
goto out_error;
876+
877+
/*
878+
* EINVAL with IGET_UNTRUSTED probably means one of several things:
879+
* userspace gave us an inode number that doesn't correspond to fs
880+
* space; the inode btree lacks a record for this inode; or there is a
881+
* record, and it says this inode is free.
882+
*
883+
* We want to look up this inode in the inobt to distinguish two
884+
* scenarios: (1) the inobt says the inode is free, in which case
885+
* there's nothing to do; and (2) the inobt says the inode is
886+
* allocated, but loading it failed due to corruption.
887+
*
888+
* Allocate a transaction and grab the AGI to prevent inobt activity
889+
* in this AG. Retry the iget in case someone allocated a new inode
890+
* after the first iget failed.
891+
*/
892+
error = xchk_trans_alloc(sc, 0);
893+
if (error)
894+
goto out_error;
895+
896+
error = xchk_iget_agi(sc, sc->sm->sm_ino, &agi_bp, &ip);
897+
if (error == 0) {
898+
/* Actually got the inode, so install it. */
899+
xchk_trans_cancel(sc);
900+
return xchk_install_handle_inode(sc, ip);
901+
}
902+
if (error == -ENOENT)
903+
goto out_gone;
904+
if (error != -EINVAL)
905+
goto out_cancel;
906+
907+
/* Ensure that we have protected against inode allocation/freeing. */
908+
if (agi_bp == NULL) {
909+
ASSERT(agi_bp != NULL);
910+
error = -ECANCELED;
911+
goto out_cancel;
912+
}
913+
914+
/*
915+
* Untrusted iget failed a second time. Let's try an inobt lookup.
916+
* If the inobt thinks that this inode neither can exist inside the
917+
* filesystem nor is allocated, return ENOENT to signal that the check
918+
* can be skipped.
919+
*
920+
* If the lookup returns corruption, we'll mark this inode corrupt and
921+
* exit to userspace. There's little chance of fixing anything until
922+
* the inobt is straightened out, but there's nothing we can do here.
923+
*
924+
* If the lookup encounters any other error, exit to userspace.
925+
*
926+
* If the lookup succeeds, something else must be very wrong in the fs
927+
* such that setting up the incore inode failed in some strange way.
928+
* Treat those as corruptions.
929+
*/
930+
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino));
931+
if (!pag) {
932+
error = -EFSCORRUPTED;
933+
goto out_cancel;
934+
}
935+
936+
error = xfs_imap(pag, sc->tp, sc->sm->sm_ino, &imap,
937+
XFS_IGET_UNTRUSTED);
938+
xfs_perag_put(pag);
939+
if (error == -EINVAL || error == -ENOENT)
940+
goto out_gone;
941+
if (!error)
942+
error = -EFSCORRUPTED;
943+
944+
out_cancel:
945+
xchk_trans_cancel(sc);
946+
out_error:
947+
trace_xchk_op_error(sc, agno, XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
948+
error, __return_address);
949+
return error;
950+
out_gone:
951+
/* The file is gone, so there's nothing to check. */
952+
xchk_trans_cancel(sc);
953+
return -ENOENT;
954+
}
955+
956+
/* Release an inode, possibly dropping it in the process. */
957+
void
958+
xchk_irele(
959+
struct xfs_scrub *sc,
960+
struct xfs_inode *ip)
961+
{
962+
if (current->journal_info != NULL) {
963+
ASSERT(current->journal_info == sc->tp);
964+
756965
/*
757-
* -EINVAL with IGET_UNTRUSTED could mean one of several
758-
* things: userspace gave us an inode number that doesn't
759-
* correspond to fs space, or doesn't have an inobt entry;
760-
* or it could simply mean that the inode buffer failed the
761-
* read verifiers.
966+
* If we are in a transaction, we /cannot/ drop the inode
967+
* ourselves, because the VFS will trigger writeback, which
968+
* can require a transaction. Clear DONTCACHE to force the
969+
* inode to the LRU, where someone else can take care of
970+
* dropping it.
762971
*
763-
* Try just the inode mapping lookup -- if it succeeds, then
764-
* the inode buffer verifier failed and something needs fixing.
765-
* Otherwise, we really couldn't find it so tell userspace
766-
* that it no longer exists.
972+
* Note that when we grabbed our reference to the inode, it
973+
* could have had an active ref and DONTCACHE set if a sysadmin
974+
* is trying to coerce a change in file access mode. icache
975+
* hits do not clear DONTCACHE, so we must do it here.
767976
*/
768-
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino));
769-
if (pag) {
770-
error = xfs_imap(pag, sc->tp, sc->sm->sm_ino, &imap,
771-
XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE);
772-
xfs_perag_put(pag);
773-
if (error)
774-
return -ENOENT;
775-
}
776-
error = -EFSCORRUPTED;
777-
fallthrough;
778-
default:
779-
trace_xchk_op_error(sc,
780-
XFS_INO_TO_AGNO(mp, sc->sm->sm_ino),
781-
XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
782-
error, __return_address);
783-
return error;
784-
}
785-
if (VFS_I(ip)->i_generation != sc->sm->sm_gen) {
786-
xfs_irele(ip);
787-
return -ENOENT;
977+
spin_lock(&VFS_I(ip)->i_lock);
978+
VFS_I(ip)->i_state &= ~I_DONTCACHE;
979+
spin_unlock(&VFS_I(ip)->i_lock);
980+
} else if (atomic_read(&VFS_I(ip)->i_count) == 1) {
981+
/*
982+
* If this is the last reference to the inode and the caller
983+
* permits it, set DONTCACHE to avoid thrashing.
984+
*/
985+
d_mark_dontcache(VFS_I(ip));
788986
}
789987

790-
sc->ip = ip;
791-
return 0;
988+
xfs_irele(ip);
792989
}
793990

794-
/* Set us up to scrub a file's contents. */
991+
/*
992+
* Set us up to scrub metadata mapped by a file's fork. Callers must not use
993+
* this to operate on user-accessible regular file data because the MMAPLOCK is
994+
* not taken.
995+
*/
795996
int
796997
xchk_setup_inode_contents(
797998
struct xfs_scrub *sc,
798999
unsigned int resblks)
7991000
{
8001001
int error;
8011002

802-
error = xchk_get_inode(sc);
1003+
error = xchk_iget_for_scrubbing(sc);
8031004
if (error)
8041005
return error;
8051006

806-
/* Got the inode, lock it and we're ready to go. */
807-
sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
1007+
/* Lock the inode so the VFS cannot touch this file. */
1008+
sc->ilock_flags = XFS_IOLOCK_EXCL;
8081009
xfs_ilock(sc->ip, sc->ilock_flags);
1010+
8091011
error = xchk_trans_alloc(sc, resblks);
8101012
if (error)
8111013
goto out;

0 commit comments

Comments
 (0)