@@ -643,6 +643,14 @@ xchk_ag_init(
643643
644644/* Per-scrubber setup functions */
645645
646+ void
647+ xchk_trans_cancel (
648+ struct xfs_scrub * sc )
649+ {
650+ xfs_trans_cancel (sc -> tp );
651+ sc -> tp = NULL ;
652+ }
653+
646654/*
647655 * Grab an empty transaction so that we can re-grab locked buffers if
648656 * one of our btrees turns out to be cyclic.
@@ -728,6 +736,101 @@ xchk_iget(
728736 return xfs_iget (sc -> mp , sc -> tp , inum , XFS_IGET_UNTRUSTED , 0 , ipp );
729737}
730738
739+ /*
740+ * Try to grab an inode in a manner that avoids races with physical inode
741+ * allocation. If we can't, return the locked AGI buffer so that the caller
742+ * can single-step the loading process to see where things went wrong.
743+ * Callers must have a valid scrub transaction.
744+ *
745+ * If the iget succeeds, return 0, a NULL AGI, and the inode.
746+ *
747+ * If the iget fails, return the error, the locked AGI, and a NULL inode. This
748+ * can include -EINVAL and -ENOENT for invalid inode numbers or inodes that are
749+ * no longer allocated; or any other corruption or runtime error.
750+ *
751+ * If the AGI read fails, return the error, a NULL AGI, and NULL inode.
752+ *
753+ * If a fatal signal is pending, return -EINTR, a NULL AGI, and a NULL inode.
754+ */
755+ int
756+ xchk_iget_agi (
757+ struct xfs_scrub * sc ,
758+ xfs_ino_t inum ,
759+ struct xfs_buf * * agi_bpp ,
760+ struct xfs_inode * * ipp )
761+ {
762+ struct xfs_mount * mp = sc -> mp ;
763+ struct xfs_trans * tp = sc -> tp ;
764+ struct xfs_perag * pag ;
765+ int error ;
766+
767+ ASSERT (sc -> tp != NULL );
768+
769+ again :
770+ * agi_bpp = NULL ;
771+ * ipp = NULL ;
772+ error = 0 ;
773+
774+ if (xchk_should_terminate (sc , & error ))
775+ return error ;
776+
777+ /*
778+ * Attach the AGI buffer to the scrub transaction to avoid deadlocks
779+ * in the iget cache miss path.
780+ */
781+ pag = xfs_perag_get (mp , XFS_INO_TO_AGNO (mp , inum ));
782+ error = xfs_ialloc_read_agi (pag , tp , agi_bpp );
783+ xfs_perag_put (pag );
784+ if (error )
785+ return error ;
786+
787+ error = xfs_iget (mp , tp , inum ,
788+ XFS_IGET_NORETRY | XFS_IGET_UNTRUSTED , 0 , ipp );
789+ if (error == - EAGAIN ) {
790+ /*
791+ * The inode may be in core but temporarily unavailable and may
792+ * require the AGI buffer before it can be returned. Drop the
793+ * AGI buffer and retry the lookup.
794+ *
795+ * Incore lookup will fail with EAGAIN on a cache hit if the
796+ * inode is queued to the inactivation list. The inactivation
797+ * worker may remove the inode from the unlinked list and hence
798+ * needs the AGI.
799+ *
800+ * Hence xchk_iget_agi() needs to drop the AGI lock on EAGAIN
801+ * to allow inodegc to make progress and move the inode to
802+ * IRECLAIMABLE state where xfs_iget will be able to return it
803+ * again if it can lock the inode.
804+ */
805+ xfs_trans_brelse (tp , * agi_bpp );
806+ delay (1 );
807+ goto again ;
808+ }
809+ if (error )
810+ return error ;
811+
812+ /* We got the inode, so we can release the AGI. */
813+ ASSERT (* ipp != NULL );
814+ xfs_trans_brelse (tp , * agi_bpp );
815+ * agi_bpp = NULL ;
816+ return 0 ;
817+ }
818+
819+ /* Install an inode that we opened by handle for scrubbing. */
820+ static int
821+ xchk_install_handle_inode (
822+ struct xfs_scrub * sc ,
823+ struct xfs_inode * ip )
824+ {
825+ if (VFS_I (ip )-> i_generation != sc -> sm -> sm_gen ) {
826+ xchk_irele (sc , ip );
827+ return - ENOENT ;
828+ }
829+
830+ sc -> ip = ip ;
831+ return 0 ;
832+ }
833+
731834/*
732835 * Given an inode and the scrub control structure, grab either the
733836 * inode referenced in the control structure or the inode passed in.
@@ -740,64 +843,112 @@ xchk_get_inode(
740843 struct xfs_imap imap ;
741844 struct xfs_mount * mp = sc -> mp ;
742845 struct xfs_perag * pag ;
846+ struct xfs_buf * agi_bp ;
743847 struct xfs_inode * ip_in = XFS_I (file_inode (sc -> file ));
744848 struct xfs_inode * ip = NULL ;
849+ xfs_agnumber_t agno = XFS_INO_TO_AGNO (mp , sc -> sm -> sm_ino );
745850 int error ;
746851
852+ ASSERT (sc -> tp == NULL );
853+
747854 /* We want to scan the inode we already had opened. */
748855 if (sc -> sm -> sm_ino == 0 || sc -> sm -> sm_ino == ip_in -> i_ino ) {
749856 sc -> ip = ip_in ;
750857 return 0 ;
751858 }
752859
753- /* Look up the inode, see if the generation number matches . */
860+ /* Reject internal metadata files and obviously bad inode numbers . */
754861 if (xfs_internal_inum (mp , sc -> sm -> sm_ino ))
755862 return - ENOENT ;
863+ if (!xfs_verify_ino (sc -> mp , sc -> sm -> sm_ino ))
864+ return - ENOENT ;
865+
866+ /* Try a regular untrusted iget. */
756867 error = xchk_iget (sc , sc -> sm -> sm_ino , & ip );
757- switch (error ) {
758- case - ENOENT :
759- /* Inode doesn't exist, just bail out. */
760- return error ;
761- case 0 :
762- /* Got an inode, continue. */
763- break ;
764- case - EINVAL :
765- /*
766- * -EINVAL with IGET_UNTRUSTED could mean one of several
767- * things: userspace gave us an inode number that doesn't
768- * correspond to fs space, or doesn't have an inobt entry;
769- * or it could simply mean that the inode buffer failed the
770- * read verifiers.
771- *
772- * Try just the inode mapping lookup -- if it succeeds, then
773- * the inode buffer verifier failed and something needs fixing.
774- * Otherwise, we really couldn't find it so tell userspace
775- * that it no longer exists.
776- */
777- pag = xfs_perag_get (mp , XFS_INO_TO_AGNO (mp , sc -> sm -> sm_ino ));
778- if (pag ) {
779- error = xfs_imap (pag , sc -> tp , sc -> sm -> sm_ino , & imap ,
780- XFS_IGET_UNTRUSTED );
781- xfs_perag_put (pag );
782- if (error )
783- return - ENOENT ;
784- }
785- error = - EFSCORRUPTED ;
786- fallthrough ;
787- default :
788- trace_xchk_op_error (sc ,
789- XFS_INO_TO_AGNO (mp , sc -> sm -> sm_ino ),
790- XFS_INO_TO_AGBNO (mp , sc -> sm -> sm_ino ),
791- error , __return_address );
868+ if (!error )
869+ return xchk_install_handle_inode (sc , ip );
870+ if (error == - ENOENT )
792871 return error ;
872+ if (error != - EINVAL )
873+ goto out_error ;
874+
875+ /*
876+ * EINVAL with IGET_UNTRUSTED probably means one of several things:
877+ * userspace gave us an inode number that doesn't correspond to fs
878+ * space; the inode btree lacks a record for this inode; or there is a
879+ * record, and it says this inode is free.
880+ *
881+ * We want to look up this inode in the inobt to distinguish two
882+ * scenarios: (1) the inobt says the inode is free, in which case
883+ * there's nothing to do; and (2) the inobt says the inode is
884+ * allocated, but loading it failed due to corruption.
885+ *
886+ * Allocate a transaction and grab the AGI to prevent inobt activity
887+ * in this AG. Retry the iget in case someone allocated a new inode
888+ * after the first iget failed.
889+ */
890+ error = xchk_trans_alloc (sc , 0 );
891+ if (error )
892+ goto out_error ;
893+
894+ error = xchk_iget_agi (sc , sc -> sm -> sm_ino , & agi_bp , & ip );
895+ if (error == 0 ) {
896+ /* Actually got the inode, so install it. */
897+ xchk_trans_cancel (sc );
898+ return xchk_install_handle_inode (sc , ip );
793899 }
794- if (VFS_I (ip )-> i_generation != sc -> sm -> sm_gen ) {
795- xchk_irele (sc , ip );
796- return - ENOENT ;
900+ if (error == - ENOENT )
901+ goto out_gone ;
902+ if (error != - EINVAL )
903+ goto out_cancel ;
904+
905+ /* Ensure that we have protected against inode allocation/freeing. */
906+ if (agi_bp == NULL ) {
907+ ASSERT (agi_bp != NULL );
908+ error = - ECANCELED ;
909+ goto out_cancel ;
797910 }
798911
799- sc -> ip = ip ;
800- return 0 ;
912+ /*
913+ * Untrusted iget failed a second time. Let's try an inobt lookup.
914+ * If the inobt thinks that this inode neither can exist inside the
915+ * filesystem nor is allocated, return ENOENT to signal that the check
916+ * can be skipped.
917+ *
918+ * If the lookup returns corruption, we'll mark this inode corrupt and
919+ * exit to userspace. There's little chance of fixing anything until
920+ * the inobt is straightened out, but there's nothing we can do here.
921+ *
922+ * If the lookup encounters any other error, exit to userspace.
923+ *
924+ * If the lookup succeeds, something else must be very wrong in the fs
925+ * such that setting up the incore inode failed in some strange way.
926+ * Treat those as corruptions.
927+ */
928+ pag = xfs_perag_get (mp , XFS_INO_TO_AGNO (mp , sc -> sm -> sm_ino ));
929+ if (!pag ) {
930+ error = - EFSCORRUPTED ;
931+ goto out_cancel ;
932+ }
933+
934+ error = xfs_imap (pag , sc -> tp , sc -> sm -> sm_ino , & imap ,
935+ XFS_IGET_UNTRUSTED );
936+ xfs_perag_put (pag );
937+ if (error == - EINVAL || error == - ENOENT )
938+ goto out_gone ;
939+ if (!error )
940+ error = - EFSCORRUPTED ;
941+
942+ out_cancel :
943+ xchk_trans_cancel (sc );
944+ out_error :
945+ trace_xchk_op_error (sc , agno , XFS_INO_TO_AGBNO (mp , sc -> sm -> sm_ino ),
946+ error , __return_address );
947+ return error ;
948+ out_gone :
949+ /* The file is gone, so there's nothing to check. */
950+ xchk_trans_cancel (sc );
951+ return - ENOENT ;
801952}
802953
803954/* Release an inode, possibly dropping it in the process. */
0 commit comments