@@ -643,6 +643,14 @@ xchk_ag_init(
643643
644644/* Per-scrubber setup functions */
645645
646+ void
647+ xchk_trans_cancel (
648+ struct xfs_scrub * sc )
649+ {
650+ xfs_trans_cancel (sc -> tp );
651+ sc -> tp = NULL ;
652+ }
653+
646654/*
647655 * Grab an empty transaction so that we can re-grab locked buffers if
648656 * one of our btrees turns out to be cyclic.
@@ -718,94 +726,288 @@ xchk_checkpoint_log(
718726 return 0 ;
719727}
720728
729+ /* Verify that an inode is allocated ondisk, then return its cached inode. */
730+ int
731+ xchk_iget (
732+ struct xfs_scrub * sc ,
733+ xfs_ino_t inum ,
734+ struct xfs_inode * * ipp )
735+ {
736+ return xfs_iget (sc -> mp , sc -> tp , inum , XFS_IGET_UNTRUSTED , 0 , ipp );
737+ }
738+
739+ /*
740+ * Try to grab an inode in a manner that avoids races with physical inode
741+ * allocation. If we can't, return the locked AGI buffer so that the caller
742+ * can single-step the loading process to see where things went wrong.
743+ * Callers must have a valid scrub transaction.
744+ *
745+ * If the iget succeeds, return 0, a NULL AGI, and the inode.
746+ *
747+ * If the iget fails, return the error, the locked AGI, and a NULL inode. This
748+ * can include -EINVAL and -ENOENT for invalid inode numbers or inodes that are
749+ * no longer allocated; or any other corruption or runtime error.
750+ *
751+ * If the AGI read fails, return the error, a NULL AGI, and a NULL inode.
752+ *
753+ * If a fatal signal is pending, return -EINTR, a NULL AGI, and a NULL inode.
754+ */
755+ int
756+ xchk_iget_agi (
757+ struct xfs_scrub * sc ,
758+ xfs_ino_t inum ,
759+ struct xfs_buf * * agi_bpp ,
760+ struct xfs_inode * * ipp )
761+ {
762+ struct xfs_mount * mp = sc -> mp ;
763+ struct xfs_trans * tp = sc -> tp ;
764+ struct xfs_perag * pag ;
765+ int error ;
766+
767+ ASSERT (sc -> tp != NULL );
768+
769+ again :
770+ * agi_bpp = NULL ;
771+ * ipp = NULL ;
772+ error = 0 ;
773+
774+ if (xchk_should_terminate (sc , & error ))
775+ return error ;
776+
777+ /*
778+ * Attach the AGI buffer to the scrub transaction to avoid deadlocks
779+ * in the iget cache miss path.
780+ */
781+ pag = xfs_perag_get (mp , XFS_INO_TO_AGNO (mp , inum ));
782+ error = xfs_ialloc_read_agi (pag , tp , agi_bpp );
783+ xfs_perag_put (pag );
784+ if (error )
785+ return error ;
786+
787+ error = xfs_iget (mp , tp , inum ,
788+ XFS_IGET_NORETRY | XFS_IGET_UNTRUSTED , 0 , ipp );
789+ if (error == - EAGAIN ) {
790+ /*
791+ * The inode may be in core but temporarily unavailable and may
792+ * require the AGI buffer before it can be returned. Drop the
793+ * AGI buffer and retry the lookup.
794+ *
795+ * Incore lookup will fail with EAGAIN on a cache hit if the
796+ * inode is queued to the inactivation list. The inactivation
797+ * worker may remove the inode from the unlinked list and hence
798+ * needs the AGI.
799+ *
800+ * Hence xchk_iget_agi() needs to drop the AGI lock on EAGAIN
801+ * to allow inodegc to make progress and move the inode to
802+ * IRECLAIMABLE state where xfs_iget will be able to return it
803+ * again if it can lock the inode.
804+ */
805+ xfs_trans_brelse (tp , * agi_bpp );
806+ delay (1 );
807+ goto again ;
808+ }
809+ if (error )
810+ return error ;
811+
812+ /* We got the inode, so we can release the AGI. */
813+ ASSERT (* ipp != NULL );
814+ xfs_trans_brelse (tp , * agi_bpp );
815+ * agi_bpp = NULL ;
816+ return 0 ;
817+ }
818+
819+ /* Install an inode that we opened by handle for scrubbing. */
820+ int
821+ xchk_install_handle_inode (
822+ struct xfs_scrub * sc ,
823+ struct xfs_inode * ip )
824+ {
825+ if (VFS_I (ip )-> i_generation != sc -> sm -> sm_gen ) {
826+ xchk_irele (sc , ip );
827+ return - ENOENT ;
828+ }
829+
830+ sc -> ip = ip ;
831+ return 0 ;
832+ }
833+
721834/*
722- * Given an inode and the scrub control structure, grab either the
723- * inode referenced in the control structure or the inode passed in.
724- * The inode is not locked.
835+ * In preparation to scrub metadata structures that hang off of an inode,
836+ * grab either the inode referenced in the scrub control structure or the
837+ * inode passed in. If the inumber does not reference an allocated inode
838+ * record, the function returns ENOENT to end the scrub early. The inode
839+ * is not locked.
725840 */
726841int
727- xchk_get_inode (
842+ xchk_iget_for_scrubbing (
728843 struct xfs_scrub * sc )
729844{
730845 struct xfs_imap imap ;
731846 struct xfs_mount * mp = sc -> mp ;
732847 struct xfs_perag * pag ;
848+ struct xfs_buf * agi_bp ;
733849 struct xfs_inode * ip_in = XFS_I (file_inode (sc -> file ));
734850 struct xfs_inode * ip = NULL ;
851+ xfs_agnumber_t agno = XFS_INO_TO_AGNO (mp , sc -> sm -> sm_ino );
735852 int error ;
736853
854+ ASSERT (sc -> tp == NULL );
855+
737856 /* We want to scan the inode we already had opened. */
738857 if (sc -> sm -> sm_ino == 0 || sc -> sm -> sm_ino == ip_in -> i_ino ) {
739858 sc -> ip = ip_in ;
740859 return 0 ;
741860 }
742861
743- /* Look up the inode, see if the generation number matches . */
862+ /* Reject internal metadata files and obviously bad inode numbers . */
744863 if (xfs_internal_inum (mp , sc -> sm -> sm_ino ))
745864 return - ENOENT ;
746- error = xfs_iget (mp , NULL , sc -> sm -> sm_ino ,
747- XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE , 0 , & ip );
748- switch (error ) {
749- case - ENOENT :
750- /* Inode doesn't exist, just bail out. */
865+ if (!xfs_verify_ino (sc -> mp , sc -> sm -> sm_ino ))
866+ return - ENOENT ;
867+
868+ /* Try a regular untrusted iget. */
869+ error = xchk_iget (sc , sc -> sm -> sm_ino , & ip );
870+ if (!error )
871+ return xchk_install_handle_inode (sc , ip );
872+ if (error == - ENOENT )
751873 return error ;
752- case 0 :
753- /* Got an inode, continue. */
754- break ;
755- case - EINVAL :
874+ if (error != - EINVAL )
875+ goto out_error ;
876+
877+ /*
878+ * EINVAL with IGET_UNTRUSTED probably means one of several things:
879+ * userspace gave us an inode number that doesn't correspond to fs
880+ * space; the inode btree lacks a record for this inode; or there is a
881+ * record, and it says this inode is free.
882+ *
883+ * We want to look up this inode in the inobt to distinguish two
884+ * scenarios: (1) the inobt says the inode is free, in which case
885+ * there's nothing to do; and (2) the inobt says the inode is
886+ * allocated, but loading it failed due to corruption.
887+ *
888+ * Allocate a transaction and grab the AGI to prevent inobt activity
889+ * in this AG. Retry the iget in case someone allocated a new inode
890+ * after the first iget failed.
891+ */
892+ error = xchk_trans_alloc (sc , 0 );
893+ if (error )
894+ goto out_error ;
895+
896+ error = xchk_iget_agi (sc , sc -> sm -> sm_ino , & agi_bp , & ip );
897+ if (error == 0 ) {
898+ /* Actually got the inode, so install it. */
899+ xchk_trans_cancel (sc );
900+ return xchk_install_handle_inode (sc , ip );
901+ }
902+ if (error == - ENOENT )
903+ goto out_gone ;
904+ if (error != - EINVAL )
905+ goto out_cancel ;
906+
907+ /* Ensure that we have protected against inode allocation/freeing. */
908+ if (agi_bp == NULL ) {
909+ ASSERT (agi_bp != NULL );
910+ error = - ECANCELED ;
911+ goto out_cancel ;
912+ }
913+
914+ /*
915+ * Untrusted iget failed a second time. Let's try an inobt lookup.
916+ * If the inobt thinks that the inode neither can exist inside the
917+ * filesystem nor is allocated, return ENOENT to signal that the check
918+ * can be skipped.
919+ *
920+ * If the lookup returns corruption, we'll mark this inode corrupt and
921+ * exit to userspace. There's little chance of fixing anything until
922+ * the inobt is straightened out, but there's nothing we can do here.
923+ *
924+ * If the lookup encounters any other error, exit to userspace.
925+ *
926+ * If the lookup succeeds, something else must be very wrong in the fs
927+ * such that setting up the incore inode failed in some strange way.
928+ * Treat those as corruptions.
929+ */
930+ pag = xfs_perag_get (mp , XFS_INO_TO_AGNO (mp , sc -> sm -> sm_ino ));
931+ if (!pag ) {
932+ error = - EFSCORRUPTED ;
933+ goto out_cancel ;
934+ }
935+
936+ error = xfs_imap (pag , sc -> tp , sc -> sm -> sm_ino , & imap ,
937+ XFS_IGET_UNTRUSTED );
938+ xfs_perag_put (pag );
939+ if (error == - EINVAL || error == - ENOENT )
940+ goto out_gone ;
941+ if (!error )
942+ error = - EFSCORRUPTED ;
943+
944+ out_cancel :
945+ xchk_trans_cancel (sc );
946+ out_error :
947+ trace_xchk_op_error (sc , agno , XFS_INO_TO_AGBNO (mp , sc -> sm -> sm_ino ),
948+ error , __return_address );
949+ return error ;
950+ out_gone :
951+ /* The file is gone, so there's nothing to check. */
952+ xchk_trans_cancel (sc );
953+ return - ENOENT ;
954+ }
955+
956+ /* Release an inode, possibly dropping it in the process. */
957+ void
958+ xchk_irele (
959+ struct xfs_scrub * sc ,
960+ struct xfs_inode * ip )
961+ {
962+ if (current -> journal_info != NULL ) {
963+ ASSERT (current -> journal_info == sc -> tp );
964+
756965 /*
757- * -EINVAL with IGET_UNTRUSTED could mean one of several
758- * things: userspace gave us an inode number that doesn't
759- * correspond to fs space, or doesn't have an inobt entry;
760- * or it could simply mean that the inode buffer failed the
761- * read verifiers .
966+ * If we are in a transaction, we /cannot/ drop the inode
967+ * ourselves, because the VFS will trigger writeback, which
968+ * can require a transaction. Clear DONTCACHE to force the
969+ * inode to the LRU, where someone else can take care of
970+ * dropping it .
762971 *
763- * Try just the inode mapping lookup -- if it succeeds, then
764- * the inode buffer verifier failed and something needs fixing.
765- * Otherwise, we really couldn't find it so tell userspace
766- * that it no longer exists .
972+ * Note that when we grabbed our reference to the inode, it
973+ * could have had an active ref and DONTCACHE set if a sysadmin
974+ * is trying to coerce a change in file access mode. icache
975+ * hits do not clear DONTCACHE, so we must do it here .
767976 */
768- pag = xfs_perag_get (mp , XFS_INO_TO_AGNO (mp , sc -> sm -> sm_ino ));
769- if (pag ) {
770- error = xfs_imap (pag , sc -> tp , sc -> sm -> sm_ino , & imap ,
771- XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE );
772- xfs_perag_put (pag );
773- if (error )
774- return - ENOENT ;
775- }
776- error = - EFSCORRUPTED ;
777- fallthrough ;
778- default :
779- trace_xchk_op_error (sc ,
780- XFS_INO_TO_AGNO (mp , sc -> sm -> sm_ino ),
781- XFS_INO_TO_AGBNO (mp , sc -> sm -> sm_ino ),
782- error , __return_address );
783- return error ;
784- }
785- if (VFS_I (ip )-> i_generation != sc -> sm -> sm_gen ) {
786- xfs_irele (ip );
787- return - ENOENT ;
977+ spin_lock (& VFS_I (ip )-> i_lock );
978+ VFS_I (ip )-> i_state &= ~I_DONTCACHE ;
979+ spin_unlock (& VFS_I (ip )-> i_lock );
980+ } else if (atomic_read (& VFS_I (ip )-> i_count ) == 1 ) {
981+ /*
982+ * If this is the last reference to the inode and we are not in a
983+ * transaction, set DONTCACHE to avoid thrashing.
984+ */
985+ d_mark_dontcache (VFS_I (ip ));
788986 }
789987
790- sc -> ip = ip ;
791- return 0 ;
988+ xfs_irele (ip );
792989}
793990
794- /* Set us up to scrub a file's contents. */
991+ /*
992+ * Set us up to scrub metadata mapped by a file's fork. Callers must not use
993+ * this to operate on user-accessible regular file data because the MMAPLOCK is
994+ * not taken.
995+ */
795996int
796997xchk_setup_inode_contents (
797998 struct xfs_scrub * sc ,
798999 unsigned int resblks )
7991000{
8001001 int error ;
8011002
802- error = xchk_get_inode (sc );
1003+ error = xchk_iget_for_scrubbing (sc );
8031004 if (error )
8041005 return error ;
8051006
806- /* Got the inode, lock it and we're ready to go . */
807- sc -> ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL ;
1007+ /* Lock the inode so the VFS cannot touch this file . */
1008+ sc -> ilock_flags = XFS_IOLOCK_EXCL ;
8081009 xfs_ilock (sc -> ip , sc -> ilock_flags );
1010+
8091011 error = xchk_trans_alloc (sc , resblks );
8101012 if (error )
8111013 goto out ;
0 commit comments