@@ -1230,3 +1230,155 @@ xchk_fsgates_enable(
12301230
12311231 sc -> flags |= scrub_fsgates ;
12321232}
1233+
1234+ /*
1235+ * Decide if this is this a cached inode that's also allocated. The caller
1236+ * must hold a reference to an AG and the AGI buffer lock to prevent inodes
1237+ * from being allocated or freed.
1238+ *
1239+ * Look up an inode by number in the given file system. If the inode number
1240+ * is invalid, return -EINVAL. If the inode is not in cache, return -ENODATA.
1241+ * If the inode is being reclaimed, return -ENODATA because we know the inode
1242+ * cache cannot be updating the ondisk metadata.
1243+ *
1244+ * Otherwise, the incore inode is the one we want, and it is either live,
1245+ * somewhere in the inactivation machinery, or reclaimable. The inode is
1246+ * allocated if i_mode is nonzero. In all three cases, the cached inode will
1247+ * be more up to date than the ondisk inode buffer, so we must use the incore
1248+ * i_mode.
1249+ */
1250+ int
1251+ xchk_inode_is_allocated (
1252+ struct xfs_scrub * sc ,
1253+ xfs_agino_t agino ,
1254+ bool * inuse )
1255+ {
1256+ struct xfs_mount * mp = sc -> mp ;
1257+ struct xfs_perag * pag = sc -> sa .pag ;
1258+ xfs_ino_t ino ;
1259+ struct xfs_inode * ip ;
1260+ int error ;
1261+
1262+ /* caller must hold perag reference */
1263+ if (pag == NULL ) {
1264+ ASSERT (pag != NULL );
1265+ return - EINVAL ;
1266+ }
1267+
1268+ /* caller must have AGI buffer */
1269+ if (sc -> sa .agi_bp == NULL ) {
1270+ ASSERT (sc -> sa .agi_bp != NULL );
1271+ return - EINVAL ;
1272+ }
1273+
1274+ /* reject inode numbers outside existing AGs */
1275+ ino = XFS_AGINO_TO_INO (sc -> mp , pag -> pag_agno , agino );
1276+ if (!xfs_verify_ino (mp , ino ))
1277+ return - EINVAL ;
1278+
1279+ error = - ENODATA ;
1280+ rcu_read_lock ();
1281+ ip = radix_tree_lookup (& pag -> pag_ici_root , agino );
1282+ if (!ip ) {
1283+ /* cache miss */
1284+ goto out_rcu ;
1285+ }
1286+
1287+ /*
1288+ * If the inode number doesn't match, the incore inode got reused
1289+ * during an RCU grace period and the radix tree hasn't been updated.
1290+ * This isn't the inode we want.
1291+ */
1292+ spin_lock (& ip -> i_flags_lock );
1293+ if (ip -> i_ino != ino )
1294+ goto out_skip ;
1295+
1296+ trace_xchk_inode_is_allocated (ip );
1297+
1298+ /*
1299+ * We have an incore inode that matches the inode we want, and the
1300+ * caller holds the perag structure and the AGI buffer. Let's check
1301+ * our assumptions below:
1302+ */
1303+
1304+ #ifdef DEBUG
1305+ /*
1306+ * (1) If the incore inode is live (i.e. referenced from the dcache),
1307+ * it will not be INEW, nor will it be in the inactivation or reclaim
1308+ * machinery. The ondisk inode had better be allocated. This is the
1309+ * most trivial case.
1310+ */
1311+ if (!(ip -> i_flags & (XFS_NEED_INACTIVE | XFS_INEW | XFS_IRECLAIMABLE |
1312+ XFS_INACTIVATING ))) {
1313+ /* live inode */
1314+ ASSERT (VFS_I (ip )-> i_mode != 0 );
1315+ }
1316+
1317+ /*
1318+ * If the incore inode is INEW, there are several possibilities:
1319+ *
1320+ * (2) For a file that is being created, note that we allocate the
1321+ * ondisk inode before allocating, initializing, and adding the incore
1322+ * inode to the radix tree.
1323+ *
1324+ * (3) If the incore inode is being recycled, the inode has to be
1325+ * allocated because we don't allow freed inodes to be recycled.
1326+ * Recycling doesn't touch i_mode.
1327+ */
1328+ if (ip -> i_flags & XFS_INEW ) {
1329+ /* created on disk already or recycling */
1330+ ASSERT (VFS_I (ip )-> i_mode != 0 );
1331+ }
1332+
1333+ /*
1334+ * (4) If the inode is queued for inactivation (NEED_INACTIVE) but
1335+ * inactivation has not started (!INACTIVATING), it is still allocated.
1336+ */
1337+ if ((ip -> i_flags & XFS_NEED_INACTIVE ) &&
1338+ !(ip -> i_flags & XFS_INACTIVATING )) {
1339+ /* definitely before difree */
1340+ ASSERT (VFS_I (ip )-> i_mode != 0 );
1341+ }
1342+ #endif
1343+
1344+ /*
1345+ * If the incore inode is undergoing inactivation (INACTIVATING), there
1346+ * are two possibilities:
1347+ *
1348+ * (5) It is before the point where it would get freed ondisk, in which
1349+ * case i_mode is still nonzero.
1350+ *
1351+ * (6) It has already been freed, in which case i_mode is zero.
1352+ *
1353+ * We don't take the ILOCK here, but difree and dialloc update the AGI,
1354+ * and we've taken the AGI buffer lock, which prevents that from
1355+ * happening.
1356+ */
1357+
1358+ /*
1359+ * (7) Inodes undergoing inactivation (INACTIVATING) or queued for
1360+ * reclaim (IRECLAIMABLE) could be allocated or free. i_mode still
1361+ * reflects the ondisk state.
1362+ */
1363+
1364+ /*
1365+ * (8) If the inode is in IFLUSHING, it's safe to query i_mode because
1366+ * the flush code uses i_mode to format the ondisk inode.
1367+ */
1368+
1369+ /*
1370+ * (9) If the inode is in IRECLAIM and was reachable via the radix
1371+ * tree, it still has the same i_mode as it did before it entered
1372+ * reclaim. The inode object is still alive because we hold the RCU
1373+ * read lock.
1374+ */
1375+
1376+ * inuse = VFS_I (ip )-> i_mode != 0 ;
1377+ error = 0 ;
1378+
1379+ out_skip :
1380+ spin_unlock (& ip -> i_flags_lock );
1381+ out_rcu :
1382+ rcu_read_unlock ();
1383+ return error ;
1384+ }
0 commit comments