Skip to content

Commit 0916056

Browse files
author
Darrick J. Wong
committed
xfs: fix parent pointer scrub racing with subdirectory reparenting
Jan Kara pointed out that rename() doesn't lock a subdirectory that is being moved from one parent to another, even though the move requires an update to the subdirectory's dotdot entry. This means that it's *not* sufficient to hold a directory's IOLOCK to stabilize the dotdot entry. We must hold the ILOCK of both the child and the alleged parent, and there's no use in holding the parent's IOLOCK. With that in mind, we can get rid of all the messy code that tries to grab the parent's IOLOCK, which means we don't need to let go of the ILOCK of the directory whose parent we are checking. We still have to use nonblocking mode to take the ILOCK of the alleged parent, so the revalidation loop has to stay. However, we can remove the retry counter, since threads aren't supposed to hold the ILOCK for long periods of time. Remove the inverted ilock helper from the common code since nobody uses it. Remove the entire source of -EDEADLOCK-based "retry harder" scrub executions. Link: https://lore.kernel.org/linux-xfs/20230117123735.un7wbamlbdihninm@quack3/ Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Dave Chinner <dchinner@redhat.com>
1 parent b049962 commit 0916056

3 files changed

Lines changed: 57 additions & 84 deletions

File tree

fs/xfs/scrub/common.c

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -962,28 +962,6 @@ xchk_metadata_inode_forks(
962962
return 0;
963963
}
964964

965-
/*
966-
* Try to lock an inode in violation of the usual locking order rules. For
967-
* example, trying to get the IOLOCK while in transaction context, or just
968-
* plain breaking AG-order or inode-order inode locking rules. Either way,
969-
* the only way to avoid an ABBA deadlock is to use trylock and back off if
970-
* we can't.
971-
*/
972-
int
973-
xchk_ilock_inverted(
974-
struct xfs_inode *ip,
975-
uint lock_mode)
976-
{
977-
int i;
978-
979-
for (i = 0; i < 20; i++) {
980-
if (xfs_ilock_nowait(ip, lock_mode))
981-
return 0;
982-
delay(1);
983-
}
984-
return -EDEADLOCK;
985-
}
986-
987965
/* Pause background reaping of resources. */
988966
void
989967
xchk_stop_reaping(

fs/xfs/scrub/common.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,6 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm)
148148
}
149149

150150
int xchk_metadata_inode_forks(struct xfs_scrub *sc);
151-
int xchk_ilock_inverted(struct xfs_inode *ip, uint lock_mode);
152151
void xchk_stop_reaping(struct xfs_scrub *sc);
153152
void xchk_start_reaping(struct xfs_scrub *sc);
154153

fs/xfs/scrub/parent.c

Lines changed: 57 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -64,15 +64,37 @@ xchk_parent_actor(
6464
}
6565

6666
/*
67-
* Given the inode number of the alleged parent of the inode being
68-
* scrubbed, try to validate that the parent has exactly one directory
69-
* entry pointing back to the inode being scrubbed.
67+
* Try to lock a parent directory for checking dirents. Returns the inode
68+
* flags for the locks we now hold, or zero if we failed.
69+
*/
70+
STATIC unsigned int
71+
xchk_parent_ilock_dir(
72+
struct xfs_inode *dp)
73+
{
74+
if (!xfs_ilock_nowait(dp, XFS_ILOCK_SHARED))
75+
return 0;
76+
77+
if (!xfs_need_iread_extents(&dp->i_df))
78+
return XFS_ILOCK_SHARED;
79+
80+
xfs_iunlock(dp, XFS_ILOCK_SHARED);
81+
82+
if (!xfs_ilock_nowait(dp, XFS_ILOCK_EXCL))
83+
return 0;
84+
85+
return XFS_ILOCK_EXCL;
86+
}
87+
88+
/*
89+
* Given the inode number of the alleged parent of the inode being scrubbed,
90+
* try to validate that the parent has exactly one directory entry pointing
91+
* back to the inode being scrubbed. Returns -EAGAIN if we need to revalidate
92+
* the dotdot entry.
7093
*/
7194
STATIC int
7295
xchk_parent_validate(
7396
struct xfs_scrub *sc,
74-
xfs_ino_t parent_ino,
75-
bool *try_again)
97+
xfs_ino_t parent_ino)
7698
{
7799
struct xchk_parent_ctx spc = {
78100
.sc = sc,
@@ -81,23 +103,21 @@ xchk_parent_validate(
81103
struct xfs_mount *mp = sc->mp;
82104
struct xfs_inode *dp = NULL;
83105
xfs_nlink_t expected_nlink;
84-
uint lock_mode;
106+
unsigned int lock_mode;
85107
int error = 0;
86108

87-
*try_again = false;
88-
89109
/* Is this the root dir? Then '..' must point to itself. */
90110
if (sc->ip == mp->m_rootip) {
91111
if (sc->ip->i_ino != mp->m_sb.sb_rootino ||
92112
sc->ip->i_ino != parent_ino)
93113
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
94-
goto out;
114+
return 0;
95115
}
96116

97117
/* '..' must not point to ourselves. */
98118
if (sc->ip->i_ino == parent_ino) {
99119
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
100-
goto out;
120+
return 0;
101121
}
102122

103123
/*
@@ -124,41 +144,39 @@ xchk_parent_validate(
124144
if (error == -EINVAL || error == -ENOENT) {
125145
error = -EFSCORRUPTED;
126146
xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error);
127-
goto out;
147+
return error;
128148
}
129149
if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
130-
goto out;
150+
return error;
131151
if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) {
132152
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
133153
goto out_rele;
134154
}
135155

136-
/*
137-
* We prefer to keep the inode locked while we lock and search
138-
* its alleged parent for a forward reference. If we can grab
139-
* the iolock, validate the pointers and we're done. We must
140-
* use nowait here to avoid an ABBA deadlock on the parent and
141-
* the child inodes.
142-
*/
143-
if (!xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) {
144-
*try_again = true;
156+
lock_mode = xchk_parent_ilock_dir(dp);
157+
if (!lock_mode) {
158+
xfs_iunlock(sc->ip, XFS_ILOCK_EXCL);
159+
xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
160+
error = -EAGAIN;
145161
goto out_rele;
146162
}
147163

148-
lock_mode = xfs_ilock_data_map_shared(dp);
164+
/* Look for a directory entry in the parent pointing to the child. */
149165
error = xchk_dir_walk(sc, dp, xchk_parent_actor, &spc);
150-
xfs_iunlock(dp, lock_mode);
151166
if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
152167
goto out_unlock;
153168

169+
/*
170+
* Ensure that the parent has as many links to the child as the child
171+
* thinks it has to the parent.
172+
*/
154173
if (spc.nlink != expected_nlink)
155174
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
156175

157176
out_unlock:
158-
xfs_iunlock(dp, XFS_IOLOCK_SHARED);
177+
xfs_iunlock(dp, lock_mode);
159178
out_rele:
160179
xfs_irele(dp);
161-
out:
162180
return error;
163181
}
164182

@@ -169,8 +187,6 @@ xchk_parent(
169187
{
170188
struct xfs_mount *mp = sc->mp;
171189
xfs_ino_t parent_ino;
172-
bool try_again;
173-
int tries = 0;
174190
int error = 0;
175191

176192
/*
@@ -183,49 +199,29 @@ xchk_parent(
183199
/* We're not a special inode, are we? */
184200
if (!xfs_verify_dir_ino(mp, sc->ip->i_ino)) {
185201
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
186-
goto out;
202+
return 0;
187203
}
188204

189-
/*
190-
* The VFS grabs a read or write lock via i_rwsem before it reads
191-
* or writes to a directory. If we've gotten this far we've
192-
* already obtained IOLOCK_EXCL, which (since 4.10) is the same as
193-
* getting a write lock on i_rwsem. Therefore, it is safe for us
194-
* to drop the ILOCK here in order to do directory lookups.
195-
*/
196-
sc->ilock_flags &= ~(XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL);
197-
xfs_iunlock(sc->ip, XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL);
198-
199205
do {
206+
if (xchk_should_terminate(sc, &error))
207+
break;
208+
200209
/* Look up '..' */
201-
error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot,
202-
&parent_ino, NULL);
210+
error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot,
211+
&parent_ino);
203212
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
204-
goto out;
213+
return error;
205214
if (!xfs_verify_dir_ino(mp, parent_ino)) {
206215
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
207-
goto out;
216+
return 0;
208217
}
209218

210-
error = xchk_parent_validate(sc, parent_ino, &try_again);
211-
if (error)
212-
goto out;
213-
} while (try_again && ++tries < 20);
219+
/*
220+
* Check that the dotdot entry points to a parent directory
221+
* containing a dirent pointing to this subdirectory.
222+
*/
223+
error = xchk_parent_validate(sc, parent_ino);
224+
} while (error == -EAGAIN);
214225

215-
/*
216-
* We gave it our best shot but failed, so mark this scrub
217-
* incomplete. Userspace can decide if it wants to try again.
218-
*/
219-
if (try_again && tries == 20)
220-
xchk_set_incomplete(sc);
221-
out:
222-
/*
223-
* If we failed to lock the parent inode even after a retry, just mark
224-
* this scrub incomplete and return.
225-
*/
226-
if ((sc->flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) {
227-
error = 0;
228-
xchk_set_incomplete(sc);
229-
}
230226
return error;
231227
}

0 commit comments

Comments
 (0)