Skip to content

Commit a446672

Browse files
dchinnerdgchinner
authored andcommitted
Merge tag 'scrub-parent-fixes-6.4_2023-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into guilt/xfs-for-next
xfs: fix bugs in parent pointer checking [v24.5] Jan Kara pointed out that the VFS doesn't take i_rwsem of a child subdirectory that is being moved from one parent to another. Upon deeper analysis, I realized that this was the source of a very hard to trigger false corruption report in the parent pointer checking code. Now that we've refactored how directory walks work in scrub, we can also get rid of all the unnecessary and broken locking to make parent pointer scrubbing work properly. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Dave Chinner <david@fromorbit.com>
2 parents f697c2c + 0916056 commit a446672

3 files changed

Lines changed: 71 additions & 168 deletions

File tree

fs/xfs/scrub/common.c

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -962,28 +962,6 @@ xchk_metadata_inode_forks(
962962
return 0;
963963
}
964964

965-
/*
966-
* Try to lock an inode in violation of the usual locking order rules. For
967-
* example, trying to get the IOLOCK while in transaction context, or just
968-
* plain breaking AG-order or inode-order inode locking rules. Either way,
969-
* the only way to avoid an ABBA deadlock is to use trylock and back off if
970-
* we can't.
971-
*/
972-
int
973-
xchk_ilock_inverted(
974-
struct xfs_inode *ip,
975-
uint lock_mode)
976-
{
977-
int i;
978-
979-
for (i = 0; i < 20; i++) {
980-
if (xfs_ilock_nowait(ip, lock_mode))
981-
return 0;
982-
delay(1);
983-
}
984-
return -EDEADLOCK;
985-
}
986-
987965
/* Pause background reaping of resources. */
988966
void
989967
xchk_stop_reaping(

fs/xfs/scrub/common.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,6 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm)
148148
}
149149

150150
int xchk_metadata_inode_forks(struct xfs_scrub *sc);
151-
int xchk_ilock_inverted(struct xfs_inode *ip, uint lock_mode);
152151
void xchk_stop_reaping(struct xfs_scrub *sc);
153152
void xchk_start_reaping(struct xfs_scrub *sc);
154153

fs/xfs/scrub/parent.c

Lines changed: 71 additions & 145 deletions
Original file line numberDiff line numberDiff line change
@@ -63,56 +63,61 @@ xchk_parent_actor(
6363
return 0;
6464
}
6565

66-
/* Count the number of dentries in the parent dir that point to this inode. */
67-
STATIC int
68-
xchk_parent_count_parent_dentries(
69-
struct xfs_scrub *sc,
70-
struct xfs_inode *parent,
71-
xfs_nlink_t *nlink)
66+
/*
67+
* Try to lock a parent directory for checking dirents. Returns the inode
68+
* flags for the locks we now hold, or zero if we failed.
69+
*/
70+
STATIC unsigned int
71+
xchk_parent_ilock_dir(
72+
struct xfs_inode *dp)
7273
{
73-
struct xchk_parent_ctx spc = {
74-
.sc = sc,
75-
.nlink = 0,
76-
};
77-
uint lock_mode;
78-
int error = 0;
74+
if (!xfs_ilock_nowait(dp, XFS_ILOCK_SHARED))
75+
return 0;
7976

80-
lock_mode = xfs_ilock_data_map_shared(parent);
81-
error = xchk_dir_walk(sc, parent, xchk_parent_actor, &spc);
82-
xfs_iunlock(parent, lock_mode);
83-
if (error)
84-
return error;
77+
if (!xfs_need_iread_extents(&dp->i_df))
78+
return XFS_ILOCK_SHARED;
8579

86-
*nlink = spc.nlink;
87-
return error;
80+
xfs_iunlock(dp, XFS_ILOCK_SHARED);
81+
82+
if (!xfs_ilock_nowait(dp, XFS_ILOCK_EXCL))
83+
return 0;
84+
85+
return XFS_ILOCK_EXCL;
8886
}
8987

9088
/*
91-
* Given the inode number of the alleged parent of the inode being
92-
* scrubbed, try to validate that the parent has exactly one directory
93-
* entry pointing back to the inode being scrubbed.
89+
* Given the inode number of the alleged parent of the inode being scrubbed,
90+
* try to validate that the parent has exactly one directory entry pointing
91+
* back to the inode being scrubbed. Returns -EAGAIN if we need to revalidate
92+
* the dotdot entry.
9493
*/
9594
STATIC int
9695
xchk_parent_validate(
9796
struct xfs_scrub *sc,
98-
xfs_ino_t dnum,
99-
bool *try_again)
97+
xfs_ino_t parent_ino)
10098
{
99+
struct xchk_parent_ctx spc = {
100+
.sc = sc,
101+
.nlink = 0,
102+
};
101103
struct xfs_mount *mp = sc->mp;
102104
struct xfs_inode *dp = NULL;
103105
xfs_nlink_t expected_nlink;
104-
xfs_nlink_t nlink;
106+
unsigned int lock_mode;
105107
int error = 0;
106108

107-
*try_again = false;
108-
109-
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
110-
goto out;
109+
/* Is this the root dir? Then '..' must point to itself. */
110+
if (sc->ip == mp->m_rootip) {
111+
if (sc->ip->i_ino != mp->m_sb.sb_rootino ||
112+
sc->ip->i_ino != parent_ino)
113+
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
114+
return 0;
115+
}
111116

112117
/* '..' must not point to ourselves. */
113-
if (sc->ip->i_ino == dnum) {
118+
if (sc->ip->i_ino == parent_ino) {
114119
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
115-
goto out;
120+
return 0;
116121
}
117122

118123
/*
@@ -135,93 +140,43 @@ xchk_parent_validate(
135140
* -EFSCORRUPTED or -EFSBADCRC then the parent is corrupt which is a
136141
* cross referencing error. Any other error is an operational error.
137142
*/
138-
error = xfs_iget(mp, sc->tp, dnum, XFS_IGET_UNTRUSTED, 0, &dp);
143+
error = xfs_iget(mp, sc->tp, parent_ino, XFS_IGET_UNTRUSTED, 0, &dp);
139144
if (error == -EINVAL || error == -ENOENT) {
140145
error = -EFSCORRUPTED;
141146
xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error);
142-
goto out;
147+
return error;
143148
}
144149
if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
145-
goto out;
150+
return error;
146151
if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) {
147152
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
148153
goto out_rele;
149154
}
150155

151-
/*
152-
* We prefer to keep the inode locked while we lock and search
153-
* its alleged parent for a forward reference. If we can grab
154-
* the iolock, validate the pointers and we're done. We must
155-
* use nowait here to avoid an ABBA deadlock on the parent and
156-
* the child inodes.
157-
*/
158-
if (xfs_ilock_nowait(dp, XFS_IOLOCK_SHARED)) {
159-
error = xchk_parent_count_parent_dentries(sc, dp, &nlink);
160-
if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
161-
&error))
162-
goto out_unlock;
163-
if (nlink != expected_nlink)
164-
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
165-
goto out_unlock;
166-
}
167-
168-
/*
169-
* The game changes if we get here. We failed to lock the parent,
170-
* so we're going to try to verify both pointers while only holding
171-
* one lock so as to avoid deadlocking with something that's actually
172-
* trying to traverse down the directory tree.
173-
*/
174-
xfs_iunlock(sc->ip, sc->ilock_flags);
175-
sc->ilock_flags = 0;
176-
error = xchk_ilock_inverted(dp, XFS_IOLOCK_SHARED);
177-
if (error)
156+
lock_mode = xchk_parent_ilock_dir(dp);
157+
if (!lock_mode) {
158+
xfs_iunlock(sc->ip, XFS_ILOCK_EXCL);
159+
xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
160+
error = -EAGAIN;
178161
goto out_rele;
162+
}
179163

180-
/* Go looking for our dentry. */
181-
error = xchk_parent_count_parent_dentries(sc, dp, &nlink);
164+
/* Look for a directory entry in the parent pointing to the child. */
165+
error = xchk_dir_walk(sc, dp, xchk_parent_actor, &spc);
182166
if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
183167
goto out_unlock;
184168

185-
/* Drop the parent lock, relock this inode. */
186-
xfs_iunlock(dp, XFS_IOLOCK_SHARED);
187-
error = xchk_ilock_inverted(sc->ip, XFS_IOLOCK_EXCL);
188-
if (error)
189-
goto out_rele;
190-
sc->ilock_flags = XFS_IOLOCK_EXCL;
191-
192-
/*
193-
* If we're an unlinked directory, the parent /won't/ have a link
194-
* to us. Otherwise, it should have one link. We have to re-set
195-
* it here because we dropped the lock on sc->ip.
196-
*/
197-
expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1;
198-
199-
/* Look up '..' to see if the inode changed. */
200-
error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL);
201-
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
202-
goto out_rele;
203-
204-
/* Drat, parent changed. Try again! */
205-
if (dnum != dp->i_ino) {
206-
xfs_irele(dp);
207-
*try_again = true;
208-
return 0;
209-
}
210-
xfs_irele(dp);
211-
212169
/*
213-
* '..' didn't change, so check that there was only one entry
214-
* for us in the parent.
170+
* Ensure that the parent has as many links to the child as the child
171+
* thinks it has to the parent.
215172
*/
216-
if (nlink != expected_nlink)
173+
if (spc.nlink != expected_nlink)
217174
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
218-
return error;
219175

220176
out_unlock:
221-
xfs_iunlock(dp, XFS_IOLOCK_SHARED);
177+
xfs_iunlock(dp, lock_mode);
222178
out_rele:
223179
xfs_irele(dp);
224-
out:
225180
return error;
226181
}
227182

@@ -231,9 +186,7 @@ xchk_parent(
231186
struct xfs_scrub *sc)
232187
{
233188
struct xfs_mount *mp = sc->mp;
234-
xfs_ino_t dnum;
235-
bool try_again;
236-
int tries = 0;
189+
xfs_ino_t parent_ino;
237190
int error = 0;
238191

239192
/*
@@ -246,56 +199,29 @@ xchk_parent(
246199
/* We're not a special inode, are we? */
247200
if (!xfs_verify_dir_ino(mp, sc->ip->i_ino)) {
248201
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
249-
goto out;
250-
}
251-
252-
/*
253-
* The VFS grabs a read or write lock via i_rwsem before it reads
254-
* or writes to a directory. If we've gotten this far we've
255-
* already obtained IOLOCK_EXCL, which (since 4.10) is the same as
256-
* getting a write lock on i_rwsem. Therefore, it is safe for us
257-
* to drop the ILOCK here in order to do directory lookups.
258-
*/
259-
sc->ilock_flags &= ~(XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL);
260-
xfs_iunlock(sc->ip, XFS_ILOCK_EXCL | XFS_MMAPLOCK_EXCL);
261-
262-
/* Look up '..' */
263-
error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &dnum, NULL);
264-
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
265-
goto out;
266-
if (!xfs_verify_dir_ino(mp, dnum)) {
267-
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
268-
goto out;
202+
return 0;
269203
}
270204

271-
/* Is this the root dir? Then '..' must point to itself. */
272-
if (sc->ip == mp->m_rootip) {
273-
if (sc->ip->i_ino != mp->m_sb.sb_rootino ||
274-
sc->ip->i_ino != dnum)
205+
do {
206+
if (xchk_should_terminate(sc, &error))
207+
break;
208+
209+
/* Look up '..' */
210+
error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot,
211+
&parent_ino);
212+
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
213+
return error;
214+
if (!xfs_verify_dir_ino(mp, parent_ino)) {
275215
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
276-
goto out;
277-
}
216+
return 0;
217+
}
278218

279-
do {
280-
error = xchk_parent_validate(sc, dnum, &try_again);
281-
if (error)
282-
goto out;
283-
} while (try_again && ++tries < 20);
219+
/*
220+
* Check that the dotdot entry points to a parent directory
221+
* containing a dirent pointing to this subdirectory.
222+
*/
223+
error = xchk_parent_validate(sc, parent_ino);
224+
} while (error == -EAGAIN);
284225

285-
/*
286-
* We gave it our best shot but failed, so mark this scrub
287-
* incomplete. Userspace can decide if it wants to try again.
288-
*/
289-
if (try_again && tries == 20)
290-
xchk_set_incomplete(sc);
291-
out:
292-
/*
293-
* If we failed to lock the parent inode even after a retry, just mark
294-
* this scrub incomplete and return.
295-
*/
296-
if ((sc->flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) {
297-
error = 0;
298-
xchk_set_incomplete(sc);
299-
}
300226
return error;
301227
}

0 commit comments

Comments
 (0)