Skip to content

Commit d2d7c04

Browse files
dgchinnerDarrick J. Wong
authored andcommitted
xfs: aborting inodes on shutdown may need buffer lock
Most buffer io list operations are run with the bp->b_lock held, but xfs_iflush_abort() can be called without the buffer lock being held resulting in inodes being removed from the buffer list while other list operations are occurring. This causes problems with corrupted bp->b_io_list inode lists during filesystem shutdown, leading to traversals that never end, double removals from the AIL, etc. Fix this by passing the buffer to xfs_iflush_abort() if we have it locked. If the inode is attached to the buffer, we're going to have to remove it from the buffer list and we'd have to get the buffer off the inode log item to do that anyway. If we don't have a buffer passed in (e.g. from xfs_reclaim_inode()) then we can determine if the inode has a log item and if it is attached to a buffer before we do anything else. If it does have an attached buffer, we can lock it safely (because the inode has a reference to it) and then perform the inode abort. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
1 parent 85bcfa2 commit d2d7c04

4 files changed

Lines changed: 136 additions & 31 deletions

File tree

fs/xfs/xfs_icache.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -883,7 +883,7 @@ xfs_reclaim_inode(
883883
*/
884884
if (xlog_is_shutdown(ip->i_mount->m_log)) {
885885
xfs_iunpin_wait(ip);
886-
xfs_iflush_abort(ip);
886+
xfs_iflush_shutdown_abort(ip);
887887
goto reclaim;
888888
}
889889
if (xfs_ipincount(ip))

fs/xfs/xfs_inode.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3631,7 +3631,7 @@ xfs_iflush_cluster(
36313631

36323632
/*
36333633
* We must use the safe variant here as on shutdown xfs_iflush_abort()
3634-
* can remove itself from the list.
3634+
* will remove itself from the list.
36353635
*/
36363636
list_for_each_entry_safe(lip, n, &bp->b_li_list, li_bio_list) {
36373637
iip = (struct xfs_inode_log_item *)lip;

fs/xfs/xfs_inode_item.c

Lines changed: 133 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -544,10 +544,17 @@ xfs_inode_item_push(
544544
uint rval = XFS_ITEM_SUCCESS;
545545
int error;
546546

547-
ASSERT(iip->ili_item.li_buf);
547+
if (!bp || (ip->i_flags & XFS_ISTALE)) {
548+
/*
549+
* Inode item/buffer is being being aborted due to cluster
550+
* buffer deletion. Trigger a log force to have that operation
551+
* completed and items removed from the AIL before the next push
552+
* attempt.
553+
*/
554+
return XFS_ITEM_PINNED;
555+
}
548556

549-
if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp) ||
550-
(ip->i_flags & XFS_ISTALE))
557+
if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp))
551558
return XFS_ITEM_PINNED;
552559

553560
if (xfs_iflags_test(ip, XFS_IFLUSHING))
@@ -834,46 +841,143 @@ xfs_buf_inode_io_fail(
834841
}
835842

836843
/*
837-
* This is the inode flushing abort routine. It is called when
838-
* the filesystem is shutting down to clean up the inode state. It is
839-
* responsible for removing the inode item from the AIL if it has not been
840-
* re-logged and clearing the inode's flush state.
844+
* Clear the inode logging fields so no more flushes are attempted. If we are
845+
* on a buffer list, it is now safe to remove it because the buffer is
846+
* guaranteed to be locked. The caller will drop the reference to the buffer
847+
* the log item held.
848+
*/
849+
static void
850+
xfs_iflush_abort_clean(
851+
struct xfs_inode_log_item *iip)
852+
{
853+
iip->ili_last_fields = 0;
854+
iip->ili_fields = 0;
855+
iip->ili_fsync_fields = 0;
856+
iip->ili_flush_lsn = 0;
857+
iip->ili_item.li_buf = NULL;
858+
list_del_init(&iip->ili_item.li_bio_list);
859+
}
860+
861+
/*
862+
* Abort flushing the inode from a context holding the cluster buffer locked.
863+
*
864+
* This is the normal runtime method of aborting writeback of an inode that is
865+
* attached to a cluster buffer. It occurs when the inode and the backing
866+
* cluster buffer have been freed (i.e. inode is XFS_ISTALE), or when cluster
867+
* flushing or buffer IO completion encounters a log shutdown situation.
868+
*
869+
* If we need to abort inode writeback and we don't already hold the buffer
870+
* locked, call xfs_iflush_shutdown_abort() instead as this should only ever be
871+
* necessary in a shutdown situation.
841872
*/
842873
void
843874
xfs_iflush_abort(
844875
struct xfs_inode *ip)
845876
{
846877
struct xfs_inode_log_item *iip = ip->i_itemp;
847-
struct xfs_buf *bp = NULL;
878+
struct xfs_buf *bp;
848879

849-
if (iip) {
850-
/*
851-
* Clear the failed bit before removing the item from the AIL so
852-
* xfs_trans_ail_delete() doesn't try to clear and release the
853-
* buffer attached to the log item before we are done with it.
854-
*/
855-
clear_bit(XFS_LI_FAILED, &iip->ili_item.li_flags);
856-
xfs_trans_ail_delete(&iip->ili_item, 0);
880+
if (!iip) {
881+
/* clean inode, nothing to do */
882+
xfs_iflags_clear(ip, XFS_IFLUSHING);
883+
return;
884+
}
885+
886+
/*
887+
* Remove the inode item from the AIL before we clear its internal
888+
* state. Whilst the inode is in the AIL, it should have a valid buffer
889+
* pointer for push operations to access - it is only safe to remove the
890+
* inode from the buffer once it has been removed from the AIL.
891+
*
892+
* We also clear the failed bit before removing the item from the AIL
893+
* as xfs_trans_ail_delete()->xfs_clear_li_failed() will release buffer
894+
* references the inode item owns and needs to hold until we've fully
895+
* aborted the inode log item and detached it from the buffer.
896+
*/
897+
clear_bit(XFS_LI_FAILED, &iip->ili_item.li_flags);
898+
xfs_trans_ail_delete(&iip->ili_item, 0);
899+
900+
/*
901+
* Grab the inode buffer so can we release the reference the inode log
902+
* item holds on it.
903+
*/
904+
spin_lock(&iip->ili_lock);
905+
bp = iip->ili_item.li_buf;
906+
xfs_iflush_abort_clean(iip);
907+
spin_unlock(&iip->ili_lock);
857908

909+
xfs_iflags_clear(ip, XFS_IFLUSHING);
910+
if (bp)
911+
xfs_buf_rele(bp);
912+
}
913+
914+
/*
915+
* Abort an inode flush in the case of a shutdown filesystem. This can be called
916+
* from anywhere with just an inode reference and does not require holding the
917+
* inode cluster buffer locked. If the inode is attached to a cluster buffer,
918+
* it will grab and lock it safely, then abort the inode flush.
919+
*/
920+
void
921+
xfs_iflush_shutdown_abort(
922+
struct xfs_inode *ip)
923+
{
924+
struct xfs_inode_log_item *iip = ip->i_itemp;
925+
struct xfs_buf *bp;
926+
927+
if (!iip) {
928+
/* clean inode, nothing to do */
929+
xfs_iflags_clear(ip, XFS_IFLUSHING);
930+
return;
931+
}
932+
933+
spin_lock(&iip->ili_lock);
934+
bp = iip->ili_item.li_buf;
935+
if (!bp) {
936+
spin_unlock(&iip->ili_lock);
937+
xfs_iflush_abort(ip);
938+
return;
939+
}
940+
941+
/*
942+
* We have to take a reference to the buffer so that it doesn't get
943+
* freed when we drop the ili_lock and then wait to lock the buffer.
944+
* We'll clean up the extra reference after we pick up the ili_lock
945+
* again.
946+
*/
947+
xfs_buf_hold(bp);
948+
spin_unlock(&iip->ili_lock);
949+
xfs_buf_lock(bp);
950+
951+
spin_lock(&iip->ili_lock);
952+
if (!iip->ili_item.li_buf) {
858953
/*
859-
* Clear the inode logging fields so no more flushes are
860-
* attempted.
954+
* Raced with another removal, hold the only reference
955+
* to bp now. Inode should not be in the AIL now, so just clean
956+
* up and return;
861957
*/
862-
spin_lock(&iip->ili_lock);
863-
iip->ili_last_fields = 0;
864-
iip->ili_fields = 0;
865-
iip->ili_fsync_fields = 0;
866-
iip->ili_flush_lsn = 0;
867-
bp = iip->ili_item.li_buf;
868-
iip->ili_item.li_buf = NULL;
869-
list_del_init(&iip->ili_item.li_bio_list);
958+
ASSERT(list_empty(&iip->ili_item.li_bio_list));
959+
ASSERT(!test_bit(XFS_LI_IN_AIL, &iip->ili_item.li_flags));
960+
xfs_iflush_abort_clean(iip);
870961
spin_unlock(&iip->ili_lock);
962+
xfs_iflags_clear(ip, XFS_IFLUSHING);
963+
xfs_buf_relse(bp);
964+
return;
871965
}
872-
xfs_iflags_clear(ip, XFS_IFLUSHING);
873-
if (bp)
874-
xfs_buf_rele(bp);
966+
967+
/*
968+
* Got two references to bp. The first will get dropped by
969+
* xfs_iflush_abort() when the item is removed from the buffer list, but
970+
* we can't drop our reference until _abort() returns because we have to
971+
* unlock the buffer as well. Hence we abort and then unlock and release
972+
* our reference to the buffer.
973+
*/
974+
ASSERT(iip->ili_item.li_buf == bp);
975+
spin_unlock(&iip->ili_lock);
976+
xfs_iflush_abort(ip);
977+
xfs_buf_relse(bp);
875978
}
876979

980+
877981
/*
878982
* convert an xfs_inode_log_format struct from the old 32 bit version
879983
* (which can have different field alignments) to the native 64 bit version

fs/xfs/xfs_inode_item.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ static inline int xfs_inode_clean(struct xfs_inode *ip)
4444
extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
4545
extern void xfs_inode_item_destroy(struct xfs_inode *);
4646
extern void xfs_iflush_abort(struct xfs_inode *);
47+
extern void xfs_iflush_shutdown_abort(struct xfs_inode *);
4748
extern int xfs_inode_item_format_convert(xfs_log_iovec_t *,
4849
struct xfs_inode_log_format *);
4950

0 commit comments

Comments
 (0)