Skip to content

Commit 0dc63c8

Browse files
author
Darrick J. Wong
committed
xfs: launder in-memory btree buffers before transaction commit
As we've noted in various places, all current users of in-memory btrees are online fsck. Online fsck only stages a btree long enough to rebuild an ondisk data structure, which means that the in-memory btree is ephemeral. Furthermore, if we encounter /any/ errors while updating an in-memory btree, all we do is tear down all the staged data and return an errno to userspace. In-memory btrees need not be transactional, so their buffers should not be committed to the ondisk log, nor should they be checkpointed by the AIL. That's just as well since the ephemeral nature of the btree means that the buftarg and the buffers may disappear quickly anyway. Therefore, we need a way to launder the btree buffers that get attached to the transaction by the generic btree code. Because the buffers are directly mapped to backing file pages, there's no need to bwrite them back to the tmpfs file. All we need to do is clean enough of the buffer log item state so that the bli can be detached from the buffer, remove the bli from the transaction's log item list, and reset the transaction dirty state as if the laundered items had never been there. For simplicity, create xfbtree transaction commit and cancel helpers that launder the in-memory btree buffers for callers. Once laundered, call the write verifier on non-stale buffers to avoid integrity issues, or punch a hole in the backing file for stale buffers. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de>
1 parent a095686 commit 0dc63c8

7 files changed

Lines changed: 236 additions & 0 deletions

File tree

fs/xfs/libxfs/xfs_btree_mem.c

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,3 +226,122 @@ xfbtree_get_maxrecs(
226226

227227
return xfbt->maxrecs[level != 0];
228228
}
229+
230+
/* If this log item is a buffer item that came from the xfbtree, return it. */
231+
static inline struct xfs_buf *
232+
xfbtree_buf_match(
233+
struct xfbtree *xfbt,
234+
const struct xfs_log_item *lip)
235+
{
236+
const struct xfs_buf_log_item *bli;
237+
struct xfs_buf *bp;
238+
239+
if (lip->li_type != XFS_LI_BUF)
240+
return NULL;
241+
242+
bli = container_of(lip, struct xfs_buf_log_item, bli_item);
243+
bp = bli->bli_buf;
244+
if (bp->b_target != xfbt->target)
245+
return NULL;
246+
247+
return bp;
248+
}
249+
250+
/*
251+
* Commit changes to the incore btree immediately by writing all dirty xfbtree
252+
* buffers to the backing xfile. This detaches all xfbtree buffers from the
253+
* transaction, even on failure. The buffer locks are dropped between the
254+
* delwri queue and submit, so the caller must synchronize btree access.
255+
*
256+
* Normally we'd let the buffers commit with the transaction and get written to
257+
* the xfile via the log, but online repair stages ephemeral btrees in memory
258+
* and uses the btree_staging functions to write new btrees to disk atomically.
259+
* The in-memory btree (and its backing store) are discarded at the end of the
260+
* repair phase, which means that xfbtree buffers cannot commit with the rest
261+
* of a transaction.
262+
*
263+
* In other words, online repair only needs the transaction to collect buffer
264+
* pointers and to avoid buffer deadlocks, not to guarantee consistency of
265+
* updates.
266+
*/
267+
int
268+
xfbtree_trans_commit(
269+
struct xfbtree *xfbt,
270+
struct xfs_trans *tp)
271+
{
272+
struct xfs_log_item *lip, *n;
273+
bool tp_dirty = false;
274+
int error = 0;
275+
276+
/*
277+
* For each xfbtree buffer attached to the transaction, write the dirty
278+
* buffers to the xfile and release them.
279+
*/
280+
list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) {
281+
struct xfs_buf *bp = xfbtree_buf_match(xfbt, lip);
282+
283+
if (!bp) {
284+
if (test_bit(XFS_LI_DIRTY, &lip->li_flags))
285+
tp_dirty |= true;
286+
continue;
287+
}
288+
289+
trace_xfbtree_trans_commit_buf(xfbt, bp);
290+
291+
xmbuf_trans_bdetach(tp, bp);
292+
293+
/*
294+
* If the buffer fails verification, note the failure but
295+
* continue walking the transaction items so that we remove all
296+
* ephemeral btree buffers.
297+
*/
298+
if (!error)
299+
error = xmbuf_finalize(bp);
300+
301+
xfs_buf_relse(bp);
302+
}
303+
304+
/*
305+
* Reset the transaction's dirty flag to reflect the dirty state of the
306+
* log items that are still attached.
307+
*/
308+
tp->t_flags = (tp->t_flags & ~XFS_TRANS_DIRTY) |
309+
(tp_dirty ? XFS_TRANS_DIRTY : 0);
310+
311+
return error;
312+
}
313+
314+
/*
315+
* Cancel changes to the incore btree by detaching all the xfbtree buffers.
316+
* Changes are not undone, so callers must not access the btree ever again.
317+
*/
318+
void
319+
xfbtree_trans_cancel(
320+
struct xfbtree *xfbt,
321+
struct xfs_trans *tp)
322+
{
323+
struct xfs_log_item *lip, *n;
324+
bool tp_dirty = false;
325+
326+
list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) {
327+
struct xfs_buf *bp = xfbtree_buf_match(xfbt, lip);
328+
329+
if (!bp) {
330+
if (test_bit(XFS_LI_DIRTY, &lip->li_flags))
331+
tp_dirty |= true;
332+
continue;
333+
}
334+
335+
trace_xfbtree_trans_cancel_buf(xfbt, bp);
336+
337+
xmbuf_trans_bdetach(tp, bp);
338+
xfs_buf_relse(bp);
339+
}
340+
341+
/*
342+
* Reset the transaction's dirty flag to reflect the dirty state of the
343+
* log items that are still attached.
344+
*/
345+
tp->t_flags = (tp->t_flags & ~XFS_TRANS_DIRTY) |
346+
(tp_dirty ? XFS_TRANS_DIRTY : 0);
347+
}

fs/xfs/libxfs/xfs_btree_mem.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ int xfbtree_free_block(struct xfs_btree_cur *cur, struct xfs_buf *bp);
6565
int xfbtree_init(struct xfs_mount *mp, struct xfbtree *xfbt,
6666
struct xfs_buftarg *btp, const struct xfs_btree_ops *ops);
6767
void xfbtree_destroy(struct xfbtree *xfbt);
68+
69+
int xfbtree_trans_commit(struct xfbtree *xfbt, struct xfs_trans *tp);
70+
void xfbtree_trans_cancel(struct xfbtree *xfbt, struct xfs_trans *tp);
6871
#else
6972
# define xfbtree_verify_bno(...) (false)
7073
#endif /* CONFIG_XFS_BTREE_IN_MEM */

fs/xfs/xfs_buf_mem.c

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
#include "xfs_buf_mem.h"
1010
#include "xfs_trace.h"
1111
#include <linux/shmem_fs.h>
12+
#include "xfs_log_format.h"
13+
#include "xfs_trans.h"
14+
#include "xfs_buf_item.h"
15+
#include "xfs_error.h"
1216

1317
/*
1418
* Buffer Cache for In-Memory Files
@@ -200,3 +204,67 @@ xmbuf_verify_daddr(
200204

201205
return daddr < (inode->i_sb->s_maxbytes >> BBSHIFT);
202206
}
207+
208+
/* Discard the page backing this buffer. */
209+
static void
210+
xmbuf_stale(
211+
struct xfs_buf *bp)
212+
{
213+
struct inode *inode = file_inode(bp->b_target->bt_file);
214+
loff_t pos;
215+
216+
ASSERT(xfs_buftarg_is_mem(bp->b_target));
217+
218+
pos = BBTOB(xfs_buf_daddr(bp));
219+
shmem_truncate_range(inode, pos, pos + BBTOB(bp->b_length) - 1);
220+
}
221+
222+
/*
223+
* Finalize a buffer -- discard the backing page if it's stale, or run the
224+
* write verifier to detect problems.
225+
*/
226+
int
227+
xmbuf_finalize(
228+
struct xfs_buf *bp)
229+
{
230+
xfs_failaddr_t fa;
231+
int error = 0;
232+
233+
if (bp->b_flags & XBF_STALE) {
234+
xmbuf_stale(bp);
235+
return 0;
236+
}
237+
238+
/*
239+
* Although this btree is ephemeral, validate the buffer structure so
240+
* that we can detect memory corruption errors and software bugs.
241+
*/
242+
fa = bp->b_ops->verify_struct(bp);
243+
if (fa) {
244+
error = -EFSCORRUPTED;
245+
xfs_verifier_error(bp, error, fa);
246+
}
247+
248+
return error;
249+
}
250+
251+
/*
 * Detach this xmbuf buffer from the transaction by any means necessary.
 * All buffers are direct-mapped, so they do not need bwrite.
 */
void
xmbuf_trans_bdetach(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp)
{
	struct xfs_buf_log_item	*bli = bp->b_log_item;

	ASSERT(bli != NULL);

	/*
	 * Launder the bli: clear the dirty/ordered/logged/stale state first
	 * so that xfs_trans_bdetach's "completely clean" assertions hold.
	 */
	bli->bli_flags &= ~(XFS_BLI_DIRTY | XFS_BLI_ORDERED |
			    XFS_BLI_LOGGED | XFS_BLI_STALE);
	clear_bit(XFS_LI_DIRTY, &bli->bli_item.li_flags);

	/* Keep detaching until the buf log item is released from the buffer. */
	while (bp->b_log_item != NULL)
		xfs_trans_bdetach(tp, bp);
}

fs/xfs/xfs_buf_mem.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ void xmbuf_free(struct xfs_buftarg *btp);
2222
int xmbuf_map_page(struct xfs_buf *bp);
2323
void xmbuf_unmap_page(struct xfs_buf *bp);
2424
bool xmbuf_verify_daddr(struct xfs_buftarg *btp, xfs_daddr_t daddr);
25+
void xmbuf_trans_bdetach(struct xfs_trans *tp, struct xfs_buf *bp);
26+
int xmbuf_finalize(struct xfs_buf *bp);
2527
#else
2628
# define xfs_buftarg_is_mem(...) (false)
2729
# define xmbuf_map_page(...) (-ENOMEM)

fs/xfs/xfs_trace.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf);
642642
DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur);
643643
DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf);
644644
DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse);
645+
DEFINE_BUF_ITEM_EVENT(xfs_trans_bdetach);
645646
DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin);
646647
DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
647648
DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);

fs/xfs/xfs_trans.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ struct xfs_buf *xfs_trans_getsb(struct xfs_trans *);
215215

216216
void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
217217
void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *);
218+
void xfs_trans_bdetach(struct xfs_trans *tp, struct xfs_buf *bp);
218219
void xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *);
219220
void xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *);
220221
void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *);

fs/xfs/xfs_trans_buf.c

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,48 @@ xfs_trans_brelse(
392392
xfs_buf_relse(bp);
393393
}
394394

395+
/*
 * Forcibly detach a buffer previously joined to the transaction.  The caller
 * will retain its locked reference to the buffer after this function returns.
 * The buffer must be completely clean and must not be held to the transaction.
 */
void
xfs_trans_bdetach(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp)
{
	struct xfs_buf_log_item	*bip = bp->b_log_item;

	ASSERT(tp != NULL);
	ASSERT(bp->b_transp == tp);
	ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
	ASSERT(atomic_read(&bip->bli_refcount) > 0);

	trace_xfs_trans_bdetach(bip);

	/*
	 * Erase all recursion count, since we're removing this buffer from the
	 * transaction.
	 */
	bip->bli_recur = 0;

	/*
	 * The buffer must be completely clean.  Specifically, it had better
	 * not be dirty, stale, logged, ordered, or held to the transaction.
	 */
	ASSERT(!test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags));
	ASSERT(!(bip->bli_flags & XFS_BLI_DIRTY));
	ASSERT(!(bip->bli_flags & XFS_BLI_HOLD));
	ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
	ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED));
	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));

	/* Unlink the log item from the transaction and drop the log item. */
	xfs_trans_del_item(&bip->bli_item);
	xfs_buf_item_put(bip);
	/* Buffer no longer belongs to any transaction. */
	bp->b_transp = NULL;
}
436+
395437
/*
396438
* Mark the buffer as not needing to be unlocked when the buf item's
397439
* iop_committing() routine is called. The buffer must already be locked

0 commit comments

Comments
 (0)