Skip to content

Commit c0fb476

Browse files
dgchinner authored and dchinner committed
xfs: convert CIL to unordered per cpu lists
So that we can remove the cil_lock which is a global serialisation point. We've already got ordering sorted, so all we need to do is treat the CIL list like the busy extent list and reconstruct it before the push starts.

This is what we're trying to avoid:

 - 75.35% 1.83% [kernel] [k] xfs_log_commit_cil
    - 46.35% xfs_log_commit_cil
       - 41.54% _raw_spin_lock
          - 67.30% do_raw_spin_lock
               66.96% __pv_queued_spin_lock_slowpath

Which happens on a 32p system when running a 32-way 'rm -rf' workload.

After this patch:

 - 20.90% 3.23% [kernel] [k] xfs_log_commit_cil
    - 17.67% xfs_log_commit_cil
       - 6.51% xfs_log_ticket_ungrant
            1.40% xfs_log_space_wake
         2.32% memcpy_erms
       - 2.18% xfs_buf_item_committing
          - 2.12% xfs_buf_item_release
             - 1.03% xfs_buf_unlock
                  0.96% up
               0.72% xfs_buf_rele
         1.33% xfs_inode_item_format
         1.19% down_read
         0.91% up_read
         0.76% xfs_buf_item_format
       - 0.68% kmem_alloc_large
          - 0.67% kmem_alloc
               0.64% __kmalloc
            0.50% xfs_buf_item_size

It kinda looks like the workload is running out of log space all the time. But all the spinlock contention is gone and the transaction commit rate has gone from 800k/s to 1.3M/s so the amount of real work being done has gone up a *lot*.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
1 parent 016a233 commit c0fb476

2 files changed

Lines changed: 17 additions & 21 deletions

File tree

fs/xfs/xfs_log_cil.c

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ xlog_cil_ctx_alloc(void)
104104
ctx = kmem_zalloc(sizeof(*ctx), KM_NOFS);
105105
INIT_LIST_HEAD(&ctx->committing);
106106
INIT_LIST_HEAD(&ctx->busy_extents);
107+
INIT_LIST_HEAD(&ctx->log_items);
107108
INIT_WORK(&ctx->push_work, xlog_cil_push_work);
108109
return ctx;
109110
}
@@ -132,6 +133,8 @@ xlog_cil_push_pcp_aggregate(
132133
list_splice_init(&cilpcp->busy_extents,
133134
&ctx->busy_extents);
134135
}
136+
if (!list_empty(&cilpcp->log_items))
137+
list_splice_init(&cilpcp->log_items, &ctx->log_items);
135138

136139
/*
137140
* We're in the middle of switching cil contexts. Reset the
@@ -579,10 +582,9 @@ xlog_cil_insert_items(
579582
/*
580583
* We need to take the CIL checkpoint unit reservation on the first
581584
* commit into the CIL. Test the XLOG_CIL_EMPTY bit first so we don't
582-
* unnecessarily do an atomic op in the fast path here. We don't need to
583-
* hold the xc_cil_lock here to clear the XLOG_CIL_EMPTY bit as we are
584-
* under the xc_ctx_lock here and that needs to be held exclusively to
585-
* reset the XLOG_CIL_EMPTY bit.
585+
* unnecessarily do an atomic op in the fast path here. We can clear the
586+
* XLOG_CIL_EMPTY bit as we are under the xc_ctx_lock here and that
587+
* needs to be held exclusively to reset the XLOG_CIL_EMPTY bit.
586588
*/
587589
if (test_bit(XLOG_CIL_EMPTY, &cil->xc_flags) &&
588590
test_and_clear_bit(XLOG_CIL_EMPTY, &cil->xc_flags))
@@ -643,7 +645,6 @@ xlog_cil_insert_items(
643645
/* attach the transaction to the CIL if it has any busy extents */
644646
if (!list_empty(&tp->t_busy))
645647
list_splice_init(&tp->t_busy, &cilpcp->busy_extents);
646-
put_cpu_ptr(cilpcp);
647648

648649
/*
649650
* Now update the order of everything modified in the transaction
@@ -652,7 +653,6 @@ xlog_cil_insert_items(
652653
* the transaction commit.
653654
*/
654655
order = atomic_inc_return(&ctx->order_id);
655-
spin_lock(&cil->xc_cil_lock);
656656
list_for_each_entry(lip, &tp->t_items, li_trans) {
657657
/* Skip items which aren't dirty in this transaction. */
658658
if (!test_bit(XFS_LI_DIRTY, &lip->li_flags))
@@ -661,10 +661,9 @@ xlog_cil_insert_items(
661661
lip->li_order_id = order;
662662
if (!list_empty(&lip->li_cil))
663663
continue;
664-
list_add_tail(&lip->li_cil, &cil->xc_cil);
664+
list_add_tail(&lip->li_cil, &cilpcp->log_items);
665665
}
666-
667-
spin_unlock(&cil->xc_cil_lock);
666+
put_cpu_ptr(cilpcp);
668667

669668
/*
670669
* If we've overrun the reservation, dump the tx details before we move
@@ -1113,20 +1112,19 @@ xlog_cil_order_cmp(
11131112
*/
11141113
static void
11151114
xlog_cil_build_lv_chain(
1116-
struct xfs_cil *cil,
11171115
struct xfs_cil_ctx *ctx,
11181116
struct list_head *whiteouts,
11191117
uint32_t *num_iovecs,
11201118
uint32_t *num_bytes)
11211119
{
11221120
struct xfs_log_vec *lv = NULL;
11231121

1124-
list_sort(NULL, &cil->xc_cil, xlog_cil_order_cmp);
1122+
list_sort(NULL, &ctx->log_items, xlog_cil_order_cmp);
11251123

1126-
while (!list_empty(&cil->xc_cil)) {
1124+
while (!list_empty(&ctx->log_items)) {
11271125
struct xfs_log_item *item;
11281126

1129-
item = list_first_entry(&cil->xc_cil,
1127+
item = list_first_entry(&ctx->log_items,
11301128
struct xfs_log_item, li_cil);
11311129

11321130
if (test_bit(XFS_LI_WHITEOUT, &item->li_flags)) {
@@ -1265,7 +1263,7 @@ xlog_cil_push_work(
12651263
list_add(&ctx->committing, &cil->xc_committing);
12661264
spin_unlock(&cil->xc_push_lock);
12671265

1268-
xlog_cil_build_lv_chain(cil, ctx, &whiteouts, &num_iovecs, &num_bytes);
1266+
xlog_cil_build_lv_chain(ctx, &whiteouts, &num_iovecs, &num_bytes);
12691267

12701268
/*
12711269
* Switch the contexts so we can drop the context lock and move out
@@ -1409,7 +1407,6 @@ xlog_cil_push_background(
14091407
* The cil won't be empty because we are called while holding the
14101408
* context lock so whatever we added to the CIL will still be there.
14111409
*/
1412-
ASSERT(!list_empty(&cil->xc_cil));
14131410
ASSERT(!test_bit(XLOG_CIL_EMPTY, &cil->xc_flags));
14141411

14151412
/*
@@ -1656,7 +1653,7 @@ xlog_cil_flush(
16561653
* If the CIL is empty, make sure that any previous checkpoint that may
16571654
* still be in an active iclog is pushed to stable storage.
16581655
*/
1659-
if (list_empty(&log->l_cilp->xc_cil))
1656+
if (test_bit(XLOG_CIL_EMPTY, &log->l_cilp->xc_flags))
16601657
xfs_log_force(log->l_mp, 0);
16611658
}
16621659

@@ -1784,6 +1781,8 @@ xlog_cil_pcp_dead(
17841781
ctx->ticket->t_curr_res += cilpcp->space_reserved;
17851782
cilpcp->space_reserved = 0;
17861783

1784+
if (!list_empty(&cilpcp->log_items))
1785+
list_splice_init(&cilpcp->log_items, &ctx->log_items);
17871786
if (!list_empty(&cilpcp->busy_extents))
17881787
list_splice_init(&cilpcp->busy_extents, &ctx->busy_extents);
17891788
atomic_add(cilpcp->space_used, &ctx->space_used);
@@ -1824,11 +1823,10 @@ xlog_cil_init(
18241823
for_each_possible_cpu(cpu) {
18251824
cilpcp = per_cpu_ptr(cil->xc_pcp, cpu);
18261825
INIT_LIST_HEAD(&cilpcp->busy_extents);
1826+
INIT_LIST_HEAD(&cilpcp->log_items);
18271827
}
18281828

1829-
INIT_LIST_HEAD(&cil->xc_cil);
18301829
INIT_LIST_HEAD(&cil->xc_committing);
1831-
spin_lock_init(&cil->xc_cil_lock);
18321830
spin_lock_init(&cil->xc_push_lock);
18331831
init_waitqueue_head(&cil->xc_push_wait);
18341832
init_rwsem(&cil->xc_ctx_lock);
@@ -1859,7 +1857,6 @@ xlog_cil_destroy(
18591857
kmem_free(cil->xc_ctx);
18601858
}
18611859

1862-
ASSERT(list_empty(&cil->xc_cil));
18631860
ASSERT(test_bit(XLOG_CIL_EMPTY, &cil->xc_flags));
18641861
free_percpu(cil->xc_pcp);
18651862
destroy_workqueue(cil->xc_push_wq);

fs/xfs/xfs_log_priv.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ struct xfs_cil_ctx {
224224
struct xlog_ticket *ticket; /* chkpt ticket */
225225
atomic_t space_used; /* aggregate size of regions */
226226
struct list_head busy_extents; /* busy extents in chkpt */
227+
struct list_head log_items; /* log items in chkpt */
227228
struct xfs_log_vec *lv_chain; /* logvecs being pushed */
228229
struct list_head iclog_entry;
229230
struct list_head committing; /* ctx committing list */
@@ -262,8 +263,6 @@ struct xfs_cil {
262263
struct xlog *xc_log;
263264
unsigned long xc_flags;
264265
atomic_t xc_iclog_hdrs;
265-
struct list_head xc_cil;
266-
spinlock_t xc_cil_lock;
267266
struct workqueue_struct *xc_push_wq;
268267

269268
struct rw_semaphore xc_ctx_lock ____cacheline_aligned_in_smp;

0 commit comments

Comments (0)