Skip to content

Commit 2129b42

Browse files
author
Andreas Gruenbacher
committed
gfs2: Per-revoke accounting in transactions
In the log, revokes are stored as a revoke descriptor (struct gfs2_log_descriptor), followed by zero or more additional revoke blocks (struct gfs2_meta_header). On filesystems with a blocksize of 4k, the revoke descriptor contains up to 503 revokes, and the metadata blocks contain up to 509 revokes each. We've so far been reserving space for revokes in transactions in block granularity, so a lot more space than necessary was being allocated and then released again.

This patch switches to assigning revokes to transactions individually instead. Initially, space for the revoke descriptor is reserved and handed out to transactions. When more revokes than that are reserved, additional revoke blocks are added. When the log is flushed, the space for the additional revoke blocks is released, but we keep the space for the revoke descriptor block allocated.

Transactions may still reserve more revokes than they will actually need in the end, but now we won't overshoot the target as much, and by only returning the space for excess revokes at log flush time, we further reduce the amount of contention between processes.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
1 parent fe3e397 commit 2129b42

7 files changed

Lines changed: 131 additions & 42 deletions

File tree

fs/gfs2/glops.c

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -136,19 +136,15 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
136136
{
137137
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
138138
unsigned int revokes = atomic_read(&gl->gl_ail_count);
139-
unsigned int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);
140139
int ret;
141140

142141
if (!revokes)
143142
return;
144143

145-
while (revokes > max_revokes)
146-
max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);
147-
148-
ret = gfs2_trans_begin(sdp, 0, max_revokes);
144+
ret = gfs2_trans_begin(sdp, 0, revokes);
149145
if (ret)
150146
return;
151-
__gfs2_ail_flush(gl, fsync, max_revokes);
147+
__gfs2_ail_flush(gl, fsync, revokes);
152148
gfs2_trans_end(sdp);
153149
gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
154150
GFS2_LFC_AIL_FLUSH);

fs/gfs2/incore.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -506,7 +506,6 @@ struct gfs2_trans {
506506
unsigned int tr_num_buf_rm;
507507
unsigned int tr_num_databuf_rm;
508508
unsigned int tr_num_revoke;
509-
unsigned int tr_num_revoke_rm;
510509

511510
struct list_head tr_list;
512511
struct list_head tr_databuf;
@@ -821,7 +820,6 @@ struct gfs2_sbd {
821820

822821
struct gfs2_trans *sd_log_tr;
823822
unsigned int sd_log_blks_reserved;
824-
int sd_log_committed_revoke;
825823

826824
atomic_t sd_log_pinned;
827825
unsigned int sd_log_num_revoke;
@@ -834,6 +832,7 @@ struct gfs2_sbd {
834832
atomic_t sd_log_thresh2;
835833
atomic_t sd_log_blks_free;
836834
atomic_t sd_log_blks_needed;
835+
atomic_t sd_log_revokes_available;
837836
wait_queue_head_t sd_log_waitq;
838837
wait_queue_head_t sd_logd_waitq;
839838

fs/gfs2/log.c

Lines changed: 90 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,32 @@ bool gfs2_log_is_empty(struct gfs2_sbd *sdp) {
429429
return atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks;
430430
}
431431

432+
static bool __gfs2_log_try_reserve_revokes(struct gfs2_sbd *sdp, unsigned int revokes)
433+
{
434+
unsigned int available;
435+
436+
available = atomic_read(&sdp->sd_log_revokes_available);
437+
while (available >= revokes) {
438+
if (atomic_try_cmpxchg(&sdp->sd_log_revokes_available,
439+
&available, available - revokes))
440+
return true;
441+
}
442+
return false;
443+
}
444+
445+
/**
446+
* gfs2_log_release_revokes - Release a given number of revokes
447+
* @sdp: The GFS2 superblock
448+
* @revokes: The number of revokes to release
449+
*
450+
* sdp->sd_log_flush_lock must be held.
451+
*/
452+
void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes)
453+
{
454+
if (revokes)
455+
atomic_add(revokes, &sdp->sd_log_revokes_available);
456+
}
457+
432458
/**
433459
* gfs2_log_release - Release a given number of log blocks
434460
* @sdp: The GFS2 superblock
@@ -519,15 +545,59 @@ static void __gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks,
519545
}
520546

521547
/**
522-
* gfs2_log_reserve - Make a log reservation
548+
* gfs2_log_try_reserve - Try to make a log reservation
523549
* @sdp: The GFS2 superblock
524-
* @blks: The number of blocks to reserve
550+
* @tr: The transaction
551+
* @extra_revokes: The number of additional revokes reserved (output)
552+
*
553+
* This is similar to gfs2_log_reserve, but sdp->sd_log_flush_lock must be
554+
* held for correct revoke accounting.
525555
*/
526556

527-
void gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
557+
bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
558+
unsigned int *extra_revokes)
528559
{
560+
unsigned int blks = tr->tr_reserved;
561+
unsigned int revokes = tr->tr_revokes;
562+
unsigned int revoke_blks = 0;
563+
564+
*extra_revokes = 0;
565+
if (revokes && !__gfs2_log_try_reserve_revokes(sdp, revokes)) {
566+
revoke_blks = DIV_ROUND_UP(revokes, sdp->sd_inptrs);
567+
*extra_revokes = revoke_blks * sdp->sd_inptrs - revokes;
568+
blks += revoke_blks;
569+
}
570+
if (!blks)
571+
return true;
529572
if (__gfs2_log_try_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS))
530-
return;
573+
return true;
574+
if (!revoke_blks)
575+
gfs2_log_release_revokes(sdp, revokes);
576+
return false;
577+
}
578+
579+
/**
580+
* gfs2_log_reserve - Make a log reservation
581+
* @sdp: The GFS2 superblock
582+
* @tr: The transaction
583+
* @extra_revokes: The number of additional revokes reserved (output)
584+
*
585+
* sdp->sd_log_flush_lock must not be held.
586+
*/
587+
588+
void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
589+
unsigned int *extra_revokes)
590+
{
591+
unsigned int blks = tr->tr_reserved;
592+
unsigned int revokes = tr->tr_revokes;
593+
unsigned int revoke_blks = 0;
594+
595+
*extra_revokes = 0;
596+
if (revokes) {
597+
revoke_blks = DIV_ROUND_UP(revokes, sdp->sd_inptrs);
598+
*extra_revokes = revoke_blks * sdp->sd_inptrs - revokes;
599+
blks += revoke_blks;
600+
}
531601
__gfs2_log_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS);
532602
}
533603

@@ -588,9 +658,6 @@ static unsigned int calc_reserved(struct gfs2_sbd *sdp)
588658
blocks = tr->tr_num_databuf_new - tr->tr_num_databuf_rm;
589659
reserved += blocks + DIV_ROUND_UP(blocks, databuf_limit(sdp));
590660
}
591-
592-
if (sdp->sd_log_committed_revoke > 0)
593-
reserved += gfs2_struct2blk(sdp, sdp->sd_log_committed_revoke) - 1;
594661
return reserved;
595662
}
596663

@@ -730,14 +797,9 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl)
730797
void gfs2_flush_revokes(struct gfs2_sbd *sdp)
731798
{
732799
/* number of revokes we still have room for */
733-
unsigned int max_revokes;
800+
unsigned int max_revokes = atomic_read(&sdp->sd_log_revokes_available);
734801

735802
gfs2_log_lock(sdp);
736-
max_revokes = sdp->sd_ldptrs;
737-
if (sdp->sd_log_num_revoke > sdp->sd_ldptrs)
738-
max_revokes += roundup(sdp->sd_log_num_revoke - sdp->sd_ldptrs,
739-
sdp->sd_inptrs);
740-
max_revokes -= sdp->sd_log_num_revoke;
741803
gfs2_ail1_empty(sdp, max_revokes);
742804
gfs2_log_unlock(sdp);
743805
}
@@ -955,6 +1017,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
9551017
unsigned int reserved_blocks = 0, used_blocks = 0;
9561018
enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
9571019
unsigned int first_log_head;
1020+
unsigned int reserved_revokes = 0;
9581021

9591022
down_write(&sdp->sd_log_flush_lock);
9601023
trace_gfs2_log_flush(sdp, 1, flags);
@@ -979,13 +1042,15 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
9791042
if (reserved_blocks)
9801043
gfs2_log_release(sdp, reserved_blocks);
9811044
reserved_blocks = sdp->sd_log_blks_reserved;
1045+
reserved_revokes = sdp->sd_log_num_revoke;
9821046
if (tr) {
9831047
sdp->sd_log_tr = NULL;
9841048
tr->tr_first = first_log_head;
985-
if (unlikely (state == SFS_FROZEN))
1049+
if (unlikely (state == SFS_FROZEN)) {
9861050
if (gfs2_assert_withdraw_delayed(sdp,
9871051
!tr->tr_num_buf_new && !tr->tr_num_databuf_new))
9881052
goto out_withdraw;
1053+
}
9891054
}
9901055
} else if (!reserved_blocks) {
9911056
unsigned int taboo_blocks = GFS2_LOG_FLUSH_MIN_BLOCKS;
@@ -1000,17 +1065,15 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
10001065
down_write(&sdp->sd_log_flush_lock);
10011066
goto repeat;
10021067
}
1068+
BUG_ON(sdp->sd_log_num_revoke);
10031069
}
10041070

10051071
if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN)
10061072
clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
10071073

10081074
if (unlikely(state == SFS_FROZEN))
1009-
if (gfs2_assert_withdraw_delayed(sdp, !sdp->sd_log_num_revoke))
1075+
if (gfs2_assert_withdraw_delayed(sdp, !reserved_revokes))
10101076
goto out_withdraw;
1011-
if (gfs2_assert_withdraw_delayed(sdp,
1012-
sdp->sd_log_num_revoke == sdp->sd_log_committed_revoke))
1013-
goto out_withdraw;
10141077

10151078
gfs2_ordered_write(sdp);
10161079
if (gfs2_withdrawn(sdp))
@@ -1034,7 +1097,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
10341097

10351098
gfs2_log_lock(sdp);
10361099
sdp->sd_log_blks_reserved = 0;
1037-
sdp->sd_log_committed_revoke = 0;
10381100

10391101
spin_lock(&sdp->sd_ail_lock);
10401102
if (tr && !list_empty(&tr->tr_ail1_list)) {
@@ -1060,11 +1122,16 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
10601122

10611123
out_end:
10621124
used_blocks = log_distance(sdp, sdp->sd_log_flush_head, first_log_head);
1063-
if (gfs2_assert_withdraw_delayed(sdp, used_blocks <= reserved_blocks))
1064-
goto out;
1125+
reserved_revokes += atomic_read(&sdp->sd_log_revokes_available);
1126+
atomic_set(&sdp->sd_log_revokes_available, sdp->sd_ldptrs);
1127+
gfs2_assert_withdraw(sdp, reserved_revokes % sdp->sd_inptrs == sdp->sd_ldptrs);
1128+
if (reserved_revokes > sdp->sd_ldptrs)
1129+
reserved_blocks += (reserved_revokes - sdp->sd_ldptrs) / sdp->sd_inptrs;
10651130
out:
1066-
if (used_blocks != reserved_blocks)
1131+
if (used_blocks != reserved_blocks) {
1132+
gfs2_assert_withdraw_delayed(sdp, used_blocks < reserved_blocks);
10671133
gfs2_log_release(sdp, reserved_blocks - used_blocks);
1134+
}
10681135
up_write(&sdp->sd_log_flush_lock);
10691136
gfs2_trans_free(sdp, tr);
10701137
if (gfs2_withdrawing(sdp))
@@ -1105,8 +1172,8 @@ static void gfs2_merge_trans(struct gfs2_sbd *sdp, struct gfs2_trans *new)
11051172
old->tr_num_databuf_new += new->tr_num_databuf_new;
11061173
old->tr_num_buf_rm += new->tr_num_buf_rm;
11071174
old->tr_num_databuf_rm += new->tr_num_databuf_rm;
1175+
old->tr_revokes += new->tr_revokes;
11081176
old->tr_num_revoke += new->tr_num_revoke;
1109-
old->tr_num_revoke_rm += new->tr_num_revoke_rm;
11101177

11111178
list_splice_tail_init(&new->tr_databuf, &old->tr_databuf);
11121179
list_splice_tail_init(&new->tr_buf, &old->tr_buf);
@@ -1133,7 +1200,6 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
11331200
set_bit(TR_ATTACHED, &tr->tr_flags);
11341201
}
11351202

1136-
sdp->sd_log_committed_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
11371203
reserved = calc_reserved(sdp);
11381204
maxres = sdp->sd_log_blks_reserved + tr->tr_reserved;
11391205
gfs2_assert_withdraw(sdp, maxres >= reserved);

fs/gfs2/log.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,12 @@ extern void gfs2_ordered_del_inode(struct gfs2_inode *ip);
7474
extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct);
7575
extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
7676
extern bool gfs2_log_is_empty(struct gfs2_sbd *sdp);
77+
extern void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes);
7778
extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
78-
extern void gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
79+
extern bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
80+
unsigned int *extra_revokes);
81+
extern void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
82+
unsigned int *extra_revokes);
7983
extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
8084
u64 seq, u32 tail, u32 lblock, u32 flags,
8185
int op_flags);

fs/gfs2/lops.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -857,7 +857,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
857857
sdp->sd_log_num_revoke--;
858858

859859
if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
860-
861860
gfs2_log_write_page(sdp, page);
862861
page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
863862
mh = page_address(page);

fs/gfs2/ops_fstype.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,13 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
315315
sizeof(struct gfs2_meta_header))
316316
* GFS2_NBBY; /* not the rgrp bitmap, subsequent bitmaps only */
317317

318+
/*
319+
* We always keep at least one block reserved for revokes in
320+
* transactions. This greatly simplifies allocating additional
321+
* revoke blocks.
322+
*/
323+
atomic_set(&sdp->sd_log_revokes_available, sdp->sd_ldptrs);
324+
318325
/* Compute maximum reservation required to add a entry to a directory */
319326

320327
hash_blocks = DIV_ROUND_UP(sizeof(u64) * BIT(GFS2_DIR_MAX_DEPTH),

fs/gfs2/trans.c

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,16 +31,18 @@ static void gfs2_print_trans(struct gfs2_sbd *sdp, const struct gfs2_trans *tr)
3131
fs_warn(sdp, "blocks=%u revokes=%u reserved=%u touched=%u\n",
3232
tr->tr_blocks, tr->tr_revokes, tr->tr_reserved,
3333
test_bit(TR_TOUCHED, &tr->tr_flags));
34-
fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u/%u\n",
34+
fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u\n",
3535
tr->tr_num_buf_new, tr->tr_num_buf_rm,
3636
tr->tr_num_databuf_new, tr->tr_num_databuf_rm,
37-
tr->tr_num_revoke, tr->tr_num_revoke_rm);
37+
tr->tr_num_revoke);
3838
}
3939

4040
int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp,
4141
unsigned int blocks, unsigned int revokes,
4242
unsigned long ip)
4343
{
44+
unsigned int extra_revokes;
45+
4446
if (current->journal_info) {
4547
gfs2_print_trans(sdp, current->journal_info);
4648
BUG();
@@ -62,8 +64,6 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp,
6264
*/
6365
tr->tr_reserved += blocks + 1 + DIV_ROUND_UP(blocks - 1, databuf_limit(sdp));
6466
}
65-
if (revokes)
66-
tr->tr_reserved += gfs2_struct2blk(sdp, revokes) - 1;
6767
INIT_LIST_HEAD(&tr->tr_databuf);
6868
INIT_LIST_HEAD(&tr->tr_buf);
6969
INIT_LIST_HEAD(&tr->tr_list);
@@ -75,10 +75,26 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp,
7575

7676
sb_start_intwrite(sdp->sd_vfs);
7777

78-
gfs2_log_reserve(sdp, tr->tr_reserved);
78+
/*
79+
* Try the reservations under sd_log_flush_lock to prevent log flushes
80+
* from creating inconsistencies between the number of allocated and
81+
* reserved revokes. If that fails, do a full-block allocation outside
82+
* of the lock to avoid stalling log flushes. Then, allot the
83+
* appropriate number of blocks to revokes, use as many revokes locally
84+
* as needed, and "release" the surplus into the revokes pool.
85+
*/
7986

8087
down_read(&sdp->sd_log_flush_lock);
88+
if (gfs2_log_try_reserve(sdp, tr, &extra_revokes))
89+
goto reserved;
90+
up_read(&sdp->sd_log_flush_lock);
91+
gfs2_log_reserve(sdp, tr, &extra_revokes);
92+
down_read(&sdp->sd_log_flush_lock);
93+
94+
reserved:
95+
gfs2_log_release_revokes(sdp, extra_revokes);
8196
if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) {
97+
gfs2_log_release_revokes(sdp, tr->tr_revokes);
8298
up_read(&sdp->sd_log_flush_lock);
8399
gfs2_log_release(sdp, tr->tr_reserved);
84100
sb_end_intwrite(sdp->sd_vfs);
@@ -113,14 +129,17 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
113129
current->journal_info = NULL;
114130

115131
if (!test_bit(TR_TOUCHED, &tr->tr_flags)) {
116-
gfs2_log_release(sdp, tr->tr_reserved);
132+
gfs2_log_release_revokes(sdp, tr->tr_revokes);
117133
up_read(&sdp->sd_log_flush_lock);
134+
gfs2_log_release(sdp, tr->tr_reserved);
118135
if (!test_bit(TR_ONSTACK, &tr->tr_flags))
119136
gfs2_trans_free(sdp, tr);
120137
sb_end_intwrite(sdp->sd_vfs);
121138
return;
122139
}
123140

141+
gfs2_log_release_revokes(sdp, tr->tr_revokes - tr->tr_num_revoke);
142+
124143
nbuf = tr->tr_num_buf_new + tr->tr_num_databuf_new;
125144
nbuf -= tr->tr_num_buf_rm;
126145
nbuf -= tr->tr_num_databuf_rm;
@@ -278,7 +297,6 @@ void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
278297
void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
279298
{
280299
struct gfs2_bufdata *bd, *tmp;
281-
struct gfs2_trans *tr = current->journal_info;
282300
unsigned int n = len;
283301

284302
gfs2_log_lock(sdp);
@@ -290,7 +308,7 @@ void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
290308
if (bd->bd_gl)
291309
gfs2_glock_remove_revoke(bd->bd_gl);
292310
kmem_cache_free(gfs2_bufdata_cachep, bd);
293-
tr->tr_num_revoke_rm++;
311+
gfs2_log_release_revokes(sdp, 1);
294312
if (--n == 0)
295313
break;
296314
}

0 commit comments

Comments
 (0)