Skip to content

Commit fe3e397

Browse files
author
Andreas Gruenbacher
committed
gfs2: Rework the log space allocation logic
The current log space allocation logic is hard to understand or extend. The principle is that when the log is flushed, we may or may not have a transaction active that has space allocated in the log. To deal with that, we set aside a magical number of blocks to be used in case we don't have an active transaction. It isn't clear that the pool will always be big enough. In addition, we can't return unused log space at the end of a transaction, so the number of blocks allocated must exactly match the number of blocks used.

Simplify this as follows:
* When transactions are allocated or merged, always reserve enough blocks to flush the transaction (err on the safe side).
* In gfs2_log_flush, return any allocated blocks that haven't been used.
* Maintain a pool of spare blocks big enough to do one log flush, as before.
* In gfs2_log_flush, when we have no active transaction, allocate a suitable number of blocks. For that, use the spare pool when called from logd, and leave the pool alone otherwise. This means that when the log is almost full, logd will still be able to do one more log flush, which will result in more log space becoming available.

This will make the log space allocator code easier to work with in the future.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
1 parent 71b219f commit fe3e397

3 files changed

Lines changed: 104 additions & 69 deletions

File tree

fs/gfs2/log.c

Lines changed: 95 additions & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -447,15 +447,42 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
447447
}
448448

449449
/**
450-
* gfs2_log_reserve - Make a log reservation
450+
* __gfs2_log_try_reserve - Try to make a log reservation
451+
* @sdp: The GFS2 superblock
452+
* @blks: The number of blocks to reserve
453+
* @taboo_blks: The number of blocks to leave free
454+
*
455+
* Try to do the same as __gfs2_log_reserve(), but fail if no more log
456+
* space is immediately available.
457+
*/
458+
static bool __gfs2_log_try_reserve(struct gfs2_sbd *sdp, unsigned int blks,
459+
unsigned int taboo_blks)
460+
{
461+
unsigned wanted = blks + taboo_blks;
462+
unsigned int free_blocks;
463+
464+
free_blocks = atomic_read(&sdp->sd_log_blks_free);
465+
while (free_blocks >= wanted) {
466+
if (atomic_try_cmpxchg(&sdp->sd_log_blks_free, &free_blocks,
467+
free_blocks - blks)) {
468+
trace_gfs2_log_blocks(sdp, -blks);
469+
return true;
470+
}
471+
}
472+
return false;
473+
}
474+
475+
/**
476+
* __gfs2_log_reserve - Make a log reservation
451477
* @sdp: The GFS2 superblock
452478
* @blks: The number of blocks to reserve
479+
* @taboo_blks: The number of blocks to leave free
453480
*
454-
* Note that we never give out the last few blocks of the journal. Thats
455-
* due to the fact that there is a small number of header blocks
456-
* associated with each log flush. The exact number can't be known until
457-
* flush time, so we ensure that we have just enough free blocks at all
458-
* times to avoid running out during a log flush.
481+
* @taboo_blks is set to 0 for logd, and to GFS2_LOG_FLUSH_MIN_BLOCKS
482+
* for all other processes. This ensures that when the log is almost full,
483+
* logd will still be able to call gfs2_log_flush one more time without
484+
* blocking, which will advance the tail and make some more log space
485+
* available.
459486
*
460487
* We no longer flush the log here, instead we wake up logd to do that
461488
* for us. To avoid the thundering herd and to ensure that we deal fairly
@@ -464,19 +491,12 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
464491
* wake the next waiter on the list.
465492
*/
466493

467-
void gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
494+
static void __gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks,
495+
unsigned int taboo_blks)
468496
{
469-
unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize);
470-
unsigned wanted = blks + reserved_blks;
497+
unsigned wanted = blks + taboo_blks;
471498
unsigned int free_blocks;
472499

473-
free_blocks = atomic_read(&sdp->sd_log_blks_free);
474-
while (free_blocks >= wanted) {
475-
if (atomic_try_cmpxchg(&sdp->sd_log_blks_free, &free_blocks,
476-
free_blocks - blks))
477-
return;
478-
}
479-
480500
atomic_add(blks, &sdp->sd_log_blks_needed);
481501
for (;;) {
482502
if (current != sdp->sd_logd_process)
@@ -498,6 +518,19 @@ void gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
498518
wake_up(&sdp->sd_log_waitq);
499519
}
500520

521+
/**
522+
* gfs2_log_reserve - Make a log reservation
523+
* @sdp: The GFS2 superblock
524+
* @blks: The number of blocks to reserve
525+
*/
526+
527+
void gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
528+
{
529+
if (__gfs2_log_try_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS))
530+
return;
531+
__gfs2_log_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS);
532+
}
533+
501534
/**
502535
* log_distance - Compute distance between two journal blocks
503536
* @sdp: The GFS2 superblock
@@ -545,7 +578,7 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer
545578
*/
546579
static unsigned int calc_reserved(struct gfs2_sbd *sdp)
547580
{
548-
unsigned int reserved = 0;
581+
unsigned int reserved = GFS2_LOG_FLUSH_MIN_BLOCKS;
549582
unsigned int blocks;
550583
struct gfs2_trans *tr = sdp->sd_log_tr;
551584

@@ -557,10 +590,7 @@ static unsigned int calc_reserved(struct gfs2_sbd *sdp)
557590
}
558591

559592
if (sdp->sd_log_committed_revoke > 0)
560-
reserved += gfs2_struct2blk(sdp, sdp->sd_log_committed_revoke);
561-
/* One for the overall header */
562-
if (reserved)
563-
reserved++;
593+
reserved += gfs2_struct2blk(sdp, sdp->sd_log_committed_revoke) - 1;
564594
return reserved;
565595
}
566596

@@ -708,29 +738,8 @@ void gfs2_flush_revokes(struct gfs2_sbd *sdp)
708738
max_revokes += roundup(sdp->sd_log_num_revoke - sdp->sd_ldptrs,
709739
sdp->sd_inptrs);
710740
max_revokes -= sdp->sd_log_num_revoke;
711-
if (!sdp->sd_log_num_revoke) {
712-
atomic_dec(&sdp->sd_log_blks_free);
713-
/* If no blocks have been reserved, we need to also
714-
* reserve a block for the header */
715-
if (!sdp->sd_log_blks_reserved) {
716-
atomic_dec(&sdp->sd_log_blks_free);
717-
trace_gfs2_log_blocks(sdp, -2);
718-
} else {
719-
trace_gfs2_log_blocks(sdp, -1);
720-
}
721-
}
722741
gfs2_ail1_empty(sdp, max_revokes);
723742
gfs2_log_unlock(sdp);
724-
725-
if (!sdp->sd_log_num_revoke) {
726-
atomic_inc(&sdp->sd_log_blks_free);
727-
if (!sdp->sd_log_blks_reserved) {
728-
atomic_inc(&sdp->sd_log_blks_free);
729-
trace_gfs2_log_blocks(sdp, 2);
730-
} else {
731-
trace_gfs2_log_blocks(sdp, 1);
732-
}
733-
}
734743
}
735744

736745
/**
@@ -843,6 +852,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
843852
gfs2_log_incr_head(sdp);
844853
log_flush_wait(sdp);
845854
log_pull_tail(sdp);
855+
gfs2_log_update_head(sdp);
846856
}
847857

848858
/**
@@ -942,10 +952,14 @@ static void trans_drain(struct gfs2_trans *tr)
942952
void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
943953
{
944954
struct gfs2_trans *tr = NULL;
955+
unsigned int reserved_blocks = 0, used_blocks = 0;
945956
enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
957+
unsigned int first_log_head;
946958

947959
down_write(&sdp->sd_log_flush_lock);
960+
trace_gfs2_log_flush(sdp, 1, flags);
948961

962+
repeat:
949963
/*
950964
* Do this check while holding the log_flush_lock to prevent new
951965
* buffers from being added to the ail via gfs2_pin()
@@ -956,22 +970,41 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
956970
/* Log might have been flushed while we waited for the flush lock */
957971
if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags))
958972
goto out;
959-
trace_gfs2_log_flush(sdp, 1, flags);
960973

961-
if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN)
962-
clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
974+
first_log_head = sdp->sd_log_head;
975+
sdp->sd_log_flush_head = first_log_head;
963976

964-
sdp->sd_log_flush_head = sdp->sd_log_head;
965977
tr = sdp->sd_log_tr;
966-
if (tr) {
967-
sdp->sd_log_tr = NULL;
968-
tr->tr_first = sdp->sd_log_flush_head;
969-
if (unlikely (state == SFS_FROZEN))
970-
if (gfs2_assert_withdraw_delayed(sdp,
971-
!tr->tr_num_buf_new && !tr->tr_num_databuf_new))
972-
goto out_withdraw;
978+
if (tr || sdp->sd_log_num_revoke) {
979+
if (reserved_blocks)
980+
gfs2_log_release(sdp, reserved_blocks);
981+
reserved_blocks = sdp->sd_log_blks_reserved;
982+
if (tr) {
983+
sdp->sd_log_tr = NULL;
984+
tr->tr_first = first_log_head;
985+
if (unlikely (state == SFS_FROZEN))
986+
if (gfs2_assert_withdraw_delayed(sdp,
987+
!tr->tr_num_buf_new && !tr->tr_num_databuf_new))
988+
goto out_withdraw;
989+
}
990+
} else if (!reserved_blocks) {
991+
unsigned int taboo_blocks = GFS2_LOG_FLUSH_MIN_BLOCKS;
992+
993+
reserved_blocks = GFS2_LOG_FLUSH_MIN_BLOCKS;
994+
if (current == sdp->sd_logd_process)
995+
taboo_blocks = 0;
996+
997+
if (!__gfs2_log_try_reserve(sdp, reserved_blocks, taboo_blocks)) {
998+
up_write(&sdp->sd_log_flush_lock);
999+
__gfs2_log_reserve(sdp, reserved_blocks, taboo_blocks);
1000+
down_write(&sdp->sd_log_flush_lock);
1001+
goto repeat;
1002+
}
9731003
}
9741004

1005+
if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN)
1006+
clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
1007+
9751008
if (unlikely(state == SFS_FROZEN))
9761009
if (gfs2_assert_withdraw_delayed(sdp, !sdp->sd_log_num_revoke))
9771010
goto out_withdraw;
@@ -993,16 +1026,13 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
9931026
log_flush_wait(sdp);
9941027
log_write_header(sdp, flags);
9951028
} else if (sdp->sd_log_tail != sdp->sd_log_flush_tail && !sdp->sd_log_idle) {
996-
atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
997-
trace_gfs2_log_blocks(sdp, -1);
9981029
log_write_header(sdp, flags);
9991030
}
10001031
if (gfs2_withdrawn(sdp))
10011032
goto out_withdraw;
10021033
lops_after_commit(sdp, tr);
10031034

10041035
gfs2_log_lock(sdp);
1005-
gfs2_log_update_head(sdp);
10061036
sdp->sd_log_blks_reserved = 0;
10071037
sdp->sd_log_committed_revoke = 0;
10081038

@@ -1019,10 +1049,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
10191049
empty_ail1_list(sdp);
10201050
if (gfs2_withdrawn(sdp))
10211051
goto out_withdraw;
1022-
atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
1023-
trace_gfs2_log_blocks(sdp, -1);
10241052
log_write_header(sdp, flags);
1025-
gfs2_log_update_head(sdp);
10261053
}
10271054
if (flags & (GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
10281055
GFS2_LOG_HEAD_FLUSH_FREEZE))
@@ -1032,12 +1059,17 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
10321059
}
10331060

10341061
out_end:
1035-
trace_gfs2_log_flush(sdp, 0, flags);
1062+
used_blocks = log_distance(sdp, sdp->sd_log_flush_head, first_log_head);
1063+
if (gfs2_assert_withdraw_delayed(sdp, used_blocks <= reserved_blocks))
1064+
goto out;
10361065
out:
1066+
if (used_blocks != reserved_blocks)
1067+
gfs2_log_release(sdp, reserved_blocks - used_blocks);
10371068
up_write(&sdp->sd_log_flush_lock);
10381069
gfs2_trans_free(sdp, tr);
10391070
if (gfs2_withdrawing(sdp))
10401071
gfs2_withdraw(sdp);
1072+
trace_gfs2_log_flush(sdp, 0, flags);
10411073
return;
10421074

10431075
out_withdraw:
@@ -1150,15 +1182,11 @@ static void gfs2_log_shutdown(struct gfs2_sbd *sdp)
11501182
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
11511183
gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
11521184

1153-
sdp->sd_log_flush_head = sdp->sd_log_head;
1154-
11551185
log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT | GFS2_LFC_SHUTDOWN);
1186+
log_pull_tail(sdp);
11561187

11571188
gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
11581189
gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));
1159-
1160-
gfs2_log_update_head(sdp);
1161-
sdp->sd_log_tail = sdp->sd_log_head;
11621190
}
11631191

11641192
static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
@@ -1213,15 +1241,15 @@ int gfs2_logd(void *data)
12131241
if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
12141242
gfs2_ail1_empty(sdp, 0);
12151243
gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
1216-
GFS2_LFC_LOGD_JFLUSH_REQD);
1244+
GFS2_LFC_LOGD_JFLUSH_REQD);
12171245
}
12181246

12191247
if (gfs2_ail_flush_reqd(sdp)) {
12201248
gfs2_ail1_start(sdp);
12211249
gfs2_ail1_wait(sdp);
12221250
gfs2_ail1_empty(sdp, 0);
12231251
gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
1224-
GFS2_LFC_LOGD_AIL_FLUSH_REQD);
1252+
GFS2_LFC_LOGD_AIL_FLUSH_REQD);
12251253
}
12261254

12271255
t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;

fs/gfs2/log.h

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,13 @@
1313
#include "incore.h"
1414
#include "inode.h"
1515

16+
/*
17+
* The minimum amount of log space required for a log flush is one block for
18+
* revokes and one block for the log header. Log flushes other than
19+
* GFS2_LOG_HEAD_FLUSH_NORMAL may write one or two more log headers.
20+
*/
21+
#define GFS2_LOG_FLUSH_MIN_BLOCKS 4
22+
1623
/**
1724
* gfs2_log_lock - acquire the right to mess with the log manager
1825
* @sdp: the filesystem

fs/gfs2/trans.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp,
5353
tr->tr_ip = ip;
5454
tr->tr_blocks = blocks;
5555
tr->tr_revokes = revokes;
56-
tr->tr_reserved = 1;
56+
tr->tr_reserved = GFS2_LOG_FLUSH_MIN_BLOCKS;
5757
if (blocks) {
5858
/*
5959
* The reserved blocks are either used for data or metadata.
@@ -63,7 +63,7 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp,
6363
tr->tr_reserved += blocks + 1 + DIV_ROUND_UP(blocks - 1, databuf_limit(sdp));
6464
}
6565
if (revokes)
66-
tr->tr_reserved += gfs2_struct2blk(sdp, revokes);
66+
tr->tr_reserved += gfs2_struct2blk(sdp, revokes) - 1;
6767
INIT_LIST_HEAD(&tr->tr_databuf);
6868
INIT_LIST_HEAD(&tr->tr_buf);
6969
INIT_LIST_HEAD(&tr->tr_list);

0 commit comments

Comments
 (0)