Skip to content

Commit 725d0e9

Browse files
author
Andreas Gruenbacher
committed
gfs2: Add per-reservation reserved block accounting
Add an rs_reserved field to struct gfs2_blkreserv to keep track of the number of blocks reserved by this particular reservation, and an rd_reserved field to struct gfs2_rgrpd to keep track of the total number of reserved blocks in the resource group. Those blocks are exclusively reserved, as opposed to the rs_requested / rd_requested blocks, which are tracked in the reservation tree (rd_rstree) and which can be stolen if necessary.

When making a reservation with gfs2_inplace_reserve, rs_reserved is set to somewhere between ap->min_target and ap->target depending on the number of free blocks in the resource group. When allocating blocks with gfs2_alloc_blocks, rs_reserved is decremented accordingly. Eventually, any reserved but not consumed blocks are returned to the resource group by gfs2_inplace_release.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
1 parent 07974d2 commit 725d0e9

5 files changed

Lines changed: 82 additions & 28 deletions

File tree

fs/gfs2/file.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1115,8 +1115,8 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
11151115
goto out_qunlock;
11161116

11171117
/* check if the selected rgrp limits our max_blks further */
1118-
if (ap.allowed && ap.allowed < max_blks)
1119-
max_blks = ap.allowed;
1118+
if (ip->i_res.rs_reserved < max_blks)
1119+
max_blks = ip->i_res.rs_reserved;
11201120

11211121
/* Almost done. Calculate bytes that can be written using
11221122
* max_blks. We also recompute max_bytes, data_blocks and

fs/gfs2/incore.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ struct gfs2_rgrpd {
107107
u32 rd_bitbytes; /* number of bytes in data bitmaps */
108108
u32 rd_free;
109109
u32 rd_requested; /* number of blocks in rd_rstree */
110+
u32 rd_reserved; /* number of reserved blocks */
110111
u32 rd_free_clone;
111112
u32 rd_dinodes;
112113
u64 rd_igeneration;
@@ -292,6 +293,7 @@ struct gfs2_blkreserv {
292293
struct gfs2_rgrpd *rs_rgd;
293294
u64 rs_start;
294295
u32 rs_requested;
296+
u32 rs_reserved; /* number of reserved blocks */
295297
};
296298

297299
/*

fs/gfs2/lops.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
8484
bd->bd_bh->b_data + bi->bi_offset, bi->bi_bytes);
8585
clear_bit(GBF_FULL, &bi->bi_flags);
8686
rgd->rd_free_clone = rgd->rd_free;
87+
BUG_ON(rgd->rd_free_clone < rgd->rd_reserved);
8788
rgd->rd_extfail_pt = rgd->rd_free;
8889
}
8990

fs/gfs2/rgrp.c

Lines changed: 60 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1229,6 +1229,7 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
12291229
rgrp_set_bitmap_flags(rgd);
12301230
rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
12311231
rgd->rd_free_clone = rgd->rd_free;
1232+
BUG_ON(rgd->rd_reserved);
12321233
/* max out the rgrp allocation failure point */
12331234
rgd->rd_extfail_pt = rgd->rd_free;
12341235
}
@@ -1278,6 +1279,7 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
12781279
rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
12791280
rgrp_set_bitmap_flags(rgd);
12801281
rgd->rd_free_clone = rgd->rd_free;
1282+
BUG_ON(rgd->rd_reserved);
12811283
/* max out the rgrp allocation failure point */
12821284
rgd->rd_extfail_pt = rgd->rd_free;
12831285
rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
@@ -1568,17 +1570,26 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
15681570
u64 goal;
15691571
struct gfs2_blkreserv *rs = &ip->i_res;
15701572
u32 extlen;
1571-
u32 free_blocks = rgd_free(rgd, rs);
1573+
u32 free_blocks, blocks_available;
15721574
int ret;
15731575
struct inode *inode = &ip->i_inode;
15741576

1577+
spin_lock(&rgd->rd_rsspin);
1578+
free_blocks = rgd_free(rgd, rs);
1579+
if (rgd->rd_free_clone < rgd->rd_requested)
1580+
free_blocks = 0;
1581+
blocks_available = rgd->rd_free_clone - rgd->rd_reserved;
1582+
if (rgd == rs->rs_rgd)
1583+
blocks_available += rs->rs_reserved;
1584+
spin_unlock(&rgd->rd_rsspin);
1585+
15751586
if (S_ISDIR(inode->i_mode))
15761587
extlen = 1;
15771588
else {
15781589
extlen = max_t(u32, atomic_read(&ip->i_sizehint), ap->target);
15791590
extlen = clamp(extlen, (u32)RGRP_RSRV_MINBLKS, free_blocks);
15801591
}
1581-
if ((rgd->rd_free_clone < rgd->rd_requested) || (free_blocks < extlen))
1592+
if (free_blocks < extlen || blocks_available < extlen)
15821593
return;
15831594

15841595
/* Find bitmap block that contains bits for goal block */
@@ -2027,8 +2038,7 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
20272038
* We try our best to find an rgrp that has at least ap->target blocks
20282039
* available. After a couple of passes (loops == 2), the prospects of finding
20292040
* such an rgrp diminish. At this stage, we return the first rgrp that has
2030-
* at least ap->min_target blocks available. Either way, we set ap->allowed to
2031-
* the number of blocks available in the chosen rgrp.
2041+
* at least ap->min_target blocks available.
20322042
*
20332043
* Returns: 0 on success,
20342044
* -ENOMEM if a suitable rgrp can't be found
@@ -2044,7 +2054,9 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
20442054
u64 last_unlinked = NO_BLOCK;
20452055
u32 target = ap->target;
20462056
int loops = 0;
2047-
u32 free_blocks, skip = 0;
2057+
u32 free_blocks, blocks_available, skip = 0;
2058+
2059+
BUG_ON(rs->rs_reserved);
20482060

20492061
if (sdp->sd_args.ar_rgrplvb)
20502062
flags |= GL_SKIP;
@@ -2065,6 +2077,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
20652077
return -EBADSLT;
20662078

20672079
while (loops < 3) {
2080+
struct gfs2_rgrpd *rgd;
2081+
20682082
rg_locked = 1;
20692083

20702084
if (!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) {
@@ -2115,11 +2129,20 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
21152129
goto check_rgrp;
21162130

21172131
/* If rgrp has enough free space, use it */
2118-
free_blocks = rgd_free(rs->rs_rgd, rs);
2119-
if (free_blocks >= target) {
2120-
ap->allowed = free_blocks;
2121-
return 0;
2132+
rgd = rs->rs_rgd;
2133+
spin_lock(&rgd->rd_rsspin);
2134+
free_blocks = rgd_free(rgd, rs);
2135+
blocks_available = rgd->rd_free_clone - rgd->rd_reserved;
2136+
if (free_blocks < target || blocks_available < target) {
2137+
spin_unlock(&rgd->rd_rsspin);
2138+
goto check_rgrp;
21222139
}
2140+
rs->rs_reserved = ap->target;
2141+
if (rs->rs_reserved > blocks_available)
2142+
rs->rs_reserved = blocks_available;
2143+
rgd->rd_reserved += rs->rs_reserved;
2144+
spin_unlock(&rgd->rd_rsspin);
2145+
return 0;
21232146
check_rgrp:
21242147
/* Check for unlinked inodes which can be reclaimed */
21252148
if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK)
@@ -2172,6 +2195,17 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
21722195

21732196
void gfs2_inplace_release(struct gfs2_inode *ip)
21742197
{
2198+
struct gfs2_blkreserv *rs = &ip->i_res;
2199+
2200+
if (rs->rs_reserved) {
2201+
struct gfs2_rgrpd *rgd = rs->rs_rgd;
2202+
2203+
spin_lock(&rgd->rd_rsspin);
2204+
BUG_ON(rgd->rd_reserved < rs->rs_reserved);
2205+
rgd->rd_reserved -= rs->rs_reserved;
2206+
spin_unlock(&rgd->rd_rsspin);
2207+
rs->rs_reserved = 0;
2208+
}
21752209
if (gfs2_holder_initialized(&ip->i_rgd_gh))
21762210
gfs2_glock_dq_uninit(&ip->i_rgd_gh);
21772211
}
@@ -2259,11 +2293,11 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd,
22592293
struct gfs2_blkreserv *trs;
22602294
const struct rb_node *n;
22612295

2262-
gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
2296+
gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u q:%u r:%u e:%u\n",
22632297
fs_id_buf,
22642298
(unsigned long long)rgd->rd_addr, rgd->rd_flags,
22652299
rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
2266-
rgd->rd_requested, rgd->rd_extfail_pt);
2300+
rgd->rd_requested, rgd->rd_reserved, rgd->rd_extfail_pt);
22672301
if (rgd->rd_sbd->sd_args.ar_rgrplvb) {
22682302
struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
22692303

@@ -2310,7 +2344,8 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip,
23102344
struct gfs2_blkreserv *rs = &ip->i_res;
23112345
struct gfs2_rgrpd *rgd = rbm->rgd;
23122346

2313-
spin_lock(&rgd->rd_rsspin);
2347+
BUG_ON(rs->rs_reserved < len);
2348+
rs->rs_reserved -= len;
23142349
if (gfs2_rs_active(rs)) {
23152350
u64 start = gfs2_rbm_to_block(rbm);
23162351

@@ -2324,15 +2359,13 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip,
23242359
trace_gfs2_rs(rs, TRACE_RS_CLAIM);
23252360
if (rs->rs_start < rgd->rd_data0 + rgd->rd_data &&
23262361
rs->rs_requested)
2327-
goto out;
2362+
return;
23282363
/* We used up our block reservation, so we should
23292364
reserve more blocks next time. */
23302365
atomic_add(RGRP_RSRV_ADDBLKS, &ip->i_sizehint);
23312366
}
23322367
__rs_deltree(rs);
23332368
}
2334-
out:
2335-
spin_unlock(&rgd->rd_rsspin);
23362369
}
23372370

23382371
/**
@@ -2386,6 +2419,8 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
23862419
u32 minext = 1;
23872420
int error = -ENOSPC;
23882421

2422+
BUG_ON(ip->i_res.rs_reserved < *nblocks);
2423+
23892424
if (gfs2_rs_active(&ip->i_res)) {
23902425
gfs2_set_alloc_start(&rbm, ip, dinode);
23912426
error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false);
@@ -2407,8 +2442,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
24072442
gfs2_alloc_extent(&rbm, dinode, nblocks);
24082443
block = gfs2_rbm_to_block(&rbm);
24092444
rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
2410-
if (gfs2_rs_active(&ip->i_res))
2411-
gfs2_adjust_reservation(ip, &rbm, *nblocks);
24122445
if (!dinode) {
24132446
ip->i_goal = block + *nblocks - 1;
24142447
error = gfs2_meta_inode_buffer(ip, &dibh);
@@ -2421,12 +2454,20 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
24212454
brelse(dibh);
24222455
}
24232456
}
2424-
if (rbm.rgd->rd_free < *nblocks) {
2457+
spin_lock(&rbm.rgd->rd_rsspin);
2458+
gfs2_adjust_reservation(ip, &rbm, *nblocks);
2459+
if (rbm.rgd->rd_free < *nblocks || rbm.rgd->rd_reserved < *nblocks) {
24252460
fs_warn(sdp, "nblocks=%u\n", *nblocks);
2461+
spin_unlock(&rbm.rgd->rd_rsspin);
24262462
goto rgrp_error;
24272463
}
2428-
2464+
BUG_ON(rbm.rgd->rd_reserved < *nblocks);
2465+
BUG_ON(rbm.rgd->rd_free_clone < *nblocks);
2466+
BUG_ON(rbm.rgd->rd_free < *nblocks);
2467+
rbm.rgd->rd_reserved -= *nblocks;
2468+
rbm.rgd->rd_free_clone -= *nblocks;
24292469
rbm.rgd->rd_free -= *nblocks;
2470+
spin_unlock(&rbm.rgd->rd_rsspin);
24302471
if (dinode) {
24312472
rbm.rgd->rd_dinodes++;
24322473
*generation = rbm.rgd->rd_igeneration++;
@@ -2443,7 +2484,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
24432484

24442485
gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);
24452486

2446-
rbm.rgd->rd_free_clone -= *nblocks;
24472487
trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
24482488
dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
24492489
*bn = block;

fs/gfs2/trace_gfs2.h

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,7 @@ TRACE_EVENT(gfs2_block_alloc,
561561
__field( u64, rd_addr )
562562
__field( u32, rd_free_clone )
563563
__field( u32, rd_requested )
564+
__field( u32, rd_reserved )
564565
),
565566

566567
TP_fast_assign(
@@ -572,16 +573,19 @@ TRACE_EVENT(gfs2_block_alloc,
572573
__entry->rd_addr = rgd->rd_addr;
573574
__entry->rd_free_clone = rgd->rd_free_clone;
574575
__entry->rd_requested = rgd->rd_requested;
576+
__entry->rd_reserved = rgd->rd_reserved;
575577
),
576578

577-
TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu",
579+
TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rq:%u rr:%u",
578580
MAJOR(__entry->dev), MINOR(__entry->dev),
579581
(unsigned long long)__entry->inum,
580582
(unsigned long long)__entry->start,
581583
(unsigned long)__entry->len,
582584
block_state_name(__entry->block_state),
583585
(unsigned long long)__entry->rd_addr,
584-
__entry->rd_free_clone, (unsigned long)__entry->rd_requested)
586+
__entry->rd_free_clone,
587+
__entry->rd_requested,
588+
__entry->rd_reserved)
585589
);
586590

587591
/* Keep track of multi-block reservations as they are allocated/freed */
@@ -596,9 +600,11 @@ TRACE_EVENT(gfs2_rs,
596600
__field( u64, rd_addr )
597601
__field( u32, rd_free_clone )
598602
__field( u32, rd_requested )
603+
__field( u32, rd_reserved )
599604
__field( u64, inum )
600605
__field( u64, start )
601606
__field( u32, requested )
607+
__field( u32, reserved )
602608
__field( u8, func )
603609
),
604610

@@ -607,21 +613,26 @@ TRACE_EVENT(gfs2_rs,
607613
__entry->rd_addr = rs->rs_rgd->rd_addr;
608614
__entry->rd_free_clone = rs->rs_rgd->rd_free_clone;
609615
__entry->rd_requested = rs->rs_rgd->rd_requested;
616+
__entry->rd_reserved = rs->rs_rgd->rd_reserved;
610617
__entry->inum = container_of(rs, struct gfs2_inode,
611618
i_res)->i_no_addr;
612619
__entry->start = rs->rs_start;
613620
__entry->requested = rs->rs_requested;
621+
__entry->reserved = rs->rs_reserved;
614622
__entry->func = func;
615623
),
616624

617-
TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu",
625+
TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%u rq:%u rr:%u %s q:%u r:%u",
618626
MAJOR(__entry->dev), MINOR(__entry->dev),
619627
(unsigned long long)__entry->inum,
620628
(unsigned long long)__entry->start,
621629
(unsigned long long)__entry->rd_addr,
622-
(unsigned long)__entry->rd_free_clone,
623-
(unsigned long)__entry->rd_requested,
624-
rs_func_name(__entry->func), (unsigned long)__entry->requested)
630+
__entry->rd_free_clone,
631+
__entry->rd_requested,
632+
__entry->rd_reserved,
633+
rs_func_name(__entry->func),
634+
__entry->requested,
635+
__entry->reserved)
625636
);
626637

627638
#endif /* _TRACE_GFS2_H */

0 commit comments

Comments
 (0)