Skip to content

Commit 11d763f

Browse files
author
Andreas Gruenbacher
committed
gfs2: Retries missing in gfs2_{rename,exchange}
Fix a bug in gfs2's asynchronous glock handling for rename and exchange operations. The original async implementation from commit ad26967 ("gfs2: Use async glocks for rename") mentioned that retries were needed but never implemented them, causing operations to fail with -ESTALE instead of retrying on timeout. Also makes the waiting interruptible. In addition, the timeouts used were too high for situations in which timing out is a rare but expected scenario. Switch to shorter timeouts with randomization and exponentional backoff. Fixes: ad26967 ("gfs2: Use async glocks for rename") Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
1 parent f8f0424 commit 11d763f

3 files changed

Lines changed: 43 additions & 14 deletions

File tree

fs/gfs2/glock.c

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1284,31 +1284,45 @@ static int glocks_pending(unsigned int num_gh, struct gfs2_holder *ghs)
12841284
* gfs2_glock_async_wait - wait on multiple asynchronous glock acquisitions
12851285
* @num_gh: the number of holders in the array
12861286
* @ghs: the glock holder array
1287+
* @retries: number of retries attempted so far
12871288
*
12881289
* Returns: 0 on success, meaning all glocks have been granted and are held.
12891290
* -ESTALE if the request timed out, meaning all glocks were released,
12901291
* and the caller should retry the operation.
12911292
*/
12921293

1293-
int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs)
1294+
int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs,
1295+
unsigned int retries)
12941296
{
12951297
struct gfs2_sbd *sdp = ghs[0].gh_gl->gl_name.ln_sbd;
1296-
int i, ret = 0, timeout = 0;
12971298
unsigned long start_time = jiffies;
1299+
int i, ret = 0;
1300+
long timeout;
12981301

12991302
might_sleep();
1300-
/*
1301-
* Total up the (minimum hold time * 2) of all glocks and use that to
1302-
* determine the max amount of time we should wait.
1303-
*/
1304-
for (i = 0; i < num_gh; i++)
1305-
timeout += ghs[i].gh_gl->gl_hold_time << 1;
13061303

1307-
if (!wait_event_timeout(sdp->sd_async_glock_wait,
1304+
timeout = GL_GLOCK_MIN_HOLD;
1305+
if (retries) {
1306+
unsigned int max_shift;
1307+
long incr;
1308+
1309+
/* Add a random delay and increase the timeout exponentially. */
1310+
max_shift = BITS_PER_LONG - 2 - __fls(GL_GLOCK_HOLD_INCR);
1311+
incr = min(GL_GLOCK_HOLD_INCR << min(retries - 1, max_shift),
1312+
10 * HZ - GL_GLOCK_MIN_HOLD);
1313+
schedule_timeout_interruptible(get_random_long() % (incr / 3));
1314+
if (signal_pending(current))
1315+
goto interrupted;
1316+
timeout += (incr / 3) + get_random_long() % (incr / 3);
1317+
}
1318+
1319+
if (!wait_event_interruptible_timeout(sdp->sd_async_glock_wait,
13081320
!glocks_pending(num_gh, ghs), timeout)) {
13091321
ret = -ESTALE; /* request timed out. */
13101322
goto out;
13111323
}
1324+
if (signal_pending(current))
1325+
goto interrupted;
13121326

13131327
for (i = 0; i < num_gh; i++) {
13141328
struct gfs2_holder *gh = &ghs[i];
@@ -1332,6 +1346,10 @@ int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs)
13321346
}
13331347
}
13341348
return ret;
1349+
1350+
interrupted:
1351+
ret = -EINTR;
1352+
goto out;
13351353
}
13361354

13371355
/**

fs/gfs2/glock.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,8 @@ int gfs2_glock_poll(struct gfs2_holder *gh);
204204
int gfs2_instantiate(struct gfs2_holder *gh);
205205
int gfs2_glock_holder_ready(struct gfs2_holder *gh);
206206
int gfs2_glock_wait(struct gfs2_holder *gh);
207-
int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs);
207+
int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs,
208+
unsigned int retries);
208209
void gfs2_glock_dq(struct gfs2_holder *gh);
209210
void gfs2_glock_dq_wait(struct gfs2_holder *gh);
210211
void gfs2_glock_dq_uninit(struct gfs2_holder *gh);

fs/gfs2/inode.c

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1495,7 +1495,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
14951495
unsigned int num_gh;
14961496
int dir_rename = 0;
14971497
struct gfs2_diradd da = { .nr_blocks = 0, .save_loc = 0, };
1498-
unsigned int x;
1498+
unsigned int retries = 0, x;
14991499
int error;
15001500

15011501
gfs2_holder_mark_uninitialized(&r_gh);
@@ -1545,12 +1545,17 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
15451545
num_gh++;
15461546
}
15471547

1548+
again:
15481549
for (x = 0; x < num_gh; x++) {
15491550
error = gfs2_glock_nq(ghs + x);
15501551
if (error)
15511552
goto out_gunlock;
15521553
}
1553-
error = gfs2_glock_async_wait(num_gh, ghs);
1554+
error = gfs2_glock_async_wait(num_gh, ghs, retries);
1555+
if (error == -ESTALE) {
1556+
retries++;
1557+
goto again;
1558+
}
15541559
if (error)
15551560
goto out_gunlock;
15561561

@@ -1739,7 +1744,7 @@ static int gfs2_exchange(struct inode *odir, struct dentry *odentry,
17391744
struct gfs2_sbd *sdp = GFS2_SB(odir);
17401745
struct gfs2_holder ghs[4], r_gh;
17411746
unsigned int num_gh;
1742-
unsigned int x;
1747+
unsigned int retries = 0, x;
17431748
umode_t old_mode = oip->i_inode.i_mode;
17441749
umode_t new_mode = nip->i_inode.i_mode;
17451750
int error;
@@ -1783,13 +1788,18 @@ static int gfs2_exchange(struct inode *odir, struct dentry *odentry,
17831788
gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, GL_ASYNC, ghs + num_gh);
17841789
num_gh++;
17851790

1791+
again:
17861792
for (x = 0; x < num_gh; x++) {
17871793
error = gfs2_glock_nq(ghs + x);
17881794
if (error)
17891795
goto out_gunlock;
17901796
}
17911797

1792-
error = gfs2_glock_async_wait(num_gh, ghs);
1798+
error = gfs2_glock_async_wait(num_gh, ghs, retries);
1799+
if (error == -ESTALE) {
1800+
retries++;
1801+
goto again;
1802+
}
17931803
if (error)
17941804
goto out_gunlock;
17951805

0 commit comments

Comments
 (0)