Skip to content

Commit 4928c36

Browse files
author
Andreas Gruenbacher
committed
gfs2: Do not cancel internal demote requests
Trying to change the state of a glock may result in a "conversion deadlock" error, indicating that the requested state transition would cause a deadlock. In this case, we unlock and retry the state change. It makes no sense to try canceling those unlock requests, but the current code is not aware of that. In addition, if a locking request is canceled shortly after it is made, the cancelation request can currently overtake the locking request. This may result in deadlocks. Fix both of these bugs by repurposing the GLF_PENDING_REPLY flag into a GLF_MAY_CANCEL flag which is set only when the current locking request can be canceled. When trying to cancel a locking request in gfs2_glock_dq(), wait for this flag to be set. Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
1 parent 5e33199 commit 4928c36

3 files changed

Lines changed: 35 additions & 15 deletions

File tree

fs/gfs2/glock.c

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@ struct gfs2_glock_iter {
6060

6161
typedef void (*glock_examiner) (struct gfs2_glock * gl);
6262

63-
static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
63+
static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh,
64+
unsigned int target, bool may_cancel);
6465
static void request_demote(struct gfs2_glock *gl, unsigned int state,
6566
unsigned long delay, bool remote);
6667

@@ -600,12 +601,14 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
600601
switch(gl->gl_state) {
601602
/* Unlocked due to conversion deadlock, try again */
602603
case LM_ST_UNLOCKED:
603-
do_xmote(gl, gh, gl->gl_target);
604+
do_xmote(gl, gh, gl->gl_target,
605+
!test_bit(GLF_DEMOTE_IN_PROGRESS,
606+
&gl->gl_flags));
604607
break;
605608
/* Conversion fails, unlock and try again */
606609
case LM_ST_SHARED:
607610
case LM_ST_DEFERRED:
608-
do_xmote(gl, gh, LM_ST_UNLOCKED);
611+
do_xmote(gl, gh, LM_ST_UNLOCKED, false);
609612
break;
610613
default: /* Everything else */
611614
fs_err(gl->gl_name.ln_sbd,
@@ -638,19 +641,20 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
638641
}
639642
out:
640643
if (!test_bit(GLF_CANCELING, &gl->gl_flags))
641-
clear_bit(GLF_LOCK, &gl->gl_flags);
644+
clear_and_wake_up_bit(GLF_LOCK, &gl->gl_flags);
642645
}
643646

644647
/**
645648
* do_xmote - Calls the DLM to change the state of a lock
646649
* @gl: The lock state
647650
* @gh: The holder (only for promotes)
648651
* @target: The target lock state
652+
* @may_cancel: Operation may be canceled
649653
*
650654
*/
651655

652656
static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh,
653-
unsigned int target)
657+
unsigned int target, bool may_cancel)
654658
__releases(&gl->gl_lockref.lock)
655659
__acquires(&gl->gl_lockref.lock)
656660
{
@@ -703,17 +707,20 @@ __acquires(&gl->gl_lockref.lock)
703707
}
704708

705709
if (ls->ls_ops->lm_lock) {
706-
set_bit(GLF_PENDING_REPLY, &gl->gl_flags);
707710
spin_unlock(&gl->gl_lockref.lock);
708711
ret = ls->ls_ops->lm_lock(gl, target, gh ? gh->gh_flags : 0);
709712
spin_lock(&gl->gl_lockref.lock);
710713

711714
if (!ret) {
715+
if (may_cancel) {
716+
set_bit(GLF_MAY_CANCEL, &gl->gl_flags);
717+
smp_mb__after_atomic();
718+
wake_up_bit(&gl->gl_flags, GLF_LOCK);
719+
}
712720
/* The operation will be completed asynchronously. */
713721
gl->gl_lockref.count++;
714722
return;
715723
}
716-
clear_bit(GLF_PENDING_REPLY, &gl->gl_flags);
717724

718725
if (ret == -ENODEV) {
719726
/*
@@ -774,7 +781,7 @@ __acquires(&gl->gl_lockref.lock)
774781
GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
775782
gl->gl_target = gl->gl_demote_state;
776783
set_bit(GLF_LOCK, &gl->gl_flags);
777-
do_xmote(gl, NULL, gl->gl_target);
784+
do_xmote(gl, NULL, gl->gl_target, false);
778785
return;
779786
}
780787

@@ -791,7 +798,7 @@ __acquires(&gl->gl_lockref.lock)
791798
if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
792799
do_error(gl, 0); /* Fail queued try locks */
793800
set_bit(GLF_LOCK, &gl->gl_flags);
794-
do_xmote(gl, gh, gl->gl_target);
801+
do_xmote(gl, gh, gl->gl_target, true);
795802
return;
796803

797804
out_sched:
@@ -1588,6 +1595,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
15881595
{
15891596
struct gfs2_glock *gl = gh->gh_gl;
15901597

1598+
again:
15911599
spin_lock(&gl->gl_lockref.lock);
15921600
if (!gfs2_holder_queued(gh)) {
15931601
/*
@@ -1602,13 +1610,25 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
16021610
test_bit(GLF_LOCK, &gl->gl_flags) &&
16031611
!test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
16041612
!test_bit(GLF_CANCELING, &gl->gl_flags)) {
1613+
if (!test_bit(GLF_MAY_CANCEL, &gl->gl_flags)) {
1614+
struct wait_queue_head *wq;
1615+
DEFINE_WAIT(wait);
1616+
1617+
wq = bit_waitqueue(&gl->gl_flags, GLF_LOCK);
1618+
prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
1619+
spin_unlock(&gl->gl_lockref.lock);
1620+
schedule();
1621+
finish_wait(wq, &wait);
1622+
goto again;
1623+
}
1624+
16051625
set_bit(GLF_CANCELING, &gl->gl_flags);
16061626
spin_unlock(&gl->gl_lockref.lock);
16071627
gl->gl_name.ln_sbd->sd_lockstruct.ls_ops->lm_cancel(gl);
16081628
wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
16091629
spin_lock(&gl->gl_lockref.lock);
16101630
clear_bit(GLF_CANCELING, &gl->gl_flags);
1611-
clear_bit(GLF_LOCK, &gl->gl_flags);
1631+
clear_and_wake_up_bit(GLF_LOCK, &gl->gl_flags);
16121632
if (!gfs2_holder_queued(gh))
16131633
goto out;
16141634
}
@@ -1838,7 +1858,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
18381858
struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
18391859

18401860
spin_lock(&gl->gl_lockref.lock);
1841-
clear_bit(GLF_PENDING_REPLY, &gl->gl_flags);
1861+
clear_bit(GLF_MAY_CANCEL, &gl->gl_flags);
18421862
gl->gl_reply = ret;
18431863

18441864
if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) {
@@ -2245,8 +2265,8 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
22452265
*p++ = 'y';
22462266
if (test_bit(GLF_LFLUSH, gflags))
22472267
*p++ = 'f';
2248-
if (test_bit(GLF_PENDING_REPLY, gflags))
2249-
*p++ = 'R';
2268+
if (test_bit(GLF_MAY_CANCEL, gflags))
2269+
*p++ = 'c';
22502270
if (test_bit(GLF_HAVE_REPLY, gflags))
22512271
*p++ = 'r';
22522272
if (test_bit(GLF_INITIAL, gflags))

fs/gfs2/incore.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ enum {
326326
GLF_BLOCKING = 15,
327327
GLF_TRY_TO_EVICT = 17, /* iopen glocks only */
328328
GLF_VERIFY_DELETE = 18, /* iopen glocks only */
329-
GLF_PENDING_REPLY = 19,
329+
GLF_MAY_CANCEL = 19,
330330
GLF_DEFER_DELETE = 20, /* iopen glocks only */
331331
GLF_CANCELING = 21,
332332
};

fs/gfs2/trace_gfs2.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@
5252
{(1UL << GLF_DEMOTE_IN_PROGRESS), "p" }, \
5353
{(1UL << GLF_DIRTY), "y" }, \
5454
{(1UL << GLF_LFLUSH), "f" }, \
55-
{(1UL << GLF_PENDING_REPLY), "R" }, \
55+
{(1UL << GLF_MAY_CANCEL), "c" }, \
5656
{(1UL << GLF_HAVE_REPLY), "r" }, \
5757
{(1UL << GLF_INITIAL), "a" }, \
5858
{(1UL << GLF_HAVE_FROZEN_REPLY), "F" }, \

0 commit comments

Comments
 (0)