Skip to content

Commit bbbf152

Browse files
author
Andreas Gruenbacher
committed
gfs2: New gfs2_withdraw_helper
Currently, when a gfs2 filesystem is withdrawn, an "offline" uevent is triggered that invokes gfs2-utils' gfs2_withdraw_helper script. The purpose of this script is to deactivate the filesystem's block device so that it can be withdrawn immediately, even before all the filesystem's caches have been discarded. The script provided by gfs2-utils never did anything useful, and there was no way for it to report back its status to the kernel. To fix that, extend the gfs2_withdraw_helper mechanism so that the script can report one of the following results by writing the corresponding value into "/sys$DEVPATH/lock_module/withdraw": 0 - The shared block device has been marked inactive. Future write operations will fail. 1 - The shared block device may still be active and carry out write operations. If the "offline" uevent isn't reacted upon within the timeout configured in /sys$DEVPATH/tune/withdraw_helper_timeout (default 5 seconds), the event handler is assumed to have failed. In addition, add a new "errors=deactivate" mount option. With these changes, if fatal errors are detected on a gfs2 filesystem and the filesystem is mounted with the "errors=panic" option, the kernel will panic immediately. Otherwise, an attempt will be made to deactivate the underlying block device. If successful, the kernel will release all cluster-wide locks immediately so that the rest of the cluster can continue. If unsuccessful, the kernel will either panic ("errors=deactivate"), or it will purge all filesystem I/O before releasing all cluster-wide locks ("errors=withdraw"). Note that the gfs2_withdraw_helper script still needs to be fixed to take advantage of these improvements. It could be changed to use a mechanism like LVM Persistent Reservations. "dmsetup suspend" is not a suitable mechanism as it infinitely postpones I/O operations, which may prevent withdraw from completing. Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
1 parent 0e10da6 commit bbbf152

5 files changed

Lines changed: 90 additions & 28 deletions

File tree

fs/gfs2/incore.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -537,8 +537,7 @@ struct gfs2_statfs_change_host {
537537

538538
#define GFS2_ERRORS_DEFAULT GFS2_ERRORS_WITHDRAW
539539
#define GFS2_ERRORS_WITHDRAW 0
540-
#define GFS2_ERRORS_CONTINUE 1 /* place holder for future feature */
541-
#define GFS2_ERRORS_RO 2 /* place holder for future feature */
540+
#define GFS2_ERRORS_DEACTIVATE 1
542541
#define GFS2_ERRORS_PANIC 3
543542

544543
struct gfs2_args {
@@ -554,7 +553,7 @@ struct gfs2_args {
554553
unsigned int ar_data:2; /* ordered/writeback */
555554
unsigned int ar_meta:1; /* mount metafs */
556555
unsigned int ar_discard:1; /* discard requests */
557-
unsigned int ar_errors:2; /* errors=withdraw | panic */
556+
unsigned int ar_errors:2; /* errors=withdraw | deactivate | panic */
558557
unsigned int ar_nobarrier:1; /* do not send barriers */
559558
unsigned int ar_rgrplvb:1; /* use lvbs for rgrp info */
560559
unsigned int ar_got_rgrplvb:1; /* Was the rgrplvb opt given? */
@@ -580,6 +579,7 @@ struct gfs2_tune {
580579
unsigned int gt_complain_secs;
581580
unsigned int gt_statfs_quantum;
582581
unsigned int gt_statfs_slow;
582+
unsigned int gt_withdraw_helper_timeout;
583583
};
584584

585585
enum {
@@ -711,7 +711,8 @@ struct gfs2_sbd {
711711
wait_queue_head_t sd_async_glock_wait;
712712
atomic_t sd_glock_disposal;
713713
struct completion sd_locking_init;
714-
struct completion sd_wdack;
714+
struct completion sd_withdraw_helper;
715+
int sd_withdraw_helper_status;
715716
struct delayed_work sd_control_work;
716717

717718
/* Inode Stuff */

fs/gfs2/ops_fstype.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
6060
gt->gt_new_files_jdata = 0;
6161
gt->gt_max_readahead = BIT(18);
6262
gt->gt_complain_secs = 10;
63+
gt->gt_withdraw_helper_timeout = 5;
6364
}
6465

6566
void free_sbd(struct gfs2_sbd *sdp)
@@ -92,7 +93,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
9293
init_waitqueue_head(&sdp->sd_async_glock_wait);
9394
atomic_set(&sdp->sd_glock_disposal, 0);
9495
init_completion(&sdp->sd_locking_init);
95-
init_completion(&sdp->sd_wdack);
96+
init_completion(&sdp->sd_withdraw_helper);
9697
spin_lock_init(&sdp->sd_statfs_spin);
9798

9899
spin_lock_init(&sdp->sd_rindex_spin);
@@ -1395,12 +1396,14 @@ static const struct constant_table gfs2_param_data[] = {
13951396
};
13961397

13971398
enum opt_errors {
1398-
Opt_errors_withdraw = GFS2_ERRORS_WITHDRAW,
1399-
Opt_errors_panic = GFS2_ERRORS_PANIC,
1399+
Opt_errors_withdraw = GFS2_ERRORS_WITHDRAW,
1400+
Opt_errors_deactivate = GFS2_ERRORS_DEACTIVATE,
1401+
Opt_errors_panic = GFS2_ERRORS_PANIC,
14001402
};
14011403

14021404
static const struct constant_table gfs2_param_errors[] = {
14031405
{"withdraw", Opt_errors_withdraw },
1406+
{"deactivate", Opt_errors_deactivate },
14041407
{"panic", Opt_errors_panic },
14051408
{}
14061409
};

fs/gfs2/super.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1145,6 +1145,9 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
11451145
case GFS2_ERRORS_WITHDRAW:
11461146
state = "withdraw";
11471147
break;
1148+
case GFS2_ERRORS_DEACTIVATE:
1149+
state = "deactivate";
1150+
break;
11481151
case GFS2_ERRORS_PANIC:
11491152
state = "panic";
11501153
break;

fs/gfs2/sys.c

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -425,26 +425,20 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
425425
return len;
426426
}
427427

428-
static ssize_t wdack_show(struct gfs2_sbd *sdp, char *buf)
429-
{
430-
int val = completion_done(&sdp->sd_wdack) ? 1 : 0;
431-
432-
return sprintf(buf, "%d\n", val);
433-
}
434-
435-
static ssize_t wdack_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
428+
static ssize_t withdraw_helper_status_store(struct gfs2_sbd *sdp,
429+
const char *buf,
430+
size_t len)
436431
{
437432
int ret, val;
438433

439434
ret = kstrtoint(buf, 0, &val);
440435
if (ret)
441436
return ret;
442-
443-
if ((val == 1) &&
444-
!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
445-
complete(&sdp->sd_wdack);
446-
else
437+
if (val < 0 || val > 1)
447438
return -EINVAL;
439+
440+
sdp->sd_withdraw_helper_status = val;
441+
complete(&sdp->sd_withdraw_helper);
448442
return len;
449443
}
450444

@@ -591,7 +585,7 @@ static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
591585

592586
GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
593587
GDLM_ATTR(block, 0644, block_show, block_store);
594-
GDLM_ATTR(withdraw, 0644, wdack_show, wdack_store);
588+
GDLM_ATTR(withdraw, 0200, NULL, withdraw_helper_status_store);
595589
GDLM_ATTR(jid, 0644, jid_show, jid_store);
596590
GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store);
597591
GDLM_ATTR(first_done, 0444, first_done_show, NULL);
@@ -690,6 +684,7 @@ TUNE_ATTR(statfs_slow, 0);
690684
TUNE_ATTR(new_files_jdata, 0);
691685
TUNE_ATTR(statfs_quantum, 1);
692686
TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
687+
TUNE_ATTR(withdraw_helper_timeout, 1);
693688

694689
static struct attribute *tune_attrs[] = {
695690
&tune_attr_quota_warn_period.attr,
@@ -700,6 +695,7 @@ static struct attribute *tune_attrs[] = {
700695
&tune_attr_statfs_quantum.attr,
701696
&tune_attr_quota_scale.attr,
702697
&tune_attr_new_files_jdata.attr,
698+
&tune_attr_withdraw_helper_timeout.attr,
703699
NULL,
704700
};
705701

fs/gfs2/util.c

Lines changed: 66 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -171,32 +171,91 @@ void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...)
171171
va_end(args);
172172
}
173173

174+
/**
175+
* gfs2_offline_uevent - run gfs2_withdraw_helper
176+
* @sdp: The GFS2 superblock
177+
*/
178+
static bool gfs2_offline_uevent(struct gfs2_sbd *sdp)
179+
{
180+
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
181+
long timeout;
182+
183+
/* Skip protocol "lock_nolock" which doesn't require shared storage. */
184+
if (!ls->ls_ops->lm_lock)
185+
return false;
186+
187+
/*
188+
* The gfs2_withdraw_helper replies by writing one of the following
189+
* status codes to "/sys$DEVPATH/lock_module/withdraw":
190+
*
191+
* 0 - The shared block device has been marked inactive. Future write
192+
* operations will fail.
193+
*
194+
* 1 - The shared block device may still be active and carry out
195+
* write operations.
196+
*
197+
* If the "offline" uevent isn't reacted upon in time, the event
198+
* handler is assumed to have failed.
199+
*/
200+
201+
sdp->sd_withdraw_helper_status = -1;
202+
kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
203+
timeout = gfs2_tune_get(sdp, gt_withdraw_helper_timeout) * HZ;
204+
wait_for_completion_timeout(&sdp->sd_withdraw_helper, timeout);
205+
if (sdp->sd_withdraw_helper_status == -1) {
206+
fs_err(sdp, "%s timed out\n", "gfs2_withdraw_helper");
207+
} else {
208+
fs_err(sdp, "%s %s with status %d\n",
209+
"gfs2_withdraw_helper",
210+
sdp->sd_withdraw_helper_status == 0 ?
211+
"succeeded" : "failed",
212+
sdp->sd_withdraw_helper_status);
213+
}
214+
return sdp->sd_withdraw_helper_status == 0;
215+
}
216+
174217
void gfs2_withdraw_func(struct work_struct *work)
175218
{
176219
struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_withdraw_work);
177220
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
178221
const struct lm_lockops *lm = ls->ls_ops;
222+
bool device_inactive;
179223

180224
if (test_bit(SDF_KILL, &sdp->sd_flags))
181225
return;
182226

183227
BUG_ON(sdp->sd_args.ar_debug);
184228

185-
do_withdraw(sdp);
229+
/*
230+
* Try to deactivate the shared block device so that no more I/O will
231+
* go through. If successful, we can immediately trigger remote
232+
* recovery. Otherwise, we must first empty out all our local caches.
233+
*/
186234

187-
kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
235+
device_inactive = gfs2_offline_uevent(sdp);
188236

189-
if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
190-
wait_for_completion(&sdp->sd_wdack);
237+
if (sdp->sd_args.ar_errors == GFS2_ERRORS_DEACTIVATE && !device_inactive)
238+
panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname);
239+
240+
if (lm->lm_unmount) {
241+
if (device_inactive) {
242+
lm->lm_unmount(sdp, false);
243+
do_withdraw(sdp);
244+
} else {
245+
do_withdraw(sdp);
246+
lm->lm_unmount(sdp, false);
247+
}
248+
} else {
249+
do_withdraw(sdp);
250+
}
191251

192-
if (lm->lm_unmount)
193-
lm->lm_unmount(sdp, false);
194252
fs_err(sdp, "file system withdrawn\n");
195253
}
196254

197255
void gfs2_withdraw(struct gfs2_sbd *sdp)
198256
{
199-
if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
257+
if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW ||
258+
sdp->sd_args.ar_errors == GFS2_ERRORS_DEACTIVATE) {
200259
if (test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags))
201260
return;
202261

0 commit comments

Comments
 (0)