Skip to content

Commit aff46e0

Browse files
Alexander Aring authored and
teigland committed
dlm: use a new list for recovery of master rsb names
Add a new "masters_list" for master rsb structs, with a new rwlock. The new list is created and used during the recovery process to send the master rsb names to new nodes. With this change, the current "root_list" can be used without locking.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
1 parent 29e345f commit aff46e0

5 files changed

Lines changed: 79 additions & 14 deletions

File tree

fs/dlm/dir.c

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -216,16 +216,13 @@ static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, const char *name,
216216
if (!rv)
217217
return r;
218218

219-
down_read(&ls->ls_root_sem);
220-
list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
219+
list_for_each_entry(r, &ls->ls_masters_list, res_masters_list) {
221220
if (len == r->res_length && !memcmp(name, r->res_name, len)) {
222-
up_read(&ls->ls_root_sem);
223221
log_debug(ls, "find_rsb_root revert to root_list %s",
224222
r->res_name);
225223
return r;
226224
}
227225
}
228-
up_read(&ls->ls_root_sem);
229226
return NULL;
230227
}
231228

@@ -241,7 +238,7 @@ void dlm_copy_master_names(struct dlm_ls *ls, const char *inbuf, int inlen,
241238
int offset = 0, dir_nodeid;
242239
__be16 be_namelen;
243240

244-
down_read(&ls->ls_root_sem);
241+
read_lock(&ls->ls_masters_lock);
245242

246243
if (inlen > 1) {
247244
r = find_rsb_root(ls, inbuf, inlen);
@@ -250,16 +247,13 @@ void dlm_copy_master_names(struct dlm_ls *ls, const char *inbuf, int inlen,
250247
nodeid, inlen, inlen, inbuf);
251248
goto out;
252249
}
253-
list = r->res_root_list.next;
250+
list = r->res_masters_list.next;
254251
} else {
255-
list = ls->ls_root_list.next;
252+
list = ls->ls_masters_list.next;
256253
}
257254

258-
for (offset = 0; list != &ls->ls_root_list; list = list->next) {
259-
r = list_entry(list, struct dlm_rsb, res_root_list);
260-
if (r->res_nodeid)
261-
continue;
262-
255+
for (offset = 0; list != &ls->ls_masters_list; list = list->next) {
256+
r = list_entry(list, struct dlm_rsb, res_masters_list);
263257
dir_nodeid = dlm_dir_nodeid(r);
264258
if (dir_nodeid != nodeid)
265259
continue;
@@ -294,14 +288,14 @@ void dlm_copy_master_names(struct dlm_ls *ls, const char *inbuf, int inlen,
294288
* terminating record.
295289
*/
296290

297-
if ((list == &ls->ls_root_list) &&
291+
if ((list == &ls->ls_masters_list) &&
298292
(offset + sizeof(uint16_t) <= outlen)) {
299293
be_namelen = cpu_to_be16(0xFFFF);
300294
memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
301295
offset += sizeof(__be16);
302296
ls->ls_recover_dir_sent_msg++;
303297
}
304298
out:
305-
up_read(&ls->ls_root_sem);
299+
read_unlock(&ls->ls_masters_lock);
306300
}
307301

fs/dlm/dlm_internal.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,7 @@ struct dlm_rsb {
342342
struct list_head res_waitqueue;
343343

344344
struct list_head res_root_list; /* used for recovery */
345+
struct list_head res_masters_list; /* used for recovery */
345346
struct list_head res_recover_list; /* used for recovery */
346347
int res_recover_locks_count;
347348

@@ -675,6 +676,8 @@ struct dlm_ls {
675676

676677
struct list_head ls_root_list; /* root resources */
677678
struct rw_semaphore ls_root_sem; /* protect root_list */
679+
struct list_head ls_masters_list; /* rsbs we are master of, built during recovery */
680+
rwlock_t ls_masters_lock; /* protect masters_list */
678681

679682
const struct dlm_lockspace_ops *ls_ops;
680683
void *ls_ops_arg;

fs/dlm/lock.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,7 @@ static int get_rsb_struct(struct dlm_ls *ls, const void *name, int len,
423423
INIT_LIST_HEAD(&r->res_waitqueue);
424424
INIT_LIST_HEAD(&r->res_root_list);
425425
INIT_LIST_HEAD(&r->res_recover_list);
426+
INIT_LIST_HEAD(&r->res_masters_list);
426427

427428
*r_ret = r;
428429
return 0;
@@ -1168,6 +1169,7 @@ static void kill_rsb(struct kref *kref)
11681169
DLM_ASSERT(list_empty(&r->res_waitqueue), dlm_dump_rsb(r););
11691170
DLM_ASSERT(list_empty(&r->res_root_list), dlm_dump_rsb(r););
11701171
DLM_ASSERT(list_empty(&r->res_recover_list), dlm_dump_rsb(r););
1172+
DLM_ASSERT(list_empty(&r->res_masters_list), dlm_dump_rsb(r););
11711173
}
11721174

11731175
/* Attaching/detaching lkb's from rsb's is for rsb reference counting.

fs/dlm/lockspace.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,8 @@ static int new_lockspace(const char *name, const char *cluster,
582582
init_waitqueue_head(&ls->ls_wait_general);
583583
INIT_LIST_HEAD(&ls->ls_root_list);
584584
init_rwsem(&ls->ls_root_sem);
585+
INIT_LIST_HEAD(&ls->ls_masters_list);
586+
rwlock_init(&ls->ls_masters_lock);
585587

586588
spin_lock(&lslist_lock);
587589
ls->ls_create_count = 1;

fs/dlm/recoverd.c

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,48 @@
2020
#include "requestqueue.h"
2121
#include "recoverd.h"
2222

23+
/* Build ls->ls_masters_list for recovery.
 *
 * Walks the "keep" rb-tree of every ls->ls_rsbtbl[] bucket and adds each
 * rsb whose res_nodeid is zero to ls->ls_masters_list, taking a reference
 * on it with dlm_hold_rsb(). The whole walk runs with ls_masters_lock held
 * for writing; each bucket is additionally protected by its own lock.
 *
 * Returns 0 on success, or -EINVAL (after logging an error) if
 * ls_masters_list is not empty on entry.
 */
static int dlm_create_masters_list(struct dlm_ls *ls)
24+
{
25+
struct rb_node *n;
26+
struct dlm_rsb *r;
27+
int i, error = 0;
28+
29+
write_lock(&ls->ls_masters_lock);
30+
/* A non-empty list on entry is treated as an error rather than reused. */
if (!list_empty(&ls->ls_masters_list)) {
31+
/* NOTE(review): the message says "root list" although the list
 * checked here is ls_masters_list - confirm whether the text
 * should be updated.
 */
log_error(ls, "root list not empty");
32+
error = -EINVAL;
33+
goto out;
34+
}
35+
36+
for (i = 0; i < ls->ls_rsbtbl_size; i++) {
37+
spin_lock_bh(&ls->ls_rsbtbl[i].lock);
38+
for (n = rb_first(&ls->ls_rsbtbl[i].keep); n; n = rb_next(n)) {
39+
r = rb_entry(n, struct dlm_rsb, res_hashnode);
40+
/* Only rsbs with res_nodeid == 0 go on the masters list. */
if (r->res_nodeid)
41+
continue;
42+
43+
list_add(&r->res_masters_list, &ls->ls_masters_list);
44+
/* Reference is dropped again in dlm_release_masters_list(). */
dlm_hold_rsb(r);
45+
}
46+
spin_unlock_bh(&ls->ls_rsbtbl[i].lock);
47+
}
48+
out:
49+
write_unlock(&ls->ls_masters_lock);
50+
return error;
51+
}
52+
53+
/* Tear down ls->ls_masters_list.
 *
 * With ls_masters_lock held for writing, unlinks every rsb from the list
 * and calls dlm_put_rsb() on it, undoing the dlm_hold_rsb() taken when the
 * rsb was added by dlm_create_masters_list(). The list is empty on return.
 */
static void dlm_release_masters_list(struct dlm_ls *ls)
54+
{
55+
struct dlm_rsb *r, *safe;
56+
57+
write_lock(&ls->ls_masters_lock);
58+
/* _safe iteration: entries are removed while walking the list. */
list_for_each_entry_safe(r, safe, &ls->ls_masters_list, res_masters_list) {
59+
/* list_del_init() leaves res_masters_list as an empty list head. */
list_del_init(&r->res_masters_list);
60+
dlm_put_rsb(r);
61+
}
62+
write_unlock(&ls->ls_masters_lock);
63+
}
64+
2365
static void dlm_create_root_list(struct dlm_ls *ls)
2466
{
2567
struct rb_node *n;
@@ -123,6 +165,23 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
123165

124166
dlm_recover_dir_nodeid(ls);
125167

168+
/* Create a snapshot of all active rsbs that we are the master of.
169+
* During the barrier between dlm_recover_members_wait() and
170+
* dlm_recover_directory() other nodes can dump their necessary
171+
* directory dlm_rsb (r->res_dir_nodeid == nodeid) in rcom
172+
* communication dlm_copy_master_names() handling.
173+
*
174+
* TODO: We should maintain a per-lockspace list of the rsbs that
175+
* we are the master of. Instead of building this list during
176+
* recovery, we would keep track of those rsbs during normal lock
177+
* handling so that recovery can use the list when necessary.
178+
*/
179+
error = dlm_create_masters_list(ls);
180+
if (error) {
181+
log_rinfo(ls, "dlm_create_masters_list error %d", error);
182+
goto fail;
183+
}
184+
126185
ls->ls_recover_dir_sent_res = 0;
127186
ls->ls_recover_dir_sent_msg = 0;
128187
ls->ls_recover_locks_in = 0;
@@ -132,6 +191,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
132191
error = dlm_recover_members_wait(ls, rv->seq);
133192
if (error) {
134193
log_rinfo(ls, "dlm_recover_members_wait error %d", error);
194+
dlm_release_masters_list(ls);
135195
goto fail;
136196
}
137197

@@ -145,6 +205,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
145205
error = dlm_recover_directory(ls, rv->seq);
146206
if (error) {
147207
log_rinfo(ls, "dlm_recover_directory error %d", error);
208+
dlm_release_masters_list(ls);
148209
goto fail;
149210
}
150211

@@ -153,9 +214,12 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
153214
error = dlm_recover_directory_wait(ls, rv->seq);
154215
if (error) {
155216
log_rinfo(ls, "dlm_recover_directory_wait error %d", error);
217+
dlm_release_masters_list(ls);
156218
goto fail;
157219
}
158220

221+
dlm_release_masters_list(ls);
222+
159223
log_rinfo(ls, "dlm_recover_directory %u out %u messages",
160224
ls->ls_recover_dir_sent_res, ls->ls_recover_dir_sent_msg);
161225

0 commit comments

Comments
 (0)