Skip to content

Commit be9e782

Browse files
author
Kent Overstreet
committed
bcachefs: Don't downgrade locks on transaction restart
We should only be downgrading locks on success - otherwise, our transaction restarts won't be getting the correct locks and we'll livelock. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
1 parent 2e7acdf commit be9e782

9 files changed

Lines changed: 96 additions & 37 deletions

File tree

fs/bcachefs/btree_iter.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1523,6 +1523,7 @@ static inline struct btree_path *btree_path_alloc(struct btree_trans *trans,
15231523
path->ref = 0;
15241524
path->intent_ref = 0;
15251525
path->nodes_locked = 0;
1526+
path->alloc_seq++;
15261527

15271528
btree_path_list_add(trans, pos, path);
15281529
trans->paths_sorted = false;
@@ -1598,7 +1599,7 @@ struct btree_path *bch2_path_get(struct btree_trans *trans,
15981599

15991600
locks_want = min(locks_want, BTREE_MAX_DEPTH);
16001601
if (locks_want > path->locks_want)
1601-
bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want);
1602+
bch2_btree_path_upgrade_noupgrade_sibs(trans, path, locks_want, NULL);
16021603

16031604
return path;
16041605
}

fs/bcachefs/btree_key_cache.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,7 @@ bch2_btree_path_traverse_cached_slowpath(struct btree_trans *trans, struct btree
509509
* path->uptodate yet:
510510
*/
511511
if (!path->locks_want &&
512-
!__bch2_btree_path_upgrade(trans, path, 1)) {
512+
!__bch2_btree_path_upgrade(trans, path, 1, NULL)) {
513513
trace_and_count(trans->c, trans_restart_key_cache_upgrade, trans, _THIS_IP_);
514514
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_upgrade);
515515
goto err;

fs/bcachefs/btree_locking.c

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,8 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
431431

432432
static inline bool btree_path_get_locks(struct btree_trans *trans,
433433
struct btree_path *path,
434-
bool upgrade)
434+
bool upgrade,
435+
struct get_locks_fail *f)
435436
{
436437
unsigned l = path->level;
437438
int fail_idx = -1;
@@ -442,8 +443,14 @@ static inline bool btree_path_get_locks(struct btree_trans *trans,
442443

443444
if (!(upgrade
444445
? bch2_btree_node_upgrade(trans, path, l)
445-
: bch2_btree_node_relock(trans, path, l)))
446-
fail_idx = l;
446+
: bch2_btree_node_relock(trans, path, l))) {
447+
fail_idx = l;
448+
449+
if (f) {
450+
f->l = l;
451+
f->b = path->l[l].b;
452+
}
453+
}
447454

448455
l++;
449456
} while (l < path->locks_want);
@@ -584,7 +591,9 @@ __flatten
584591
bool bch2_btree_path_relock_norestart(struct btree_trans *trans,
585592
struct btree_path *path, unsigned long trace_ip)
586593
{
587-
return btree_path_get_locks(trans, path, false);
594+
struct get_locks_fail f;
595+
596+
return btree_path_get_locks(trans, path, false, &f);
588597
}
589598

590599
int __bch2_btree_path_relock(struct btree_trans *trans,
@@ -600,22 +609,24 @@ int __bch2_btree_path_relock(struct btree_trans *trans,
600609

601610
bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *trans,
602611
struct btree_path *path,
603-
unsigned new_locks_want)
612+
unsigned new_locks_want,
613+
struct get_locks_fail *f)
604614
{
605615
EBUG_ON(path->locks_want >= new_locks_want);
606616

607617
path->locks_want = new_locks_want;
608618

609-
return btree_path_get_locks(trans, path, true);
619+
return btree_path_get_locks(trans, path, true, f);
610620
}
611621

612622
bool __bch2_btree_path_upgrade(struct btree_trans *trans,
613623
struct btree_path *path,
614-
unsigned new_locks_want)
624+
unsigned new_locks_want,
625+
struct get_locks_fail *f)
615626
{
616627
struct btree_path *linked;
617628

618-
if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want))
629+
if (bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want, f))
619630
return true;
620631

621632
/*
@@ -644,7 +655,7 @@ bool __bch2_btree_path_upgrade(struct btree_trans *trans,
644655
linked->btree_id == path->btree_id &&
645656
linked->locks_want < new_locks_want) {
646657
linked->locks_want = new_locks_want;
647-
btree_path_get_locks(trans, linked, true);
658+
btree_path_get_locks(trans, linked, true, NULL);
648659
}
649660

650661
return false;
@@ -656,6 +667,9 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans,
656667
{
657668
unsigned l;
658669

670+
if (trans->restarted)
671+
return;
672+
659673
EBUG_ON(path->locks_want < new_locks_want);
660674

661675
path->locks_want = new_locks_want;
@@ -674,6 +688,9 @@ void __bch2_btree_path_downgrade(struct btree_trans *trans,
674688
}
675689

676690
bch2_btree_path_verify_locks(path);
691+
692+
path->downgrade_seq++;
693+
trace_path_downgrade(trans, _RET_IP_, path);
677694
}
678695

679696
/* Btree transaction locking: */
@@ -682,6 +699,9 @@ void bch2_trans_downgrade(struct btree_trans *trans)
682699
{
683700
struct btree_path *path;
684701

702+
if (trans->restarted)
703+
return;
704+
685705
trans_for_each_path(trans, path)
686706
bch2_btree_path_downgrade(trans, path);
687707
}

fs/bcachefs/btree_locking.h

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -355,26 +355,36 @@ static inline bool bch2_btree_node_relock_notrace(struct btree_trans *trans,
355355

356356
/* upgrade */
357357

358+
359+
struct get_locks_fail {
360+
unsigned l;
361+
struct btree *b;
362+
};
363+
358364
bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *,
359-
struct btree_path *, unsigned);
365+
struct btree_path *, unsigned,
366+
struct get_locks_fail *);
367+
360368
bool __bch2_btree_path_upgrade(struct btree_trans *,
361-
struct btree_path *, unsigned);
369+
struct btree_path *, unsigned,
370+
struct get_locks_fail *);
362371

363372
static inline int bch2_btree_path_upgrade(struct btree_trans *trans,
364373
struct btree_path *path,
365374
unsigned new_locks_want)
366375
{
376+
struct get_locks_fail f;
367377
unsigned old_locks_want = path->locks_want;
368378

369379
new_locks_want = min(new_locks_want, BTREE_MAX_DEPTH);
370380

371381
if (path->locks_want < new_locks_want
372-
? __bch2_btree_path_upgrade(trans, path, new_locks_want)
382+
? __bch2_btree_path_upgrade(trans, path, new_locks_want, &f)
373383
: path->uptodate == BTREE_ITER_UPTODATE)
374384
return 0;
375385

376386
trace_and_count(trans->c, trans_restart_upgrade, trans, _THIS_IP_, path,
377-
old_locks_want, new_locks_want);
387+
old_locks_want, new_locks_want, &f);
378388
return btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade);
379389
}
380390

fs/bcachefs/btree_trans_commit.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -861,12 +861,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
861861
*/
862862
bch2_journal_res_put(&c->journal, &trans->journal_res);
863863

864-
if (unlikely(ret))
865-
return ret;
866-
867-
bch2_trans_downgrade(trans);
868-
869-
return 0;
864+
return ret;
870865
}
871866

872867
static int journal_reclaim_wait_done(struct bch_fs *c)
@@ -1135,6 +1130,8 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
11351130
if (likely(!(flags & BTREE_INSERT_NOCHECK_RW)))
11361131
bch2_write_ref_put(c, BCH_WRITE_REF_trans);
11371132
out_reset:
1133+
if (!ret)
1134+
bch2_trans_downgrade(trans);
11381135
bch2_trans_reset_updates(trans);
11391136

11401137
return ret;

fs/bcachefs/btree_types.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,8 @@ struct btree_path {
228228
u8 sorted_idx;
229229
u8 ref;
230230
u8 intent_ref;
231+
u32 alloc_seq;
232+
u32 downgrade_seq;
231233

232234
/* btree_iter_copy starts here: */
233235
struct bpos pos;

fs/bcachefs/btree_update_interior.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1987,7 +1987,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
19871987
out:
19881988
if (new_path)
19891989
bch2_path_put(trans, new_path, true);
1990-
bch2_btree_path_downgrade(trans, iter->path);
1990+
bch2_trans_downgrade(trans);
19911991
return ret;
19921992
err:
19931993
bch2_btree_node_free_never_used(as, trans, n);

fs/bcachefs/data_update.c

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -162,11 +162,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
162162
if (((1U << i) & m->data_opts.rewrite_ptrs) &&
163163
(ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) &&
164164
!ptr->cached) {
165-
bch2_bkey_drop_ptr_noerror(bkey_i_to_s(insert), ptr);
166-
/*
167-
* See comment below:
168165
bch2_extent_ptr_set_cached(bkey_i_to_s(insert), ptr);
169-
*/
170166
rewrites_found |= 1U << i;
171167
}
172168
i++;
@@ -212,14 +208,8 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
212208
if (!p.ptr.cached &&
213209
durability - ptr_durability >= m->op.opts.data_replicas) {
214210
durability -= ptr_durability;
215-
bch2_bkey_drop_ptr_noerror(bkey_i_to_s(insert), &entry->ptr);
216-
/*
217-
* Currently, we're dropping unneeded replicas
218-
* instead of marking them as cached, since
219-
* cached data in stripe buckets prevents them
220-
* from being reused:
211+
221212
bch2_extent_ptr_set_cached(bkey_i_to_s(insert), &entry->ptr);
222-
*/
223213
goto restart_drop_extra_replicas;
224214
}
225215
}

fs/bcachefs/trace.h

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,20 +1043,28 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_btree_node_split,
10431043
TP_ARGS(trans, caller_ip, path)
10441044
);
10451045

1046+
struct get_locks_fail;
1047+
10461048
TRACE_EVENT(trans_restart_upgrade,
10471049
TP_PROTO(struct btree_trans *trans,
10481050
unsigned long caller_ip,
10491051
struct btree_path *path,
10501052
unsigned old_locks_want,
1051-
unsigned new_locks_want),
1052-
TP_ARGS(trans, caller_ip, path, old_locks_want, new_locks_want),
1053+
unsigned new_locks_want,
1054+
struct get_locks_fail *f),
1055+
TP_ARGS(trans, caller_ip, path, old_locks_want, new_locks_want, f),
10531056

10541057
TP_STRUCT__entry(
10551058
__array(char, trans_fn, 32 )
10561059
__field(unsigned long, caller_ip )
10571060
__field(u8, btree_id )
10581061
__field(u8, old_locks_want )
10591062
__field(u8, new_locks_want )
1063+
__field(u8, level )
1064+
__field(u32, path_seq )
1065+
__field(u32, node_seq )
1066+
__field(u32, path_alloc_seq )
1067+
__field(u32, downgrade_seq)
10601068
TRACE_BPOS_entries(pos)
10611069
),
10621070

@@ -1066,18 +1074,28 @@ TRACE_EVENT(trans_restart_upgrade,
10661074
__entry->btree_id = path->btree_id;
10671075
__entry->old_locks_want = old_locks_want;
10681076
__entry->new_locks_want = new_locks_want;
1077+
__entry->level = f->l;
1078+
__entry->path_seq = path->l[f->l].lock_seq;
1079+
__entry->node_seq = IS_ERR_OR_NULL(f->b) ? 0 : f->b->c.lock.seq;
1080+
__entry->path_alloc_seq = path->alloc_seq;
1081+
__entry->downgrade_seq = path->downgrade_seq;
10691082
TRACE_BPOS_assign(pos, path->pos)
10701083
),
10711084

1072-
TP_printk("%s %pS btree %s pos %llu:%llu:%u locks_want %u -> %u",
1085+
TP_printk("%s %pS btree %s pos %llu:%llu:%u locks_want %u -> %u level %u path seq %u node seq %u alloc_seq %u downgrade_seq %u",
10731086
__entry->trans_fn,
10741087
(void *) __entry->caller_ip,
10751088
bch2_btree_id_str(__entry->btree_id),
10761089
__entry->pos_inode,
10771090
__entry->pos_offset,
10781091
__entry->pos_snapshot,
10791092
__entry->old_locks_want,
1080-
__entry->new_locks_want)
1093+
__entry->new_locks_want,
1094+
__entry->level,
1095+
__entry->path_seq,
1096+
__entry->node_seq,
1097+
__entry->path_alloc_seq,
1098+
__entry->downgrade_seq)
10811099
);
10821100

10831101
DEFINE_EVENT(transaction_restart_iter, trans_restart_relock,
@@ -1238,6 +1256,27 @@ TRACE_EVENT(trans_restart_key_cache_key_realloced,
12381256
__entry->new_u64s)
12391257
);
12401258

1259+
TRACE_EVENT(path_downgrade,
1260+
TP_PROTO(struct btree_trans *trans,
1261+
unsigned long caller_ip,
1262+
struct btree_path *path),
1263+
TP_ARGS(trans, caller_ip, path),
1264+
1265+
TP_STRUCT__entry(
1266+
__array(char, trans_fn, 32 )
1267+
__field(unsigned long, caller_ip )
1268+
),
1269+
1270+
TP_fast_assign(
1271+
strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
1272+
__entry->caller_ip = caller_ip;
1273+
),
1274+
1275+
TP_printk("%s %pS",
1276+
__entry->trans_fn,
1277+
(void *) __entry->caller_ip)
1278+
);
1279+
12411280
DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush,
12421281
TP_PROTO(struct btree_trans *trans,
12431282
unsigned long caller_ip),

0 commit comments

Comments
 (0)