Skip to content

Commit 7d9f846

Browse files
author
Kent Overstreet
committed
bcachefs: Data update path won't accidentaly grow replicas
Previously, there was a bug where if an extent had greater durability than required (because we needed to move a durability=1 pointer and ended up putting it on a durability 2 device), we would submit a write for replicas=2 - the durability of the pointer being rewritten - instead of the number of replicas required to bring it back up to the data_replicas option. This, plus the allocation path sometimes allocating on a greater durability device than requested, meant that extents could continue having more and more replicas added as they were being rewritten. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
1 parent 0af8a06 commit 7d9f846

5 files changed

Lines changed: 96 additions & 67 deletions

File tree

fs/bcachefs/data_update.c

Lines changed: 82 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -356,7 +356,7 @@ void bch2_data_update_exit(struct data_update *update)
356356
bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
357357
}
358358

359-
void bch2_update_unwritten_extent(struct btree_trans *trans,
359+
static void bch2_update_unwritten_extent(struct btree_trans *trans,
360360
struct data_update *update)
361361
{
362362
struct bch_fs *c = update->op.c;
@@ -436,7 +436,51 @@ void bch2_update_unwritten_extent(struct btree_trans *trans,
436436
}
437437
}
438438

439+
int bch2_extent_drop_ptrs(struct btree_trans *trans,
440+
struct btree_iter *iter,
441+
struct bkey_s_c k,
442+
struct data_update_opts data_opts)
443+
{
444+
struct bch_fs *c = trans->c;
445+
struct bkey_i *n;
446+
int ret;
447+
448+
n = bch2_bkey_make_mut_noupdate(trans, k);
449+
ret = PTR_ERR_OR_ZERO(n);
450+
if (ret)
451+
return ret;
452+
453+
while (data_opts.kill_ptrs) {
454+
unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
455+
struct bch_extent_ptr *ptr;
456+
457+
bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
458+
data_opts.kill_ptrs ^= 1U << drop;
459+
}
460+
461+
/*
462+
* If the new extent no longer has any pointers, bch2_extent_normalize()
463+
* will do the appropriate thing with it (turning it into a
464+
* KEY_TYPE_error key, or just a discard if it was a cached extent)
465+
*/
466+
bch2_extent_normalize(c, bkey_i_to_s(n));
467+
468+
/*
469+
* Since we're not inserting through an extent iterator
470+
* (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
471+
* we aren't using the extent overwrite path to delete, we're
472+
* just using the normal key deletion path:
473+
*/
474+
if (bkey_deleted(&n->k))
475+
n->k.size = 0;
476+
477+
return bch2_trans_relock(trans) ?:
478+
bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
479+
bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
480+
}
481+
439482
int bch2_data_update_init(struct btree_trans *trans,
483+
struct btree_iter *iter,
440484
struct moving_context *ctxt,
441485
struct data_update *m,
442486
struct write_point_specifier wp,
@@ -452,7 +496,7 @@ int bch2_data_update_init(struct btree_trans *trans,
452496
const struct bch_extent_ptr *ptr;
453497
unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas;
454498
unsigned ptrs_locked = 0;
455-
int ret;
499+
int ret = 0;
456500

457501
bch2_bkey_buf_init(&m->k);
458502
bch2_bkey_buf_reassemble(&m->k, c, k);
@@ -478,6 +522,8 @@ int bch2_data_update_init(struct btree_trans *trans,
478522
bkey_for_each_ptr(ptrs, ptr)
479523
percpu_ref_get(&bch_dev_bkey_exists(c, ptr->dev)->ref);
480524

525+
unsigned durability_have = 0, durability_removing = 0;
526+
481527
i = 0;
482528
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
483529
bool locked;
@@ -489,8 +535,11 @@ int bch2_data_update_init(struct btree_trans *trans,
489535
reserve_sectors += k.k->size;
490536

491537
m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p);
492-
} else if (!p.ptr.cached) {
538+
durability_removing += bch2_extent_ptr_desired_durability(c, &p);
539+
} else if (!p.ptr.cached &&
540+
!((1U << i) & m->data_opts.kill_ptrs)) {
493541
bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
542+
durability_have += bch2_extent_ptr_durability(c, &p);
494543
}
495544

496545
/*
@@ -529,6 +578,29 @@ int bch2_data_update_init(struct btree_trans *trans,
529578
i++;
530579
}
531580

581+
/*
582+
* If current extent durability is less than io_opts.data_replicas,
583+
* we're not trying to rereplicate the extent up to data_replicas here -
584+
* unless extra_replicas was specified
585+
*
586+
* Increasing replication is an explicit operation triggered by
587+
* rereplicate, currently, so that users don't get an unexpected -ENOSPC
588+
*/
589+
if (durability_have >= io_opts.data_replicas) {
590+
m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
591+
m->data_opts.rewrite_ptrs = 0;
592+
/* if iter == NULL, it's just a promote */
593+
if (iter)
594+
ret = bch2_extent_drop_ptrs(trans, iter, k, data_opts);
595+
goto done;
596+
}
597+
598+
m->op.nr_replicas = min(durability_removing, io_opts.data_replicas - durability_have) +
599+
m->data_opts.extra_replicas;
600+
m->op.nr_replicas_required = m->op.nr_replicas;
601+
602+
BUG_ON(!m->op.nr_replicas);
603+
532604
if (reserve_sectors) {
533605
ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
534606
m->data_opts.extra_replicas
@@ -538,14 +610,11 @@ int bch2_data_update_init(struct btree_trans *trans,
538610
goto err;
539611
}
540612

541-
m->op.nr_replicas += m->data_opts.extra_replicas;
542-
m->op.nr_replicas_required = m->op.nr_replicas;
543-
544-
BUG_ON(!m->op.nr_replicas);
613+
if (bkey_extent_is_unwritten(k)) {
614+
bch2_update_unwritten_extent(trans, m);
615+
goto done;
616+
}
545617

546-
/* Special handling required: */
547-
if (bkey_extent_is_unwritten(k))
548-
return -BCH_ERR_unwritten_extent_update;
549618
return 0;
550619
err:
551620
i = 0;
@@ -560,6 +629,9 @@ int bch2_data_update_init(struct btree_trans *trans,
560629
bch2_bkey_buf_exit(&m->k, c);
561630
bch2_bio_free_pages_pool(c, &m->op.wbio.bio);
562631
return ret;
632+
done:
633+
bch2_data_update_exit(m);
634+
return ret ?: -BCH_ERR_data_update_done;
563635
}
564636

565637
void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts)

fs/bcachefs/data_update.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,14 @@ int bch2_data_update_index_update(struct bch_write_op *);
3232
void bch2_data_update_read_done(struct data_update *,
3333
struct bch_extent_crc_unpacked);
3434

35+
int bch2_extent_drop_ptrs(struct btree_trans *,
36+
struct btree_iter *,
37+
struct bkey_s_c,
38+
struct data_update_opts);
39+
3540
void bch2_data_update_exit(struct data_update *);
36-
void bch2_update_unwritten_extent(struct btree_trans *, struct data_update *);
37-
int bch2_data_update_init(struct btree_trans *, struct moving_context *,
41+
int bch2_data_update_init(struct btree_trans *, struct btree_iter *,
42+
struct moving_context *,
3843
struct data_update *,
3944
struct write_point_specifier,
4045
struct bch_io_opts, struct data_update_opts,

fs/bcachefs/errcode.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@
162162
x(BCH_ERR_fsck, fsck_repair_unimplemented) \
163163
x(BCH_ERR_fsck, fsck_repair_impossible) \
164164
x(0, restart_recovery) \
165-
x(0, unwritten_extent_update) \
165+
x(0, data_update_done) \
166166
x(EINVAL, device_state_not_allowed) \
167167
x(EINVAL, member_info_missing) \
168168
x(EINVAL, mismatched_block_size) \

fs/bcachefs/io_read.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
209209
bio = &op->write.op.wbio.bio;
210210
bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
211211

212-
ret = bch2_data_update_init(trans, NULL, &op->write,
212+
ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
213213
writepoint_hashed((unsigned long) current),
214214
opts,
215215
(struct data_update_opts) {

fs/bcachefs/move.c

Lines changed: 5 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -229,49 +229,6 @@ void bch2_move_stats_init(struct bch_move_stats *stats, char *name)
229229
scnprintf(stats->name, sizeof(stats->name), "%s", name);
230230
}
231231

232-
static int bch2_extent_drop_ptrs(struct btree_trans *trans,
233-
struct btree_iter *iter,
234-
struct bkey_s_c k,
235-
struct data_update_opts data_opts)
236-
{
237-
struct bch_fs *c = trans->c;
238-
struct bkey_i *n;
239-
int ret;
240-
241-
n = bch2_bkey_make_mut_noupdate(trans, k);
242-
ret = PTR_ERR_OR_ZERO(n);
243-
if (ret)
244-
return ret;
245-
246-
while (data_opts.kill_ptrs) {
247-
unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
248-
struct bch_extent_ptr *ptr;
249-
250-
bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
251-
data_opts.kill_ptrs ^= 1U << drop;
252-
}
253-
254-
/*
255-
* If the new extent no longer has any pointers, bch2_extent_normalize()
256-
* will do the appropriate thing with it (turning it into a
257-
* KEY_TYPE_error key, or just a discard if it was a cached extent)
258-
*/
259-
bch2_extent_normalize(c, bkey_i_to_s(n));
260-
261-
/*
262-
* Since we're not inserting through an extent iterator
263-
* (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
264-
* we aren't using the extent overwrite path to delete, we're
265-
* just using the normal key deletion path:
266-
*/
267-
if (bkey_deleted(&n->k))
268-
n->k.size = 0;
269-
270-
return bch2_trans_relock(trans) ?:
271-
bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
272-
bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
273-
}
274-
275232
int bch2_move_extent(struct moving_context *ctxt,
276233
struct move_bucket_in_flight *bucket_in_flight,
277234
struct btree_iter *iter,
@@ -341,19 +298,11 @@ int bch2_move_extent(struct moving_context *ctxt,
341298
io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k);
342299
io->rbio.bio.bi_end_io = move_read_endio;
343300

344-
ret = bch2_data_update_init(trans, ctxt, &io->write, ctxt->wp,
301+
ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
345302
io_opts, data_opts, iter->btree_id, k);
346-
if (ret && ret != -BCH_ERR_unwritten_extent_update)
303+
if (ret)
347304
goto err_free_pages;
348305

349-
if (ret == -BCH_ERR_unwritten_extent_update) {
350-
bch2_update_unwritten_extent(trans, &io->write);
351-
move_free(io);
352-
return 0;
353-
}
354-
355-
BUG_ON(ret);
356-
357306
io->write.op.end_io = move_write_done;
358307

359308
if (ctxt->rate)
@@ -397,6 +346,9 @@ int bch2_move_extent(struct moving_context *ctxt,
397346
err_free:
398347
kfree(io);
399348
err:
349+
if (ret == -BCH_ERR_data_update_done)
350+
return 0;
351+
400352
this_cpu_inc(c->counters[BCH_COUNTER_move_extent_alloc_mem_fail]);
401353
trace_move_extent_alloc_mem_fail2(c, k);
402354
return ret;

0 commit comments

Comments
 (0)