Skip to content

Commit 1009254

Browse files
adam900710 authored and kdave committed
btrfs: scrub: use scrub_stripe to implement RAID56 P/Q scrub
Implement the only missing part for scrub: RAID56 P/Q stripe scrub.

The workflow is pretty straightforward for the new function, scrub_raid56_parity_stripe():

- Go through the regular scrub path for each data stripe
- Wait for the verification and repair to finish
- Write back the repaired sectors to the data stripes
- Make sure all stripes are properly repaired
  If we have unrepaired sectors, we cannot continue, or we could further corrupt the P/Q stripe.
- Submit the rbio for the P/Q stripe
  The dev-replace would be handled inside the raid56_parity_submit_scrub_rbio() path.
- Wait for the above bio to finish

Although the old code is no longer used, we still keep the declaration, as the cleanup can be several times larger than this patch itself.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
1 parent e02ee89 commit 1009254

2 files changed

Lines changed: 210 additions & 10 deletions

File tree

fs/btrfs/scrub.c

Lines changed: 205 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,13 @@ enum scrub_stripe_flags {
9898

9999
/* Set when the read-repair is finished. */
100100
SCRUB_STRIPE_FLAG_REPAIR_DONE,
101+
102+
/*
103+
* Set for data stripes if it's triggered from P/Q stripe.
104+
* During such scrub, we should not report errors in data stripes, nor
105+
* update the accounting.
106+
*/
107+
SCRUB_STRIPE_FLAG_NO_REPORT,
101108
};
102109

103110
#define SCRUB_STRIPE_PAGES (BTRFS_STRIPE_LEN / PAGE_SIZE)
@@ -279,6 +286,7 @@ struct scrub_parity {
279286
struct scrub_ctx {
280287
struct scrub_bio *bios[SCRUB_BIOS_PER_SCTX];
281288
struct scrub_stripe stripes[SCRUB_STRIPES_PER_SCTX];
289+
struct scrub_stripe *raid56_data_stripes;
282290
struct btrfs_fs_info *fs_info;
283291
int first_free;
284292
int curr;
@@ -2490,6 +2498,9 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
24902498
int nr_repaired_sectors = 0;
24912499
int sector_nr;
24922500

2501+
if (test_bit(SCRUB_STRIPE_FLAG_NO_REPORT, &stripe->state))
2502+
return;
2503+
24932504
/*
24942505
* Init needed infos for error reporting.
24952506
*
@@ -3799,11 +3810,8 @@ static int scrub_raid56_data_stripe_for_parity(struct scrub_ctx *sctx,
37993810
return ret;
38003811
}
38013812

3802-
static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
3803-
struct map_lookup *map,
3804-
struct btrfs_device *sdev,
3805-
u64 logic_start,
3806-
u64 logic_end)
3813+
int scrub_raid56_parity(struct scrub_ctx *sctx, struct map_lookup *map,
3814+
struct btrfs_device *sdev, u64 logic_start, u64 logic_end)
38073815
{
38083816
struct btrfs_fs_info *fs_info = sctx->fs_info;
38093817
struct btrfs_path *path;
@@ -4171,6 +4179,11 @@ static void flush_scrub_stripes(struct scrub_ctx *sctx)
41714179
sctx->cur_stripe = 0;
41724180
}
41734181

4182+
static void raid56_scrub_wait_endio(struct bio *bio)
4183+
{
4184+
complete(bio->bi_private);
4185+
}
4186+
41744187
static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *bg,
41754188
struct btrfs_device *dev, int mirror_num,
41764189
u64 logical, u32 length, u64 physical)
@@ -4195,6 +4208,165 @@ static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *
41954208
return 0;
41964209
}
41974210

4211+
static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
4212+
struct btrfs_device *scrub_dev,
4213+
struct btrfs_block_group *bg,
4214+
struct map_lookup *map,
4215+
u64 full_stripe_start)
4216+
{
4217+
DECLARE_COMPLETION_ONSTACK(io_done);
4218+
struct btrfs_fs_info *fs_info = sctx->fs_info;
4219+
struct btrfs_raid_bio *rbio;
4220+
struct btrfs_io_context *bioc = NULL;
4221+
struct bio *bio;
4222+
struct scrub_stripe *stripe;
4223+
bool all_empty = true;
4224+
const int data_stripes = nr_data_stripes(map);
4225+
unsigned long extent_bitmap = 0;
4226+
u64 length = data_stripes << BTRFS_STRIPE_LEN_SHIFT;
4227+
int ret;
4228+
4229+
ASSERT(sctx->raid56_data_stripes);
4230+
4231+
for (int i = 0; i < data_stripes; i++) {
4232+
int stripe_index;
4233+
int rot;
4234+
u64 physical;
4235+
4236+
stripe = &sctx->raid56_data_stripes[i];
4237+
rot = div_u64(full_stripe_start - bg->start,
4238+
data_stripes) >> BTRFS_STRIPE_LEN_SHIFT;
4239+
stripe_index = (i + rot) % map->num_stripes;
4240+
physical = map->stripes[stripe_index].physical +
4241+
(rot << BTRFS_STRIPE_LEN_SHIFT);
4242+
4243+
scrub_reset_stripe(stripe);
4244+
set_bit(SCRUB_STRIPE_FLAG_NO_REPORT, &stripe->state);
4245+
ret = scrub_find_fill_first_stripe(bg,
4246+
map->stripes[stripe_index].dev, physical, 1,
4247+
full_stripe_start + (i << BTRFS_STRIPE_LEN_SHIFT),
4248+
BTRFS_STRIPE_LEN, stripe);
4249+
if (ret < 0)
4250+
goto out;
4251+
/*
4252+
* No extent in this data stripe, need to manually mark them
4253+
* initialized to make later read submission happy.
4254+
*/
4255+
if (ret > 0) {
4256+
stripe->logical = full_stripe_start +
4257+
(i << BTRFS_STRIPE_LEN_SHIFT);
4258+
stripe->dev = map->stripes[stripe_index].dev;
4259+
stripe->mirror_num = 1;
4260+
set_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state);
4261+
}
4262+
}
4263+
4264+
/* Check if all data stripes are empty. */
4265+
for (int i = 0; i < data_stripes; i++) {
4266+
stripe = &sctx->raid56_data_stripes[i];
4267+
if (!bitmap_empty(&stripe->extent_sector_bitmap, stripe->nr_sectors)) {
4268+
all_empty = false;
4269+
break;
4270+
}
4271+
}
4272+
if (all_empty) {
4273+
ret = 0;
4274+
goto out;
4275+
}
4276+
4277+
for (int i = 0; i < data_stripes; i++) {
4278+
stripe = &sctx->raid56_data_stripes[i];
4279+
scrub_submit_initial_read(sctx, stripe);
4280+
}
4281+
for (int i = 0; i < data_stripes; i++) {
4282+
stripe = &sctx->raid56_data_stripes[i];
4283+
4284+
wait_event(stripe->repair_wait,
4285+
test_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state));
4286+
}
4287+
/* For now, no zoned support for RAID56. */
4288+
ASSERT(!btrfs_is_zoned(sctx->fs_info));
4289+
4290+
/* Writeback for the repaired sectors. */
4291+
for (int i = 0; i < data_stripes; i++) {
4292+
unsigned long repaired;
4293+
4294+
stripe = &sctx->raid56_data_stripes[i];
4295+
4296+
bitmap_andnot(&repaired, &stripe->init_error_bitmap,
4297+
&stripe->error_bitmap, stripe->nr_sectors);
4298+
scrub_write_sectors(sctx, stripe, repaired, false);
4299+
}
4300+
4301+
/* Wait for the above writebacks to finish. */
4302+
for (int i = 0; i < data_stripes; i++) {
4303+
stripe = &sctx->raid56_data_stripes[i];
4304+
4305+
wait_scrub_stripe_io(stripe);
4306+
}
4307+
4308+
/*
4309+
* Now all data stripes are properly verified. Check if we have any
4310+
* unrepaired, if so abort immediately or we could further corrupt the
4311+
* P/Q stripes.
4312+
*
4313+
* During the loop, also populate extent_bitmap.
4314+
*/
4315+
for (int i = 0; i < data_stripes; i++) {
4316+
unsigned long error;
4317+
4318+
stripe = &sctx->raid56_data_stripes[i];
4319+
4320+
/*
4321+
* We should only check the errors where there is an extent.
4322+
* As we may hit an empty data stripe while it's missing.
4323+
*/
4324+
bitmap_and(&error, &stripe->error_bitmap,
4325+
&stripe->extent_sector_bitmap, stripe->nr_sectors);
4326+
if (!bitmap_empty(&error, stripe->nr_sectors)) {
4327+
btrfs_err(fs_info,
4328+
"unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl",
4329+
full_stripe_start, i, stripe->nr_sectors,
4330+
&error);
4331+
ret = -EIO;
4332+
goto out;
4333+
}
4334+
bitmap_or(&extent_bitmap, &extent_bitmap,
4335+
&stripe->extent_sector_bitmap, stripe->nr_sectors);
4336+
}
4337+
4338+
/* Now we can check and regenerate the P/Q stripe. */
4339+
bio = bio_alloc(NULL, 1, REQ_OP_READ, GFP_NOFS);
4340+
bio->bi_iter.bi_sector = full_stripe_start >> SECTOR_SHIFT;
4341+
bio->bi_private = &io_done;
4342+
bio->bi_end_io = raid56_scrub_wait_endio;
4343+
4344+
btrfs_bio_counter_inc_blocked(fs_info);
4345+
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, full_stripe_start,
4346+
&length, &bioc);
4347+
if (ret < 0) {
4348+
btrfs_put_bioc(bioc);
4349+
btrfs_bio_counter_dec(fs_info);
4350+
goto out;
4351+
}
4352+
rbio = raid56_parity_alloc_scrub_rbio(bio, bioc, scrub_dev, &extent_bitmap,
4353+
BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits);
4354+
btrfs_put_bioc(bioc);
4355+
if (!rbio) {
4356+
ret = -ENOMEM;
4357+
btrfs_bio_counter_dec(fs_info);
4358+
goto out;
4359+
}
4360+
raid56_parity_submit_scrub_rbio(rbio);
4361+
wait_for_completion_io(&io_done);
4362+
ret = blk_status_to_errno(bio->bi_status);
4363+
bio_put(bio);
4364+
btrfs_bio_counter_dec(fs_info);
4365+
4366+
out:
4367+
return ret;
4368+
}
4369+
41984370
/*
41994371
* Scrub one range which can only has simple mirror based profile.
42004372
* (Including all range in SINGLE/DUP/RAID1/RAID1C*, and each stripe in
@@ -4368,7 +4540,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
43684540
/* Offset inside the chunk */
43694541
u64 offset;
43704542
u64 stripe_logical;
4371-
u64 stripe_end;
43724543
int stop_loop = 0;
43734544

43744545
wait_event(sctx->list_wait,
@@ -4383,6 +4554,26 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
43834554
sctx->flush_all_writes = true;
43844555
}
43854556

4557+
/* Prepare the extra data stripes used by RAID56. */
4558+
if (profile & BTRFS_BLOCK_GROUP_RAID56_MASK) {
4559+
ASSERT(sctx->raid56_data_stripes == NULL);
4560+
4561+
sctx->raid56_data_stripes = kcalloc(nr_data_stripes(map),
4562+
sizeof(struct scrub_stripe),
4563+
GFP_KERNEL);
4564+
if (!sctx->raid56_data_stripes) {
4565+
ret = -ENOMEM;
4566+
goto out;
4567+
}
4568+
for (int i = 0; i < nr_data_stripes(map); i++) {
4569+
ret = init_scrub_stripe(fs_info,
4570+
&sctx->raid56_data_stripes[i]);
4571+
if (ret < 0)
4572+
goto out;
4573+
sctx->raid56_data_stripes[i].bg = bg;
4574+
sctx->raid56_data_stripes[i].sctx = sctx;
4575+
}
4576+
}
43864577
/*
43874578
* There used to be a big double loop to handle all profiles using the
43884579
* same routine, which grows larger and more gross over time.
@@ -4436,10 +4627,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
44364627
if (ret) {
44374628
/* it is parity strip */
44384629
stripe_logical += chunk_logical;
4439-
stripe_end = stripe_logical + increment;
4440-
ret = scrub_raid56_parity(sctx, map, scrub_dev,
4441-
stripe_logical,
4442-
stripe_end);
4630+
ret = scrub_raid56_parity_stripe(sctx, scrub_dev, bg,
4631+
map, stripe_logical);
44434632
if (ret)
44444633
goto out;
44454634
goto next;
@@ -4477,6 +4666,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
44774666
scrub_wr_submit(sctx);
44784667
mutex_unlock(&sctx->wr_lock);
44794668
flush_scrub_stripes(sctx);
4669+
if (sctx->raid56_data_stripes) {
4670+
for (int i = 0; i < nr_data_stripes(map); i++)
4671+
release_scrub_stripe(&sctx->raid56_data_stripes[i]);
4672+
kfree(sctx->raid56_data_stripes);
4673+
sctx->raid56_data_stripes = NULL;
4674+
}
44804675

44814676
if (sctx->is_dev_replace && ret >= 0) {
44824677
int ret2;

fs/btrfs/scrub.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,9 @@ int btrfs_scrub_cancel_dev(struct btrfs_device *dev);
1313
int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
1414
struct btrfs_scrub_progress *progress);
1515

16+
/* Temporary declaration, will be deleted later. */
17+
int scrub_raid56_parity(struct scrub_ctx *sctx, struct map_lookup *map,
18+
struct btrfs_device *sdev, u64 logic_start,
19+
u64 logic_end);
20+
1621
#endif

0 commit comments

Comments
 (0)