@@ -98,6 +98,13 @@ enum scrub_stripe_flags {
9898
9999 /* Set when the read-repair is finished. */
100100 SCRUB_STRIPE_FLAG_REPAIR_DONE ,
101+
102+ /*
103+ * Set for data stripes if it's triggered from P/Q stripe.
104+ * During such scrub, we should not report errors in data stripes, nor
105+ * update the accounting.
106+ */
107+ SCRUB_STRIPE_FLAG_NO_REPORT ,
101108};
102109
103110#define SCRUB_STRIPE_PAGES (BTRFS_STRIPE_LEN / PAGE_SIZE)
@@ -279,6 +286,7 @@ struct scrub_parity {
279286struct scrub_ctx {
280287 struct scrub_bio * bios [SCRUB_BIOS_PER_SCTX ];
281288 struct scrub_stripe stripes [SCRUB_STRIPES_PER_SCTX ];
289+ struct scrub_stripe * raid56_data_stripes ;
282290 struct btrfs_fs_info * fs_info ;
283291 int first_free ;
284292 int curr ;
@@ -2490,6 +2498,9 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
24902498 int nr_repaired_sectors = 0 ;
24912499 int sector_nr ;
24922500
2501+ if (test_bit (SCRUB_STRIPE_FLAG_NO_REPORT , & stripe -> state ))
2502+ return ;
2503+
24932504 /*
24942505 * Init needed infos for error reporting.
24952506 *
@@ -3799,11 +3810,8 @@ static int scrub_raid56_data_stripe_for_parity(struct scrub_ctx *sctx,
37993810 return ret ;
38003811}
38013812
3802- static noinline_for_stack int scrub_raid56_parity (struct scrub_ctx * sctx ,
3803- struct map_lookup * map ,
3804- struct btrfs_device * sdev ,
3805- u64 logic_start ,
3806- u64 logic_end )
3813+ int scrub_raid56_parity (struct scrub_ctx * sctx , struct map_lookup * map ,
3814+ struct btrfs_device * sdev , u64 logic_start , u64 logic_end )
38073815{
38083816 struct btrfs_fs_info * fs_info = sctx -> fs_info ;
38093817 struct btrfs_path * path ;
@@ -4171,6 +4179,11 @@ static void flush_scrub_stripes(struct scrub_ctx *sctx)
41714179 sctx -> cur_stripe = 0 ;
41724180}
41734181
4182+ static void raid56_scrub_wait_endio (struct bio * bio )
4183+ {
4184+ complete (bio -> bi_private );
4185+ }
4186+
41744187static int queue_scrub_stripe (struct scrub_ctx * sctx , struct btrfs_block_group * bg ,
41754188 struct btrfs_device * dev , int mirror_num ,
41764189 u64 logical , u32 length , u64 physical )
@@ -4195,6 +4208,165 @@ static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *
41954208 return 0 ;
41964209}
41974210
4211+ static int scrub_raid56_parity_stripe (struct scrub_ctx * sctx ,
4212+ struct btrfs_device * scrub_dev ,
4213+ struct btrfs_block_group * bg ,
4214+ struct map_lookup * map ,
4215+ u64 full_stripe_start )
4216+ {
4217+ DECLARE_COMPLETION_ONSTACK (io_done );
4218+ struct btrfs_fs_info * fs_info = sctx -> fs_info ;
4219+ struct btrfs_raid_bio * rbio ;
4220+ struct btrfs_io_context * bioc = NULL ;
4221+ struct bio * bio ;
4222+ struct scrub_stripe * stripe ;
4223+ bool all_empty = true;
4224+ const int data_stripes = nr_data_stripes (map );
4225+ unsigned long extent_bitmap = 0 ;
4226+ u64 length = data_stripes << BTRFS_STRIPE_LEN_SHIFT ;
4227+ int ret ;
4228+
4229+ ASSERT (sctx -> raid56_data_stripes );
4230+
4231+ for (int i = 0 ; i < data_stripes ; i ++ ) {
4232+ int stripe_index ;
4233+ int rot ;
4234+ u64 physical ;
4235+
4236+ stripe = & sctx -> raid56_data_stripes [i ];
4237+ rot = div_u64 (full_stripe_start - bg -> start ,
4238+ data_stripes ) >> BTRFS_STRIPE_LEN_SHIFT ;
4239+ stripe_index = (i + rot ) % map -> num_stripes ;
4240+ physical = map -> stripes [stripe_index ].physical +
4241+ (rot << BTRFS_STRIPE_LEN_SHIFT );
4242+
4243+ scrub_reset_stripe (stripe );
4244+ set_bit (SCRUB_STRIPE_FLAG_NO_REPORT , & stripe -> state );
4245+ ret = scrub_find_fill_first_stripe (bg ,
4246+ map -> stripes [stripe_index ].dev , physical , 1 ,
4247+ full_stripe_start + (i << BTRFS_STRIPE_LEN_SHIFT ),
4248+ BTRFS_STRIPE_LEN , stripe );
4249+ if (ret < 0 )
4250+ goto out ;
4251+ /*
4252+ * No extent in this data stripe, need to manually mark them
4253+ * initialized to make later read submission happy.
4254+ */
4255+ if (ret > 0 ) {
4256+ stripe -> logical = full_stripe_start +
4257+ (i << BTRFS_STRIPE_LEN_SHIFT );
4258+ stripe -> dev = map -> stripes [stripe_index ].dev ;
4259+ stripe -> mirror_num = 1 ;
4260+ set_bit (SCRUB_STRIPE_FLAG_INITIALIZED , & stripe -> state );
4261+ }
4262+ }
4263+
4264+ /* Check if all data stripes are empty. */
4265+ for (int i = 0 ; i < data_stripes ; i ++ ) {
4266+ stripe = & sctx -> raid56_data_stripes [i ];
4267+ if (!bitmap_empty (& stripe -> extent_sector_bitmap , stripe -> nr_sectors )) {
4268+ all_empty = false;
4269+ break ;
4270+ }
4271+ }
4272+ if (all_empty ) {
4273+ ret = 0 ;
4274+ goto out ;
4275+ }
4276+
4277+ for (int i = 0 ; i < data_stripes ; i ++ ) {
4278+ stripe = & sctx -> raid56_data_stripes [i ];
4279+ scrub_submit_initial_read (sctx , stripe );
4280+ }
4281+ for (int i = 0 ; i < data_stripes ; i ++ ) {
4282+ stripe = & sctx -> raid56_data_stripes [i ];
4283+
4284+ wait_event (stripe -> repair_wait ,
4285+ test_bit (SCRUB_STRIPE_FLAG_REPAIR_DONE , & stripe -> state ));
4286+ }
4287+ /* For now, no zoned support for RAID56. */
4288+ ASSERT (!btrfs_is_zoned (sctx -> fs_info ));
4289+
4290+ /* Writeback for the repaired sectors. */
4291+ for (int i = 0 ; i < data_stripes ; i ++ ) {
4292+ unsigned long repaired ;
4293+
4294+ stripe = & sctx -> raid56_data_stripes [i ];
4295+
4296+ bitmap_andnot (& repaired , & stripe -> init_error_bitmap ,
4297+ & stripe -> error_bitmap , stripe -> nr_sectors );
4298+ scrub_write_sectors (sctx , stripe , repaired , false);
4299+ }
4300+
4301+ /* Wait for the above writebacks to finish. */
4302+ for (int i = 0 ; i < data_stripes ; i ++ ) {
4303+ stripe = & sctx -> raid56_data_stripes [i ];
4304+
4305+ wait_scrub_stripe_io (stripe );
4306+ }
4307+
4308+ /*
4309+ * Now all data stripes are properly verified. Check if we have any
4310+ * unrepaired, if so abort immediately or we could further corrupt the
4311+ * P/Q stripes.
4312+ *
4313+ * During the loop, also populate extent_bitmap.
4314+ */
4315+ for (int i = 0 ; i < data_stripes ; i ++ ) {
4316+ unsigned long error ;
4317+
4318+ stripe = & sctx -> raid56_data_stripes [i ];
4319+
4320+ /*
4321+ * We should only check the errors where there is an extent.
4322+ * As we may hit an empty data stripe while it's missing.
4323+ */
4324+ bitmap_and (& error , & stripe -> error_bitmap ,
4325+ & stripe -> extent_sector_bitmap , stripe -> nr_sectors );
4326+ if (!bitmap_empty (& error , stripe -> nr_sectors )) {
4327+ btrfs_err (fs_info ,
4328+ "unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl" ,
4329+ full_stripe_start , i , stripe -> nr_sectors ,
4330+ & error );
4331+ ret = - EIO ;
4332+ goto out ;
4333+ }
4334+ bitmap_or (& extent_bitmap , & extent_bitmap ,
4335+ & stripe -> extent_sector_bitmap , stripe -> nr_sectors );
4336+ }
4337+
4338+ /* Now we can check and regenerate the P/Q stripe. */
4339+ bio = bio_alloc (NULL , 1 , REQ_OP_READ , GFP_NOFS );
4340+ bio -> bi_iter .bi_sector = full_stripe_start >> SECTOR_SHIFT ;
4341+ bio -> bi_private = & io_done ;
4342+ bio -> bi_end_io = raid56_scrub_wait_endio ;
4343+
4344+ btrfs_bio_counter_inc_blocked (fs_info );
4345+ ret = btrfs_map_sblock (fs_info , BTRFS_MAP_WRITE , full_stripe_start ,
4346+ & length , & bioc );
4347+ if (ret < 0 ) {
4348+ btrfs_put_bioc (bioc );
4349+ btrfs_bio_counter_dec (fs_info );
4350+ goto out ;
4351+ }
4352+ rbio = raid56_parity_alloc_scrub_rbio (bio , bioc , scrub_dev , & extent_bitmap ,
4353+ BTRFS_STRIPE_LEN >> fs_info -> sectorsize_bits );
4354+ btrfs_put_bioc (bioc );
4355+ if (!rbio ) {
4356+ ret = - ENOMEM ;
4357+ btrfs_bio_counter_dec (fs_info );
4358+ goto out ;
4359+ }
4360+ raid56_parity_submit_scrub_rbio (rbio );
4361+ wait_for_completion_io (& io_done );
4362+ ret = blk_status_to_errno (bio -> bi_status );
4363+ bio_put (bio );
4364+ btrfs_bio_counter_dec (fs_info );
4365+
4366+ out :
4367+ return ret ;
4368+ }
4369+
41984370/*
41994371 * Scrub one range which can only have a simple mirror based profile.
42004372 * (Including all range in SINGLE/DUP/RAID1/RAID1C*, and each stripe in
@@ -4368,7 +4540,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
43684540 /* Offset inside the chunk */
43694541 u64 offset ;
43704542 u64 stripe_logical ;
4371- u64 stripe_end ;
43724543 int stop_loop = 0 ;
43734544
43744545 wait_event (sctx -> list_wait ,
@@ -4383,6 +4554,26 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
43834554 sctx -> flush_all_writes = true;
43844555 }
43854556
4557+ /* Prepare the extra data stripes used by RAID56. */
4558+ if (profile & BTRFS_BLOCK_GROUP_RAID56_MASK ) {
4559+ ASSERT (sctx -> raid56_data_stripes == NULL );
4560+
4561+ sctx -> raid56_data_stripes = kcalloc (nr_data_stripes (map ),
4562+ sizeof (struct scrub_stripe ),
4563+ GFP_KERNEL );
4564+ if (!sctx -> raid56_data_stripes ) {
4565+ ret = - ENOMEM ;
4566+ goto out ;
4567+ }
4568+ for (int i = 0 ; i < nr_data_stripes (map ); i ++ ) {
4569+ ret = init_scrub_stripe (fs_info ,
4570+ & sctx -> raid56_data_stripes [i ]);
4571+ if (ret < 0 )
4572+ goto out ;
4573+ sctx -> raid56_data_stripes [i ].bg = bg ;
4574+ sctx -> raid56_data_stripes [i ].sctx = sctx ;
4575+ }
4576+ }
43864577 /*
43874578 * There used to be a big double loop to handle all profiles using the
43884579 * same routine, which grows larger and more gross over time.
@@ -4436,10 +4627,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
44364627 if (ret ) {
44374628 /* it is parity strip */
44384629 stripe_logical += chunk_logical ;
4439- stripe_end = stripe_logical + increment ;
4440- ret = scrub_raid56_parity (sctx , map , scrub_dev ,
4441- stripe_logical ,
4442- stripe_end );
4630+ ret = scrub_raid56_parity_stripe (sctx , scrub_dev , bg ,
4631+ map , stripe_logical );
44434632 if (ret )
44444633 goto out ;
44454634 goto next ;
@@ -4477,6 +4666,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
44774666 scrub_wr_submit (sctx );
44784667 mutex_unlock (& sctx -> wr_lock );
44794668 flush_scrub_stripes (sctx );
4669+ if (sctx -> raid56_data_stripes ) {
4670+ for (int i = 0 ; i < nr_data_stripes (map ); i ++ )
4671+ release_scrub_stripe (& sctx -> raid56_data_stripes [i ]);
4672+ kfree (sctx -> raid56_data_stripes );
4673+ sctx -> raid56_data_stripes = NULL ;
4674+ }
44804675
44814676 if (sctx -> is_dev_replace && ret >= 0 ) {
44824677 int ret2 ;
0 commit comments