@@ -50,6 +50,7 @@ struct scrub_ctx;
5050 */
5151#define SCRUB_SECTORS_PER_BIO 32 /* 128KiB per bio for 4KiB pages */
5252#define SCRUB_BIOS_PER_SCTX 64 /* 8MiB per device in flight for 4KiB pages */
53+ #define SCRUB_STRIPES_PER_SCTX 8 /* That would be 8 64KiB stripes per device. */
5354
5455/*
5556 * The following value times PAGE_SIZE needs to be large enough to match the
@@ -277,9 +278,11 @@ struct scrub_parity {
277278
278279struct scrub_ctx {
279280 struct scrub_bio * bios [SCRUB_BIOS_PER_SCTX ];
281+ struct scrub_stripe stripes [SCRUB_STRIPES_PER_SCTX ];
280282 struct btrfs_fs_info * fs_info ;
281283 int first_free ;
282284 int curr ;
285+ int cur_stripe ;
283286 atomic_t bios_in_flight ;
284287 atomic_t workers_pending ;
285288 spinlock_t list_lock ;
@@ -389,7 +392,8 @@ static void release_scrub_stripe(struct scrub_stripe *stripe)
389392 stripe -> state = 0 ;
390393}
391394
392- int init_scrub_stripe (struct btrfs_fs_info * fs_info , struct scrub_stripe * stripe )
395+ static int init_scrub_stripe (struct btrfs_fs_info * fs_info ,
396+ struct scrub_stripe * stripe )
393397{
394398 int ret ;
395399
@@ -895,6 +899,9 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
895899 kfree (sbio );
896900 }
897901
902+ for (i = 0 ; i < SCRUB_STRIPES_PER_SCTX ; i ++ )
903+ release_scrub_stripe (& sctx -> stripes [i ]);
904+
898905 kfree (sctx -> wr_curr_bio );
899906 scrub_free_csums (sctx );
900907 kfree (sctx );
@@ -939,6 +946,14 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
939946 else
940947 sctx -> bios [i ]-> next_free = -1 ;
941948 }
949+ for (i = 0 ; i < SCRUB_STRIPES_PER_SCTX ; i ++ ) {
950+ int ret ;
951+
952+ ret = init_scrub_stripe (fs_info , & sctx -> stripes [i ]);
953+ if (ret < 0 )
954+ goto nomem ;
955+ sctx -> stripes [i ].sctx = sctx ;
956+ }
942957 sctx -> first_free = 0 ;
943958 atomic_set (& sctx -> bios_in_flight , 0 );
944959 atomic_set (& sctx -> workers_pending , 0 );
@@ -2668,7 +2683,7 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
26682683 wake_up (& stripe -> repair_wait );
26692684}
26702685
2671- void scrub_read_endio (struct btrfs_bio * bbio )
2686+ static void scrub_read_endio (struct btrfs_bio * bbio )
26722687{
26732688 struct scrub_stripe * stripe = bbio -> private ;
26742689
@@ -2725,8 +2740,8 @@ static void scrub_write_endio(struct btrfs_bio *bbio)
27252740 *
27262741 * - Handle dev-replace and read-repair writeback differently
27272742 */
2728- void scrub_write_sectors (struct scrub_ctx * sctx , struct scrub_stripe * stripe ,
2729- unsigned long write_bitmap , bool dev_replace )
2743+ static void scrub_write_sectors (struct scrub_ctx * sctx , struct scrub_stripe * stripe ,
2744+ unsigned long write_bitmap , bool dev_replace )
27302745{
27312746 struct btrfs_fs_info * fs_info = stripe -> bg -> fs_info ;
27322747 struct btrfs_bio * bbio = NULL ;
@@ -4294,10 +4309,11 @@ static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe)
42944309 * Return >0 if there is no such stripe in the specified range.
42954310 * Return <0 for error.
42964311 */
4297- int scrub_find_fill_first_stripe (struct btrfs_block_group * bg ,
4298- struct btrfs_device * dev , u64 physical ,
4299- int mirror_num , u64 logical_start ,
4300- u32 logical_len , struct scrub_stripe * stripe )
4312+ static int scrub_find_fill_first_stripe (struct btrfs_block_group * bg ,
4313+ struct btrfs_device * dev , u64 physical ,
4314+ int mirror_num , u64 logical_start ,
4315+ u32 logical_len ,
4316+ struct scrub_stripe * stripe )
43014317{
43024318 struct btrfs_fs_info * fs_info = bg -> fs_info ;
43034319 struct btrfs_root * extent_root = btrfs_extent_root (fs_info , bg -> start );
@@ -4406,6 +4422,159 @@ int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
44064422 return ret ;
44074423}
44084424
4425+ static void scrub_reset_stripe (struct scrub_stripe * stripe )
4426+ {
4427+ scrub_stripe_reset_bitmaps (stripe );
4428+
4429+ stripe -> nr_meta_extents = 0 ;
4430+ stripe -> nr_data_extents = 0 ;
4431+ stripe -> state = 0 ;
4432+
4433+ for (int i = 0 ; i < stripe -> nr_sectors ; i ++ ) {
4434+ stripe -> sectors [i ].is_metadata = false;
4435+ stripe -> sectors [i ].csum = NULL ;
4436+ stripe -> sectors [i ].generation = 0 ;
4437+ }
4438+ }
4439+
4440+ static void scrub_submit_initial_read (struct scrub_ctx * sctx ,
4441+ struct scrub_stripe * stripe )
4442+ {
4443+ struct btrfs_fs_info * fs_info = sctx -> fs_info ;
4444+ struct btrfs_bio * bbio ;
4445+ int mirror = stripe -> mirror_num ;
4446+
4447+ ASSERT (stripe -> bg );
4448+ ASSERT (stripe -> mirror_num > 0 );
4449+ ASSERT (test_bit (SCRUB_STRIPE_FLAG_INITIALIZED , & stripe -> state ));
4450+
4451+ bbio = btrfs_bio_alloc (SCRUB_STRIPE_PAGES , REQ_OP_READ , fs_info ,
4452+ scrub_read_endio , stripe );
4453+
4454+ /* Read the whole stripe. */
4455+ bbio -> bio .bi_iter .bi_sector = stripe -> logical >> SECTOR_SHIFT ;
4456+ for (int i = 0 ; i < BTRFS_STRIPE_LEN >> PAGE_SHIFT ; i ++ ) {
4457+ int ret ;
4458+
4459+ ret = bio_add_page (& bbio -> bio , stripe -> pages [i ], PAGE_SIZE , 0 );
4460+ /* We should have allocated enough bio vectors. */
4461+ ASSERT (ret == PAGE_SIZE );
4462+ }
4463+ atomic_inc (& stripe -> pending_io );
4464+
4465+ /*
4466+ * For dev-replace, either user asks to avoid the source dev, or
4467+ * the device is missing, we try the next mirror instead.
4468+ */
4469+ if (sctx -> is_dev_replace &&
4470+ (fs_info -> dev_replace .cont_reading_from_srcdev_mode ==
4471+ BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID ||
4472+ !stripe -> dev -> bdev )) {
4473+ int num_copies = btrfs_num_copies (fs_info , stripe -> bg -> start ,
4474+ stripe -> bg -> length );
4475+
4476+ mirror = calc_next_mirror (mirror , num_copies );
4477+ }
4478+ btrfs_submit_bio (bbio , mirror );
4479+ }
4480+
4481+ static void flush_scrub_stripes (struct scrub_ctx * sctx )
4482+ {
4483+ struct btrfs_fs_info * fs_info = sctx -> fs_info ;
4484+ struct scrub_stripe * stripe ;
4485+ const int nr_stripes = sctx -> cur_stripe ;
4486+
4487+ if (!nr_stripes )
4488+ return ;
4489+
4490+ ASSERT (test_bit (SCRUB_STRIPE_FLAG_INITIALIZED , & sctx -> stripes [0 ].state ));
4491+ for (int i = 0 ; i < nr_stripes ; i ++ ) {
4492+ stripe = & sctx -> stripes [i ];
4493+ scrub_submit_initial_read (sctx , stripe );
4494+ }
4495+
4496+ for (int i = 0 ; i < nr_stripes ; i ++ ) {
4497+ stripe = & sctx -> stripes [i ];
4498+
4499+ wait_event (stripe -> repair_wait ,
4500+ test_bit (SCRUB_STRIPE_FLAG_REPAIR_DONE , & stripe -> state ));
4501+ }
4502+
4503+ /*
4504+ * Submit the repaired sectors. For zoned case, we cannot do repair
4505+ * in-place, but queue the bg to be relocated.
4506+ */
4507+ if (btrfs_is_zoned (fs_info )) {
4508+ for (int i = 0 ; i < nr_stripes ; i ++ ) {
4509+ stripe = & sctx -> stripes [i ];
4510+
4511+ if (!bitmap_empty (& stripe -> error_bitmap , stripe -> nr_sectors )) {
4512+ btrfs_repair_one_zone (fs_info ,
4513+ sctx -> stripes [0 ].bg -> start );
4514+ break ;
4515+ }
4516+ }
4517+ } else {
4518+ for (int i = 0 ; i < nr_stripes ; i ++ ) {
4519+ unsigned long repaired ;
4520+
4521+ stripe = & sctx -> stripes [i ];
4522+
4523+ bitmap_andnot (& repaired , & stripe -> init_error_bitmap ,
4524+ & stripe -> error_bitmap , stripe -> nr_sectors );
4525+ scrub_write_sectors (sctx , stripe , repaired , false);
4526+ }
4527+ }
4528+
4529+ /* Submit for dev-replace. */
4530+ if (sctx -> is_dev_replace ) {
4531+ for (int i = 0 ; i < nr_stripes ; i ++ ) {
4532+ unsigned long good ;
4533+
4534+ stripe = & sctx -> stripes [i ];
4535+
4536+ ASSERT (stripe -> dev == fs_info -> dev_replace .srcdev );
4537+
4538+ bitmap_andnot (& good , & stripe -> extent_sector_bitmap ,
4539+ & stripe -> error_bitmap , stripe -> nr_sectors );
4540+ scrub_write_sectors (sctx , stripe , good , true);
4541+ }
4542+ }
4543+
4544+ /* Wait for the above writebacks to finish. */
4545+ for (int i = 0 ; i < nr_stripes ; i ++ ) {
4546+ stripe = & sctx -> stripes [i ];
4547+
4548+ wait_scrub_stripe_io (stripe );
4549+ scrub_reset_stripe (stripe );
4550+ }
4551+ sctx -> cur_stripe = 0 ;
4552+ }
4553+
4554+ int queue_scrub_stripe (struct scrub_ctx * sctx , struct btrfs_block_group * bg ,
4555+ struct btrfs_device * dev , int mirror_num ,
4556+ u64 logical , u32 length , u64 physical )
4557+ {
4558+ struct scrub_stripe * stripe ;
4559+ int ret ;
4560+
4561+ /* No available slot, submit all stripes and wait for them. */
4562+ if (sctx -> cur_stripe >= SCRUB_STRIPES_PER_SCTX )
4563+ flush_scrub_stripes (sctx );
4564+
4565+ stripe = & sctx -> stripes [sctx -> cur_stripe ];
4566+
4567+ /* We can queue one stripe using the remaining slot. */
4568+ scrub_reset_stripe (stripe );
4569+ ret = scrub_find_fill_first_stripe (bg , dev , physical , mirror_num ,
4570+ logical , length , stripe );
4571+ /* Either >0 as no more extents or <0 for error. */
4572+ if (ret )
4573+ return ret ;
4574+ sctx -> cur_stripe ++ ;
4575+ return 0 ;
4576+ }
4577+
44094578/*
44104579 * Scrub one range which can only have a simple mirror based profile.
44114580 * (Including all range in SINGLE/DUP/RAID1/RAID1C*, and each stripe in
0 commit comments