@@ -121,6 +121,7 @@ struct scrub_stripe {
121121
122122 atomic_t pending_io ;
123123 wait_queue_head_t io_wait ;
124+ wait_queue_head_t repair_wait ;
124125
125126 /*
126127 * Indicate the states of the stripe. Bits are defined in
@@ -156,6 +157,8 @@ struct scrub_stripe {
156157 * group.
157158 */
158159 u8 * csums ;
160+
161+ struct work_struct work ;
159162};
160163
161164struct scrub_recover {
@@ -381,6 +384,7 @@ int init_scrub_stripe(struct btrfs_fs_info *fs_info, struct scrub_stripe *stripe
381384 stripe -> state = 0 ;
382385
383386 init_waitqueue_head (& stripe -> io_wait );
387+ init_waitqueue_head (& stripe -> repair_wait );
384388 atomic_set (& stripe -> pending_io , 0 );
385389
386390 ret = btrfs_alloc_page_array (SCRUB_STRIPE_PAGES , stripe -> pages );
@@ -403,7 +407,7 @@ int init_scrub_stripe(struct btrfs_fs_info *fs_info, struct scrub_stripe *stripe
403407 return - ENOMEM ;
404408}
405409
/*
 * Sleep on stripe->io_wait until the stripe's pending_io counter reads zero.
 *
 * The two signature lines below are the removed and the added side of the
 * diff: the only change is that the function gains the "static" qualifier.
 */
406- void wait_scrub_stripe_io (struct scrub_stripe * stripe )
410+ static void wait_scrub_stripe_io (struct scrub_stripe * stripe )
407411{
408412 wait_event (stripe -> io_wait , atomic_read (& stripe -> pending_io ) == 0 );
409413}
@@ -2327,7 +2331,7 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
23272331}
23282332
23292333/* Verify specified sectors of a stripe. */
2330- void scrub_verify_one_stripe (struct scrub_stripe * stripe , unsigned long bitmap )
2334+ static void scrub_verify_one_stripe (struct scrub_stripe * stripe , unsigned long bitmap )
23312335{
23322336 struct btrfs_fs_info * fs_info = stripe -> bg -> fs_info ;
23332337 const u32 sectors_per_tree = fs_info -> nodesize >> fs_info -> sectorsize_bits ;
@@ -2340,6 +2344,203 @@ void scrub_verify_one_stripe(struct scrub_stripe *stripe, unsigned long bitmap)
23402344 }
23412345}
23422346
/*
 * Translate a bio_vec back into a sector index inside @stripe.
 *
 * Walks the stripe's sectors in order and returns the index of the first one
 * whose page and in-page offset both match @first_bvec.
 *
 * If no sector matches, the loop ends with i == stripe->nr_sectors; that case
 * is only guarded by the ASSERT() before the return.
 */
2347+ static int calc_sector_number (struct scrub_stripe * stripe , struct bio_vec * first_bvec )
2348+ {
2349+ int i ;
2350+
2351+ for (i = 0 ; i < stripe -> nr_sectors ; i ++ ) {
/* A sector is identified by its (page, offset-in-page) pair. */
2352+ if (scrub_stripe_get_page (stripe , i ) == first_bvec -> bv_page &&
2353+ scrub_stripe_get_page_offset (stripe , i ) == first_bvec -> bv_offset )
2354+ break ;
2355+ }
2356+ ASSERT (i < stripe -> nr_sectors );
2357+ return i ;
2358+ }
2359+
2360+ /*
2361+ * Repair read is different to the regular read:
2362+ *
2363+ * - Only reads the failed sectors
2364+ * - May have extra blocksize limits
2365+ */
/*
 * Completion handler for one repair-read bio.
 *
 * The stripe is taken from bbio->private. The sector range covered by the bio
 * is recovered from the bio itself: the starting sector comes from its first
 * bio_vec, the length from the sum of all bio_vec lengths.
 *
 * On I/O error the covered sectors are set in both io_error_bitmap and
 * error_bitmap. On success only io_error_bitmap is cleared for that range;
 * error_bitmap is not touched here.
 *
 * Drops the bio reference and, when this was the last pending I/O of the
 * stripe, wakes up waiters on stripe->io_wait.
 */
2366+ static void scrub_repair_read_endio (struct btrfs_bio * bbio )
2367+ {
2368+ struct scrub_stripe * stripe = bbio -> private ;
2369+ struct btrfs_fs_info * fs_info = stripe -> bg -> fs_info ;
2370+ struct bio_vec * bvec ;
2371+ int sector_nr = calc_sector_number (stripe , bio_first_bvec_all (& bbio -> bio ));
2372+ u32 bio_size = 0 ;
2373+ int i ;
2374+
2375+ ASSERT (sector_nr < stripe -> nr_sectors );
2376+
/* Total bio length in bytes, accumulated over every bio_vec of the bio. */
2377+ bio_for_each_bvec_all (bvec , & bbio -> bio , i )
2378+ bio_size += bvec -> bv_len ;
2379+
/* bio_size >> sectorsize_bits converts the byte length to a sector count. */
2380+ if (bbio -> bio .bi_status ) {
2381+ bitmap_set (& stripe -> io_error_bitmap , sector_nr ,
2382+ bio_size >> fs_info -> sectorsize_bits );
2383+ bitmap_set (& stripe -> error_bitmap , sector_nr ,
2384+ bio_size >> fs_info -> sectorsize_bits );
2385+ } else {
2386+ bitmap_clear (& stripe -> io_error_bitmap , sector_nr ,
2387+ bio_size >> fs_info -> sectorsize_bits );
2388+ }
2389+ bio_put (& bbio -> bio );
/* Only the completion that brings pending_io to zero issues the wake-up. */
2390+ if (atomic_dec_and_test (& stripe -> pending_io ))
2391+ wake_up (& stripe -> io_wait );
2392+ }
2393+
/*
 * Return the mirror number that follows @mirror, cycling through the range
 * [1, @num_copies]: any value whose successor would exceed @num_copies wraps
 * back to 1.
 *
 * @mirror is asserted to be no larger than @num_copies.
 */
2394+ static int calc_next_mirror (int mirror , int num_copies )
2395+ {
2396+ ASSERT (mirror <= num_copies );
2397+ return (mirror + 1 > num_copies ) ? 1 : mirror + 1 ;
2398+ }
2399+
/*
 * Build and submit read bios for every sector currently flagged in the
 * stripe's error bitmap.
 *
 * @stripe:    stripe whose failed sectors are re-read into its own pages
 * @mirror:    mirror number handed to btrfs_submit_bio() for each bio
 * @blocksize: once a bio has reached this many bytes it is submitted and a
 *             new bio is started for the next sector
 * @wait:      when true, wait for the stripe's pending I/O to drain right
 *             after each submission
 *
 * Sectors are appended to the current bio one at a time. A bio is closed and
 * submitted when the previous sector is not flagged as an error or when the
 * @blocksize limit is reached. Each submission bumps stripe->pending_io;
 * scrub_repair_read_endio() is installed as the completion handler.
 *
 * The caller must ensure there is no pending I/O on the stripe on entry
 * (asserted below).
 */
2400+ static void scrub_stripe_submit_repair_read (struct scrub_stripe * stripe ,
2401+ int mirror , int blocksize , bool wait )
2402+ {
2403+ struct btrfs_fs_info * fs_info = stripe -> bg -> fs_info ;
2404+ struct btrfs_bio * bbio = NULL ;
/* Snapshot of the error bitmap taken on entry; the loop iterates over it. */
2405+ const unsigned long old_error_bitmap = stripe -> error_bitmap ;
2406+ int i ;
2407+
2408+ ASSERT (stripe -> mirror_num >= 1 );
2409+ ASSERT (atomic_read (& stripe -> pending_io ) == 0 );
2410+
2411+ for_each_set_bit (i , & old_error_bitmap , stripe -> nr_sectors ) {
2412+ struct page * page ;
2413+ int pgoff ;
2414+ int ret ;
2415+
2416+ page = scrub_stripe_get_page (stripe , i );
2417+ pgoff = scrub_stripe_get_page_offset (stripe , i );
2418+
/*
 * NOTE(review): the adjacency test below reads the live stripe->error_bitmap
 * while the iteration uses the old_error_bitmap snapshot - confirm the two
 * cannot diverge while earlier bios are still in flight.
 */
2419+ /* The current sector cannot be merged, submit the bio. */
2420+ if (bbio && ((i > 0 && !test_bit (i - 1 , & stripe -> error_bitmap )) ||
2421+ bbio -> bio .bi_iter .bi_size >= blocksize )) {
2422+ ASSERT (bbio -> bio .bi_iter .bi_size );
/* Account for the bio before it is submitted. */
2423+ atomic_inc (& stripe -> pending_io );
2424+ btrfs_submit_bio (bbio , mirror );
2425+ if (wait )
2426+ wait_scrub_stripe_io (stripe );
2427+ bbio = NULL ;
2428+ }
2429+
/*
 * Start a new bio at sector i: the byte address is stripe->logical plus the
 * byte offset of sector i, converted to bi_sector units with SECTOR_SHIFT.
 * NOTE(review): the result of btrfs_bio_alloc() is used without a NULL
 * check - presumably it cannot fail; confirm.
 */
2430+ if (!bbio ) {
2431+ bbio = btrfs_bio_alloc (stripe -> nr_sectors , REQ_OP_READ ,
2432+ fs_info , scrub_repair_read_endio , stripe );
2433+ bbio -> bio .bi_iter .bi_sector = (stripe -> logical +
2434+ (i << fs_info -> sectorsize_bits )) >> SECTOR_SHIFT ;
2435+ }
2436+
/* Append exactly one sector; a short add is treated as a programming error. */
2437+ ret = bio_add_page (& bbio -> bio , page , fs_info -> sectorsize , pgoff );
2438+ ASSERT (ret == fs_info -> sectorsize );
2439+ }
/* Submit whatever bio is still open after the last flagged sector. */
2440+ if (bbio ) {
2441+ ASSERT (bbio -> bio .bi_iter .bi_size );
2442+ atomic_inc (& stripe -> pending_io );
2443+ btrfs_submit_bio (bbio , mirror );
2444+ if (wait )
2445+ wait_scrub_stripe_io (stripe );
2446+ }
2447+ }
2448+
2449+ /*
2450+ * The main entrance for all read related scrub work, including:
2451+ *
2452+ * - Wait for the initial read to finish
2453+ * - Verify and locate any bad sectors
2454+ * - Go through the remaining mirrors and try to read as large blocksize as
2455+ * possible
2456+ * - Go through all mirrors (including the failed mirror) sector-by-sector
2457+ *
2458+ * Writeback does not happen here, it needs extra synchronization.
2459+ */
/*
 * Work function; @work is the work_struct embedded in a struct scrub_stripe.
 *
 * Whatever path is taken, the function finishes by setting
 * SCRUB_STRIPE_FLAG_REPAIR_DONE in stripe->state and waking up waiters on
 * stripe->repair_wait.
 */
2460+ static void scrub_stripe_read_repair_worker (struct work_struct * work )
2461+ {
2462+ struct scrub_stripe * stripe = container_of (work , struct scrub_stripe , work );
2463+ struct btrfs_fs_info * fs_info = stripe -> bg -> fs_info ;
2464+ int num_copies = btrfs_num_copies (fs_info , stripe -> bg -> start ,
2465+ stripe -> bg -> length );
2466+ int mirror ;
2467+ int i ;
2468+
2469+ ASSERT (stripe -> mirror_num > 0 );
2470+
2471+ wait_scrub_stripe_io (stripe );
2472+ scrub_verify_one_stripe (stripe , stripe -> extent_sector_bitmap );
2473+ /* Save the initial failed bitmap for later repair and report usage. */
2474+ stripe -> init_error_bitmap = stripe -> error_bitmap ;
2475+
/* Nothing failed verification: skip both repair passes. */
2476+ if (bitmap_empty (& stripe -> init_error_bitmap , stripe -> nr_sectors ))
2477+ goto out ;
2478+
2479+ /*
2480+ * Try all remaining mirrors.
2481+ *
2482+ * Here we still try to read as large block as possible, as this is
2483+ * faster and we have extra safety nets to rely on.
2484+ */
/*
 * Starts at the mirror after stripe->mirror_num and stops once the rotation
 * comes back to it. With a single copy calc_next_mirror() returns the
 * starting mirror right away, so this loop does not run.
 */
2485+ for (mirror = calc_next_mirror (stripe -> mirror_num , num_copies );
2486+ mirror != stripe -> mirror_num ;
2487+ mirror = calc_next_mirror (mirror , num_copies )) {
/* Remember which sectors were bad before this round; only those are re-verified. */
2488+ const unsigned long old_error_bitmap = stripe -> error_bitmap ;
2489+
2490+ scrub_stripe_submit_repair_read (stripe , mirror ,
2491+ BTRFS_STRIPE_LEN , false);
2492+ wait_scrub_stripe_io (stripe );
2493+ scrub_verify_one_stripe (stripe , old_error_bitmap );
2494+ if (bitmap_empty (& stripe -> error_bitmap , stripe -> nr_sectors ))
2495+ goto out ;
2496+ }
2497+
2498+ /*
2499+ * Last safety net, try re-checking all mirrors, including the failed
2500+ * one, sector-by-sector.
2501+ *
2502+ * As if one sector failed the drive's internal csum, the whole read
2503+ * containing the offending sector would be marked as error.
2504+ * Thus here we do sector-by-sector read.
2505+ *
2506+ * This can be slow, thus we only try it as the last resort.
2507+ */
2508+
/*
 * Exactly num_copies rounds, beginning with stripe->mirror_num itself. Each
 * bio is limited to one sector and is waited for before the next is sent.
 */
2509+ for (i = 0 , mirror = stripe -> mirror_num ;
2510+ i < num_copies ;
2511+ i ++ , mirror = calc_next_mirror (mirror , num_copies )) {
2512+ const unsigned long old_error_bitmap = stripe -> error_bitmap ;
2513+
2514+ scrub_stripe_submit_repair_read (stripe , mirror ,
2515+ fs_info -> sectorsize , true);
/*
 * NOTE(review): the submission above already waits after every bio
 * (wait == true), so this extra wait looks redundant - confirm.
 */
2516+ wait_scrub_stripe_io (stripe );
2517+ scrub_verify_one_stripe (stripe , old_error_bitmap );
2518+ if (bitmap_empty (& stripe -> error_bitmap , stripe -> nr_sectors ))
2519+ goto out ;
2520+ }
2521+ out :
2522+ set_bit (SCRUB_STRIPE_FLAG_REPAIR_DONE , & stripe -> state );
2523+ wake_up (& stripe -> repair_wait );
2524+ }
2525+
/*
 * Completion handler for a whole-stripe read bio; the stripe is taken from
 * bbio->private. Presumably this is the endio of the initial stripe read -
 * confirm against the submitter, which is not visible here.
 *
 * On I/O error every sector of the stripe is set in both io_error_bitmap and
 * error_bitmap. On success io_error_bitmap is cleared for the whole stripe;
 * error_bitmap is not touched here.
 *
 * When the last pending I/O of the stripe completes, waiters on
 * stripe->io_wait are woken and scrub_stripe_read_repair_worker() is queued
 * on fs_info->scrub_workers through stripe->work.
 */
2526+ void scrub_read_endio (struct btrfs_bio * bbio )
2527+ {
2528+ struct scrub_stripe * stripe = bbio -> private ;
2529+
2530+ if (bbio -> bio .bi_status ) {
2531+ bitmap_set (& stripe -> io_error_bitmap , 0 , stripe -> nr_sectors );
2532+ bitmap_set (& stripe -> error_bitmap , 0 , stripe -> nr_sectors );
2533+ } else {
2534+ bitmap_clear (& stripe -> io_error_bitmap , 0 , stripe -> nr_sectors );
2535+ }
2536+ bio_put (& bbio -> bio );
/* Only the completion that drops pending_io to zero hands off to the worker. */
2537+ if (atomic_dec_and_test (& stripe -> pending_io )) {
2538+ wake_up (& stripe -> io_wait );
/* The work item is (re)initialised on each hand-off, right before queueing. */
2539+ INIT_WORK (& stripe -> work , scrub_stripe_read_repair_worker );
2540+ queue_work (stripe -> bg -> fs_info -> scrub_workers , & stripe -> work );
2541+ }
2542+ }
2543+
23432544static int scrub_checksum_tree_block (struct scrub_block * sblock )
23442545{
23452546 struct scrub_ctx * sctx = sblock -> sctx ;
0 commit comments