4141struct scrub_ctx ;
4242
4343/*
44- * The following three values only influence the performance.
44+ * The following value only influences the performance.
4545 *
46- * The last one configures the number of parallel and outstanding I/O
47- * operations. The first one configures an upper limit for the number
48- * of (dynamically allocated) pages that are added to a bio.
46+ * This determines the batch size for stripes submitted in one go.
4947 */
50- #define SCRUB_SECTORS_PER_BIO 32 /* 128KiB per bio for 4KiB pages */
51- #define SCRUB_BIOS_PER_SCTX 64 /* 8MiB per device in flight for 4KiB pages */
5248#define SCRUB_STRIPES_PER_SCTX 8 /* That would be 8 64K stripe per-device. */
5349
5450/*
@@ -57,19 +53,6 @@ struct scrub_ctx;
5753 */
5854#define SCRUB_MAX_SECTORS_PER_BLOCK (BTRFS_MAX_METADATA_BLOCKSIZE / SZ_4K)
5955
60- #define SCRUB_MAX_PAGES (DIV_ROUND_UP(BTRFS_MAX_METADATA_BLOCKSIZE, PAGE_SIZE))
61-
62- /*
63- * Maximum number of mirrors that can be available for all profiles counting
64- * the target device of dev-replace as one. During an active device replace
65- * procedure, the target device of the copy operation is a mirror for the
66- * filesystem data as well that can be used to read data in order to repair
67- * read errors on other disks.
68- *
69- * Current value is derived from RAID1C4 with 4 copies.
70- */
71- #define BTRFS_MAX_MIRRORS (4 + 1)
72-
7356/* Represent one sector and its needed info to verify the content. */
7457struct scrub_sector_verification {
7558 bool is_metadata ;
@@ -182,31 +165,12 @@ struct scrub_stripe {
182165 struct work_struct work ;
183166};
184167
185- struct scrub_bio {
186- int index ;
187- struct scrub_ctx * sctx ;
188- struct btrfs_device * dev ;
189- struct bio * bio ;
190- blk_status_t status ;
191- u64 logical ;
192- u64 physical ;
193- int sector_count ;
194- int next_free ;
195- struct work_struct work ;
196- };
197-
198168struct scrub_ctx {
199- struct scrub_bio * bios [SCRUB_BIOS_PER_SCTX ];
200169 struct scrub_stripe stripes [SCRUB_STRIPES_PER_SCTX ];
201170 struct scrub_stripe * raid56_data_stripes ;
202171 struct btrfs_fs_info * fs_info ;
203172 int first_free ;
204- int curr ;
205173 int cur_stripe ;
206- atomic_t bios_in_flight ;
207- atomic_t workers_pending ;
208- spinlock_t list_lock ;
209- wait_queue_head_t list_wait ;
210174 struct list_head csum_list ;
211175 atomic_t cancel_req ;
212176 int readonly ;
@@ -305,22 +269,8 @@ static void wait_scrub_stripe_io(struct scrub_stripe *stripe)
305269 wait_event (stripe -> io_wait , atomic_read (& stripe -> pending_io ) == 0 );
306270}
307271
308- static void scrub_bio_end_io_worker (struct work_struct * work );
309272static void scrub_put_ctx (struct scrub_ctx * sctx );
310273
311- static void scrub_pending_bio_inc (struct scrub_ctx * sctx )
312- {
313- refcount_inc (& sctx -> refs );
314- atomic_inc (& sctx -> bios_in_flight );
315- }
316-
317- static void scrub_pending_bio_dec (struct scrub_ctx * sctx )
318- {
319- atomic_dec (& sctx -> bios_in_flight );
320- wake_up (& sctx -> list_wait );
321- scrub_put_ctx (sctx );
322- }
323-
324274static void __scrub_blocked_if_needed (struct btrfs_fs_info * fs_info )
325275{
326276 while (atomic_read (& fs_info -> scrub_pause_req )) {
@@ -371,21 +321,6 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
371321 if (!sctx )
372322 return ;
373323
374- /* this can happen when scrub is cancelled */
375- if (sctx -> curr != -1 ) {
376- struct scrub_bio * sbio = sctx -> bios [sctx -> curr ];
377-
378- bio_put (sbio -> bio );
379- }
380-
381- for (i = 0 ; i < SCRUB_BIOS_PER_SCTX ; ++ i ) {
382- struct scrub_bio * sbio = sctx -> bios [i ];
383-
384- if (!sbio )
385- break ;
386- kfree (sbio );
387- }
388-
389324 for (i = 0 ; i < SCRUB_STRIPES_PER_SCTX ; i ++ )
390325 release_scrub_stripe (& sctx -> stripes [i ]);
391326
@@ -410,28 +345,8 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
410345 goto nomem ;
411346 refcount_set (& sctx -> refs , 1 );
412347 sctx -> is_dev_replace = is_dev_replace ;
413- sctx -> sectors_per_bio = SCRUB_SECTORS_PER_BIO ;
414- sctx -> curr = -1 ;
415348 sctx -> fs_info = fs_info ;
416349 INIT_LIST_HEAD (& sctx -> csum_list );
417- for (i = 0 ; i < SCRUB_BIOS_PER_SCTX ; ++ i ) {
418- struct scrub_bio * sbio ;
419-
420- sbio = kzalloc (sizeof (* sbio ), GFP_KERNEL );
421- if (!sbio )
422- goto nomem ;
423- sctx -> bios [i ] = sbio ;
424-
425- sbio -> index = i ;
426- sbio -> sctx = sctx ;
427- sbio -> sector_count = 0 ;
428- INIT_WORK (& sbio -> work , scrub_bio_end_io_worker );
429-
430- if (i != SCRUB_BIOS_PER_SCTX - 1 )
431- sctx -> bios [i ]-> next_free = i + 1 ;
432- else
433- sctx -> bios [i ]-> next_free = -1 ;
434- }
435350 for (i = 0 ; i < SCRUB_STRIPES_PER_SCTX ; i ++ ) {
436351 int ret ;
437352
@@ -441,13 +356,9 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
441356 sctx -> stripes [i ].sctx = sctx ;
442357 }
443358 sctx -> first_free = 0 ;
444- atomic_set (& sctx -> bios_in_flight , 0 );
445- atomic_set (& sctx -> workers_pending , 0 );
446359 atomic_set (& sctx -> cancel_req , 0 );
447360
448- spin_lock_init (& sctx -> list_lock );
449361 spin_lock_init (& sctx -> stat_lock );
450- init_waitqueue_head (& sctx -> list_wait );
451362 sctx -> throttle_deadline = 0 ;
452363
453364 mutex_init (& sctx -> wr_lock );
@@ -1286,6 +1197,10 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str
12861197 }
12871198}
12881199
1200+ /*
1201+ * Throttling of IO submission, bandwidth-limit based, the timeslice is 1
1202+ * second. Limit can be set via /sys/fs/btrfs/UUID/devinfo/devid/scrub_speed_max.
1203+ */
12891204static void scrub_throttle_dev_io (struct scrub_ctx * sctx , struct btrfs_device * device ,
12901205 unsigned int bio_size )
12911206{
@@ -1338,112 +1253,6 @@ static void scrub_throttle_dev_io(struct scrub_ctx *sctx, struct btrfs_device *d
13381253 sctx -> throttle_deadline = 0 ;
13391254}
13401255
1341- /*
1342- * Throttling of IO submission, bandwidth-limit based, the timeslice is 1
1343- * second. Limit can be set via /sys/fs/UUID/devinfo/devid/scrub_speed_max.
1344- */
1345- static void scrub_throttle (struct scrub_ctx * sctx )
1346- {
1347- struct scrub_bio * sbio = sctx -> bios [sctx -> curr ];
1348-
1349- scrub_throttle_dev_io (sctx , sbio -> dev , sbio -> bio -> bi_iter .bi_size );
1350- }
1351-
1352- static void scrub_submit (struct scrub_ctx * sctx )
1353- {
1354- struct scrub_bio * sbio ;
1355-
1356- if (sctx -> curr == -1 )
1357- return ;
1358-
1359- scrub_throttle (sctx );
1360-
1361- sbio = sctx -> bios [sctx -> curr ];
1362- sctx -> curr = -1 ;
1363- scrub_pending_bio_inc (sctx );
1364- btrfsic_check_bio (sbio -> bio );
1365- submit_bio (sbio -> bio );
1366- }
1367-
1368- static void scrub_bio_end_io_worker (struct work_struct * work )
1369- {
1370- struct scrub_bio * sbio = container_of (work , struct scrub_bio , work );
1371- struct scrub_ctx * sctx = sbio -> sctx ;
1372-
1373- ASSERT (sbio -> sector_count <= SCRUB_SECTORS_PER_BIO );
1374-
1375- bio_put (sbio -> bio );
1376- sbio -> bio = NULL ;
1377- spin_lock (& sctx -> list_lock );
1378- sbio -> next_free = sctx -> first_free ;
1379- sctx -> first_free = sbio -> index ;
1380- spin_unlock (& sctx -> list_lock );
1381-
1382- scrub_pending_bio_dec (sctx );
1383- }
1384-
1385- static void drop_csum_range (struct scrub_ctx * sctx , struct btrfs_ordered_sum * sum )
1386- {
1387- sctx -> stat .csum_discards += sum -> len >> sctx -> fs_info -> sectorsize_bits ;
1388- list_del (& sum -> list );
1389- kfree (sum );
1390- }
1391-
1392- /*
1393- * Find the desired csum for range [logical, logical + sectorsize), and store
1394- * the csum into @csum.
1395- *
1396- * The search source is sctx->csum_list, which is a pre-populated list
1397- * storing bytenr ordered csum ranges. We're responsible to cleanup any range
1398- * that is before @logical.
1399- *
1400- * Return 0 if there is no csum for the range.
1401- * Return 1 if there is csum for the range and copied to @csum.
1402- */
1403- int scrub_find_csum (struct scrub_ctx * sctx , u64 logical , u8 * csum )
1404- {
1405- bool found = false;
1406-
1407- while (!list_empty (& sctx -> csum_list )) {
1408- struct btrfs_ordered_sum * sum = NULL ;
1409- unsigned long index ;
1410- unsigned long num_sectors ;
1411-
1412- sum = list_first_entry (& sctx -> csum_list ,
1413- struct btrfs_ordered_sum , list );
1414- /* The current csum range is beyond our range, no csum found */
1415- if (sum -> bytenr > logical )
1416- break ;
1417-
1418- /*
1419- * The current sum is before our bytenr, since scrub is always
1420- * done in bytenr order, the csum will never be used anymore,
1421- * clean it up so that later calls won't bother with the range,
1422- * and continue search the next range.
1423- */
1424- if (sum -> bytenr + sum -> len <= logical ) {
1425- drop_csum_range (sctx , sum );
1426- continue ;
1427- }
1428-
1429- /* Now the csum range covers our bytenr, copy the csum */
1430- found = true;
1431- index = (logical - sum -> bytenr ) >> sctx -> fs_info -> sectorsize_bits ;
1432- num_sectors = sum -> len >> sctx -> fs_info -> sectorsize_bits ;
1433-
1434- memcpy (csum , sum -> sums + index * sctx -> fs_info -> csum_size ,
1435- sctx -> fs_info -> csum_size );
1436-
1437- /* Cleanup the range if we're at the end of the csum range */
1438- if (index == num_sectors - 1 )
1439- drop_csum_range (sctx , sum );
1440- break ;
1441- }
1442- if (!found )
1443- return 0 ;
1444- return 1 ;
1445- }
1446-
14471256/*
14481257 * Given a physical address, this will calculate its
14491258 * logical offset. if this is a parity stripe, it will return
@@ -1624,8 +1433,6 @@ static int sync_write_pointer_for_zoned(struct scrub_ctx *sctx, u64 logical,
16241433 if (!btrfs_is_zoned (fs_info ))
16251434 return 0 ;
16261435
1627- wait_event (sctx -> list_wait , atomic_read (& sctx -> bios_in_flight ) == 0 );
1628-
16291436 mutex_lock (& sctx -> wr_lock );
16301437 if (sctx -> write_pointer < physical_end ) {
16311438 ret = btrfs_sync_zone_write_pointer (sctx -> wr_tgtdev , logical ,
@@ -2153,11 +1960,6 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
21531960 /* Paused? */
21541961 if (atomic_read (& fs_info -> scrub_pause_req )) {
21551962 /* Push queued extents */
2156- scrub_submit (sctx );
2157- mutex_lock (& sctx -> wr_lock );
2158- mutex_unlock (& sctx -> wr_lock );
2159- wait_event (sctx -> list_wait ,
2160- atomic_read (& sctx -> bios_in_flight ) == 0 );
21611963 scrub_blocked_if_needed (fs_info );
21621964 }
21631965 /* Block group removed? */
@@ -2285,8 +2087,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
22852087 u64 stripe_logical ;
22862088 int stop_loop = 0 ;
22872089
2288- wait_event (sctx -> list_wait ,
2289- atomic_read (& sctx -> bios_in_flight ) == 0 );
22902090 scrub_blocked_if_needed (fs_info );
22912091
22922092 if (sctx -> is_dev_replace &&
@@ -2402,8 +2202,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
24022202 break ;
24032203 }
24042204out :
2405- /* push queued extents */
2406- scrub_submit (sctx );
24072205 flush_scrub_stripes (sctx );
24082206 if (sctx -> raid56_data_stripes ) {
24092207 for (int i = 0 ; i < nr_data_stripes (map ); i ++ )
@@ -2728,34 +2526,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
27282526
27292527 ret = scrub_chunk (sctx , cache , scrub_dev , found_key .offset ,
27302528 dev_extent_len );
2731-
2732- /*
2733- * flush, submit all pending read and write bios, afterwards
2734- * wait for them.
2735- * Note that in the dev replace case, a read request causes
2736- * write requests that are submitted in the read completion
2737- * worker. Therefore in the current situation, it is required
2738- * that all write requests are flushed, so that all read and
2739- * write requests are really completed when bios_in_flight
2740- * changes to 0.
2741- */
2742- scrub_submit (sctx );
2743-
2744- wait_event (sctx -> list_wait ,
2745- atomic_read (& sctx -> bios_in_flight ) == 0 );
2746-
2747- scrub_pause_on (fs_info );
2748-
2749- /*
2750- * must be called before we decrease @scrub_paused.
2751- * make sure we don't block transaction commit while
2752- * we are waiting pending workers finished.
2753- */
2754- wait_event (sctx -> list_wait ,
2755- atomic_read (& sctx -> workers_pending ) == 0 );
2756-
2757- scrub_pause_off (fs_info );
2758-
27592529 if (sctx -> is_dev_replace &&
27602530 !btrfs_finish_block_group_to_copy (dev_replace -> srcdev ,
27612531 cache , found_key .offset ))
@@ -3086,12 +2856,9 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
30862856 ret = scrub_enumerate_chunks (sctx , dev , start , end );
30872857 memalloc_nofs_restore (nofs_flag );
30882858
3089- wait_event (sctx -> list_wait , atomic_read (& sctx -> bios_in_flight ) == 0 );
30902859 atomic_dec (& fs_info -> scrubs_running );
30912860 wake_up (& fs_info -> scrub_pause_wait );
30922861
3093- wait_event (sctx -> list_wait , atomic_read (& sctx -> workers_pending ) == 0 );
3094-
30952862 if (progress )
30962863 memcpy (progress , & sctx -> stat , sizeof (* progress ));
30972864
0 commit comments