Skip to content

Commit c144c63

Browse files
Christoph Hellwig authored and
kdave committed
btrfs: repair all known bad mirrors
When there is more than a single level of redundancy there can also be multiple bad mirrors, and the current read repair code only repairs the last bad one.

Restructure btrfs_repair_one_sector so that it records the originally failed mirror and the number of copies, and then repair all known bad copies until we reach the originally failed copy in clean_io_failure. Note that this also means the read repair reads will always start from the next bad mirror and not mirror 0.

This fixes btrfs/265 in xfstests.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
1 parent d28beb3 commit c144c63

2 files changed

Lines changed: 61 additions & 66 deletions

File tree

fs/btrfs/extent_io.c

Lines changed: 60 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -2434,6 +2434,20 @@ int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num)
24342434
return ret;
24352435
}
24362436

2437+
static int next_mirror(const struct io_failure_record *failrec, int cur_mirror)
2438+
{
2439+
if (cur_mirror == failrec->num_copies)
2440+
return cur_mirror + 1 - failrec->num_copies;
2441+
return cur_mirror + 1;
2442+
}
2443+
2444+
static int prev_mirror(const struct io_failure_record *failrec, int cur_mirror)
2445+
{
2446+
if (cur_mirror == 1)
2447+
return failrec->num_copies;
2448+
return cur_mirror - 1;
2449+
}
2450+
24372451
/*
24382452
* each time an IO finishes, we do a fast check in the IO failure tree
24392453
* to see if we need to process or clean up an io_failure_record
@@ -2446,7 +2460,7 @@ int clean_io_failure(struct btrfs_fs_info *fs_info,
24462460
u64 private;
24472461
struct io_failure_record *failrec;
24482462
struct extent_state *state;
2449-
int num_copies;
2463+
int mirror;
24502464
int ret;
24512465

24522466
private = 0;
@@ -2470,20 +2484,19 @@ int clean_io_failure(struct btrfs_fs_info *fs_info,
24702484
EXTENT_LOCKED);
24712485
spin_unlock(&io_tree->lock);
24722486

2473-
if (state && state->start <= failrec->start &&
2474-
state->end >= failrec->start + failrec->len - 1) {
2475-
num_copies = btrfs_num_copies(fs_info, failrec->logical,
2476-
failrec->len);
2477-
if (num_copies > 1) {
2478-
repair_io_failure(fs_info, ino, start, failrec->len,
2479-
failrec->logical, page, pg_offset,
2480-
failrec->failed_mirror);
2481-
}
2482-
}
2487+
if (!state || state->start > failrec->start ||
2488+
state->end < failrec->start + failrec->len - 1)
2489+
goto out;
2490+
2491+
mirror = failrec->this_mirror;
2492+
do {
2493+
mirror = prev_mirror(failrec, mirror);
2494+
repair_io_failure(fs_info, ino, start, failrec->len,
2495+
failrec->logical, page, pg_offset, mirror);
2496+
} while (mirror != failrec->failed_mirror);
24832497

24842498
out:
24852499
free_io_failure(failure_tree, io_tree, failrec);
2486-
24872500
return 0;
24882501
}
24892502

@@ -2522,7 +2535,8 @@ void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
25222535
}
25232536

25242537
static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode,
2525-
u64 start)
2538+
u64 start,
2539+
int failed_mirror)
25262540
{
25272541
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
25282542
struct io_failure_record *failrec;
@@ -2544,7 +2558,8 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
25442558
* (e.g. with a list for failed_mirror) to make
25452559
* clean_io_failure() clean all those errors at once.
25462560
*/
2547-
2561+
ASSERT(failrec->this_mirror == failed_mirror);
2562+
ASSERT(failrec->len == fs_info->sectorsize);
25482563
return failrec;
25492564
}
25502565

@@ -2554,7 +2569,8 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
25542569

25552570
failrec->start = start;
25562571
failrec->len = sectorsize;
2557-
failrec->this_mirror = 0;
2572+
failrec->failed_mirror = failed_mirror;
2573+
failrec->this_mirror = failed_mirror;
25582574
failrec->compress_type = BTRFS_COMPRESS_NONE;
25592575

25602576
read_lock(&em_tree->lock);
@@ -2589,6 +2605,20 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
25892605
failrec->logical = logical;
25902606
free_extent_map(em);
25912607

2608+
failrec->num_copies = btrfs_num_copies(fs_info, logical, sectorsize);
2609+
if (failrec->num_copies == 1) {
2610+
/*
2611+
* We only have a single copy of the data, so don't bother with
2612+
* all the retry and error correction code that follows. No
2613+
* matter what the error is, it is very likely to persist.
2614+
*/
2615+
btrfs_debug(fs_info,
2616+
"cannot repair logical %llu num_copies %d",
2617+
failrec->logical, failrec->num_copies);
2618+
kfree(failrec);
2619+
return ERR_PTR(-EIO);
2620+
}
2621+
25922622
/* Set the bits in the private failure tree */
25932623
ret = set_extent_bits(failure_tree, start, start + sectorsize - 1,
25942624
EXTENT_LOCKED | EXTENT_DIRTY);
@@ -2605,54 +2635,6 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
26052635
return failrec;
26062636
}
26072637

2608-
static bool btrfs_check_repairable(struct inode *inode,
2609-
struct io_failure_record *failrec,
2610-
int failed_mirror)
2611-
{
2612-
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2613-
int num_copies;
2614-
2615-
num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
2616-
if (num_copies == 1) {
2617-
/*
2618-
* we only have a single copy of the data, so don't bother with
2619-
* all the retry and error correction code that follows. no
2620-
* matter what the error is, it is very likely to persist.
2621-
*/
2622-
btrfs_debug(fs_info,
2623-
"Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
2624-
num_copies, failrec->this_mirror, failed_mirror);
2625-
return false;
2626-
}
2627-
2628-
/* The failure record should only contain one sector */
2629-
ASSERT(failrec->len == fs_info->sectorsize);
2630-
2631-
/*
2632-
* There are two premises:
2633-
* a) deliver good data to the caller
2634-
* b) correct the bad sectors on disk
2635-
*
2636-
* Since we're only doing repair for one sector, we only need to get
2637-
* a good copy of the failed sector and if we succeed, we have setup
2638-
* everything for repair_io_failure to do the rest for us.
2639-
*/
2640-
ASSERT(failed_mirror);
2641-
failrec->failed_mirror = failed_mirror;
2642-
failrec->this_mirror++;
2643-
if (failrec->this_mirror == failed_mirror)
2644-
failrec->this_mirror++;
2645-
2646-
if (failrec->this_mirror > num_copies) {
2647-
btrfs_debug(fs_info,
2648-
"Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
2649-
num_copies, failrec->this_mirror, failed_mirror);
2650-
return false;
2651-
}
2652-
2653-
return true;
2654-
}
2655-
26562638
int btrfs_repair_one_sector(struct inode *inode,
26572639
struct bio *failed_bio, u32 bio_offset,
26582640
struct page *page, unsigned int pgoff,
@@ -2673,12 +2655,24 @@ int btrfs_repair_one_sector(struct inode *inode,
26732655

26742656
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
26752657

2676-
failrec = btrfs_get_io_failure_record(inode, start);
2658+
failrec = btrfs_get_io_failure_record(inode, start, failed_mirror);
26772659
if (IS_ERR(failrec))
26782660
return PTR_ERR(failrec);
26792661

2680-
2681-
if (!btrfs_check_repairable(inode, failrec, failed_mirror)) {
2662+
/*
2663+
* There are two premises:
2664+
* a) deliver good data to the caller
2665+
* b) correct the bad sectors on disk
2666+
*
2667+
* Since we're only doing repair for one sector, we only need to get
2668+
* a good copy of the failed sector and if we succeed, we have setup
2669+
* everything for repair_io_failure to do the rest for us.
2670+
*/
2671+
failrec->this_mirror = next_mirror(failrec, failrec->this_mirror);
2672+
if (failrec->this_mirror == failrec->failed_mirror) {
2673+
btrfs_debug(fs_info,
2674+
"failed to repair num_copies %d this_mirror %d failed_mirror %d",
2675+
failrec->num_copies, failrec->this_mirror, failrec->failed_mirror);
26822676
free_io_failure(failure_tree, tree, failrec);
26832677
return -EIO;
26842678
}

fs/btrfs/extent_io.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -263,6 +263,7 @@ struct io_failure_record {
263263
enum btrfs_compression_type compress_type;
264264
int this_mirror;
265265
int failed_mirror;
266+
int num_copies;
266267
};
267268

268269
int btrfs_repair_one_sector(struct inode *inode,

0 commit comments

Comments
 (0)