Skip to content

Commit 8912c2f

Browse files
committed
Merge tag 'for-6.20-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"User visible changes, feature updates:
 - when using block size > page size, enable direct IO
 - fall back to buffered IO if the data profile has duplication, workaround to avoid checksum mismatches on block group profiles with redundancy, real direct IO is possible on single or RAID0
 - redo export of zoned statistics, moved from sysfs to /proc/pid/mountstats due to size limitations of the former
Experimental features:
 - remove offload checksum tunable, intended to find best way to do it but since we've switched to offload to thread for everything we don't need it anymore
 - initial support for remap-tree feature, a translation layer of logical block addresses that allows changes without moving/rewriting blocks to do e.g. relocation, or other changes that require COW
Notable fixes:
 - automatic removal of accidentally leftover chunks when free-space-tree is enabled since mkfs.btrfs v6.16.1
 - zoned mode:
    - do not try to append to conventional zones when RAID is mixing zoned and conventional drives
    - fixup write pointers when mixing zoned and conventional on DUP/RAID* profiles
 - when using squota, relax deletion rules for qgroups with 0 members to allow easier recovery from accounting bugs, also add more checks to detect bad accounting
 - fix periodic reclaim scanning, properly check boundary conditions not to trigger it unexpectedly or miss the time to run it
 - trim:
    - continue after first error
    - change reporting to the first detected error
    - add more cancellation points
    - reduce contention of big device lock that can block other operations when there's lots of trimmed space
 - when chunk allocation is forced (needs experimental build) fix transaction abort when unexpected space layout is detected
Core:
 - switch to crypto library API for checksumming, removed module dependencies, pointer indirections, etc.
 - error handling improvements
 - adjust how and where transaction commit or abort are done and are maybe not necessary
 - minor compression optimization to skip single block ranges
 - improve how compression folios are handled
 - new and updated selftests
 - cleanups, refactoring:
    - auto-freeing and other automatic variable cleanup conversion
    - structure size optimizations
    - condition annotations"
* tag 'for-6.20-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (137 commits)
  btrfs: get rid of compressed_bio::compressed_folios[]
  btrfs: get rid of compressed_folios[] usage for encoded writes
  btrfs: get rid of compressed_folios[] usage for compressed read
  btrfs: remove the old btrfs_compress_folios() infrastructure
  btrfs: switch to btrfs_compress_bio() interface for compressed writes
  btrfs: introduce btrfs_compress_bio() helper
  btrfs: zlib: introduce zlib_compress_bio() helper
  btrfs: zstd: introduce zstd_compress_bio() helper
  btrfs: lzo: introduce lzo_compress_bio() helper
  btrfs: zoned: factor out the zone loading part into a testable function
  btrfs: add cleanup function for btrfs_free_chunk_map
  btrfs: tests: add cleanup functions for test specific functions
  btrfs: raid56: fix memory leak of btrfs_raid_bio::stripe_uptodate_bitmap
  btrfs: tests: add unit tests for pending extent walking functions
  btrfs: fix EEXIST abort due to non-consecutive gaps in chunk allocation
  btrfs: fix transaction commit blocking during trim of unallocated space
  btrfs: handle user interrupt properly in btrfs_trim_fs()
  btrfs: preserve first error in btrfs_trim_fs()
  btrfs: continue trimming remaining devices on failure
  btrfs: do not BUG_ON() in btrfs_remove_block_group()
  ...
2 parents b29a7a8 + 161ab30 commit 8912c2f

71 files changed

Lines changed: 6052 additions & 2264 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

fs/btrfs/Kconfig

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,8 @@ config BTRFS_FS
44
tristate "Btrfs filesystem support"
55
select BLK_CGROUP_PUNT_BIO
66
select CRC32
7-
select CRYPTO
8-
select CRYPTO_CRC32C
9-
select CRYPTO_XXHASH
10-
select CRYPTO_SHA256
11-
select CRYPTO_BLAKE2B
7+
select CRYPTO_LIB_BLAKE2B
8+
select CRYPTO_LIB_SHA256
129
select ZLIB_INFLATE
1310
select ZLIB_DEFLATE
1411
select LZO_COMPRESS
@@ -18,6 +15,7 @@ config BTRFS_FS
1815
select FS_IOMAP
1916
select RAID6_PQ
2017
select XOR_BLOCKS
18+
select XXHASH
2119
depends on PAGE_SIZE_LESS_THAN_256KB
2220

2321
help
@@ -106,9 +104,6 @@ config BTRFS_EXPERIMENTAL
106104

107105
- send stream protocol v3 - fs-verity support
108106

109-
- checksum offload mode - sysfs knob to affect when checksums are
110-
calculated (at IO time, or in a thread)
111-
112107
- raid-stripe-tree - additional mapping of extents to devices to
113108
support RAID1* profiles on zoned devices,
114109
RAID56 not yet supported
@@ -121,4 +116,6 @@ config BTRFS_EXPERIMENTAL
121116

122117
- asynchronous checksum generation for data writes
123118

119+
- remap-tree - logical address remapping tree
120+
124121
If unsure, say N.

fs/btrfs/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,4 +44,5 @@ btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
4444
tests/extent-buffer-tests.o tests/btrfs-tests.o \
4545
tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o \
4646
tests/free-space-tree-tests.o tests/extent-map-tests.o \
47-
tests/raid-stripe-tree-tests.o tests/delayed-refs-tests.o
47+
tests/raid-stripe-tree-tests.o tests/delayed-refs-tests.o \
48+
tests/chunk-allocation-tests.o

fs/btrfs/accessors.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,26 @@ BTRFS_SETGET_FUNCS(block_group_flags, struct btrfs_block_group_item, flags, 64);
240240
BTRFS_SETGET_STACK_FUNCS(stack_block_group_flags,
241241
struct btrfs_block_group_item, flags, 64);
242242

243+
/* struct btrfs_block_group_item_v2 */
244+
BTRFS_SETGET_STACK_FUNCS(stack_block_group_v2_used, struct btrfs_block_group_item_v2,
245+
used, 64);
246+
BTRFS_SETGET_FUNCS(block_group_v2_used, struct btrfs_block_group_item_v2, used, 64);
247+
BTRFS_SETGET_STACK_FUNCS(stack_block_group_v2_chunk_objectid,
248+
struct btrfs_block_group_item_v2, chunk_objectid, 64);
249+
BTRFS_SETGET_FUNCS(block_group_v2_chunk_objectid,
250+
struct btrfs_block_group_item_v2, chunk_objectid, 64);
251+
BTRFS_SETGET_STACK_FUNCS(stack_block_group_v2_flags,
252+
struct btrfs_block_group_item_v2, flags, 64);
253+
BTRFS_SETGET_FUNCS(block_group_v2_flags, struct btrfs_block_group_item_v2, flags, 64);
254+
BTRFS_SETGET_STACK_FUNCS(stack_block_group_v2_remap_bytes,
255+
struct btrfs_block_group_item_v2, remap_bytes, 64);
256+
BTRFS_SETGET_FUNCS(block_group_v2_remap_bytes, struct btrfs_block_group_item_v2,
257+
remap_bytes, 64);
258+
BTRFS_SETGET_STACK_FUNCS(stack_block_group_v2_identity_remap_count,
259+
struct btrfs_block_group_item_v2, identity_remap_count, 32);
260+
BTRFS_SETGET_FUNCS(block_group_v2_identity_remap_count, struct btrfs_block_group_item_v2,
261+
identity_remap_count, 32);
262+
243263
/* struct btrfs_free_space_info */
244264
BTRFS_SETGET_FUNCS(free_space_extent_count, struct btrfs_free_space_info,
245265
extent_count, 32);
@@ -863,6 +883,12 @@ BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
863883
uuid_tree_generation, 64);
864884
BTRFS_SETGET_STACK_FUNCS(super_nr_global_roots, struct btrfs_super_block,
865885
nr_global_roots, 64);
886+
BTRFS_SETGET_STACK_FUNCS(super_remap_root, struct btrfs_super_block,
887+
remap_root, 64);
888+
BTRFS_SETGET_STACK_FUNCS(super_remap_root_generation, struct btrfs_super_block,
889+
remap_root_generation, 64);
890+
BTRFS_SETGET_STACK_FUNCS(super_remap_root_level, struct btrfs_super_block,
891+
remap_root_level, 8);
866892

867893
/* struct btrfs_file_extent_item */
868894
BTRFS_SETGET_STACK_FUNCS(stack_file_extent_type, struct btrfs_file_extent_item,
@@ -1010,6 +1036,10 @@ BTRFS_SETGET_STACK_FUNCS(stack_verity_descriptor_encryption,
10101036
BTRFS_SETGET_STACK_FUNCS(stack_verity_descriptor_size,
10111037
struct btrfs_verity_descriptor_item, size, 64);
10121038

1039+
BTRFS_SETGET_FUNCS(remap_address, struct btrfs_remap_item, address, 64);
1040+
BTRFS_SETGET_STACK_FUNCS(stack_remap_address, struct btrfs_remap_item,
1041+
address, 64);
1042+
10131043
/* Cast into the data area of the leaf. */
10141044
#define btrfs_item_ptr(leaf, slot, type) \
10151045
((type *)(btrfs_item_nr_offset(leaf, 0) + btrfs_item_offset(leaf, slot)))

fs/btrfs/backref.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3609,10 +3609,8 @@ int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
36093609
}
36103610

36113611
rb_node = rb_simple_insert(&cache->rb_root, &upper->simple_node);
3612-
if (unlikely(rb_node)) {
3612+
if (unlikely(rb_node))
36133613
btrfs_backref_panic(cache->fs_info, upper->bytenr, -EEXIST);
3614-
return -EUCLEAN;
3615-
}
36163614

36173615
list_add_tail(&edge->list[UPPER], &upper->lower);
36183616

fs/btrfs/bio.c

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,13 @@ static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
9797
bbio->orig_logical = orig_bbio->orig_logical;
9898
orig_bbio->orig_logical += map_length;
9999
}
100+
100101
bbio->csum_search_commit_root = orig_bbio->csum_search_commit_root;
102+
bbio->can_use_append = orig_bbio->can_use_append;
103+
bbio->is_scrub = orig_bbio->is_scrub;
104+
bbio->is_remap = orig_bbio->is_remap;
105+
bbio->async_csum = orig_bbio->async_csum;
106+
101107
atomic_inc(&orig_bbio->pending_ios);
102108
return bbio;
103109
}
@@ -480,6 +486,8 @@ static void btrfs_clone_write_end_io(struct bio *bio)
480486

481487
static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
482488
{
489+
u64 physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
490+
483491
if (!dev || !dev->bdev ||
484492
test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
485493
(btrfs_op(bio) == BTRFS_MAP_WRITE &&
@@ -494,12 +502,13 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
494502
* For zone append writing, bi_sector must point the beginning of the
495503
* zone
496504
*/
497-
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
498-
u64 physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
505+
if (btrfs_bio(bio)->can_use_append && btrfs_dev_is_sequential(dev, physical)) {
499506
u64 zone_start = round_down(physical, dev->fs_info->zone_size);
500507

501508
ASSERT(btrfs_dev_is_sequential(dev, physical));
502509
bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
510+
bio->bi_opf &= ~REQ_OP_WRITE;
511+
bio->bi_opf |= REQ_OP_ZONE_APPEND;
503512
}
504513
btrfs_debug(dev->fs_info,
505514
"%s: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
@@ -662,11 +671,6 @@ static bool should_async_write(struct btrfs_bio *bbio)
662671
bool auto_csum_mode = true;
663672

664673
#ifdef CONFIG_BTRFS_EXPERIMENTAL
665-
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
666-
enum btrfs_offload_csum_mode csum_mode = READ_ONCE(fs_devices->offload_csum_mode);
667-
668-
if (csum_mode == BTRFS_OFFLOAD_CSUM_FORCE_ON)
669-
return true;
670674
/*
671675
* Write bios will calculate checksum and submit bio at the same time.
672676
* Unless explicitly required don't offload serial csum calculate and bio
@@ -747,7 +751,6 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
747751
u64 logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
748752
u64 length = bio->bi_iter.bi_size;
749753
u64 map_length = length;
750-
bool use_append = btrfs_use_zone_append(bbio);
751754
struct btrfs_io_context *bioc = NULL;
752755
struct btrfs_io_stripe smap;
753756
blk_status_t status;
@@ -775,8 +778,10 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
775778
if (bio_op(bio) == REQ_OP_WRITE && is_data_bbio(bbio))
776779
bbio->orig_logical = logical;
777780

781+
bbio->can_use_append = btrfs_use_zone_append(bbio);
782+
778783
map_length = min(map_length, length);
779-
if (use_append)
784+
if (bbio->can_use_append)
780785
map_length = btrfs_append_map_length(bbio, map_length);
781786

782787
if (map_length < length) {
@@ -805,11 +810,6 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
805810
}
806811

807812
if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
808-
if (use_append) {
809-
bio->bi_opf &= ~REQ_OP_WRITE;
810-
bio->bi_opf |= REQ_OP_ZONE_APPEND;
811-
}
812-
813813
if (is_data_bbio(bbio) && bioc && bioc->use_rst) {
814814
/*
815815
* No locking for the list update, as we only add to
@@ -827,7 +827,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
827827
*/
828828
if (!(inode->flags & BTRFS_INODE_NODATASUM) &&
829829
!test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state) &&
830-
!btrfs_is_data_reloc_root(inode->root)) {
830+
!btrfs_is_data_reloc_root(inode->root) && !bbio->is_remap) {
831831
if (should_async_write(bbio) &&
832832
btrfs_wq_submit_bio(bbio, bioc, &smap, mirror_num))
833833
goto done;
@@ -836,9 +836,8 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
836836
status = errno_to_blk_status(ret);
837837
if (status)
838838
goto fail;
839-
} else if (use_append ||
840-
(btrfs_is_zoned(fs_info) && inode &&
841-
inode->flags & BTRFS_INODE_NODATASUM)) {
839+
} else if (bbio->can_use_append ||
840+
(btrfs_is_zoned(fs_info) && inode->flags & BTRFS_INODE_NODATASUM)) {
842841
ret = btrfs_alloc_dummy_sum(bbio);
843842
status = errno_to_blk_status(ret);
844843
if (status)

fs/btrfs/bio.h

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -68,29 +68,36 @@ struct btrfs_bio {
6868
struct btrfs_tree_parent_check parent_check;
6969
};
7070

71+
/* For internal use in read end I/O handling */
72+
struct work_struct end_io_work;
73+
7174
/* End I/O information supplied to btrfs_bio_alloc */
7275
btrfs_bio_end_io_t end_io;
7376
void *private;
7477

75-
/* For internal use in read end I/O handling */
76-
unsigned int mirror_num;
7778
atomic_t pending_ios;
78-
struct work_struct end_io_work;
79+
u16 mirror_num;
7980

8081
/* Save the first error status of split bio. */
8182
blk_status_t status;
8283

8384
/* Use the commit root to look up csums (data read bio only). */
84-
bool csum_search_commit_root;
85+
bool csum_search_commit_root:1;
8586

8687
/*
8788
* Since scrub will reuse btree inode, we need this flag to distinguish
8889
* scrub bios.
8990
*/
90-
bool is_scrub;
91+
bool is_scrub:1;
92+
93+
/* Whether the bio is coming from copy_remapped_data_io(). */
94+
bool is_remap:1;
9195

9296
/* Whether the csum generation for data write is async. */
93-
bool async_csum;
97+
bool async_csum:1;
98+
99+
/* Whether the bio is written using zone append. */
100+
bool can_use_append:1;
94101

95102
/*
96103
* This member must come last, bio_alloc_bioset will allocate enough

0 commit comments

Comments
 (0)