Skip to content

Commit 1263a7b

Browse files
committed
Merge tag 'for-6.11-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:

 - followup fix for direct io and fsync under some conditions, reported by QEMU users

 - fix a potential leak when disabling quotas while some extent tracking work can still happen

 - in zoned mode handle unexpected change of zone write pointer in RAID1-like block groups, turn the zones to read-only

* tag 'for-6.11-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix race between direct IO write and fsync when using same fd
  btrfs: zoned: handle broken write pointer on zones
  btrfs: qgroup: don't use extent changeset when not needed
2 parents d8abb73 + cd9253c commit 1263a7b

6 files changed

Lines changed: 42 additions & 23 deletions

File tree

fs/btrfs/ctree.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,6 @@ struct btrfs_file_private {
459459
void *filldir_buf;
460460
u64 last_index;
461461
struct extent_state *llseek_cached_state;
462-
bool fsync_skip_inode_lock;
463462
};
464463

465464
static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info)

fs/btrfs/direct-io.c

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -864,13 +864,6 @@ ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
864864
if (IS_ERR_OR_NULL(dio)) {
865865
ret = PTR_ERR_OR_ZERO(dio);
866866
} else {
867-
struct btrfs_file_private stack_private = { 0 };
868-
struct btrfs_file_private *private;
869-
const bool have_private = (file->private_data != NULL);
870-
871-
if (!have_private)
872-
file->private_data = &stack_private;
873-
874867
/*
875868
* If we have a synchronous write, we must make sure the fsync
876869
* triggered by the iomap_dio_complete() call below doesn't
@@ -879,13 +872,10 @@ ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
879872
* partial writes due to the input buffer (or parts of it) not
880873
* being already faulted in.
881874
*/
882-
private = file->private_data;
883-
private->fsync_skip_inode_lock = true;
875+
ASSERT(current->journal_info == NULL);
876+
current->journal_info = BTRFS_TRANS_DIO_WRITE_STUB;
884877
ret = iomap_dio_complete(dio);
885-
private->fsync_skip_inode_lock = false;
886-
887-
if (!have_private)
888-
file->private_data = NULL;
878+
current->journal_info = NULL;
889879
}
890880

891881
/* No increment (+=) because iomap returns a cumulative value. */

fs/btrfs/file.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1603,7 +1603,6 @@ static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx)
16031603
*/
16041604
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
16051605
{
1606-
struct btrfs_file_private *private = file->private_data;
16071606
struct dentry *dentry = file_dentry(file);
16081607
struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));
16091608
struct btrfs_root *root = inode->root;
@@ -1613,7 +1612,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
16131612
int ret = 0, err;
16141613
u64 len;
16151614
bool full_sync;
1616-
const bool skip_ilock = (private ? private->fsync_skip_inode_lock : false);
1615+
bool skip_ilock = false;
1616+
1617+
if (current->journal_info == BTRFS_TRANS_DIO_WRITE_STUB) {
1618+
skip_ilock = true;
1619+
current->journal_info = NULL;
1620+
lockdep_assert_held(&inode->vfs_inode.i_rwsem);
1621+
}
16171622

16181623
trace_btrfs_sync_file(file, datasync);
16191624

fs/btrfs/qgroup.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4346,10 +4346,9 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
43464346
int ret;
43474347

43484348
if (btrfs_qgroup_mode(inode->root->fs_info) == BTRFS_QGROUP_MODE_DISABLED) {
4349-
extent_changeset_init(&changeset);
43504349
return clear_record_extent_bits(&inode->io_tree, start,
43514350
start + len - 1,
4352-
EXTENT_QGROUP_RESERVED, &changeset);
4351+
EXTENT_QGROUP_RESERVED, NULL);
43534352
}
43544353

43554354
/* In release case, we shouldn't have @reserved */

fs/btrfs/transaction.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,12 @@ struct btrfs_root_item;
2727
struct btrfs_root;
2828
struct btrfs_path;
2929

30+
/*
31+
* Signal that a direct IO write is in progress, to avoid deadlock for sync
32+
* direct IO writes when fsync is called during the direct IO write path.
33+
*/
34+
#define BTRFS_TRANS_DIO_WRITE_STUB ((void *) 1)
35+
3036
/* Radix-tree tag for roots that are part of the transaction. */
3137
#define BTRFS_ROOT_TRANS_TAG 0
3238

fs/btrfs/zoned.c

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1406,6 +1406,8 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
14061406
return -EINVAL;
14071407
}
14081408

1409+
bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity);
1410+
14091411
if (zone_info[0].alloc_offset == WP_MISSING_DEV) {
14101412
btrfs_err(bg->fs_info,
14111413
"zoned: cannot recover write pointer for zone %llu",
@@ -1432,7 +1434,6 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
14321434
}
14331435

14341436
bg->alloc_offset = zone_info[0].alloc_offset;
1435-
bg->zone_capacity = min(zone_info[0].capacity, zone_info[1].capacity);
14361437
return 0;
14371438
}
14381439

@@ -1450,6 +1451,9 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
14501451
return -EINVAL;
14511452
}
14521453

1454+
/* In case a device is missing we have a cap of 0, so don't use it. */
1455+
bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity);
1456+
14531457
for (i = 0; i < map->num_stripes; i++) {
14541458
if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
14551459
zone_info[i].alloc_offset == WP_CONVENTIONAL)
@@ -1471,9 +1475,6 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
14711475
if (test_bit(0, active))
14721476
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
14731477
}
1474-
/* In case a device is missing we have a cap of 0, so don't use it. */
1475-
bg->zone_capacity = min_not_zero(zone_info[0].capacity,
1476-
zone_info[1].capacity);
14771478
}
14781479

14791480
if (zone_info[0].alloc_offset != WP_MISSING_DEV)
@@ -1563,6 +1564,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
15631564
unsigned long *active = NULL;
15641565
u64 last_alloc = 0;
15651566
u32 num_sequential = 0, num_conventional = 0;
1567+
u64 profile;
15661568

15671569
if (!btrfs_is_zoned(fs_info))
15681570
return 0;
@@ -1623,7 +1625,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
16231625
}
16241626
}
16251627

1626-
switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
1628+
profile = map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
1629+
switch (profile) {
16271630
case 0: /* single */
16281631
ret = btrfs_load_block_group_single(cache, &zone_info[0], active);
16291632
break;
@@ -1650,6 +1653,23 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
16501653
goto out;
16511654
}
16521655

1656+
if (ret == -EIO && profile != 0 && profile != BTRFS_BLOCK_GROUP_RAID0 &&
1657+
profile != BTRFS_BLOCK_GROUP_RAID10) {
1658+
/*
1659+
* Detected broken write pointer. Make this block group
1660+
* unallocatable by setting the allocation pointer at the end of
1661+
* allocatable region. Relocating this block group will fix the
1662+
* mismatch.
1663+
*
1664+
* Currently, we cannot handle RAID0 or RAID10 case like this
1665+
* because we don't have a proper zone_capacity value. But,
1666+
* reading from this block group won't work anyway because of a missing
1667+
* stripe.
1668+
*/
1669+
cache->alloc_offset = cache->zone_capacity;
1670+
ret = 0;
1671+
}
1672+
16531673
out:
16541674
/* Reject non SINGLE data profiles without RST */
16551675
if ((map->type & BTRFS_BLOCK_GROUP_DATA) &&

0 commit comments

Comments
 (0)