@@ -1873,20 +1873,14 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
18731873 return ret ;
18741874}
18751875
1876- int btrfs_zone_finish (struct btrfs_block_group * block_group )
1876+ static int do_zone_finish (struct btrfs_block_group * block_group , bool fully_written )
18771877{
18781878 struct btrfs_fs_info * fs_info = block_group -> fs_info ;
18791879 struct map_lookup * map ;
1880- struct btrfs_device * device ;
1881- u64 physical ;
1880+ bool need_zone_finish ;
18821881 int ret = 0 ;
18831882 int i ;
18841883
1885- if (!btrfs_is_zoned (fs_info ))
1886- return 0 ;
1887-
1888- map = block_group -> physical_map ;
1889-
18901884 spin_lock (& block_group -> lock );
18911885 if (!block_group -> zone_is_active ) {
18921886 spin_unlock (& block_group -> lock );
@@ -1900,61 +1894,82 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
19001894 spin_unlock (& block_group -> lock );
19011895 return - EAGAIN ;
19021896 }
1903- spin_unlock (& block_group -> lock );
1904-
1905- ret = btrfs_inc_block_group_ro (block_group , false);
1906- if (ret )
1907- return ret ;
1908-
1909- /* Ensure all writes in this block group finish */
1910- btrfs_wait_block_group_reservations (block_group );
1911- /* No need to wait for NOCOW writers. Zoned mode does not allow that. */
1912- btrfs_wait_ordered_roots (fs_info , U64_MAX , block_group -> start ,
1913- block_group -> length );
1914-
1915- spin_lock (& block_group -> lock );
19161897
19171898 /*
1918- * Bail out if someone already deactivated the block group, or
1919- * allocated space is left in the block group.
1899+ * If we are sure that the block group is full (= no more room left for
1900+ * new allocation) and the IO for the last usable block is completed, we
1901+ * don't need to wait for the other IOs. This holds because we ensure
1902+ * the sequential IO submissions using the ZONE_APPEND command for data
1903+ * and block_group->meta_write_pointer for metadata.
19201904 */
1921- if (!block_group -> zone_is_active ) {
1905+ if (!fully_written ) {
19221906 spin_unlock (& block_group -> lock );
1923- btrfs_dec_block_group_ro (block_group );
1924- return 0 ;
1925- }
19261907
1927- if (block_group -> reserved ) {
1928- spin_unlock (& block_group -> lock );
1929- btrfs_dec_block_group_ro (block_group );
1930- return - EAGAIN ;
1908+ ret = btrfs_inc_block_group_ro (block_group , false);
1909+ if (ret )
1910+ return ret ;
1911+
1912+ /* Ensure all writes in this block group finish */
1913+ btrfs_wait_block_group_reservations (block_group );
1914+ /* No need to wait for NOCOW writers. Zoned mode does not allow that */
1915+ btrfs_wait_ordered_roots (fs_info , U64_MAX , block_group -> start ,
1916+ block_group -> length );
1917+
1918+ spin_lock (& block_group -> lock );
1919+
1920+ /*
1921+ * Bail out if someone already deactivated the block group, or
1922+ * allocated space is left in the block group.
1923+ */
1924+ if (!block_group -> zone_is_active ) {
1925+ spin_unlock (& block_group -> lock );
1926+ btrfs_dec_block_group_ro (block_group );
1927+ return 0 ;
1928+ }
1929+
1930+ if (block_group -> reserved ) {
1931+ spin_unlock (& block_group -> lock );
1932+ btrfs_dec_block_group_ro (block_group );
1933+ return - EAGAIN ;
1934+ }
19311935 }
19321936
1937+ /*
1938+ * The block group is not fully allocated, so not fully written yet. We
1939+ * need to send ZONE_FINISH command to free up an active zone.
1940+ */
1941+ need_zone_finish = !btrfs_zoned_bg_is_full (block_group );
1942+
19331943 block_group -> zone_is_active = 0 ;
19341944 block_group -> alloc_offset = block_group -> zone_capacity ;
19351945 block_group -> free_space_ctl -> free_space = 0 ;
19361946 btrfs_clear_treelog_bg (block_group );
19371947 btrfs_clear_data_reloc_bg (block_group );
19381948 spin_unlock (& block_group -> lock );
19391949
1950+ map = block_group -> physical_map ;
19401951 for (i = 0 ; i < map -> num_stripes ; i ++ ) {
1941- device = map -> stripes [i ].dev ;
1942- physical = map -> stripes [i ].physical ;
1952+ struct btrfs_device * device = map -> stripes [i ].dev ;
1953+ const u64 physical = map -> stripes [i ].physical ;
19431954
19441955 if (device -> zone_info -> max_active_zones == 0 )
19451956 continue ;
19461957
1947- ret = blkdev_zone_mgmt (device -> bdev , REQ_OP_ZONE_FINISH ,
1948- physical >> SECTOR_SHIFT ,
1949- device -> zone_info -> zone_size >> SECTOR_SHIFT ,
1950- GFP_NOFS );
1958+ if (need_zone_finish ) {
1959+ ret = blkdev_zone_mgmt (device -> bdev , REQ_OP_ZONE_FINISH ,
1960+ physical >> SECTOR_SHIFT ,
1961+ device -> zone_info -> zone_size >> SECTOR_SHIFT ,
1962+ GFP_NOFS );
19511963
1952- if (ret )
1953- return ret ;
1964+ if (ret )
1965+ return ret ;
1966+ }
19541967
19551968 btrfs_dev_clear_active_zone (device , physical );
19561969 }
1957- btrfs_dec_block_group_ro (block_group );
1970+
1971+ if (!fully_written )
1972+ btrfs_dec_block_group_ro (block_group );
19581973
19591974 spin_lock (& fs_info -> zone_active_bgs_lock );
19601975 ASSERT (!list_empty (& block_group -> active_bg_list ));
@@ -1967,6 +1982,14 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
19671982 return 0 ;
19681983}
19691984
1985+ int btrfs_zone_finish (struct btrfs_block_group * block_group )
1986+ {
1987+ 	if (!btrfs_is_zoned (block_group -> fs_info )) /* zone finishing is a no-op on regular (non-zoned) filesystems */
1988+ 	return 0 ;
1989+ 
1990+ 	return do_zone_finish (block_group , false); /* fully_written=false: caller cannot guarantee all IO has completed, so the helper must wait for reservations/ordered extents */
1991+ }
1992+
19701993bool btrfs_can_activate_zone (struct btrfs_fs_devices * fs_devices , u64 flags )
19711994{
19721995 struct btrfs_fs_info * fs_info = fs_devices -> fs_info ;
@@ -1998,9 +2021,6 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
19982021void btrfs_zone_finish_endio (struct btrfs_fs_info * fs_info , u64 logical , u64 length )
19992022{
20002023 struct btrfs_block_group * block_group ;
2001- struct map_lookup * map ;
2002- struct btrfs_device * device ;
2003- u64 physical ;
20042024
20052025 if (!btrfs_is_zoned (fs_info ))
20062026 return ;
@@ -2011,36 +2031,7 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 len
20112031 if (logical + length < block_group -> start + block_group -> zone_capacity )
20122032 goto out ;
20132033
2014- spin_lock (& block_group -> lock );
2015-
2016- if (!block_group -> zone_is_active ) {
2017- spin_unlock (& block_group -> lock );
2018- goto out ;
2019- }
2020-
2021- block_group -> zone_is_active = 0 ;
2022- /* We should have consumed all the free space */
2023- ASSERT (block_group -> alloc_offset == block_group -> zone_capacity );
2024- ASSERT (block_group -> free_space_ctl -> free_space == 0 );
2025- btrfs_clear_treelog_bg (block_group );
2026- btrfs_clear_data_reloc_bg (block_group );
2027- spin_unlock (& block_group -> lock );
2028-
2029- map = block_group -> physical_map ;
2030- device = map -> stripes [0 ].dev ;
2031- physical = map -> stripes [0 ].physical ;
2032-
2033- if (!device -> zone_info -> max_active_zones )
2034- goto out ;
2035-
2036- btrfs_dev_clear_active_zone (device , physical );
2037-
2038- spin_lock (& fs_info -> zone_active_bgs_lock );
2039- ASSERT (!list_empty (& block_group -> active_bg_list ));
2040- list_del_init (& block_group -> active_bg_list );
2041- spin_unlock (& fs_info -> zone_active_bgs_lock );
2042-
2043- btrfs_put_block_group (block_group );
2034+ do_zone_finish (block_group , true);
20442035
20452036out :
20462037 btrfs_put_block_group (block_group );