@@ -421,10 +421,19 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
421421 * since btrfs adds the pages one by one to a bio, and btrfs cannot
422422 * increase the metadata reservation even if it increases the number of
423423 * extents, it is safe to stick with the limit.
424+ *
 425+ * With the zoned emulation, we can have a non-zoned device in zoned
 426+ * mode. In this case, we don't have a valid max zone append size. So,
427+ * use max_segments * PAGE_SIZE as the pseudo max_zone_append_size.
424428 */
425- zone_info -> max_zone_append_size =
426- min_t (u64 , (u64 )bdev_max_zone_append_sectors (bdev ) << SECTOR_SHIFT ,
427- (u64 )bdev_max_segments (bdev ) << PAGE_SHIFT );
429+ if (bdev_is_zoned (bdev )) {
430+ zone_info -> max_zone_append_size = min_t (u64 ,
431+ (u64 )bdev_max_zone_append_sectors (bdev ) << SECTOR_SHIFT ,
432+ (u64 )bdev_max_segments (bdev ) << PAGE_SHIFT );
433+ } else {
434+ zone_info -> max_zone_append_size =
435+ (u64 )bdev_max_segments (bdev ) << PAGE_SHIFT ;
436+ }
428437 if (!IS_ALIGNED (nr_sectors , zone_sectors ))
429438 zone_info -> nr_zones ++ ;
430439
@@ -1178,7 +1187,7 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
11781187 * offset.
11791188 */
11801189static int calculate_alloc_pointer (struct btrfs_block_group * cache ,
1181- u64 * offset_ret )
1190+ u64 * offset_ret , bool new )
11821191{
11831192 struct btrfs_fs_info * fs_info = cache -> fs_info ;
11841193 struct btrfs_root * root ;
@@ -1188,6 +1197,21 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache,
11881197 int ret ;
11891198 u64 length ;
11901199
1200+ /*
 1201+ * Avoid tree lookups for a new block group; they are of no use for it.
1202+ * It must always be 0.
1203+ *
1204+ * Also, we have a lock chain of extent buffer lock -> chunk mutex.
 1205+ * For a new block group, this function is called from
1206+ * btrfs_make_block_group() which is already taking the chunk mutex.
 1207+ * Thus, we must skip the tree lookup below (it takes extent
 1208+ * buffer locks) to avoid deadlock.
1209+ */
1210+ if (new ) {
1211+ * offset_ret = 0 ;
1212+ return 0 ;
1213+ }
1214+
11911215 path = btrfs_alloc_path ();
11921216 if (!path )
11931217 return - ENOMEM ;
@@ -1323,6 +1347,13 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
13231347 else
13241348 num_conventional ++ ;
13251349
1350+ /*
1351+ * Consider a zone as active if we can allow any number of
1352+ * active zones.
1353+ */
1354+ if (!device -> zone_info -> max_active_zones )
1355+ __set_bit (i , active );
1356+
13261357 if (!is_sequential ) {
13271358 alloc_offsets [i ] = WP_CONVENTIONAL ;
13281359 continue ;
@@ -1389,45 +1420,23 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
13891420 __set_bit (i , active );
13901421 break ;
13911422 }
1392-
1393- /*
1394- * Consider a zone as active if we can allow any number of
1395- * active zones.
1396- */
1397- if (!device -> zone_info -> max_active_zones )
1398- __set_bit (i , active );
13991423 }
14001424
14011425 if (num_sequential > 0 )
14021426 cache -> seq_zone = true;
14031427
14041428 if (num_conventional > 0 ) {
1405- /*
1406- * Avoid calling calculate_alloc_pointer() for new BG. It
1407- * is no use for new BG. It must be always 0.
1408- *
1409- * Also, we have a lock chain of extent buffer lock ->
1410- * chunk mutex. For new BG, this function is called from
1411- * btrfs_make_block_group() which is already taking the
1412- * chunk mutex. Thus, we cannot call
1413- * calculate_alloc_pointer() which takes extent buffer
1414- * locks to avoid deadlock.
1415- */
1416-
14171429 /* Zone capacity is always zone size in emulation */
14181430 cache -> zone_capacity = cache -> length ;
1419- if (new ) {
1420- cache -> alloc_offset = 0 ;
1421- goto out ;
1422- }
1423- ret = calculate_alloc_pointer (cache , & last_alloc );
1424- if (ret || map -> num_stripes == num_conventional ) {
1425- if (!ret )
1426- cache -> alloc_offset = last_alloc ;
1427- else
1428- btrfs_err (fs_info ,
1431+ ret = calculate_alloc_pointer (cache , & last_alloc , new );
1432+ if (ret ) {
1433+ btrfs_err (fs_info ,
14291434 "zoned: failed to determine allocation offset of bg %llu" ,
1430- cache -> start );
1435+ cache -> start );
1436+ goto out ;
1437+ } else if (map -> num_stripes == num_conventional ) {
1438+ cache -> alloc_offset = last_alloc ;
1439+ cache -> zone_is_active = 1 ;
14311440 goto out ;
14321441 }
14331442 }
@@ -1495,13 +1504,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
14951504 goto out ;
14961505 }
14971506
1498- if (cache -> zone_is_active ) {
1499- btrfs_get_block_group (cache );
1500- spin_lock (& fs_info -> zone_active_bgs_lock );
1501- list_add_tail (& cache -> active_bg_list , & fs_info -> zone_active_bgs );
1502- spin_unlock (& fs_info -> zone_active_bgs_lock );
1503- }
1504-
15051507out :
15061508 if (cache -> alloc_offset > fs_info -> zone_size ) {
15071509 btrfs_err (fs_info ,
@@ -1526,10 +1528,16 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
15261528 ret = - EIO ;
15271529 }
15281530
1529- if (!ret )
1531+ if (!ret ) {
15301532 cache -> meta_write_pointer = cache -> alloc_offset + cache -> start ;
1531-
1532- if (ret ) {
1533+ if (cache -> zone_is_active ) {
1534+ btrfs_get_block_group (cache );
1535+ spin_lock (& fs_info -> zone_active_bgs_lock );
1536+ list_add_tail (& cache -> active_bg_list ,
1537+ & fs_info -> zone_active_bgs );
1538+ spin_unlock (& fs_info -> zone_active_bgs_lock );
1539+ }
1540+ } else {
15331541 kfree (cache -> physical_map );
15341542 cache -> physical_map = NULL ;
15351543 }
@@ -2007,8 +2015,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
20072015 /* For active_bg_list */
20082016 btrfs_put_block_group (block_group );
20092017
2010- clear_bit (BTRFS_FS_NEED_ZONE_FINISH , & fs_info -> flags );
2011- wake_up_all (& fs_info -> zone_finish_wait );
2018+ clear_and_wake_up_bit (BTRFS_FS_NEED_ZONE_FINISH , & fs_info -> flags );
20122019
20132020 return 0 ;
20142021}
0 commit comments