Skip to content

Commit b639c20

Browse files
LiaoYuanhong-vivo authored and Jaegeuk Kim committed
f2fs: Use allocate_section_policy to control write priority in multi-devices setups
Introduces two new sysfs nodes: allocate_section_hint and allocate_section_policy. The allocate_section_hint identifies the boundary between devices, measured in sections; it defaults to the end of the device for single storage setups, and the end of the first device for multiple storage setups. The allocate_section_policy determines the write strategy, with a default value of 0 for normal sequential write strategy. A value of 1 prioritizes writes before the allocate_section_hint, while a value of 2 prioritizes writes after it.

This strategy addresses the issue where, despite F2FS supporting multiple devices, SOC vendors lack multi-devices support (currently only supporting zoned devices). As a workaround, multiple storage devices are mapped to a single dm device. Both this workaround and the F2FS multi-devices solution may require prioritizing writing to certain devices, such as a device with better performance or when switching is needed due to performance degradation near a device's end. For scenarios with more than two devices, sort them at mount time to utilize this feature.

When using this feature with a single storage device, it has almost no impact. However, for configurations where multiple storage devices are mapped to the same dm device using F2FS, utilizing this feature can provide some optimization benefits. Therefore, I believe it should not be limited to just multi-devices usage.

Signed-off-by: Liao Yuanhong <liaoyuanhong@vivo.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
1 parent 62242ac commit b639c20

6 files changed

Lines changed: 74 additions & 0 deletions

File tree

Documentation/ABI/testing/sysfs-fs-f2fs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -911,3 +911,25 @@ Description: Used to adjust the BG_GC priority when pending IO, with a default v
911911
bggc_io_aware = 1 skip background GC if there is pending read IO
912912
bggc_io_aware = 2 don't aware IO for background GC
913913
================== ======================================================
914+
915+
What: /sys/fs/f2fs/<disk>/allocate_section_hint
916+
Date: August 2025
917+
Contact: "Liao Yuanhong" <liaoyuanhong@vivo.com>
918+
Description: Indicates the boundary section between the first device and the others in a multi-device
919+
setup. It defaults to the end of the first device in sections. For a single storage
920+
device, it defaults to the total number of sections. It can be manually set to match
921+
scenarios where multiple devices are mapped to the same dm device.
922+
923+
What: /sys/fs/f2fs/<disk>/allocate_section_policy
924+
Date: August 2025
925+
Contact: "Liao Yuanhong" <liaoyuanhong@vivo.com>
926+
Description: Controls write priority in multi-devices setups. A value of 0 means normal writing.
927+
A value of 1 prioritizes writing to devices before the allocate_section_hint. A value of 2
928+
prioritizes writing to devices after the allocate_section_hint. The default is 0.
929+
930+
=========================== ==========================================================
931+
value description
932+
allocate_section_policy = 0 Normal writing
933+
allocate_section_policy = 1 Prioritize writing to section before allocate_section_hint
934+
allocate_section_policy = 2 Prioritize writing to section after allocate_section_hint
935+
=========================== ==========================================================

fs/f2fs/f2fs.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,12 @@ enum bggc_io_aware_policy {
162162
AWARE_NONE, /* don't aware IO for background GC */
163163
};
164164

165+
enum device_allocation_policy {
166+
ALLOCATE_FORWARD_NOHINT,
167+
ALLOCATE_FORWARD_WITHIN_HINT,
168+
ALLOCATE_FORWARD_FROM_HINT,
169+
};
170+
165171
/*
166172
* An implementation of an rwsem that is explicitly unfair to readers. This
167173
* prevents priority inversion when a low-priority reader acquires the read lock
@@ -1850,6 +1856,8 @@ struct f2fs_sb_info {
18501856
bool aligned_blksize; /* all devices has the same logical blksize */
18511857
unsigned int first_seq_zone_segno; /* first segno in sequential zone */
18521858
unsigned int bggc_io_aware; /* For adjust the BG_GC priority when pending IO */
1859+
unsigned int allocate_section_hint; /* the boundary position between devices */
1860+
unsigned int allocate_section_policy; /* determine the section writing priority */
18531861

18541862
/* For write statistics */
18551863
u64 sectors_written_start;

fs/f2fs/gc.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2182,13 +2182,18 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
21822182
SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs;
21832183
MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs;
21842184
MAIN_SECS(sbi) += secs;
2185+
if (sbi->allocate_section_hint > MAIN_SECS(sbi))
2186+
sbi->allocate_section_hint = MAIN_SECS(sbi);
21852187
FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs;
21862188
FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs;
21872189
F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + blks);
21882190

21892191
if (f2fs_is_multi_device(sbi)) {
21902192
int last_dev = sbi->s_ndevs - 1;
21912193

2194+
sbi->allocate_section_hint = FDEV(0).total_segments /
2195+
SEGS_PER_SEC(sbi);
2196+
21922197
FDEV(last_dev).total_segments =
21932198
(int)FDEV(last_dev).total_segments + segs;
21942199
FDEV(last_dev).end_blk =

fs/f2fs/segment.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2774,6 +2774,8 @@ static int get_new_segment(struct f2fs_sb_info *sbi,
27742774
unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
27752775
unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
27762776
unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
2777+
unsigned int alloc_policy = sbi->allocate_section_policy;
2778+
unsigned int alloc_hint = sbi->allocate_section_hint;
27772779
bool init = true;
27782780
int i;
27792781
int ret = 0;
@@ -2807,6 +2809,21 @@ static int get_new_segment(struct f2fs_sb_info *sbi,
28072809
}
28082810
#endif
28092811

2812+
/*
2813+
* Prevent allocate_section_hint from exceeding MAIN_SECS()
2814+
* due to desynchronization.
2815+
*/
2816+
if (alloc_policy != ALLOCATE_FORWARD_NOHINT &&
2817+
alloc_hint > MAIN_SECS(sbi))
2818+
alloc_hint = MAIN_SECS(sbi);
2819+
2820+
if (alloc_policy == ALLOCATE_FORWARD_FROM_HINT &&
2821+
hint < alloc_hint)
2822+
hint = alloc_hint;
2823+
else if (alloc_policy == ALLOCATE_FORWARD_WITHIN_HINT &&
2824+
hint >= alloc_hint)
2825+
hint = 0;
2826+
28102827
find_other_zone:
28112828
secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
28122829

fs/f2fs/super.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4234,6 +4234,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
42344234
sbi->total_node_count = SEGS_TO_BLKS(sbi,
42354235
((le32_to_cpu(raw_super->segment_count_nat) / 2) *
42364236
NAT_ENTRY_PER_BLOCK));
4237+
sbi->allocate_section_hint = le32_to_cpu(raw_super->section_count);
4238+
sbi->allocate_section_policy = ALLOCATE_FORWARD_NOHINT;
42374239
F2FS_ROOT_INO(sbi) = le32_to_cpu(raw_super->root_ino);
42384240
F2FS_NODE_INO(sbi) = le32_to_cpu(raw_super->node_ino);
42394241
F2FS_META_INO(sbi) = le32_to_cpu(raw_super->meta_ino);
@@ -4748,6 +4750,8 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
47484750
SEGS_TO_BLKS(sbi,
47494751
FDEV(i).total_segments) - 1 +
47504752
le32_to_cpu(raw_super->segment0_blkaddr);
4753+
sbi->allocate_section_hint = FDEV(i).total_segments /
4754+
SEGS_PER_SEC(sbi);
47514755
} else {
47524756
FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
47534757
FDEV(i).end_blk = FDEV(i).start_blk +

fs/f2fs/sysfs.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -889,6 +889,20 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
889889
return count;
890890
}
891891

892+
if (!strcmp(a->attr.name, "allocate_section_hint")) {
893+
if (t < 0 || t > MAIN_SECS(sbi))
894+
return -EINVAL;
895+
sbi->allocate_section_hint = t;
896+
return count;
897+
}
898+
899+
if (!strcmp(a->attr.name, "allocate_section_policy")) {
900+
if (t < ALLOCATE_FORWARD_NOHINT || t > ALLOCATE_FORWARD_FROM_HINT)
901+
return -EINVAL;
902+
sbi->allocate_section_policy = t;
903+
return count;
904+
}
905+
892906
*ui = (unsigned int)t;
893907

894908
return count;
@@ -1161,6 +1175,8 @@ F2FS_SBI_GENERAL_RW_ATTR(max_victim_search);
11611175
F2FS_SBI_GENERAL_RW_ATTR(migration_granularity);
11621176
F2FS_SBI_GENERAL_RW_ATTR(migration_window_granularity);
11631177
F2FS_SBI_GENERAL_RW_ATTR(dir_level);
1178+
F2FS_SBI_GENERAL_RW_ATTR(allocate_section_hint);
1179+
F2FS_SBI_GENERAL_RW_ATTR(allocate_section_policy);
11641180
#ifdef CONFIG_F2FS_IOSTAT
11651181
F2FS_SBI_GENERAL_RW_ATTR(iostat_enable);
11661182
F2FS_SBI_GENERAL_RW_ATTR(iostat_period_ms);
@@ -1398,6 +1414,8 @@ static struct attribute *f2fs_attrs[] = {
13981414
ATTR_LIST(max_read_extent_count),
13991415
ATTR_LIST(carve_out),
14001416
ATTR_LIST(reserved_pin_section),
1417+
ATTR_LIST(allocate_section_hint),
1418+
ATTR_LIST(allocate_section_policy),
14011419
NULL,
14021420
};
14031421
ATTRIBUTE_GROUPS(f2fs);

0 commit comments

Comments
 (0)