@@ -1028,7 +1028,14 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
10281028 return ret ;
10291029}
10301030
1031- static long btrfs_scan_inode (struct btrfs_inode * inode , long * scanned , long nr_to_scan )
1031+ struct btrfs_em_shrink_ctx {
1032+ long nr_to_scan ;
1033+ long scanned ;
1034+ u64 last_ino ;
1035+ u64 last_root ;
1036+ };
1037+
1038+ static long btrfs_scan_inode (struct btrfs_inode * inode , struct btrfs_em_shrink_ctx * ctx )
10321039{
10331040 const u64 cur_fs_gen = btrfs_get_fs_generation (inode -> root -> fs_info );
10341041 struct extent_map_tree * tree = & inode -> extent_tree ;
@@ -1075,7 +1082,7 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_t
10751082
10761083 em = rb_entry (node , struct extent_map , rb_node );
10771084 node = rb_next (node );
1078- ( * scanned ) ++ ;
1085+ ctx -> scanned ++ ;
10791086
10801087 if (em -> flags & EXTENT_FLAG_PINNED )
10811088 goto next ;
@@ -1096,7 +1103,7 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_t
10961103 free_extent_map (em );
10971104 nr_dropped ++ ;
10981105next :
1099- if (* scanned >= nr_to_scan )
1106+ if (ctx -> scanned >= ctx -> nr_to_scan )
11001107 break ;
11011108
11021109 /*
@@ -1115,22 +1122,21 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_t
11151122 return nr_dropped ;
11161123}
11171124
1118- static long btrfs_scan_root (struct btrfs_root * root , long * scanned , long nr_to_scan )
1125+ static long btrfs_scan_root (struct btrfs_root * root , struct btrfs_em_shrink_ctx * ctx )
11191126{
1120- struct btrfs_fs_info * fs_info = root -> fs_info ;
11211127 struct btrfs_inode * inode ;
11221128 long nr_dropped = 0 ;
1123- u64 min_ino = fs_info -> extent_map_shrinker_last_ino + 1 ;
1129+ u64 min_ino = ctx -> last_ino + 1 ;
11241130
11251131 inode = btrfs_find_first_inode (root , min_ino );
11261132 while (inode ) {
1127- nr_dropped += btrfs_scan_inode (inode , scanned , nr_to_scan );
1133+ nr_dropped += btrfs_scan_inode (inode , ctx );
11281134
11291135 min_ino = btrfs_ino (inode ) + 1 ;
1130- fs_info -> extent_map_shrinker_last_ino = btrfs_ino (inode );
1136+ ctx -> last_ino = btrfs_ino (inode );
11311137 btrfs_add_delayed_iput (inode );
11321138
1133- if (* scanned >= nr_to_scan )
1139+ if (ctx -> scanned >= ctx -> nr_to_scan )
11341140 break ;
11351141
11361142 /*
@@ -1151,38 +1157,56 @@ static long btrfs_scan_root(struct btrfs_root *root, long *scanned, long nr_to_s
11511157 * inode if there is one or we will find out this was the last
11521158 * one and move to the next root.
11531159 */
1154- fs_info -> extent_map_shrinker_last_root = btrfs_root_id (root );
1160+ ctx -> last_root = btrfs_root_id (root );
11551161 } else {
11561162 /*
11571163 * No more inodes in this root, set extent_map_shrinker_last_ino to 0 so
11581164 * that when processing the next root we start from its first inode.
11591165 */
1160- fs_info -> extent_map_shrinker_last_ino = 0 ;
1161- fs_info -> extent_map_shrinker_last_root = btrfs_root_id (root ) + 1 ;
1166+ ctx -> last_ino = 0 ;
1167+ ctx -> last_root = btrfs_root_id (root ) + 1 ;
11621168 }
11631169
11641170 return nr_dropped ;
11651171}
11661172
11671173long btrfs_free_extent_maps (struct btrfs_fs_info * fs_info , long nr_to_scan )
11681174{
1169- const u64 start_root_id = fs_info -> extent_map_shrinker_last_root ;
1170- u64 next_root_id = start_root_id ;
1175+ struct btrfs_em_shrink_ctx ctx ;
1176+ u64 start_root_id ;
1177+ u64 next_root_id ;
11711178 bool cycled = false;
11721179 long nr_dropped = 0 ;
1173- long scanned = 0 ;
1180+
1181+ ctx .scanned = 0 ;
1182+ ctx .nr_to_scan = nr_to_scan ;
1183+
1184+ /*
1185+ * In case we have multiple tasks running this shrinker, make the next
1186+ * one start from the next inode in case it starts before we finish.
1187+ */
1188+ spin_lock (& fs_info -> extent_map_shrinker_lock );
1189+ ctx .last_ino = fs_info -> extent_map_shrinker_last_ino ;
1190+ fs_info -> extent_map_shrinker_last_ino ++ ;
1191+ ctx .last_root = fs_info -> extent_map_shrinker_last_root ;
1192+ spin_unlock (& fs_info -> extent_map_shrinker_lock );
1193+
1194+ start_root_id = ctx .last_root ;
1195+ next_root_id = ctx .last_root ;
11741196
11751197 if (trace_btrfs_extent_map_shrinker_scan_enter_enabled ()) {
11761198 s64 nr = percpu_counter_sum_positive (& fs_info -> evictable_extent_maps );
11771199
1178- trace_btrfs_extent_map_shrinker_scan_enter (fs_info , nr_to_scan , nr );
1200+ trace_btrfs_extent_map_shrinker_scan_enter (fs_info , nr_to_scan ,
1201+ nr , ctx .last_root ,
1202+ ctx .last_ino );
11791203 }
11801204
11811205 /*
11821206 * We may be called from memory allocation paths, so we don't want to
11831207 * take too much time and slowdown tasks, so stop if we need reschedule.
11841208 */
1185- while (scanned < nr_to_scan && !need_resched ()) {
1209+ while (ctx . scanned < ctx . nr_to_scan && !need_resched ()) {
11861210 struct btrfs_root * root ;
11871211 unsigned long count ;
11881212
@@ -1194,8 +1218,8 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
11941218 spin_unlock (& fs_info -> fs_roots_radix_lock );
11951219 if (start_root_id > 0 && !cycled ) {
11961220 next_root_id = 0 ;
1197- fs_info -> extent_map_shrinker_last_root = 0 ;
1198- fs_info -> extent_map_shrinker_last_ino = 0 ;
1221+ ctx . last_root = 0 ;
1222+ ctx . last_ino = 0 ;
11991223 cycled = true;
12001224 continue ;
12011225 }
@@ -1209,15 +1233,33 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
12091233 continue ;
12101234
12111235 if (is_fstree (btrfs_root_id (root )))
1212- nr_dropped += btrfs_scan_root (root , & scanned , nr_to_scan );
1236+ nr_dropped += btrfs_scan_root (root , & ctx );
12131237
12141238 btrfs_put_root (root );
12151239 }
12161240
1241+ /*
1242+ * In case of multiple tasks running this extent map shrinking code, this
1243+ * isn't perfect but it's simple and silences things like KCSAN. It's
1244+ * not possible to know which task made more progress because we can
1245+ * cycle back to the first root and first inode if it's not the first
1246+ * time the shrinker ran, see the above logic. Also a task that started
1247+ * later may finish earlier than another task and have made less progress. So
1248+ * make this simple and update to the progress of the last task that
1249+ * finished, with the occasional possiblity of having two consecutive
1250+ * runs of the shrinker process the same inodes.
1251+ */
1252+ spin_lock (& fs_info -> extent_map_shrinker_lock );
1253+ fs_info -> extent_map_shrinker_last_ino = ctx .last_ino ;
1254+ fs_info -> extent_map_shrinker_last_root = ctx .last_root ;
1255+ spin_unlock (& fs_info -> extent_map_shrinker_lock );
1256+
12171257 if (trace_btrfs_extent_map_shrinker_scan_exit_enabled ()) {
12181258 s64 nr = percpu_counter_sum_positive (& fs_info -> evictable_extent_maps );
12191259
1220- trace_btrfs_extent_map_shrinker_scan_exit (fs_info , nr_dropped , nr );
1260+ trace_btrfs_extent_map_shrinker_scan_exit (fs_info , nr_dropped ,
1261+ nr , ctx .last_root ,
1262+ ctx .last_ino );
12211263 }
12221264
12231265 return nr_dropped ;
0 commit comments