4343#include <linux/prefetch.h>
4444#include <linux/memcontrol.h>
4545#include <linux/random.h>
46+ #include <linux/prandom.h>
4647#include <kunit/test.h>
4748#include <kunit/test-bug.h>
4849#include <linux/sort.h>
@@ -2189,8 +2190,6 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
21892190 virt_to_slab (vec )-> slab_cache == s );
21902191
21912192 new_exts = (unsigned long )vec ;
2192- if (unlikely (!allow_spin ))
2193- new_exts |= OBJEXTS_NOSPIN_ALLOC ;
21942193#ifdef CONFIG_MEMCG
21952194 new_exts |= MEMCG_DATA_OBJEXTS ;
21962195#endif
@@ -2228,7 +2227,7 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
22282227 return 0 ;
22292228}
22302229
2231- static inline void free_slab_obj_exts (struct slab * slab )
2230+ static inline void free_slab_obj_exts (struct slab * slab , bool allow_spin )
22322231{
22332232 struct slabobj_ext * obj_exts ;
22342233
@@ -2256,10 +2255,10 @@ static inline void free_slab_obj_exts(struct slab *slab)
22562255 * the extension for obj_exts is expected to be NULL.
22572256 */
22582257 mark_objexts_empty (obj_exts );
2259- if (unlikely (READ_ONCE (slab -> obj_exts ) & OBJEXTS_NOSPIN_ALLOC ))
2260- kfree_nolock (obj_exts );
2261- else
2258+ if (allow_spin )
22622259 kfree (obj_exts );
2260+ else
2261+ kfree_nolock (obj_exts );
22632262 slab -> obj_exts = 0 ;
22642263}
22652264
@@ -2323,7 +2322,7 @@ static int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
23232322 return 0 ;
23242323}
23252324
2326- static inline void free_slab_obj_exts (struct slab * slab )
2325+ static inline void free_slab_obj_exts (struct slab * slab , bool allow_spin )
23272326{
23282327}
23292328
@@ -2584,6 +2583,24 @@ struct rcu_delayed_free {
25842583 * Returns true if freeing of the object can proceed, false if its reuse
25852584 * was delayed by CONFIG_SLUB_RCU_DEBUG or KASAN quarantine, or it was returned
25862585 * to KFENCE.
2586+ *
2587+ * For objects allocated via kmalloc_nolock(), only a subset of alloc hooks
2588+ * are invoked, so some free hooks must handle asymmetric hook calls.
2589+ *
2590+ * Alloc hooks called for kmalloc_nolock():
2591+ * - kmsan_slab_alloc()
2592+ * - kasan_slab_alloc()
2593+ * - memcg_slab_post_alloc_hook()
2594+ * - alloc_tagging_slab_alloc_hook()
2595+ *
2596+ * Free hooks that must handle missing corresponding alloc hooks:
2597+ * - kmemleak_free_recursive()
2598+ * - kfence_free()
2599+ *
2600+ * Free hooks that have no alloc hook counterpart, and are thus safe to call:
2601+ * - debug_check_no_locks_freed()
2602+ * - debug_check_no_obj_freed()
2603+ * - __kcsan_check_access()
25872604 */
25882605static __always_inline
25892606bool slab_free_hook (struct kmem_cache * s , void * x , bool init ,
@@ -3311,8 +3328,11 @@ static void *next_freelist_entry(struct kmem_cache *s,
33113328 return (char * )start + idx ;
33123329}
33133330
3331+ static DEFINE_PER_CPU (struct rnd_state , slab_rnd_state ) ;
3332+
33143333/* Shuffle the single linked freelist based on a random pre-computed sequence */
3315- static bool shuffle_freelist (struct kmem_cache * s , struct slab * slab )
3334+ static bool shuffle_freelist (struct kmem_cache * s , struct slab * slab ,
3335+ bool allow_spin )
33163336{
33173337 void * start ;
33183338 void * cur ;
@@ -3323,7 +3343,19 @@ static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
33233343 return false;
33243344
33253345 freelist_count = oo_objects (s -> oo );
3326- pos = get_random_u32_below (freelist_count );
3346+ if (allow_spin ) {
3347+ pos = get_random_u32_below (freelist_count );
3348+ } else {
3349+ struct rnd_state * state ;
3350+
3351+ /*
3352+ * An interrupt or NMI handler might run in the middle of this
3353+ * and change the state, but that's safe.
3354+ */
3355+ state = & get_cpu_var (slab_rnd_state );
3356+ pos = prandom_u32_state (state ) % freelist_count ;
3357+ put_cpu_var (slab_rnd_state );
3358+ }
33273359
33283360 page_limit = slab -> objects * s -> size ;
33293361 start = fixup_red_left (s , slab_address (slab ));
@@ -3350,7 +3382,8 @@ static inline int init_cache_random_seq(struct kmem_cache *s)
33503382 return 0 ;
33513383}
33523384static inline void init_freelist_randomization (void ) { }
3353- static inline bool shuffle_freelist (struct kmem_cache * s , struct slab * slab )
3385+ static inline bool shuffle_freelist (struct kmem_cache * s , struct slab * slab ,
3386+ bool allow_spin )
33543387{
33553388 return false;
33563389}
@@ -3369,14 +3402,14 @@ static __always_inline void account_slab(struct slab *slab, int order,
33693402}
33703403
33713404static __always_inline void unaccount_slab (struct slab * slab , int order ,
3372- struct kmem_cache * s )
3405+ struct kmem_cache * s , bool allow_spin )
33733406{
33743407 /*
33753408 * The slab object extensions should now be freed regardless of
33763409 * whether mem_alloc_profiling_enabled() or not because profiling
33773410 * might have been disabled after slab->obj_exts got allocated.
33783411 */
3379- free_slab_obj_exts (slab );
3412+ free_slab_obj_exts (slab , allow_spin );
33803413
33813414 mod_node_page_state (slab_pgdat (slab ), cache_vmstat_idx (s ),
33823415 - (PAGE_SIZE << order ));
@@ -3441,7 +3474,7 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
34413474 alloc_slab_obj_exts_early (s , slab );
34423475 account_slab (slab , oo_order (oo ), s , flags );
34433476
3444- shuffle = shuffle_freelist (s , slab );
3477+ shuffle = shuffle_freelist (s , slab , allow_spin );
34453478
34463479 if (!shuffle ) {
34473480 start = fixup_red_left (s , start );
@@ -3480,7 +3513,7 @@ static void __free_slab(struct kmem_cache *s, struct slab *slab, bool allow_spin
34803513 page -> mapping = NULL ;
34813514 __ClearPageSlab (page );
34823515 mm_account_reclaimed_pages (pages );
3483- unaccount_slab (slab , order , s );
3516+ unaccount_slab (slab , order , s , allow_spin );
34843517 if (allow_spin )
34853518 free_frozen_pages (page , order );
34863519 else
@@ -3791,6 +3824,7 @@ static void *get_from_any_partial(struct kmem_cache *s, struct partial_context *
37913824 struct zone * zone ;
37923825 enum zone_type highest_zoneidx = gfp_zone (pc -> flags );
37933826 unsigned int cpuset_mems_cookie ;
3827+ bool allow_spin = gfpflags_allow_spinning (pc -> flags );
37943828
37953829 /*
37963830 * The defrag ratio allows a configuration of the tradeoffs between
@@ -3815,7 +3849,15 @@ static void *get_from_any_partial(struct kmem_cache *s, struct partial_context *
38153849 return NULL ;
38163850
38173851 do {
3818- cpuset_mems_cookie = read_mems_allowed_begin ();
3852+ /*
3853+ * read_mems_allowed_begin() accesses current->mems_allowed_seq,
3854+ * a seqcount_spinlock_t that is not NMI-safe. Do not access
3855+ * current->mems_allowed_seq and avoid retry when GFP flags
3856+ * indicate spinning is not allowed.
3857+ */
3858+ if (allow_spin )
3859+ cpuset_mems_cookie = read_mems_allowed_begin ();
3860+
38193861 zonelist = node_zonelist (mempolicy_slab_node (), pc -> flags );
38203862 for_each_zone_zonelist (zone , z , zonelist , highest_zoneidx ) {
38213863 struct kmem_cache_node * n ;
@@ -3839,7 +3881,7 @@ static void *get_from_any_partial(struct kmem_cache *s, struct partial_context *
38393881 }
38403882 }
38413883 }
3842- } while (read_mems_allowed_retry (cpuset_mems_cookie ));
3884+ } while (allow_spin && read_mems_allowed_retry (cpuset_mems_cookie ));
38433885#endif /* CONFIG_NUMA */
38443886 return NULL ;
38453887}
@@ -6372,7 +6414,7 @@ void kvfree_rcu_cb(struct rcu_head *head)
63726414
63736415/**
63746416 * kfree - free previously allocated memory
6375- * @object: pointer returned by kmalloc() or kmem_cache_alloc()
6417+ * @object: pointer returned by kmalloc(), kmalloc_nolock(), or kmem_cache_alloc()
63766418 *
63776419 * If @object is NULL, no operation is performed.
63786420 */
@@ -6391,6 +6433,7 @@ void kfree(const void *object)
63916433 page = virt_to_page (object );
63926434 slab = page_slab (page );
63936435 if (!slab ) {
6436+ /* kmalloc_nolock() doesn't support large kmalloc */
63946437 free_large_kmalloc (page , (void * )object );
63956438 return ;
63966439 }
@@ -8337,6 +8380,9 @@ void __init kmem_cache_init_late(void)
83378380 flushwq = alloc_workqueue ("slub_flushwq" , WQ_MEM_RECLAIM | WQ_PERCPU ,
83388381 0 );
83398382 WARN_ON (!flushwq );
8383+ #ifdef CONFIG_SLAB_FREELIST_RANDOM
8384+ prandom_init_once (& slab_rnd_state );
8385+ #endif
83408386}
83418387
83428388int do_kmem_cache_create (struct kmem_cache * s , const char * name ,
0 commit comments