Skip to content

Commit 9702969

Browse files
committed
Merge tag 'slab-for-7.0-part2' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab
Pull more slab updates from Vlastimil Babka:

 - Two stable fixes for kmalloc_nolock() usage from NMI context (Harry Yoo)

 - Allow kmalloc_nolock() allocations to be freed with kfree() and thus
   also kfree_rcu(), and simplify slabobj_ext handling — we no longer need
   to track how it was allocated to use the matching freeing function
   (Harry Yoo)

* tag 'slab-for-7.0-part2' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab:
  mm/slab: drop the OBJEXTS_NOSPIN_ALLOC flag from enum objext_flags
  mm/slab: allow freeing kmalloc_nolock()'d objects using kfree[_rcu]()
  mm/slab: use prandom if !allow_spin
  mm/slab: do not access current->mems_allowed_seq if !allow_spin
2 parents 45a43ac + 27125df commit 9702969

4 files changed

Lines changed: 76 additions & 33 deletions

File tree

include/linux/memcontrol.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -359,8 +359,7 @@ enum objext_flags {
359359
* MEMCG_DATA_OBJEXTS.
360360
*/
361361
OBJEXTS_ALLOC_FAIL = __OBJEXTS_ALLOC_FAIL,
362-
/* slabobj_ext vector allocated with kmalloc_nolock() */
363-
OBJEXTS_NOSPIN_ALLOC = __FIRST_OBJEXT_FLAG,
362+
__OBJEXTS_FLAG_UNUSED = __FIRST_OBJEXT_FLAG,
364363
/* the next bit after the last actual flag */
365364
__NR_OBJEXTS_FLAGS = (__FIRST_OBJEXT_FLAG << 1),
366365
};

include/linux/rcupdate.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,8 +1074,8 @@ static inline void rcu_read_unlock_migrate(void)
10741074
* either fall back to use of call_rcu() or rearrange the structure to
10751075
* position the rcu_head structure into the first 4096 bytes.
10761076
*
1077-
* The object to be freed can be allocated either by kmalloc() or
1078-
* kmem_cache_alloc().
1077+
* The object to be freed can be allocated either by kmalloc(),
1078+
* kmalloc_nolock(), or kmem_cache_alloc().
10791079
*
10801080
* Note that the allowable offset might decrease in the future.
10811081
*

mm/kmemleak.c

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -837,13 +837,12 @@ static void delete_object_full(unsigned long ptr, unsigned int objflags)
837837
struct kmemleak_object *object;
838838

839839
object = find_and_remove_object(ptr, 0, objflags);
840-
if (!object) {
841-
#ifdef DEBUG
842-
kmemleak_warn("Freeing unknown object at 0x%08lx\n",
843-
ptr);
844-
#endif
840+
if (!object)
841+
/*
842+
* kmalloc_nolock() -> kfree() calls kmemleak_free()
843+
* without kmemleak_alloc().
844+
*/
845845
return;
846-
}
847846
__delete_object(object);
848847
}
849848

@@ -926,13 +925,12 @@ static void paint_ptr(unsigned long ptr, int color, unsigned int objflags)
926925
struct kmemleak_object *object;
927926

928927
object = __find_and_get_object(ptr, 0, objflags);
929-
if (!object) {
930-
kmemleak_warn("Trying to color unknown object at 0x%08lx as %s\n",
931-
ptr,
932-
(color == KMEMLEAK_GREY) ? "Grey" :
933-
(color == KMEMLEAK_BLACK) ? "Black" : "Unknown");
928+
if (!object)
929+
/*
930+
* kmalloc_nolock() -> kfree_rcu() calls kmemleak_ignore()
931+
* without kmemleak_alloc().
932+
*/
934933
return;
935-
}
936934
paint_it(object, color);
937935
put_object(object);
938936
}

mm/slub.c

Lines changed: 63 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include <linux/prefetch.h>
4444
#include <linux/memcontrol.h>
4545
#include <linux/random.h>
46+
#include <linux/prandom.h>
4647
#include <kunit/test.h>
4748
#include <kunit/test-bug.h>
4849
#include <linux/sort.h>
@@ -2189,8 +2190,6 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
21892190
virt_to_slab(vec)->slab_cache == s);
21902191

21912192
new_exts = (unsigned long)vec;
2192-
if (unlikely(!allow_spin))
2193-
new_exts |= OBJEXTS_NOSPIN_ALLOC;
21942193
#ifdef CONFIG_MEMCG
21952194
new_exts |= MEMCG_DATA_OBJEXTS;
21962195
#endif
@@ -2228,7 +2227,7 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
22282227
return 0;
22292228
}
22302229

2231-
static inline void free_slab_obj_exts(struct slab *slab)
2230+
static inline void free_slab_obj_exts(struct slab *slab, bool allow_spin)
22322231
{
22332232
struct slabobj_ext *obj_exts;
22342233

@@ -2256,10 +2255,10 @@ static inline void free_slab_obj_exts(struct slab *slab)
22562255
* the extension for obj_exts is expected to be NULL.
22572256
*/
22582257
mark_objexts_empty(obj_exts);
2259-
if (unlikely(READ_ONCE(slab->obj_exts) & OBJEXTS_NOSPIN_ALLOC))
2260-
kfree_nolock(obj_exts);
2261-
else
2258+
if (allow_spin)
22622259
kfree(obj_exts);
2260+
else
2261+
kfree_nolock(obj_exts);
22632262
slab->obj_exts = 0;
22642263
}
22652264

@@ -2323,7 +2322,7 @@ static int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
23232322
return 0;
23242323
}
23252324

2326-
static inline void free_slab_obj_exts(struct slab *slab)
2325+
static inline void free_slab_obj_exts(struct slab *slab, bool allow_spin)
23272326
{
23282327
}
23292328

@@ -2584,6 +2583,24 @@ struct rcu_delayed_free {
25842583
* Returns true if freeing of the object can proceed, false if its reuse
25852584
* was delayed by CONFIG_SLUB_RCU_DEBUG or KASAN quarantine, or it was returned
25862585
* to KFENCE.
2586+
*
2587+
* For objects allocated via kmalloc_nolock(), only a subset of alloc hooks
2588+
* are invoked, so some free hooks must handle asymmetric hook calls.
2589+
*
2590+
* Alloc hooks called for kmalloc_nolock():
2591+
* - kmsan_slab_alloc()
2592+
* - kasan_slab_alloc()
2593+
* - memcg_slab_post_alloc_hook()
2594+
* - alloc_tagging_slab_alloc_hook()
2595+
*
2596+
* Free hooks that must handle missing corresponding alloc hooks:
2597+
* - kmemleak_free_recursive()
2598+
* - kfence_free()
2599+
*
2600+
* Free hooks that have no alloc hook counterpart, and thus safe to call:
2601+
* - debug_check_no_locks_freed()
2602+
* - debug_check_no_obj_freed()
2603+
* - __kcsan_check_access()
25872604
*/
25882605
static __always_inline
25892606
bool slab_free_hook(struct kmem_cache *s, void *x, bool init,
@@ -3311,8 +3328,11 @@ static void *next_freelist_entry(struct kmem_cache *s,
33113328
return (char *)start + idx;
33123329
}
33133330

3331+
static DEFINE_PER_CPU(struct rnd_state, slab_rnd_state);
3332+
33143333
/* Shuffle the single linked freelist based on a random pre-computed sequence */
3315-
static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
3334+
static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab,
3335+
bool allow_spin)
33163336
{
33173337
void *start;
33183338
void *cur;
@@ -3323,7 +3343,19 @@ static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
33233343
return false;
33243344

33253345
freelist_count = oo_objects(s->oo);
3326-
pos = get_random_u32_below(freelist_count);
3346+
if (allow_spin) {
3347+
pos = get_random_u32_below(freelist_count);
3348+
} else {
3349+
struct rnd_state *state;
3350+
3351+
/*
3352+
* An interrupt or NMI handler might interrupt and change
3353+
* the state in the middle, but that's safe.
3354+
*/
3355+
state = &get_cpu_var(slab_rnd_state);
3356+
pos = prandom_u32_state(state) % freelist_count;
3357+
put_cpu_var(slab_rnd_state);
3358+
}
33273359

33283360
page_limit = slab->objects * s->size;
33293361
start = fixup_red_left(s, slab_address(slab));
@@ -3350,7 +3382,8 @@ static inline int init_cache_random_seq(struct kmem_cache *s)
33503382
return 0;
33513383
}
33523384
static inline void init_freelist_randomization(void) { }
3353-
static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
3385+
static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab,
3386+
bool allow_spin)
33543387
{
33553388
return false;
33563389
}
@@ -3369,14 +3402,14 @@ static __always_inline void account_slab(struct slab *slab, int order,
33693402
}
33703403

33713404
static __always_inline void unaccount_slab(struct slab *slab, int order,
3372-
struct kmem_cache *s)
3405+
struct kmem_cache *s, bool allow_spin)
33733406
{
33743407
/*
33753408
* The slab object extensions should now be freed regardless of
33763409
* whether mem_alloc_profiling_enabled() or not because profiling
33773410
* might have been disabled after slab->obj_exts got allocated.
33783411
*/
3379-
free_slab_obj_exts(slab);
3412+
free_slab_obj_exts(slab, allow_spin);
33803413

33813414
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
33823415
-(PAGE_SIZE << order));
@@ -3441,7 +3474,7 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
34413474
alloc_slab_obj_exts_early(s, slab);
34423475
account_slab(slab, oo_order(oo), s, flags);
34433476

3444-
shuffle = shuffle_freelist(s, slab);
3477+
shuffle = shuffle_freelist(s, slab, allow_spin);
34453478

34463479
if (!shuffle) {
34473480
start = fixup_red_left(s, start);
@@ -3480,7 +3513,7 @@ static void __free_slab(struct kmem_cache *s, struct slab *slab, bool allow_spin
34803513
page->mapping = NULL;
34813514
__ClearPageSlab(page);
34823515
mm_account_reclaimed_pages(pages);
3483-
unaccount_slab(slab, order, s);
3516+
unaccount_slab(slab, order, s, allow_spin);
34843517
if (allow_spin)
34853518
free_frozen_pages(page, order);
34863519
else
@@ -3791,6 +3824,7 @@ static void *get_from_any_partial(struct kmem_cache *s, struct partial_context *
37913824
struct zone *zone;
37923825
enum zone_type highest_zoneidx = gfp_zone(pc->flags);
37933826
unsigned int cpuset_mems_cookie;
3827+
bool allow_spin = gfpflags_allow_spinning(pc->flags);
37943828

37953829
/*
37963830
* The defrag ratio allows a configuration of the tradeoffs between
@@ -3815,7 +3849,15 @@ static void *get_from_any_partial(struct kmem_cache *s, struct partial_context *
38153849
return NULL;
38163850

38173851
do {
3818-
cpuset_mems_cookie = read_mems_allowed_begin();
3852+
/*
3853+
* read_mems_allowed_begin() accesses current->mems_allowed_seq,
3854+
* a seqcount_spinlock_t that is not NMI-safe. Do not access
3855+
* current->mems_allowed_seq and avoid retry when GFP flags
3856+
* indicate spinning is not allowed.
3857+
*/
3858+
if (allow_spin)
3859+
cpuset_mems_cookie = read_mems_allowed_begin();
3860+
38193861
zonelist = node_zonelist(mempolicy_slab_node(), pc->flags);
38203862
for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
38213863
struct kmem_cache_node *n;
@@ -3839,7 +3881,7 @@ static void *get_from_any_partial(struct kmem_cache *s, struct partial_context *
38393881
}
38403882
}
38413883
}
3842-
} while (read_mems_allowed_retry(cpuset_mems_cookie));
3884+
} while (allow_spin && read_mems_allowed_retry(cpuset_mems_cookie));
38433885
#endif /* CONFIG_NUMA */
38443886
return NULL;
38453887
}
@@ -6372,7 +6414,7 @@ void kvfree_rcu_cb(struct rcu_head *head)
63726414

63736415
/**
63746416
* kfree - free previously allocated memory
6375-
* @object: pointer returned by kmalloc() or kmem_cache_alloc()
6417+
* @object: pointer returned by kmalloc(), kmalloc_nolock(), or kmem_cache_alloc()
63766418
*
63776419
* If @object is NULL, no operation is performed.
63786420
*/
@@ -6391,6 +6433,7 @@ void kfree(const void *object)
63916433
page = virt_to_page(object);
63926434
slab = page_slab(page);
63936435
if (!slab) {
6436+
/* kmalloc_nolock() doesn't support large kmalloc */
63946437
free_large_kmalloc(page, (void *)object);
63956438
return;
63966439
}
@@ -8337,6 +8380,9 @@ void __init kmem_cache_init_late(void)
83378380
flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM | WQ_PERCPU,
83388381
0);
83398382
WARN_ON(!flushwq);
8383+
#ifdef CONFIG_SLAB_FREELIST_RANDOM
8384+
prandom_init_once(&slab_rnd_state);
8385+
#endif
83408386
}
83418387

83428388
int do_kmem_cache_create(struct kmem_cache *s, const char *name,

0 commit comments

Comments
 (0)