
Commit f3421f8

slab: introduce percpu sheaves bootstrap
Until now, kmem_cache->cpu_sheaves was !NULL only for caches with sheaves
enabled. Since we want to enable them for almost all caches, it's suboptimal
to test the pointer in the fast paths, so instead allocate it for all caches
in do_kmem_cache_create(). Instead of testing the cpu_sheaves pointer to
recognize caches (yet) without sheaves, test kmem_cache->sheaf_capacity for
being 0, where needed, using a new cache_has_sheaves() helper.

However, for the fast paths' sake we also assume that the main sheaf always
exists (pcs->main is !NULL), and during bootstrap we cannot allocate sheaves
yet. Solve this by introducing a single static bootstrap_sheaf that is
assigned as pcs->main during bootstrap. It has a size of 0, so the allocation
fast path will find it empty. Since a size of 0 also matches a sheaf_capacity
of 0, the freeing fast paths will find it "full". In the slow path handlers,
we use cache_has_sheaves() to recognize that the cache doesn't (yet) have
real sheaves, and fall back. Sharing the single bootstrap sheaf like this
between multiple caches and cpus is therefore safe.

Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Reviewed-by: Hao Li <hao.li@linux.dev>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
1 parent e47c897 commit f3421f8

3 files changed

Lines changed: 97 additions & 47 deletions
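
The mechanism described in the commit message condenses into a short sketch.
This is an illustration only, with hypothetical function names
(sheaf_alloc_fast/sheaf_free_fast); the real fast paths in mm/slub.c also
deal with locking, per-CPU access and statistics:

        static struct slab_sheaf bootstrap_sheaf = {};  /* .size == 0, no backing kmem_cache */

        /* Alloc fast path: the bootstrap sheaf is always "empty". */
        static void *sheaf_alloc_fast(struct kmem_cache *s, struct slub_percpu_sheaves *pcs)
        {
                if (pcs->main->size == 0)
                        return NULL;    /* caller falls back to the slow path */
                return pcs->main->objects[--pcs->main->size];
        }

        /* Free fast path: size == sheaf_capacity (0 == 0), so it is always "full". */
        static bool sheaf_free_fast(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, void *obj)
        {
                if (pcs->main->size == s->sheaf_capacity)
                        return false;   /* caller falls back to the slow path */
                pcs->main->objects[pcs->main->size++] = obj;
                return true;
        }

Because neither path ever writes to the bootstrap sheaf, the single static
instance can be shared by every cache and CPU.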


mm/slab.h

Lines changed: 12 additions & 0 deletions
@@ -277,6 +277,18 @@ struct kmem_cache {
 	struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
+/*
+ * Every cache has !NULL s->cpu_sheaves but they may point to the
+ * bootstrap_sheaf temporarily during init, or permanently for the boot caches
+ * and caches with debugging enabled, or all caches with CONFIG_SLUB_TINY.
+ * This helper distinguishes whether the cache has real non-bootstrap sheaves.
+ */
+static inline bool cache_has_sheaves(struct kmem_cache *s)
+{
+	/* Test CONFIG_SLUB_TINY for code elimination purposes */
+	return !IS_ENABLED(CONFIG_SLUB_TINY) && s->sheaf_capacity;
+}
+
 #if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
 #define SLAB_SUPPORTS_SYSFS 1
 void sysfs_slab_unlink(struct kmem_cache *s);
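
The "code elimination" comment refers to IS_ENABLED() expanding to a
compile-time constant: with CONFIG_SLUB_TINY=y, cache_has_sheaves() folds to
false, so the compiler can discard every block it guards, for example (an
illustration, not a hunk from this patch):

        if (cache_has_sheaves(s))
                pcs_flush_all(s);       /* dead code on SLUB_TINY builds, dropped entirely */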

mm/slab_common.c

Lines changed: 3 additions & 6 deletions
@@ -1663,11 +1663,8 @@ static bool kfree_rcu_sheaf(void *obj)
 		return false;
 
 	s = slab->slab_cache;
-	if (s->cpu_sheaves) {
-		if (likely(!IS_ENABLED(CONFIG_NUMA) ||
-			   slab_nid(slab) == numa_mem_id()))
-			return __kfree_rcu_sheaf(s, obj);
-	}
+	if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id()))
+		return __kfree_rcu_sheaf(s, obj);
 
 	return false;
 }
@@ -2171,7 +2168,7 @@ EXPORT_SYMBOL_GPL(kvfree_rcu_barrier);
  */
 void kvfree_rcu_barrier_on_cache(struct kmem_cache *s)
 {
-	if (s->cpu_sheaves) {
+	if (cache_has_sheaves(s)) {
 		flush_rcu_sheaves_on_cache(s);
 		rcu_barrier();
 	}

mm/slub.c

Lines changed: 82 additions & 41 deletions
@@ -2846,12 +2846,23 @@ static void pcs_destroy(struct kmem_cache *s)
 {
 	int cpu;
 
+	/*
+	 * We may be unwinding cache creation that failed before or during the
+	 * allocation of this.
+	 */
+	if (!s->cpu_sheaves)
+		return;
+
+	/* pcs->main can only point to the bootstrap sheaf, nothing to free */
+	if (!cache_has_sheaves(s))
+		goto free_pcs;
+
 	for_each_possible_cpu(cpu) {
 		struct slub_percpu_sheaves *pcs;
 
 		pcs = per_cpu_ptr(s->cpu_sheaves, cpu);
 
-		/* can happen when unwinding failed create */
+		/* This can happen when unwinding failed cache creation. */
 		if (!pcs->main)
 			continue;
 
@@ -2873,6 +2884,7 @@ static void pcs_destroy(struct kmem_cache *s)
 		}
 	}
 
+free_pcs:
 	free_percpu(s->cpu_sheaves);
 	s->cpu_sheaves = NULL;
 }
@@ -4030,7 +4042,7 @@ static bool has_pcs_used(int cpu, struct kmem_cache *s)
 {
 	struct slub_percpu_sheaves *pcs;
 
-	if (!s->cpu_sheaves)
+	if (!cache_has_sheaves(s))
 		return false;
 
 	pcs = per_cpu_ptr(s->cpu_sheaves, cpu);
@@ -4052,7 +4064,7 @@ static void flush_cpu_slab(struct work_struct *w)
 
 	s = sfw->s;
 
-	if (s->cpu_sheaves)
+	if (cache_has_sheaves(s))
 		pcs_flush_all(s);
 
 	flush_this_cpu_slab(s);
@@ -4157,7 +4169,7 @@ void flush_all_rcu_sheaves(void)
 	mutex_lock(&slab_mutex);
 
 	list_for_each_entry(s, &slab_caches, list) {
-		if (!s->cpu_sheaves)
+		if (!cache_has_sheaves(s))
 			continue;
 		flush_rcu_sheaves_on_cache(s);
 	}
@@ -4179,7 +4191,7 @@ static int slub_cpu_dead(unsigned int cpu)
 	mutex_lock(&slab_mutex);
 	list_for_each_entry(s, &slab_caches, list) {
 		__flush_cpu_slab(s, cpu);
-		if (s->cpu_sheaves)
+		if (cache_has_sheaves(s))
 			__pcs_flush_all_cpu(s, cpu);
 	}
 	mutex_unlock(&slab_mutex);
@@ -4979,6 +4991,12 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs,
 
 	lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
 
+	/* Bootstrap or debug cache, back off */
+	if (unlikely(!cache_has_sheaves(s))) {
+		local_unlock(&s->cpu_sheaves->lock);
+		return NULL;
+	}
+
 	if (pcs->spare && pcs->spare->size > 0) {
 		swap(pcs->main, pcs->spare);
 		return pcs;
@@ -5165,6 +5183,11 @@ unsigned int alloc_from_pcs_bulk(struct kmem_cache *s, size_t size, void **p)
 	struct slab_sheaf *full;
 	struct node_barn *barn;
 
+	if (unlikely(!cache_has_sheaves(s))) {
+		local_unlock(&s->cpu_sheaves->lock);
+		return allocated;
+	}
+
 	if (pcs->spare && pcs->spare->size > 0) {
 		swap(pcs->main, pcs->spare);
 		goto do_alloc;
@@ -5244,8 +5267,7 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
 	if (unlikely(object))
 		goto out;
 
-	if (s->cpu_sheaves)
-		object = alloc_from_pcs(s, gfpflags, node);
+	object = alloc_from_pcs(s, gfpflags, node);
 
 	if (!object)
 		object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);
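
Putting the alloc-side hunks together, an allocation during bootstrap
presumably unwinds as follows (inferred from the hunks in this commit;
alloc_from_pcs() itself is not shown in this diff):

        slab_alloc_node()
          -> alloc_from_pcs()                   /* main sheaf has size 0: "empty" */
            -> __pcs_replace_empty_main()       /* !cache_has_sheaves(): unlock, return NULL */
          -> __slab_alloc_node()                /* regular slab allocation */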
@@ -5353,18 +5375,10 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size)
 	struct slab_sheaf *sheaf = NULL;
 	struct node_barn *barn;
 
-	if (unlikely(size > s->sheaf_capacity)) {
+	if (unlikely(!size))
+		return NULL;
 
-		/*
-		 * slab_debug disables cpu sheaves intentionally so all
-		 * prefilled sheaves become "oversize" and we give up on
-		 * performance for the debugging. Same with SLUB_TINY.
-		 * Creating a cache without sheaves and then requesting a
-		 * prefilled sheaf is however not expected, so warn.
-		 */
-		WARN_ON_ONCE(s->sheaf_capacity == 0 &&
-			     !IS_ENABLED(CONFIG_SLUB_TINY) &&
-			     !(s->flags & SLAB_DEBUG_FLAGS));
+	if (unlikely(size > s->sheaf_capacity)) {
 
 		sheaf = kzalloc(struct_size(sheaf, objects, size), gfp);
 		if (!sheaf)
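
Two edge cases of kmem_cache_prefill_sheaf() change here: a zero-size request
now returns NULL up front, and the oversize path no longer warns, presumably
because caches without real sheaves (debug, SLUB_TINY, bootstrap) now
legitimately take the kzalloc() "oversize sheaf" route. A hypothetical caller
("cache" is a placeholder):

        struct slab_sheaf *sheaf;

        sheaf = kmem_cache_prefill_sheaf(cache, GFP_KERNEL, 0);  /* now returns NULL */

        sheaf = kmem_cache_prefill_sheaf(cache, GFP_KERNEL, 16);
        if (!sheaf)
                return -ENOMEM;         /* or allocate the objects individually */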
@@ -6082,6 +6096,12 @@ __pcs_replace_full_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs)
 restart:
 	lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
 
+	/* Bootstrap or debug cache, back off */
+	if (unlikely(!cache_has_sheaves(s))) {
+		local_unlock(&s->cpu_sheaves->lock);
+		return NULL;
+	}
+
 	barn = get_barn(s);
 	if (!barn) {
 		local_unlock(&s->cpu_sheaves->lock);
@@ -6298,6 +6318,12 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
 	struct slab_sheaf *empty;
 	struct node_barn *barn;
 
+	/* Bootstrap or debug cache, fall back */
+	if (unlikely(!cache_has_sheaves(s))) {
+		local_unlock(&s->cpu_sheaves->lock);
+		goto fail;
+	}
+
 	if (pcs->spare && pcs->spare->size == 0) {
 		pcs->rcu_free = pcs->spare;
 		pcs->spare = NULL;
@@ -6694,9 +6720,8 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
 	if (unlikely(!slab_free_hook(s, object, slab_want_init_on_free(s), false)))
 		return;
 
-	if (s->cpu_sheaves && likely(!IS_ENABLED(CONFIG_NUMA) ||
-				     slab_nid(slab) == numa_mem_id())
-	    && likely(!slab_test_pfmemalloc(slab))) {
+	if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id())
+	    && likely(!slab_test_pfmemalloc(slab))) {
 		if (likely(free_to_pcs(s, object)))
 			return;
 	}
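
The free side mirrors the alloc side: slab_free() now calls free_to_pcs()
unconditionally for local, non-pfmemalloc objects, and a bootstrap-phase free
presumably proceeds as follows (inferred from the hunks above; free_to_pcs()
itself is not part of this diff):

        slab_free()
          -> free_to_pcs()                      /* main sheaf: size == sheaf_capacity, "full" */
            -> __pcs_replace_full_main()        /* !cache_has_sheaves(): unlock, return NULL */
          -> regular slow-path freeing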
@@ -7409,7 +7434,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
 	 * freeing to sheaves is so incompatible with the detached freelist so
 	 * once we go that way, we have to do everything differently
 	 */
-	if (s && s->cpu_sheaves) {
+	if (s && cache_has_sheaves(s)) {
 		free_to_pcs_bulk(s, size, p);
 		return;
 	}
@@ -7520,8 +7545,7 @@ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
 		size--;
 	}
 
-	if (s->cpu_sheaves)
-		i = alloc_from_pcs_bulk(s, size, p);
+	i = alloc_from_pcs_bulk(s, size, p);
 
 	if (i < size) {
 		/*
@@ -7732,6 +7756,7 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
 
 static int init_percpu_sheaves(struct kmem_cache *s)
 {
+	static struct slab_sheaf bootstrap_sheaf = {};
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
@@ -7741,7 +7766,28 @@ static int init_percpu_sheaves(struct kmem_cache *s)
 
 		local_trylock_init(&pcs->lock);
 
-		pcs->main = alloc_empty_sheaf(s, GFP_KERNEL);
+		/*
+		 * The bootstrap sheaf has zero size so fast-path allocation
+		 * fails. It also has size == s->sheaf_capacity, so fast-path
+		 * free fails. In the slow paths we recognize the situation by
+		 * checking s->sheaf_capacity. This allows fast paths to assume
+		 * s->cpu_sheaves and pcs->main always exist and are valid.
+		 * It's also safe to share the single static bootstrap_sheaf
+		 * with its zero-sized objects array as it's never modified.
+		 *
+		 * bootstrap_sheaf also has a NULL pointer to its kmem_cache,
+		 * so we recognize it and don't attempt to free it when
+		 * destroying the cache.
+		 *
+		 * We keep bootstrap_sheaf for kmem_cache and kmem_cache_node,
+		 * caches with debug enabled, and all caches with SLUB_TINY.
+		 * For kmalloc caches it's used temporarily during the initial
+		 * bootstrap.
+		 */
+		if (!s->sheaf_capacity)
+			pcs->main = &bootstrap_sheaf;
+		else
+			pcs->main = alloc_empty_sheaf(s, GFP_KERNEL);
 
 		if (!pcs->main)
 			return -ENOMEM;
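
Spelled out, the safety argument from this comment (a summary, not code from
the patch):

        /*
         * bootstrap_sheaf.size == 0 and s->sheaf_capacity == 0, therefore:
         *
         *   alloc fast path: main->size == 0              -> "empty", slow path
         *   free fast path:  main->size == sheaf_capacity -> "full",  slow path
         *   slow paths:      !cache_has_sheaves(s)        -> back off / fall back
         *
         * No path ever writes to the bootstrap sheaf, so one read-only static
         * instance safely serves any number of caches and CPUs.
         */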
@@ -7816,8 +7862,7 @@ static void free_kmem_cache_nodes(struct kmem_cache *s)
 void __kmem_cache_release(struct kmem_cache *s)
 {
 	cache_random_seq_destroy(s);
-	if (s->cpu_sheaves)
-		pcs_destroy(s);
+	pcs_destroy(s);
 #ifdef CONFIG_PREEMPT_RT
 	if (s->cpu_slab)
 		lockdep_unregister_key(&s->lock_key);
@@ -7839,7 +7884,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s)
 			continue;
 		}
 
-		if (s->cpu_sheaves) {
+		if (cache_has_sheaves(s)) {
 			barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, node);
 
 			if (!barn)
@@ -8162,7 +8207,7 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
 	flush_all_cpus_locked(s);
 
 	/* we might have rcu sheaves in flight */
-	if (s->cpu_sheaves)
+	if (cache_has_sheaves(s))
 		rcu_barrier();
 
 	/* Attempt to free all objects */
@@ -8474,7 +8519,7 @@ static int slab_mem_going_online_callback(int nid)
 		if (get_node(s, nid))
 			continue;
 
-		if (s->cpu_sheaves) {
+		if (cache_has_sheaves(s)) {
 			barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, nid);
 
 			if (!barn) {
@@ -8682,12 +8727,10 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name,
 
 	set_cpu_partial(s);
 
-	if (s->sheaf_capacity) {
-		s->cpu_sheaves = alloc_percpu(struct slub_percpu_sheaves);
-		if (!s->cpu_sheaves) {
-			err = -ENOMEM;
-			goto out;
-		}
+	s->cpu_sheaves = alloc_percpu(struct slub_percpu_sheaves);
+	if (!s->cpu_sheaves) {
+		err = -ENOMEM;
+		goto out;
 	}
 
 #ifdef CONFIG_NUMA
@@ -8706,11 +8749,9 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name,
 	if (!alloc_kmem_cache_cpus(s))
 		goto out;
 
-	if (s->cpu_sheaves) {
-		err = init_percpu_sheaves(s);
-		if (err)
-			goto out;
-	}
+	err = init_percpu_sheaves(s);
+	if (err)
+		goto out;
 
 	err = 0;
87168757
