Skip to content

Commit 073d5f1

Browse files
committed
slab: simplify kmalloc_nolock()
The kmalloc_nolock() implementation has several complications and restrictions due to SLUB's cpu slab locking, lockless fastpath and PREEMPT_RT differences. With cpu slab usage removed, we can simplify things: - relax the PREEMPT_RT context checks as they were before commit 99a3e3a ("slab: fix kmalloc_nolock() context check for PREEMPT_RT") and also reference the explanation comment in the page allocator - the local_lock_cpu_slab() macros became unused, remove them - we no longer need to set up lockdep classes on PREEMPT_RT - we no longer need to annotate ___slab_alloc as NOKPROBE_SYMBOL since there's no lockless cpu freelist manipulation anymore - __slab_alloc_node() can be called from kmalloc_nolock_noprof() unconditionally. It can also no longer return EBUSY. But trylock failures can still happen so retry with the larger bucket if the allocation fails for any reason. Note that we still need __CMPXCHG_DOUBLE, because while we don't use cmpxchg16b on cpu freelist anymore, we still use it on slab freelist, and the alternative is slab_lock() which can be interrupted by a nmi. Clarify the comment to mention it specifically. Acked-by: Alexei Starovoitov <ast@kernel.org> Reviewed-by: Hao Li <hao.li@linux.dev> Reviewed-by: Suren Baghdasaryan <surenb@google.com> Reviewed-by: Harry Yoo <harry.yoo@oracle.com> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
1 parent ab2f752 commit 073d5f1

2 files changed

Lines changed: 29 additions & 116 deletions

File tree

mm/slab.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,6 @@ struct kmem_cache_order_objects {
190190
*/
191191
struct kmem_cache {
192192
struct kmem_cache_cpu __percpu *cpu_slab;
193-
struct lock_class_key lock_key;
194193
struct slub_percpu_sheaves __percpu *cpu_sheaves;
195194
/* Used for retrieving partial slabs, etc. */
196195
slab_flags_t flags;

mm/slub.c

Lines changed: 29 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -3690,29 +3690,12 @@ static inline unsigned int init_tid(int cpu)
36903690

36913691
static void init_kmem_cache_cpus(struct kmem_cache *s)
36923692
{
3693-
#ifdef CONFIG_PREEMPT_RT
3694-
/*
3695-
* Register lockdep key for non-boot kmem caches to avoid
3696-
* WARN_ON_ONCE(static_obj(key))) in lockdep_register_key()
3697-
*/
3698-
bool finegrain_lockdep = !init_section_contains(s, 1);
3699-
#else
3700-
/*
3701-
* Don't bother with different lockdep classes for each
3702-
* kmem_cache, since we only use local_trylock_irqsave().
3703-
*/
3704-
bool finegrain_lockdep = false;
3705-
#endif
37063693
int cpu;
37073694
struct kmem_cache_cpu *c;
37083695

3709-
if (finegrain_lockdep)
3710-
lockdep_register_key(&s->lock_key);
37113696
for_each_possible_cpu(cpu) {
37123697
c = per_cpu_ptr(s->cpu_slab, cpu);
37133698
local_trylock_init(&c->lock);
3714-
if (finegrain_lockdep)
3715-
lockdep_set_class(&c->lock, &s->lock_key);
37163699
c->tid = init_tid(cpu);
37173700
}
37183701
}
@@ -3799,47 +3782,6 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
37993782
}
38003783
}
38013784

3802-
/*
3803-
* ___slab_alloc()'s caller is supposed to check if kmem_cache::kmem_cache_cpu::lock
3804-
* can be acquired without a deadlock before invoking the function.
3805-
*
3806-
* Without LOCKDEP we trust the code to be correct. kmalloc_nolock() is
3807-
* using local_lock_is_locked() properly before calling local_lock_cpu_slab(),
3808-
* and kmalloc() is not used in an unsupported context.
3809-
*
3810-
* With LOCKDEP, on PREEMPT_RT lockdep does its checking in local_lock_irqsave().
3811-
* On !PREEMPT_RT we use trylock to avoid false positives in NMI, but
3812-
* lockdep_assert() will catch a bug in case:
3813-
* #1
3814-
* kmalloc() -> ___slab_alloc() -> irqsave -> NMI -> bpf -> kmalloc_nolock()
3815-
* or
3816-
* #2
3817-
* kmalloc() -> ___slab_alloc() -> irqsave -> tracepoint/kprobe -> bpf -> kmalloc_nolock()
3818-
*
3819-
* On PREEMPT_RT an invocation is not possible from IRQ-off or preempt
3820-
* disabled context. The lock will always be acquired and if needed it
3821-
* block and sleep until the lock is available.
3822-
* #1 is possible in !PREEMPT_RT only.
3823-
* #2 is possible in both with a twist that irqsave is replaced with rt_spinlock:
3824-
* kmalloc() -> ___slab_alloc() -> rt_spin_lock(kmem_cache_A) ->
3825-
* tracepoint/kprobe -> bpf -> kmalloc_nolock() -> rt_spin_lock(kmem_cache_B)
3826-
*
3827-
* local_lock_is_locked() prevents the case kmem_cache_A == kmem_cache_B
3828-
*/
3829-
#if defined(CONFIG_PREEMPT_RT) || !defined(CONFIG_LOCKDEP)
3830-
#define local_lock_cpu_slab(s, flags) \
3831-
local_lock_irqsave(&(s)->cpu_slab->lock, flags)
3832-
#else
3833-
#define local_lock_cpu_slab(s, flags) \
3834-
do { \
3835-
bool __l = local_trylock_irqsave(&(s)->cpu_slab->lock, flags); \
3836-
lockdep_assert(__l); \
3837-
} while (0)
3838-
#endif
3839-
3840-
#define local_unlock_cpu_slab(s, flags) \
3841-
local_unlock_irqrestore(&(s)->cpu_slab->lock, flags)
3842-
38433785
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
38443786
{
38453787
unsigned long flags;
@@ -4405,20 +4347,6 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
44054347
return object;
44064348
}
44074349

4408-
/*
4409-
* We disallow kprobes in ___slab_alloc() to prevent reentrance
4410-
*
4411-
* kmalloc() -> ___slab_alloc() -> local_lock_cpu_slab() protected part of
4412-
* ___slab_alloc() manipulating c->freelist -> kprobe -> bpf ->
4413-
* kmalloc_nolock() or kfree_nolock() -> __update_cpu_freelist_fast()
4414-
* manipulating c->freelist without lock.
4415-
*
4416-
* This does not prevent kprobe in functions called from ___slab_alloc() such as
4417-
* local_lock_irqsave() itself, and that is fine, we only need to protect the
4418-
* c->freelist manipulation in ___slab_alloc() itself.
4419-
*/
4420-
NOKPROBE_SYMBOL(___slab_alloc);
4421-
44224350
static __always_inline void *__slab_alloc_node(struct kmem_cache *s,
44234351
gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
44244352
{
@@ -5259,13 +5187,13 @@ void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node)
52595187
if (unlikely(!size))
52605188
return ZERO_SIZE_PTR;
52615189

5262-
if (IS_ENABLED(CONFIG_PREEMPT_RT) && !preemptible())
5263-
/*
5264-
* kmalloc_nolock() in PREEMPT_RT is not supported from
5265-
* non-preemptible context because local_lock becomes a
5266-
* sleeping lock on RT.
5267-
*/
5190+
/*
5191+
* See the comment for the same check in
5192+
* alloc_frozen_pages_nolock_noprof()
5193+
*/
5194+
if (IS_ENABLED(CONFIG_PREEMPT_RT) && (in_nmi() || in_hardirq()))
52685195
return NULL;
5196+
52695197
retry:
52705198
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
52715199
return NULL;
@@ -5274,10 +5202,11 @@ void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node)
52745202
if (!(s->flags & __CMPXCHG_DOUBLE) && !kmem_cache_debug(s))
52755203
/*
52765204
* kmalloc_nolock() is not supported on architectures that
5277-
* don't implement cmpxchg16b, but debug caches don't use
5278-
* per-cpu slab and per-cpu partial slabs. They rely on
5279-
* kmem_cache_node->list_lock, so kmalloc_nolock() can
5280-
* attempt to allocate from debug caches by
5205+
* don't implement cmpxchg16b and thus need slab_lock()
5206+
* which could be preempted by a nmi.
5207+
* But debug caches don't use that and only rely on
5208+
* kmem_cache_node->list_lock, so kmalloc_nolock() can attempt
5209+
* to allocate from debug caches by
52815210
* spin_trylock_irqsave(&n->list_lock, ...)
52825211
*/
52835212
return NULL;
@@ -5286,42 +5215,31 @@ void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node)
52865215
if (ret)
52875216
goto success;
52885217

5289-
ret = ERR_PTR(-EBUSY);
5290-
52915218
/*
52925219
* Do not call slab_alloc_node(), since trylock mode isn't
52935220
* compatible with slab_pre_alloc_hook/should_failslab and
52945221
* kfence_alloc. Hence call __slab_alloc_node() (at most twice)
52955222
* and slab_post_alloc_hook() directly.
5296-
*
5297-
* In !PREEMPT_RT ___slab_alloc() manipulates (freelist,tid) pair
5298-
* in irq saved region. It assumes that the same cpu will not
5299-
* __update_cpu_freelist_fast() into the same (freelist,tid) pair.
5300-
* Therefore use in_nmi() to check whether particular bucket is in
5301-
* irq protected section.
5302-
*
5303-
* If in_nmi() && local_lock_is_locked(s->cpu_slab) then it means that
5304-
* this cpu was interrupted somewhere inside ___slab_alloc() after
5305-
* it did local_lock_irqsave(&s->cpu_slab->lock, flags).
5306-
* In this case fast path with __update_cpu_freelist_fast() is not safe.
53075223
*/
5308-
if (!in_nmi() || !local_lock_is_locked(&s->cpu_slab->lock))
5309-
ret = __slab_alloc_node(s, alloc_gfp, node, _RET_IP_, size);
5224+
ret = __slab_alloc_node(s, alloc_gfp, node, _RET_IP_, size);
53105225

5311-
if (PTR_ERR(ret) == -EBUSY) {
5312-
if (can_retry) {
5313-
/* pick the next kmalloc bucket */
5314-
size = s->object_size + 1;
5315-
/*
5316-
* Another alternative is to
5317-
* if (memcg) alloc_gfp &= ~__GFP_ACCOUNT;
5318-
* else if (!memcg) alloc_gfp |= __GFP_ACCOUNT;
5319-
* to retry from bucket of the same size.
5320-
*/
5321-
can_retry = false;
5322-
goto retry;
5323-
}
5324-
ret = NULL;
5226+
/*
5227+
* It's possible we failed due to trylock as we preempted someone with
5228+
* the sheaves locked, and the list_lock is also held by another cpu.
5229+
* But it should be rare that multiple kmalloc buckets would have
5230+
* sheaves locked, so try a larger one.
5231+
*/
5232+
if (!ret && can_retry) {
5233+
/* pick the next kmalloc bucket */
5234+
size = s->object_size + 1;
5235+
/*
5236+
* Another alternative is to
5237+
* if (memcg) alloc_gfp &= ~__GFP_ACCOUNT;
5238+
* else if (!memcg) alloc_gfp |= __GFP_ACCOUNT;
5239+
* to retry from bucket of the same size.
5240+
*/
5241+
can_retry = false;
5242+
goto retry;
53255243
}
53265244

53275245
success:
@@ -7374,10 +7292,6 @@ void __kmem_cache_release(struct kmem_cache *s)
73747292
{
73757293
cache_random_seq_destroy(s);
73767294
pcs_destroy(s);
7377-
#ifdef CONFIG_PREEMPT_RT
7378-
if (s->cpu_slab)
7379-
lockdep_unregister_key(&s->lock_key);
7380-
#endif
73817295
free_percpu(s->cpu_slab);
73827296
free_kmem_cache_nodes(s);
73837297
}

0 commit comments

Comments
 (0)