@@ -3690,29 +3690,12 @@ static inline unsigned int init_tid(int cpu)
36903690
36913691static void init_kmem_cache_cpus (struct kmem_cache * s )
36923692{
3693- #ifdef CONFIG_PREEMPT_RT
3694- /*
3695- * Register lockdep key for non-boot kmem caches to avoid
3696- * WARN_ON_ONCE(static_obj(key)) in lockdep_register_key()
3697- */
3698- bool finegrain_lockdep = !init_section_contains (s , 1 );
3699- #else
3700- /*
3701- * Don't bother with different lockdep classes for each
3702- * kmem_cache, since we only use local_trylock_irqsave().
3703- */
3704- bool finegrain_lockdep = false;
3705- #endif
37063693 int cpu ;
37073694 struct kmem_cache_cpu * c ;
37083695
3709- if (finegrain_lockdep )
3710- lockdep_register_key (& s -> lock_key );
37113696 for_each_possible_cpu (cpu ) {
37123697 c = per_cpu_ptr (s -> cpu_slab , cpu );
37133698 local_trylock_init (& c -> lock );
3714- if (finegrain_lockdep )
3715- lockdep_set_class (& c -> lock , & s -> lock_key );
37163699 c -> tid = init_tid (cpu );
37173700 }
37183701}
@@ -3799,47 +3782,6 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
37993782 }
38003783}
38013784
3802- /*
3803- * ___slab_alloc()'s caller is supposed to check if kmem_cache::kmem_cache_cpu::lock
3804- * can be acquired without a deadlock before invoking the function.
3805- *
3806- * Without LOCKDEP we trust the code to be correct. kmalloc_nolock() is
3807- * using local_lock_is_locked() properly before calling local_lock_cpu_slab(),
3808- * and kmalloc() is not used in an unsupported context.
3809- *
3810- * With LOCKDEP, on PREEMPT_RT lockdep does its checking in local_lock_irqsave().
3811- * On !PREEMPT_RT we use trylock to avoid false positives in NMI, but
3812- * lockdep_assert() will catch a bug in case:
3813- * #1
3814- * kmalloc() -> ___slab_alloc() -> irqsave -> NMI -> bpf -> kmalloc_nolock()
3815- * or
3816- * #2
3817- * kmalloc() -> ___slab_alloc() -> irqsave -> tracepoint/kprobe -> bpf -> kmalloc_nolock()
3818- *
3819- * On PREEMPT_RT an invocation is not possible from IRQ-off or preempt
3820- * disabled context. The lock will always be acquired and if needed it
3821- * blocks and sleeps until the lock is available.
3822- * #1 is possible in !PREEMPT_RT only.
3823- * #2 is possible in both with a twist that irqsave is replaced with rt_spinlock:
3824- * kmalloc() -> ___slab_alloc() -> rt_spin_lock(kmem_cache_A) ->
3825- * tracepoint/kprobe -> bpf -> kmalloc_nolock() -> rt_spin_lock(kmem_cache_B)
3826- *
3827- * local_lock_is_locked() prevents the case kmem_cache_A == kmem_cache_B
3828- */
3829- #if defined(CONFIG_PREEMPT_RT ) || !defined(CONFIG_LOCKDEP )
3830- #define local_lock_cpu_slab (s , flags ) \
3831- local_lock_irqsave(&(s)->cpu_slab->lock, flags)
3832- #else
3833- #define local_lock_cpu_slab (s , flags ) \
3834- do { \
3835- bool __l = local_trylock_irqsave(&(s)->cpu_slab->lock, flags); \
3836- lockdep_assert(__l); \
3837- } while (0)
3838- #endif
3839-
3840- #define local_unlock_cpu_slab (s , flags ) \
3841- local_unlock_irqrestore(&(s)->cpu_slab->lock, flags)
3842-
38433785static inline void flush_slab (struct kmem_cache * s , struct kmem_cache_cpu * c )
38443786{
38453787 unsigned long flags ;
@@ -4405,20 +4347,6 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
44054347 return object ;
44064348}
44074349
4408- /*
4409- * We disallow kprobes in ___slab_alloc() to prevent reentrance
4410- *
4411- * kmalloc() -> ___slab_alloc() -> local_lock_cpu_slab() protected part of
4412- * ___slab_alloc() manipulating c->freelist -> kprobe -> bpf ->
4413- * kmalloc_nolock() or kfree_nolock() -> __update_cpu_freelist_fast()
4414- * manipulating c->freelist without lock.
4415- *
4416- * This does not prevent kprobe in functions called from ___slab_alloc() such as
4417- * local_lock_irqsave() itself, and that is fine, we only need to protect the
4418- * c->freelist manipulation in ___slab_alloc() itself.
4419- */
4420- NOKPROBE_SYMBOL (___slab_alloc );
4421-
44224350static __always_inline void * __slab_alloc_node (struct kmem_cache * s ,
44234351 gfp_t gfpflags , int node , unsigned long addr , size_t orig_size )
44244352{
@@ -5259,13 +5187,13 @@ void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node)
52595187 if (unlikely (!size ))
52605188 return ZERO_SIZE_PTR ;
52615189
5262- if (IS_ENABLED (CONFIG_PREEMPT_RT ) && !preemptible ())
5263- /*
5264- * kmalloc_nolock() in PREEMPT_RT is not supported from
5265- * non-preemptible context because local_lock becomes a
5266- * sleeping lock on RT.
5267- */
5190+ /*
5191+ * See the comment for the same check in
5192+ * alloc_frozen_pages_nolock_noprof()
5193+ */
5194+ if (IS_ENABLED (CONFIG_PREEMPT_RT ) && (in_nmi () || in_hardirq ()))
52685195 return NULL ;
5196+
52695197retry :
52705198 if (unlikely (size > KMALLOC_MAX_CACHE_SIZE ))
52715199 return NULL ;
@@ -5274,10 +5202,11 @@ void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node)
52745202 if (!(s -> flags & __CMPXCHG_DOUBLE ) && !kmem_cache_debug (s ))
52755203 /*
52765204 * kmalloc_nolock() is not supported on architectures that
5277- * don't implement cmpxchg16b, but debug caches don't use
5278- * per-cpu slab and per-cpu partial slabs. They rely on
5279- * kmem_cache_node->list_lock, so kmalloc_nolock() can
5280- * attempt to allocate from debug caches by
5205+ * don't implement cmpxchg16b and thus need slab_lock()
5206+ * which could be preempted by an NMI.
5207+ * But debug caches don't use that and only rely on
5208+ * kmem_cache_node->list_lock, so kmalloc_nolock() can attempt
5209+ * to allocate from debug caches by
52815210 * spin_trylock_irqsave(&n->list_lock, ...)
52825211 */
52835212 return NULL ;
@@ -5286,42 +5215,31 @@ void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node)
52865215 if (ret )
52875216 goto success ;
52885217
5289- ret = ERR_PTR (- EBUSY );
5290-
52915218 /*
52925219 * Do not call slab_alloc_node(), since trylock mode isn't
52935220 * compatible with slab_pre_alloc_hook/should_failslab and
52945221 * kfence_alloc. Hence call __slab_alloc_node() (at most twice)
52955222 * and slab_post_alloc_hook() directly.
5296- *
5297- * In !PREEMPT_RT ___slab_alloc() manipulates (freelist,tid) pair
5298- * in irq saved region. It assumes that the same cpu will not
5299- * __update_cpu_freelist_fast() into the same (freelist,tid) pair.
5300- * Therefore use in_nmi() to check whether particular bucket is in
5301- * irq protected section.
5302- *
5303- * If in_nmi() && local_lock_is_locked(s->cpu_slab) then it means that
5304- * this cpu was interrupted somewhere inside ___slab_alloc() after
5305- * it did local_lock_irqsave(&s->cpu_slab->lock, flags).
5306- * In this case fast path with __update_cpu_freelist_fast() is not safe.
53075223 */
5308- if (!in_nmi () || !local_lock_is_locked (& s -> cpu_slab -> lock ))
5309- ret = __slab_alloc_node (s , alloc_gfp , node , _RET_IP_ , size );
5224+ ret = __slab_alloc_node (s , alloc_gfp , node , _RET_IP_ , size );
53105225
5311- if (PTR_ERR (ret ) == - EBUSY ) {
5312- if (can_retry ) {
5313- /* pick the next kmalloc bucket */
5314- size = s -> object_size + 1 ;
5315- /*
5316- * Another alternative is to
5317- * if (memcg) alloc_gfp &= ~__GFP_ACCOUNT;
5318- * else if (!memcg) alloc_gfp |= __GFP_ACCOUNT;
5319- * to retry from bucket of the same size.
5320- */
5321- can_retry = false;
5322- goto retry ;
5323- }
5324- ret = NULL ;
5226+ /*
5227+ * It's possible we failed due to trylock as we preempted someone with
5228+ * the sheaves locked, and the list_lock is also held by another cpu.
5229+ * But it should be rare that multiple kmalloc buckets would have
5230+ * sheaves locked, so try a larger one.
5231+ */
5232+ if (!ret && can_retry ) {
5233+ /* pick the next kmalloc bucket */
5234+ size = s -> object_size + 1 ;
5235+ /*
5236+ * Another alternative is to
5237+ * if (memcg) alloc_gfp &= ~__GFP_ACCOUNT;
5238+ * else if (!memcg) alloc_gfp |= __GFP_ACCOUNT;
5239+ * to retry from bucket of the same size.
5240+ */
5241+ can_retry = false;
5242+ goto retry ;
53255243 }
53265244
53275245success :
@@ -7374,10 +7292,6 @@ void __kmem_cache_release(struct kmem_cache *s)
73747292{
73757293 cache_random_seq_destroy (s );
73767294 pcs_destroy (s );
7377- #ifdef CONFIG_PREEMPT_RT
7378- if (s -> cpu_slab )
7379- lockdep_unregister_key (& s -> lock_key );
7380- #endif
73817295 free_percpu (s -> cpu_slab );
73827296 free_kmem_cache_nodes (s );
73837297}
0 commit comments