Skip to content

Commit 815c8e3

Browse files
committed
Merge branch 'slab/for-7.0/sheaves' into slab/for-next
Merge series "slab: replace cpu (partial) slabs with sheaves". The percpu sheaves caching layer was introduced as opt-in but the goal was to eventually move all caches to them. This is the next step, enabling sheaves for all caches (except the two bootstrap ones) and then removing the per cpu (partial) slabs and lots of associated code. Besides the lower locking overhead and much more likely fastpath when freeing, this removes the rather complicated code related to the cpu slab lockless fastpaths (using this_cpu_try_cmpxchg128/64) and all its complications for PREEMPT_RT or kmalloc_nolock(). The lockless slab freelist+counters update operation using try_cmpxchg128/64 remains and is crucial for freeing remote NUMA objects and to allow flushing objects from sheaves to slabs mostly without the node list_lock. Link: https://lore.kernel.org/all/20260123-sheaves-for-all-v4-0-041323d506f7@suse.cz/
2 parents 98e99fc + 40fd0ac commit 815c8e3

7 files changed

Lines changed: 982 additions & 1771 deletions

File tree

include/linux/slab.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,7 @@ enum _slab_flag_bits {
5757
#endif
5858
_SLAB_OBJECT_POISON,
5959
_SLAB_CMPXCHG_DOUBLE,
60-
#ifdef CONFIG_SLAB_OBJ_EXT
6160
_SLAB_NO_OBJ_EXT,
62-
#endif
6361
#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
6462
_SLAB_OBJ_EXT_IN_OBJ,
6563
#endif
@@ -241,11 +239,7 @@ enum _slab_flag_bits {
241239
#define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */
242240

243241
/* Slab created using create_boot_cache */
244-
#ifdef CONFIG_SLAB_OBJ_EXT
245242
#define SLAB_NO_OBJ_EXT __SLAB_FLAG_BIT(_SLAB_NO_OBJ_EXT)
246-
#else
247-
#define SLAB_NO_OBJ_EXT __SLAB_FLAG_UNUSED
248-
#endif
249243

250244
#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
251245
#define SLAB_OBJ_EXT_IN_OBJ __SLAB_FLAG_BIT(_SLAB_OBJ_EXT_IN_OBJ)

mm/Kconfig

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -247,17 +247,6 @@ config SLUB_STATS
247247
out which slabs are relevant to a particular load.
248248
Try running: slabinfo -DA
249249

250-
config SLUB_CPU_PARTIAL
251-
default y
252-
depends on SMP && !SLUB_TINY
253-
bool "Enable per cpu partial caches"
254-
help
255-
Per cpu partial caches accelerate objects allocation and freeing
256-
that is local to a processor at the price of more indeterminism
257-
in the latency of the free. On overflow these caches will be cleared
258-
which requires the taking of locks that may cause latency spikes.
259-
Typically one would choose no for a realtime system.
260-
261250
config RANDOM_KMALLOC_CACHES
262251
default n
263252
depends on !SLUB_TINY

mm/internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -846,6 +846,7 @@ static inline struct page *alloc_frozen_pages_noprof(gfp_t gfp, unsigned int ord
846846
struct page *alloc_frozen_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned int order);
847847
#define alloc_frozen_pages_nolock(...) \
848848
alloc_hooks(alloc_frozen_pages_nolock_noprof(__VA_ARGS__))
849+
void free_frozen_pages_nolock(struct page *page, unsigned int order);
849850

850851
extern void zone_pcp_reset(struct zone *zone);
851852
extern void zone_pcp_disable(struct zone *zone);

mm/page_alloc.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2981,6 +2981,11 @@ void free_frozen_pages(struct page *page, unsigned int order)
29812981
__free_frozen_pages(page, order, FPI_NONE);
29822982
}
29832983

2984+
void free_frozen_pages_nolock(struct page *page, unsigned int order)
2985+
{
2986+
__free_frozen_pages(page, order, FPI_TRYLOCK);
2987+
}
2988+
29842989
/*
29852990
* Free a batch of folios
29862991
*/

mm/slab.h

Lines changed: 17 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,12 @@
2121
# define system_has_freelist_aba() system_has_cmpxchg128()
2222
# define try_cmpxchg_freelist try_cmpxchg128
2323
# endif
24-
#define this_cpu_try_cmpxchg_freelist this_cpu_try_cmpxchg128
2524
typedef u128 freelist_full_t;
2625
#else /* CONFIG_64BIT */
2726
# ifdef system_has_cmpxchg64
2827
# define system_has_freelist_aba() system_has_cmpxchg64()
2928
# define try_cmpxchg_freelist try_cmpxchg64
3029
# endif
31-
#define this_cpu_try_cmpxchg_freelist this_cpu_try_cmpxchg64
3230
typedef u64 freelist_full_t;
3331
#endif /* CONFIG_64BIT */
3432

@@ -79,19 +77,7 @@ struct slab {
7977
struct kmem_cache *slab_cache;
8078
union {
8179
struct {
82-
union {
83-
struct list_head slab_list;
84-
struct { /* For deferred deactivate_slab() */
85-
struct llist_node llnode;
86-
void *flush_freelist;
87-
};
88-
#ifdef CONFIG_SLUB_CPU_PARTIAL
89-
struct {
90-
struct slab *next;
91-
int slabs; /* Nr of slabs left */
92-
};
93-
#endif
94-
};
80+
struct list_head slab_list;
9581
/* Double-word boundary */
9682
struct freelist_counters;
9783
};
@@ -196,23 +182,6 @@ static inline size_t slab_size(const struct slab *slab)
196182
return PAGE_SIZE << slab_order(slab);
197183
}
198184

199-
#ifdef CONFIG_SLUB_CPU_PARTIAL
200-
#define slub_percpu_partial(c) ((c)->partial)
201-
202-
#define slub_set_percpu_partial(c, p) \
203-
({ \
204-
slub_percpu_partial(c) = (p)->next; \
205-
})
206-
207-
#define slub_percpu_partial_read_once(c) READ_ONCE(slub_percpu_partial(c))
208-
#else
209-
#define slub_percpu_partial(c) NULL
210-
211-
#define slub_set_percpu_partial(c, p)
212-
213-
#define slub_percpu_partial_read_once(c) NULL
214-
#endif // CONFIG_SLUB_CPU_PARTIAL
215-
216185
/*
217186
* Word size structure that can be atomically updated or read and that
218187
* contains both the order and the number of objects that a slab of the
@@ -226,8 +195,6 @@ struct kmem_cache_order_objects {
226195
* Slab cache management.
227196
*/
228197
struct kmem_cache {
229-
struct kmem_cache_cpu __percpu *cpu_slab;
230-
struct lock_class_key lock_key;
231198
struct slub_percpu_sheaves __percpu *cpu_sheaves;
232199
/* Used for retrieving partial slabs, etc. */
233200
slab_flags_t flags;
@@ -236,12 +203,6 @@ struct kmem_cache {
236203
unsigned int object_size; /* Object size without metadata */
237204
struct reciprocal_value reciprocal_size;
238205
unsigned int offset; /* Free pointer offset */
239-
#ifdef CONFIG_SLUB_CPU_PARTIAL
240-
/* Number of per cpu partial objects to keep around */
241-
unsigned int cpu_partial;
242-
/* Number of per cpu partial slabs to keep around */
243-
unsigned int cpu_partial_slabs;
244-
#endif
245206
unsigned int sheaf_capacity;
246207
struct kmem_cache_order_objects oo;
247208

@@ -282,9 +243,25 @@ struct kmem_cache {
282243
unsigned int usersize; /* Usercopy region size */
283244
#endif
284245

246+
#ifdef CONFIG_SLUB_STATS
247+
struct kmem_cache_stats __percpu *cpu_stats;
248+
#endif
249+
285250
struct kmem_cache_node *node[MAX_NUMNODES];
286251
};
287252

253+
/*
254+
* Every cache has !NULL s->cpu_sheaves but they may point to the
255+
* bootstrap_sheaf temporarily during init, or permanently for the boot caches
256+
* and caches with debugging enabled, or all caches with CONFIG_SLUB_TINY. This
257+
* helper distinguishes whether cache has real non-bootstrap sheaves.
258+
*/
259+
static inline bool cache_has_sheaves(struct kmem_cache *s)
260+
{
261+
/* Test CONFIG_SLUB_TINY for code elimination purposes */
262+
return !IS_ENABLED(CONFIG_SLUB_TINY) && s->sheaf_capacity;
263+
}
264+
288265
#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
289266
#define SLAB_SUPPORTS_SYSFS 1
290267
void sysfs_slab_unlink(struct kmem_cache *s);

mm/slab_common.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1604,11 +1604,8 @@ static bool kfree_rcu_sheaf(void *obj)
16041604
return false;
16051605

16061606
s = slab->slab_cache;
1607-
if (s->cpu_sheaves) {
1608-
if (likely(!IS_ENABLED(CONFIG_NUMA) ||
1609-
slab_nid(slab) == numa_mem_id()))
1610-
return __kfree_rcu_sheaf(s, obj);
1611-
}
1607+
if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id()))
1608+
return __kfree_rcu_sheaf(s, obj);
16121609

16131610
return false;
16141611
}
@@ -2112,7 +2109,7 @@ EXPORT_SYMBOL_GPL(kvfree_rcu_barrier);
21122109
*/
21132110
void kvfree_rcu_barrier_on_cache(struct kmem_cache *s)
21142111
{
2115-
if (s->cpu_sheaves) {
2112+
if (cache_has_sheaves(s)) {
21162113
flush_rcu_sheaves_on_cache(s);
21172114
rcu_barrier();
21182115
}

0 commit comments

Comments (0)