Skip to content

Commit 1ce20c2

Browse files
committed
slab: handle pfmemalloc slabs properly with sheaves
When a pfmemalloc allocation actually dips into reserves, the slab is marked accordingly and non-pfmemalloc allocations should not be allowed to allocate from it. The sheaves percpu caching currently doesn't follow this rule, so implement it before we expand sheaves usage to all caches. Make sure objects from pfmemalloc slabs don't end up in percpu sheaves. When freeing, skip sheaves when freeing an object from a pfmemalloc slab. When refilling sheaves, use __GFP_NOMEMALLOC to override any pfmemalloc context - the allocation will fall back to regular slab allocations when sheaves are depleted and can't be refilled because of the override. For kfree_rcu(), detect pfmemalloc slabs while processing the rcu_sheaf after the grace period in __rcu_free_sheaf_prepare() and simply flush it if any object is from a pfmemalloc slab. For prefilled sheaves, try to refill them first with __GFP_NOMEMALLOC and if that fails, retry without __GFP_NOMEMALLOC but then mark the sheaf pfmemalloc, which causes it to be flushed back to slabs when returned. Link: https://patch.msgid.link/20251105-sheaves-cleanups-v1-3-b8218e1ac7ef@suse.cz Reviewed-by: Harry Yoo <harry.yoo@oracle.com> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
1 parent ea6b5e5 commit 1ce20c2

1 file changed

Lines changed: 55 additions & 14 deletions

File tree

mm/slub.c

Lines changed: 55 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,10 @@ struct slab_sheaf {
469469
struct rcu_head rcu_head;
470470
struct list_head barn_list;
471471
/* only used for prefilled sheaves */
472-
unsigned int capacity;
472+
struct {
473+
unsigned int capacity;
474+
bool pfmemalloc;
475+
};
473476
};
474477
struct kmem_cache *cache;
475478
unsigned int size;
@@ -2651,7 +2654,7 @@ static struct slab_sheaf *alloc_full_sheaf(struct kmem_cache *s, gfp_t gfp)
26512654
if (!sheaf)
26522655
return NULL;
26532656

2654-
if (refill_sheaf(s, sheaf, gfp)) {
2657+
if (refill_sheaf(s, sheaf, gfp | __GFP_NOMEMALLOC)) {
26552658
free_empty_sheaf(s, sheaf);
26562659
return NULL;
26572660
}
@@ -2729,12 +2732,13 @@ static void sheaf_flush_unused(struct kmem_cache *s, struct slab_sheaf *sheaf)
27292732
sheaf->size = 0;
27302733
}
27312734

2732-
static void __rcu_free_sheaf_prepare(struct kmem_cache *s,
2735+
static bool __rcu_free_sheaf_prepare(struct kmem_cache *s,
27332736
struct slab_sheaf *sheaf)
27342737
{
27352738
bool init = slab_want_init_on_free(s);
27362739
void **p = &sheaf->objects[0];
27372740
unsigned int i = 0;
2741+
bool pfmemalloc = false;
27382742

27392743
while (i < sheaf->size) {
27402744
struct slab *slab = virt_to_slab(p[i]);
@@ -2747,8 +2751,13 @@ static void __rcu_free_sheaf_prepare(struct kmem_cache *s,
27472751
continue;
27482752
}
27492753

2754+
if (slab_test_pfmemalloc(slab))
2755+
pfmemalloc = true;
2756+
27502757
i++;
27512758
}
2759+
2760+
return pfmemalloc;
27522761
}
27532762

27542763
static void rcu_free_sheaf_nobarn(struct rcu_head *head)
@@ -5041,7 +5050,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs,
50415050
return NULL;
50425051

50435052
if (empty) {
5044-
if (!refill_sheaf(s, empty, gfp)) {
5053+
if (!refill_sheaf(s, empty, gfp | __GFP_NOMEMALLOC)) {
50455054
full = empty;
50465055
} else {
50475056
/*
@@ -5341,6 +5350,26 @@ void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t gfpflags, int nod
53415350
}
53425351
EXPORT_SYMBOL(kmem_cache_alloc_node_noprof);
53435352

5353+
static int __prefill_sheaf_pfmemalloc(struct kmem_cache *s,
5354+
struct slab_sheaf *sheaf, gfp_t gfp)
5355+
{
5356+
int ret = 0;
5357+
5358+
ret = refill_sheaf(s, sheaf, gfp | __GFP_NOMEMALLOC);
5359+
5360+
if (likely(!ret || !gfp_pfmemalloc_allowed(gfp)))
5361+
return ret;
5362+
5363+
/*
5364+
* if we are allowed to, refill sheaf with pfmemalloc but then remember
5365+
* it for when it's returned
5366+
*/
5367+
ret = refill_sheaf(s, sheaf, gfp);
5368+
sheaf->pfmemalloc = true;
5369+
5370+
return ret;
5371+
}
5372+
53445373
/*
53455374
* returns a sheaf that has at least the requested size
53465375
* when prefilling is needed, do so with given gfp flags
@@ -5375,6 +5404,10 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size)
53755404
sheaf->cache = s;
53765405
sheaf->capacity = size;
53775406

5407+
/*
5408+
* we do not need to care about pfmemalloc here because oversize
5409+
* sheaves are always flushed and freed when returned
5410+
*/
53785411
if (!__kmem_cache_alloc_bulk(s, gfp, size,
53795412
&sheaf->objects[0])) {
53805413
kfree(sheaf);
@@ -5411,17 +5444,18 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size)
54115444
if (!sheaf)
54125445
sheaf = alloc_empty_sheaf(s, gfp);
54135446

5414-
if (sheaf && sheaf->size < size) {
5415-
if (refill_sheaf(s, sheaf, gfp)) {
5447+
if (sheaf) {
5448+
sheaf->capacity = s->sheaf_capacity;
5449+
sheaf->pfmemalloc = false;
5450+
5451+
if (sheaf->size < size &&
5452+
__prefill_sheaf_pfmemalloc(s, sheaf, gfp)) {
54165453
sheaf_flush_unused(s, sheaf);
54175454
free_empty_sheaf(s, sheaf);
54185455
sheaf = NULL;
54195456
}
54205457
}
54215458

5422-
if (sheaf)
5423-
sheaf->capacity = s->sheaf_capacity;
5424-
54255459
return sheaf;
54265460
}
54275461

@@ -5441,7 +5475,8 @@ void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp,
54415475
struct slub_percpu_sheaves *pcs;
54425476
struct node_barn *barn;
54435477

5444-
if (unlikely(sheaf->capacity != s->sheaf_capacity)) {
5478+
if (unlikely((sheaf->capacity != s->sheaf_capacity)
5479+
|| sheaf->pfmemalloc)) {
54455480
sheaf_flush_unused(s, sheaf);
54465481
kfree(sheaf);
54475482
return;
@@ -5507,7 +5542,7 @@ int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp,
55075542

55085543
if (likely(sheaf->capacity >= size)) {
55095544
if (likely(sheaf->capacity == s->sheaf_capacity))
5510-
return refill_sheaf(s, sheaf, gfp);
5545+
return __prefill_sheaf_pfmemalloc(s, sheaf, gfp);
55115546

55125547
if (!__kmem_cache_alloc_bulk(s, gfp, sheaf->capacity - sheaf->size,
55135548
&sheaf->objects[sheaf->size])) {
@@ -6215,8 +6250,12 @@ static void rcu_free_sheaf(struct rcu_head *head)
62156250
* handles it fine. The only downside is that sheaf will serve fewer
62166251
* allocations when reused. It only happens due to debugging, which is a
62176252
* performance hit anyway.
6253+
*
6254+
* If it returns true, there was at least one object from pfmemalloc
6255+
* slab so simply flush everything.
62186256
*/
6219-
__rcu_free_sheaf_prepare(s, sheaf);
6257+
if (__rcu_free_sheaf_prepare(s, sheaf))
6258+
goto flush;
62206259

62216260
n = get_node(s, sheaf->node);
62226261
if (!n)
@@ -6371,7 +6410,8 @@ static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p)
63716410
continue;
63726411
}
63736412

6374-
if (unlikely(IS_ENABLED(CONFIG_NUMA) && slab_nid(slab) != node)) {
6413+
if (unlikely((IS_ENABLED(CONFIG_NUMA) && slab_nid(slab) != node)
6414+
|| slab_test_pfmemalloc(slab))) {
63756415
remote_objects[remote_nr] = p[i];
63766416
p[i] = p[--size];
63776417
if (++remote_nr >= PCS_BATCH_MAX)
@@ -6669,7 +6709,8 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
66696709
return;
66706710

66716711
if (s->cpu_sheaves && likely(!IS_ENABLED(CONFIG_NUMA) ||
6672-
slab_nid(slab) == numa_mem_id())) {
6712+
slab_nid(slab) == numa_mem_id())
6713+
&& likely(!slab_test_pfmemalloc(slab))) {
66736714
if (likely(free_to_pcs(s, object)))
66746715
return;
66756716
}

0 commit comments

Comments
 (0)