Skip to content

Commit 46dea17

Browse files
committed
slab: refill sheaves from all nodes
__refill_objects() currently only attempts to get partial slabs from the local node and then allocates new slab(s). Expand it to also try other nodes while observing the remote node defrag ratio, similarly to get_any_partial(). This will prevent allocating new slabs on a node while other nodes have many free slabs. It does mean sheaves will contain non-local objects in that case. Allocations that care about a specific node will still be served appropriately, but might get a slowpath allocation. Like get_any_partial(), we do observe cpuset_zone_allowed(), although we might be refilling a sheaf that will then be used from a different allocation context. We can also use the resulting refill_objects() in __kmem_cache_alloc_bulk() for non-debug caches. This means kmem_cache_alloc_bulk() will get better performance when sheaves are exhausted. kmem_cache_alloc_bulk() cannot indicate a preferred node, so it's compatible with sheaves refill in preferring the local node. Its users also have gfp flags that allow spinning, so document that as a requirement. Reviewed-by: Suren Baghdasaryan <surenb@google.com> Reviewed-by: Hao Li <hao.li@linux.dev> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
1 parent 6c2f307 commit 46dea17

1 file changed

Lines changed: 106 additions & 31 deletions

File tree

mm/slub.c

Lines changed: 106 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2512,8 +2512,8 @@ static void free_empty_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf)
25122512
}
25132513

25142514
static unsigned int
2515-
__refill_objects(struct kmem_cache *s, void **p, gfp_t gfp, unsigned int min,
2516-
unsigned int max);
2515+
refill_objects(struct kmem_cache *s, void **p, gfp_t gfp, unsigned int min,
2516+
unsigned int max);
25172517

25182518
static int refill_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf,
25192519
gfp_t gfp)
@@ -2524,8 +2524,8 @@ static int refill_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf,
25242524
if (!to_fill)
25252525
return 0;
25262526

2527-
filled = __refill_objects(s, &sheaf->objects[sheaf->size], gfp,
2528-
to_fill, to_fill);
2527+
filled = refill_objects(s, &sheaf->objects[sheaf->size], gfp, to_fill,
2528+
to_fill);
25292529

25302530
sheaf->size += filled;
25312531

@@ -6563,29 +6563,22 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
65636563
EXPORT_SYMBOL(kmem_cache_free_bulk);
65646564

65656565
static unsigned int
6566-
__refill_objects(struct kmem_cache *s, void **p, gfp_t gfp, unsigned int min,
6567-
unsigned int max)
6566+
__refill_objects_node(struct kmem_cache *s, void **p, gfp_t gfp, unsigned int min,
6567+
unsigned int max, struct kmem_cache_node *n)
65686568
{
65696569
struct partial_bulk_context pc;
65706570
struct slab *slab, *slab2;
65716571
unsigned int refilled = 0;
65726572
unsigned long flags;
65736573
void *object;
6574-
int node;
65756574

65766575
pc.flags = gfp;
65776576
pc.min_objects = min;
65786577
pc.max_objects = max;
65796578

6580-
node = numa_mem_id();
6581-
6582-
if (WARN_ON_ONCE(!gfpflags_allow_spinning(gfp)))
6579+
if (!get_partial_node_bulk(s, n, &pc))
65836580
return 0;
65846581

6585-
/* TODO: consider also other nodes? */
6586-
if (!get_partial_node_bulk(s, get_node(s, node), &pc))
6587-
goto new_slab;
6588-
65896582
list_for_each_entry_safe(slab, slab2, &pc.slabs, slab_list) {
65906583

65916584
list_del(&slab->slab_list);
@@ -6623,8 +6616,6 @@ __refill_objects(struct kmem_cache *s, void **p, gfp_t gfp, unsigned int min,
66236616
}
66246617

66256618
if (unlikely(!list_empty(&pc.slabs))) {
6626-
struct kmem_cache_node *n = get_node(s, node);
6627-
66286619
spin_lock_irqsave(&n->list_lock, flags);
66296620

66306621
list_for_each_entry_safe(slab, slab2, &pc.slabs, slab_list) {
@@ -6646,13 +6637,92 @@ __refill_objects(struct kmem_cache *s, void **p, gfp_t gfp, unsigned int min,
66466637
}
66476638
}
66486639

6640+
return refilled;
6641+
}
66496642

6650-
if (likely(refilled >= min))
6651-
goto out;
6643+
#ifdef CONFIG_NUMA
6644+
static unsigned int
6645+
__refill_objects_any(struct kmem_cache *s, void **p, gfp_t gfp, unsigned int min,
6646+
unsigned int max)
6647+
{
6648+
struct zonelist *zonelist;
6649+
struct zoneref *z;
6650+
struct zone *zone;
6651+
enum zone_type highest_zoneidx = gfp_zone(gfp);
6652+
unsigned int cpuset_mems_cookie;
6653+
unsigned int refilled = 0;
6654+
6655+
/* see get_from_any_partial() for the defrag ratio description */
6656+
if (!s->remote_node_defrag_ratio ||
6657+
get_cycles() % 1024 > s->remote_node_defrag_ratio)
6658+
return 0;
6659+
6660+
do {
6661+
cpuset_mems_cookie = read_mems_allowed_begin();
6662+
zonelist = node_zonelist(mempolicy_slab_node(), gfp);
6663+
for_each_zone_zonelist(zone, z, zonelist, highest_zoneidx) {
6664+
struct kmem_cache_node *n;
6665+
unsigned int r;
6666+
6667+
n = get_node(s, zone_to_nid(zone));
6668+
6669+
if (!n || !cpuset_zone_allowed(zone, gfp) ||
6670+
n->nr_partial <= s->min_partial)
6671+
continue;
6672+
6673+
r = __refill_objects_node(s, p, gfp, min, max, n);
6674+
refilled += r;
6675+
6676+
if (r >= min) {
6677+
/*
6678+
* Don't check read_mems_allowed_retry() here -
6679+
* if mems_allowed was updated in parallel, that
6680+
* was a harmless race between allocation and
6681+
* the cpuset update
6682+
*/
6683+
return refilled;
6684+
}
6685+
p += r;
6686+
min -= r;
6687+
max -= r;
6688+
}
6689+
} while (read_mems_allowed_retry(cpuset_mems_cookie));
6690+
6691+
return refilled;
6692+
}
6693+
#else
6694+
static inline unsigned int
6695+
__refill_objects_any(struct kmem_cache *s, void **p, gfp_t gfp, unsigned int min,
6696+
unsigned int max)
6697+
{
6698+
return 0;
6699+
}
6700+
#endif
6701+
6702+
static unsigned int
6703+
refill_objects(struct kmem_cache *s, void **p, gfp_t gfp, unsigned int min,
6704+
unsigned int max)
6705+
{
6706+
int local_node = numa_mem_id();
6707+
unsigned int refilled;
6708+
struct slab *slab;
6709+
6710+
if (WARN_ON_ONCE(!gfpflags_allow_spinning(gfp)))
6711+
return 0;
6712+
6713+
refilled = __refill_objects_node(s, p, gfp, min, max,
6714+
get_node(s, local_node));
6715+
if (refilled >= min)
6716+
return refilled;
6717+
6718+
refilled += __refill_objects_any(s, p + refilled, gfp, min - refilled,
6719+
max - refilled);
6720+
if (refilled >= min)
6721+
return refilled;
66526722

66536723
new_slab:
66546724

6655-
slab = new_slab(s, pc.flags, node);
6725+
slab = new_slab(s, gfp, local_node);
66566726
if (!slab)
66576727
goto out;
66586728

@@ -6667,8 +6737,8 @@ __refill_objects(struct kmem_cache *s, void **p, gfp_t gfp, unsigned int min,
66676737

66686738
if (refilled < min)
66696739
goto new_slab;
6670-
out:
66716740

6741+
out:
66726742
return refilled;
66736743
}
66746744

@@ -6678,18 +6748,20 @@ int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
66786748
{
66796749
int i;
66806750

6681-
/*
6682-
* TODO: this might be more efficient (if necessary) by reusing
6683-
* __refill_objects()
6684-
*/
6685-
for (i = 0; i < size; i++) {
6751+
if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
6752+
for (i = 0; i < size; i++) {
66866753

6687-
p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, _RET_IP_,
6688-
s->object_size);
6689-
if (unlikely(!p[i]))
6690-
goto error;
6754+
p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, _RET_IP_,
6755+
s->object_size);
6756+
if (unlikely(!p[i]))
6757+
goto error;
66916758

6692-
maybe_wipe_obj_freeptr(s, p[i]);
6759+
maybe_wipe_obj_freeptr(s, p[i]);
6760+
}
6761+
} else {
6762+
i = refill_objects(s, p, flags, size, size);
6763+
if (i < size)
6764+
goto error;
66936765
}
66946766

66956767
return i;
@@ -6700,7 +6772,10 @@ int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
67006772

67016773
}
67026774

6703-
/* Note that interrupts must be enabled when calling this function. */
6775+
/*
6776+
* Note that interrupts must be enabled when calling this function and gfp
6777+
* flags must allow spinning.
6778+
*/
67046779
int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
67056780
void **p)
67066781
{

0 commit comments

Comments
 (0)