Skip to content

Commit a77d6d3

Browse files
hygoni
authored and tehcaster committed
mm/slab: place slabobj_ext metadata in unused space within s->size
When a cache has high s->align value and s->object_size is not aligned to it, each object ends up with some unused space because of alignment. If this wasted space is big enough, we can use it to store the slabobj_ext metadata instead of wasting it. On my system, this happens with caches like kmem_cache, mm_struct, pid, task_struct, sighand_cache, xfs_inode, and others. To place the slabobj_ext metadata within each object, the existing slab_obj_ext() logic can still be used by setting: - slab->obj_exts = slab_address(slab) + (slabobj_ext offset) - stride = s->size slab_obj_ext() doesn't need know where the metadata is stored, so this method works without adding extra overhead to slab_obj_ext(). A good example benefiting from this optimization is xfs_inode (object_size: 992, align: 64). To measure memory savings, 2 millions of files were created on XFS. [ MEMCG=y, MEM_ALLOC_PROFILING=n ] Before patch (creating ~2.64M directories on xfs): Slab: 5175976 kB SReclaimable: 3837524 kB SUnreclaim: 1338452 kB After patch (creating ~2.64M directories on xfs): Slab: 5152912 kB SReclaimable: 3838568 kB SUnreclaim: 1314344 kB (-23.54 MiB) Enjoy the memory savings! Suggested-by: Vlastimil Babka <vbabka@suse.cz> Signed-off-by: Harry Yoo <harry.yoo@oracle.com> Link: https://patch.msgid.link/20260113061845.159790-10-harry.yoo@oracle.com Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
1 parent fab0694 commit a77d6d3

3 files changed

Lines changed: 101 additions & 11 deletions

File tree

include/linux/slab.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ enum _slab_flag_bits {
5959
_SLAB_CMPXCHG_DOUBLE,
6060
#ifdef CONFIG_SLAB_OBJ_EXT
6161
_SLAB_NO_OBJ_EXT,
62+
#endif
63+
#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
64+
_SLAB_OBJ_EXT_IN_OBJ,
6265
#endif
6366
_SLAB_FLAGS_LAST_BIT
6467
};
@@ -244,6 +247,12 @@ enum _slab_flag_bits {
244247
#define SLAB_NO_OBJ_EXT __SLAB_FLAG_UNUSED
245248
#endif
246249

250+
#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
251+
#define SLAB_OBJ_EXT_IN_OBJ __SLAB_FLAG_BIT(_SLAB_OBJ_EXT_IN_OBJ)
252+
#else
253+
#define SLAB_OBJ_EXT_IN_OBJ __SLAB_FLAG_UNUSED
254+
#endif
255+
247256
/*
248257
* ZERO_SIZE_PTR will be returned for zero sized kmalloc requests.
249258
*

mm/slab_common.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,13 @@ DEFINE_MUTEX(slab_mutex);
4343
struct kmem_cache *kmem_cache;
4444

4545
/*
46-
* Set of flags that will prevent slab merging
46+
* Set of flags that will prevent slab merging.
47+
* Any flag that adds per-object metadata should be included,
48+
* since slab merging can update s->inuse that affects the metadata layout.
4749
*/
4850
#define SLAB_NEVER_MERGE (SLAB_DEBUG_FLAGS | SLAB_TYPESAFE_BY_RCU | \
49-
SLAB_NOLEAKTRACE | SLAB_FAILSLAB | SLAB_NO_MERGE)
51+
SLAB_NOLEAKTRACE | SLAB_FAILSLAB | SLAB_NO_MERGE | \
52+
SLAB_OBJ_EXT_IN_OBJ)
5053

5154
#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
5255
SLAB_CACHE_DMA32 | SLAB_ACCOUNT)

mm/slub.c

Lines changed: 87 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -972,6 +972,46 @@ static inline bool obj_exts_in_slab(struct kmem_cache *s, struct slab *slab)
972972
{
973973
return false;
974974
}
975+
976+
#endif
977+
978+
#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
979+
static bool obj_exts_in_object(struct kmem_cache *s, struct slab *slab)
980+
{
981+
/*
982+
* Note we cannot rely on the SLAB_OBJ_EXT_IN_OBJ flag here and need to
983+
* check the stride. A cache can have SLAB_OBJ_EXT_IN_OBJ set, but
984+
* allocations within_slab_leftover are preferred. And those may be
985+
* possible or not depending on the particular slab's size.
986+
*/
987+
return obj_exts_in_slab(s, slab) &&
988+
(slab_get_stride(slab) == s->size);
989+
}
990+
991+
static unsigned int obj_exts_offset_in_object(struct kmem_cache *s)
992+
{
993+
unsigned int offset = get_info_end(s);
994+
995+
if (kmem_cache_debug_flags(s, SLAB_STORE_USER))
996+
offset += sizeof(struct track) * 2;
997+
998+
if (slub_debug_orig_size(s))
999+
offset += sizeof(unsigned long);
1000+
1001+
offset += kasan_metadata_size(s, false);
1002+
1003+
return offset;
1004+
}
1005+
#else
1006+
static inline bool obj_exts_in_object(struct kmem_cache *s, struct slab *slab)
1007+
{
1008+
return false;
1009+
}
1010+
1011+
static inline unsigned int obj_exts_offset_in_object(struct kmem_cache *s)
1012+
{
1013+
return 0;
1014+
}
9751015
#endif
9761016

9771017
#ifdef CONFIG_SLUB_DEBUG
@@ -1272,6 +1312,9 @@ static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p)
12721312

12731313
off += kasan_metadata_size(s, false);
12741314

1315+
if (obj_exts_in_object(s, slab))
1316+
off += sizeof(struct slabobj_ext);
1317+
12751318
if (off != size_from_object(s))
12761319
/* Beginning of the filler is the free pointer */
12771320
print_section(KERN_ERR, "Padding ", p + off,
@@ -1453,8 +1496,11 @@ check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
14531496
* between metadata and the next object, independent of alignment.
14541497
* - Filled with 0x5a (POISON_INUSE) when SLAB_POISON is set.
14551498
* [Final alignment padding]
1456-
* - Any bytes added by ALIGN(size, s->align) to reach s->size.
1457-
* - Filled with 0x5a (POISON_INUSE) when SLAB_POISON is set.
1499+
* - Bytes added by ALIGN(size, s->align) to reach s->size.
1500+
* - When the padding is large enough, it can be used to store
1501+
* struct slabobj_ext for accounting metadata (obj_exts_in_object()).
1502+
* - The remaining bytes (if any) are filled with 0x5a (POISON_INUSE)
1503+
* when SLAB_POISON is set.
14581504
*
14591505
* Notes:
14601506
* - Redzones are filled by init_object() with SLUB_RED_ACTIVE/INACTIVE.
@@ -1485,6 +1531,9 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
14851531

14861532
off += kasan_metadata_size(s, false);
14871533

1534+
if (obj_exts_in_object(s, slab))
1535+
off += sizeof(struct slabobj_ext);
1536+
14881537
if (size_from_object(s) == off)
14891538
return 1;
14901539

@@ -1510,7 +1559,7 @@ slab_pad_check(struct kmem_cache *s, struct slab *slab)
15101559
length = slab_size(slab);
15111560
end = start + length;
15121561

1513-
if (obj_exts_in_slab(s, slab)) {
1562+
if (obj_exts_in_slab(s, slab) && !obj_exts_in_object(s, slab)) {
15141563
remainder = length;
15151564
remainder -= obj_exts_offset_in_slab(s, slab);
15161565
remainder -= obj_exts_size_in_slab(slab);
@@ -2384,6 +2433,24 @@ static void alloc_slab_obj_exts_early(struct kmem_cache *s, struct slab *slab)
23842433
#endif
23852434
slab->obj_exts = obj_exts;
23862435
slab_set_stride(slab, sizeof(struct slabobj_ext));
2436+
} else if (s->flags & SLAB_OBJ_EXT_IN_OBJ) {
2437+
unsigned int offset = obj_exts_offset_in_object(s);
2438+
2439+
obj_exts = (unsigned long)slab_address(slab);
2440+
obj_exts += s->red_left_pad;
2441+
obj_exts += offset;
2442+
2443+
get_slab_obj_exts(obj_exts);
2444+
for_each_object(addr, s, slab_address(slab), slab->objects)
2445+
memset(kasan_reset_tag(addr) + offset, 0,
2446+
sizeof(struct slabobj_ext));
2447+
put_slab_obj_exts(obj_exts);
2448+
2449+
#ifdef CONFIG_MEMCG
2450+
obj_exts |= MEMCG_DATA_OBJEXTS;
2451+
#endif
2452+
slab->obj_exts = obj_exts;
2453+
slab_set_stride(slab, s->size);
23872454
}
23882455
}
23892456

@@ -7028,8 +7095,10 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
70287095
}
70297096
EXPORT_SYMBOL(kmem_cache_free);
70307097

7031-
static inline size_t slab_ksize(const struct kmem_cache *s)
7098+
static inline size_t slab_ksize(struct slab *slab)
70327099
{
7100+
struct kmem_cache *s = slab->slab_cache;
7101+
70337102
#ifdef CONFIG_SLUB_DEBUG
70347103
/*
70357104
* Debugging requires use of the padding between object
@@ -7042,11 +7111,13 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
70427111
return s->object_size;
70437112
/*
70447113
* If we have the need to store the freelist pointer
7045-
* back there or track user information then we can
7114+
* or any other metadata back there then we can
70467115
* only use the space before that information.
70477116
*/
70487117
if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER))
70497118
return s->inuse;
7119+
else if (obj_exts_in_object(s, slab))
7120+
return s->inuse;
70507121
/*
70517122
* Else we can use all the padding etc for the allocation
70527123
*/
@@ -7055,8 +7126,8 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
70557126

70567127
static size_t __ksize(const void *object)
70577128
{
7058-
const struct page *page;
7059-
const struct slab *slab;
7129+
struct page *page;
7130+
struct slab *slab;
70607131

70617132
if (unlikely(object == ZERO_SIZE_PTR))
70627133
return 0;
@@ -7075,7 +7146,7 @@ static size_t __ksize(const void *object)
70757146
skip_orig_size_check(slab->slab_cache, object);
70767147
#endif
70777148

7078-
return slab_ksize(slab->slab_cache);
7149+
return slab_ksize(slab);
70797150
}
70807151

70817152
/**
@@ -8199,6 +8270,7 @@ static int calculate_sizes(struct kmem_cache_args *args, struct kmem_cache *s)
81998270
{
82008271
slab_flags_t flags = s->flags;
82018272
unsigned int size = s->object_size;
8273+
unsigned int aligned_size;
82028274
unsigned int order;
82038275

82048276
/*
@@ -8308,7 +8380,13 @@ static int calculate_sizes(struct kmem_cache_args *args, struct kmem_cache *s)
83088380
* offset 0. In order to align the objects we have to simply size
83098381
* each object to conform to the alignment.
83108382
*/
8311-
size = ALIGN(size, s->align);
8383+
aligned_size = ALIGN(size, s->align);
8384+
#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
8385+
if (aligned_size - size >= sizeof(struct slabobj_ext))
8386+
s->flags |= SLAB_OBJ_EXT_IN_OBJ;
8387+
#endif
8388+
size = aligned_size;
8389+
83128390
s->size = size;
83138391
s->reciprocal_size = reciprocal_value(size);
83148392
order = calculate_order(size);

0 commit comments

Comments
 (0)