@@ -22,6 +22,7 @@
 #include <linux/list.h>
 #include <linux/mm.h>
 #include <linux/mutex.h>
+#include <linux/poison.h>
 #include <linux/printk.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
@@ -43,17 +44,7 @@
 #define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN)
 #define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \
			       STACK_DEPOT_EXTRA_BITS)
-#if IS_ENABLED(CONFIG_KMSAN) && CONFIG_STACKDEPOT_MAX_FRAMES >= 32
-/*
- * KMSAN is frequently used in fuzzing scenarios and thus saves a lot of stack
- * traces. As KMSAN does not support evicting stack traces from the stack
- * depot, the stack depot capacity might be reached quickly with large stack
- * records. Adjust the maximum number of stack depot pools for this case.
- */
-#define DEPOT_POOLS_CAP (8192 * (CONFIG_STACKDEPOT_MAX_FRAMES / 16))
-#else
 #define DEPOT_POOLS_CAP 8192
-#endif
 #define DEPOT_MAX_POOLS \
 	(((1LL << (DEPOT_POOL_INDEX_BITS)) < DEPOT_POOLS_CAP) ? \
 	 (1LL << (DEPOT_POOL_INDEX_BITS)) : DEPOT_POOLS_CAP)
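For orientation, the handle bit budget implied by these macros, as an illustrative calculation only: PAGE_SHIFT, the handle width, and STACK_DEPOT_EXTRA_BITS all vary by architecture and config, and the values below are assumptions, not taken from the patch.

/*
 * Sketch, assuming PAGE_SHIFT == 12, DEPOT_POOL_ORDER == 2,
 * DEPOT_STACK_ALIGN == 4, a 32-bit handle and STACK_DEPOT_EXTRA_BITS == 5:
 *
 *   DEPOT_OFFSET_BITS     = 2 + 12 - 4  = 10  (16 KiB pool in 16-byte units)
 *   DEPOT_POOL_INDEX_BITS = 32 - 10 - 5 = 17  (up to 131072 indexable pools)
 *   DEPOT_MAX_POOLS       = min(1 << 17, DEPOT_POOLS_CAP) = 8192
 */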
@@ -93,9 +84,6 @@ struct stack_record {
 	};
 };
 
-#define DEPOT_STACK_RECORD_SIZE \
-	ALIGN(sizeof(struct stack_record), 1 << DEPOT_STACK_ALIGN)
-
 static bool stack_depot_disabled;
 static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
 static bool __stack_depot_early_init_passed __initdata;
@@ -121,32 +109,31 @@ static void *stack_pools[DEPOT_MAX_POOLS];
 static void *new_pool;
 /* Number of pools in stack_pools. */
 static int pools_num;
+/* Offset to the unused space in the currently used pool. */
+static size_t pool_offset = DEPOT_POOL_SIZE;
 /* Freelist of stack records within stack_pools. */
 static LIST_HEAD(free_stacks);
-/*
- * Stack depot tries to keep an extra pool allocated even before it runs out
- * of space in the currently used pool. This flag marks whether this extra pool
- * needs to be allocated. It has the value 0 when either an extra pool is not
- * yet allocated or if the limit on the number of pools is reached.
- */
-static bool new_pool_required = true;
 /* The lock must be held when performing pool or freelist modifications. */
 static DEFINE_RAW_SPINLOCK(pool_lock);
 
 /* Statistics counters for debugfs. */
 enum depot_counter_id {
-	DEPOT_COUNTER_ALLOCS,
-	DEPOT_COUNTER_FREES,
-	DEPOT_COUNTER_INUSE,
+	DEPOT_COUNTER_REFD_ALLOCS,
+	DEPOT_COUNTER_REFD_FREES,
+	DEPOT_COUNTER_REFD_INUSE,
 	DEPOT_COUNTER_FREELIST_SIZE,
+	DEPOT_COUNTER_PERSIST_COUNT,
+	DEPOT_COUNTER_PERSIST_BYTES,
 	DEPOT_COUNTER_COUNT,
 };
 static long counters[DEPOT_COUNTER_COUNT];
 static const char *const counter_names[] = {
-	[DEPOT_COUNTER_ALLOCS]		= "allocations",
-	[DEPOT_COUNTER_FREES]		= "frees",
-	[DEPOT_COUNTER_INUSE]		= "in_use",
+	[DEPOT_COUNTER_REFD_ALLOCS]	= "refcounted_allocations",
+	[DEPOT_COUNTER_REFD_FREES]	= "refcounted_frees",
+	[DEPOT_COUNTER_REFD_INUSE]	= "refcounted_in_use",
 	[DEPOT_COUNTER_FREELIST_SIZE]	= "freelist_size",
+	[DEPOT_COUNTER_PERSIST_COUNT]	= "persistent_count",
+	[DEPOT_COUNTER_PERSIST_BYTES]	= "persistent_bytes",
 };
 static_assert(ARRAY_SIZE(counter_names) == DEPOT_COUNTER_COUNT);
 
@@ -294,48 +281,52 @@ int stack_depot_init(void)
 EXPORT_SYMBOL_GPL(stack_depot_init);
 
 /*
- * Initializes new stack depot @pool, release all its entries to the freelist,
- * and update the list of pools.
+ * Initializes new stack pool, and updates the list of pools.
  */
-static void depot_init_pool(void *pool)
+static bool depot_init_pool(void **prealloc)
 {
-	int offset;
-
 	lockdep_assert_held(&pool_lock);
 
-	/* Initialize handles and link stack records into the freelist. */
-	for (offset = 0; offset <= DEPOT_POOL_SIZE - DEPOT_STACK_RECORD_SIZE;
-	     offset += DEPOT_STACK_RECORD_SIZE) {
-		struct stack_record *stack = pool + offset;
-
-		stack->handle.pool_index = pools_num;
-		stack->handle.offset = offset >> DEPOT_STACK_ALIGN;
-		stack->handle.extra = 0;
-
-		/*
-		 * Stack traces of size 0 are never saved, and we can simply use
-		 * the size field as an indicator if this is a new unused stack
-		 * record in the freelist.
-		 */
-		stack->size = 0;
+	if (unlikely(pools_num >= DEPOT_MAX_POOLS)) {
+		/* Bail out if we reached the pool limit. */
+		WARN_ON_ONCE(pools_num > DEPOT_MAX_POOLS); /* should never happen */
+		WARN_ON_ONCE(!new_pool); /* to avoid unnecessary pre-allocation */
+		WARN_ONCE(1, "Stack depot reached limit capacity");
+		return false;
+	}
 
-		INIT_LIST_HEAD(&stack->hash_list);
-		/*
-		 * Add to the freelist front to prioritize never-used entries:
-		 * required in case there are entries in the freelist, but their
-		 * RCU cookie still belongs to the current RCU grace period
-		 * (there can still be concurrent readers).
-		 */
-		list_add(&stack->free_list, &free_stacks);
-		counters[DEPOT_COUNTER_FREELIST_SIZE]++;
+	if (!new_pool && *prealloc) {
+		/* We have preallocated memory, use it. */
+		WRITE_ONCE(new_pool, *prealloc);
+		*prealloc = NULL;
 	}
 
+	if (!new_pool)
+		return false; /* new_pool and *prealloc are NULL */
+
 	/* Save reference to the pool to be used by depot_fetch_stack(). */
-	stack_pools[pools_num] = pool;
+	stack_pools[pools_num] = new_pool;
+
+	/*
+	 * Stack depot tries to keep an extra pool allocated even before it runs
+	 * out of space in the currently used pool.
+	 *
+	 * To indicate that a new preallocation is needed new_pool is reset to
+	 * NULL; do not reset to NULL if we have reached the maximum number of
+	 * pools.
+	 */
+	if (pools_num < DEPOT_MAX_POOLS)
+		WRITE_ONCE(new_pool, NULL);
+	else
+		WRITE_ONCE(new_pool, STACK_DEPOT_POISON);
 
 	/* Pairs with concurrent READ_ONCE() in depot_fetch_stack(). */
 	WRITE_ONCE(pools_num, pools_num + 1);
 	ASSERT_EXCLUSIVE_WRITER(pools_num);
+
+	pool_offset = 0;
+
+	return true;
 }
 
 /* Keeps the preallocated memory to be used for a new stack depot pool. */
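With new_pool_required gone, new_pool itself now encodes the pre-allocation state. A sketch of the lifecycle this hunk implements (the comment below is illustrative, not from the patch):

/*
 * new_pool == NULL               -> stack_depot_save_flags() should
 *                                   preallocate (!READ_ONCE(new_pool));
 * new_pool == <valid pointer>    -> a preallocated pool is cached and will
 *                                   be installed by depot_init_pool();
 * new_pool == STACK_DEPOT_POISON -> pool limit reached; the value stays
 *                                   non-NULL, so no further preallocation
 *                                   is requested or kept.
 */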
@@ -347,63 +338,51 @@ static void depot_keep_new_pool(void **prealloc)
 	 * If a new pool is already saved or the maximum number of
 	 * pools is reached, do not use the preallocated memory.
 	 */
-	if (!new_pool_required)
+	if (new_pool)
 		return;
 
-	/*
-	 * Use the preallocated memory for the new pool
-	 * as long as we do not exceed the maximum number of pools.
-	 */
-	if (pools_num < DEPOT_MAX_POOLS) {
-		new_pool = *prealloc;
-		*prealloc = NULL;
-	}
-
-	/*
-	 * At this point, either a new pool is kept or the maximum
-	 * number of pools is reached. In either case, take note that
-	 * keeping another pool is not required.
-	 */
-	WRITE_ONCE(new_pool_required, false);
+	WRITE_ONCE(new_pool, *prealloc);
+	*prealloc = NULL;
 }
 
 /*
- * Try to initialize a new stack depot pool from either a previous or the
- * current pre-allocation, and release all its entries to the freelist.
+ * Try to initialize a new stack record from the current pool, a cached pool, or
+ * the current pre-allocation.
  */
-static bool depot_try_init_pool(void **prealloc)
+static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size)
 {
+	struct stack_record *stack;
+	void *current_pool;
+	u32 pool_index;
+
 	lockdep_assert_held(&pool_lock);
 
-	/* Check if we have a new pool saved and use it. */
-	if (new_pool) {
-		depot_init_pool(new_pool);
-		new_pool = NULL;
+	if (pool_offset + size > DEPOT_POOL_SIZE) {
+		if (!depot_init_pool(prealloc))
+			return NULL;
+	}
 
-		/* Take note that we might need a new new_pool. */
-		if (pools_num < DEPOT_MAX_POOLS)
-			WRITE_ONCE(new_pool_required, true);
+	if (WARN_ON_ONCE(pools_num < 1))
+		return NULL;
+	pool_index = pools_num - 1;
+	current_pool = stack_pools[pool_index];
+	if (WARN_ON_ONCE(!current_pool))
+		return NULL;
 
-		return true;
-	}
+	stack = current_pool + pool_offset;
 
-	/* Bail out if we reached the pool limit. */
-	if (unlikely(pools_num >= DEPOT_MAX_POOLS)) {
-		WARN_ONCE(1, "Stack depot reached limit capacity");
-		return false;
-	}
+	/* Pre-initialize handle once. */
+	stack->handle.pool_index = pool_index;
+	stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN;
+	stack->handle.extra = 0;
+	INIT_LIST_HEAD(&stack->hash_list);
 
-	/* Check if we have preallocated memory and use it. */
-	if (*prealloc) {
-		depot_init_pool(*prealloc);
-		*prealloc = NULL;
-		return true;
-	}
+	pool_offset += size;
 
-	return false;
+	return stack;
 }
 
-/* Try to find next free usable entry. */
+/* Try to find next free usable entry from the freelist. */
 static struct stack_record *depot_pop_free(void)
 {
 	struct stack_record *stack;
@@ -420,7 +399,7 @@ static struct stack_record *depot_pop_free(void)
 	 * check the first entry.
 	 */
 	stack = list_first_entry(&free_stacks, struct stack_record, free_list);
-	if (stack->size && !poll_state_synchronize_rcu(stack->rcu_state))
+	if (!poll_state_synchronize_rcu(stack->rcu_state))
 		return NULL;
 
 	list_del(&stack->free_list);
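The stack->size guard can be dropped because depot_init_pool() no longer releases never-used records to the freelist: every entry on free_stacks has now passed through depot_free_stack() and therefore carries a valid rcu_state cookie.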
@@ -429,48 +408,73 @@ static struct stack_record *depot_pop_free(void)
 	return stack;
 }
 
+static inline size_t depot_stack_record_size(struct stack_record *s, unsigned int nr_entries)
+{
+	const size_t used = flex_array_size(s, entries, nr_entries);
+	const size_t unused = sizeof(s->entries) - used;
+
+	WARN_ON_ONCE(sizeof(s->entries) < used);
+
+	return ALIGN(sizeof(struct stack_record) - unused, 1 << DEPOT_STACK_ALIGN);
+}
+
 /* Allocates a new stack in a stack depot pool. */
 static struct stack_record *
-depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
+depot_alloc_stack(unsigned long *entries, unsigned int nr_entries, u32 hash, depot_flags_t flags, void **prealloc)
 {
-	struct stack_record *stack;
+	struct stack_record *stack = NULL;
+	size_t record_size;
 
 	lockdep_assert_held(&pool_lock);
 
 	/* This should already be checked by public API entry points. */
-	if (WARN_ON_ONCE(!size))
+	if (WARN_ON_ONCE(!nr_entries))
 		return NULL;
 
-	/* Check if we have a stack record to save the stack trace. */
-	stack = depot_pop_free();
-	if (!stack) {
-		/* No usable entries on the freelist - try to refill the freelist. */
-		if (!depot_try_init_pool(prealloc))
-			return NULL;
+	/* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */
+	if (nr_entries > CONFIG_STACKDEPOT_MAX_FRAMES)
+		nr_entries = CONFIG_STACKDEPOT_MAX_FRAMES;
+
+	if (flags & STACK_DEPOT_FLAG_GET) {
+		/*
+		 * Evictable entries have to allocate the max. size so they may
+		 * safely be re-used by differently sized allocations.
+		 */
+		record_size = depot_stack_record_size(stack, CONFIG_STACKDEPOT_MAX_FRAMES);
 		stack = depot_pop_free();
-		if (WARN_ON(!stack))
-			return NULL;
+	} else {
+		record_size = depot_stack_record_size(stack, nr_entries);
 	}
 
-	/* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */
-	if (size > CONFIG_STACKDEPOT_MAX_FRAMES)
-		size = CONFIG_STACKDEPOT_MAX_FRAMES;
+	if (!stack) {
+		stack = depot_pop_free_pool(prealloc, record_size);
+		if (!stack)
+			return NULL;
+	}
 
 	/* Save the stack trace. */
 	stack->hash = hash;
-	stack->size = size;
-	/* stack->handle is already filled in by depot_init_pool(). */
-	refcount_set(&stack->count, 1);
-	memcpy(stack->entries, entries, flex_array_size(stack, entries, size));
+	stack->size = nr_entries;
+	/* stack->handle is already filled in by depot_pop_free_pool(). */
+	memcpy(stack->entries, entries, flex_array_size(stack, entries, nr_entries));
+
+	if (flags & STACK_DEPOT_FLAG_GET) {
+		refcount_set(&stack->count, 1);
+		counters[DEPOT_COUNTER_REFD_ALLOCS]++;
+		counters[DEPOT_COUNTER_REFD_INUSE]++;
+	} else {
+		/* Warn on attempts to switch to refcounting this entry. */
+		refcount_set(&stack->count, REFCOUNT_SATURATED);
+		counters[DEPOT_COUNTER_PERSIST_COUNT]++;
+		counters[DEPOT_COUNTER_PERSIST_BYTES] += record_size;
+	}
 
 	/*
 	 * Let KMSAN know the stored stack record is initialized. This shall
 	 * prevent false positive reports if instrumented code accesses it.
 	 */
-	kmsan_unpoison_memory(stack, DEPOT_STACK_RECORD_SIZE);
+	kmsan_unpoison_memory(stack, record_size);
 
-	counters[DEPOT_COUNTER_ALLOCS]++;
-	counters[DEPOT_COUNTER_INUSE]++;
 	return stack;
 }
 
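A worked example of the new record sizing, with illustrative numbers only: 8-byte entries, CONFIG_STACKDEPOT_MAX_FRAMES == 64, and a 32-byte fixed part of struct stack_record are assumptions here, not values taken from the patch.

/*
 * Fixed-size record:  32 + 64 * 8 = 544 bytes, always.
 * Variable-size record for a 4-frame trace:
 *
 *   used        = flex_array_size(s, entries, 4) = 32
 *   unused      = 512 - 32                       = 480
 *   record_size = ALIGN(544 - 480, 1 << 4)       = 64 bytes
 *
 * Only records saved with STACK_DEPOT_FLAG_GET still pay the full 544
 * bytes, so that freed records can be reused for any later trace size.
 */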
@@ -538,8 +542,8 @@ static void depot_free_stack(struct stack_record *stack)
 	list_add_tail(&stack->free_list, &free_stacks);
 
 	counters[DEPOT_COUNTER_FREELIST_SIZE]++;
-	counters[DEPOT_COUNTER_FREES]++;
-	counters[DEPOT_COUNTER_INUSE]--;
+	counters[DEPOT_COUNTER_REFD_FREES]++;
+	counters[DEPOT_COUNTER_REFD_INUSE]--;
 
 	printk_deferred_exit();
 	raw_spin_unlock_irqrestore(&pool_lock, flags);
@@ -660,7 +664,7 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
 	 * Allocate memory for a new pool if required now:
 	 * we won't be able to do that under the lock.
 	 */
-	if (unlikely(can_alloc && READ_ONCE(new_pool_required))) {
+	if (unlikely(can_alloc && !READ_ONCE(new_pool))) {
 		/*
 		 * Zero out zone modifiers, as we don't have specific zone
 		 * requirements. Keep the flags related to allocation in atomic
@@ -681,7 +685,7 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
 	found = find_stack(bucket, entries, nr_entries, hash, depot_flags);
 	if (!found) {
 		struct stack_record *new =
-			depot_alloc_stack(entries, nr_entries, hash, &prealloc);
+			depot_alloc_stack(entries, nr_entries, hash, depot_flags, &prealloc);
 
 		if (new) {
 			/*
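For context, a caller-side sketch of the two allocation modes that depot_alloc_stack() now distinguishes. This is a hypothetical caller: save_example() is made up for illustration, while the stackdepot and stacktrace calls are the regular kernel API.

#include <linux/stackdepot.h>
#include <linux/stacktrace.h>

static depot_stack_handle_t save_example(void)
{
	unsigned long entries[16];
	depot_stack_handle_t persistent, evictable;
	unsigned int nr;

	nr = stack_trace_save(entries, ARRAY_SIZE(entries), 0);

	/*
	 * Without STACK_DEPOT_FLAG_GET: a compact, variable-sized record
	 * that is never evicted (accounted as persistent_count/_bytes).
	 */
	persistent = stack_depot_save_flags(entries, nr, GFP_KERNEL,
					    STACK_DEPOT_FLAG_CAN_ALLOC);

	/*
	 * With STACK_DEPOT_FLAG_GET: a max-sized, refcounted record; each
	 * handle obtained this way must eventually be dropped again via
	 * stack_depot_put().
	 */
	evictable = stack_depot_save_flags(entries, nr, GFP_KERNEL,
					   STACK_DEPOT_FLAG_CAN_ALLOC |
					   STACK_DEPOT_FLAG_GET);

	stack_depot_put(evictable);
	return persistent;
}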