@@ -459,8 +459,7 @@ static void notrace irq_work_raise(struct bpf_mem_cache *c)
  * Typical case will be between 11K and 116K closer to 11K.
  * bpf progs can and should share bpf_mem_cache when possible.
  */
-
-static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
+static void init_refill_work(struct bpf_mem_cache *c)
 {
 	init_irq_work(&c->refill_work, bpf_mem_refill);
 	if (c->unit_size <= 256) {
@@ -476,14 +475,42 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
 		c->high_watermark = max(96 * 256 / c->unit_size, 3);
 	}
 	c->batch = max((c->high_watermark - c->low_watermark) / 4 * 3, 1);
+}
 
+static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
+{
 	/* To avoid consuming memory assume that 1st run of bpf
 	 * prog won't be doing more than 4 map_update_elem from
 	 * irq disabled region
 	 */
 	alloc_bulk(c, c->unit_size <= 256 ? 4 : 1, cpu_to_node(cpu), false);
 }
 
+static int check_obj_size(struct bpf_mem_cache *c, unsigned int idx)
+{
+	struct llist_node *first;
+	unsigned int obj_size;
+
+	/* For per-cpu allocator, the size of free objects in free list doesn't
+	 * match with unit_size and now there is no way to get the size of
+	 * per-cpu pointer saved in free object, so just skip the checking.
+	 */
+	if (c->percpu_size)
+		return 0;
+
+	first = c->free_llist.first;
+	if (!first)
+		return 0;
+
+	obj_size = ksize(first);
+	if (obj_size != c->unit_size) {
+		WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n",
+			  idx, obj_size, c->unit_size);
+		return -EINVAL;
+	}
+	return 0;
+}
+
 /* When size != 0 bpf_mem_cache for each cpu.
  * This is typical bpf hash map use case when all elements have equal size.
  *
@@ -494,10 +521,10 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
 int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 {
 	static u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096};
+	int cpu, i, err, unit_size, percpu_size = 0;
 	struct bpf_mem_caches *cc, __percpu *pcc;
 	struct bpf_mem_cache *c, __percpu *pc;
 	struct obj_cgroup *objcg = NULL;
-	int cpu, i, unit_size, percpu_size = 0;
 
 	if (size) {
 		pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL);
@@ -521,6 +548,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 			c->objcg = objcg;
 			c->percpu_size = percpu_size;
 			c->tgt = c;
+			init_refill_work(c);
 			prefill_mem_cache(c, cpu);
 		}
 		ma->cache = pc;
@@ -534,6 +562,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 	pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL);
 	if (!pcc)
 		return -ENOMEM;
+	err = 0;
 #ifdef CONFIG_MEMCG_KMEM
 	objcg = get_obj_cgroup_from_current();
 #endif
@@ -544,11 +573,30 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
 			c->unit_size = sizes[i];
 			c->objcg = objcg;
 			c->tgt = c;
+
+			init_refill_work(c);
+			/* Another bpf_mem_cache will be used when allocating
+			 * c->unit_size in bpf_mem_alloc(), so doesn't prefill
+			 * for the bpf_mem_cache because these free objects will
+			 * never be used.
+			 */
+			if (i != bpf_mem_cache_idx(c->unit_size))
+				continue;
 			prefill_mem_cache(c, cpu);
+			err = check_obj_size(c, i);
+			if (err)
+				goto out;
 		}
 	}
+
+out:
 	ma->caches = pcc;
-	return 0;
+	/* refill_work is either zeroed or initialized, so it is safe to
+	 * call irq_work_sync().
+	 */
+	if (err)
+		bpf_mem_alloc_destroy(ma);
+	return err;
 }
 
 static void drain_mem_cache(struct bpf_mem_cache *c)
@@ -916,3 +964,41 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
 
 	return !ret ? NULL : ret + LLIST_NODE_SZ;
 }
+
+/* Most of the logic is taken from setup_kmalloc_cache_index_table() */
+static __init int bpf_mem_cache_adjust_size(void)
+{
+	unsigned int size, index;
+
+	/* Normally KMALLOC_MIN_SIZE is 8-bytes, but it can be
+	 * up-to 256-bytes.
+	 */
+	size = KMALLOC_MIN_SIZE;
+	if (size <= 192)
+		index = size_index[(size - 1) / 8];
+	else
+		index = fls(size - 1) - 1;
+	for (size = 8; size < KMALLOC_MIN_SIZE && size <= 192; size += 8)
+		size_index[(size - 1) / 8] = index;
+
+	/* The minimal alignment is 64-bytes, so disable 96-bytes cache and
+	 * use 128-bytes cache instead.
+	 */
+	if (KMALLOC_MIN_SIZE >= 64) {
+		index = size_index[(128 - 1) / 8];
+		for (size = 64 + 8; size <= 96; size += 8)
+			size_index[(size - 1) / 8] = index;
+	}
+
+	/* The minimal alignment is 128-bytes, so disable 192-bytes cache and
+	 * use 256-bytes cache instead.
+	 */
+	if (KMALLOC_MIN_SIZE >= 128) {
+		index = fls(256 - 1) - 1;
+		for (size = 128 + 8; size <= 192; size += 8)
+			size_index[(size - 1) / 8] = index;
+	}
+
+	return 0;
+}
+subsys_initcall(bpf_mem_cache_adjust_size);
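
For readers unfamiliar with the allocator API touched above, here is a minimal, hypothetical sketch of how a caller drives it, similar in spirit to how kernel/bpf/hashtab.c uses it: bpf_mem_alloc_init() builds the per-CPU caches (and, with this change, now cleans up after itself via bpf_mem_alloc_destroy() when check_obj_size() fails), bpf_mem_cache_alloc()/bpf_mem_cache_free() serve fixed-size objects, and bpf_mem_alloc_destroy() releases everything. The struct my_elem and my_* names below are made up for illustration and are not part of this commit or the kernel tree.

#include <linux/bpf_mem_alloc.h>

/* Hypothetical fixed-size element; the allocator reserves room for its
 * own llist_node header internally, so callers only describe their payload.
 */
struct my_elem {
	u64 key;
	u64 value;
};

static struct bpf_mem_alloc my_ma;

static int my_cache_setup(void)
{
	/* size != 0: one bpf_mem_cache per CPU, sized for struct my_elem.
	 * On failure the init path already tore down any partial state,
	 * so no extra cleanup is needed here.
	 */
	return bpf_mem_alloc_init(&my_ma, sizeof(struct my_elem), false);
}

static struct my_elem *my_cache_get(void)
{
	/* May return NULL when the per-CPU free list is empty and the
	 * irq_work-driven refill has not run yet.
	 */
	return bpf_mem_cache_alloc(&my_ma);
}

static void my_cache_put(struct my_elem *e)
{
	bpf_mem_cache_free(&my_ma, e);
}

static void my_cache_teardown(void)
{
	bpf_mem_alloc_destroy(&my_ma);
}

The subsys_initcall above complements this: bpf_mem_cache_adjust_size() rewrites bpf's own size_index table the same way the slab code does, so that, for example, with a 64-byte KMALLOC_MIN_SIZE a 96-byte unit_size is routed to the 128-byte bpf_mem_cache, matching the object size kmalloc actually returns, which is exactly what check_obj_size() verifies with ksize() during bpf_mem_alloc_init().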