Skip to content

Commit 8f50d5c

Browse files
author
Martin KaFai Lau
committed
Merge branch 'Allow struct_ops maps with a large number of programs'
Kui-Feng Lee says: ==================== The BPF struct_ops previously only allowed for one page to be used for the trampolines of all links in a map. However, we have recently run out of space due to the large number of BPF program links. By allocating additional pages when we exhaust an existing page, we can accommodate more links in a single map. The variable st_map->image has been changed to st_map->image_pages, and its type has been changed to an array of pointers to buffers of PAGE_SIZE. Additional pages are allocated when all existing pages are exhausted. The test case loads a struct_ops maps having 40 programs. Their trampolines takes about 6.6k+ bytes over 1.5 pages on x86. --- Major differences from v3: - Refactor buffer allocations to bpf_struct_ops_tramp_buf_alloc() and bpf_struct_ops_tramp_buf_free(). Major differences from v2: - Move image buffer allocation to bpf_struct_ops_prepare_trampoline(). Major differences from v1: - Always free pages if failing to update. - Allocate 8 pages at most. v3: https://lore.kernel.org/all/20240224030302.1500343-1-thinker.li@gmail.com/ v2: https://lore.kernel.org/all/20240221225911.757861-1-thinker.li@gmail.com/ v1: https://lore.kernel.org/all/20240216182828.201727-1-thinker.li@gmail.com/ ==================== Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
2 parents 01031fd + 93bc28d commit 8f50d5c

7 files changed

Lines changed: 279 additions & 60 deletions

File tree

include/linux/bpf.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1763,7 +1763,9 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
17631763
struct bpf_tramp_link *link,
17641764
const struct btf_func_model *model,
17651765
void *stub_func,
1766-
void *image, void *image_end);
1766+
void **image, u32 *image_off,
1767+
bool allow_alloc);
1768+
void bpf_struct_ops_image_free(void *image);
17671769
static inline bool bpf_try_module_get(const void *data, struct module *owner)
17681770
{
17691771
if (owner == BPF_MODULE_OWNER)

kernel/bpf/bpf_struct_ops.c

Lines changed: 95 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ struct bpf_struct_ops_value {
1818
char data[] ____cacheline_aligned_in_smp;
1919
};
2020

21+
#define MAX_TRAMP_IMAGE_PAGES 8
22+
2123
struct bpf_struct_ops_map {
2224
struct bpf_map map;
2325
struct rcu_head rcu;
@@ -30,12 +32,11 @@ struct bpf_struct_ops_map {
3032
*/
3133
struct bpf_link **links;
3234
u32 links_cnt;
33-
/* image is a page that has all the trampolines
35+
u32 image_pages_cnt;
36+
/* image_pages is an array of pages that has all the trampolines
3437
* that stores the func args before calling the bpf_prog.
35-
* A PAGE_SIZE "image" is enough to store all trampoline for
36-
* "links[]".
3738
*/
38-
void *image;
39+
void *image_pages[MAX_TRAMP_IMAGE_PAGES];
3940
/* The owner moduler's btf. */
4041
struct btf *btf;
4142
/* uvalue->data stores the kernel struct
@@ -116,6 +117,31 @@ static bool is_valid_value_type(struct btf *btf, s32 value_id,
116117
return true;
117118
}
118119

120+
static void *bpf_struct_ops_image_alloc(void)
121+
{
122+
void *image;
123+
int err;
124+
125+
err = bpf_jit_charge_modmem(PAGE_SIZE);
126+
if (err)
127+
return ERR_PTR(err);
128+
image = arch_alloc_bpf_trampoline(PAGE_SIZE);
129+
if (!image) {
130+
bpf_jit_uncharge_modmem(PAGE_SIZE);
131+
return ERR_PTR(-ENOMEM);
132+
}
133+
134+
return image;
135+
}
136+
137+
void bpf_struct_ops_image_free(void *image)
138+
{
139+
if (image) {
140+
arch_free_bpf_trampoline(image, PAGE_SIZE);
141+
bpf_jit_uncharge_modmem(PAGE_SIZE);
142+
}
143+
}
144+
119145
#define MAYBE_NULL_SUFFIX "__nullable"
120146
#define MAX_STUB_NAME 128
121147

@@ -461,6 +487,15 @@ static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map)
461487
}
462488
}
463489

490+
static void bpf_struct_ops_map_free_image(struct bpf_struct_ops_map *st_map)
491+
{
492+
int i;
493+
494+
for (i = 0; i < st_map->image_pages_cnt; i++)
495+
bpf_struct_ops_image_free(st_map->image_pages[i]);
496+
st_map->image_pages_cnt = 0;
497+
}
498+
464499
static int check_zero_holes(const struct btf *btf, const struct btf_type *t, void *data)
465500
{
466501
const struct btf_member *member;
@@ -506,9 +541,12 @@ const struct bpf_link_ops bpf_struct_ops_link_lops = {
506541
int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
507542
struct bpf_tramp_link *link,
508543
const struct btf_func_model *model,
509-
void *stub_func, void *image, void *image_end)
544+
void *stub_func,
545+
void **_image, u32 *_image_off,
546+
bool allow_alloc)
510547
{
511-
u32 flags = BPF_TRAMP_F_INDIRECT;
548+
u32 image_off = *_image_off, flags = BPF_TRAMP_F_INDIRECT;
549+
void *image = *_image;
512550
int size;
513551

514552
tlinks[BPF_TRAMP_FENTRY].links[0] = link;
@@ -518,12 +556,32 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
518556
flags |= BPF_TRAMP_F_RET_FENTRY_RET;
519557

520558
size = arch_bpf_trampoline_size(model, flags, tlinks, NULL);
521-
if (size < 0)
522-
return size;
523-
if (size > (unsigned long)image_end - (unsigned long)image)
524-
return -E2BIG;
525-
return arch_prepare_bpf_trampoline(NULL, image, image_end,
559+
if (size <= 0)
560+
return size ? : -EFAULT;
561+
562+
/* Allocate image buffer if necessary */
563+
if (!image || size > PAGE_SIZE - image_off) {
564+
if (!allow_alloc)
565+
return -E2BIG;
566+
567+
image = bpf_struct_ops_image_alloc();
568+
if (IS_ERR(image))
569+
return PTR_ERR(image);
570+
image_off = 0;
571+
}
572+
573+
size = arch_prepare_bpf_trampoline(NULL, image + image_off,
574+
image + PAGE_SIZE,
526575
model, flags, tlinks, stub_func);
576+
if (size <= 0) {
577+
if (image != *_image)
578+
bpf_struct_ops_image_free(image);
579+
return size ? : -EFAULT;
580+
}
581+
582+
*_image = image;
583+
*_image_off = image_off + size;
584+
return 0;
527585
}
528586

529587
static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
@@ -539,8 +597,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
539597
struct bpf_tramp_links *tlinks;
540598
void *udata, *kdata;
541599
int prog_fd, err;
542-
void *image, *image_end;
543-
u32 i;
600+
u32 i, trampoline_start, image_off = 0;
601+
void *cur_image = NULL, *image = NULL;
544602

545603
if (flags)
546604
return -EINVAL;
@@ -578,8 +636,6 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
578636

579637
udata = &uvalue->data;
580638
kdata = &kvalue->data;
581-
image = st_map->image;
582-
image_end = st_map->image + PAGE_SIZE;
583639

584640
module_type = btf_type_by_id(btf_vmlinux, st_ops_ids[IDX_MODULE_ID]);
585641
for_each_member(i, t, member) {
@@ -658,28 +714,39 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
658714
&bpf_struct_ops_link_lops, prog);
659715
st_map->links[i] = &link->link;
660716

717+
trampoline_start = image_off;
661718
err = bpf_struct_ops_prepare_trampoline(tlinks, link,
662-
&st_ops->func_models[i],
663-
*(void **)(st_ops->cfi_stubs + moff),
664-
image, image_end);
719+
&st_ops->func_models[i],
720+
*(void **)(st_ops->cfi_stubs + moff),
721+
&image, &image_off,
722+
st_map->image_pages_cnt < MAX_TRAMP_IMAGE_PAGES);
723+
if (err)
724+
goto reset_unlock;
725+
726+
if (cur_image != image) {
727+
st_map->image_pages[st_map->image_pages_cnt++] = image;
728+
cur_image = image;
729+
trampoline_start = 0;
730+
}
665731
if (err < 0)
666732
goto reset_unlock;
667733

668-
*(void **)(kdata + moff) = image + cfi_get_offset();
669-
image += err;
734+
*(void **)(kdata + moff) = image + trampoline_start + cfi_get_offset();
670735

671736
/* put prog_id to udata */
672737
*(unsigned long *)(udata + moff) = prog->aux->id;
673738
}
674739

740+
if (st_ops->validate) {
741+
err = st_ops->validate(kdata);
742+
if (err)
743+
goto reset_unlock;
744+
}
745+
for (i = 0; i < st_map->image_pages_cnt; i++)
746+
arch_protect_bpf_trampoline(st_map->image_pages[i], PAGE_SIZE);
747+
675748
if (st_map->map.map_flags & BPF_F_LINK) {
676749
err = 0;
677-
if (st_ops->validate) {
678-
err = st_ops->validate(kdata);
679-
if (err)
680-
goto reset_unlock;
681-
}
682-
arch_protect_bpf_trampoline(st_map->image, PAGE_SIZE);
683750
/* Let bpf_link handle registration & unregistration.
684751
*
685752
* Pair with smp_load_acquire() during lookup_elem().
@@ -688,7 +755,6 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
688755
goto unlock;
689756
}
690757

691-
arch_protect_bpf_trampoline(st_map->image, PAGE_SIZE);
692758
err = st_ops->reg(kdata);
693759
if (likely(!err)) {
694760
/* This refcnt increment on the map here after
@@ -711,9 +777,9 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
711777
* there was a race in registering the struct_ops (under the same name) to
712778
* a sub-system through different struct_ops's maps.
713779
*/
714-
arch_unprotect_bpf_trampoline(st_map->image, PAGE_SIZE);
715780

716781
reset_unlock:
782+
bpf_struct_ops_map_free_image(st_map);
717783
bpf_struct_ops_map_put_progs(st_map);
718784
memset(uvalue, 0, map->value_size);
719785
memset(kvalue, 0, map->value_size);
@@ -780,10 +846,7 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map)
780846
if (st_map->links)
781847
bpf_struct_ops_map_put_progs(st_map);
782848
bpf_map_area_free(st_map->links);
783-
if (st_map->image) {
784-
arch_free_bpf_trampoline(st_map->image, PAGE_SIZE);
785-
bpf_jit_uncharge_modmem(PAGE_SIZE);
786-
}
849+
bpf_struct_ops_map_free_image(st_map);
787850
bpf_map_area_free(st_map->uvalue);
788851
bpf_map_area_free(st_map);
789852
}
@@ -893,20 +956,6 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
893956
st_map->st_ops_desc = st_ops_desc;
894957
map = &st_map->map;
895958

896-
ret = bpf_jit_charge_modmem(PAGE_SIZE);
897-
if (ret)
898-
goto errout_free;
899-
900-
st_map->image = arch_alloc_bpf_trampoline(PAGE_SIZE);
901-
if (!st_map->image) {
902-
/* __bpf_struct_ops_map_free() uses st_map->image as flag
903-
* for "charged or not". In this case, we need to unchange
904-
* here.
905-
*/
906-
bpf_jit_uncharge_modmem(PAGE_SIZE);
907-
ret = -ENOMEM;
908-
goto errout_free;
909-
}
910959
st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
911960
st_map->links_cnt = btf_type_vlen(t);
912961
st_map->links =

net/bpf/bpf_dummy_struct_ops.c

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
9191
struct bpf_tramp_link *link = NULL;
9292
void *image = NULL;
9393
unsigned int op_idx;
94+
u32 image_off = 0;
9495
int prog_ret;
9596
s32 type_id;
9697
int err;
@@ -114,12 +115,6 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
114115
goto out;
115116
}
116117

117-
image = arch_alloc_bpf_trampoline(PAGE_SIZE);
118-
if (!image) {
119-
err = -ENOMEM;
120-
goto out;
121-
}
122-
123118
link = kzalloc(sizeof(*link), GFP_USER);
124119
if (!link) {
125120
err = -ENOMEM;
@@ -133,7 +128,8 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
133128
err = bpf_struct_ops_prepare_trampoline(tlinks, link,
134129
&st_ops->func_models[op_idx],
135130
&dummy_ops_test_ret_function,
136-
image, image + PAGE_SIZE);
131+
&image, &image_off,
132+
true);
137133
if (err < 0)
138134
goto out;
139135

@@ -147,7 +143,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
147143
err = -EFAULT;
148144
out:
149145
kfree(args);
150-
arch_free_bpf_trampoline(image, PAGE_SIZE);
146+
bpf_struct_ops_image_free(image);
151147
if (link)
152148
bpf_link_put(&link->link);
153149
kfree(tlinks);

net/ipv4/tcp_cong.c

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -146,11 +146,7 @@ EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
146146
int tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct tcp_congestion_ops *old_ca)
147147
{
148148
struct tcp_congestion_ops *existing;
149-
int ret;
150-
151-
ret = tcp_validate_congestion_control(ca);
152-
if (ret)
153-
return ret;
149+
int ret = 0;
154150

155151
ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name));
156152

tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,50 @@ struct bpf_testmod_ops {
4343
int b;
4444
} unsupported;
4545
int data;
46+
47+
/* The following pointers are used to test the maps having multiple
48+
* pages of trampolines.
49+
*/
50+
int (*tramp_1)(int value);
51+
int (*tramp_2)(int value);
52+
int (*tramp_3)(int value);
53+
int (*tramp_4)(int value);
54+
int (*tramp_5)(int value);
55+
int (*tramp_6)(int value);
56+
int (*tramp_7)(int value);
57+
int (*tramp_8)(int value);
58+
int (*tramp_9)(int value);
59+
int (*tramp_10)(int value);
60+
int (*tramp_11)(int value);
61+
int (*tramp_12)(int value);
62+
int (*tramp_13)(int value);
63+
int (*tramp_14)(int value);
64+
int (*tramp_15)(int value);
65+
int (*tramp_16)(int value);
66+
int (*tramp_17)(int value);
67+
int (*tramp_18)(int value);
68+
int (*tramp_19)(int value);
69+
int (*tramp_20)(int value);
70+
int (*tramp_21)(int value);
71+
int (*tramp_22)(int value);
72+
int (*tramp_23)(int value);
73+
int (*tramp_24)(int value);
74+
int (*tramp_25)(int value);
75+
int (*tramp_26)(int value);
76+
int (*tramp_27)(int value);
77+
int (*tramp_28)(int value);
78+
int (*tramp_29)(int value);
79+
int (*tramp_30)(int value);
80+
int (*tramp_31)(int value);
81+
int (*tramp_32)(int value);
82+
int (*tramp_33)(int value);
83+
int (*tramp_34)(int value);
84+
int (*tramp_35)(int value);
85+
int (*tramp_36)(int value);
86+
int (*tramp_37)(int value);
87+
int (*tramp_38)(int value);
88+
int (*tramp_39)(int value);
89+
int (*tramp_40)(int value);
4690
};
4791

4892
#endif /* _BPF_TESTMOD_H */
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
3+
#include <test_progs.h>
4+
5+
#include "struct_ops_multi_pages.skel.h"
6+
7+
static void do_struct_ops_multi_pages(void)
8+
{
9+
struct struct_ops_multi_pages *skel;
10+
struct bpf_link *link;
11+
12+
/* The size of all trampolines of skel->maps.multi_pages should be
13+
* over 1 page (at least for x86).
14+
*/
15+
skel = struct_ops_multi_pages__open_and_load();
16+
if (!ASSERT_OK_PTR(skel, "struct_ops_multi_pages_open_and_load"))
17+
return;
18+
19+
link = bpf_map__attach_struct_ops(skel->maps.multi_pages);
20+
ASSERT_OK_PTR(link, "attach_multi_pages");
21+
22+
bpf_link__destroy(link);
23+
struct_ops_multi_pages__destroy(skel);
24+
}
25+
26+
void test_struct_ops_multi_pages(void)
27+
{
28+
if (test__start_subtest("multi_pages"))
29+
do_struct_ops_multi_pages();
30+
}

0 commit comments

Comments
 (0)