Skip to content

Commit 187e2af

Browse files
ThinkerYzu1 authored and Martin KaFai Lau committed
bpf: struct_ops supports more than one page for trampolines.
The BPF struct_ops previously only allowed one page of trampolines. Each function pointer of a struct_ops is implemented by a struct_ops bpf program. Each struct_ops bpf program requires a trampoline. The following selftest patch shows each page can hold a little more than 20 trampolines. While one page is more than enough for the tcp-cc use case, the sched_ext use case shows that one page is not always enough and hits the one page limit. This patch overcomes the one page limit by allocating another page when needed and it is limited to a total of MAX_TRAMP_IMAGE_PAGES (8) pages which is more than enough for reasonable usages. The variable st_map->image has been changed to st_map->image_pages, and its type has been changed to an array of pointers to pages. Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com> Link: https://lore.kernel.org/r/20240224223418.526631-3-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
1 parent 73e4f9e commit 187e2af

3 files changed

Lines changed: 96 additions & 50 deletions

File tree

include/linux/bpf.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1763,7 +1763,9 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
17631763
struct bpf_tramp_link *link,
17641764
const struct btf_func_model *model,
17651765
void *stub_func,
1766-
void *image, void *image_end);
1766+
void **image, u32 *image_off,
1767+
bool allow_alloc);
1768+
void bpf_struct_ops_image_free(void *image);
17671769
static inline bool bpf_try_module_get(const void *data, struct module *owner)
17681770
{
17691771
if (owner == BPF_MODULE_OWNER)

kernel/bpf/bpf_struct_ops.c

Lines changed: 89 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ struct bpf_struct_ops_value {
1818
char data[] ____cacheline_aligned_in_smp;
1919
};
2020

21+
#define MAX_TRAMP_IMAGE_PAGES 8
22+
2123
struct bpf_struct_ops_map {
2224
struct bpf_map map;
2325
struct rcu_head rcu;
@@ -30,12 +32,11 @@ struct bpf_struct_ops_map {
3032
*/
3133
struct bpf_link **links;
3234
u32 links_cnt;
33-
/* image is a page that has all the trampolines
35+
u32 image_pages_cnt;
36+
/* image_pages is an array of pages that has all the trampolines
3437
* that stores the func args before calling the bpf_prog.
35-
* A PAGE_SIZE "image" is enough to store all trampoline for
36-
* "links[]".
3738
*/
38-
void *image;
39+
void *image_pages[MAX_TRAMP_IMAGE_PAGES];
3940
/* The owner module's btf. */
4041
struct btf *btf;
4142
/* uvalue->data stores the kernel struct
@@ -116,6 +117,31 @@ static bool is_valid_value_type(struct btf *btf, s32 value_id,
116117
return true;
117118
}
118119

120+
static void *bpf_struct_ops_image_alloc(void)
121+
{
122+
void *image;
123+
int err;
124+
125+
err = bpf_jit_charge_modmem(PAGE_SIZE);
126+
if (err)
127+
return ERR_PTR(err);
128+
image = arch_alloc_bpf_trampoline(PAGE_SIZE);
129+
if (!image) {
130+
bpf_jit_uncharge_modmem(PAGE_SIZE);
131+
return ERR_PTR(-ENOMEM);
132+
}
133+
134+
return image;
135+
}
136+
137+
void bpf_struct_ops_image_free(void *image)
138+
{
139+
if (image) {
140+
arch_free_bpf_trampoline(image, PAGE_SIZE);
141+
bpf_jit_uncharge_modmem(PAGE_SIZE);
142+
}
143+
}
144+
119145
#define MAYBE_NULL_SUFFIX "__nullable"
120146
#define MAX_STUB_NAME 128
121147

@@ -461,6 +487,15 @@ static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map)
461487
}
462488
}
463489

490+
static void bpf_struct_ops_map_free_image(struct bpf_struct_ops_map *st_map)
491+
{
492+
int i;
493+
494+
for (i = 0; i < st_map->image_pages_cnt; i++)
495+
bpf_struct_ops_image_free(st_map->image_pages[i]);
496+
st_map->image_pages_cnt = 0;
497+
}
498+
464499
static int check_zero_holes(const struct btf *btf, const struct btf_type *t, void *data)
465500
{
466501
const struct btf_member *member;
@@ -506,9 +541,12 @@ const struct bpf_link_ops bpf_struct_ops_link_lops = {
506541
int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
507542
struct bpf_tramp_link *link,
508543
const struct btf_func_model *model,
509-
void *stub_func, void *image, void *image_end)
544+
void *stub_func,
545+
void **_image, u32 *_image_off,
546+
bool allow_alloc)
510547
{
511-
u32 flags = BPF_TRAMP_F_INDIRECT;
548+
u32 image_off = *_image_off, flags = BPF_TRAMP_F_INDIRECT;
549+
void *image = *_image;
512550
int size;
513551

514552
tlinks[BPF_TRAMP_FENTRY].links[0] = link;
@@ -518,12 +556,32 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
518556
flags |= BPF_TRAMP_F_RET_FENTRY_RET;
519557

520558
size = arch_bpf_trampoline_size(model, flags, tlinks, NULL);
521-
if (size < 0)
522-
return size;
523-
if (size > (unsigned long)image_end - (unsigned long)image)
524-
return -E2BIG;
525-
return arch_prepare_bpf_trampoline(NULL, image, image_end,
559+
if (size <= 0)
560+
return size ? : -EFAULT;
561+
562+
/* Allocate image buffer if necessary */
563+
if (!image || size > PAGE_SIZE - image_off) {
564+
if (!allow_alloc)
565+
return -E2BIG;
566+
567+
image = bpf_struct_ops_image_alloc();
568+
if (IS_ERR(image))
569+
return PTR_ERR(image);
570+
image_off = 0;
571+
}
572+
573+
size = arch_prepare_bpf_trampoline(NULL, image + image_off,
574+
image + PAGE_SIZE,
526575
model, flags, tlinks, stub_func);
576+
if (size <= 0) {
577+
if (image != *_image)
578+
bpf_struct_ops_image_free(image);
579+
return size ? : -EFAULT;
580+
}
581+
582+
*_image = image;
583+
*_image_off = image_off + size;
584+
return 0;
527585
}
528586

529587
static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
@@ -539,8 +597,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
539597
struct bpf_tramp_links *tlinks;
540598
void *udata, *kdata;
541599
int prog_fd, err;
542-
void *image, *image_end;
543-
u32 i;
600+
u32 i, trampoline_start, image_off = 0;
601+
void *cur_image = NULL, *image = NULL;
544602

545603
if (flags)
546604
return -EINVAL;
@@ -578,8 +636,6 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
578636

579637
udata = &uvalue->data;
580638
kdata = &kvalue->data;
581-
image = st_map->image;
582-
image_end = st_map->image + PAGE_SIZE;
583639

584640
module_type = btf_type_by_id(btf_vmlinux, st_ops_ids[IDX_MODULE_ID]);
585641
for_each_member(i, t, member) {
@@ -658,15 +714,24 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
658714
&bpf_struct_ops_link_lops, prog);
659715
st_map->links[i] = &link->link;
660716

717+
trampoline_start = image_off;
661718
err = bpf_struct_ops_prepare_trampoline(tlinks, link,
662-
&st_ops->func_models[i],
663-
*(void **)(st_ops->cfi_stubs + moff),
664-
image, image_end);
719+
&st_ops->func_models[i],
720+
*(void **)(st_ops->cfi_stubs + moff),
721+
&image, &image_off,
722+
st_map->image_pages_cnt < MAX_TRAMP_IMAGE_PAGES);
723+
if (err)
724+
goto reset_unlock;
725+
726+
if (cur_image != image) {
727+
st_map->image_pages[st_map->image_pages_cnt++] = image;
728+
cur_image = image;
729+
trampoline_start = 0;
730+
}
665731
if (err < 0)
666732
goto reset_unlock;
667733

668-
*(void **)(kdata + moff) = image + cfi_get_offset();
669-
image += err;
734+
*(void **)(kdata + moff) = image + trampoline_start + cfi_get_offset();
670735

671736
/* put prog_id to udata */
672737
*(unsigned long *)(udata + moff) = prog->aux->id;
@@ -677,10 +742,11 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
677742
if (err)
678743
goto reset_unlock;
679744
}
745+
for (i = 0; i < st_map->image_pages_cnt; i++)
746+
arch_protect_bpf_trampoline(st_map->image_pages[i], PAGE_SIZE);
680747

681748
if (st_map->map.map_flags & BPF_F_LINK) {
682749
err = 0;
683-
arch_protect_bpf_trampoline(st_map->image, PAGE_SIZE);
684750
/* Let bpf_link handle registration & unregistration.
685751
*
686752
* Pair with smp_load_acquire() during lookup_elem().
@@ -689,7 +755,6 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
689755
goto unlock;
690756
}
691757

692-
arch_protect_bpf_trampoline(st_map->image, PAGE_SIZE);
693758
err = st_ops->reg(kdata);
694759
if (likely(!err)) {
695760
/* This refcnt increment on the map here after
@@ -712,9 +777,9 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
712777
* there was a race in registering the struct_ops (under the same name) to
713778
* a sub-system through different struct_ops's maps.
714779
*/
715-
arch_unprotect_bpf_trampoline(st_map->image, PAGE_SIZE);
716780

717781
reset_unlock:
782+
bpf_struct_ops_map_free_image(st_map);
718783
bpf_struct_ops_map_put_progs(st_map);
719784
memset(uvalue, 0, map->value_size);
720785
memset(kvalue, 0, map->value_size);
@@ -781,10 +846,7 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map)
781846
if (st_map->links)
782847
bpf_struct_ops_map_put_progs(st_map);
783848
bpf_map_area_free(st_map->links);
784-
if (st_map->image) {
785-
arch_free_bpf_trampoline(st_map->image, PAGE_SIZE);
786-
bpf_jit_uncharge_modmem(PAGE_SIZE);
787-
}
849+
bpf_struct_ops_map_free_image(st_map);
788850
bpf_map_area_free(st_map->uvalue);
789851
bpf_map_area_free(st_map);
790852
}
@@ -894,20 +956,6 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
894956
st_map->st_ops_desc = st_ops_desc;
895957
map = &st_map->map;
896958

897-
ret = bpf_jit_charge_modmem(PAGE_SIZE);
898-
if (ret)
899-
goto errout_free;
900-
901-
st_map->image = arch_alloc_bpf_trampoline(PAGE_SIZE);
902-
if (!st_map->image) {
903-
/* __bpf_struct_ops_map_free() uses st_map->image as flag
904-
* for "charged or not". In this case, we need to unchange
905-
* here.
906-
*/
907-
bpf_jit_uncharge_modmem(PAGE_SIZE);
908-
ret = -ENOMEM;
909-
goto errout_free;
910-
}
911959
st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
912960
st_map->links_cnt = btf_type_vlen(t);
913961
st_map->links =

net/bpf/bpf_dummy_struct_ops.c

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
9191
struct bpf_tramp_link *link = NULL;
9292
void *image = NULL;
9393
unsigned int op_idx;
94+
u32 image_off = 0;
9495
int prog_ret;
9596
s32 type_id;
9697
int err;
@@ -114,12 +115,6 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
114115
goto out;
115116
}
116117

117-
image = arch_alloc_bpf_trampoline(PAGE_SIZE);
118-
if (!image) {
119-
err = -ENOMEM;
120-
goto out;
121-
}
122-
123118
link = kzalloc(sizeof(*link), GFP_USER);
124119
if (!link) {
125120
err = -ENOMEM;
@@ -133,7 +128,8 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
133128
err = bpf_struct_ops_prepare_trampoline(tlinks, link,
134129
&st_ops->func_models[op_idx],
135130
&dummy_ops_test_ret_function,
136-
image, image + PAGE_SIZE);
131+
&image, &image_off,
132+
true);
137133
if (err < 0)
138134
goto out;
139135

@@ -147,7 +143,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
147143
err = -EFAULT;
148144
out:
149145
kfree(args);
150-
arch_free_bpf_trampoline(image, PAGE_SIZE);
146+
bpf_struct_ops_image_free(image);
151147
if (link)
152148
bpf_link_put(&link->link);
153149
kfree(tlinks);

0 commit comments

Comments
 (0)