Skip to content

Commit 8f79870

Browse files
anakryiko authored and
borkmann committed
selftests/bpf: Extend uprobe/uretprobe triggering benchmarks
Settle on three "flavors" of uprobe/uretprobe, installed on different kinds of instruction: nop, push, and ret. All three are testing different internal code paths emulating or single-stepping instructions, so are interesting to compare and benchmark separately.

To guarantee a `push rbp` instruction, we make sure that uprobe_target_push() is not a leaf function by calling a (global __weak) noop function and returning something afterwards (if we don't do that, the compiler will just do a tail call optimization). Also, we need to make sure that the compiler isn't skipping frame pointer generation, so let's add `-fno-omit-frame-pointer` to the Makefile.

Just to give an idea of where we currently stand in terms of relative performance of different uprobe/uretprobe cases vs a cheap syscall (getpgid()) baseline, here are results from my local machine:

$ benchs/run_bench_uprobes.sh
base           : 1.561 ± 0.020M/s
uprobe-nop     : 0.947 ± 0.007M/s
uprobe-push    : 0.951 ± 0.004M/s
uprobe-ret     : 0.443 ± 0.007M/s
uretprobe-nop  : 0.471 ± 0.013M/s
uretprobe-push : 0.483 ± 0.004M/s
uretprobe-ret  : 0.306 ± 0.007M/s

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20240301214551.1686095-1-andrii@kernel.org
1 parent 25703ad commit 8f79870

4 files changed

Lines changed: 103 additions & 46 deletions

File tree

tools/testing/selftests/bpf/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ LIBELF_CFLAGS := $(shell $(PKG_CONFIG) libelf --cflags 2>/dev/null)
3434
LIBELF_LIBS := $(shell $(PKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf)
3535

3636
CFLAGS += -g $(OPT_FLAGS) -rdynamic \
37-
-Wall -Werror \
37+
-Wall -Werror -fno-omit-frame-pointer \
3838
$(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \
3939
-I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \
4040
-I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT)

tools/testing/selftests/bpf/bench.c

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -499,10 +499,12 @@ extern const struct bench bench_trig_fentry;
499499
extern const struct bench bench_trig_fentry_sleep;
500500
extern const struct bench bench_trig_fmodret;
501501
extern const struct bench bench_trig_uprobe_base;
502-
extern const struct bench bench_trig_uprobe_with_nop;
503-
extern const struct bench bench_trig_uretprobe_with_nop;
504-
extern const struct bench bench_trig_uprobe_without_nop;
505-
extern const struct bench bench_trig_uretprobe_without_nop;
502+
extern const struct bench bench_trig_uprobe_nop;
503+
extern const struct bench bench_trig_uretprobe_nop;
504+
extern const struct bench bench_trig_uprobe_push;
505+
extern const struct bench bench_trig_uretprobe_push;
506+
extern const struct bench bench_trig_uprobe_ret;
507+
extern const struct bench bench_trig_uretprobe_ret;
506508
extern const struct bench bench_rb_libbpf;
507509
extern const struct bench bench_rb_custom;
508510
extern const struct bench bench_pb_libbpf;
@@ -541,10 +543,12 @@ static const struct bench *benchs[] = {
541543
&bench_trig_fentry_sleep,
542544
&bench_trig_fmodret,
543545
&bench_trig_uprobe_base,
544-
&bench_trig_uprobe_with_nop,
545-
&bench_trig_uretprobe_with_nop,
546-
&bench_trig_uprobe_without_nop,
547-
&bench_trig_uretprobe_without_nop,
546+
&bench_trig_uprobe_nop,
547+
&bench_trig_uretprobe_nop,
548+
&bench_trig_uprobe_push,
549+
&bench_trig_uretprobe_push,
550+
&bench_trig_uprobe_ret,
551+
&bench_trig_uretprobe_ret,
548552
&bench_rb_libbpf,
549553
&bench_rb_custom,
550554
&bench_pb_libbpf,

tools/testing/selftests/bpf/benchs/bench_trigger.c

Lines changed: 81 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -113,40 +113,60 @@ static void trigger_fmodret_setup(void)
113113
* GCC doesn't generate stack setup preamble for these functions due to them
114114
* having no input arguments and doing nothing in the body.
115115
*/
116-
__weak void uprobe_target_with_nop(void)
116+
__weak void uprobe_target_nop(void)
117117
{
118118
asm volatile ("nop");
119119
}
120120

121-
__weak void uprobe_target_without_nop(void)
121+
__weak void opaque_noop_func(void)
122+
{
123+
}
124+
125+
__weak int uprobe_target_push(void)
126+
{
127+
/* overhead of function call is negligible compared to uprobe
128+
* triggering, so this shouldn't affect benchmark results much
129+
*/
130+
opaque_noop_func();
131+
return 1;
132+
}
133+
134+
__weak void uprobe_target_ret(void)
122135
{
123136
asm volatile ("");
124137
}
125138

126139
static void *uprobe_base_producer(void *input)
127140
{
128141
while (true) {
129-
uprobe_target_with_nop();
142+
uprobe_target_nop();
130143
atomic_inc(&base_hits.value);
131144
}
132145
return NULL;
133146
}
134147

135-
static void *uprobe_producer_with_nop(void *input)
148+
static void *uprobe_producer_nop(void *input)
149+
{
150+
while (true)
151+
uprobe_target_nop();
152+
return NULL;
153+
}
154+
155+
static void *uprobe_producer_push(void *input)
136156
{
137157
while (true)
138-
uprobe_target_with_nop();
158+
uprobe_target_push();
139159
return NULL;
140160
}
141161

142-
static void *uprobe_producer_without_nop(void *input)
162+
static void *uprobe_producer_ret(void *input)
143163
{
144164
while (true)
145-
uprobe_target_without_nop();
165+
uprobe_target_ret();
146166
return NULL;
147167
}
148168

149-
static void usetup(bool use_retprobe, bool use_nop)
169+
static void usetup(bool use_retprobe, void *target_addr)
150170
{
151171
size_t uprobe_offset;
152172
struct bpf_link *link;
@@ -159,11 +179,7 @@ static void usetup(bool use_retprobe, bool use_nop)
159179
exit(1);
160180
}
161181

162-
if (use_nop)
163-
uprobe_offset = get_uprobe_offset(&uprobe_target_with_nop);
164-
else
165-
uprobe_offset = get_uprobe_offset(&uprobe_target_without_nop);
166-
182+
uprobe_offset = get_uprobe_offset(target_addr);
167183
link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
168184
use_retprobe,
169185
-1 /* all PIDs */,
@@ -176,24 +192,34 @@ static void usetup(bool use_retprobe, bool use_nop)
176192
ctx.skel->links.bench_trigger_uprobe = link;
177193
}
178194

179-
static void uprobe_setup_with_nop(void)
195+
static void uprobe_setup_nop(void)
196+
{
197+
usetup(false, &uprobe_target_nop);
198+
}
199+
200+
static void uretprobe_setup_nop(void)
201+
{
202+
usetup(true, &uprobe_target_nop);
203+
}
204+
205+
static void uprobe_setup_push(void)
180206
{
181-
usetup(false, true);
207+
usetup(false, &uprobe_target_push);
182208
}
183209

184-
static void uretprobe_setup_with_nop(void)
210+
static void uretprobe_setup_push(void)
185211
{
186-
usetup(true, true);
212+
usetup(true, &uprobe_target_push);
187213
}
188214

189-
static void uprobe_setup_without_nop(void)
215+
static void uprobe_setup_ret(void)
190216
{
191-
usetup(false, false);
217+
usetup(false, &uprobe_target_ret);
192218
}
193219

194-
static void uretprobe_setup_without_nop(void)
220+
static void uretprobe_setup_ret(void)
195221
{
196-
usetup(true, false);
222+
usetup(true, &uprobe_target_ret);
197223
}
198224

199225
const struct bench bench_trig_base = {
@@ -274,37 +300,55 @@ const struct bench bench_trig_uprobe_base = {
274300
.report_final = hits_drops_report_final,
275301
};
276302

277-
const struct bench bench_trig_uprobe_with_nop = {
278-
.name = "trig-uprobe-with-nop",
279-
.setup = uprobe_setup_with_nop,
280-
.producer_thread = uprobe_producer_with_nop,
303+
const struct bench bench_trig_uprobe_nop = {
304+
.name = "trig-uprobe-nop",
305+
.setup = uprobe_setup_nop,
306+
.producer_thread = uprobe_producer_nop,
307+
.measure = trigger_measure,
308+
.report_progress = hits_drops_report_progress,
309+
.report_final = hits_drops_report_final,
310+
};
311+
312+
const struct bench bench_trig_uretprobe_nop = {
313+
.name = "trig-uretprobe-nop",
314+
.setup = uretprobe_setup_nop,
315+
.producer_thread = uprobe_producer_nop,
316+
.measure = trigger_measure,
317+
.report_progress = hits_drops_report_progress,
318+
.report_final = hits_drops_report_final,
319+
};
320+
321+
const struct bench bench_trig_uprobe_push = {
322+
.name = "trig-uprobe-push",
323+
.setup = uprobe_setup_push,
324+
.producer_thread = uprobe_producer_push,
281325
.measure = trigger_measure,
282326
.report_progress = hits_drops_report_progress,
283327
.report_final = hits_drops_report_final,
284328
};
285329

286-
const struct bench bench_trig_uretprobe_with_nop = {
287-
.name = "trig-uretprobe-with-nop",
288-
.setup = uretprobe_setup_with_nop,
289-
.producer_thread = uprobe_producer_with_nop,
330+
const struct bench bench_trig_uretprobe_push = {
331+
.name = "trig-uretprobe-push",
332+
.setup = uretprobe_setup_push,
333+
.producer_thread = uprobe_producer_push,
290334
.measure = trigger_measure,
291335
.report_progress = hits_drops_report_progress,
292336
.report_final = hits_drops_report_final,
293337
};
294338

295-
const struct bench bench_trig_uprobe_without_nop = {
296-
.name = "trig-uprobe-without-nop",
297-
.setup = uprobe_setup_without_nop,
298-
.producer_thread = uprobe_producer_without_nop,
339+
const struct bench bench_trig_uprobe_ret = {
340+
.name = "trig-uprobe-ret",
341+
.setup = uprobe_setup_ret,
342+
.producer_thread = uprobe_producer_ret,
299343
.measure = trigger_measure,
300344
.report_progress = hits_drops_report_progress,
301345
.report_final = hits_drops_report_final,
302346
};
303347

304-
const struct bench bench_trig_uretprobe_without_nop = {
305-
.name = "trig-uretprobe-without-nop",
306-
.setup = uretprobe_setup_without_nop,
307-
.producer_thread = uprobe_producer_without_nop,
348+
const struct bench bench_trig_uretprobe_ret = {
349+
.name = "trig-uretprobe-ret",
350+
.setup = uretprobe_setup_ret,
351+
.producer_thread = uprobe_producer_ret,
308352
.measure = trigger_measure,
309353
.report_progress = hits_drops_report_progress,
310354
.report_final = hits_drops_report_final,
tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/bin/bash
2+
3+
set -eufo pipefail
4+
5+
for i in base {uprobe,uretprobe}-{nop,push,ret}
6+
do
7+
summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
8+
printf "%-15s: %s\n" $i "$summary"
9+
done

0 commit comments

Comments
 (0)