Skip to content

Commit b56c68f

Browse files
mrutland-armrostedt
authored andcommitted
ftrace: Add sample with custom ops
When reworking core ftrace code or architectural ftrace code, it's often necessary to test/analyse/benchmark a number of ftrace_ops configurations. This patch adds a module which can be used to explore some of those configurations. I'm using this to benchmark various options for changing the way trampolines and handling of ftrace_ops work on arm64, and ensuring other architectures aren't adversely affected. For example, in a QEMU+KVM VM running on a 2GHz Xeon E5-2660 workstation, loading the module in various configurations produces: | # insmod ftrace-ops.ko | ftrace_ops: registering: | relevant ops: 1 | tracee: tracee_relevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | irrelevant ops: 0 | tracee: tracee_irrelevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | saving registers: NO | assist recursion: NO | assist RCU: NO | ftrace_ops: Attempted 100000 calls to tracee_relevant [ftrace_ops] in 1681558ns (16ns / call) | # insmod ftrace-ops.ko nr_ops_irrelevant=5 | ftrace_ops: registering: | relevant ops: 1 | tracee: tracee_relevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | irrelevant ops: 5 | tracee: tracee_irrelevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | saving registers: NO | assist recursion: NO | assist RCU: NO | ftrace_ops: Attempted 100000 calls to tracee_relevant [ftrace_ops] in 1693042ns (16ns / call) | # insmod ftrace-ops.ko nr_ops_relevant=2 | ftrace_ops: registering: | relevant ops: 2 | tracee: tracee_relevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | irrelevant ops: 0 | tracee: tracee_irrelevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | saving registers: NO | assist recursion: NO | assist RCU: NO | ftrace_ops: Attempted 100000 calls to tracee_relevant [ftrace_ops] in 11965582ns (119ns / call) | # insmod ftrace-ops.ko save_regs=true | ftrace_ops: registering: | relevant ops: 1 | tracee: tracee_relevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | irrelevant ops: 0 | tracee: tracee_irrelevant 
[ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | saving registers: YES | assist recursion: NO | assist RCU: NO | ftrace_ops: Attempted 100000 calls to tracee_relevant [ftrace_ops] in 4459624ns (44ns / call) Link: https://lkml.kernel.org/r/20230103124912.2948963-4-mark.rutland@arm.com Cc: Florent Revest <revest@chromium.org> Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> Signed-off-by: Mark Rutland <mark.rutland@arm.com> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
1 parent 7f09d63 commit b56c68f

4 files changed

Lines changed: 261 additions & 0 deletions

File tree

samples/Kconfig

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,13 @@ config SAMPLE_FTRACE_DIRECT_MULTI
4646
that hooks to wake_up_process and schedule, and prints
4747
the function addresses.
4848

49+
config SAMPLE_FTRACE_OPS
50+
tristate "Build custom ftrace ops example"
51+
depends on FUNCTION_TRACER
52+
help
53+
This builds an ftrace ops example that hooks two functions and
54+
measures the time taken to invoke one function a number of times.
55+
4956
config SAMPLE_TRACE_ARRAY
5057
tristate "Build sample module for kernel access to Ftrace instancess"
5158
depends on EVENT_TRACING && m

samples/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ obj-$(CONFIG_SAMPLE_TRACE_CUSTOM_EVENTS) += trace_events/
2424
obj-$(CONFIG_SAMPLE_TRACE_PRINTK) += trace_printk/
2525
obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace/
2626
obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace/
27+
obj-$(CONFIG_SAMPLE_FTRACE_OPS) += ftrace/
2728
obj-$(CONFIG_SAMPLE_TRACE_ARRAY) += ftrace/
2829
subdir-$(CONFIG_SAMPLE_UHID) += uhid
2930
obj-$(CONFIG_VIDEO_PCI_SKELETON) += v4l/

samples/ftrace/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-too.o
55
obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-modify.o
66
obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace-direct-multi.o
77
obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace-direct-multi-modify.o
8+
obj-$(CONFIG_SAMPLE_FTRACE_OPS) += ftrace-ops.o
89

910
CFLAGS_sample-trace-array.o := -I$(src)
1011
obj-$(CONFIG_SAMPLE_TRACE_ARRAY) += sample-trace-array.o

samples/ftrace/ftrace-ops.c

Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
3+
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
4+
5+
#include <linux/ftrace.h>
#include <linux/ktime.h>
#include <linux/math64.h>
#include <linux/module.h>

#include <asm/barrier.h>
10+
11+
/*
12+
* Arbitrary large value chosen to be sufficiently large to minimize noise but
13+
* sufficiently small to complete quickly.
14+
*/
15+
unsigned int nr_function_calls = 100000;
16+
module_param(nr_function_calls, uint, 0);
17+
MODULE_PARM_DESC(nr_function_calls, "How many times to call the relevant tracee");
18+
19+
/*
20+
* The number of ops associated with a call site affects whether a tracer can
21+
* be called directly or whether it's necessary to go via the list func, which
22+
* can be significantly more expensive.
23+
*/
24+
unsigned int nr_ops_relevant = 1;
25+
module_param(nr_ops_relevant, uint, 0);
26+
MODULE_PARM_DESC(nr_ops_relevant, "How many ftrace_ops to associate with the relevant tracee");
27+
28+
/*
29+
* On architectures where all call sites share the same trampoline, having
30+
* tracers enabled for distinct functions can force the use of the list func
31+
* and incur overhead for all call sites.
32+
*/
33+
unsigned int nr_ops_irrelevant = 0;
34+
module_param(nr_ops_irrelevant, uint, 0);
35+
MODULE_PARM_DESC(nr_ops_irrelevant, "How many ftrace_ops to associate with the irrelevant tracee");
36+
37+
/*
38+
* On architectures with DYNAMIC_FTRACE_WITH_REGS, saving the full pt_regs can
39+
* be more expensive than only saving the minimal necessary regs.
40+
*/
41+
bool save_regs = false;
42+
module_param(save_regs, bool, 0);
43+
MODULE_PARM_DESC(save_regs, "Register ops with FTRACE_OPS_FL_SAVE_REGS (save all registers in the trampoline)");
44+
45+
bool assist_recursion = false;
46+
module_param(assist_recursion, bool, 0);
47+
MODULE_PARM_DESC(assist_reursion, "Register ops with FTRACE_OPS_FL_RECURSION");
48+
49+
bool assist_rcu = false;
50+
module_param(assist_rcu, bool, 0);
51+
MODULE_PARM_DESC(assist_reursion, "Register ops with FTRACE_OPS_FL_RCU");
52+
53+
/*
54+
* By default, a trivial tracer is used which immediately returns to mimimize
55+
* overhead. Sometimes a consistency check using a more expensive tracer is
56+
* desireable.
57+
*/
58+
bool check_count = false;
59+
module_param(check_count, bool, 0);
60+
MODULE_PARM_DESC(check_count, "Check that tracers are called the expected number of times\n");
61+
62+
/*
63+
* Usually it's not interesting to leave the ops registered after the test
64+
* runs, but sometimes it can be useful to leave them registered so that they
65+
* can be inspected through the tracefs 'enabled_functions' file.
66+
*/
67+
bool persist = false;
68+
module_param(persist, bool, 0);
69+
MODULE_PARM_DESC(persist, "Successfully load module and leave ftrace ops registered after test completes\n");
70+
71+
/*
72+
* Marked as noinline to ensure that an out-of-line traceable copy is
73+
* generated by the compiler.
74+
*
75+
* The barrier() ensures the compiler won't elide calls by determining there
76+
* are no side-effects.
77+
*/
78+
static noinline void tracee_relevant(void)
79+
{
80+
barrier();
81+
}
82+
83+
/*
84+
* Marked as noinline to ensure that an out-of-line traceable copy is
85+
* generated by the compiler.
86+
*
87+
* The barrier() ensures the compiler won't elide calls by determining there
88+
* are no side-effects.
89+
*/
90+
static noinline void tracee_irrelevant(void)
91+
{
92+
barrier();
93+
}
94+
95+
struct sample_ops {
96+
struct ftrace_ops ops;
97+
unsigned int count;
98+
};
99+
100+
/*
 * Default tracer callback. It ignores all of its arguments and returns
 * immediately so that the tracer itself adds as little overhead as possible.
 */
static void ops_func_nop(unsigned long ip, unsigned long parent_ip,
			 struct ftrace_ops *op,
			 struct ftrace_regs *fregs)
{
	/* intentionally empty */
}
106+
107+
static void ops_func_count(unsigned long ip, unsigned long parent_ip,
108+
struct ftrace_ops *op,
109+
struct ftrace_regs *fregs)
110+
{
111+
struct sample_ops *self;
112+
113+
self = container_of(op, struct sample_ops, ops);
114+
self->count++;
115+
}
116+
117+
/* Array of ops attached to tracee_relevant(), allocated by the init function. */
struct sample_ops *ops_relevant;
/* Array of ops attached to tracee_irrelevant(), allocated by the init function. */
struct sample_ops *ops_irrelevant;
119+
120+
/*
 * Allocate an array of @nr sample_ops and register every entry with ftrace.
 *
 * @tracee: address of the function each ops is filtered to
 * @func:   tracer callback installed in each ops
 * @flags:  FTRACE_OPS_FL_* flags installed in each ops
 * @nr:     number of ops to allocate and register
 *
 * Filter and registration failures only trigger a warning; the array is
 * still returned. Returns NULL (after warning) if the allocation fails.
 */
static struct sample_ops *ops_alloc_init(void *tracee, ftrace_func_t func,
					 unsigned long flags, int nr)
{
	struct sample_ops *array;
	unsigned int idx;

	array = kcalloc(nr, sizeof(*array), GFP_KERNEL);
	if (WARN_ON_ONCE(!array))
		return NULL;

	for (idx = 0; idx < nr; idx++) {
		struct ftrace_ops *fops = &array[idx].ops;
		int err;

		fops->func = func;
		fops->flags = flags;

		err = ftrace_set_filter_ip(fops, (unsigned long)tracee, 0, 0);
		WARN_ON_ONCE(err);

		err = register_ftrace_function(fops);
		WARN_ON_ONCE(err);
	}

	return array;
}
138+
139+
static void ops_destroy(struct sample_ops *ops, int nr)
140+
{
141+
if (!ops)
142+
return;
143+
144+
for (unsigned int i = 0; i < nr; i++) {
145+
WARN_ON_ONCE(unregister_ftrace_function(&ops[i].ops));
146+
ftrace_free_filter(&ops[i].ops);
147+
}
148+
149+
kfree(ops);
150+
}
151+
152+
static void ops_check(struct sample_ops *ops, int nr,
153+
unsigned int expected_count)
154+
{
155+
if (!ops || !check_count)
156+
return;
157+
158+
for (unsigned int i = 0; i < nr; i++) {
159+
if (ops->count == expected_count)
160+
continue;
161+
pr_warn("Counter called %u times (expected %u)\n",
162+
ops->count, expected_count);
163+
}
164+
}
165+
166+
/*
 * Tracer callbacks installed in the relevant / irrelevant ops. Both start
 * out as the no-op tracer; the init function switches them to
 * ops_func_count when check_count is set.
 */
ftrace_func_t tracer_relevant = ops_func_nop;
ftrace_func_t tracer_irrelevant = ops_func_nop;
168+
169+
/*
 * Module entry point: register the requested ops, time nr_function_calls
 * invocations of tracee_relevant(), report the result and, unless 'persist'
 * is set, tear everything down again.
 *
 * Returns 0 when 'persist' is set, leaving the module loaded with its ops
 * registered. Otherwise returns -EINVAL once the benchmark has been reported,
 * so the module does not stay loaded.
 */
static int __init ftrace_ops_sample_init(void)
{
	unsigned long flags = 0;
	ktime_t start, end;
	u64 period, per_call;

	if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && save_regs) {
		pr_info("this kernel does not support saving registers\n");
		save_regs = false;
	} else if (save_regs) {
		flags |= FTRACE_OPS_FL_SAVE_REGS;
	}

	if (assist_recursion)
		flags |= FTRACE_OPS_FL_RECURSION;

	if (assist_rcu)
		flags |= FTRACE_OPS_FL_RCU;

	/* Counting tracers are only installed when a consistency check is wanted. */
	if (check_count) {
		tracer_relevant = ops_func_count;
		tracer_irrelevant = ops_func_count;
	}

	pr_info("registering:\n"
		" relevant ops: %u\n"
		" tracee: %ps\n"
		" tracer: %ps\n"
		" irrelevant ops: %u\n"
		" tracee: %ps\n"
		" tracer: %ps\n"
		" saving registers: %s\n"
		" assist recursion: %s\n"
		" assist RCU: %s\n",
		nr_ops_relevant, tracee_relevant, tracer_relevant,
		nr_ops_irrelevant, tracee_irrelevant, tracer_irrelevant,
		save_regs ? "YES" : "NO",
		assist_recursion ? "YES" : "NO",
		assist_rcu ? "YES" : "NO");

	ops_relevant = ops_alloc_init(tracee_relevant, tracer_relevant,
				      flags, nr_ops_relevant);
	ops_irrelevant = ops_alloc_init(tracee_irrelevant, tracer_irrelevant,
					flags, nr_ops_irrelevant);

	start = ktime_get();
	for (unsigned int i = 0; i < nr_function_calls; i++)
		tracee_relevant();
	end = ktime_get();

	ops_check(ops_relevant, nr_ops_relevant, nr_function_calls);
	ops_check(ops_irrelevant, nr_ops_irrelevant, 0);

	period = ktime_to_ns(ktime_sub(end, start));

	/*
	 * period is 64-bit: use div_u64() rather than an open-coded '/' so the
	 * module also links on 32-bit kernels, and skip the division entirely
	 * when the module was loaded with nr_function_calls=0.
	 */
	per_call = nr_function_calls ? div_u64(period, nr_function_calls) : 0;

	pr_info("Attempted %u calls to %ps in %lluns (%lluns / call)\n",
		nr_function_calls, tracee_relevant,
		period, per_call);

	if (persist)
		return 0;

	ops_destroy(ops_relevant, nr_ops_relevant);
	ops_destroy(ops_irrelevant, nr_ops_irrelevant);

	/*
	 * The benchmark completed successfully, but there's no reason to keep
	 * the module around. Return an error so the user doesn't have to
	 * manually unload the module.
	 */
	return -EINVAL;
}
module_init(ftrace_ops_sample_init);
242+
243+
/*
 * Module exit: unregister and free both ops arrays. The init function only
 * lets the module stay loaded when 'persist' is set, in which case it leaves
 * the ops registered for this function to tear down.
 */
static void __exit ftrace_ops_sample_exit(void)
{
	ops_destroy(ops_relevant, nr_ops_relevant);
	ops_destroy(ops_irrelevant, nr_ops_irrelevant);
}
module_exit(ftrace_ops_sample_exit);
249+
250+
MODULE_AUTHOR("Mark Rutland");
251+
MODULE_DESCRIPTION("Example of using custom ftrace_ops");
252+
MODULE_LICENSE("GPL");

0 commit comments

Comments
 (0)