Skip to content

Commit 953c2f0

Browse files
committed
tracing: Add sample code for custom trace events
Add sample code to show how to create custom trace events in the tracefs directory that can be enabled and modified like any event in tracefs (including triggers, histograms, synthetic events and event probes). The example creates custom sched_switch and sched_waking events that limit what is recorded: If the custom sched_switch only records the prev_prio, next_prio and next_pid, it can bring the size from 64 bytes per event, down to just 16 bytes! If sched_waking only records the prio and pid of the woken task, it will bring the size down from 36 bytes to 12 bytes per event. This will allow for a much smaller footprint in the ring buffer and keep more events from being dropped. Link: https://lkml.kernel.org/r/20220303220625.369226746@goodmis.org Cc: Ingo Molnar <mingo@kernel.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Masami Hiramatsu <mhiramat@kernel.org> Cc: Tom Zanussi <zanussi@kernel.org> Suggested-by: Joel Fernandes <joel@joelfernandes.org> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
1 parent 8bcd066 commit 953c2f0

4 files changed

Lines changed: 281 additions & 1 deletion

File tree

samples/Kconfig

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,13 @@ config SAMPLE_TRACE_EVENTS
1414
tristate "Build trace_events examples -- loadable modules only"
1515
depends on EVENT_TRACING && m
1616
help
17-
This build trace event example modules.
17+
This builds the trace event example module.
18+
19+
config SAMPLE_TRACE_CUSTOM_EVENTS
20+
tristate "Build custom trace event example -- loadable modules only"
21+
depends on EVENT_TRACING && m
22+
help
23+
This builds the custom trace event example module.
1824

1925
config SAMPLE_TRACE_PRINTK
2026
tristate "Build trace_printk module - tests various trace_printk formats"

samples/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ obj-$(CONFIG_SAMPLE_RPMSG_CLIENT) += rpmsg/
2020
subdir-$(CONFIG_SAMPLE_SECCOMP) += seccomp
2121
subdir-$(CONFIG_SAMPLE_TIMER) += timers
2222
obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace_events/
23+
obj-$(CONFIG_SAMPLE_TRACE_CUSTOM_EVENTS) += trace_events/
2324
obj-$(CONFIG_SAMPLE_TRACE_PRINTK) += trace_printk/
2425
obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace/
2526
obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace/

samples/trace_events/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,5 @@
1313
CFLAGS_trace-events-sample.o := -I$(src)
1414

1515
obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace-events-sample.o
16+
17+
obj-$(CONFIG_SAMPLE_TRACE_CUSTOM_EVENTS) += trace_custom_sched.o
Lines changed: 271 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,271 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* Sample module: custom sched_switch and sched_waking trace events
4+
*
5+
* Copyright (C) 2022 Google Inc, Steven Rostedt <rostedt@goodmis.org>
6+
*/
7+
8+
#define pr_fmt(fmt) fmt
9+
10+
#include <linux/trace_events.h>
11+
#include <linux/version.h>
12+
#include <linux/module.h>
13+
#include <linux/sched.h>
14+
#include <trace/events/sched.h>
15+
16+
#define THIS_SYSTEM "custom_sched"
17+
18+
/*
 * Output format and arguments of each custom event, wrapped in C().
 *
 * C() is defined twice in this file so that the same text expands two ways:
 * with the definition below it is a plain "format, args..." list, which the
 * output callbacks hand to trace_seq_printf(); further down it is redefined
 * to produce the string stored in each event's .print_fmt.
 *
 * The arguments refer to the record through a pointer named REC, so every
 * user of these macros must have such a variable in scope.
 */
#define SCHED_PRINT_FMT							\
	C("prev_prio=%d next_pid=%d next_prio=%d",			\
	  REC->prev_prio, REC->next_pid, REC->next_prio)

#define SCHED_WAKING_FMT	C("pid=%d prio=%d", REC->pid, REC->prio)

/* First expansion: pass the format and its arguments through untouched. */
#undef C
#define C(a, b...)	a, b
27+
28+
static struct trace_event_fields sched_switch_fields[] = {
29+
{
30+
.type = "unsigned short",
31+
.name = "prev_prio",
32+
.size = sizeof(short),
33+
.align = __alignof__(short),
34+
.is_signed = 0,
35+
.filter_type = FILTER_OTHER,
36+
},
37+
{
38+
.type = "unsigned short",
39+
.name = "next_prio",
40+
.size = sizeof(short),
41+
.align = __alignof__(short),
42+
.is_signed = 0,
43+
.filter_type = FILTER_OTHER,
44+
},
45+
{
46+
.type = "unsigned int",
47+
.name = "next_prio",
48+
.size = sizeof(int),
49+
.align = __alignof__(int),
50+
.is_signed = 0,
51+
.filter_type = FILTER_OTHER,
52+
},
53+
{}
54+
};
55+
56+
struct sched_event {
57+
struct trace_entry ent;
58+
unsigned short prev_prio;
59+
unsigned short next_prio;
60+
unsigned int next_pid;
61+
};
62+
63+
static struct trace_event_fields sched_waking_fields[] = {
64+
{
65+
.type = "unsigned int",
66+
.name = "pid",
67+
.size = sizeof(int),
68+
.align = __alignof__(int),
69+
.is_signed = 0,
70+
.filter_type = FILTER_OTHER,
71+
},
72+
{
73+
.type = "unsigned short",
74+
.name = "prio",
75+
.size = sizeof(short),
76+
.align = __alignof__(short),
77+
.is_signed = 0,
78+
.filter_type = FILTER_OTHER,
79+
},
80+
{}
81+
};
82+
83+
struct wake_event {
84+
struct trace_entry ent;
85+
unsigned int pid;
86+
unsigned short prio;
87+
};
88+
89+
/*
 * Probe for the sched_switch tracepoint.
 *
 * @data:    the struct trace_event_file this probe records into
 * @preempt: not used by this probe
 * @prev:    task being switched out; only its prio is recorded
 * @next:    task being switched in; its prio and pid are recorded
 *
 * Does nothing when the event file is soft disabled or when no space could
 * be reserved for the record.
 */
static void sched_switch_probe(void *data, bool preempt, struct task_struct *prev,
			       struct task_struct *next)
{
	struct trace_event_file *file = data;
	struct trace_event_buffer buf;
	struct sched_event *rec;

	if (trace_trigger_soft_disabled(file))
		return;

	rec = trace_event_buffer_reserve(&buf, file, sizeof(*rec));
	if (rec) {
		rec->prev_prio = prev->prio;
		rec->next_prio = next->prio;
		rec->next_pid = next->pid;

		trace_event_buffer_commit(&buf);
	}
}
111+
112+
static struct trace_event_class sched_switch_class = {
113+
.system = THIS_SYSTEM,
114+
.reg = trace_event_reg,
115+
.fields_array = sched_switch_fields,
116+
.fields = LIST_HEAD_INIT(sched_switch_class.fields),
117+
.probe = sched_switch_probe,
118+
};
119+
120+
static void sched_waking_probe(void *data, struct task_struct *t)
121+
{
122+
struct trace_event_file *trace_file = data;
123+
struct trace_event_buffer fbuffer;
124+
struct wake_event *entry;
125+
126+
if (trace_trigger_soft_disabled(trace_file))
127+
return;
128+
129+
entry = trace_event_buffer_reserve(&fbuffer, trace_file,
130+
sizeof(*entry));
131+
132+
if (!entry)
133+
return;
134+
135+
entry->prio = t->prio;
136+
entry->pid = t->pid;
137+
138+
trace_event_buffer_commit(&fbuffer);
139+
}
140+
141+
static struct trace_event_class sched_waking_class = {
142+
.system = THIS_SYSTEM,
143+
.reg = trace_event_reg,
144+
.fields_array = sched_waking_fields,
145+
.fields = LIST_HEAD_INIT(sched_waking_class.fields),
146+
.probe = sched_waking_probe,
147+
};
148+
149+
static enum print_line_t sched_switch_output(struct trace_iterator *iter, int flags,
150+
struct trace_event *trace_event)
151+
{
152+
struct trace_seq *s = &iter->seq;
153+
struct sched_event *REC = (struct sched_event *)iter->ent;
154+
int ret;
155+
156+
ret = trace_raw_output_prep(iter, trace_event);
157+
if (ret != TRACE_TYPE_HANDLED)
158+
return ret;
159+
160+
trace_seq_printf(s, SCHED_PRINT_FMT);
161+
trace_seq_putc(s, '\n');
162+
163+
return trace_handle_return(s);
164+
}
165+
166+
static struct trace_event_functions sched_switch_funcs = {
167+
.trace = sched_switch_output,
168+
};
169+
170+
static enum print_line_t sched_waking_output(struct trace_iterator *iter, int flags,
171+
struct trace_event *trace_event)
172+
{
173+
struct trace_seq *s = &iter->seq;
174+
struct wake_event *REC = (struct wake_event *)iter->ent;
175+
int ret;
176+
177+
ret = trace_raw_output_prep(iter, trace_event);
178+
if (ret != TRACE_TYPE_HANDLED)
179+
return ret;
180+
181+
trace_seq_printf(s, SCHED_WAKING_FMT);
182+
trace_seq_putc(s, '\n');
183+
184+
return trace_handle_return(s);
185+
}
186+
187+
static struct trace_event_functions sched_waking_funcs = {
188+
.trace = sched_waking_output,
189+
};
190+
191+
#undef C
192+
#define C(a, b...) #a "," __stringify(b)
193+
194+
static struct trace_event_call sched_switch_call = {
195+
.class = &sched_switch_class,
196+
.event = {
197+
.funcs = &sched_switch_funcs,
198+
},
199+
.print_fmt = SCHED_PRINT_FMT,
200+
.module = THIS_MODULE,
201+
.flags = TRACE_EVENT_FL_TRACEPOINT,
202+
};
203+
204+
static struct trace_event_call sched_waking_call = {
205+
.class = &sched_waking_class,
206+
.event = {
207+
.funcs = &sched_waking_funcs,
208+
},
209+
.print_fmt = SCHED_WAKING_FMT,
210+
.module = THIS_MODULE,
211+
.flags = TRACE_EVENT_FL_TRACEPOINT,
212+
};
213+
214+
static void fct(struct tracepoint *tp, void *priv)
215+
{
216+
if (tp->name && strcmp(tp->name, "sched_switch") == 0)
217+
sched_switch_call.tp = tp;
218+
else if (tp->name && strcmp(tp->name, "sched_waking") == 0)
219+
sched_waking_call.tp = tp;
220+
}
221+
222+
static int add_event(struct trace_event_call *call)
223+
{
224+
int ret;
225+
226+
ret = register_trace_event(&call->event);
227+
if (WARN_ON(!ret))
228+
return -ENODEV;
229+
230+
ret = trace_add_event_call(call);
231+
if (WARN_ON(ret))
232+
unregister_trace_event(&call->event);
233+
234+
return ret;
235+
}
236+
237+
/*
 * Module init: look up the sched_switch and sched_waking tracepoints and
 * register the two custom events on top of them.
 *
 * Returns 0 on success, -ENODEV if either tracepoint could not be found, or
 * the error from add_event().  If the second event cannot be added, the
 * first one is removed again so that nothing stays registered on failure.
 */
static int __init trace_sched_init(void)
{
	int ret;

	/* Type-check the probes against the tracepoint prototypes. */
	check_trace_callback_type_sched_switch(sched_switch_probe);
	check_trace_callback_type_sched_waking(sched_waking_probe);

	/* fct() stores the matching tracepoints in the two event calls. */
	for_each_kernel_tracepoint(fct, NULL);

	/*
	 * An event call without its tracepoint cannot be registered; bail out
	 * cleanly here instead of tripping the WARN_ON() in add_event().
	 */
	if (!sched_switch_call.tp || !sched_waking_call.tp)
		return -ENODEV;

	ret = add_event(&sched_switch_call);
	if (ret)
		return ret;

	ret = add_event(&sched_waking_call);
	if (ret)
		trace_remove_event_call(&sched_switch_call);

	return ret;
}
256+
257+
/*
 * Module exit: switch both custom events off, then remove them.
 */
static void __exit trace_sched_exit(void)
{
	/* Clear the enable state of each event in THIS_SYSTEM first ... */
	trace_set_clr_event(THIS_SYSTEM, "sched_switch", 0);
	trace_set_clr_event(THIS_SYSTEM, "sched_waking", 0);

	/* ... and only then take the event calls away. */
	trace_remove_event_call(&sched_switch_call);
	trace_remove_event_call(&sched_waking_call);
}
265+
266+
module_init(trace_sched_init);
267+
module_exit(trace_sched_exit);
268+
269+
MODULE_AUTHOR("Steven Rostedt");
270+
MODULE_DESCRIPTION("Custom scheduling events");
271+
MODULE_LICENSE("GPL");

0 commit comments

Comments
 (0)