Skip to content

Commit a46023d

Browse files
committed
tracing: Guard __DECLARE_TRACE() use of __DO_TRACE_CALL() with SRCU-fast
The current use of guard(preempt_notrace)() within __DECLARE_TRACE() to
protect invocation of __DO_TRACE_CALL() means that BPF programs attached
to tracepoints are non-preemptible. This is unhelpful in real-time
systems, whose users apparently wish to use BPF while also achieving low
latencies. (Who knew?)

One option would be to use preemptible RCU, but this introduces many
opportunities for infinite recursion, which many consider to be
counterproductive, especially given the relatively small stacks provided
by the Linux kernel. These opportunities could be shut down by
sufficiently energetic duplication of code, but this sort of thing is
considered impolite in some circles.

Therefore, use the shiny new SRCU-fast API, which provides somewhat
faster readers than those of preemptible RCU, at least on Paul E.
McKenney's laptop, where task_struct access is more expensive than
access to per-CPU variables. And SRCU-fast provides way faster readers
than does SRCU, courtesy of being able to avoid the read-side use of
smp_mb(). Also, it is quite straightforward to create
srcu_read_{,un}lock_fast_notrace() functions.

Link: https://lore.kernel.org/all/20250613152218.1924093-1-bigeasy@linutronix.de/
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Alexei Starovoitov <ast@kernel.org>
Link: https://patch.msgid.link/20260126231256.499701982@kernel.org
Co-developed-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
1 parent a77cb6a commit a46023d

3 files changed

Lines changed: 21 additions & 10 deletions

File tree

include/linux/tracepoint.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -108,14 +108,15 @@ void for_each_tracepoint_in_module(struct module *mod,
  * An alternative is to use the following for batch reclaim associated
  * with a given tracepoint:
  *
- * - tracepoint_is_faultable() == false: call_rcu()
+ * - tracepoint_is_faultable() == false: call_srcu()
  * - tracepoint_is_faultable() == true: call_rcu_tasks_trace()
  */
 #ifdef CONFIG_TRACEPOINTS
+extern struct srcu_struct tracepoint_srcu;
 static inline void tracepoint_synchronize_unregister(void)
 {
 	synchronize_rcu_tasks_trace();
-	synchronize_rcu();
+	synchronize_srcu(&tracepoint_srcu);
 }
 static inline bool tracepoint_is_faultable(struct tracepoint *tp)
 {
@@ -275,13 +276,13 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 	return static_branch_unlikely(&__tracepoint_##name.key);\
 }
 
-#define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
+#define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
 	__DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) \
 	static inline void __do_trace_##name(proto)			\
 	{								\
 		TRACEPOINT_CHECK(name)					\
 		if (cond) {						\
-			guard(preempt_notrace)();			\
+			guard(srcu_fast_notrace)(&tracepoint_srcu);	\
 			__DO_TRACE_CALL(name, TP_ARGS(args));		\
 		}							\
 	}								\

include/trace/trace_events.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,7 @@ __DECLARE_EVENT_CLASS(call, PARAMS(proto), PARAMS(args), PARAMS(tstruct), \
 static notrace void							\
 trace_event_raw_event_##call(void *__data, proto)			\
 {									\
+	guard(preempt_notrace)();					\
 	do_trace_event_raw_event_##call(__data, args);			\
 }

@@ -447,9 +448,8 @@ static notrace void \
 trace_event_raw_event_##call(void *__data, proto)			\
 {									\
 	might_fault();							\
-	preempt_disable_notrace();					\
+	guard(preempt_notrace)();					\
 	do_trace_event_raw_event_##call(__data, args);			\
-	preempt_enable_notrace();					\
 }
 
 /*

kernel/tracepoint.c

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,13 @@ enum tp_transition_sync {
 
 struct tp_transition_snapshot {
 	unsigned long rcu;
+	unsigned long srcu_gp;
 	bool ongoing;
 };
 
+DEFINE_SRCU_FAST(tracepoint_srcu);
+EXPORT_SYMBOL_GPL(tracepoint_srcu);
+
 /* Protected by tracepoints_mutex */
 static struct tp_transition_snapshot tp_transition_snapshot[_NR_TP_TRANSITION_SYNC];

@@ -46,6 +50,7 @@ static void tp_rcu_get_state(enum tp_transition_sync sync)
 
 	/* Keep the latest get_state snapshot. */
 	snapshot->rcu = get_state_synchronize_rcu();
+	snapshot->srcu_gp = start_poll_synchronize_srcu(&tracepoint_srcu);
 	snapshot->ongoing = true;
 }

@@ -56,6 +61,8 @@ static void tp_rcu_cond_sync(enum tp_transition_sync sync)
 	if (!snapshot->ongoing)
 		return;
 	cond_synchronize_rcu(snapshot->rcu);
+	if (!poll_state_synchronize_srcu(&tracepoint_srcu, snapshot->srcu_gp))
+		synchronize_srcu(&tracepoint_srcu);
 	snapshot->ongoing = false;
 }

@@ -112,10 +119,13 @@ static inline void release_probes(struct tracepoint *tp, struct tracepoint_func
 	struct tp_probes *tp_probes = container_of(old,
 			struct tp_probes, probes[0]);
 
-	if (tracepoint_is_faultable(tp))
-		call_rcu_tasks_trace(&tp_probes->rcu, rcu_free_old_probes);
-	else
-		call_rcu(&tp_probes->rcu, rcu_free_old_probes);
+	if (tracepoint_is_faultable(tp)) {
+		call_rcu_tasks_trace(&tp_probes->rcu,
+				     rcu_free_old_probes);
+	} else {
+		call_srcu(&tracepoint_srcu, &tp_probes->rcu,
+			  rcu_free_old_probes);
+	}
 }

0 commit comments

Comments (0)