Skip to content

Commit 231264d

Browse files
compudj authored and rostedt committed
tracepoint: Fix static call function vs data state mismatch
On a 1->0->1 callbacks transition, there is an issue with the new callback using the old callback's data. Considering __DO_TRACE_CALL: do { \ struct tracepoint_func *it_func_ptr; \ void *__data; \ it_func_ptr = \ rcu_dereference_raw((&__tracepoint_##name)->funcs); \ if (it_func_ptr) { \ __data = (it_func_ptr)->data; \ ----> [ delayed here on one CPU (e.g. vcpu preempted by the host) ] static_call(tp_func_##name)(__data, args); \ } \ } while (0) It has loaded the tp->funcs of the old callback, so it will try to use the old data. This can be fixed by adding a RCU sync anywhere in the 1->0->1 transition chain. On a N->2->1 transition, we need an rcu-sync because you may have a sequence of 3->2->1 (or 1->2->1) where the element 0 data is unchanged between 2->1, but was changed from 3->2 (or from 1->2), which may be observed by the static call. This can be fixed by adding an unconditional RCU sync in transition 2->1. Note, this fixes a correctness issue at the cost of adding a tremendous performance regression to the disabling of tracepoints. Before this commit: # trace-cmd start -e all # time trace-cmd start -p nop real 0m0.778s user 0m0.000s sys 0m0.061s After this commit: # trace-cmd start -e all # time trace-cmd start -p nop real 0m10.593s user 0m0.017s sys 0m0.259s A follow up fix will introduce a more lightweight scheme based on RCU get_state and cond_sync, that will return the performance back to what it was. As both this change and the lightweight versions are complex on their own, for bisecting any issues that this may cause, they are kept as two separate changes. Link: https://lkml.kernel.org/r/20210805132717.23813-3-mathieu.desnoyers@efficios.com Link: https://lore.kernel.org/io-uring/4ebea8f0-58c9-e571-fd30-0ce4f6f09c70@samba.org/ Cc: stable@vger.kernel.org Cc: Ingo Molnar <mingo@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: "Paul E. 
McKenney" <paulmck@kernel.org> Cc: Stefan Metzmacher <metze@samba.org> Fixes: d25e37d ("tracepoint: Optimize using static_call()") Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
1 parent f7ec412 commit 231264d

1 file changed

Lines changed: 82 additions & 20 deletions

File tree

kernel/tracepoint.c

Lines changed: 82 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,13 @@
1515
#include <linux/sched/task.h>
1616
#include <linux/static_key.h>
1717

18+
enum tp_func_state {
19+
TP_FUNC_0,
20+
TP_FUNC_1,
21+
TP_FUNC_2,
22+
TP_FUNC_N,
23+
};
24+
1825
extern tracepoint_ptr_t __start___tracepoints_ptrs[];
1926
extern tracepoint_ptr_t __stop___tracepoints_ptrs[];
2027

@@ -246,26 +253,29 @@ static void *func_remove(struct tracepoint_func **funcs,
246253
return old;
247254
}
248255

249-
static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs, bool sync)
256+
/*
257+
* Count the number of functions (enum tp_func_state) in a tp_funcs array.
258+
*/
259+
static enum tp_func_state nr_func_state(const struct tracepoint_func *tp_funcs)
260+
{
261+
if (!tp_funcs)
262+
return TP_FUNC_0;
263+
if (!tp_funcs[1].func)
264+
return TP_FUNC_1;
265+
if (!tp_funcs[2].func)
266+
return TP_FUNC_2;
267+
return TP_FUNC_N; /* 3 or more */
268+
}
269+
270+
static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs)
250271
{
251272
void *func = tp->iterator;
252273

253274
/* Synthetic events do not have static call sites */
254275
if (!tp->static_call_key)
255276
return;
256-
257-
if (!tp_funcs[1].func) {
277+
if (nr_func_state(tp_funcs) == TP_FUNC_1)
258278
func = tp_funcs[0].func;
259-
/*
260-
* If going from the iterator back to a single caller,
261-
* we need to synchronize with __DO_TRACE to make sure
262-
* that the data passed to the callback is the one that
263-
* belongs to that callback.
264-
*/
265-
if (sync)
266-
tracepoint_synchronize_unregister();
267-
}
268-
269279
__static_call_update(tp->static_call_key, tp->static_call_tramp, func);
270280
}
271281

@@ -299,9 +309,31 @@ static int tracepoint_add_func(struct tracepoint *tp,
299309
* a pointer to it. This array is referenced by __DO_TRACE from
300310
* include/linux/tracepoint.h using rcu_dereference_sched().
301311
*/
302-
tracepoint_update_call(tp, tp_funcs, false);
303-
rcu_assign_pointer(tp->funcs, tp_funcs);
304-
static_key_enable(&tp->key);
312+
switch (nr_func_state(tp_funcs)) {
313+
case TP_FUNC_1: /* 0->1 */
314+
/* Set static call to first function */
315+
tracepoint_update_call(tp, tp_funcs);
316+
/* Both iterator and static call handle NULL tp->funcs */
317+
rcu_assign_pointer(tp->funcs, tp_funcs);
318+
static_key_enable(&tp->key);
319+
break;
320+
case TP_FUNC_2: /* 1->2 */
321+
/* Set iterator static call */
322+
tracepoint_update_call(tp, tp_funcs);
323+
/*
324+
* Iterator callback installed before updating tp->funcs.
325+
* Requires ordering between RCU assign/dereference and
326+
* static call update/call.
327+
*/
328+
rcu_assign_pointer(tp->funcs, tp_funcs);
329+
break;
330+
case TP_FUNC_N: /* N->N+1 (N>1) */
331+
rcu_assign_pointer(tp->funcs, tp_funcs);
332+
break;
333+
default:
334+
WARN_ON_ONCE(1);
335+
break;
336+
}
305337

306338
release_probes(old);
307339
return 0;
@@ -328,17 +360,47 @@ static int tracepoint_remove_func(struct tracepoint *tp,
328360
/* Failed allocating new tp_funcs, replaced func with stub */
329361
return 0;
330362

331-
if (!tp_funcs) {
363+
switch (nr_func_state(tp_funcs)) {
364+
case TP_FUNC_0: /* 1->0 */
332365
/* Removed last function */
333366
if (tp->unregfunc && static_key_enabled(&tp->key))
334367
tp->unregfunc();
335368

336369
static_key_disable(&tp->key);
370+
/* Set iterator static call */
371+
tracepoint_update_call(tp, tp_funcs);
372+
/* Both iterator and static call handle NULL tp->funcs */
373+
rcu_assign_pointer(tp->funcs, NULL);
374+
/*
375+
* Make sure new func never uses old data after a 1->0->1
376+
* transition sequence.
377+
* Considering that transition 0->1 is the common case
378+
* and don't have rcu-sync, issue rcu-sync after
379+
* transition 1->0 to break that sequence by waiting for
380+
* readers to be quiescent.
381+
*/
382+
tracepoint_synchronize_unregister();
383+
break;
384+
case TP_FUNC_1: /* 2->1 */
337385
rcu_assign_pointer(tp->funcs, tp_funcs);
338-
} else {
386+
/*
387+
* On 2->1 transition, RCU sync is needed before setting
388+
* static call to first callback, because the observer
389+
* may have loaded any prior tp->funcs after the last one
390+
* associated with an rcu-sync.
391+
*/
392+
tracepoint_synchronize_unregister();
393+
/* Set static call to first function */
394+
tracepoint_update_call(tp, tp_funcs);
395+
break;
396+
case TP_FUNC_2: /* N->N-1 (N>2) */
397+
fallthrough;
398+
case TP_FUNC_N:
339399
rcu_assign_pointer(tp->funcs, tp_funcs);
340-
tracepoint_update_call(tp, tp_funcs,
341-
tp_funcs[0].data != old[0].data);
400+
break;
401+
default:
402+
WARN_ON_ONCE(1);
403+
break;
342404
}
343405
release_probes(old);
344406
return 0;

0 commit comments

Comments
 (0)