Skip to content

Commit bf5a8c2

Browse files
Leonardo BrasPeter Zijlstra
authored andcommitted
trace,smp: Add tracepoints for scheduling remotelly called functions
Add a tracepoint for when a CSD is queued to a remote CPU's call_single_queue. This allows finding exactly which CPU queued a given CSD when looking at a csd_function_{entry,exit} event, and also enables us to accurately measure IPI delivery time with e.g. a synthetic event: $ echo 'hist:keys=cpu,csd.hex:ts=common_timestamp.usecs' >\ /sys/kernel/tracing/events/smp/csd_queue_cpu/trigger $ echo 'csd_latency unsigned int dst_cpu; unsigned long csd; u64 time' >\ /sys/kernel/tracing/synthetic_events $ echo \ 'hist:keys=common_cpu,csd.hex:'\ 'time=common_timestamp.usecs-$ts:'\ 'onmatch(smp.csd_queue_cpu).trace(csd_latency,common_cpu,csd,$time)' >\ /sys/kernel/tracing/events/smp/csd_function_entry/trigger $ trace-cmd record -e 'synthetic:csd_latency' hackbench $ trace-cmd report <...>-467 [001] 21.824263: csd_queue_cpu: cpu=0 callsite=try_to_wake_up+0x2ea func=sched_ttwu_pending csd=0xffff8880076148b8 <...>-467 [001] 21.824280: ipi_send_cpu: cpu=0 callsite=try_to_wake_up+0x2ea callback=generic_smp_call_function_single_interrupt+0x0 <...>-489 [000] 21.824299: csd_function_entry: func=sched_ttwu_pending csd=0xffff8880076148b8 <...>-489 [000] 21.824320: csd_latency: dst_cpu=0, csd=18446612682193848504, time=36 Suggested-by: Valentin Schneider <vschneid@redhat.com> Signed-off-by: Leonardo Bras <leobras@redhat.com> Tested-and-reviewed-by: Valentin Schneider <vschneid@redhat.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lore.kernel.org/r/20230615065944.188876-7-leobras@redhat.com
1 parent 949fa3f commit bf5a8c2

2 files changed

Lines changed: 32 additions & 11 deletions

File tree

include/trace/events/csd.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,33 @@
77

88
#include <linux/tracepoint.h>
99

10+
TRACE_EVENT(csd_queue_cpu,
11+
12+
TP_PROTO(const unsigned int cpu,
13+
unsigned long callsite,
14+
smp_call_func_t func,
15+
struct __call_single_data *csd),
16+
17+
TP_ARGS(cpu, callsite, func, csd),
18+
19+
TP_STRUCT__entry(
20+
__field(unsigned int, cpu)
21+
__field(void *, callsite)
22+
__field(void *, func)
23+
__field(void *, csd)
24+
),
25+
26+
TP_fast_assign(
27+
__entry->cpu = cpu;
28+
__entry->callsite = (void *)callsite;
29+
__entry->func = func;
30+
__entry->csd = csd;
31+
),
32+
33+
TP_printk("cpu=%u callsite=%pS func=%ps csd=%p",
34+
__entry->cpu, __entry->callsite, __entry->func, __entry->csd)
35+
);
36+
1037
/*
1138
* Tracepoints for a function which is called as an effect of smp_call_function.*
1239
*/

kernel/smp.c

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -340,15 +340,15 @@ void __smp_call_single_queue(int cpu, struct llist_node *node)
340340
* even if we haven't sent the smp_call IPI yet (e.g. the stopper
341341
* executes migration_cpu_stop() on the remote CPU).
342342
*/
343-
if (trace_ipi_send_cpu_enabled()) {
343+
if (trace_csd_queue_cpu_enabled()) {
344344
call_single_data_t *csd;
345345
smp_call_func_t func;
346346

347347
csd = container_of(node, call_single_data_t, node.llist);
348348
func = CSD_TYPE(csd) == CSD_TYPE_TTWU ?
349349
sched_ttwu_pending : csd->func;
350350

351-
trace_ipi_send_cpu(cpu, _RET_IP_, func);
351+
trace_csd_queue_cpu(cpu, _RET_IP_, func, csd);
352352
}
353353

354354
/*
@@ -741,7 +741,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
741741
int cpu, last_cpu, this_cpu = smp_processor_id();
742742
struct call_function_data *cfd;
743743
bool wait = scf_flags & SCF_WAIT;
744-
int nr_cpus = 0, nr_queued = 0;
744+
int nr_cpus = 0;
745745
bool run_remote = false;
746746
bool run_local = false;
747747

@@ -799,21 +799,15 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
799799
csd->node.src = smp_processor_id();
800800
csd->node.dst = cpu;
801801
#endif
802+
trace_csd_queue_cpu(cpu, _RET_IP_, func, csd);
803+
802804
if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) {
803805
__cpumask_set_cpu(cpu, cfd->cpumask_ipi);
804806
nr_cpus++;
805807
last_cpu = cpu;
806808
}
807-
nr_queued++;
808809
}
809810

810-
/*
811-
* Trace each smp_function_call_*() as an IPI, actual IPIs
812-
* will be traced with func==generic_smp_call_function_single_ipi().
813-
*/
814-
if (nr_queued)
815-
trace_ipi_send_cpumask(cfd->cpumask, _RET_IP_, func);
816-
817811
/*
818812
* Choose the most efficient way to send an IPI. Note that the
819813
* number of CPUs might be zero due to concurrent changes to the

0 commit comments

Comments
 (0)