Commit 5c31249

Author: Peter Zijlstra
trace,smp: Trace all smp_function_call*() invocations
(Ab)use the trace_ipi_send_cpu*() family to trace all smp_function_call*() invocations, not only those that result in an actual IPI. The queued entries log their callback function while the actual IPIs are traced on generic_smp_call_function_single_interrupt().

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
1 parent 68e2d17 · commit 5c31249
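As a quick illustration of the resulting trace semantics (not part of the commit): the sketch below shows a hypothetical caller; my_remote_func and example() are made up for illustration, while smp_call_function_single() and the ipi_send_cpu tracepoint are the existing kernel pieces the change wires together.

#include <linux/smp.h>

/* Hypothetical callback, named here only for illustration. */
static void my_remote_func(void *info)
{
	/* Runs on the target CPU when it flushes its call_single_queue. */
}

static void example(int cpu)
{
	/*
	 * With this change, queueing the request emits an ipi_send_cpu
	 * event whose callback field is my_remote_func, while the actual
	 * IPI (raised only when the queue transition really needs one) is
	 * traced with callback == generic_smp_call_function_single_interrupt.
	 */
	smp_call_function_single(cpu, my_remote_func, NULL, 1);
}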

1 file changed

Lines changed: 36 additions & 30 deletions

kernel/smp.c

@@ -104,18 +104,20 @@ void __init call_function_init(void)
 }
 
 static __always_inline void
-send_call_function_single_ipi(int cpu, smp_call_func_t func)
+send_call_function_single_ipi(int cpu)
 {
 	if (call_function_single_prep_ipi(cpu)) {
-		trace_ipi_send_cpu(cpu, _RET_IP_, func);
+		trace_ipi_send_cpu(cpu, _RET_IP_,
+				   generic_smp_call_function_single_interrupt);
 		arch_send_call_function_single_ipi(cpu);
 	}
 }
 
 static __always_inline void
-send_call_function_ipi_mask(struct cpumask *mask, smp_call_func_t func)
+send_call_function_ipi_mask(struct cpumask *mask)
 {
-	trace_ipi_send_cpumask(mask, _RET_IP_, func);
+	trace_ipi_send_cpumask(mask, _RET_IP_,
+			       generic_smp_call_function_single_interrupt);
 	arch_send_call_function_ipi_mask(mask);
 }
 
@@ -316,25 +318,6 @@ static __always_inline void csd_unlock(struct __call_single_data *csd)
 	smp_store_release(&csd->node.u_flags, 0);
 }
 
-static __always_inline void
-raw_smp_call_single_queue(int cpu, struct llist_node *node, smp_call_func_t func)
-{
-	/*
-	 * The list addition should be visible to the target CPU when it pops
-	 * the head of the list to pull the entry off it in the IPI handler
-	 * because of normal cache coherency rules implied by the underlying
-	 * llist ops.
-	 *
-	 * If IPIs can go out of order to the cache coherency protocol
-	 * in an architecture, sufficient synchronisation should be added
-	 * to arch code to make it appear to obey cache coherency WRT
-	 * locking and barrier primitives. Generic code isn't really
-	 * equipped to do the right thing...
-	 */
-	if (llist_add(node, &per_cpu(call_single_queue, cpu)))
-		send_call_function_single_ipi(cpu, func);
-}
-
 static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
 
 void __smp_call_single_queue(int cpu, struct llist_node *node)
@@ -354,10 +337,23 @@ void __smp_call_single_queue(int cpu, struct llist_node *node)
 		func = CSD_TYPE(csd) == CSD_TYPE_TTWU ?
 			sched_ttwu_pending : csd->func;
 
-		raw_smp_call_single_queue(cpu, node, func);
-	} else {
-		raw_smp_call_single_queue(cpu, node, NULL);
+		trace_ipi_send_cpu(cpu, _RET_IP_, func);
 	}
+
+	/*
+	 * The list addition should be visible to the target CPU when it pops
+	 * the head of the list to pull the entry off it in the IPI handler
+	 * because of normal cache coherency rules implied by the underlying
+	 * llist ops.
+	 *
+	 * If IPIs can go out of order to the cache coherency protocol
+	 * in an architecture, sufficient synchronisation should be added
+	 * to arch code to make it appear to obey cache coherency WRT
+	 * locking and barrier primitives. Generic code isn't really
+	 * equipped to do the right thing...
+	 */
+	if (llist_add(node, &per_cpu(call_single_queue, cpu)))
+		send_call_function_single_ipi(cpu);
 }
 
 /*
@@ -732,9 +728,9 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 	int cpu, last_cpu, this_cpu = smp_processor_id();
 	struct call_function_data *cfd;
 	bool wait = scf_flags & SCF_WAIT;
+	int nr_cpus = 0, nr_queued = 0;
 	bool run_remote = false;
 	bool run_local = false;
-	int nr_cpus = 0;
 
 	lockdep_assert_preemption_disabled();
 
@@ -776,8 +772,10 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 		for_each_cpu(cpu, cfd->cpumask) {
 			call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
 
-			if (cond_func && !cond_func(cpu, info))
+			if (cond_func && !cond_func(cpu, info)) {
+				__cpumask_clear_cpu(cpu, cfd->cpumask);
 				continue;
+			}
 
 			csd_lock(csd);
 			if (wait)
@@ -793,17 +791,25 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 				nr_cpus++;
 				last_cpu = cpu;
 			}
+			nr_queued++;
 		}
 
+		/*
+		 * Trace each smp_function_call_*() as an IPI, actual IPIs
+		 * will be traced with func==generic_smp_call_function_single_ipi().
+		 */
+		if (nr_queued)
+			trace_ipi_send_cpumask(cfd->cpumask, _RET_IP_, func);
+
 		/*
 		 * Choose the most efficient way to send an IPI. Note that the
 		 * number of CPUs might be zero due to concurrent changes to the
 		 * provided mask.
 		 */
 		if (nr_cpus == 1)
-			send_call_function_single_ipi(last_cpu, func);
+			send_call_function_single_ipi(last_cpu);
 		else if (likely(nr_cpus > 1))
-			send_call_function_ipi_mask(cfd->cpumask_ipi, func);
+			send_call_function_ipi_mask(cfd->cpumask_ipi);
 	}
 
 	if (run_local && (!cond_func || cond_func(this_cpu, info))) {
