@@ -104,18 +104,20 @@ void __init call_function_init(void)
 }
 
 static __always_inline void
-send_call_function_single_ipi(int cpu, smp_call_func_t func)
+send_call_function_single_ipi(int cpu)
 {
 	if (call_function_single_prep_ipi(cpu)) {
-		trace_ipi_send_cpu(cpu, _RET_IP_, func);
+		trace_ipi_send_cpu(cpu, _RET_IP_,
+				   generic_smp_call_function_single_interrupt);
 		arch_send_call_function_single_ipi(cpu);
 	}
 }
 
 static __always_inline void
-send_call_function_ipi_mask(struct cpumask *mask, smp_call_func_t func)
+send_call_function_ipi_mask(struct cpumask *mask)
 {
-	trace_ipi_send_cpumask(mask, _RET_IP_, func);
+	trace_ipi_send_cpumask(mask, _RET_IP_,
+			       generic_smp_call_function_single_interrupt);
 	arch_send_call_function_ipi_mask(mask);
 }
 
@@ -316,25 +318,6 @@ static __always_inline void csd_unlock(struct __call_single_data *csd)
 	smp_store_release(&csd->node.u_flags, 0);
 }
 
-static __always_inline void
-raw_smp_call_single_queue(int cpu, struct llist_node *node, smp_call_func_t func)
-{
-	/*
-	 * The list addition should be visible to the target CPU when it pops
-	 * the head of the list to pull the entry off it in the IPI handler
-	 * because of normal cache coherency rules implied by the underlying
-	 * llist ops.
-	 *
-	 * If IPIs can go out of order to the cache coherency protocol
-	 * in an architecture, sufficient synchronisation should be added
-	 * to arch code to make it appear to obey cache coherency WRT
-	 * locking and barrier primitives. Generic code isn't really
-	 * equipped to do the right thing...
-	 */
-	if (llist_add(node, &per_cpu(call_single_queue, cpu)))
-		send_call_function_single_ipi(cpu, func);
-}
-
 static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
 
 void __smp_call_single_queue(int cpu, struct llist_node *node)
@@ -354,10 +337,23 @@ void __smp_call_single_queue(int cpu, struct llist_node *node)
 		func = CSD_TYPE(csd) == CSD_TYPE_TTWU ?
 			sched_ttwu_pending : csd->func;
 
-		raw_smp_call_single_queue(cpu, node, func);
-	} else {
-		raw_smp_call_single_queue(cpu, node, NULL);
+		trace_ipi_send_cpu(cpu, _RET_IP_, func);
 	}
+
+	/*
+	 * The list addition should be visible to the target CPU when it pops
+	 * the head of the list to pull the entry off it in the IPI handler
+	 * because of normal cache coherency rules implied by the underlying
+	 * llist ops.
+	 *
+	 * If IPIs can go out of order to the cache coherency protocol
+	 * in an architecture, sufficient synchronisation should be added
+	 * to arch code to make it appear to obey cache coherency WRT
+	 * locking and barrier primitives. Generic code isn't really
+	 * equipped to do the right thing...
+	 */
+	if (llist_add(node, &per_cpu(call_single_queue, cpu)))
+		send_call_function_single_ipi(cpu);
 }
 
 /*
@@ -732,9 +728,9 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 	int cpu, last_cpu, this_cpu = smp_processor_id();
 	struct call_function_data *cfd;
 	bool wait = scf_flags & SCF_WAIT;
+	int nr_cpus = 0, nr_queued = 0;
 	bool run_remote = false;
 	bool run_local = false;
-	int nr_cpus = 0;
 
 	lockdep_assert_preemption_disabled();
 
@@ -776,8 +772,10 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 		for_each_cpu(cpu, cfd->cpumask) {
 			call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
 
-			if (cond_func && !cond_func(cpu, info))
+			if (cond_func && !cond_func(cpu, info)) {
+				__cpumask_clear_cpu(cpu, cfd->cpumask);
 				continue;
+			}
 
 			csd_lock(csd);
 			if (wait)
@@ -793,17 +791,25 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 				nr_cpus++;
 				last_cpu = cpu;
 			}
+			nr_queued++;
 		}
 
+		/*
+		 * Trace each smp_function_call_*() as an IPI, actual IPIs
+		 * will be traced with func==generic_smp_call_function_single_ipi().
+		 */
+		if (nr_queued)
+			trace_ipi_send_cpumask(cfd->cpumask, _RET_IP_, func);
+
 		/*
 		 * Choose the most efficient way to send an IPI. Note that the
 		 * number of CPUs might be zero due to concurrent changes to the
 		 * provided mask.
 		 */
 		if (nr_cpus == 1)
-			send_call_function_single_ipi(last_cpu, func);
+			send_call_function_single_ipi(last_cpu);
 		else if (likely(nr_cpus > 1))
-			send_call_function_ipi_mask(cfd->cpumask_ipi, func);
+			send_call_function_ipi_mask(cfd->cpumask_ipi);
 	}
 
 	if (run_local && (!cond_func || cond_func(this_cpu, info))) {
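
The diff above splits the tracing in two: the queueing sites report the real callback via trace_ipi_send_cpu()/trace_ipi_send_cpumask() (even for CPUs that end up needing no IPI, hence nr_queued), while the IPI-send helpers only ever report generic_smp_call_function_single_interrupt(). The following is a minimal standalone userspace sketch of that split, not kernel code; names such as fake_smp_call_single_queue(), fake_call_function_single_interrupt() and NR_FAKE_CPUS are invented for illustration, and the printf() calls stand in for the tracepoints.

/* Userspace sketch of "trace the callback at queue time, trace the generic
 * handler at IPI time". Single-threaded, so the "IPI" is a direct call. */
#include <stdio.h>
#include <stdbool.h>

#define NR_FAKE_CPUS 4

typedef void (*smp_call_func_t)(void *info);

struct fake_csd {
	struct fake_csd *next;
	smp_call_func_t func;
	void *info;
	const char *name;	/* for trace output only */
};

static struct fake_csd *call_single_queue[NR_FAKE_CPUS];

/* Stand-in for the IPI handler: drain the queue and run each callback. */
static void fake_call_function_single_interrupt(int cpu)
{
	struct fake_csd *csd = call_single_queue[cpu];

	call_single_queue[cpu] = NULL;
	for (; csd; csd = csd->next)
		csd->func(csd->info);
}

static void fake_smp_call_single_queue(int cpu, struct fake_csd *csd)
{
	bool was_empty = call_single_queue[cpu] == NULL;

	/* Trace the *real* callback at queue time, whether or not an IPI follows. */
	printf("trace: ipi_send_cpu cpu=%d func=%s\n", cpu, csd->name);

	csd->next = call_single_queue[cpu];
	call_single_queue[cpu] = csd;

	/* Only an empty->non-empty transition needs an "IPI"; that one is
	 * traced with the generic handler, matching the send helpers above. */
	if (was_empty) {
		printf("trace: ipi_send_cpu cpu=%d func=fake_call_function_single_interrupt\n",
		       cpu);
		fake_call_function_single_interrupt(cpu);
	}
}

static void say_hello(void *info)
{
	printf("callback ran: %s\n", (char *)info);
}

int main(void)
{
	struct fake_csd csd = { .func = say_hello, .info = "hello", .name = "say_hello" };

	fake_smp_call_single_queue(1, &csd);
	return 0;
}

Queueing a second callback while the fake queue is non-empty would emit only the first trace line, which is the behaviour the nr_queued accounting preserves for smp_call_function_many_cond().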