@@ -35,14 +35,15 @@ static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
3535#ifdef CONFIG_TICK_ONESHOT
3636static DEFINE_PER_CPU (struct clock_event_device * , tick_oneshot_wakeup_device ) ;
3737
38- static void tick_broadcast_setup_oneshot (struct clock_event_device * bc );
38+ static void tick_broadcast_setup_oneshot (struct clock_event_device * bc , bool from_periodic );
3939static void tick_broadcast_clear_oneshot (int cpu );
4040static void tick_resume_broadcast_oneshot (struct clock_event_device * bc );
4141# ifdef CONFIG_HOTPLUG_CPU
4242static void tick_broadcast_oneshot_offline (unsigned int cpu );
4343# endif
4444#else
45- static inline void tick_broadcast_setup_oneshot (struct clock_event_device * bc ) { BUG (); }
45+ static inline void
46+ tick_broadcast_setup_oneshot (struct clock_event_device * bc , bool from_periodic ) { BUG (); }
4647static inline void tick_broadcast_clear_oneshot (int cpu ) { }
4748static inline void tick_resume_broadcast_oneshot (struct clock_event_device * bc ) { }
4849# ifdef CONFIG_HOTPLUG_CPU
@@ -264,7 +265,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
264265 if (tick_broadcast_device .mode == TICKDEV_MODE_PERIODIC )
265266 tick_broadcast_start_periodic (bc );
266267 else
267- tick_broadcast_setup_oneshot (bc );
268+ tick_broadcast_setup_oneshot (bc , false );
268269 ret = 1 ;
269270 } else {
270271 /*
@@ -500,7 +501,7 @@ void tick_broadcast_control(enum tick_broadcast_mode mode)
500501 if (tick_broadcast_device .mode == TICKDEV_MODE_PERIODIC )
501502 tick_broadcast_start_periodic (bc );
502503 else
503- tick_broadcast_setup_oneshot (bc );
504+ tick_broadcast_setup_oneshot (bc , false );
504505 }
505506 }
506507out :
@@ -1020,48 +1021,101 @@ static inline ktime_t tick_get_next_period(void)
10201021/**
10211022 * tick_broadcast_setup_oneshot - setup the broadcast device
10221023 */
1023- static void tick_broadcast_setup_oneshot (struct clock_event_device * bc )
1024+ static void tick_broadcast_setup_oneshot (struct clock_event_device * bc ,
1025+ bool from_periodic )
10241026{
10251027 int cpu = smp_processor_id ();
1028+ ktime_t nexttick = 0 ;
10261029
10271030 if (!bc )
10281031 return ;
10291032
1030- /* Set it up only once ! */
1031- if (bc -> event_handler != tick_handle_oneshot_broadcast ) {
1032- int was_periodic = clockevent_state_periodic (bc );
1033-
1034- bc -> event_handler = tick_handle_oneshot_broadcast ;
1035-
1033+ /*
1034+ * When the broadcast device was switched to oneshot by the first
1035+ * CPU handling the NOHZ change, the other CPUs will reach this
1036+ * code via hrtimer_run_queues() -> tick_check_oneshot_change()
1037+ * too. Set up the broadcast device only once!
1038+ */
1039+ if (bc -> event_handler == tick_handle_oneshot_broadcast ) {
10361040 /*
1037- * We must be careful here. There might be other CPUs
1038- * waiting for periodic broadcast. We need to set the
1039- * oneshot_mask bits for those and program the
1040- * broadcast device to fire.
1041+ * The CPU which switched from periodic to oneshot mode
1042+ * set the broadcast oneshot bit for all other CPUs which
1043+ * are in the general (periodic) broadcast mask to ensure
1044+ * that CPUs which wait for the periodic broadcast are
1045+ * woken up.
1046+ *
1047+ * Clear the bit for the local CPU as the set bit would
1048+ * prevent the first tick_broadcast_enter() after this CPU
1049+ * switched to oneshot state to program the broadcast
1050+ * device.
1051+ *
1052+ * This code can also be reached via tick_broadcast_control(),
1053+ * but this cannot avoid the tick_broadcast_clear_oneshot()
1054+ * as that would break the periodic to oneshot transition of
1055+ * secondary CPUs. But that's harmless as the below only
1056+ * clears already cleared bits.
10411057 */
1058+ tick_broadcast_clear_oneshot (cpu );
1059+ return ;
1060+ }
1061+
1062+
1063+ bc -> event_handler = tick_handle_oneshot_broadcast ;
1064+ bc -> next_event = KTIME_MAX ;
1065+
1066+ /*
1067+ * When the tick mode is switched from periodic to oneshot it must
1068+ * be ensured that CPUs which are waiting for periodic broadcast
1069+ * get their wake-up at the next tick. This is achieved by ORing
1070+ * tick_broadcast_mask into tick_broadcast_oneshot_mask.
1071+ *
1072+ * For other callers, e.g. broadcast device replacement,
1073+ * tick_broadcast_oneshot_mask must not be touched as this would
1074+ * set bits for CPUs which are already NOHZ, but not idle. Their
1075+ * next tick_broadcast_enter() would observe the bit set and fail
1076+ * to update the expiry time and the broadcast event device.
1077+ */
1078+ if (from_periodic ) {
10421079 cpumask_copy (tmpmask , tick_broadcast_mask );
1080+ /* Remove the local CPU as it is obviously not idle */
10431081 cpumask_clear_cpu (cpu , tmpmask );
1044- cpumask_or (tick_broadcast_oneshot_mask ,
1045- tick_broadcast_oneshot_mask , tmpmask );
1082+ cpumask_or (tick_broadcast_oneshot_mask , tick_broadcast_oneshot_mask , tmpmask );
10461083
1047- if (was_periodic && !cpumask_empty (tmpmask )) {
1048- ktime_t nextevt = tick_get_next_period ();
1084+ /*
1085+ * Ensure that the oneshot broadcast handler will wake the
1086+ * CPUs which are still waiting for periodic broadcast.
1087+ */
1088+ nexttick = tick_get_next_period ();
1089+ tick_broadcast_init_next_event (tmpmask , nexttick );
10491090
1050- clockevents_switch_state (bc , CLOCK_EVT_STATE_ONESHOT );
1051- tick_broadcast_init_next_event (tmpmask , nextevt );
1052- tick_broadcast_set_event (bc , cpu , nextevt );
1053- } else
1054- bc -> next_event = KTIME_MAX ;
1055- } else {
10561091 /*
1057- * The first cpu which switches to oneshot mode sets
1058- * the bit for all other cpus which are in the general
1059- * (periodic) broadcast mask. So the bit is set and
1060- * would prevent the first broadcast enter after this
1061- * to program the bc device.
1092+ * If the underlying broadcast clock event device is
1093+ * already in oneshot state, then there is nothing to do.
1094+ * The device was already armed for the next tick
1095+ * in tick_handle_broadcast_periodic()
10621096 */
1063- tick_broadcast_clear_oneshot (cpu );
1097+ if (clockevent_state_oneshot (bc ))
1098+ return ;
10641099 }
1100+
1101+ /*
1102+ * When switching from periodic to oneshot mode arm the broadcast
1103+ * device for the next tick.
1104+ *
1105+ * If the broadcast device has been replaced in oneshot mode and
1106+ * the oneshot broadcast mask is not empty, then arm it to expire
1107+ * immediately in order to reevaluate the next expiring timer.
1108+ * @nexttick is 0 and therefore in the past which will cause the
1109+ * clockevent code to force an event.
1110+ *
1111+ * For both cases the programming can be avoided when the oneshot
1112+ * broadcast mask is empty.
1113+ *
1114+ * tick_broadcast_set_event() implicitly switches the broadcast
1115+ * device to oneshot state.
1116+ */
1117+ if (!cpumask_empty (tick_broadcast_oneshot_mask ))
1118+ tick_broadcast_set_event (bc , cpu , nexttick );
10651119}
10661120
10671121/*
@@ -1070,14 +1124,16 @@ static void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
10701124void tick_broadcast_switch_to_oneshot (void )
10711125{
10721126 struct clock_event_device * bc ;
1127+ enum tick_device_mode oldmode ;
10731128 unsigned long flags ;
10741129
10751130 raw_spin_lock_irqsave (& tick_broadcast_lock , flags );
10761131
1132+ oldmode = tick_broadcast_device .mode ;
10771133 tick_broadcast_device .mode = TICKDEV_MODE_ONESHOT ;
10781134 bc = tick_broadcast_device .evtdev ;
10791135 if (bc )
1080- tick_broadcast_setup_oneshot (bc );
1136+ tick_broadcast_setup_oneshot (bc , oldmode == TICKDEV_MODE_PERIODIC );
10811137
10821138 raw_spin_unlock_irqrestore (& tick_broadcast_lock , flags );
10831139}
0 commit comments