Skip to content

Commit 1f1893d

Browse files
committed
Merge branch 'timers/core' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks into timers/core
Pull tick/NOHZ updates from Frederic Weisbecker:

- A fix for rare jiffies update stalls that were reported by Paul McKenney
- Tick side cleanups after RCU_FAST_NO_HZ removal
- Handle softirqs on idle more gracefully

Link: https://lore.kernel.org/all/20220307233034.34550-1-frederic@kernel.org
2 parents 58dedf0 + f96272a commit 1f1893d

6 files changed

Lines changed: 78 additions & 22 deletions

File tree

include/linux/interrupt.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,16 @@ enum
579579
NR_SOFTIRQS
580580
};
581581

582-
#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ))
582+
/*
 * The following vectors can be safely ignored after ksoftirqd is parked:
 *
 * _ RCU:
 *	1) rcutree_migrate_callbacks() migrates the queue.
 *	2) rcu_report_dead() reports the final quiescent states.
 *
 * _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue
 */
#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ))
583592

584593
/* map softirq index to softirq name. update 'softirq_to_name' in
585594
* kernel/softirq.c when adding a new softirq.

include/linux/rcutiny.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,8 @@ static inline void rcu_softirq_qs(void)
6464
rcu_tasks_qs(current, (preempt)); \
6565
} while (0)
6666

67-
static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
67+
/*
 * Tiny-RCU stub: unconditionally reports that this CPU has no RCU
 * reason to keep the scheduler tick alive.
 */
static inline int rcu_needs_cpu(void)
{
	return 0;
}
7271

include/linux/rcutree.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
void rcu_softirq_qs(void);
2121
void rcu_note_context_switch(bool preempt);
22-
int rcu_needs_cpu(u64 basem, u64 *nextevt);
22+
int rcu_needs_cpu(void);
2323
void rcu_cpu_stall_reset(void);
2424

2525
/*

kernel/rcu/tree.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1086,9 +1086,8 @@ void rcu_irq_enter_irqson(void)
10861086
* Just check whether or not this CPU has non-offloaded RCU callbacks
10871087
* queued.
10881088
*/
1089-
int rcu_needs_cpu(u64 basemono, u64 *nextevt)
1089+
int rcu_needs_cpu(void)
10901090
{
1091-
*nextevt = KTIME_MAX;
10921091
return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
10931092
!rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
10941093
}

kernel/time/tick-sched.c

Lines changed: 61 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,8 @@ static ktime_t tick_init_jiffy_update(void)
169169
return period;
170170
}
171171

172+
#define MAX_STALLED_JIFFIES 5
173+
172174
static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
173175
{
174176
int cpu = smp_processor_id();
@@ -196,6 +198,21 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
196198
if (tick_do_timer_cpu == cpu)
197199
tick_do_update_jiffies64(now);
198200

201+
/*
202+
* If jiffies update stalled for too long (timekeeper in stop_machine()
203+
* or VMEXIT'ed for several msecs), force an update.
204+
*/
205+
if (ts->last_tick_jiffies != jiffies) {
206+
ts->stalled_jiffies = 0;
207+
ts->last_tick_jiffies = READ_ONCE(jiffies);
208+
} else {
209+
if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) {
210+
tick_do_update_jiffies64(now);
211+
ts->stalled_jiffies = 0;
212+
ts->last_tick_jiffies = READ_ONCE(jiffies);
213+
}
214+
}
215+
199216
if (ts->inidle)
200217
ts->got_idle_tick = 1;
201218
}
@@ -768,7 +785,7 @@ static inline bool local_timer_softirq_pending(void)
768785

769786
static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
770787
{
771-
u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
788+
u64 basemono, next_tick, delta, expires;
772789
unsigned long basejiff;
773790
unsigned int seq;
774791

@@ -791,7 +808,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
791808
* minimal delta which brings us back to this place
792809
* immediately. Lather, rinse and repeat...
793810
*/
794-
if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
811+
if (rcu_needs_cpu() || arch_needs_cpu() ||
795812
irq_work_needs_cpu() || local_timer_softirq_pending()) {
796813
next_tick = basemono + TICK_NSEC;
797814
} else {
@@ -802,10 +819,8 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
802819
* disabled this also looks at the next expiring
803820
* hrtimer.
804821
*/
805-
next_tmr = get_next_timer_interrupt(basejiff, basemono);
806-
ts->next_timer = next_tmr;
807-
/* Take the next rcu event into account */
808-
next_tick = next_rcu < next_tmr ? next_rcu : next_tmr;
822+
next_tick = get_next_timer_interrupt(basejiff, basemono);
823+
ts->next_timer = next_tick;
809824
}
810825

811826
/*
@@ -984,6 +999,45 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
984999
__tick_nohz_full_update_tick(ts, ktime_get());
9851000
}
9861001

1002+
/*
1003+
* A pending softirq outside an IRQ (or softirq disabled section) context
1004+
* should be waiting for ksoftirqd to handle it. Therefore we shouldn't
1005+
* reach here due to the need_resched() early check in can_stop_idle_tick().
1006+
*
1007+
* However if we are between CPUHP_AP_SMPBOOT_THREADS and CPU_TEARDOWN_CPU on the
1008+
* cpu_down() process, softirqs can still be raised while ksoftirqd is parked,
1009+
* triggering the below since wakep_softirqd() is ignored.
1010+
*
1011+
*/
1012+
static bool report_idle_softirq(void)
1013+
{
1014+
static int ratelimit;
1015+
unsigned int pending = local_softirq_pending();
1016+
1017+
if (likely(!pending))
1018+
return false;
1019+
1020+
/* Some softirqs claim to be safe against hotplug and ksoftirqd parking */
1021+
if (!cpu_active(smp_processor_id())) {
1022+
pending &= ~SOFTIRQ_HOTPLUG_SAFE_MASK;
1023+
if (!pending)
1024+
return false;
1025+
}
1026+
1027+
if (ratelimit < 10)
1028+
return false;
1029+
1030+
/* On RT, softirqs handling may be waiting on some lock */
1031+
if (!local_bh_blocked())
1032+
return false;
1033+
1034+
pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n",
1035+
pending);
1036+
ratelimit++;
1037+
1038+
return true;
1039+
}
1040+
9871041
static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
9881042
{
9891043
/*
@@ -1010,17 +1064,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
10101064
if (need_resched())
10111065
return false;
10121066

1013-
if (unlikely(local_softirq_pending())) {
1014-
static int ratelimit;
1015-
1016-
if (ratelimit < 10 && !local_bh_blocked() &&
1017-
(local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
1018-
pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n",
1019-
(unsigned int) local_softirq_pending());
1020-
ratelimit++;
1021-
}
1067+
if (unlikely(report_idle_softirq()))
10221068
return false;
1023-
}
10241069

10251070
if (tick_nohz_full_enabled()) {
10261071
/*

kernel/time/tick-sched.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ enum tick_nohz_mode {
4949
* @timer_expires_base: Base time clock monotonic for @timer_expires
5050
* @next_timer: Expiry time of next expiring timer for debugging purpose only
5151
* @tick_dep_mask: Tick dependency mask - is set, if someone needs the tick
52+
* @last_tick_jiffies: Value of jiffies seen on last tick
53+
* @stalled_jiffies: Number of stalled jiffies detected across ticks
5254
*/
5355
struct tick_sched {
5456
struct hrtimer sched_timer;
@@ -77,6 +79,8 @@ struct tick_sched {
7779
u64 next_timer;
7880
ktime_t idle_expires;
7981
atomic_t tick_dep_mask;
82+
unsigned long last_tick_jiffies;
83+
unsigned int stalled_jiffies;
8084
};
8185

8286
extern struct tick_sched *tick_get_tick_sched(int cpu);

0 commit comments

Comments
 (0)