Skip to content

Commit 620a30f

Browse files
Frederic Weisbecker authored and KAGA-KOKO committed
timers/nohz: Protect idle/iowait sleep time under seqcount
Reading idle/IO sleep time (e.g. from /proc/stat) can race with idle exit updates because the state machine handling the stats is not atomic and requires a coherent read batch. As a result, reading the sleep time may report irrelevant or backward values. Fix this by protecting the simple state machine within a seqcount. This is expected to be cheap enough not to add measurable performance impact on the idle path. Note that this only partially fixes the reader vs. writer race condition. A race remains that involves remote updates of the CPU iowait task counter. It can hardly be fixed. Reported-by: Yu Liao <liaoyu15@huawei.com> Signed-off-by: Frederic Weisbecker <frederic@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lore.kernel.org/r/20230222144649.624380-4-frederic@kernel.org
1 parent 07b65a8 commit 620a30f

2 files changed

Lines changed: 17 additions & 6 deletions

File tree

kernel/time/tick-sched.c

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -646,28 +646,34 @@ static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
646646

647647
delta = ktime_sub(now, ts->idle_entrytime);
648648

649+
write_seqcount_begin(&ts->idle_sleeptime_seq);
649650
if (nr_iowait_cpu(smp_processor_id()) > 0)
650651
ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
651652
else
652653
ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
653654

654655
ts->idle_entrytime = now;
655656
ts->idle_active = 0;
657+
write_seqcount_end(&ts->idle_sleeptime_seq);
656658

657659
sched_clock_idle_wakeup_event();
658660
}
659661

660662
static void tick_nohz_start_idle(struct tick_sched *ts)
661663
{
664+
write_seqcount_begin(&ts->idle_sleeptime_seq);
662665
ts->idle_entrytime = ktime_get();
663666
ts->idle_active = 1;
667+
write_seqcount_end(&ts->idle_sleeptime_seq);
668+
664669
sched_clock_idle_sleep_event();
665670
}
666671

667672
static u64 get_cpu_sleep_time_us(struct tick_sched *ts, ktime_t *sleeptime,
668673
bool compute_delta, u64 *last_update_time)
669674
{
670675
ktime_t now, idle;
676+
unsigned int seq;
671677

672678
if (!tick_nohz_active)
673679
return -1;
@@ -676,13 +682,17 @@ static u64 get_cpu_sleep_time_us(struct tick_sched *ts, ktime_t *sleeptime,
676682
if (last_update_time)
677683
*last_update_time = ktime_to_us(now);
678684

679-
if (ts->idle_active && compute_delta) {
680-
ktime_t delta = ktime_sub(now, ts->idle_entrytime);
685+
do {
686+
seq = read_seqcount_begin(&ts->idle_sleeptime_seq);
681687

682-
idle = ktime_add(*sleeptime, delta);
683-
} else {
684-
idle = *sleeptime;
685-
}
688+
if (ts->idle_active && compute_delta) {
689+
ktime_t delta = ktime_sub(now, ts->idle_entrytime);
690+
691+
idle = ktime_add(*sleeptime, delta);
692+
} else {
693+
idle = *sleeptime;
694+
}
695+
} while (read_seqcount_retry(&ts->idle_sleeptime_seq, seq));
686696

687697
return ktime_to_us(idle);
688698

kernel/time/tick-sched.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,7 @@ struct tick_sched {
7575
ktime_t idle_waketime;
7676

7777
/* Idle entry */
78+
seqcount_t idle_sleeptime_seq;
7879
ktime_t idle_entrytime;
7980

8081
/* Tick stop */

0 commit comments

Comments
 (0)