Skip to content

Commit d703738

Browse files
hubitao-yaoma authored and KAGA-KOKO committed
watchdog/softlockup: Low-overhead detection of interrupt storm
The following softlockup is caused by interrupt storm, but it cannot be identified from the call tree. Because the call tree is just a snapshot and doesn't fully capture the behavior of the CPU during the soft lockup. watchdog: BUG: soft lockup - CPU#28 stuck for 23s! [fio:83921] ... Call trace: __do_softirq+0xa0/0x37c __irq_exit_rcu+0x108/0x140 irq_exit+0x14/0x20 __handle_domain_irq+0x84/0xe0 gic_handle_irq+0x80/0x108 el0_irq_naked+0x50/0x58 Therefore, it is necessary to report CPU utilization during the softlockup_threshold period (report once every sample_period, for a total of 5 reportings), like this: watchdog: BUG: soft lockup - CPU#28 stuck for 23s! [fio:83921] CPU#28 Utilization every 4s during lockup: #1: 0% system, 0% softirq, 100% hardirq, 0% idle #2: 0% system, 0% softirq, 100% hardirq, 0% idle #3: 0% system, 0% softirq, 100% hardirq, 0% idle #4: 0% system, 0% softirq, 100% hardirq, 0% idle #5: 0% system, 0% softirq, 100% hardirq, 0% idle ... This is helpful in determining whether an interrupt storm has occurred or in identifying the cause of the softlockup. The criteria for determination are as follows: a. If the hardirq utilization is high, then interrupt storm should be considered and the root cause cannot be determined from the call tree. b. If the softirq utilization is high, then the call might not necessarily point at the root cause. c. If the system utilization is high, then analyzing the root cause from the call tree is possible in most cases. The mechanism requires a considerable amount of global storage space when configured for the maximum number of CPUs. Therefore, adding a SOFTLOCKUP_DETECTOR_INTR_STORM Kconfig knob that defaults to "yes" if the max number of CPUs is <= 128. 
Signed-off-by: Bitao Hu <yaoma@linux.alibaba.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Douglas Anderson <dianders@chromium.org> Reviewed-by: Liu Song <liusong@linux.alibaba.com> Link: https://lore.kernel.org/r/20240411074134.30922-5-yaoma@linux.alibaba.com
1 parent 25a4a01 commit d703738

2 files changed

Lines changed: 112 additions & 1 deletion

File tree

kernel/watchdog.c

Lines changed: 98 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
#include <linux/cpu.h>
1717
#include <linux/nmi.h>
1818
#include <linux/init.h>
19+
#include <linux/kernel_stat.h>
20+
#include <linux/math64.h>
1921
#include <linux/module.h>
2022
#include <linux/sysctl.h>
2123
#include <linux/tick.h>
@@ -35,6 +37,8 @@ static DEFINE_MUTEX(watchdog_mutex);
3537
# define WATCHDOG_HARDLOCKUP_DEFAULT 0
3638
#endif
3739

40+
/*
 * The watchdog hrtimer fires NUM_SAMPLE_PERIODS times per softlockup
 * threshold window, so this is also the number of CPU-utilization
 * snapshots reported when a soft lockup is detected.
 */
#define NUM_SAMPLE_PERIODS 5
41+
3842
unsigned long __read_mostly watchdog_enabled;
3943
int __read_mostly watchdog_user_enabled = 1;
4044
static int __read_mostly watchdog_hardlockup_user_enabled = WATCHDOG_HARDLOCKUP_DEFAULT;
@@ -333,6 +337,96 @@ __setup("watchdog_thresh=", watchdog_thresh_setup);
333337

334338
static void __lockup_detector_cleanup(void);
335339

340+
#ifdef CONFIG_SOFTLOCKUP_DETECTOR_INTR_STORM
341+
/*
 * Index names for one group of tracked CPU utilization statistics.
 * NUM_STATS_PER_GROUP is the sentinel giving the group size.
 */
enum stats_per_group {
	STATS_SYSTEM,
	STATS_SOFTIRQ,
	STATS_HARDIRQ,
	STATS_IDLE,
	NUM_STATS_PER_GROUP,
};
348+
349+
/*
 * Maps each stats_per_group slot to the kernel_cpustat index it
 * samples; must stay in the same order as enum stats_per_group.
 */
static const enum cpu_usage_stat tracked_stats[NUM_STATS_PER_GROUP] = {
	CPUTIME_SYSTEM,
	CPUTIME_SOFTIRQ,
	CPUTIME_IRQ,
	CPUTIME_IDLE,
};
355+
356+
/* Previous 16-bit snapshot of each tracked cputime counter (per CPU). */
static DEFINE_PER_CPU(u16, cpustat_old[NUM_STATS_PER_GROUP]);
/* Ring buffer of utilization percentages, one row per sample period. */
static DEFINE_PER_CPU(u8, cpustat_util[NUM_SAMPLE_PERIODS][NUM_STATS_PER_GROUP]);
/* Ring index of the oldest entry, i.e. the next row to overwrite. */
static DEFINE_PER_CPU(u8, cpustat_tail);
359+
360+
/*
361+
* We don't need nanosecond resolution. A granularity of 16ms is
362+
* sufficient for our precision, allowing us to use u16 to store
363+
* cpustats, which will roll over roughly every ~1000 seconds.
364+
* 2^24 ~= 16 * 10^6
365+
*/
366+
static u16 get_16bit_precision(u64 data_ns)
367+
{
368+
return data_ns >> 24LL; /* 2^24ns ~= 16.8ms */
369+
}
370+
371+
/*
 * Sample this CPU's cputime counters once per watchdog sample_period and
 * record the delta as a utilization percentage in the cpustat_util ring.
 * Called from the watchdog hrtimer on the local CPU, so the plain
 * __this_cpu_*() accessors are safe here.
 */
static void update_cpustat(void)
{
	int i;
	u8 util;
	u16 old_stat, new_stat;
	struct kernel_cpustat kcpustat;
	u64 *cpustat = kcpustat.cpustat;
	u8 tail = __this_cpu_read(cpustat_tail);
	u16 sample_period_16 = get_16bit_precision(sample_period);

	kcpustat_cpu_fetch(&kcpustat, smp_processor_id());

	for (i = 0; i < NUM_STATS_PER_GROUP; i++) {
		old_stat = __this_cpu_read(cpustat_old[i]);
		new_stat = get_16bit_precision(cpustat[tracked_stats[i]]);
		/* u16 subtraction wraps correctly across counter rollover */
		util = DIV_ROUND_UP(100 * (new_stat - old_stat), sample_period_16);
		__this_cpu_write(cpustat_util[tail][i], util);
		__this_cpu_write(cpustat_old[i], new_stat);
	}

	/* Advance the ring; the oldest row is overwritten next period. */
	__this_cpu_write(cpustat_tail, (tail + 1) % NUM_SAMPLE_PERIODS);
}
393+
394+
/*
 * Print the last NUM_SAMPLE_PERIODS utilization snapshots for this CPU,
 * oldest first (cpustat_tail indexes the oldest ring entry).
 */
static void print_cpustat(void)
{
	int i, group;
	u8 tail = __this_cpu_read(cpustat_tail);
	u64 sample_period_second = sample_period;

	/* Convert the period from nanoseconds to whole seconds for output. */
	do_div(sample_period_second, NSEC_PER_SEC);

	/*
	 * Outputting the "watchdog" prefix on every line is redundant and not
	 * concise, and the original alarm information is sufficient for
	 * positioning in logs, hence here printk() is used instead of pr_crit().
	 */
	printk(KERN_CRIT "CPU#%d Utilization every %llus during lockup:\n",
	       smp_processor_id(), sample_period_second);

	for (i = 0; i < NUM_SAMPLE_PERIODS; i++) {
		/* Walk forward from the oldest entry toward the newest. */
		group = (tail + i) % NUM_SAMPLE_PERIODS;
		printk(KERN_CRIT "\t#%d: %3u%% system,\t%3u%% softirq,\t"
		       "%3u%% hardirq,\t%3u%% idle\n", i + 1,
		       __this_cpu_read(cpustat_util[group][STATS_SYSTEM]),
		       __this_cpu_read(cpustat_util[group][STATS_SOFTIRQ]),
		       __this_cpu_read(cpustat_util[group][STATS_HARDIRQ]),
		       __this_cpu_read(cpustat_util[group][STATS_IDLE]));
	}
}
420+
421+
/* Report the CPU utilization history when a soft lockup is detected. */
static void report_cpu_status(void)
{
	print_cpustat();
}
425+
#else
/* No-op stubs when CONFIG_SOFTLOCKUP_DETECTOR_INTR_STORM is disabled. */
static inline void update_cpustat(void) { }
static inline void report_cpu_status(void) { }
#endif
429+
336430
/*
337431
* Hard-lockup warnings should be triggered after just a few seconds. Soft-
338432
* lockups can have false positives under extreme conditions. So we generally
@@ -364,7 +458,7 @@ static void set_sample_period(void)
364458
* and hard thresholds) to increment before the
365459
* hardlockup detector generates a warning
366460
*/
367-
sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
461+
sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / NUM_SAMPLE_PERIODS);
368462
watchdog_update_hrtimer_threshold(sample_period);
369463
}
370464

@@ -504,6 +598,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
504598
*/
505599
period_ts = READ_ONCE(*this_cpu_ptr(&watchdog_report_ts));
506600

601+
update_cpustat();
602+
507603
/* Reset the interval when touched by known problematic code. */
508604
if (period_ts == SOFTLOCKUP_DELAY_REPORT) {
509605
if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
@@ -539,6 +635,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
539635
pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
540636
smp_processor_id(), duration,
541637
current->comm, task_pid_nr(current));
638+
report_cpu_status();
542639
print_modules();
543640
print_irqtrace_events(current);
544641
if (regs)

lib/Kconfig.debug

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,6 +1029,20 @@ config SOFTLOCKUP_DETECTOR
10291029
chance to run. The current stack trace is displayed upon
10301030
detection and the system will stay locked up.
10311031

1032+
config SOFTLOCKUP_DETECTOR_INTR_STORM
	bool "Detect Interrupt Storm in Soft Lockups"
	depends on SOFTLOCKUP_DETECTOR && IRQ_TIME_ACCOUNTING
	select GENERIC_IRQ_STAT_SNAPSHOT
	default y if NR_CPUS <= 128
	help
	  Say Y here to enable the kernel to detect interrupt storm
	  during "soft lockups".

	  "soft lockups" can be caused by a variety of reasons. If one is
	  caused by an interrupt storm, then the storming interrupts will not
	  be on the callstack. To detect this case, it is necessary to report
	  the CPU stats and the interrupt counts during the "soft lockups".
10321046
config BOOTPARAM_SOFTLOCKUP_PANIC
10331047
bool "Panic (Reboot) On Soft Lockups"
10341048
depends on SOFTLOCKUP_DETECTOR

0 commit comments

Comments
 (0)