Skip to content

Commit eac80dd

Browse files
mrutland-arm authored and kees committed
lkdtm/bugs: add test for panic() with stuck secondary CPUs
Upon a panic() the kernel will use either smp_send_stop() or crash_smp_send_stop() to attempt to stop secondary CPUs via an IPI, which may or may not be an NMI. Generally it's preferable that this is an NMI so that CPUs can be stopped in as many situations as possible, but it's not always possible to provide an NMI, and there are cases where CPUs may be unable to handle the NMI regardless. This patch adds a test for panic() where all other CPUs are stuck with interrupts disabled, which can be used to check whether the kernel gracefully handles CPUs failing to respond to a stop, and whether NMIs actually work to stop CPUs. For example, on arm64 *without* an NMI, this results in: | # echo PANIC_STOP_IRQOFF > /sys/kernel/debug/provoke-crash/DIRECT | lkdtm: Performing direct entry PANIC_STOP_IRQOFF | Kernel panic - not syncing: panic stop irqoff test | CPU: 2 PID: 24 Comm: migration/2 Not tainted 6.5.0-rc3-00077-ge6c782389895-dirty #4 | Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 | Stopper: multi_cpu_stop+0x0/0x1a0 <- stop_machine_cpuslocked+0x158/0x1a4 | Call trace: | dump_backtrace+0x94/0xec | show_stack+0x18/0x24 | dump_stack_lvl+0x74/0xc0 | dump_stack+0x18/0x24 | panic+0x358/0x3e8 | lkdtm_PANIC+0x0/0x18 | multi_cpu_stop+0x9c/0x1a0 | cpu_stopper_thread+0x84/0x118 | smpboot_thread_fn+0x224/0x248 | kthread+0x114/0x118 | ret_from_fork+0x10/0x20 | SMP: stopping secondary CPUs | SMP: failed to stop secondary CPUs 0-3 | Kernel Offset: 0x401cf3490000 from 0xffff80008000000c0 | PHYS_OFFSET: 0x40000000 | CPU features: 0x00000000,68c167a1,cce6773f | Memory Limit: none | ---[ end Kernel panic - not syncing: panic stop irqoff test ]--- Note the "failed to stop secondary CPUs 0-3" message. 
On arm64 *with* an NMI, this results in: | # echo PANIC_STOP_IRQOFF > /sys/kernel/debug/provoke-crash/DIRECT | lkdtm: Performing direct entry PANIC_STOP_IRQOFF | Kernel panic - not syncing: panic stop irqoff test | CPU: 1 PID: 19 Comm: migration/1 Not tainted 6.5.0-rc3-00077-ge6c782389895-dirty #4 | Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 | Stopper: multi_cpu_stop+0x0/0x1a0 <- stop_machine_cpuslocked+0x158/0x1a4 | Call trace: | dump_backtrace+0x94/0xec | show_stack+0x18/0x24 | dump_stack_lvl+0x74/0xc0 | dump_stack+0x18/0x24 | panic+0x358/0x3e8 | lkdtm_PANIC+0x0/0x18 | multi_cpu_stop+0x9c/0x1a0 | cpu_stopper_thread+0x84/0x118 | smpboot_thread_fn+0x224/0x248 | kthread+0x114/0x118 | ret_from_fork+0x10/0x20 | SMP: stopping secondary CPUs | Kernel Offset: 0x55a9c0bc0000 from 0xffff800080000000 | PHYS_OFFSET: 0x40000000 | CPU features: 0x00000000,68c167a1,fce6773f | Memory Limit: none | ---[ end Kernel panic - not syncing: panic stop irqoff test ]--- Note the absence of a "failed to stop secondary CPUs" message, since we don't log anything when secondary CPUs are successfully stopped. Signed-off-by: Mark Rutland <mark.rutland@arm.com> Cc: Douglas Anderson <dianders@chromium.org> Cc: Kees Cook <keescook@chromium.org> Cc: Stephen Boyd <swboyd@chromium.org> Cc: Sumit Garg <sumit.garg@linaro.org> Reviewed-by: Kees Cook <keescook@chromium.org> Reviewed-by: Douglas Anderson <dianders@chromium.org> Reviewed-by: Stephen Boyd <swboyd@chromium.org> Link: https://lore.kernel.org/r/20230921161634.4063233-1-mark.rutland@arm.com Signed-off-by: Kees Cook <keescook@chromium.org>
1 parent 6cd5932 commit eac80dd

2 files changed

Lines changed: 30 additions & 1 deletion

File tree

drivers/misc/lkdtm/bugs.c

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,14 @@
66
* test source files.
77
*/
88
#include "lkdtm.h"
9+
#include <linux/cpu.h>
910
#include <linux/list.h>
1011
#include <linux/sched.h>
1112
#include <linux/sched/signal.h>
1213
#include <linux/sched/task_stack.h>
13-
#include <linux/uaccess.h>
1414
#include <linux/slab.h>
15+
#include <linux/stop_machine.h>
16+
#include <linux/uaccess.h>
1517

1618
#if IS_ENABLED(CONFIG_X86_32) && !IS_ENABLED(CONFIG_UML)
1719
#include <asm/desc.h>
@@ -73,6 +75,31 @@ static void lkdtm_PANIC(void)
7375
panic("dumptest");
7476
}
7577

78+
static int panic_stop_irqoff_fn(void *arg)
79+
{
80+
atomic_t *v = arg;
81+
82+
/*
83+
* As stop_machine() disables interrupts, all CPUs within this function
84+
* have interrupts disabled and cannot take a regular IPI.
85+
*
86+
* The last CPU which enters here will trigger a panic, and as all CPUs
87+
* cannot take a regular IPI, we'll only be able to stop secondaries if
88+
* smp_send_stop() or crash_smp_send_stop() uses an NMI.
89+
*/
90+
if (atomic_inc_return(v) == num_online_cpus())
91+
panic("panic stop irqoff test");
92+
93+
for (;;)
94+
cpu_relax();
95+
}
96+
97+
static void lkdtm_PANIC_STOP_IRQOFF(void)
98+
{
99+
atomic_t v = ATOMIC_INIT(0);
100+
stop_machine(panic_stop_irqoff_fn, &v, cpu_online_mask);
101+
}
102+
76103
static void lkdtm_BUG(void)
77104
{
78105
BUG();
@@ -638,6 +665,7 @@ static noinline void lkdtm_CORRUPT_PAC(void)
638665

639666
static struct crashtype crashtypes[] = {
640667
CRASHTYPE(PANIC),
668+
CRASHTYPE(PANIC_STOP_IRQOFF),
641669
CRASHTYPE(BUG),
642670
CRASHTYPE(WARNING),
643671
CRASHTYPE(WARNING_MESSAGE),

tools/testing/selftests/lkdtm/tests.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#PANIC
2+
#PANIC_STOP_IRQOFF Crashes entire system
23
BUG kernel BUG at
34
WARNING WARNING:
45
WARNING_MESSAGE message trigger

0 commit comments

Comments
 (0)