Skip to content

Commit 2e171ab

Browse files
Pnina Feder and akpm00
authored and committed
panic: add panic_force_cpu= parameter to redirect panic to a specific CPU
Some platforms require panic handling to execute on a specific CPU for crash dump to work reliably. This can be due to firmware limitations, interrupt routing constraints, or platform-specific requirements where only a single CPU is able to safely enter the crash kernel.

Add the panic_force_cpu= kernel command-line parameter to redirect panic execution to a designated CPU. When the parameter is provided, the CPU that initially triggers panic forwards the panic context to the target CPU via IPI, which then proceeds with the normal panic and kexec flow.

The IPI delivery is implemented as a weak function (panic_smp_redirect_cpu) so architectures with NMI support can override it for more reliable delivery.

If the specified CPU is invalid, offline, or a panic is already in progress on another CPU, the redirection is skipped and panic continues on the current CPU.

[pnina.feder@mobileye.com: fix unused variable warning]
Link: https://lkml.kernel.org/r/20260126122618.2967950-1-pnina.feder@mobileye.com
Link: https://lkml.kernel.org/r/20260122102457.1154599-1-pnina.feder@mobileye.com
Signed-off-by: Pnina Feder <pnina.feder@mobileye.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent f3951e9 commit 2e171ab

4 files changed

Lines changed: 186 additions & 2 deletions

File tree

Documentation/admin-guide/kernel-parameters.txt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4788,6 +4788,21 @@ Kernel parameters
47884788
panic_on_warn=1 panic() instead of WARN(). Useful to cause kdump
47894789
on a WARN().
47904790

4791+
panic_force_cpu=
4792+
[KNL,SMP] Force panic handling to execute on a specific CPU.
4793+
Format: <cpu number>
4794+
Some platforms require panic handling to occur on a
4795+
specific CPU for the crash kernel to function correctly.
4796+
This can be due to firmware limitations, interrupt routing
4797+
constraints, or platform-specific requirements where only
4798+
a particular CPU can safely enter the crash kernel.
4799+
When set, panic() will redirect execution to the specified
4800+
CPU before proceeding with the normal panic and kexec flow.
4801+
If the target CPU is offline or unavailable, panic proceeds
4802+
on the current CPU.
4803+
This option should only be used for systems with the above
4804+
constraints as it might cause the panic operation to be less reliable.
4805+
47914806
panic_print= Bitmask for printing system info when panic happens.
47924807
User can chose combination of the following bits:
47934808
bit 0: print all tasks info

include/linux/panic.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,14 @@ void abort(void);
4141
* PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec().
4242
*/
4343
extern atomic_t panic_cpu;
44+
45+
/*
46+
* panic_redirect_cpu is used when panic is redirected to a specific CPU via
47+
* the panic_force_cpu= boot parameter. It holds the CPU number that originally
48+
* triggered the panic before redirection. A value of PANIC_CPU_INVALID means
49+
* no redirection has occurred.
50+
*/
51+
extern atomic_t panic_redirect_cpu;
4452
#define PANIC_CPU_INVALID -1
4553

4654
bool panic_try_start(void);

include/linux/smp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ int smp_call_function_single_async(int cpu, call_single_data_t *csd);
6262
void __noreturn panic_smp_self_stop(void);
6363
void __noreturn nmi_panic_self_stop(struct pt_regs *regs);
6464
void crash_smp_send_stop(void);
65+
int panic_smp_redirect_cpu(int target_cpu, void *msg);
6566

6667
/*
6768
* Call a function on all processors

kernel/panic.c

Lines changed: 162 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242

4343
#define PANIC_TIMER_STEP 100
4444
#define PANIC_BLINK_SPD 18
45+
#define PANIC_MSG_BUFSZ 1024
4546

4647
#ifdef CONFIG_SMP
4748
/*
@@ -74,6 +75,8 @@ EXPORT_SYMBOL_GPL(panic_timeout);
7475

7576
unsigned long panic_print;
7677

78+
static int panic_force_cpu = -1;
79+
7780
ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
7881

7982
EXPORT_SYMBOL(panic_notifier_list);
@@ -300,6 +303,150 @@ void __weak crash_smp_send_stop(void)
300303
}
301304

302305
atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
306+
atomic_t panic_redirect_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
307+
308+
#if defined(CONFIG_SMP) && defined(CONFIG_CRASH_DUMP)
309+
static char *panic_force_buf;
310+
311+
static int __init panic_force_cpu_setup(char *str)
312+
{
313+
int cpu;
314+
315+
if (!str)
316+
return -EINVAL;
317+
318+
if (kstrtoint(str, 0, &cpu) || cpu < 0 || cpu >= nr_cpu_ids) {
319+
pr_warn("panic_force_cpu: invalid value '%s'\n", str);
320+
return -EINVAL;
321+
}
322+
323+
panic_force_cpu = cpu;
324+
return 0;
325+
}
326+
early_param("panic_force_cpu", panic_force_cpu_setup);
327+
328+
/*
 * Late initcall that allocates the PANIC_MSG_BUFSZ-byte message buffer
 * (panic_force_buf), but only when panic_force_cpu holds a non-negative
 * CPU number.
 *
 * The allocation result is deliberately not checked: a NULL
 * panic_force_buf is a valid state and simply leaves the buffer
 * unavailable.
 *
 * Return: always 0.
 */
static int __init panic_force_cpu_late_init(void)
{
	if (panic_force_cpu >= 0)
		panic_force_buf = kmalloc(PANIC_MSG_BUFSZ, GFP_KERNEL);

	return 0;
}
late_initcall(panic_force_cpu_late_init);
338+
339+
/*
 * Cross-CPU callback: @info is the already formatted, NUL-terminated
 * panic message. It is passed through "%s" so that any '%' characters
 * in the message are not interpreted as format directives.
 */
static void do_panic_on_target_cpu(void *info)
{
	const char *message = info;

	panic("%s", message);
}
343+
344+
/**
345+
* panic_smp_redirect_cpu - Redirect panic to target CPU
346+
* @target_cpu: CPU that should handle the panic
347+
* @msg: formatted panic message
348+
*
349+
* Default implementation uses IPI. Architectures with NMI support
350+
* can override this for more reliable delivery.
351+
*
352+
* Return: 0 on success, negative errno on failure
353+
*/
354+
int __weak panic_smp_redirect_cpu(int target_cpu, void *msg)
355+
{
356+
static call_single_data_t panic_csd;
357+
358+
panic_csd.func = do_panic_on_target_cpu;
359+
panic_csd.info = msg;
360+
361+
return smp_call_function_single_async(target_cpu, &panic_csd);
362+
}
363+
364+
/**
365+
* panic_try_force_cpu - Redirect panic to a specific CPU for crash kernel
366+
* @fmt: panic message format string
367+
* @args: arguments for format string
368+
*
369+
* Some platforms require panic handling to occur on a specific CPU
370+
* for the crash kernel to function correctly. This function redirects
371+
* panic handling to the CPU specified via the panic_force_cpu= boot parameter.
372+
*
373+
* Returns false if panic should proceed on current CPU.
374+
* Returns true if panic was redirected.
375+
*/
376+
__printf(1, 0)
377+
static bool panic_try_force_cpu(const char *fmt, va_list args)
378+
{
379+
int this_cpu = raw_smp_processor_id();
380+
int old_cpu = PANIC_CPU_INVALID;
381+
const char *msg;
382+
383+
/* Feature not enabled via boot parameter */
384+
if (panic_force_cpu < 0)
385+
return false;
386+
387+
/* Already on target CPU - proceed normally */
388+
if (this_cpu == panic_force_cpu)
389+
return false;
390+
391+
/* Target CPU is offline, can't redirect */
392+
if (!cpu_online(panic_force_cpu)) {
393+
pr_warn("panic: target CPU %d is offline, continuing on CPU %d\n",
394+
panic_force_cpu, this_cpu);
395+
return false;
396+
}
397+
398+
/* Another panic already in progress */
399+
if (panic_in_progress())
400+
return false;
401+
402+
/*
403+
* Only one CPU can do the redirect. Use atomic cmpxchg to ensure
404+
* we don't race with another CPU also trying to redirect.
405+
*/
406+
if (!atomic_try_cmpxchg(&panic_redirect_cpu, &old_cpu, this_cpu))
407+
return false;
408+
409+
/*
410+
* Use dynamically allocated buffer if available, otherwise
411+
* fall back to static message for early boot panics or allocation failure.
412+
*/
413+
if (panic_force_buf) {
414+
vsnprintf(panic_force_buf, PANIC_MSG_BUFSZ, fmt, args);
415+
msg = panic_force_buf;
416+
} else {
417+
msg = "Redirected panic (buffer unavailable)";
418+
}
419+
420+
console_verbose();
421+
bust_spinlocks(1);
422+
423+
pr_emerg("panic: Redirecting from CPU %d to CPU %d for crash kernel.\n",
424+
this_cpu, panic_force_cpu);
425+
426+
/* Dump original CPU before redirecting */
427+
if (!test_taint(TAINT_DIE) &&
428+
oops_in_progress <= 1 &&
429+
IS_ENABLED(CONFIG_DEBUG_BUGVERBOSE)) {
430+
dump_stack();
431+
}
432+
433+
if (panic_smp_redirect_cpu(panic_force_cpu, (void *)msg) != 0) {
434+
atomic_set(&panic_redirect_cpu, PANIC_CPU_INVALID);
435+
pr_warn("panic: failed to redirect to CPU %d, continuing on CPU %d\n",
436+
panic_force_cpu, this_cpu);
437+
return false;
438+
}
439+
440+
/* IPI/NMI sent, this CPU should stop */
441+
return true;
442+
}
443+
#else
444+
/*
 * Stub for builds that do not have both CONFIG_SMP and CONFIG_CRASH_DUMP
 * enabled: redirection is never attempted, so the caller always continues
 * the panic on the current CPU. @fmt and @args are left untouched.
 */
__printf(1, 0)
static inline bool panic_try_force_cpu(const char *fmt, va_list args)
{
	return false;
}
449+
#endif /* CONFIG_SMP && CONFIG_CRASH_DUMP */
303450

304451
bool panic_try_start(void)
305452
{
@@ -428,7 +575,7 @@ static void panic_other_cpus_shutdown(bool crash_kexec)
428575
*/
429576
void vpanic(const char *fmt, va_list args)
430577
{
431-
static char buf[1024];
578+
static char buf[PANIC_MSG_BUFSZ];
432579
long i, i_next = 0, len;
433580
int state = 0;
434581
bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers;
@@ -452,6 +599,15 @@ void vpanic(const char *fmt, va_list args)
452599
local_irq_disable();
453600
preempt_disable_notrace();
454601

602+
/* Redirect panic to target CPU if configured via panic_force_cpu=. */
603+
if (panic_try_force_cpu(fmt, args)) {
604+
/*
605+
* Mark ourselves offline so panic_other_cpus_shutdown() won't wait
606+
* for us on architectures that check num_online_cpus().
607+
*/
608+
set_cpu_online(smp_processor_id(), false);
609+
panic_smp_self_stop();
610+
}
455611
/*
456612
* It's possible to come here directly from a panic-assertion and
457613
* not have preempt disabled. Some functions called from here want
@@ -484,7 +640,11 @@ void vpanic(const char *fmt, va_list args)
484640
/*
485641
* Avoid nested stack-dumping if a panic occurs during oops processing
486642
*/
487-
if (test_taint(TAINT_DIE) || oops_in_progress > 1) {
643+
if (atomic_read(&panic_redirect_cpu) != PANIC_CPU_INVALID &&
644+
panic_force_cpu == raw_smp_processor_id()) {
645+
pr_emerg("panic: Redirected from CPU %d, skipping stack dump.\n",
646+
atomic_read(&panic_redirect_cpu));
647+
} else if (test_taint(TAINT_DIE) || oops_in_progress > 1) {
488648
panic_this_cpu_backtrace_printed = true;
489649
} else if (IS_ENABLED(CONFIG_DEBUG_BUGVERBOSE)) {
490650
dump_stack();

0 commit comments

Comments
 (0)