Skip to content

Commit e38f65d

Browse files
soleenakpm00
authored and committed
kho: warn and fail on metadata or preserved memory in scratch area
Patch series "KHO: kfence + KHO memory corruption fix", v3. This series fixes a memory corruption bug in KHO that occurs when KFENCE is enabled. The root cause is that KHO metadata, allocated via kzalloc(), can be randomly serviced by kfence_alloc(). When a kernel boots via KHO, the early memblock allocator is restricted to a "scratch area". This forces the KFENCE pool to be allocated within this scratch area, creating a conflict. If KHO metadata is subsequently placed in this pool, it gets corrupted during the next kexec operation. Google is using KHO and has had obscure crashes due to this memory corruption, with stacks all over the place. I would prefer this fix to be properly backported to stable so we can also automatically consume it once we switch to the upstream KHO. Patch 1/3 introduces a debug-only feature (CONFIG_KEXEC_HANDOVER_DEBUG) that adds checks to detect and fail any operation that attempts to place KHO metadata or preserved memory within the scratch area. This serves as a validation and diagnostic tool to confirm the problem without affecting production builds. Patch 2/3 increases the bitmap to PAGE_SIZE, so the buddy allocator can be used. Patch 3/3 provides the fix by modifying KHO to allocate its metadata directly from the buddy allocator instead of slab. This bypasses the KFENCE interception entirely. This patch (of 3): It is invalid for KHO metadata or preserved memory regions to be located within the KHO scratch area, as this area is overwritten when the next kernel is loaded, and used early in boot by the next kernel. This can lead to memory corruption. Add checks to kho_preserve_* and KHO's internal metadata allocators (xa_load_or_alloc, new_chunk) to verify that the physical address of the memory does not overlap with any defined scratch region. If an overlap is detected, the operation will fail and a WARN_ON is triggered.
To avoid performance overhead in production kernels, these checks are enabled only when CONFIG_KEXEC_HANDOVER_DEBUG is selected. [rppt@kernel.org: fix KEXEC_HANDOVER_DEBUG Kconfig dependency] Link: https://lkml.kernel.org/r/aQHUyyFtiNZhx8jo@kernel.org [pasha.tatashin@soleen.com: build fix] Link: https://lkml.kernel.org/r/CA+CK2bBnorfsTymKtv4rKvqGBHs=y=MjEMMRg_tE-RME6n-zUw@mail.gmail.com Link: https://lkml.kernel.org/r/20251021000852.2924827-1-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20251021000852.2924827-2-pasha.tatashin@soleen.com Fixes: fc33e4b ("kexec: enable KHO support for memory preservation") Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com> Signed-off-by: Mike Rapoport <rppt@kernel.org> Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org> Reviewed-by: Pratyush Yadav <pratyush@kernel.org> Cc: Alexander Graf <graf@amazon.com> Cc: Christian Brauner <brauner@kernel.org> Cc: David Matlack <dmatlack@google.com> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Masahiro Yamada <masahiroy@kernel.org> Cc: Miguel Ojeda <ojeda@kernel.org> Cc: Randy Dunlap <rdunlap@infradead.org> Cc: Samiullah Khawaja <skhawaja@google.com> Cc: Tejun Heo <tj@kernel.org> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent 77008e1 commit e38f65d

5 files changed

Lines changed: 93 additions & 19 deletions

File tree

kernel/Kconfig.kexec

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,15 @@ config KEXEC_HANDOVER
109109
to keep data or state alive across the kexec. For this to work,
110110
both source and target kernels need to have this option enabled.
111111

112+
config KEXEC_HANDOVER_DEBUG
113+
bool "Enable Kexec Handover debug checks"
114+
depends on KEXEC_HANDOVER
115+
help
116+
This option enables extra sanity checks for the Kexec Handover
117+
subsystem. Since KHO performance is crucial in live update
118+
scenarios and the extra code might be adding overhead it is
119+
only optionally enabled.
120+
112121
config CRASH_DUMP
113122
bool "kernel crash dumps"
114123
default ARCH_DEFAULT_CRASH_DUMP

kernel/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ obj-$(CONFIG_KEXEC) += kexec.o
8383
obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
8484
obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o
8585
obj-$(CONFIG_KEXEC_HANDOVER) += kexec_handover.o
86+
obj-$(CONFIG_KEXEC_HANDOVER_DEBUG) += kexec_handover_debug.o
8687
obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
8788
obj-$(CONFIG_COMPAT) += compat.o
8889
obj-$(CONFIG_CGROUPS) += cgroup/

kernel/kexec_handover.c

Lines changed: 38 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#define pr_fmt(fmt) "KHO: " fmt
1010

11+
#include <linux/cleanup.h>
1112
#include <linux/cma.h>
1213
#include <linux/count_zeros.h>
1314
#include <linux/debugfs.h>
@@ -22,6 +23,7 @@
2223

2324
#include <asm/early_ioremap.h>
2425

26+
#include "kexec_handover_internal.h"
2527
/*
2628
* KHO is tightly coupled with mm init and needs access to some of mm
2729
* internal APIs.
@@ -133,26 +135,26 @@ static struct kho_out kho_out = {
133135

134136
static void *xa_load_or_alloc(struct xarray *xa, unsigned long index, size_t sz)
135137
{
136-
void *elm, *res;
138+
void *res = xa_load(xa, index);
137139

138-
elm = xa_load(xa, index);
139-
if (elm)
140-
return elm;
140+
if (res)
141+
return res;
142+
143+
void *elm __free(kfree) = kzalloc(sz, GFP_KERNEL);
141144

142-
elm = kzalloc(sz, GFP_KERNEL);
143145
if (!elm)
144146
return ERR_PTR(-ENOMEM);
145147

148+
if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), sz)))
149+
return ERR_PTR(-EINVAL);
150+
146151
res = xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL);
147152
if (xa_is_err(res))
148-
res = ERR_PTR(xa_err(res));
149-
150-
if (res) {
151-
kfree(elm);
153+
return ERR_PTR(xa_err(res));
154+
else if (res)
152155
return res;
153-
}
154156

155-
return elm;
157+
return no_free_ptr(elm);
156158
}
157159

158160
static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn,
@@ -345,15 +347,19 @@ static_assert(sizeof(struct khoser_mem_chunk) == PAGE_SIZE);
345347
static struct khoser_mem_chunk *new_chunk(struct khoser_mem_chunk *cur_chunk,
346348
unsigned long order)
347349
{
348-
struct khoser_mem_chunk *chunk;
350+
struct khoser_mem_chunk *chunk __free(kfree) = NULL;
349351

350352
chunk = kzalloc(PAGE_SIZE, GFP_KERNEL);
351353
if (!chunk)
352-
return NULL;
354+
return ERR_PTR(-ENOMEM);
355+
356+
if (WARN_ON(kho_scratch_overlap(virt_to_phys(chunk), PAGE_SIZE)))
357+
return ERR_PTR(-EINVAL);
358+
353359
chunk->hdr.order = order;
354360
if (cur_chunk)
355361
KHOSER_STORE_PTR(cur_chunk->hdr.next, chunk);
356-
return chunk;
362+
return no_free_ptr(chunk);
357363
}
358364

359365
static void kho_mem_ser_free(struct khoser_mem_chunk *first_chunk)
@@ -374,14 +380,17 @@ static int kho_mem_serialize(struct kho_serialization *ser)
374380
struct khoser_mem_chunk *chunk = NULL;
375381
struct kho_mem_phys *physxa;
376382
unsigned long order;
383+
int err = -ENOMEM;
377384

378385
xa_for_each(&ser->track.orders, order, physxa) {
379386
struct kho_mem_phys_bits *bits;
380387
unsigned long phys;
381388

382389
chunk = new_chunk(chunk, order);
383-
if (!chunk)
390+
if (IS_ERR(chunk)) {
391+
err = PTR_ERR(chunk);
384392
goto err_free;
393+
}
385394

386395
if (!first_chunk)
387396
first_chunk = chunk;
@@ -391,8 +400,10 @@ static int kho_mem_serialize(struct kho_serialization *ser)
391400

392401
if (chunk->hdr.num_elms == ARRAY_SIZE(chunk->bitmaps)) {
393402
chunk = new_chunk(chunk, order);
394-
if (!chunk)
403+
if (IS_ERR(chunk)) {
404+
err = PTR_ERR(chunk);
395405
goto err_free;
406+
}
396407
}
397408

398409
elm = &chunk->bitmaps[chunk->hdr.num_elms];
@@ -409,7 +420,7 @@ static int kho_mem_serialize(struct kho_serialization *ser)
409420

410421
err_free:
411422
kho_mem_ser_free(first_chunk);
412-
return -ENOMEM;
423+
return err;
413424
}
414425

415426
static void __init deserialize_bitmap(unsigned int order,
@@ -465,8 +476,8 @@ static void __init kho_mem_deserialize(const void *fdt)
465476
* area for early allocations that happen before page allocator is
466477
* initialized.
467478
*/
468-
static struct kho_scratch *kho_scratch;
469-
static unsigned int kho_scratch_cnt;
479+
struct kho_scratch *kho_scratch;
480+
unsigned int kho_scratch_cnt;
470481

471482
/*
472483
* The scratch areas are scaled by default as percent of memory allocated from
@@ -752,6 +763,9 @@ int kho_preserve_folio(struct folio *folio)
752763
const unsigned int order = folio_order(folio);
753764
struct kho_mem_track *track = &kho_out.ser.track;
754765

766+
if (WARN_ON(kho_scratch_overlap(pfn << PAGE_SHIFT, PAGE_SIZE << order)))
767+
return -EINVAL;
768+
755769
return __kho_preserve_order(track, pfn, order);
756770
}
757771
EXPORT_SYMBOL_GPL(kho_preserve_folio);
@@ -775,6 +789,11 @@ int kho_preserve_pages(struct page *page, unsigned int nr_pages)
775789
unsigned long failed_pfn = 0;
776790
int err = 0;
777791

792+
if (WARN_ON(kho_scratch_overlap(start_pfn << PAGE_SHIFT,
793+
nr_pages << PAGE_SHIFT))) {
794+
return -EINVAL;
795+
}
796+
778797
while (pfn < end_pfn) {
779798
const unsigned int order =
780799
min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn));

kernel/kexec_handover_debug.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
/*
3+
* kexec_handover_debug.c - kexec handover optional debug functionality
4+
* Copyright (C) 2025 Google LLC, Pasha Tatashin <pasha.tatashin@soleen.com>
5+
*/
6+
7+
#define pr_fmt(fmt) "KHO: " fmt
8+
9+
#include "kexec_handover_internal.h"
10+
11+
/*
 * kho_scratch_overlap - check a physical range against the KHO scratch areas.
 * @phys: start physical address of the range to test
 * @size: length of the range in bytes
 *
 * Returns true when [phys, phys + size) intersects any entry of the
 * kho_scratch[] table (kho_scratch_cnt entries), false otherwise.
 */
bool kho_scratch_overlap(phys_addr_t phys, size_t size)
{
	phys_addr_t range_end = phys + size;
	unsigned int i;

	for (i = 0; i < kho_scratch_cnt; i++) {
		phys_addr_t area_start = kho_scratch[i].addr;
		phys_addr_t area_end = area_start + kho_scratch[i].size;

		/* Half-open interval intersection test. */
		if (phys < area_end && range_end > area_start)
			return true;
	}

	return false;
}

kernel/kexec_handover_internal.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef LINUX_KEXEC_HANDOVER_INTERNAL_H
#define LINUX_KEXEC_HANDOVER_INTERNAL_H

#include <linux/kexec_handover.h>
#include <linux/types.h>

/*
 * KHO scratch area table shared between kexec_handover.c (which defines and
 * populates it) and the optional debug checks in kexec_handover_debug.c.
 */
extern struct kho_scratch *kho_scratch;
extern unsigned int kho_scratch_cnt;

#ifdef CONFIG_KEXEC_HANDOVER_DEBUG
/*
 * Returns true if the physical range [phys, phys + size) overlaps any of the
 * kho_scratch_cnt scratch regions; used to reject KHO metadata or preserved
 * memory placed where the next kernel will overwrite it.
 */
bool kho_scratch_overlap(phys_addr_t phys, size_t size);
#else
/* Debug checks compiled out: never report an overlap, adding no overhead. */
static inline bool kho_scratch_overlap(phys_addr_t phys, size_t size)
{
	return false;
}
#endif /* CONFIG_KEXEC_HANDOVER_DEBUG */

#endif /* LINUX_KEXEC_HANDOVER_INTERNAL_H */

0 commit comments

Comments
 (0)