Skip to content

Commit ff4ef2f

Browse files
MiaoheLin authored and akpm00 committed
selftests/mm: add memory failure anonymous page test
Patch series "selftests/mm: add memory failure selftests", v4. Introduce selftests to validate the functionality of memory failure. These tests help ensure that memory failure handling for anonymous pages and pagecache pages works correctly, including proper SIGBUS delivery to user processes, page isolation, and recovery paths. Currently the madvise syscall is used to inject memory failures, and only anonymous pages and pagecache pages are tested. More test scenarios, e.g. hugetlb, shmem, thp, will be added. More memory failure injection methods, e.g. APEI Error INJection, will also be supported if required. This patch (of 3): This patch adds a new kselftest to validate memory failure handling for anonymous pages. The test performs the following operations: 1. Allocates anonymous pages using mmap(). 2. Injects a memory failure via the madvise syscall. 3. Verifies the expected error handling behavior. 4. Unpoisons the memory. This test helps ensure that memory failure handling for anonymous pages works correctly, including proper SIGBUS delivery to user processes, page isolation and recovery paths. Link: https://lkml.kernel.org/r/20260206031639.2707102-1-linmiaohe@huawei.com Link: https://lkml.kernel.org/r/20260206031639.2707102-2-linmiaohe@huawei.com Signed-off-by: Miaohe Lin <linmiaohe@huawei.com> Cc: David Hildenbrand <david@kernel.org> Cc: Liam Howlett <liam.howlett@oracle.com> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Cc: Mark Brown <broonie@kernel.org> Cc: Michal Hocko <mhocko@suse.com> Cc: Mike Rapoport <rppt@kernel.org> Cc: Naoya Horiguchi <nao.horiguchi@gmail.com> Cc: Shuah Khan <shuah@kernel.org> Cc: Suren Baghdasaryan <surenb@google.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: kernel test robot <lkp@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent a67fe41 commit ff4ef2f

9 files changed

Lines changed: 314 additions & 0 deletions

File tree

MAINTAINERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11691,6 +11691,7 @@ F: include/linux/memory-failure.h
1169111691
F: include/trace/events/memory-failure.h
1169211692
F: mm/hwpoison-inject.c
1169311693
F: mm/memory-failure.c
11694+
F: tools/testing/selftests/mm/memory-failure.c
1169411695

1169511696
HYCON HY46XX TOUCHSCREEN SUPPORT
1169611697
M: Giulio Benetti <giulio.benetti@benettiengineering.com>

tools/testing/selftests/mm/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ map_hugetlb
1212
map_populate
1313
thuge-gen
1414
compaction_test
15+
memory-failure
1516
migration
1617
mlock2-tests
1718
mrelease_test

tools/testing/selftests/mm/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ TEST_GEN_FILES += map_populate
7575
ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64))
7676
TEST_GEN_FILES += memfd_secret
7777
endif
78+
TEST_GEN_FILES += memory-failure
7879
TEST_GEN_FILES += migration
7980
TEST_GEN_FILES += mkdirty
8081
TEST_GEN_FILES += mlock-random-test
@@ -154,6 +155,7 @@ TEST_PROGS += ksft_ksm_numa.sh
154155
TEST_PROGS += ksft_madv_guard.sh
155156
TEST_PROGS += ksft_madv_populate.sh
156157
TEST_PROGS += ksft_memfd_secret.sh
158+
TEST_PROGS += ksft_memory_failure.sh
157159
TEST_PROGS += ksft_migration.sh
158160
TEST_PROGS += ksft_mkdirty.sh
159161
TEST_PROGS += ksft_mlock.sh

tools/testing/selftests/mm/config

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,5 @@ CONFIG_ANON_VMA_NAME=y
1111
CONFIG_FTRACE=y
1212
CONFIG_PROFILING=y
1313
CONFIG_UPROBES=y
14+
CONFIG_MEMORY_FAILURE=y
15+
CONFIG_HWPOISON_INJECT=m
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/bin/sh -e
2+
# SPDX-License-Identifier: GPL-2.0
3+
4+
./run_vmtests.sh -t memory-failure
Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* Memory-failure functional tests.
4+
*
5+
* Author(s): Miaohe Lin <linmiaohe@huawei.com>
6+
*/
7+
8+
#include "../kselftest_harness.h"
9+
10+
#include <sys/mman.h>
11+
#include <linux/mman.h>
12+
#include <linux/string.h>
13+
#include <signal.h>
14+
#include <setjmp.h>
15+
#include <unistd.h>
16+
#include <fcntl.h>
17+
18+
#include "vm_util.h"
19+
20+
/* How the memory failure is injected into the test page. */
enum inject_type {
21+
/* Injection through madvise(MADV_HWPOISON); see madv_hard_inject(). */
MADV_HARD,
22+
/* Injection through madvise(MADV_SOFT_OFFLINE); see madv_soft_inject(). */
MADV_SOFT,
23+
};
24+
25+
/* Selects which set of post-injection expectations check() verifies. */
enum result_type {
26+
/* Anonymous page after a MADV_HARD injection: check() expects a SIGBUS. */
MADV_HARD_ANON,
27+
/* Anonymous page after a MADV_SOFT injection: check() expects no SIGBUS. */
MADV_SOFT_ANON,
28+
};
29+
30+
static jmp_buf signal_jmp_buf;
31+
static siginfo_t siginfo;
32+
const char *pagemap_proc = "/proc/self/pagemap";
33+
const char *kpageflags_proc = "/proc/kpageflags";
34+
35+
/* Per-test state shared by the setup, teardown and helper functions. */
FIXTURE(memory_failure)
36+
{
37+
/* System page size as reported by sysconf(_SC_PAGESIZE). */
unsigned long page_size;
38+
/* HardwareCorrupted value sampled by prepare() before injection. */
unsigned long corrupted_size;
39+
/* PFN backing the test address, recorded by prepare() before injection. */
unsigned long pfn;
40+
/* File descriptor for /proc/self/pagemap. */
int pagemap_fd;
41+
/* File descriptor for /proc/kpageflags. */
int kpageflags_fd;
42+
/* Set before injecting so that injection is not repeated after a SIGBUS jump. */
bool triggered;
43+
};
44+
45+
/* Per-variant parameters: the injection kind and the function performing it. */
FIXTURE_VARIANT(memory_failure)
46+
{
47+
/* Decides which result_type the test passes to check(). */
enum inject_type type;
48+
/* Injects a failure at vaddr; the implementations in this file return madvise()'s result. */
int (*inject)(FIXTURE_DATA(memory_failure) * self, void *vaddr);
49+
};
50+
51+
/*
 * Inject a hard memory failure on the single page at vaddr by calling
 * madvise() with MADV_HWPOISON. Returns whatever madvise() returns.
 */
static int madv_hard_inject(FIXTURE_DATA(memory_failure) * self, void *vaddr)
{
	const unsigned long len = self->page_size;
	int rc;

	rc = madvise(vaddr, len, MADV_HWPOISON);
	return rc;
}
55+
56+
/* Variant that injects the failure with madv_hard_inject(). */
FIXTURE_VARIANT_ADD(memory_failure, madv_hard)
57+
{
58+
.type = MADV_HARD,
59+
.inject = madv_hard_inject,
60+
};
61+
62+
/*
 * Soft-offline the single page at vaddr by calling madvise() with
 * MADV_SOFT_OFFLINE. Returns whatever madvise() returns.
 */
static int madv_soft_inject(FIXTURE_DATA(memory_failure) * self, void *vaddr)
{
	const unsigned long len = self->page_size;
	int rc;

	rc = madvise(vaddr, len, MADV_SOFT_OFFLINE);
	return rc;
}
66+
67+
/* Variant that injects the failure with madv_soft_inject(). */
FIXTURE_VARIANT_ADD(memory_failure, madv_soft)
67+
{
68+
.type = MADV_SOFT,
69+
.inject = madv_soft_inject,
70+
};
72+
73+
/*
 * SIGBUS handler: keep a copy of the delivered siginfo for later inspection,
 * then jump back to the sigsetjmp() point stored in signal_jmp_buf with a
 * return value of 1. signo and args are not used.
 */
static void sigbus_action(int signo, siginfo_t *si, void *args)
{
	memcpy(&siginfo, si, sizeof(siginfo));

	siglongjmp(signal_jmp_buf, 1);
}
78+
79+
static int setup_sighandler(void)
80+
{
81+
struct sigaction sa = {
82+
.sa_sigaction = sigbus_action,
83+
.sa_flags = SA_SIGINFO,
84+
};
85+
86+
return sigaction(SIGBUS, &sa, NULL);
87+
}
88+
89+
FIXTURE_SETUP(memory_failure)
90+
{
91+
memset(self, 0, sizeof(*self));
92+
93+
self->page_size = (unsigned long)sysconf(_SC_PAGESIZE);
94+
95+
memset(&siginfo, 0, sizeof(siginfo));
96+
if (setup_sighandler())
97+
SKIP(return, "setup sighandler failed.\n");
98+
99+
self->pagemap_fd = open(pagemap_proc, O_RDONLY);
100+
if (self->pagemap_fd == -1)
101+
SKIP(return, "open %s failed.\n", pagemap_proc);
102+
103+
self->kpageflags_fd = open(kpageflags_proc, O_RDONLY);
104+
if (self->kpageflags_fd == -1)
105+
SKIP(return, "open %s failed.\n", kpageflags_proc);
106+
}
107+
108+
static void teardown_sighandler(void)
109+
{
110+
struct sigaction sa = {
111+
.sa_handler = SIG_DFL,
112+
.sa_flags = SA_SIGINFO,
113+
};
114+
115+
sigaction(SIGBUS, &sa, NULL);
116+
}
117+
118+
FIXTURE_TEARDOWN(memory_failure)
119+
{
120+
close(self->kpageflags_fd);
121+
close(self->pagemap_fd);
122+
teardown_sighandler();
123+
}
124+
125+
/*
 * Record the pre-injection state for the page at vaddr: its PFN, looked up
 * through the pagemap file descriptor, and the current HardwareCorrupted
 * size. Both lookups are asserted.
 */
static void prepare(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self,
126+
void *vaddr)
127+
{
128+
self->pfn = pagemap_get_pfn(self->pagemap_fd, vaddr);
129+
/* NOTE(review): -1UL is presumably pagemap_get_pfn()'s failure value - confirm in vm_util.c. */
ASSERT_NE(self->pfn, -1UL);
130+
131+
/* Baseline that check() and cleanup() compare against. */
ASSERT_EQ(get_hardware_corrupted_size(&self->corrupted_size), 0);
132+
}
133+
134+
/*
 * Check that every one of the size bytes starting at vaddr still holds the
 * 0xce fill pattern that the test writes before injecting the failure.
 *
 * The range is compared in chunks of up to 64 bytes. A trailing chunk
 * shorter than 64 bytes is compared as well, so sizes that are not a
 * multiple of 64 are fully covered. A size of 0 trivially matches.
 *
 * Returns true if all bytes match, false otherwise.
 */
static bool check_memory(void *vaddr, unsigned long size)
{
	/* Byte pointer: arithmetic on void * is a GNU extension. */
	const unsigned char *pos = vaddr;
	unsigned char pattern[64];

	memset(pattern, 0xce, sizeof(pattern));
	while (size) {
		unsigned long chunk = size < sizeof(pattern) ? size : sizeof(pattern);

		if (memcmp(pos, pattern, chunk))
			return false;
		pos += chunk;
		size -= chunk;
	}

	return true;
}
148+
149+
/*
 * Verify the state of the page at vaddr after a failure has been injected.
 *
 * @type selects the expectations to apply:
 *  - MADV_SOFT_ANON: no SIGBUS was received, the page content is intact and
 *    the address is now backed by a different PFN.
 *  - MADV_HARD_ANON: a SIGBUS was received with the expected siginfo and the
 *    pagemap entry for the address is reported as swapped.
 * @jmp_ret is the value returned by sigsetjmp() in the caller: 0 on the
 * direct path, 1 after sigbus_action() jumped back. It is not named
 * "setjmp" because <setjmp.h> reserves that identifier.
 *
 * For both types the HardwareCorrupted counter must have grown by one page
 * (in kB) and the original PFN must carry the HWPoison flag.
 */
static void check(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self,
		  void *vaddr, enum result_type type, int jmp_ret)
{
	unsigned long size;
	uint64_t pfn_flags;

	switch (type) {
	case MADV_SOFT_ANON:
		/* It is not expected to receive a SIGBUS signal. */
		ASSERT_EQ(jmp_ret, 0);

		/* The page content should remain unchanged. */
		ASSERT_TRUE(check_memory(vaddr, self->page_size));

		/* The backing pfn of addr should have changed. */
		ASSERT_NE(pagemap_get_pfn(self->pagemap_fd, vaddr), self->pfn);
		break;
	case MADV_HARD_ANON:
		/* The SIGBUS signal should have been received. */
		ASSERT_EQ(jmp_ret, 1);

		/* Check if siginfo contains correct SIGBUS context. */
		ASSERT_EQ(siginfo.si_signo, SIGBUS);
		ASSERT_EQ(siginfo.si_code, BUS_MCEERR_AR);
		ASSERT_EQ(1UL << siginfo.si_addr_lsb, self->page_size);
		ASSERT_EQ(siginfo.si_addr, vaddr);

		/* XXX Check backing pte is hwpoison entry when supported. */
		ASSERT_TRUE(pagemap_is_swapped(self->pagemap_fd, vaddr));
		break;
	default:
		/* The value switched on is a result_type, not an inject_type. */
		SKIP(return, "unexpected result type %d.\n", type);
	}

	/* Check if the value of HardwareCorrupted has increased. */
	ASSERT_EQ(get_hardware_corrupted_size(&size), 0);
	ASSERT_EQ(size, self->corrupted_size + self->page_size / 1024);

	/* Check if HWPoison flag is set. */
	ASSERT_EQ(pageflags_get(self->pfn, self->kpageflags_fd, &pfn_flags), 0);
	ASSERT_EQ(pfn_flags & KPF_HWPOISON, KPF_HWPOISON);
}
191+
192+
/*
 * Undo the injection: unpoison the PFN recorded in self->pfn, then assert
 * that its HWPoison page flag is clear and that HardwareCorrupted equals the
 * value stored in self->corrupted_size. vaddr is not used here.
 */
static void cleanup(struct __test_metadata *_metadata, FIXTURE_DATA(memory_failure) * self,
193+
void *vaddr)
194+
{
195+
unsigned long size;
196+
uint64_t pfn_flags;
197+
198+
ASSERT_EQ(unpoison_memory(self->pfn), 0);
199+
200+
/* Check if HWPoison flag is cleared. */
201+
ASSERT_EQ(pageflags_get(self->pfn, self->kpageflags_fd, &pfn_flags), 0);
202+
ASSERT_NE(pfn_flags & KPF_HWPOISON, KPF_HWPOISON);
203+
204+
/* Check if the value of HardwareCorrupted has decreased. */
205+
ASSERT_EQ(get_hardware_corrupted_size(&size), 0);
206+
ASSERT_EQ(size, self->corrupted_size);
207+
}
208+
209+
/*
 * Anonymous page test: map one private anonymous page, fill it with 0xce,
 * inject a memory failure through the variant's inject() callback, verify
 * the outcome with check(), then unpoison via cleanup() and unmap the page.
 */
TEST_F(memory_failure, anon)
210+
{
211+
char *addr;
212+
int ret;
213+
214+
addr = mmap(0, self->page_size, PROT_READ | PROT_WRITE,
215+
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
216+
if (addr == MAP_FAILED)
217+
SKIP(return, "mmap failed, not enough memory.\n");
218+
/* Same 0xce pattern that check_memory() compares against. */
memset(addr, 0xce, self->page_size);
219+
220+
prepare(_metadata, self, addr);
221+
222+
/*
 * ret is 0 on the direct call. If sigbus_action() runs, execution resumes
 * here with ret == 1; self->triggered is already true by then, so the
 * injection block below is not entered a second time.
 */
ret = sigsetjmp(signal_jmp_buf, 1);
223+
if (!self->triggered) {
224+
self->triggered = true;
225+
ASSERT_EQ(variant->inject(self, addr), 0);
226+
/* NOTE(review): presumably forces a real load from the page after injection - confirm against FORCE_READ in vm_util.h. */
FORCE_READ(*addr);
227+
}
228+
229+
if (variant->type == MADV_HARD)
230+
check(_metadata, self, addr, MADV_HARD_ANON, ret);
231+
else
232+
check(_metadata, self, addr, MADV_SOFT_ANON, ret);
233+
234+
cleanup(_metadata, self, addr);
235+
236+
ASSERT_EQ(munmap(addr, self->page_size), 0);
237+
}
238+
239+
TEST_HARNESS_MAIN

tools/testing/selftests/mm/run_vmtests.sh

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ separated by spaces:
9191
test VMA merge cases behave as expected
9292
- rmap
9393
test rmap behaves as expected
94+
- memory-failure
95+
test memory-failure behaves as expected
9496
9597
example: ./run_vmtests.sh -t "hmm mmap ksm"
9698
EOF
@@ -527,6 +529,25 @@ CATEGORY="page_frag" run_test ./test_page_frag.sh nonaligned
527529

528530
CATEGORY="rmap" run_test ./rmap
529531

532+
# Try to load hwpoison_inject if not present.
HWPOISON_DIR=/sys/kernel/debug/hwpoison/
if [ ! -d "$HWPOISON_DIR" ]; then
	if ! modprobe -q -R hwpoison_inject; then
		echo "Module hwpoison_inject not found, skipping..."
	elif modprobe hwpoison_inject > /dev/null 2>&1; then
		# Only remember the module as loaded by us when loading succeeded,
		# so that the unload below is attempted only in that case.
		LOADED_MOD=1
	fi
fi

# Run the test only when the hwpoison debugfs directory is available.
if [ -d "$HWPOISON_DIR" ]; then
	CATEGORY="memory-failure" run_test ./memory-failure
fi

# Unload the module again if this script loaded it.
if [ -n "${LOADED_MOD}" ]; then
	modprobe -r hwpoison_inject > /dev/null 2>&1
fi
550+
530551
if [ "${HAVE_HUGEPAGES}" = 1 ]; then
531552
echo "$orig_nr_hugepgs" > /proc/sys/vm/nr_hugepages
532553
fi

tools/testing/selftests/mm/vm_util.c

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -723,3 +723,44 @@ int ksm_stop(void)
723723
close(ksm_fd);
724724
return ret == 1 ? 0 : -errno;
725725
}
726+
727+
/*
 * Read the "HardwareCorrupted:" value (in kB) from /proc/meminfo.
 *
 * On success the value is stored in *val and 0 is returned. If the file
 * cannot be opened or no matching line is found, -1 is returned and *val is
 * left untouched.
 */
int get_hardware_corrupted_size(unsigned long *val)
{
	FILE *meminfo = fopen("/proc/meminfo", "r");
	char *buf = NULL;
	size_t cap = 0;
	int err = -1;

	if (meminfo == NULL)
		return -1;

	/* Stop reading as soon as the line has been found (err == 0). */
	while (err && getline(&buf, &cap, meminfo) > 0) {
		unsigned long kb;

		if (sscanf(buf, "HardwareCorrupted: %12lu kB", &kb) != 1)
			continue;

		*val = kb;
		err = 0;
	}

	free(buf);
	fclose(meminfo);
	return err;
}
750+
751+
/*
 * Unpoison the page frame pfn by writing it, formatted as "0x<hex>\n", to
 * the hwpoison debugfs file /sys/kernel/debug/hwpoison/unpoison-pfn.
 *
 * Returns 0 when the whole request was written, -errno if open() or write()
 * failed, and -EIO if write() accepted fewer bytes than requested.
 */
int unpoison_memory(unsigned long pfn)
{
	char buf[32];
	ssize_t written;
	int fd, len;
	int err = 0;

	fd = open("/sys/kernel/debug/hwpoison/unpoison-pfn", O_WRONLY);
	if (fd < 0)
		return -errno;

	len = snprintf(buf, sizeof(buf), "0x%lx\n", pfn);
	written = write(fd, buf, len);
	/* Capture the error before close() gets a chance to overwrite errno. */
	if (written < 0)
		err = -errno;
	else if (written != len)
		err = -EIO;

	close(fd);

	return err;
}

tools/testing/selftests/mm/vm_util.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
#define KPF_COMPOUND_HEAD BIT_ULL(15)
2222
#define KPF_COMPOUND_TAIL BIT_ULL(16)
23+
#define KPF_HWPOISON BIT_ULL(19)
2324
#define KPF_THP BIT_ULL(22)
2425
/*
2526
* Ignore the checkpatch warning, we must read from x but don't want to do
@@ -154,6 +155,8 @@ long ksm_get_full_scans(void);
154155
int ksm_use_zero_pages(void);
155156
int ksm_start(void);
156157
int ksm_stop(void);
158+
/* Reads the HardwareCorrupted value (kB) from /proc/meminfo into *val; returns 0 on success, -1 otherwise. */
int get_hardware_corrupted_size(unsigned long *val);
159+
/* Writes pfn to the hwpoison debugfs unpoison-pfn file; returns 0 on success, a negative errno value otherwise. */
int unpoison_memory(unsigned long pfn);
157160

158161
/*
159162
* On ppc64 this will only work with radix 2M hugepage size

0 commit comments

Comments
 (0)