Skip to content

Commit a91cc48

Browse files
committed
KVM: selftests: Test READ=>WRITE dirty logging behavior for shadow MMU
Update the nested dirty log test to validate KVM's handling of READ faults when dirty logging is enabled. Specifically, set the Dirty bit in the guest PTEs used to map L2 GPAs, so that KVM will create writable SPTEs when handling L2 read faults. When handling read faults in the shadow MMU, KVM opportunistically creates a writable SPTE if the mapping can be writable *and* the gPTE is dirty (or doesn't support the Dirty bit), i.e. if KVM doesn't need to intercept writes in order to emulate Dirty-bit updates. To actually test the L2 READ=>WRITE sequence, e.g. without masking a false pass by other test activity, route the READ=>WRITE and WRITE=>WRITE sequences to separate L1 pages, and differentiate between "marked dirty due to a WRITE access/fault" and "marked dirty due to creating a writable SPTE for a READ access/fault". The updated sequence exposes the bug fixed by KVM commit 1f4e5fc ("KVM: x86: fix nested guest live migration with PML") when the guest performs a READ=>WRITE sequence with dirty guest PTEs. Opportunistically tweak and rename the address macros, and add comments, to make it more obvious what the test is doing. E.g. NESTED_TEST_MEM1 vs. GUEST_TEST_MEM doesn't make it all that obvious that the test is creating aliases in both the L2 GPA and GVA address spaces, but only when L1 is using TDP to run L2. Cc: Yosry Ahmed <yosry.ahmed@linux.dev> Reviewed-by: Yosry Ahmed <yosry.ahmed@linux.dev> Link: https://patch.msgid.link/20260115172154.709024-1-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc@google.com>
1 parent 55058e3 commit a91cc48

3 files changed

Lines changed: 143 additions & 52 deletions

File tree

tools/testing/selftests/kvm/include/x86/processor.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1486,6 +1486,7 @@ bool kvm_cpu_has_tdp(void);
14861486
void tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr, uint64_t size);
14871487
void tdp_identity_map_default_memslots(struct kvm_vm *vm);
14881488
void tdp_identity_map_1g(struct kvm_vm *vm, uint64_t addr, uint64_t size);
1489+
uint64_t *tdp_get_pte(struct kvm_vm *vm, uint64_t l2_gpa);
14891490

14901491
/*
14911492
* Basic CPU control in CR0

tools/testing/selftests/kvm/lib/x86/processor.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,13 @@ static uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm,
390390
return virt_get_pte(vm, mmu, pte, vaddr, PG_LEVEL_4K);
391391
}
392392

393+
uint64_t *tdp_get_pte(struct kvm_vm *vm, uint64_t l2_gpa)
394+
{
395+
int level = PG_LEVEL_4K;
396+
397+
return __vm_get_page_table_entry(vm, &vm->stage2_mmu, l2_gpa, &level);
398+
}
399+
393400
uint64_t *vm_get_pte(struct kvm_vm *vm, uint64_t vaddr)
394401
{
395402
int level = PG_LEVEL_4K;

tools/testing/selftests/kvm/x86/nested_dirty_log_test.c

Lines changed: 135 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -17,43 +17,77 @@
1717

1818
/* The memory slot index to track dirty pages */
1919
#define TEST_MEM_SLOT_INDEX 1
20-
#define TEST_MEM_PAGES 3
2120

22-
/* L1 guest test virtual memory offset */
23-
#define GUEST_TEST_MEM 0xc0000000
21+
/*
22+
* Allocate four pages total. Two pages are used to verify that KVM marks
23+
* the accessed page/GFN as dirty, but not the "other" page. Times two
24+
* so that each "normal" page can be accessed from L2 via an aliased L2 GVA+GPA
25+
* (when TDP is enabled), to verify KVM marks _L1's_ page/GFN as dirty (to
26+
* detect failures, L2 => L1 GPAs can't be identity mapped in the TDP page
27+
* tables, as marking L2's GPA dirty would get a false pass if L1 == L2).
28+
*/
29+
#define TEST_MEM_PAGES 4
30+
31+
#define TEST_MEM_BASE 0xc0000000
32+
#define TEST_MEM_ALIAS_BASE 0xc0002000
33+
34+
#define TEST_GUEST_ADDR(base, idx) ((base) + (idx) * PAGE_SIZE)
2435

25-
/* L2 guest test virtual memory offset */
26-
#define NESTED_TEST_MEM1 0xc0001000
27-
#define NESTED_TEST_MEM2 0xc0002000
36+
#define TEST_GVA(idx) TEST_GUEST_ADDR(TEST_MEM_BASE, idx)
37+
#define TEST_GPA(idx) TEST_GUEST_ADDR(TEST_MEM_BASE, idx)
38+
39+
#define TEST_ALIAS_GPA(idx) TEST_GUEST_ADDR(TEST_MEM_ALIAS_BASE, idx)
40+
41+
#define TEST_HVA(vm, idx) addr_gpa2hva(vm, TEST_GPA(idx))
2842

2943
#define L2_GUEST_STACK_SIZE 64
3044

31-
static void l2_guest_code(u64 *a, u64 *b)
32-
{
33-
READ_ONCE(*a);
34-
WRITE_ONCE(*a, 1);
35-
GUEST_SYNC(true);
36-
GUEST_SYNC(false);
45+
/* Use the page offset bits to communicate the access+fault type. */
46+
#define TEST_SYNC_READ_FAULT BIT(0)
47+
#define TEST_SYNC_WRITE_FAULT BIT(1)
48+
#define TEST_SYNC_NO_FAULT BIT(2)
3749

38-
WRITE_ONCE(*b, 1);
39-
GUEST_SYNC(true);
40-
WRITE_ONCE(*b, 1);
41-
GUEST_SYNC(true);
42-
GUEST_SYNC(false);
50+
static void l2_guest_code(vm_vaddr_t base)
51+
{
52+
vm_vaddr_t page0 = TEST_GUEST_ADDR(base, 0);
53+
vm_vaddr_t page1 = TEST_GUEST_ADDR(base, 1);
54+
55+
READ_ONCE(*(u64 *)page0);
56+
GUEST_SYNC(page0 | TEST_SYNC_READ_FAULT);
57+
WRITE_ONCE(*(u64 *)page0, 1);
58+
GUEST_SYNC(page0 | TEST_SYNC_WRITE_FAULT);
59+
READ_ONCE(*(u64 *)page0);
60+
GUEST_SYNC(page0 | TEST_SYNC_NO_FAULT);
61+
62+
WRITE_ONCE(*(u64 *)page1, 1);
63+
GUEST_SYNC(page1 | TEST_SYNC_WRITE_FAULT);
64+
WRITE_ONCE(*(u64 *)page1, 1);
65+
GUEST_SYNC(page1 | TEST_SYNC_WRITE_FAULT);
66+
READ_ONCE(*(u64 *)page1);
67+
GUEST_SYNC(page1 | TEST_SYNC_NO_FAULT);
4368

4469
/* Exit to L1 and never come back. */
4570
vmcall();
4671
}
4772

4873
static void l2_guest_code_tdp_enabled(void)
4974
{
50-
l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2);
75+
/*
76+
* Use the aliased virtual addresses when running with TDP to verify
77+
* that KVM correctly handles the case where a page is dirtied via a
78+
* different GPA than would be used by L1.
79+
*/
80+
l2_guest_code(TEST_MEM_ALIAS_BASE);
5181
}
5282

5383
static void l2_guest_code_tdp_disabled(void)
5484
{
55-
/* Access the same L1 GPAs as l2_guest_code_tdp_enabled() */
56-
l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM);
85+
/*
86+
* Use the "normal" virtual addresses when running without TDP enabled,
87+
* in which case L2 will use the same page tables as L1, and thus needs
88+
* to use the same virtual addresses that are mapped into L1.
89+
*/
90+
l2_guest_code(TEST_MEM_BASE);
5791
}
5892

5993
void l1_vmx_code(struct vmx_pages *vmx)
@@ -72,9 +106,9 @@ void l1_vmx_code(struct vmx_pages *vmx)
72106

73107
prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
74108

75-
GUEST_SYNC(false);
109+
GUEST_SYNC(TEST_SYNC_NO_FAULT);
76110
GUEST_ASSERT(!vmlaunch());
77-
GUEST_SYNC(false);
111+
GUEST_SYNC(TEST_SYNC_NO_FAULT);
78112
GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL);
79113
GUEST_DONE();
80114
}
@@ -91,9 +125,9 @@ static void l1_svm_code(struct svm_test_data *svm)
91125

92126
generic_svm_setup(svm, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
93127

94-
GUEST_SYNC(false);
128+
GUEST_SYNC(TEST_SYNC_NO_FAULT);
95129
run_guest(svm->vmcb, svm->vmcb_gpa);
96-
GUEST_SYNC(false);
130+
GUEST_SYNC(TEST_SYNC_NO_FAULT);
97131
GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL);
98132
GUEST_DONE();
99133
}
@@ -106,12 +140,66 @@ static void l1_guest_code(void *data)
106140
l1_svm_code(data);
107141
}
108142

143+
static void test_handle_ucall_sync(struct kvm_vm *vm, u64 arg,
144+
unsigned long *bmap)
145+
{
146+
vm_vaddr_t gva = arg & ~(PAGE_SIZE - 1);
147+
int page_nr, i;
148+
149+
/*
150+
* Extract the page number of the underlying physical page, which is also
151+
* the _L1_ page number. The dirty bitmap _must_ be updated based on
152+
* the L1 GPA, not L2 GPA, i.e. whether or not L2 used an aliased GPA
153+
* (i.e. if TDP enabled for L2) is irrelevant with respect to the dirty
154+
* bitmap and which underlying physical page is accessed.
155+
*
156+
* Note, gva will be '0' if there was no access, i.e. if the purpose of
157+
* the sync is to verify all pages are clean.
158+
*/
159+
if (!gva)
160+
page_nr = 0;
161+
else if (gva >= TEST_MEM_ALIAS_BASE)
162+
page_nr = (gva - TEST_MEM_ALIAS_BASE) >> PAGE_SHIFT;
163+
else
164+
page_nr = (gva - TEST_MEM_BASE) >> PAGE_SHIFT;
165+
TEST_ASSERT(page_nr == 0 || page_nr == 1,
166+
"Test bug, unexpected frame number '%u' for arg = %lx", page_nr, arg);
167+
TEST_ASSERT(gva || (arg & TEST_SYNC_NO_FAULT),
168+
"Test bug, gva must be valid if a fault is expected");
169+
170+
kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
171+
172+
/*
173+
* Check all pages to verify the correct physical page was modified (or
174+
* not), and that all pages are clean/dirty as expected.
175+
*
176+
* If a fault of any kind is expected, the target page should be dirty
177+
* as the Dirty bit is set in the gPTE. KVM should create a writable
178+
* SPTE even on a read fault, *and* KVM must mark the GFN as dirty
179+
* when doing so.
180+
*/
181+
for (i = 0; i < TEST_MEM_PAGES; i++) {
182+
if (i == page_nr && (arg & TEST_SYNC_WRITE_FAULT))
183+
TEST_ASSERT(*(u64 *)TEST_HVA(vm, i) == 1,
184+
"Page %u incorrectly not written by guest", i);
185+
else
186+
TEST_ASSERT(*(u64 *)TEST_HVA(vm, i) == 0xaaaaaaaaaaaaaaaaULL,
187+
"Page %u incorrectly written by guest", i);
188+
189+
if (i == page_nr && !(arg & TEST_SYNC_NO_FAULT))
190+
TEST_ASSERT(test_bit(i, bmap),
191+
"Page %u incorrectly reported clean on %s fault",
192+
i, arg & TEST_SYNC_READ_FAULT ? "read" : "write");
193+
else
194+
TEST_ASSERT(!test_bit(i, bmap),
195+
"Page %u incorrectly reported dirty", i);
196+
}
197+
}
198+
109199
static void test_dirty_log(bool nested_tdp)
110200
{
111201
vm_vaddr_t nested_gva = 0;
112202
unsigned long *bmap;
113-
uint64_t *host_test_mem;
114-
115203
struct kvm_vcpu *vcpu;
116204
struct kvm_vm *vm;
117205
struct ucall uc;
@@ -133,35 +221,46 @@ static void test_dirty_log(bool nested_tdp)
133221

134222
/* Add an extra memory slot for testing dirty logging */
135223
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
136-
GUEST_TEST_MEM,
224+
TEST_MEM_BASE,
137225
TEST_MEM_SLOT_INDEX,
138226
TEST_MEM_PAGES,
139227
KVM_MEM_LOG_DIRTY_PAGES);
140228

141229
/*
142-
* Add an identity map for GVA range [0xc0000000, 0xc0002000). This
230+
* Add an identity map for GVA range [0xc0000000, 0xc0004000). This
143231
* affects both L1 and L2. However...
144232
*/
145-
virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES);
233+
virt_map(vm, TEST_MEM_BASE, TEST_MEM_BASE, TEST_MEM_PAGES);
146234

147235
/*
148-
* ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
149-
* 0xc0000000.
236+
* ... pages in the L2 GPA address range [0xc0002000, 0xc0004000) will
237+
* map to [0xc0000000, 0xc0002000) when TDP is enabled (for L2).
150238
*
151239
* When TDP is disabled, the L2 guest code will still access the same L1
152240
* GPAs as the TDP enabled case.
241+
*
242+
* Set the Dirty bit in the PTEs used by L2 so that KVM will create
243+
* writable SPTEs when handling read faults (if the Dirty bit isn't
244+
* set, KVM must intercept the next write to emulate the Dirty bit
245+
* update).
153246
*/
154247
if (nested_tdp) {
155248
tdp_identity_map_default_memslots(vm);
156-
tdp_map(vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, PAGE_SIZE);
157-
tdp_map(vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, PAGE_SIZE);
249+
tdp_map(vm, TEST_ALIAS_GPA(0), TEST_GPA(0), PAGE_SIZE);
250+
tdp_map(vm, TEST_ALIAS_GPA(1), TEST_GPA(1), PAGE_SIZE);
251+
252+
*tdp_get_pte(vm, TEST_ALIAS_GPA(0)) |= PTE_DIRTY_MASK(&vm->stage2_mmu);
253+
*tdp_get_pte(vm, TEST_ALIAS_GPA(1)) |= PTE_DIRTY_MASK(&vm->stage2_mmu);
254+
} else {
255+
*vm_get_pte(vm, TEST_GVA(0)) |= PTE_DIRTY_MASK(&vm->mmu);
256+
*vm_get_pte(vm, TEST_GVA(1)) |= PTE_DIRTY_MASK(&vm->mmu);
158257
}
159258

160259
bmap = bitmap_zalloc(TEST_MEM_PAGES);
161-
host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
162260

163261
while (!done) {
164-
memset(host_test_mem, 0xaa, TEST_MEM_PAGES * PAGE_SIZE);
262+
memset(TEST_HVA(vm, 0), 0xaa, TEST_MEM_PAGES * PAGE_SIZE);
263+
165264
vcpu_run(vcpu);
166265
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
167266

@@ -170,23 +269,7 @@ static void test_dirty_log(bool nested_tdp)
170269
REPORT_GUEST_ASSERT(uc);
171270
/* NOT REACHED */
172271
case UCALL_SYNC:
173-
/*
174-
* The nested guest wrote at offset 0x1000 in the memslot, but the
175-
* dirty bitmap must be filled in according to L1 GPA, not L2.
176-
*/
177-
kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
178-
if (uc.args[1]) {
179-
TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean");
180-
TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest");
181-
} else {
182-
TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty");
183-
TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest");
184-
}
185-
186-
TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty");
187-
TEST_ASSERT(host_test_mem[PAGE_SIZE / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
188-
TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty");
189-
TEST_ASSERT(host_test_mem[PAGE_SIZE*2 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
272+
test_handle_ucall_sync(vm, uc.args[1], bmap);
190273
break;
191274
case UCALL_DONE:
192275
done = true;

0 commit comments

Comments
 (0)