Skip to content

Commit 508d1cc

Browse files
yosrym93 and sean-jc
authored and committed
KVM: selftests: Reuse virt mapping functions for nested EPTs
Rework tdp_map() and friends to use __virt_pg_map() and drop the custom
EPT code in __tdp_pg_map() and tdp_create_pte(). The EPT code and
__virt_pg_map() are practically identical, the main differences are:

 - EPT uses the EPT struct overlay instead of the PTE masks.
 - EPT always assumes 4-level EPTs.

To reuse __virt_pg_map(), extend the PTE masks to work with EPT's RWX and
X-only capabilities, and provide a tdp_mmu_init() API so that EPT can pass
in the EPT PTE masks along with the root page level (which is currently
hardcoded to '4').

Don't reuse KVM's insane overloading of the USER bit for EPT_R as there's
no reason to multiplex bits in the selftests, e.g. selftests aren't trying
to shadow guest PTEs and thus don't care about funnelling protections into
a common permissions check.

Another benefit of reusing the code is having separate handling for
upper-level PTEs vs 4K PTEs, which avoids some quirks like setting the
large bit on a 4K PTE in the EPTs.

For all intents and purposes, no functional change intended.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Link: https://patch.msgid.link/20251230230150.4150236-14-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
1 parent 8296b16 commit 508d1cc

4 files changed

Lines changed: 52 additions & 108 deletions

File tree

tools/testing/selftests/kvm/include/x86/kvm_util_arch.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ struct pte_masks {
1414
uint64_t present;
1515
uint64_t writable;
1616
uint64_t user;
17+
uint64_t readable;
18+
uint64_t executable;
1719
uint64_t accessed;
1820
uint64_t dirty;
1921
uint64_t huge;
@@ -37,8 +39,6 @@ struct kvm_vm_arch {
3739
uint64_t s_bit;
3840
int sev_fd;
3941
bool is_pt_protected;
40-
41-
struct kvm_mmu *tdp_mmu;
4242
};
4343

4444
static inline bool __vm_arch_has_protected_memory(struct kvm_vm_arch *arch)

tools/testing/selftests/kvm/include/x86/processor.h

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1444,20 +1444,32 @@ enum pg_level {
14441444
#define PTE_PRESENT_MASK(mmu) ((mmu)->arch.pte_masks.present)
14451445
#define PTE_WRITABLE_MASK(mmu) ((mmu)->arch.pte_masks.writable)
14461446
#define PTE_USER_MASK(mmu) ((mmu)->arch.pte_masks.user)
1447+
#define PTE_READABLE_MASK(mmu) ((mmu)->arch.pte_masks.readable)
1448+
#define PTE_EXECUTABLE_MASK(mmu) ((mmu)->arch.pte_masks.executable)
14471449
#define PTE_ACCESSED_MASK(mmu) ((mmu)->arch.pte_masks.accessed)
14481450
#define PTE_DIRTY_MASK(mmu) ((mmu)->arch.pte_masks.dirty)
14491451
#define PTE_HUGE_MASK(mmu) ((mmu)->arch.pte_masks.huge)
14501452
#define PTE_NX_MASK(mmu) ((mmu)->arch.pte_masks.nx)
14511453
#define PTE_C_BIT_MASK(mmu) ((mmu)->arch.pte_masks.c)
14521454
#define PTE_S_BIT_MASK(mmu) ((mmu)->arch.pte_masks.s)
14531455

1454-
#define is_present_pte(mmu, pte) (!!(*(pte) & PTE_PRESENT_MASK(mmu)))
1456+
/*
1457+
* For PTEs without a PRESENT bit (i.e. EPT entries), treat the PTE as present
1458+
* if it's executable or readable, as EPT supports execute-only PTEs, but not
1459+
* write-only PTEs.
1460+
*/
1461+
#define is_present_pte(mmu, pte) \
1462+
(PTE_PRESENT_MASK(mmu) ? \
1463+
!!(*(pte) & PTE_PRESENT_MASK(mmu)) : \
1464+
!!(*(pte) & (PTE_READABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu))))
1465+
#define is_executable_pte(mmu, pte) \
1466+
((*(pte) & (PTE_EXECUTABLE_MASK(mmu) | PTE_NX_MASK(mmu))) == PTE_EXECUTABLE_MASK(mmu))
14551467
#define is_writable_pte(mmu, pte) (!!(*(pte) & PTE_WRITABLE_MASK(mmu)))
14561468
#define is_user_pte(mmu, pte) (!!(*(pte) & PTE_USER_MASK(mmu)))
14571469
#define is_accessed_pte(mmu, pte) (!!(*(pte) & PTE_ACCESSED_MASK(mmu)))
14581470
#define is_dirty_pte(mmu, pte) (!!(*(pte) & PTE_DIRTY_MASK(mmu)))
14591471
#define is_huge_pte(mmu, pte) (!!(*(pte) & PTE_HUGE_MASK(mmu)))
1460-
#define is_nx_pte(mmu, pte) (!!(*(pte) & PTE_NX_MASK(mmu)))
1472+
#define is_nx_pte(mmu, pte) (!is_executable_pte(mmu, pte))
14611473

14621474
void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels,
14631475
struct pte_masks *pte_masks);

tools/testing/selftests/kvm/lib/x86/processor.c

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,10 @@ static void virt_mmu_init(struct kvm_vm *vm, struct kvm_mmu *mmu,
165165
mmu->pgd_created = true;
166166
mmu->arch.pte_masks = *pte_masks;
167167
}
168+
169+
TEST_ASSERT(mmu->pgtable_levels == 4 || mmu->pgtable_levels == 5,
170+
"Selftests MMU only supports 4-level and 5-level paging, not %u-level paging",
171+
mmu->pgtable_levels);
168172
}
169173

170174
void virt_arch_pgd_alloc(struct kvm_vm *vm)
@@ -180,6 +184,7 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
180184
.dirty = BIT_ULL(6),
181185
.huge = BIT_ULL(7),
182186
.nx = BIT_ULL(63),
187+
.executable = 0,
183188
.c = vm->arch.c_bit,
184189
.s = vm->arch.s_bit,
185190
};
@@ -190,10 +195,10 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
190195
void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels,
191196
struct pte_masks *pte_masks)
192197
{
193-
TEST_ASSERT(!vm->arch.tdp_mmu, "TDP MMU already initialized");
198+
TEST_ASSERT(!vm->stage2_mmu.pgtable_levels, "TDP MMU already initialized");
194199

195-
vm->arch.tdp_mmu = calloc(1, sizeof(*vm->arch.tdp_mmu));
196-
virt_mmu_init(vm, vm->arch.tdp_mmu, pte_masks);
200+
vm->stage2_mmu.pgtable_levels = pgtable_levels;
201+
virt_mmu_init(vm, &vm->stage2_mmu, pte_masks);
197202
}
198203

199204
static void *virt_get_pte(struct kvm_vm *vm, struct kvm_mmu *mmu,
@@ -223,7 +228,8 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
223228
paddr = vm_untag_gpa(vm, paddr);
224229

225230
if (!is_present_pte(mmu, pte)) {
226-
*pte = PTE_PRESENT_MASK(mmu) | PTE_WRITABLE_MASK(mmu);
231+
*pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) |
232+
PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu);
227233
if (current_level == target_level)
228234
*pte |= PTE_HUGE_MASK(mmu) | (paddr & PHYSICAL_PAGE_MASK);
229235
else
@@ -269,6 +275,9 @@ void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, uint64_t vaddr,
269275
TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr,
270276
"Unexpected bits in paddr: %lx", paddr);
271277

278+
TEST_ASSERT(!PTE_EXECUTABLE_MASK(mmu) || !PTE_NX_MASK(mmu),
279+
"X and NX bit masks cannot be used simultaneously");
280+
272281
/*
273282
* Allocate upper level page tables, if not already present. Return
274283
* early if a hugepage was created.
@@ -286,7 +295,9 @@ void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, uint64_t vaddr,
286295
pte = virt_get_pte(vm, mmu, pte, vaddr, PG_LEVEL_4K);
287296
TEST_ASSERT(!is_present_pte(mmu, pte),
288297
"PTE already present for 4k page at vaddr: 0x%lx", vaddr);
289-
*pte = PTE_PRESENT_MASK(mmu) | PTE_WRITABLE_MASK(mmu) | (paddr & PHYSICAL_PAGE_MASK);
298+
*pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) |
299+
PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu) |
300+
(paddr & PHYSICAL_PAGE_MASK);
290301

291302
/*
292303
* Neither SEV nor TDX supports shared page tables, so only the final

tools/testing/selftests/kvm/lib/x86/vmx.c

Lines changed: 20 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -25,21 +25,6 @@ bool enable_evmcs;
2525
struct hv_enlightened_vmcs *current_evmcs;
2626
struct hv_vp_assist_page *current_vp_assist;
2727

28-
struct eptPageTableEntry {
29-
uint64_t readable:1;
30-
uint64_t writable:1;
31-
uint64_t executable:1;
32-
uint64_t memory_type:3;
33-
uint64_t ignore_pat:1;
34-
uint64_t page_size:1;
35-
uint64_t accessed:1;
36-
uint64_t dirty:1;
37-
uint64_t ignored_11_10:2;
38-
uint64_t address:40;
39-
uint64_t ignored_62_52:11;
40-
uint64_t suppress_ve:1;
41-
};
42-
4328
int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
4429
{
4530
uint16_t evmcs_ver;
@@ -58,13 +43,24 @@ int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
5843

5944
void vm_enable_ept(struct kvm_vm *vm)
6045
{
61-
TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
62-
if (vm->arch.tdp_mmu)
63-
return;
46+
struct pte_masks pte_masks;
6447

65-
/* TODO: Drop eptPageTableEntry in favor of PTE masks. */
66-
struct pte_masks pte_masks = (struct pte_masks) {
48+
TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
6749

50+
/*
51+
* EPTs do not have 'present' or 'user' bits, instead bit 0 is the
52+
* 'readable' bit.
53+
*/
54+
pte_masks = (struct pte_masks) {
55+
.present = 0,
56+
.user = 0,
57+
.readable = BIT_ULL(0),
58+
.writable = BIT_ULL(1),
59+
.executable = BIT_ULL(2),
60+
.huge = BIT_ULL(7),
61+
.accessed = BIT_ULL(8),
62+
.dirty = BIT_ULL(9),
63+
.nx = 0,
6864
};
6965

7066
/* TODO: Add support for 5-level EPT. */
@@ -120,8 +116,8 @@ vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
120116
vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
121117
memset(vmx->vmwrite_hva, 0, getpagesize());
122118

123-
if (vm->arch.tdp_mmu)
124-
vmx->eptp_gpa = vm->arch.tdp_mmu->pgd;
119+
if (vm->stage2_mmu.pgd_created)
120+
vmx->eptp_gpa = vm->stage2_mmu.pgd;
125121

126122
*p_vmx_gva = vmx_gva;
127123
return vmx;
@@ -377,82 +373,6 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
377373
init_vmcs_guest_state(guest_rip, guest_rsp);
378374
}
379375

380-
static void tdp_create_pte(struct kvm_vm *vm,
381-
struct eptPageTableEntry *pte,
382-
uint64_t nested_paddr,
383-
uint64_t paddr,
384-
int current_level,
385-
int target_level)
386-
{
387-
if (!pte->readable) {
388-
pte->writable = true;
389-
pte->readable = true;
390-
pte->executable = true;
391-
pte->page_size = (current_level == target_level);
392-
if (pte->page_size)
393-
pte->address = paddr >> vm->page_shift;
394-
else
395-
pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
396-
} else {
397-
/*
398-
* Entry already present. Assert that the caller doesn't want
399-
* a hugepage at this level, and that there isn't a hugepage at
400-
* this level.
401-
*/
402-
TEST_ASSERT(current_level != target_level,
403-
"Cannot create hugepage at level: %u, nested_paddr: 0x%lx",
404-
current_level, nested_paddr);
405-
TEST_ASSERT(!pte->page_size,
406-
"Cannot create page table at level: %u, nested_paddr: 0x%lx",
407-
current_level, nested_paddr);
408-
}
409-
}
410-
411-
412-
void __tdp_pg_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr,
413-
int target_level)
414-
{
415-
const uint64_t page_size = PG_LEVEL_SIZE(target_level);
416-
void *eptp_hva = addr_gpa2hva(vm, vm->arch.tdp_mmu->pgd);
417-
struct eptPageTableEntry *pt = eptp_hva, *pte;
418-
uint16_t index;
419-
420-
TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
421-
"Unknown or unsupported guest mode: 0x%x", vm->mode);
422-
423-
TEST_ASSERT((nested_paddr >> 48) == 0,
424-
"Nested physical address 0x%lx is > 48-bits and requires 5-level EPT",
425-
nested_paddr);
426-
TEST_ASSERT((nested_paddr % page_size) == 0,
427-
"Nested physical address not on page boundary,\n"
428-
" nested_paddr: 0x%lx page_size: 0x%lx",
429-
nested_paddr, page_size);
430-
TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
431-
"Physical address beyond beyond maximum supported,\n"
432-
" nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
433-
paddr, vm->max_gfn, vm->page_size);
434-
TEST_ASSERT((paddr % page_size) == 0,
435-
"Physical address not on page boundary,\n"
436-
" paddr: 0x%lx page_size: 0x%lx",
437-
paddr, page_size);
438-
TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
439-
"Physical address beyond beyond maximum supported,\n"
440-
" paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
441-
paddr, vm->max_gfn, vm->page_size);
442-
443-
for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
444-
index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
445-
pte = &pt[index];
446-
447-
tdp_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
448-
449-
if (pte->page_size)
450-
break;
451-
452-
pt = addr_gpa2hva(vm, pte->address * vm->page_size);
453-
}
454-
}
455-
456376
/*
457377
* Map a range of EPT guest physical addresses to the VM's physical address
458378
*
@@ -473,14 +393,15 @@ void __tdp_pg_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr,
473393
void __tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr,
474394
uint64_t size, int level)
475395
{
396+
struct kvm_mmu *mmu = &vm->stage2_mmu;
476397
size_t page_size = PG_LEVEL_SIZE(level);
477398
size_t npages = size / page_size;
478399

479400
TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow");
480401
TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
481402

482403
while (npages--) {
483-
__tdp_pg_map(vm, nested_paddr, paddr, level);
404+
__virt_pg_map(vm, mmu, nested_paddr, paddr, level);
484405
nested_paddr += page_size;
485406
paddr += page_size;
486407
}

0 commit comments

Comments
 (0)