
Commit c63cf13

codomania authored and bonzini committed
KVM: SEV: Add support to handle RMP nested page faults
When SEV-SNP is enabled in the guest, the hardware places restrictions on all memory accesses based on the contents of the RMP table. When hardware encounters an RMP check failure caused by a guest memory access, it raises a #NPF. The error code contains additional information on the access type. See APM volume 2 for additional information.

When using gmem, RMP faults resulting from mismatches between the state in the RMP table vs. what the guest expects via its page table result in KVM_EXIT_MEMORY_FAULTs being forwarded to userspace to handle. This means the only expected case that needs to be handled in the kernel is when the page size of the entry in the RMP table is larger than the mapping in the nested page table, in which case a PSMASH instruction needs to be issued to split the large RMP entry into individual 4K entries so that subsequent accesses can succeed.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Co-developed-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
Message-ID: <20240501085210.2213060-12-michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent 9b54e24 commit c63cf13

9 files changed

Lines changed: 159 additions & 6 deletions
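
[Editor's note] To make the size-mismatch case from the commit message concrete, here is a minimal user-space model of what PSMASH accomplishes: a 2MB RMP entry whose page-size bit disagrees with a 4K NPT mapping gets split into 512 4K entries. Every name and the entry layout below are invented for illustration; the real RMP is a hardware-owned table that is only modified via the PSMASH/RMPUPDATE instructions.

/* Toy model of the PSMASH case: the RMP says 2MB but the NPT maps 4K.
 * Struct layout and names are illustrative, not the hardware format. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define ENTRIES_PER_2M 512          /* 2MB / 4K */

struct toy_rmp_entry {
	bool assigned;              /* owned by a guest */
	bool page_size_2m;          /* page-size bit: 2MB vs 4K */
};

/* Model of PSMASH: one 2MB entry becomes 512 assigned 4K entries. */
static void toy_psmash(struct toy_rmp_entry *rmp)
{
	for (size_t i = 0; i < ENTRIES_PER_2M; i++) {
		rmp[i].assigned = true;
		rmp[i].page_size_2m = false;
	}
}

int main(void)
{
	static struct toy_rmp_entry rmp[ENTRIES_PER_2M];
	bool npt_maps_2m = false;   /* nested page table maps this GPA at 4K */

	rmp[0] = (struct toy_rmp_entry){ .assigned = true, .page_size_2m = true };

	if (rmp[0].page_size_2m && !npt_maps_2m) {
		toy_psmash(rmp);    /* what sev_handle_rmp_fault() arranges */
		printf("split into %d 4K entries\n", ENTRIES_PER_2M);
	}
	return 0;
}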


arch/x86/include/asm/kvm_host.h

Lines changed: 1 addition & 0 deletions
@@ -1942,6 +1942,7 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
 				   const struct kvm_memory_slot *memslot);
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
+void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
 
 int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);

arch/x86/include/asm/sev.h

Lines changed: 3 additions & 0 deletions
@@ -91,6 +91,9 @@ extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
 /* RMUPDATE detected 4K page and 2MB page overlap. */
 #define RMPUPDATE_FAIL_OVERLAP		4
 
+/* PSMASH failed due to concurrent access by another CPU */
+#define PSMASH_FAIL_INUSE		3
+
 /* RMP page size */
 #define RMP_PG_SIZE_4K		0
 #define RMP_PG_SIZE_2M		1

arch/x86/kvm/mmu.h

Lines changed: 0 additions & 2 deletions
@@ -253,8 +253,6 @@ static inline bool kvm_mmu_honors_guest_mtrrs(struct kvm *kvm)
 	return __kvm_mmu_honors_guest_mtrrs(kvm_arch_has_noncoherent_dma(kvm));
 }
 
-void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
-
 int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
 
 int kvm_mmu_post_init_vm(struct kvm *kvm);

arch/x86/kvm/mmu/mmu.c

Lines changed: 1 addition & 0 deletions
@@ -6802,6 +6802,7 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
 
 	return need_tlb_flush;
 }
+EXPORT_SYMBOL_GPL(kvm_zap_gfn_range);
 
 static void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
 					   const struct kvm_memory_slot *slot)

arch/x86/kvm/svm/sev.c

Lines changed: 109 additions & 0 deletions
@@ -3465,6 +3465,23 @@ static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
 	svm->vmcb->control.ghcb_gpa = value;
 }
 
+static int snp_rmptable_psmash(kvm_pfn_t pfn)
+{
+	int ret;
+
+	pfn = pfn & ~(KVM_PAGES_PER_HPAGE(PG_LEVEL_2M) - 1);
+
+	/*
+	 * PSMASH_FAIL_INUSE indicates another processor is modifying the
+	 * entry, so retry until that's no longer the case.
+	 */
+	do {
+		ret = psmash(pfn);
+	} while (ret == PSMASH_FAIL_INUSE);
+
+	return ret;
+}
+
 static int snp_complete_psc_msr(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -4229,3 +4246,95 @@ struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu)
 
 	return p;
 }
+
+void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code)
+{
+	struct kvm_memory_slot *slot;
+	struct kvm *kvm = vcpu->kvm;
+	int order, rmp_level, ret;
+	bool assigned;
+	kvm_pfn_t pfn;
+	gfn_t gfn;
+
+	gfn = gpa >> PAGE_SHIFT;
+
+	/*
+	 * The only time RMP faults occur for shared pages is when the guest is
+	 * triggering an RMP fault for an implicit page-state change from
+	 * shared->private. Implicit page-state changes are forwarded to
+	 * userspace via KVM_EXIT_MEMORY_FAULT events, however, so RMP faults
+	 * for shared pages should not end up here.
+	 */
+	if (!kvm_mem_is_private(kvm, gfn)) {
+		pr_warn_ratelimited("SEV: Unexpected RMP fault for non-private GPA 0x%llx\n",
+				    gpa);
+		return;
+	}
+
+	slot = gfn_to_memslot(kvm, gfn);
+	if (!kvm_slot_can_be_private(slot)) {
+		pr_warn_ratelimited("SEV: Unexpected RMP fault, non-private slot for GPA 0x%llx\n",
+				    gpa);
+		return;
+	}
+
+	ret = kvm_gmem_get_pfn(kvm, slot, gfn, &pfn, &order);
+	if (ret) {
+		pr_warn_ratelimited("SEV: Unexpected RMP fault, no backing page for private GPA 0x%llx\n",
+				    gpa);
+		return;
+	}
+
+	ret = snp_lookup_rmpentry(pfn, &assigned, &rmp_level);
+	if (ret || !assigned) {
+		pr_warn_ratelimited("SEV: Unexpected RMP fault, no assigned RMP entry found for GPA 0x%llx PFN 0x%llx error %d\n",
+				    gpa, pfn, ret);
+		goto out_no_trace;
+	}
+
+	/*
+	 * There are 2 cases where a PSMASH may be needed to resolve an #NPF
+	 * with PFERR_GUEST_RMP_BIT set:
+	 *
+	 * 1) RMPADJUST/PVALIDATE can trigger an #NPF with PFERR_GUEST_SIZEM
+	 *    bit set if the guest issues them with a smaller granularity than
+	 *    what is indicated by the page-size bit in the 2MB RMP entry for
+	 *    the PFN that backs the GPA.
+	 *
+	 * 2) Guest access via NPT can trigger an #NPF if the NPT mapping is
+	 *    smaller than what is indicated by the 2MB RMP entry for the PFN
+	 *    that backs the GPA.
+	 *
+	 * In both these cases, the corresponding 2M RMP entry needs to
+	 * be PSMASH'd to 512 4K RMP entries. If the RMP entry is already
+	 * split into 4K RMP entries, then this is likely a spurious case which
+	 * can occur when there are concurrent accesses by the guest to a 2MB
+	 * GPA range that is backed by a 2MB-aligned PFN whose RMP entry is in
+	 * the process of being PSMASH'd into 4K entries. These cases should
+	 * resolve automatically on subsequent accesses, so just ignore them
+	 * here.
+	 */
+	if (rmp_level == PG_LEVEL_4K)
+		goto out;
+
+	ret = snp_rmptable_psmash(pfn);
+	if (ret) {
+		/*
+		 * Look it up again. If it's 4K now then the PSMASH may have
+		 * raced with another process and the issue has already resolved
+		 * itself.
+		 */
+		if (!snp_lookup_rmpentry(pfn, &assigned, &rmp_level) &&
+		    assigned && rmp_level == PG_LEVEL_4K)
+			goto out;
+
+		pr_warn_ratelimited("SEV: Unable to split RMP entry for GPA 0x%llx PFN 0x%llx ret %d\n",
+				    gpa, pfn, ret);
+	}
+
+	kvm_zap_gfn_range(kvm, gfn, gfn + PTRS_PER_PMD);
+out:
+	trace_kvm_rmp_fault(vcpu, gpa, pfn, error_code, rmp_level, ret);
+out_no_trace:
+	put_page(pfn_to_page(pfn));
+}
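
[Editor's note] A quick numeric check of the arithmetic in snp_rmptable_psmash() and the zap at the end of sev_handle_rmp_fault(): with 4K base pages, KVM_PAGES_PER_HPAGE(PG_LEVEL_2M) and PTRS_PER_PMD are both 512, so the faulting PFN is masked down to a 2MB boundary before PSMASH and the zap spans a 512-GFN window. A standalone sketch; the 512 constant is the usual x86 value, restated here rather than taken from kernel headers.

/* Sketch of the 2MB alignment used by snp_rmptable_psmash(), assuming 4K
 * base pages so that one 2MB huge page spans 512 frames. */
#include <stdint.h>
#include <stdio.h>

#define PAGES_PER_2M 512ULL

int main(void)
{
	uint64_t pfn = 0x12345;	/* arbitrary faulting 4K frame */
	uint64_t base = pfn & ~(PAGES_PER_2M - 1);	/* 2MB-aligned start */

	/* PSMASH acts on the whole 2MB region, so the handler aligns the
	 * PFN down first; the later kvm_zap_gfn_range() call covers the
	 * same number of GFNs (PTRS_PER_PMD == 512). */
	printf("pfn 0x%llx -> psmash base 0x%llx (%llu 4K frames)\n",
	       (unsigned long long)pfn, (unsigned long long)base,
	       PAGES_PER_2M);
	return 0;
}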

arch/x86/kvm/svm/svm.c

Lines changed: 10 additions & 4 deletions
@@ -2044,6 +2044,7 @@ static int pf_interception(struct kvm_vcpu *vcpu)
 static int npf_interception(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
+	int rc;
 
 	u64 fault_address = svm->vmcb->control.exit_info_2;
 	u64 error_code = svm->vmcb->control.exit_info_1;
@@ -2061,10 +2062,15 @@ static int npf_interception(struct kvm_vcpu *vcpu)
 		error_code |= PFERR_PRIVATE_ACCESS;
 
 	trace_kvm_page_fault(vcpu, fault_address, error_code);
-	return kvm_mmu_page_fault(vcpu, fault_address, error_code,
-				  static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
-				  svm->vmcb->control.insn_bytes : NULL,
-				  svm->vmcb->control.insn_len);
+	rc = kvm_mmu_page_fault(vcpu, fault_address, error_code,
+				static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
+				svm->vmcb->control.insn_bytes : NULL,
+				svm->vmcb->control.insn_len);
+
+	if (rc > 0 && error_code & PFERR_GUEST_RMP_MASK)
+		sev_handle_rmp_fault(vcpu, fault_address, error_code);
+
+	return rc;
 }
 
 static int db_interception(struct kvm_vcpu *vcpu)
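
[Editor's note] One note on the control flow above: kvm_mmu_page_fault() returning a positive value means KVM will resume the guest and let it retry the access, which is exactly the case where splitting the RMP entry first can make the retry succeed. The sketch below decodes the two RMP-related error-code bits; the bit positions (31 for RMP, 33 for the SIZEM size mismatch) are restated from APM volume 2 as an assumption rather than copied from KVM's headers.

/* Sketch: decoding RMP-related #NPF error-code bits (positions per APM
 * vol. 2 -- stated here as an assumption, not pulled from kvm_host.h). */
#include <stdint.h>
#include <stdio.h>

#define NPF_ERR_RMP	(1ULL << 31)	/* RMP check failure */
#define NPF_ERR_SIZEM	(1ULL << 33)	/* RMPADJUST/PVALIDATE size mismatch */

int main(void)
{
	uint64_t error_code = NPF_ERR_RMP | NPF_ERR_SIZEM;

	if (error_code & NPF_ERR_RMP)
		printf("RMP fault%s -> candidate for PSMASH handling\n",
		       (error_code & NPF_ERR_SIZEM) ? " (size mismatch)" : "");
	return 0;
}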

arch/x86/kvm/svm/svm.h

Lines changed: 3 additions & 0 deletions
@@ -728,6 +728,7 @@ void sev_hardware_unsetup(void);
 int sev_cpu_init(struct svm_cpu_data *sd);
 int sev_dev_get_attr(u32 group, u64 attr, u64 *val);
 extern unsigned int max_sev_asid;
+void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code);
 #else
 static inline struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu) {
 	return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
@@ -741,6 +742,8 @@ static inline void sev_hardware_unsetup(void) {}
 static inline int sev_cpu_init(struct svm_cpu_data *sd) { return 0; }
 static inline int sev_dev_get_attr(u32 group, u64 attr, u64 *val) { return -ENXIO; }
 #define max_sev_asid 0
+static inline void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code) {}
+
 #endif
 
 /* vmenter.S */

arch/x86/kvm/trace.h

Lines changed: 31 additions & 0 deletions
@@ -1834,6 +1834,37 @@ TRACE_EVENT(kvm_vmgexit_msr_protocol_exit,
 		  __entry->vcpu_id, __entry->ghcb_gpa, __entry->result)
 );
 
+/*
+ * Tracepoint for #NPFs due to RMP faults.
+ */
+TRACE_EVENT(kvm_rmp_fault,
+	TP_PROTO(struct kvm_vcpu *vcpu, u64 gpa, u64 pfn, u64 error_code,
+		 int rmp_level, int psmash_ret),
+	TP_ARGS(vcpu, gpa, pfn, error_code, rmp_level, psmash_ret),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, vcpu_id)
+		__field(u64, gpa)
+		__field(u64, pfn)
+		__field(u64, error_code)
+		__field(int, rmp_level)
+		__field(int, psmash_ret)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id = vcpu->vcpu_id;
+		__entry->gpa = gpa;
+		__entry->pfn = pfn;
+		__entry->error_code = error_code;
+		__entry->rmp_level = rmp_level;
+		__entry->psmash_ret = psmash_ret;
+	),
+
+	TP_printk("vcpu %u gpa %016llx pfn 0x%llx error_code 0x%llx rmp_level %d psmash_ret %d",
+		  __entry->vcpu_id, __entry->gpa, __entry->pfn,
+		  __entry->error_code, __entry->rmp_level, __entry->psmash_ret)
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
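
[Editor's note] Once applied, the new event can be enabled like any other kvm tracepoint through tracefs. A small sketch in C; the path is an assumption (tracefs is commonly mounted at /sys/kernel/tracing, or /sys/kernel/debug/tracing on older setups).

/* Sketch: enable the new tracepoint via tracefs (path is an assumption;
 * it depends on where tracefs is mounted on the host). */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/tracing/events/kvm/kvm_rmp_fault/enable", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fputs("1\n", f);	/* writing "0" disables the event again */
	fclose(f);
	return 0;
}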

arch/x86/kvm/x86.c

Lines changed: 1 addition & 0 deletions
@@ -13997,6 +13997,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_rmp_fault);
 
 static int __init kvm_x86_init(void)
 {
