Skip to content

Commit 258d985

Browse files
sean-jc authored and bonzini committed
KVM: x86/mmu: Use EMULTYPE flag to track write #PFs to shadow pages
Use a new EMULTYPE flag, EMULTYPE_WRITE_PF_TO_SP, to track page faults on self-changing writes to shadowed page tables instead of propagating that information to the emulator via a semi-persistent vCPU flag. Using a flag in "struct kvm_vcpu_arch" is confusing, especially as implemented, as it's not at all obvious that clearing the flag only when emulation actually occurs is correct. E.g. if KVM sets the flag and then retries the fault without ever getting to the emulator, the flag will be left set for future calls into the emulator. But because the flag is consumed if and only if both EMULTYPE_PF and EMULTYPE_ALLOW_RETRY_PF are set, and because EMULTYPE_ALLOW_RETRY_PF is deliberately not set for direct MMUs, emulated MMIO, or while L2 is active, KVM avoids false positives on a stale flag since FNAME(page_fault) is guaranteed to be run and refresh the flag before it's ultimately consumed by the tail end of reexecute_instruction(). Signed-off-by: Sean Christopherson <seanjc@google.com> Message-Id: <20230202182817.407394-2-seanjc@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent f3e7074 commit 258d985

5 files changed

Lines changed: 37 additions & 36 deletions

File tree

arch/x86/include/asm/kvm_host.h

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -947,23 +947,6 @@ struct kvm_vcpu_arch {
947947

948948
u64 msr_kvm_poll_control;
949949

950-
/*
951-
* Indicates the guest is trying to write a gfn that contains one or
952-
* more of the PTEs used to translate the write itself, i.e. the access
953-
* is changing its own translation in the guest page tables. KVM exits
954-
* to userspace if emulation of the faulting instruction fails and this
955-
* flag is set, as KVM cannot make forward progress.
956-
*
957-
* If emulation fails for a write to guest page tables, KVM unprotects
958-
* (zaps) the shadow page for the target gfn and resumes the guest to
959-
* retry the non-emulatable instruction (on hardware). Unprotecting the
960-
* gfn doesn't allow forward progress for a self-changing access because
961-
* doing so also zaps the translation for the gfn, i.e. retrying the
962-
* instruction will hit a !PRESENT fault, which results in a new shadow
963-
* page and sends KVM back to square one.
964-
*/
965-
bool write_fault_to_shadow_pgtable;
966-
967950
/* set at EPT violation at this point */
968951
unsigned long exit_qualification;
969952

@@ -1907,6 +1890,25 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
19071890
* EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility
19081891
* state and inject single-step #DBs after skipping
19091892
* an instruction (after completing userspace I/O).
1893+
*
1894+
* EMULTYPE_WRITE_PF_TO_SP - Set when emulating an intercepted page fault that
1895+
* is attempting to write a gfn that contains one or
1896+
* more of the PTEs used to translate the write itself,
1897+
* and the owning page table is being shadowed by KVM.
1898+
* If emulation of the faulting instruction fails and
1899+
* this flag is set, KVM will exit to userspace instead
1900+
* of retrying emulation as KVM cannot make forward
1901+
* progress.
1902+
*
1903+
* If emulation fails for a write to guest page tables,
1904+
* KVM unprotects (zaps) the shadow page for the target
1905+
* gfn and resumes the guest to retry the non-emulatable
1906+
* instruction (on hardware). Unprotecting the gfn
1907+
* doesn't allow forward progress for a self-changing
1908+
* access because doing so also zaps the translation for
1909+
* the gfn, i.e. retrying the instruction will hit a
1910+
* !PRESENT fault, which results in a new shadow page
1911+
* and sends KVM back to square one.
19101912
*/
19111913
#define EMULTYPE_NO_DECODE (1 << 0)
19121914
#define EMULTYPE_TRAP_UD (1 << 1)
@@ -1916,6 +1918,7 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
19161918
#define EMULTYPE_VMWARE_GP (1 << 5)
19171919
#define EMULTYPE_PF (1 << 6)
19181920
#define EMULTYPE_COMPLETE_USER_EXIT (1 << 7)
1921+
#define EMULTYPE_WRITE_PF_TO_SP (1 << 8)
19191922

19201923
int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
19211924
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,

arch/x86/kvm/mmu/mmu.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4203,7 +4203,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
42034203
work->arch.cr3 != vcpu->arch.mmu->get_guest_pgd(vcpu))
42044204
return;
42054205

4206-
kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
4206+
kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true, NULL);
42074207
}
42084208

42094209
static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
@@ -5664,7 +5664,8 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
56645664

56655665
if (r == RET_PF_INVALID) {
56665666
r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa,
5667-
lower_32_bits(error_code), false);
5667+
lower_32_bits(error_code), false,
5668+
&emulation_type);
56685669
if (KVM_BUG_ON(r == RET_PF_INVALID, vcpu->kvm))
56695670
return -EIO;
56705671
}

arch/x86/kvm/mmu/mmu_internal.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,13 @@ struct kvm_page_fault {
240240
kvm_pfn_t pfn;
241241
hva_t hva;
242242
bool map_writable;
243+
244+
/*
245+
* Indicates the guest is trying to write a gfn that contains one or
246+
* more of the PTEs used to translate the write itself, i.e. the access
247+
* is changing its own translation in the guest page tables.
248+
*/
249+
bool write_fault_to_shadow_pgtable;
243250
};
244251

245252
int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
@@ -273,7 +280,7 @@ enum {
273280
};
274281

275282
static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
276-
u32 err, bool prefetch)
283+
u32 err, bool prefetch, int *emulation_type)
277284
{
278285
struct kvm_page_fault fault = {
279286
.addr = cr2_or_gpa,
@@ -312,6 +319,9 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
312319
else
313320
r = vcpu->arch.mmu->page_fault(vcpu, &fault);
314321

322+
if (fault.write_fault_to_shadow_pgtable && emulation_type)
323+
*emulation_type |= EMULTYPE_WRITE_PF_TO_SP;
324+
315325
/*
316326
* Similar to above, prefetch faults aren't truly spurious, and the
317327
* async #PF path doesn't do emulation. Do count faults that are fixed

arch/x86/kvm/mmu/paging_tmpl.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -825,10 +825,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
825825
if (r)
826826
return r;
827827

828-
vcpu->arch.write_fault_to_shadow_pgtable = false;
829-
830828
is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
831-
&walker, fault->user, &vcpu->arch.write_fault_to_shadow_pgtable);
829+
&walker, fault->user, &fault->write_fault_to_shadow_pgtable);
832830

833831
if (is_self_change_mapping)
834832
fault->max_level = PG_LEVEL_4K;

arch/x86/kvm/x86.c

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8463,7 +8463,6 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
84638463
}
84648464

84658465
static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
8466-
bool write_fault_to_shadow_pgtable,
84678466
int emulation_type)
84688467
{
84698468
gpa_t gpa = cr2_or_gpa;
@@ -8534,7 +8533,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
85348533
* be fixed by unprotecting shadow page and it should
85358534
* be reported to userspace.
85368535
*/
8537-
return !write_fault_to_shadow_pgtable;
8536+
return !(emulation_type & EMULTYPE_WRITE_PF_TO_SP);
85388537
}
85398538

85408539
static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
@@ -8782,20 +8781,12 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
87828781
int r;
87838782
struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
87848783
bool writeback = true;
8785-
bool write_fault_to_spt;
87868784

87878785
if (unlikely(!kvm_can_emulate_insn(vcpu, emulation_type, insn, insn_len)))
87888786
return 1;
87898787

87908788
vcpu->arch.l1tf_flush_l1d = true;
87918789

8792-
/*
8793-
* Clear write_fault_to_shadow_pgtable here to ensure it is
8794-
* never reused.
8795-
*/
8796-
write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
8797-
vcpu->arch.write_fault_to_shadow_pgtable = false;
8798-
87998790
if (!(emulation_type & EMULTYPE_NO_DECODE)) {
88008791
kvm_clear_exception_queue(vcpu);
88018792

@@ -8816,7 +8807,6 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
88168807
return 1;
88178808
}
88188809
if (reexecute_instruction(vcpu, cr2_or_gpa,
8819-
write_fault_to_spt,
88208810
emulation_type))
88218811
return 1;
88228812

@@ -8895,8 +8885,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
88958885
return 1;
88968886

88978887
if (r == EMULATION_FAILED) {
8898-
if (reexecute_instruction(vcpu, cr2_or_gpa, write_fault_to_spt,
8899-
emulation_type))
8888+
if (reexecute_instruction(vcpu, cr2_or_gpa, emulation_type))
89008889
return 1;
89018890

89028891
return handle_emulation_failure(vcpu, emulation_type);

0 commit comments

Comments (0)