Skip to content

Commit a1c288f

Browse files
committed
Merge tag 'kvm-x86-misc-6.4' of https://github.com/kvm-x86/linux into HEAD
KVM x86 changes for 6.4:

 - Optimize CR0.WP toggling by avoiding an MMU reload when TDP is enabled,
   and by giving the guest control of CR0.WP when EPT is enabled on VMX
   (VMX-only because SVM doesn't support per-bit controls)

 - Add CR0/CR4 helpers to query single bits, and clean up related code
   where KVM was interpreting kvm_read_cr4_bits()'s "unsigned long" return
   as a bool

 - Move AMD_PSFD to cpufeatures.h and purge KVM's definition

 - Misc cleanups
2 parents e1a6d5c + cf9f4c0 commit a1c288f

13 files changed

Lines changed: 139 additions & 52 deletions

File tree

arch/x86/include/asm/cpufeatures.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -337,6 +337,7 @@
337337
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
338338
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
339339
#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
340+
#define X86_FEATURE_AMD_PSFD (13*32+28) /* "" Predictive Store Forwarding Disable */
340341
#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
341342
#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */
342343

arch/x86/kvm/cpuid.c

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -60,12 +60,6 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted)
6060
return ret;
6161
}
6262

63-
/*
64-
* This one is tied to SSB in the user API, and not
65-
* visible in /proc/cpuinfo.
66-
*/
67-
#define KVM_X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */
68-
6963
#define F feature_bit
7064

7165
/* Scattered Flag - For features that are scattered by cpufeatures.h. */
@@ -266,7 +260,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
266260
/* Update OSXSAVE bit */
267261
if (boot_cpu_has(X86_FEATURE_XSAVE))
268262
cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
269-
kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE));
263+
kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
270264

271265
cpuid_entry_change(best, X86_FEATURE_APIC,
272266
vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
@@ -275,7 +269,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
275269
best = cpuid_entry2_find(entries, nent, 7, 0);
276270
if (best && boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7)
277271
cpuid_entry_change(best, X86_FEATURE_OSPKE,
278-
kvm_read_cr4_bits(vcpu, X86_CR4_PKE));
272+
kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
279273

280274
best = cpuid_entry2_find(entries, nent, 0xD, 0);
281275
if (best)
@@ -715,7 +709,7 @@ void kvm_set_cpu_caps(void)
715709
F(CLZERO) | F(XSAVEERPTR) |
716710
F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
717711
F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON) |
718-
__feature_bit(KVM_X86_FEATURE_AMD_PSFD)
712+
F(AMD_PSFD)
719713
);
720714

721715
/*

arch/x86/kvm/emulate.c

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1640,6 +1640,14 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
16401640
goto exception;
16411641
break;
16421642
case VCPU_SREG_CS:
1643+
/*
1644+
* KVM uses "none" when loading CS as part of emulating Real
1645+
* Mode exceptions and IRET (handled above). In all other
1646+
* cases, loading CS without a control transfer is a KVM bug.
1647+
*/
1648+
if (WARN_ON_ONCE(transfer == X86_TRANSFER_NONE))
1649+
goto exception;
1650+
16431651
if (!(seg_desc.type & 8))
16441652
goto exception;
16451653

arch/x86/kvm/kvm_cache_regs.h

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
#include <linux/kvm_host.h>
66

7-
#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
7+
#define KVM_POSSIBLE_CR0_GUEST_BITS (X86_CR0_TS | X86_CR0_WP)
88
#define KVM_POSSIBLE_CR4_GUEST_BITS \
99
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
1010
| X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE)
@@ -157,6 +157,14 @@ static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
157157
return vcpu->arch.cr0 & mask;
158158
}
159159

160+
static __always_inline bool kvm_is_cr0_bit_set(struct kvm_vcpu *vcpu,
161+
unsigned long cr0_bit)
162+
{
163+
BUILD_BUG_ON(!is_power_of_2(cr0_bit));
164+
165+
return !!kvm_read_cr0_bits(vcpu, cr0_bit);
166+
}
167+
160168
static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu)
161169
{
162170
return kvm_read_cr0_bits(vcpu, ~0UL);
@@ -171,6 +179,14 @@ static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask)
171179
return vcpu->arch.cr4 & mask;
172180
}
173181

182+
static __always_inline bool kvm_is_cr4_bit_set(struct kvm_vcpu *vcpu,
183+
unsigned long cr4_bit)
184+
{
185+
BUILD_BUG_ON(!is_power_of_2(cr4_bit));
186+
187+
return !!kvm_read_cr4_bits(vcpu, cr4_bit);
188+
}
189+
174190
static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu)
175191
{
176192
if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))

arch/x86/kvm/mmu.h

Lines changed: 26 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -113,6 +113,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
113113
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
114114
int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
115115
u64 fault_address, char *insn, int insn_len);
116+
void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
117+
struct kvm_mmu *mmu);
116118

117119
int kvm_mmu_load(struct kvm_vcpu *vcpu);
118120
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
@@ -132,7 +134,7 @@ static inline unsigned long kvm_get_pcid(struct kvm_vcpu *vcpu, gpa_t cr3)
132134
{
133135
BUILD_BUG_ON((X86_CR3_PCID_MASK & PAGE_MASK) != 0);
134136

135-
return kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)
137+
return kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE)
136138
? cr3 & X86_CR3_PCID_MASK
137139
: 0;
138140
}
@@ -153,6 +155,24 @@ static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
153155
vcpu->arch.mmu->root_role.level);
154156
}
155157

158+
static inline void kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
159+
struct kvm_mmu *mmu)
160+
{
161+
/*
162+
* When EPT is enabled, KVM may passthrough CR0.WP to the guest, i.e.
163+
* @mmu's snapshot of CR0.WP and thus all related paging metadata may
164+
* be stale. Refresh CR0.WP and the metadata on-demand when checking
165+
* for permission faults. Exempt nested MMUs, i.e. MMUs for shadowing
166+
* nEPT and nNPT, as CR0.WP is ignored in both cases. Note, KVM does
167+
* need to refresh nested_mmu, a.k.a. the walker used to translate L2
168+
* GVAs to GPAs, as that "MMU" needs to honor L2's CR0.WP.
169+
*/
170+
if (!tdp_enabled || mmu == &vcpu->arch.guest_mmu)
171+
return;
172+
173+
__kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
174+
}
175+
156176
/*
157177
* Check if a given access (described through the I/D, W/R and U/S bits of a
158178
* page fault error code pfec) causes a permission fault with the given PTE
@@ -184,8 +204,12 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
184204
u64 implicit_access = access & PFERR_IMPLICIT_ACCESS;
185205
bool not_smap = ((rflags & X86_EFLAGS_AC) | implicit_access) == X86_EFLAGS_AC;
186206
int index = (pfec + (not_smap << PFERR_RSVD_BIT)) >> 1;
187-
bool fault = (mmu->permissions[index] >> pte_access) & 1;
188207
u32 errcode = PFERR_PRESENT_MASK;
208+
bool fault;
209+
210+
kvm_mmu_refresh_passthrough_bits(vcpu, mmu);
211+
212+
fault = (mmu->permissions[index] >> pte_access) & 1;
189213

190214
WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
191215
if (unlikely(mmu->pkru_mask)) {

arch/x86/kvm/mmu/mmu.c

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5112,6 +5112,21 @@ kvm_calc_cpu_role(struct kvm_vcpu *vcpu, const struct kvm_mmu_role_regs *regs)
51125112
return role;
51135113
}
51145114

5115+
void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu,
5116+
struct kvm_mmu *mmu)
5117+
{
5118+
const bool cr0_wp = kvm_is_cr0_bit_set(vcpu, X86_CR0_WP);
5119+
5120+
BUILD_BUG_ON((KVM_MMU_CR0_ROLE_BITS & KVM_POSSIBLE_CR0_GUEST_BITS) != X86_CR0_WP);
5121+
BUILD_BUG_ON((KVM_MMU_CR4_ROLE_BITS & KVM_POSSIBLE_CR4_GUEST_BITS));
5122+
5123+
if (is_cr0_wp(mmu) == cr0_wp)
5124+
return;
5125+
5126+
mmu->cpu_role.base.cr0_wp = cr0_wp;
5127+
reset_guest_paging_metadata(vcpu, mmu);
5128+
}
5129+
51155130
static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
51165131
{
51175132
/* tdp_root_level is architecture forced level, use it if nonzero */

arch/x86/kvm/pmu.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -540,9 +540,9 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
540540
if (!pmc)
541541
return 1;
542542

543-
if (!(kvm_read_cr4(vcpu) & X86_CR4_PCE) &&
543+
if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_PCE) &&
544544
(static_call(kvm_x86_get_cpl)(vcpu) != 0) &&
545-
(kvm_read_cr0(vcpu) & X86_CR0_PE))
545+
kvm_is_cr0_bit_set(vcpu, X86_CR0_PE))
546546
return 1;
547547

548548
*data = pmc_read_counter(pmc) & mask;

arch/x86/kvm/svm/svm.c

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4522,7 +4522,6 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
45224522
void *insn, int insn_len)
45234523
{
45244524
bool smep, smap, is_user;
4525-
unsigned long cr4;
45264525
u64 error_code;
45274526

45284527
/* Emulation is always possible when KVM has access to all guest state. */
@@ -4614,9 +4613,8 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
46144613
if (error_code & (PFERR_GUEST_PAGE_MASK | PFERR_FETCH_MASK))
46154614
goto resume_guest;
46164615

4617-
cr4 = kvm_read_cr4(vcpu);
4618-
smep = cr4 & X86_CR4_SMEP;
4619-
smap = cr4 & X86_CR4_SMAP;
4616+
smep = kvm_is_cr4_bit_set(vcpu, X86_CR4_SMEP);
4617+
smap = kvm_is_cr4_bit_set(vcpu, X86_CR4_SMAP);
46204618
is_user = svm_get_cpl(vcpu) == 3;
46214619
if (smap && (!smep || is_user)) {
46224620
pr_err_ratelimited("SEV Guest triggered AMD Erratum 1096\n");

arch/x86/kvm/vmx/nested.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4481,7 +4481,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
44814481
* CR0_GUEST_HOST_MASK is already set in the original vmcs01
44824482
* (KVM doesn't change it);
44834483
*/
4484-
vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
4484+
vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
44854485
vmx_set_cr0(vcpu, vmcs12->host_cr0);
44864486

44874487
/* Same as above - no reason to call set_cr4_guest_host_mask(). */
@@ -4632,7 +4632,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
46324632
*/
46334633
vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));
46344634

4635-
vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
4635+
vcpu->arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
46364636
vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));
46374637

46384638
vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
@@ -5154,7 +5154,7 @@ static int handle_vmxon(struct kvm_vcpu *vcpu)
51545154
* does force CR0.PE=1, but only to also force VM86 in order to emulate
51555155
* Real Mode, and so there's no need to check CR0.PE manually.
51565156
*/
5157-
if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
5157+
if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_VMXE)) {
51585158
kvm_queue_exception(vcpu, UD_VECTOR);
51595159
return 1;
51605160
}

arch/x86/kvm/vmx/vmx.c

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4746,7 +4746,7 @@ static void init_vmcs(struct vcpu_vmx *vmx)
47464746
/* 22.2.1, 20.8.1 */
47474747
vm_entry_controls_set(vmx, vmx_vmentry_ctrl());
47484748

4749-
vmx->vcpu.arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
4749+
vmx->vcpu.arch.cr0_guest_owned_bits = vmx_l1_guest_owned_cr0_bits();
47504750
vmcs_writel(CR0_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr0_guest_owned_bits);
47514751

47524752
set_cr4_guest_host_mask(vmx);
@@ -5136,7 +5136,7 @@ bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu)
51365136
if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
51375137
return true;
51385138

5139-
return vmx_get_cpl(vcpu) == 3 && kvm_read_cr0_bits(vcpu, X86_CR0_AM) &&
5139+
return vmx_get_cpl(vcpu) == 3 && kvm_is_cr0_bit_set(vcpu, X86_CR0_AM) &&
51405140
(kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
51415141
}
51425142

@@ -5473,7 +5473,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
54735473
break;
54745474
case 3: /* lmsw */
54755475
val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
5476-
trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
5476+
trace_kvm_cr_write(0, (kvm_read_cr0_bits(vcpu, ~0xful) | val));
54775477
kvm_lmsw(vcpu, val);
54785478

54795479
return kvm_skip_emulated_instruction(vcpu);
@@ -7531,7 +7531,7 @@ static u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
75317531
if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
75327532
return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
75337533

7534-
if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
7534+
if (kvm_read_cr0_bits(vcpu, X86_CR0_CD)) {
75357535
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
75367536
cache = MTRR_TYPE_WRBACK;
75377537
else

0 commit comments

Comments (0)