
Commit 7c7ec32

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull more kvm fixes from Paolo Bonzini:

 "Five small fixes. The nested migration bug will be fixed with a
  better API in 5.10 or 5.11; for now this is a fix that works with
  existing userspace but keeps the current ugly API."

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: SVM: Add a dedicated INVD intercept routine
  KVM: x86: Reset MMU context if guest toggles CR4.SMAP or CR4.PKE
  KVM: x86: fix MSR_IA32_TSC read for nested migration
  selftests: kvm: Fix assert failure in single-step test
  KVM: x86: VMX: Make smaller physical guest address space support user-configurable
2 parents: b463b6f + 4bb05f3

5 files changed: 40 additions & 12 deletions


arch/x86/kvm/svm/svm.c

Lines changed: 7 additions & 1 deletion

@@ -2183,6 +2183,12 @@ static int iret_interception(struct vcpu_svm *svm)
 	return 1;
 }
 
+static int invd_interception(struct vcpu_svm *svm)
+{
+	/* Treat an INVD instruction as a NOP and just skip it. */
+	return kvm_skip_emulated_instruction(&svm->vcpu);
+}
+
 static int invlpg_interception(struct vcpu_svm *svm)
 {
 	if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
@@ -2774,7 +2780,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_RDPMC]	= rdpmc_interception,
 	[SVM_EXIT_CPUID]	= cpuid_interception,
 	[SVM_EXIT_IRET]		= iret_interception,
-	[SVM_EXIT_INVD]		= emulate_on_interception,
+	[SVM_EXIT_INVD]		= invd_interception,
 	[SVM_EXIT_PAUSE]	= pause_interception,
 	[SVM_EXIT_HLT]		= halt_interception,
 	[SVM_EXIT_INVLPG]	= invlpg_interception,
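
The dedicated routine keeps INVD out of the x86 instruction emulator; the likely motivation, per the SVM patch named in this pull, is SEV guests, whose encrypted memory the emulator cannot read, and a NOP-and-skip is all INVD needs under KVM anyway. For context, a hedged sketch of how the table above is consumed; handle_exit_sketch() is a hypothetical stand-in for svm.c's real handle_exit(), which also dumps the VMCB and reports errors:

/* Hedged sketch, not kernel code: the exit code the hardware stores in
 * the VMCB indexes svm_exit_handlers[] directly, so the one-line table
 * change above is all it takes to reroute #VMEXIT(INVD).
 */
static int handle_exit_sketch(struct vcpu_svm *svm)
{
	u32 exit_code = svm->vmcb->control.exit_code;

	if (exit_code >= ARRAY_SIZE(svm_exit_handlers) ||
	    !svm_exit_handlers[exit_code])
		return 0;	/* the real dispatcher reports an error here */

	return svm_exit_handlers[exit_code](svm);
}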

arch/x86/kvm/vmx/vmx.c

Lines changed: 10 additions & 5 deletions

@@ -129,6 +129,9 @@ static bool __read_mostly enable_preemption_timer = 1;
 module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
 #endif
 
+extern bool __read_mostly allow_smaller_maxphyaddr;
+module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
+
 #define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD)
 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
 #define KVM_VM_CR0_ALWAYS_ON				\
@@ -4803,6 +4806,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 				 * EPT will cause page fault only if we need to
 				 * detect illegal GPAs.
 				 */
+				WARN_ON_ONCE(!allow_smaller_maxphyaddr);
 				kvm_fixup_and_inject_pf_error(vcpu, cr2, error_code);
 				return 1;
 			} else
@@ -5331,7 +5335,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	 * would also use advanced VM-exit information for EPT violations to
 	 * reconstruct the page fault error code.
 	 */
-	if (unlikely(kvm_mmu_is_illegal_gpa(vcpu, gpa)))
+	if (unlikely(allow_smaller_maxphyaddr && kvm_mmu_is_illegal_gpa(vcpu, gpa)))
 		return kvm_emulate_instruction(vcpu, 0);
 
 	return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
@@ -8305,11 +8309,12 @@ static int __init vmx_init(void)
 	vmx_check_vmcs12_offsets();
 
 	/*
-	 * Intel processors don't have problems with
-	 * GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable
-	 * it for VMX by default
+	 * Shadow paging doesn't have a (further) performance penalty
+	 * from GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable it
+	 * by default
 	 */
-	allow_smaller_maxphyaddr = true;
+	if (!enable_ept)
+		allow_smaller_maxphyaddr = true;
 
 	return 0;
 }
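
Net effect: with EPT enabled, emulating GUEST_MAXPHYADDR < HOST_MAXPHYADDR now requires opting in via the new module parameter (for example kvm-intel.allow_smaller_maxphyaddr=1 on the kernel command line), since detecting illegal GPAs means intercepting every #PF. A sketch of the illegal-GPA test those gated paths rely on; the shape below is an assumption about kvm_mmu_is_illegal_gpa(), whose real definition lives in KVM's MMU headers:

/* Hedged sketch: a guest physical address is "illegal" when it sets
 * any bit at or above the MAXPHYADDR advertised in the guest's CPUID.
 */
static inline bool illegal_gpa_sketch(struct kvm_vcpu *vcpu, gpa_t gpa)
{
	return gpa >= BIT_ULL(cpuid_maxphyaddr(vcpu));
}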

arch/x86/kvm/vmx/vmx.h

Lines changed: 4 additions & 1 deletion

@@ -552,7 +552,10 @@ static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx)
 
 static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu)
 {
-	return !enable_ept || cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits;
+	if (!enable_ept)
+		return true;
+
+	return allow_smaller_maxphyaddr && cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits;
 }
 
 void dump_vmcs(void);
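
The predicate now answers "must we intercept #PF?" in two steps: shadow paging always needs the intercept, while the small-MAXPHYADDR case additionally requires the admin opt-in. A hedged sketch of how a caller would consume it when rebuilding the VMCS exception bitmap; the function name and the abridged bit set are assumptions, the real logic is vmx.c's exception-bitmap update:

/* Hedged sketch, not the real function: shows the predicate deciding
 * whether the #PF bit belongs in the VMCS exception bitmap.
 */
static void update_exception_bitmap_sketch(struct kvm_vcpu *vcpu)
{
	u32 eb = (1u << DB_VECTOR) | (1u << UD_VECTOR);	/* abridged set */

	if (vmx_need_pf_intercept(vcpu))
		eb |= 1u << PF_VECTOR;

	vmcs_write32(EXCEPTION_BITMAP, eb);
}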

arch/x86/kvm/x86.c

Lines changed: 18 additions & 4 deletions

@@ -188,7 +188,7 @@ static struct kvm_shared_msrs __percpu *shared_msrs;
 u64 __read_mostly host_efer;
 EXPORT_SYMBOL_GPL(host_efer);
 
-bool __read_mostly allow_smaller_maxphyaddr;
+bool __read_mostly allow_smaller_maxphyaddr = 0;
 EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
 
 static u64 __read_mostly host_xss;
@@ -976,6 +976,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	unsigned long old_cr4 = kvm_read_cr4(vcpu);
 	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
 				   X86_CR4_SMEP;
+	unsigned long mmu_role_bits = pdptr_bits | X86_CR4_SMAP | X86_CR4_PKE;
 
 	if (kvm_valid_cr4(vcpu, cr4))
 		return 1;
@@ -1003,7 +1004,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	if (kvm_x86_ops.set_cr4(vcpu, cr4))
 		return 1;
 
-	if (((cr4 ^ old_cr4) & pdptr_bits) ||
+	if (((cr4 ^ old_cr4) & mmu_role_bits) ||
 	    (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
 		kvm_mmu_reset_context(vcpu);
 
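The bug being fixed: CR4.SMAP and CR4.PKE alter how the MMU interprets permissions (they are part of KVM's MMU role), but the old pdptr_bits mask did not include them, so a guest toggling either bit kept running on a stale MMU context. A standalone, userspace-compilable illustration of the XOR-and-mask test; bit positions follow the architectural CR4 layout:

#include <stdio.h>

#define X86_CR4_PSE  (1ul << 4)
#define X86_CR4_PAE  (1ul << 5)
#define X86_CR4_PGE  (1ul << 7)
#define X86_CR4_SMEP (1ul << 20)
#define X86_CR4_SMAP (1ul << 21)
#define X86_CR4_PKE  (1ul << 22)

int main(void)
{
	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
				   X86_CR4_SMEP;
	unsigned long mmu_role_bits = pdptr_bits | X86_CR4_SMAP | X86_CR4_PKE;

	/* Guest sets CR4.SMAP while PAE stays unchanged. */
	unsigned long old_cr4 = X86_CR4_PAE;
	unsigned long cr4     = X86_CR4_PAE | X86_CR4_SMAP;

	/* XOR exposes the changed bits; the mask keeps only those that
	 * matter for the MMU role. The old mask misses the toggle. */
	printf("old mask hits: %#lx\n", (cr4 ^ old_cr4) & pdptr_bits);    /* 0 */
	printf("new mask hits: %#lx\n", (cr4 ^ old_cr4) & mmu_role_bits); /* SMAP */
	return 0;
}

The last hunk in x86.c is the nested-migration TSC fix: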

@@ -3221,9 +3222,22 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_POWER_CTL:
 		msr_info->data = vcpu->arch.msr_ia32_power_ctl;
 		break;
-	case MSR_IA32_TSC:
-		msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset;
+	case MSR_IA32_TSC: {
+		/*
+		 * Intel SDM states that MSR_IA32_TSC read adds the TSC offset
+		 * even when not intercepted. AMD manual doesn't explicitly
+		 * state this but appears to behave the same.
+		 *
+		 * On userspace reads and writes, however, we unconditionally
+		 * return L1's TSC value to ensure backwards-compatible
+		 * behavior for migration.
+		 */
+		u64 tsc_offset = msr_info->host_initiated ? vcpu->arch.l1_tsc_offset :
+							    vcpu->arch.tsc_offset;
+
+		msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + tsc_offset;
 		break;
+	}
 	case MSR_MTRRcap:
 	case 0x200 ... 0x2ff:
 		return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
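
The host_initiated distinction is what makes nested migration work: while L2 runs, vcpu->arch.tsc_offset is L1's offset combined with the offset from L1's VMCS12/VMCB, so a VMM snapshotting MSR_IA32_TSC mid-nested-run used to capture an L2-relative value. A hedged sketch of the userspace side of such a read (error handling elided; vcpu_fd is assumed to be an open KVM vCPU file descriptor; the header-plus-entry wrapper mirrors the pattern the KVM selftests use):

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

#define MSR_IA32_TSC 0x00000010	/* architectural MSR index */

/* Read MSR_IA32_TSC via KVM_GET_MSRS. This ioctl path is
 * host_initiated in the kernel, so after this fix it yields the
 * guest's L1 TSC value even while a nested (L2) guest is running,
 * which is what a migration snapshot needs.
 */
static __u64 read_guest_tsc(int vcpu_fd)
{
	struct {
		struct kvm_msrs hdr;
		struct kvm_msr_entry entry;
	} msrs;

	memset(&msrs, 0, sizeof(msrs));
	msrs.hdr.nmsrs = 1;
	msrs.entry.index = MSR_IA32_TSC;

	ioctl(vcpu_fd, KVM_GET_MSRS, &msrs);	/* returns #MSRs read */
	return msrs.entry.data;
}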

tools/testing/selftests/kvm/x86_64/debug_regs.c

Lines changed: 1 addition & 1 deletion

@@ -73,7 +73,7 @@ int main(void)
 	int i;
 	/* Instruction lengths starting at ss_start */
 	int ss_size[4] = {
-		3,	/* xor */
+		2,	/* xor */
 		2,	/* cpuid */
 		5,	/* mov */
 		2,	/* rdmsr */
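
The test single-steps the guest and asserts that RIP advances by exactly the length of each instruction, and the assert fired because the first entry was wrong: the guest's xor is the 32-bit form, which encodes in 2 bytes. For reference, the standard x86 encodings of the two zeroing variants:

/* 31 c0       xor %eax,%eax   -> 2 bytes
 * 48 31 c0    xor %rax,%rax   -> 3 bytes (REX.W prefix)
 */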
