Skip to content

Commit 58f21a0

Browse files
committed
KVM: nVMX: Don't update msr_autostore count when saving TSC for vmcs12
Rework nVMX's use of the MSR auto-store list to snapshot TSC to sneak MSR_IA32_TSC into the list _without_ updating KVM's software tracking, and drop the generic functionality so that future usage of the store list for nested specific logic needs to consider the implications of modifying the list. Updating the list only for vmcs02 and only on nested VM-Enter is a disaster waiting to happen, as it means vmcs01 is stale relative to the software tracking, and KVM could unintentionally leave an MSR in the store list in perpetuity while running L1, e.g. if KVM addressed the first issue and updated vmcs01 on nested VM-Exit without removing TSC from the list. Furthermore, mixing KVM's desire to save an MSR with L1's desire to save an MSR results in KVM clobbering/ignoring the needs of vmcs01 or vmcs02. E.g. if KVM added MSR_IA32_TSC to the store list for its own purposes, and then _removed_ MSR_IA32_TSC from the list after emulating nested VM-Enter, then KVM would remove MSR_IA32_TSC from the list even though saving TSC on VM-Exit from L2 is still desirable (to provide L1 with an accurate TSC). Similarly, removing an MSR from the list based on vmcs12's settings could drop an MSR that KVM wants to save for its own purposes. In practice, the issues are currently benign, because KVM doesn't use the store list for vmcs01. But that will change with upcoming mediated PMU support. Alternatively, a "full" solution would be to track MSR list entries for vmcs12 separately from KVM's standard lists, but MSR_IA32_TSC is likely the only MSR that KVM would ever want to save on _every_ VM-Exit purely based on vmcs12. I.e. the added complexity isn't remotely justified at this time. Opportunistically escalate from a pr_warn_ratelimited() to a full WARN as KVM reserves eight entries in each MSR list, and as above KVM uses at most one entry. 
Opportunistically make vmx_find_loadstore_msr_slot() local to vmx.c as using it directly from nested code is unsafe due to the potential for mixing vmcs01 and vmcs02 state (see above). Cc: Jim Mattson <jmattson@google.com> Tested-by: Manali Shukla <manali.shukla@amd.com> Link: https://patch.msgid.link/20251206001720.468579-37-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc@google.com>
1 parent 462f092 commit 58f21a0

3 files changed

Lines changed: 24 additions & 51 deletions

File tree

arch/x86/kvm/vmx/nested.c

Lines changed: 22 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1075,16 +1075,12 @@ static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu,
10751075
* does not include the time taken for emulation of the L2->L1
10761076
* VM-exit in L0, use the more accurate value.
10771077
*/
1078-
if (msr_index == MSR_IA32_TSC) {
1079-
int i = vmx_find_loadstore_msr_slot(&vmx->msr_autostore,
1080-
MSR_IA32_TSC);
1078+
if (msr_index == MSR_IA32_TSC && vmx->nested.tsc_autostore_slot >= 0) {
1079+
int slot = vmx->nested.tsc_autostore_slot;
1080+
u64 host_tsc = vmx->msr_autostore.val[slot].value;
10811081

1082-
if (i >= 0) {
1083-
u64 val = vmx->msr_autostore.val[i].value;
1084-
1085-
*data = kvm_read_l1_tsc(vcpu, val);
1086-
return true;
1087-
}
1082+
*data = kvm_read_l1_tsc(vcpu, host_tsc);
1083+
return true;
10881084
}
10891085

10901086
if (kvm_emulate_msr_read(vcpu, msr_index, data)) {
@@ -1163,42 +1159,6 @@ static bool nested_msr_store_list_has_msr(struct kvm_vcpu *vcpu, u32 msr_index)
11631159
return false;
11641160
}
11651161

1166-
static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu,
1167-
u32 msr_index)
1168-
{
1169-
struct vcpu_vmx *vmx = to_vmx(vcpu);
1170-
struct vmx_msrs *autostore = &vmx->msr_autostore;
1171-
bool in_vmcs12_store_list;
1172-
int msr_autostore_slot;
1173-
bool in_autostore_list;
1174-
int last;
1175-
1176-
msr_autostore_slot = vmx_find_loadstore_msr_slot(autostore, msr_index);
1177-
in_autostore_list = msr_autostore_slot >= 0;
1178-
in_vmcs12_store_list = nested_msr_store_list_has_msr(vcpu, msr_index);
1179-
1180-
if (in_vmcs12_store_list && !in_autostore_list) {
1181-
if (autostore->nr == MAX_NR_LOADSTORE_MSRS) {
1182-
/*
1183-
* Emulated VMEntry does not fail here. Instead a less
1184-
* accurate value will be returned by
1185-
* nested_vmx_get_vmexit_msr_value() by reading KVM's
1186-
* internal MSR state instead of reading the value from
1187-
* the vmcs02 VMExit MSR-store area.
1188-
*/
1189-
pr_warn_ratelimited(
1190-
"Not enough msr entries in msr_autostore. Can't add msr %x\n",
1191-
msr_index);
1192-
return;
1193-
}
1194-
last = autostore->nr++;
1195-
autostore->val[last].index = msr_index;
1196-
} else if (!in_vmcs12_store_list && in_autostore_list) {
1197-
last = --autostore->nr;
1198-
autostore->val[msr_autostore_slot] = autostore->val[last];
1199-
}
1200-
}
1201-
12021162
/*
12031163
* Load guest's/host's cr3 at nested entry/exit. @nested_ept is true if we are
12041164
* emulating VM-Entry into a guest with EPT enabled. On failure, the expected
@@ -2699,12 +2659,25 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
26992659
}
27002660

27012661
/*
2702-
* Make sure the msr_autostore list is up to date before we set the
2703-
* count in the vmcs02.
2662+
* If vmcs12 is configured to save TSC on exit via the auto-store list,
2663+
* append the MSR to vmcs02's auto-store list so that KVM effectively
2664+
* reads TSC at the time of VM-Exit from L2. The saved value will be
2665+
* propagated to vmcs12's list on nested VM-Exit.
2666+
*
2667+
* Don't increment the number of MSRs in the vCPU structure, as saving
2668+
* TSC is specific to this particular incarnation of vmcs02, i.e. must
2669+
* not bleed into vmcs01.
27042670
*/
2705-
prepare_vmx_msr_autostore_list(&vmx->vcpu, MSR_IA32_TSC);
2671+
if (nested_msr_store_list_has_msr(&vmx->vcpu, MSR_IA32_TSC) &&
2672+
!WARN_ON_ONCE(vmx->msr_autostore.nr >= ARRAY_SIZE(vmx->msr_autostore.val))) {
2673+
vmx->nested.tsc_autostore_slot = vmx->msr_autostore.nr;
2674+
vmx->msr_autostore.val[vmx->msr_autostore.nr].index = MSR_IA32_TSC;
27062675

2707-
vmcs_write32(VM_EXIT_MSR_STORE_COUNT, vmx->msr_autostore.nr);
2676+
vmcs_write32(VM_EXIT_MSR_STORE_COUNT, vmx->msr_autostore.nr + 1);
2677+
} else {
2678+
vmx->nested.tsc_autostore_slot = -1;
2679+
vmcs_write32(VM_EXIT_MSR_STORE_COUNT, vmx->msr_autostore.nr);
2680+
}
27082681
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
27092682
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
27102683

arch/x86/kvm/vmx/vmx.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1029,7 +1029,7 @@ static __always_inline void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx
10291029
vm_exit_controls_clearbit(vmx, exit);
10301030
}
10311031

1032-
int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr)
1032+
static int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr)
10331033
{
10341034
unsigned int i;
10351035

arch/x86/kvm/vmx/vmx.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ struct nested_vmx {
191191
u16 vpid02;
192192
u16 last_vpid;
193193

194+
int tsc_autostore_slot;
194195
struct nested_vmx_msrs msrs;
195196

196197
/* SMM related state */
@@ -383,7 +384,6 @@ void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags);
383384
unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx);
384385
bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
385386
unsigned int flags);
386-
int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
387387
void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
388388

389389
void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type, bool set);

0 commit comments

Comments
 (0)