
Commit d3ba32d

Dapeng Mi authored and sean-jc committed
KVM: x86/pmu: Load/save GLOBAL_CTRL via entry/exit fields for mediated PMU
When running a guest with a mediated PMU, context switch PERF_GLOBAL_CTRL
via the dedicated VMCS fields for both host and guest.  For the host,
always zero GLOBAL_CTRL on exit as the guest's state will still be loaded
in hardware (KVM will context switch the bulk of PMU state outside of the
inner run loop).  For the guest, use the dedicated fields to atomically
load and save PERF_GLOBAL_CTRL on all entry/exits.

For now, require VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL support (introduced
by Sapphire Rapids).  KVM could support CPUs without that control by
saving PERF_GLOBAL_CTRL via the MSR save list, a.k.a. the MSR auto-store
list, but defer that support as it adds a small amount of complexity and
is somewhat unique.

To minimize VM-Entry latency, propagate IA32_PERF_GLOBAL_CTRL to the VMCS
on-demand.  But to minimize complexity, read IA32_PERF_GLOBAL_CTRL out of
the VMCS on all non-failing VM-Exits, i.e. partially cache the MSR.  KVM
could track GLOBAL_CTRL as an EXREG and defer all reads, but writes are
rare, i.e. the dirty tracking for an EXREG is unnecessary, and it's not
obvious that shaving ~15-20 cycles per exit is meaningful given the total
overhead associated with mediated PMU context switches.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Co-developed-by: Mingwei Zhang <mizhang@google.com>
Signed-off-by: Mingwei Zhang <mizhang@google.com>
Tested-by: Xudong Hao <xudong.hao@intel.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Tested-by: Manali Shukla <manali.shukla@amd.com>
Link: https://patch.msgid.link/20251206001720.468579-22-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
1 parent 8062427 commit d3ba32d

8 files changed: 78 additions & 6 deletions
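Taken together, the mechanics are easiest to see as one run-loop iteration. The sketch below is a condensed, non-authoritative rendering of what the vmx.c hunks further down implement; sketch_vcpu_run_once() is a made-up wrapper and the vmx_vcpu_enter_exit() arguments are simplified:

/* Hypothetical condensation of one vcpu_run iteration with a mediated PMU. */
static void sketch_vcpu_run_once(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
{
        /*
         * VM-Entry: VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL atomically loads
         * GUEST_IA32_PERF_GLOBAL_CTRL into the MSR; guest counters run.
         */
        vmx_vcpu_enter_exit(vcpu, 0);

        /*
         * VM-Exit: VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL saved the live guest
         * value back into the VMCS, and VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL
         * loaded HOST_IA32_PERF_GLOBAL_CTRL, which KVM pins to 0 so that
         * host activity can't bleed into the still-loaded guest counters.
         *
         * Refresh the cached value, as the guest can write the MSR without
         * exiting when writes aren't intercepted (the "partial cache").
         */
        if (!msr_write_intercepted(vmx, MSR_CORE_PERF_GLOBAL_CTRL))
                vcpu_to_pmu(vcpu)->global_ctrl =
                        vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL);
}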


arch/x86/include/asm/kvm-x86-pmu-ops.h

Lines changed: 2 additions & 0 deletions
@@ -23,5 +23,7 @@ KVM_X86_PMU_OP_OPTIONAL(reset)
 KVM_X86_PMU_OP_OPTIONAL(deliver_pmi)
 KVM_X86_PMU_OP_OPTIONAL(cleanup)
 
+KVM_X86_PMU_OP_OPTIONAL(write_global_ctrl)
+
 #undef KVM_X86_PMU_OP
 #undef KVM_X86_PMU_OP_OPTIONAL
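For context on this header's role: every KVM_X86_PMU_OP*() entry is expanded several times to generate one patchable static call per op. A rough sketch of the existing pattern, condensed from the #define blocks in arch/x86/kvm/pmu.c and the kvm_pmu_call() macro (exact spellings may differ across kernel versions):

/* Call sites: kvm_pmu_call(write_global_ctrl)(data) expands to a static call. */
#define kvm_pmu_call(func) static_call(kvm_x86_pmu_##func)

/* Declare one patchable call site per op (sketch of the pmu.c pattern). */
#define KVM_X86_PMU_OP(func)                                    \
        DEFINE_STATIC_CALL_NULL(kvm_x86_pmu_##func,             \
                                *(((struct kvm_pmu_ops *)0)->func))
#define KVM_X86_PMU_OP_OPTIONAL KVM_X86_PMU_OP
#include <asm/kvm-x86-pmu-ops.h>

/*
 * At setup, each site is patched to the vendor implementation.  An optional
 * op that is (or is forced to) NULL leaves a void-returning call site as a
 * no-op -- which is what kvm_init_pmu_capability() relies on below when it
 * NULLs write_global_ctrl for setups without a mediated PMU.
 */
#define __KVM_X86_PMU_OP(func)                                  \
        static_call_update(kvm_x86_pmu_##func, kvm_pmu_ops.func);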

arch/x86/include/asm/vmx.h

Lines changed: 1 addition & 0 deletions
@@ -107,6 +107,7 @@
 #define VM_EXIT_PT_CONCEAL_PIP                  0x01000000
 #define VM_EXIT_CLEAR_IA32_RTIT_CTL             0x02000000
 #define VM_EXIT_LOAD_CET_STATE                  0x10000000
+#define VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL      0x40000000
 
 #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR       0x00036dff
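Whether this new control can actually be set is enumerated by the VMX capability MSRs: the allowed-1 settings of the VM-Exit controls live in the upper 32 bits of IA32_VMX_TRUE_EXIT_CTLS. KVM's setup_vmcs_config() handles this generically; the helper below is a hypothetical, standalone illustration:

#include <asm/msr.h>
#include <asm/msr-index.h>

/* Hypothetical probe: may 'ctrl' be set to 1 in the VM-Exit controls? */
static bool vm_exit_ctrl_allowed1(u32 ctrl)
{
        u64 vmx_msr;

        rdmsrl(MSR_IA32_VMX_TRUE_EXIT_CTLS, vmx_msr);   /* allowed-1 in bits 63:32 */
        return ((u32)(vmx_msr >> 32) & ctrl) == ctrl;
}

Per the commit message, vm_exit_ctrl_allowed1(VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL) would be false on pre-Sapphire Rapids parts, which is what ultimately disables the mediated PMU there.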

arch/x86/kvm/pmu.c

Lines changed: 11 additions & 2 deletions
@@ -103,7 +103,7 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
 #undef __KVM_X86_PMU_OP
 }
 
-void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
+void kvm_init_pmu_capability(struct kvm_pmu_ops *pmu_ops)
 {
         bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL;
         int min_nr_gp_ctrs = pmu_ops->MIN_NR_GP_COUNTERS;
@@ -139,6 +139,9 @@ void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
             !pmu_ops->is_mediated_pmu_supported(&kvm_host_pmu))
                 enable_mediated_pmu = false;
 
+        if (!enable_mediated_pmu)
+                pmu_ops->write_global_ctrl = NULL;
+
         if (!enable_pmu) {
                 memset(&kvm_pmu_cap, 0, sizeof(kvm_pmu_cap));
                 return;
@@ -834,6 +837,9 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                         diff = pmu->global_ctrl ^ data;
                         pmu->global_ctrl = data;
                         reprogram_counters(pmu, diff);
+
+                        if (kvm_vcpu_has_mediated_pmu(vcpu))
+                                kvm_pmu_call(write_global_ctrl)(data);
                 }
                 break;
         case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
@@ -928,8 +934,11 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
          * in the global controls). Emulate that behavior when refreshing the
          * PMU so that userspace doesn't need to manually set PERF_GLOBAL_CTRL.
          */
-        if (kvm_pmu_has_perf_global_ctrl(pmu) && pmu->nr_arch_gp_counters)
+        if (kvm_pmu_has_perf_global_ctrl(pmu) && pmu->nr_arch_gp_counters) {
                 pmu->global_ctrl = GENMASK_ULL(pmu->nr_arch_gp_counters - 1, 0);
+                if (kvm_vcpu_has_mediated_pmu(vcpu))
+                        kvm_pmu_call(write_global_ctrl)(pmu->global_ctrl);
+        }
 
         bitmap_set(pmu->all_valid_pmc_idx, 0, pmu->nr_arch_gp_counters);
         bitmap_set(pmu->all_valid_pmc_idx, KVM_FIXED_PMC_BASE_IDX,
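As a quick sanity check on the refresh hunk: GENMASK_ULL(n - 1, 0) builds the "all GP counters enabled" value that write_global_ctrl then pushes straight into GUEST_IA32_PERF_GLOBAL_CTRL. For instance, assuming a vCPU with 8 GP counters:

#include <linux/bits.h>

u64 ctrl = GENMASK_ULL(8 - 1, 0);  /* 0xff: GP counters 0-7 enabled, fixed-counter bits (32+) clear */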

arch/x86/kvm/pmu.h

Lines changed: 2 additions & 1 deletion
@@ -38,6 +38,7 @@ struct kvm_pmu_ops {
         void (*cleanup)(struct kvm_vcpu *vcpu);
 
         bool (*is_mediated_pmu_supported)(struct x86_pmu_capability *host_pmu);
+        void (*write_global_ctrl)(u64 global_ctrl);
 
         const u64 EVENTSEL_EVENT;
         const int MAX_NR_GP_COUNTERS;
@@ -183,7 +184,7 @@ static inline bool pmc_is_locally_enabled(struct kvm_pmc *pmc)
 
 extern struct x86_pmu_capability kvm_pmu_cap;
 
-void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops);
+void kvm_init_pmu_capability(struct kvm_pmu_ops *pmu_ops);
 
 void kvm_pmu_recalc_pmc_emulation(struct kvm_pmu *pmu, struct kvm_pmc *pmc);
 

arch/x86/kvm/vmx/capabilities.h

Lines changed: 6 additions & 0 deletions
@@ -109,6 +109,12 @@ static inline bool cpu_has_load_cet_ctrl(void)
 {
         return (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_CET_STATE);
 }
+
+static inline bool cpu_has_save_perf_global_ctrl(void)
+{
+        return vmcs_config.vmexit_ctrl & VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL;
+}
+
 static inline bool cpu_has_vmx_mpx(void)
 {
         return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS;

arch/x86/kvm/vmx/pmu_intel.c

Lines changed: 24 additions & 1 deletion
@@ -778,7 +778,29 @@ static bool intel_pmu_is_mediated_pmu_supported(struct x86_pmu_capability *host_pmu)
          * Require v4+ for MSR_CORE_PERF_GLOBAL_STATUS_SET, and full-width
          * writes so that KVM can precisely load guest counter values.
          */
-        return host_pmu->version >= 4 && host_perf_cap & PERF_CAP_FW_WRITES;
+        if (host_pmu->version < 4 || !(host_perf_cap & PERF_CAP_FW_WRITES))
+                return false;
+
+        /*
+         * All CPUs that support a mediated PMU are expected to support loading
+         * PERF_GLOBAL_CTRL via dedicated VMCS fields.
+         */
+        if (WARN_ON_ONCE(!cpu_has_load_perf_global_ctrl()))
+                return false;
+
+        /*
+         * KVM doesn't yet support mediated PMU on CPUs without support for
+         * saving PERF_GLOBAL_CTRL via a dedicated VMCS field.
+         */
+        if (!cpu_has_save_perf_global_ctrl())
+                return false;
+
+        return true;
+}
+
+static void intel_pmu_write_global_ctrl(u64 global_ctrl)
+{
+        vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, global_ctrl);
 }
 
 struct kvm_pmu_ops intel_pmu_ops __initdata = {
@@ -794,6 +816,7 @@ struct kvm_pmu_ops intel_pmu_ops __initdata = {
         .cleanup = intel_pmu_cleanup,
 
         .is_mediated_pmu_supported = intel_pmu_is_mediated_pmu_supported,
+        .write_global_ctrl = intel_pmu_write_global_ctrl,
 
         .EVENTSEL_EVENT = ARCH_PERFMON_EVENTSEL_EVENT,
         .MAX_NR_GP_COUNTERS = KVM_MAX_NR_INTEL_GP_COUNTERS,
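To make the guest-visible behavior concrete, here is a hypothetical bare-metal-style guest snippet (not part of the patch). With a mediated PMU this WRMSR is not intercepted; it executes at hardware speed, and the written value is what VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL later captures into GUEST_IA32_PERF_GLOBAL_CTRL, where vmx_vcpu_run() picks it up:

#define MSR_CORE_PERF_GLOBAL_CTRL       0x38f   /* architectural MSR index */

static inline void wrmsr64(unsigned int msr, unsigned long long val)
{
        asm volatile("wrmsr"
                     : /* no outputs */
                     : "c" (msr), "a" ((unsigned int)val),
                       "d" ((unsigned int)(val >> 32))
                     : "memory");
}

static void guest_enable_gp_counter0(void)
{
        wrmsr64(MSR_CORE_PERF_GLOBAL_CTRL, 1ULL << 0);  /* enable GP counter 0 */
}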

arch/x86/kvm/vmx/vmx.c

Lines changed: 30 additions & 1 deletion
@@ -4294,6 +4294,18 @@ static void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
                 vmx_set_intercept_for_msr(vcpu, MSR_IA32_S_CET, MSR_TYPE_RW, intercept);
         }
 
+        if (enable_mediated_pmu) {
+                bool is_mediated_pmu = kvm_vcpu_has_mediated_pmu(vcpu);
+                struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+                vm_entry_controls_changebit(vmx,
+                        VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, is_mediated_pmu);
+
+                vm_exit_controls_changebit(vmx,
+                        VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
+                        VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL, is_mediated_pmu);
+        }
+
         /*
          * x2APIC and LBR MSR intercepts are modified on-demand and cannot be
          * filtered by userspace.
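vm_entry_controls_changebit() and vm_exit_controls_changebit() come from the controls-shadow macros in vmx.h; functionally, each reduces to roughly the following (a sketch, not the macro-generated code):

static inline void vm_entry_controls_changebit(struct vcpu_vmx *vmx, u32 val,
                                               bool set)
{
        if (set)
                vm_entry_controls_setbit(vmx, val);
        else
                vm_entry_controls_clearbit(vmx, val);
}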
@@ -4476,6 +4488,16 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
                 vmcs_writel(HOST_SSP, 0);
                 vmcs_writel(HOST_INTR_SSP_TABLE, 0);
         }
+
+        /*
+         * When running a guest with a mediated PMU, guest state is resident in
+         * hardware after VM-Exit.  Zero PERF_GLOBAL_CTRL on exit so that host
+         * activity doesn't bleed into the guest counters.  When running with
+         * an emulated PMU, PERF_GLOBAL_CTRL is dynamically computed on every
+         * entry/exit to merge guest and host PMU usage.
+         */
+        if (enable_mediated_pmu)
+                vmcs_write64(HOST_IA32_PERF_GLOBAL_CTRL, 0);
 }
 
 void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
@@ -4543,7 +4565,8 @@ static u32 vmx_get_initial_vmexit_ctrl(void)
                                           VM_EXIT_CLEAR_IA32_RTIT_CTL);
         /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
         return vmexit_ctrl &
-               ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER);
+               ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER |
+                 VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL);
 }
 
 void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
@@ -7270,6 +7293,9 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
         struct perf_guest_switch_msr *msrs;
         struct kvm_pmu *pmu = vcpu_to_pmu(&vmx->vcpu);
 
+        if (kvm_vcpu_has_mediated_pmu(&vmx->vcpu))
+                return;
+
         pmu->host_cross_mapped_mask = 0;
         if (pmu->pebs_enable & pmu->global_ctrl)
                 intel_pmu_cross_mapped_check(pmu);
@@ -7572,6 +7598,9 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
 
         vmx->loaded_vmcs->launched = 1;
 
+        if (!msr_write_intercepted(vmx, MSR_CORE_PERF_GLOBAL_CTRL))
+                vcpu_to_pmu(vcpu)->global_ctrl = vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL);
+
         vmx_recover_nmi_blocking(vmx);
         vmx_complete_interrupts(vmx);
 
arch/x86/kvm/vmx/vmx.h

Lines changed: 2 additions & 1 deletion
@@ -510,7 +510,8 @@ static inline u8 vmx_get_rvi(void)
          VM_EXIT_CLEAR_BNDCFGS |                        \
          VM_EXIT_PT_CONCEAL_PIP |                       \
          VM_EXIT_CLEAR_IA32_RTIT_CTL |                  \
-         VM_EXIT_LOAD_CET_STATE)
+         VM_EXIT_LOAD_CET_STATE |                       \
+         VM_EXIT_SAVE_IA32_PERF_GLOBAL_CTRL)
 
 #define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL \
         (PIN_BASED_EXT_INTR_MASK |                      \
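Extending this optional-controls mask is what lets the vm_exit_controls_changebit() call above compile: the shadow helpers reject bits that are neither required nor optional at build time. Roughly (a sketch of the guard generated by the BUILD_CONTROLS_SHADOW machinery, not the literal expansion):

static __always_inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val)
{
        /* Compile-time check that 'val' is a known VM-Exit control bit. */
        BUILD_BUG_ON(!(val & (KVM_REQUIRED_VMX_VM_EXIT_CONTROLS |
                              KVM_OPTIONAL_VMX_VM_EXIT_CONTROLS)));
        vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val);
}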
