Commit 01edb1c

Merge tag 'kvm-x86-pmu-6.8' of https://github.com/kvm-x86/linux into HEAD
KVM x86 PMU changes for 6.8:

- Fix a variety of bugs where KVM fails to stop/reset counters and other state
  prior to refreshing the vPMU model.

- Fix a double-overflow PMU bug by tracking emulated counter events using a
  dedicated field instead of snapshotting the "previous" counter. If the
  hardware PMC count triggers overflow that is recognized in the same VM-Exit
  that KVM manually bumps an event count, KVM would pend PMIs for both the
  hardware-triggered overflow and for the KVM-triggered overflow.
2 parents 33d0403 + fd89499 commit 01edb1c
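
The double-overflow fix in the second bullet is easiest to see with a toy model. The sketch below is purely illustrative (a hypothetical 8-bit PMC in a standalone program, not kernel code): the hardware count wraps in the same VM-exit in which KVM emulates one more event. The old scheme compares the final count against a prev_counter snapshot and re-detects the wrap that perf already signaled; the new scheme parks emulated events in a dedicated emulated_counter and checks for a wrap only when folding them in.

#include <stdint.h>
#include <stdio.h>

#define MASK 0xffu	/* toy 8-bit PMC; real PMCs are 40+ bits wide */

int main(void)
{
	/*
	 * The count consumed from perf has wrapped 0xff -> 0x00, and perf
	 * has already pended one PMI for that hardware overflow.
	 */
	uint64_t consumed = 0x00;

	/* Old scheme: bump the counter, snapshot the stale pre-wrap value. */
	uint64_t prev_counter = 0xff;
	uint64_t counter = (consumed + 1) & MASK;	/* emulated event */
	int old_pmis = 1 + (counter < prev_counter);	/* perf's + KVM's */

	/*
	 * New scheme: the emulated event is folded in only after the perf
	 * count has been consumed, and only that fold-in is overflow-checked.
	 */
	uint64_t emulated_counter = 1;
	uint64_t folded = (consumed + emulated_counter) & MASK;
	int new_pmis = 1 + (folded < consumed);		/* perf's only */

	printf("old scheme: %d PMIs, new scheme: %d PMI\n", old_pmis, new_pmis);
	return 0;
}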

7 files changed

Lines changed: 137 additions & 109 deletions

arch/x86/include/asm/kvm-x86-pmu-ops.h

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ KVM_X86_PMU_OP(get_msr)
 KVM_X86_PMU_OP(set_msr)
 KVM_X86_PMU_OP(refresh)
 KVM_X86_PMU_OP(init)
-KVM_X86_PMU_OP(reset)
+KVM_X86_PMU_OP_OPTIONAL(reset)
 KVM_X86_PMU_OP_OPTIONAL(deliver_pmi)
 KVM_X86_PMU_OP_OPTIONAL(cleanup)
 

arch/x86/include/asm/kvm_host.h

Lines changed: 16 additions & 1 deletion
@@ -500,8 +500,23 @@ struct kvm_pmc {
 	u8 idx;
 	bool is_paused;
 	bool intr;
+	/*
+	 * Base value of the PMC counter, relative to the *consumed* count in
+	 * the associated perf_event.  This value includes counter updates from
+	 * the perf_event and emulated_count since the last time the counter
+	 * was reprogrammed, but it is *not* the current value as seen by the
+	 * guest or userspace.
+	 *
+	 * The count is relative to the associated perf_event so that KVM
+	 * doesn't need to reprogram the perf_event every time the guest writes
+	 * to the counter.
+	 */
 	u64 counter;
-	u64 prev_counter;
+	/*
+	 * PMC events triggered by KVM emulation that haven't been fully
+	 * processed, i.e. haven't undergone overflow detection.
+	 */
+	u64 emulated_counter;
 	u64 eventsel;
 	struct perf_event *perf_event;
 	struct kvm_vcpu *vcpu;
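
With these two fields, the guest-visible value of a PMC is assembled from three parts. A rough sketch (this mirrors the pmc_read_counter() change in arch/x86/kvm/pmu.h further down; "live" stands for the perf_event delta that hasn't yet been consumed into pmc->counter):

	value = (pmc->counter + pmc->emulated_counter + live) & pmc_bitmask(pmc);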

arch/x86/kvm/pmu.c

Lines changed: 117 additions & 23 deletions
@@ -127,9 +127,9 @@ static void kvm_perf_overflow(struct perf_event *perf_event,
 	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
 
 	/*
-	 * Ignore overflow events for counters that are scheduled to be
-	 * reprogrammed, e.g. if a PMI for the previous event races with KVM's
-	 * handling of a related guest WRMSR.
+	 * Ignore asynchronous overflow events for counters that are scheduled
+	 * to be reprogrammed, e.g. if a PMI for the previous event races with
+	 * KVM's handling of a related guest WRMSR.
 	 */
 	if (test_and_set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi))
 		return;
@@ -161,6 +161,15 @@ static u64 pmc_get_pebs_precise_level(struct kvm_pmc *pmc)
 	return 1;
 }
 
+static u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
+{
+	u64 sample_period = (-counter_value) & pmc_bitmask(pmc);
+
+	if (!sample_period)
+		sample_period = pmc_bitmask(pmc) + 1;
+	return sample_period;
+}
+
 static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config,
 				 bool exclude_user, bool exclude_kernel,
 				 bool intr)
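
get_sample_period() turns the current counter value into the number of events left before the counter wraps, which is what perf wants as a sample period. A worked example, assuming a 48-bit PMC so that pmc_bitmask() returns 0xffffffffffff:

	/*
	 * counter == 0xfffffffffff0: (-counter) & mask == 0x10, i.e. perf
	 * should fire after the 16 events needed to reach the wrap.
	 * counter == 0: (-0) & mask == 0, so the period falls back to
	 * mask + 1 == 2^48, one full pass through the counter's range.
	 */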
@@ -215,17 +224,30 @@ static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config,
 	return 0;
 }
 
-static void pmc_pause_counter(struct kvm_pmc *pmc)
+static bool pmc_pause_counter(struct kvm_pmc *pmc)
 {
 	u64 counter = pmc->counter;
-
-	if (!pmc->perf_event || pmc->is_paused)
-		return;
+	u64 prev_counter;
 
 	/* update counter, reset event value to avoid redundant accumulation */
-	counter += perf_event_pause(pmc->perf_event, true);
+	if (pmc->perf_event && !pmc->is_paused)
+		counter += perf_event_pause(pmc->perf_event, true);
+
+	/*
+	 * Snapshot the previous counter *after* accumulating state from perf.
+	 * If overflow already happened, hardware (via perf) is responsible for
+	 * generating a PMI.  KVM just needs to detect overflow on emulated
+	 * counter events that haven't yet been processed.
+	 */
+	prev_counter = counter & pmc_bitmask(pmc);
+
+	counter += pmc->emulated_counter;
 	pmc->counter = counter & pmc_bitmask(pmc);
+
+	pmc->emulated_counter = 0;
 	pmc->is_paused = true;
+
+	return pmc->counter < prev_counter;
 }
 
 static bool pmc_resume_counter(struct kvm_pmc *pmc)
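
The boolean now returned by pmc_pause_counter() flags only wraps caused by folding in emulated events. A worked example with a hypothetical 8-bit PMC:

	/*
	 * counter after consuming the perf delta == 0xfe -> prev_counter = 0xfe
	 * emulated_counter == 3                          -> pmc->counter = 0x01
	 * 0x01 < 0xfe, so pmc_pause_counter() returns true and the caller
	 * emulates the overflow; hardware never counted those three events, so
	 * perf won't generate a PMI for them.  A wrap that happened in hardware
	 * is already folded into prev_counter and signaled by perf, so it is
	 * deliberately not re-detected.
	 */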
@@ -250,6 +272,51 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
 	return true;
 }
 
+static void pmc_release_perf_event(struct kvm_pmc *pmc)
+{
+	if (pmc->perf_event) {
+		perf_event_release_kernel(pmc->perf_event);
+		pmc->perf_event = NULL;
+		pmc->current_config = 0;
+		pmc_to_pmu(pmc)->event_count--;
+	}
+}
+
+static void pmc_stop_counter(struct kvm_pmc *pmc)
+{
+	if (pmc->perf_event) {
+		pmc->counter = pmc_read_counter(pmc);
+		pmc_release_perf_event(pmc);
+	}
+}
+
+static void pmc_update_sample_period(struct kvm_pmc *pmc)
+{
+	if (!pmc->perf_event || pmc->is_paused ||
+	    !is_sampling_event(pmc->perf_event))
+		return;
+
+	perf_event_period(pmc->perf_event,
+			  get_sample_period(pmc, pmc->counter));
+}
+
+void pmc_write_counter(struct kvm_pmc *pmc, u64 val)
+{
+	/*
+	 * Drop any unconsumed accumulated counts, the WRMSR is a write, not a
+	 * read-modify-write.  Adjust the counter value so that its value is
+	 * relative to the current count, as reading the current count from
+	 * perf is faster than pausing and reprogramming the event in order to
+	 * reset it to '0'.  Note, this very sneakily offsets the accumulated
+	 * emulated count too, by using pmc_read_counter()!
+	 */
+	pmc->emulated_counter = 0;
+	pmc->counter += val - pmc_read_counter(pmc);
+	pmc->counter &= pmc_bitmask(pmc);
+	pmc_update_sample_period(pmc);
+}
+EXPORT_SYMBOL_GPL(pmc_write_counter);
+
 static int filter_cmp(const void *pa, const void *pb, u64 mask)
 {
 	u64 a = *(u64 *)pa & mask;
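
Because pmc->counter is relative to the perf_event's consumed count, a guest WRMSR only adjusts the software base; the perf_event itself is left running. A worked example with toy numbers ("live" is the unconsumed perf delta that pmc_read_counter() adds in):

	/*
	 * Guest writes val = 100 while pmc->counter == 5, emulated_counter == 2
	 * and live == 10:
	 *   pmc->emulated_counter = 0;         // drop unconsumed emulated events
	 *   pmc_read_counter() == 5 + 0 + 10 == 15
	 *   pmc->counter += 100 - 15;          // counter = 90
	 * A later pmc_read_counter() returns 90 + 0 + 10 == 100, exactly the
	 * value the guest wrote, without pausing or reprogramming the perf_event.
	 */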
@@ -383,14 +450,15 @@ static void reprogram_counter(struct kvm_pmc *pmc)
 	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
 	u64 eventsel = pmc->eventsel;
 	u64 new_config = eventsel;
+	bool emulate_overflow;
 	u8 fixed_ctr_ctrl;
 
-	pmc_pause_counter(pmc);
+	emulate_overflow = pmc_pause_counter(pmc);
 
 	if (!pmc_event_is_allowed(pmc))
 		goto reprogram_complete;
 
-	if (pmc->counter < pmc->prev_counter)
+	if (emulate_overflow)
 		__kvm_perf_overflow(pmc, false);
 
 	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
@@ -430,7 +498,6 @@ static void reprogram_counter(struct kvm_pmc *pmc)
 
 reprogram_complete:
 	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
-	pmc->prev_counter = 0;
 }
 
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
@@ -639,32 +706,60 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	return 0;
 }
 
-/* refresh PMU settings. This function generally is called when underlying
- * settings are changed (such as changes of PMU CPUID by guest VMs), which
- * should rarely happen.
+static void kvm_pmu_reset(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	struct kvm_pmc *pmc;
+	int i;
+
+	pmu->need_cleanup = false;
+
+	bitmap_zero(pmu->reprogram_pmi, X86_PMC_IDX_MAX);
+
+	for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
+		pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
+		if (!pmc)
+			continue;
+
+		pmc_stop_counter(pmc);
+		pmc->counter = 0;
+		pmc->emulated_counter = 0;
+
+		if (pmc_is_gp(pmc))
+			pmc->eventsel = 0;
+	}
+
+	pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = 0;
+
+	static_call_cond(kvm_x86_pmu_reset)(vcpu);
+}
+
+
+/*
+ * Refresh the PMU configuration for the vCPU, e.g. if userspace changes CPUID
+ * and/or PERF_CAPABILITIES.
  */
 void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
 {
 	if (KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm))
 		return;
 
+	/*
+	 * Stop/release all existing counters/events before realizing the new
+	 * vPMU model.
+	 */
+	kvm_pmu_reset(vcpu);
+
 	bitmap_zero(vcpu_to_pmu(vcpu)->all_valid_pmc_idx, X86_PMC_IDX_MAX);
 	static_call(kvm_x86_pmu_refresh)(vcpu);
 }
 
-void kvm_pmu_reset(struct kvm_vcpu *vcpu)
-{
-	static_call(kvm_x86_pmu_reset)(vcpu);
-}
-
 void kvm_pmu_init(struct kvm_vcpu *vcpu)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 
 	memset(pmu, 0, sizeof(*pmu));
 	static_call(kvm_x86_pmu_init)(vcpu);
-	pmu->event_count = 0;
-	pmu->need_cleanup = false;
 	kvm_pmu_refresh(vcpu);
 }
 

@@ -700,8 +795,7 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
 
 static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
 {
-	pmc->prev_counter = pmc->counter;
-	pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);
+	pmc->emulated_counter++;
 	kvm_pmu_request_counter_reprogram(pmc);
 }
 

arch/x86/kvm/pmu.h

Lines changed: 3 additions & 44 deletions
@@ -66,37 +66,16 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc)
 {
 	u64 counter, enabled, running;
 
-	counter = pmc->counter;
+	counter = pmc->counter + pmc->emulated_counter;
+
 	if (pmc->perf_event && !pmc->is_paused)
 		counter += perf_event_read_value(pmc->perf_event,
 						 &enabled, &running);
 	/* FIXME: Scaling needed? */
 	return counter & pmc_bitmask(pmc);
 }
 
-static inline void pmc_write_counter(struct kvm_pmc *pmc, u64 val)
-{
-	pmc->counter += val - pmc_read_counter(pmc);
-	pmc->counter &= pmc_bitmask(pmc);
-}
-
-static inline void pmc_release_perf_event(struct kvm_pmc *pmc)
-{
-	if (pmc->perf_event) {
-		perf_event_release_kernel(pmc->perf_event);
-		pmc->perf_event = NULL;
-		pmc->current_config = 0;
-		pmc_to_pmu(pmc)->event_count--;
-	}
-}
-
-static inline void pmc_stop_counter(struct kvm_pmc *pmc)
-{
-	if (pmc->perf_event) {
-		pmc->counter = pmc_read_counter(pmc);
-		pmc_release_perf_event(pmc);
-	}
-}
+void pmc_write_counter(struct kvm_pmc *pmc, u64 val);
 
 static inline bool pmc_is_gp(struct kvm_pmc *pmc)
 {
@@ -146,25 +125,6 @@ static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
 	return NULL;
 }
 
-static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
-{
-	u64 sample_period = (-counter_value) & pmc_bitmask(pmc);
-
-	if (!sample_period)
-		sample_period = pmc_bitmask(pmc) + 1;
-	return sample_period;
-}
-
-static inline void pmc_update_sample_period(struct kvm_pmc *pmc)
-{
-	if (!pmc->perf_event || pmc->is_paused ||
-	    !is_sampling_event(pmc->perf_event))
-		return;
-
-	perf_event_period(pmc->perf_event,
-			  get_sample_period(pmc, pmc->counter));
-}
-
 static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
 {
 	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -261,7 +221,6 @@ bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
 int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
 int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
 void kvm_pmu_refresh(struct kvm_vcpu *vcpu);
-void kvm_pmu_reset(struct kvm_vcpu *vcpu);
 void kvm_pmu_init(struct kvm_vcpu *vcpu);
 void kvm_pmu_cleanup(struct kvm_vcpu *vcpu);
 void kvm_pmu_destroy(struct kvm_vcpu *vcpu);

arch/x86/kvm/svm/pmu.c

Lines changed: 0 additions & 17 deletions
@@ -161,7 +161,6 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
 	if (pmc) {
 		pmc_write_counter(pmc, data);
-		pmc_update_sample_period(pmc);
 		return 0;
 	}
 	/* MSR_EVNTSELn */
@@ -233,21 +232,6 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
 	}
 }
 
-static void amd_pmu_reset(struct kvm_vcpu *vcpu)
-{
-	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-	int i;
-
-	for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) {
-		struct kvm_pmc *pmc = &pmu->gp_counters[i];
-
-		pmc_stop_counter(pmc);
-		pmc->counter = pmc->prev_counter = pmc->eventsel = 0;
-	}
-
-	pmu->global_ctrl = pmu->global_status = 0;
-}
-
 struct kvm_pmu_ops amd_pmu_ops __initdata = {
 	.hw_event_available = amd_hw_event_available,
 	.pmc_idx_to_pmc = amd_pmc_idx_to_pmc,
@@ -259,7 +243,6 @@ struct kvm_pmu_ops amd_pmu_ops __initdata = {
 	.set_msr = amd_pmu_set_msr,
 	.refresh = amd_pmu_refresh,
 	.init = amd_pmu_init,
-	.reset = amd_pmu_reset,
 	.EVENTSEL_EVENT = AMD64_EVENTSEL_EVENT,
 	.MAX_NR_GP_COUNTERS = KVM_AMD_PMC_MAX_GENERIC,
 	.MIN_NR_GP_COUNTERS = AMD64_NUM_COUNTERS,
