@@ -127,9 +127,9 @@ static void kvm_perf_overflow(struct perf_event *perf_event,
 	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
 
 	/*
-	 * Ignore overflow events for counters that are scheduled to be
-	 * reprogrammed, e.g. if a PMI for the previous event races with KVM's
-	 * handling of a related guest WRMSR.
+	 * Ignore asynchronous overflow events for counters that are scheduled
+	 * to be reprogrammed, e.g. if a PMI for the previous event races with
+	 * KVM's handling of a related guest WRMSR.
 	 */
 	if (test_and_set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi))
 		return;
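The reworded comment describes a narrow race: a perf PMI for the old event can fire while KVM is mid-way through reprogramming the counter for a guest WRMSR. The test_and_set_bit() on reprogram_pmi makes the two paths mutually exclusive; whichever runs second sees the bit already set and bails. Below is a minimal userspace sketch of the same claim-or-skip idiom, with hypothetical names and C11 atomics standing in for the kernel's bitops:

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

/*
 * Claim-or-skip sketch of the reprogram_pmi idiom (hypothetical names;
 * the kernel uses test_and_set_bit() on pmu->reprogram_pmi).  The first
 * path to set the bit owns the reprogram; a racing handler that finds
 * the bit already set simply returns.
 */
static atomic_ulong reprogram_pending;

static bool claim_counter(unsigned int idx)
{
        unsigned long bit = 1UL << idx;

        /* fetch_or returns the old value: bit already set => lost the race */
        return !(atomic_fetch_or(&reprogram_pending, bit) & bit);
}

int main(void)
{
        assert(claim_counter(3));       /* WRMSR path wins...    */
        assert(!claim_counter(3));      /* ...racing PMI bails   */
        return 0;
}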
@@ -161,6 +161,15 @@ static u64 pmc_get_pebs_precise_level(struct kvm_pmc *pmc)
 	return 1;
 }
 
+static u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
+{
+	u64 sample_period = (-counter_value) & pmc_bitmask(pmc);
+
+	if (!sample_period)
+		sample_period = pmc_bitmask(pmc) + 1;
+	return sample_period;
+}
+
 static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config,
 				 bool exclude_user, bool exclude_kernel,
 				 bool intr)
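get_sample_period() converts a guest counter value into a perf sample period, i.e. the number of events until the counter wraps: x86 PMCs count up and interrupt on overflow, so a counter loaded with value V needs a period of 2^width - V, and a full period when V is 0. A standalone sketch of the math, assuming a hypothetical 48-bit counter mask:

#include <assert.h>
#include <stdint.h>

/*
 * Standalone model of the sample-period math, assuming a counter whose
 * width is expressed as a mask such as (1ull << 48) - 1, as
 * pmc_bitmask() does in the real code.
 */
static uint64_t sample_period(uint64_t counter, uint64_t mask)
{
        uint64_t period = (-counter) & mask;

        if (!period)                    /* counter == 0: full wrap */
                period = mask + 1;
        return period;
}

int main(void)
{
        const uint64_t mask = (1ull << 48) - 1;

        assert(sample_period(mask - 1, mask) == 2);   /* two ticks to wrap */
        assert(sample_period(0, mask) == mask + 1);   /* full period       */
        return 0;
}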
@@ -215,17 +224,30 @@ static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config,
 	return 0;
 }
 
-static void pmc_pause_counter(struct kvm_pmc *pmc)
+static bool pmc_pause_counter(struct kvm_pmc *pmc)
 {
 	u64 counter = pmc->counter;
-
-	if (!pmc->perf_event || pmc->is_paused)
-		return;
+	u64 prev_counter;
 
 	/* update counter, reset event value to avoid redundant accumulation */
-	counter += perf_event_pause(pmc->perf_event, true);
+	if (pmc->perf_event && !pmc->is_paused)
+		counter += perf_event_pause(pmc->perf_event, true);
+
+	/*
+	 * Snapshot the previous counter *after* accumulating state from perf.
+	 * If overflow already happened, hardware (via perf) is responsible for
+	 * generating a PMI.  KVM just needs to detect overflow on emulated
+	 * counter events that haven't yet been processed.
+	 */
+	prev_counter = counter & pmc_bitmask(pmc);
+
+	counter += pmc->emulated_counter;
 	pmc->counter = counter & pmc_bitmask(pmc);
+
+	pmc->emulated_counter = 0;
 	pmc->is_paused = true;
+
+	return pmc->counter < prev_counter;
 }
 
 static bool pmc_resume_counter(struct kvm_pmc *pmc)
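The ordering in pmc_pause_counter() is the heart of the change: the hardware delta from perf is folded in before the snapshot, because a wrap caused by hardware counts already raised a real PMI through perf; only a wrap caused by the software-emulated events may be reported by the return value. A sketch of that snapshot-then-compare arithmetic, with hypothetical names and an assumed 48-bit mask:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/*
 * Sketch of pmc_pause_counter()'s ordering (hypothetical names).  The
 * hardware delta is accumulated *before* the snapshot so a hardware
 * wrap, which perf already turned into a PMI, is not reported twice.
 */
static const uint64_t mask = (1ull << 48) - 1;

static bool pause_counter(uint64_t *counter, uint64_t hw_delta,
                          uint64_t emulated)
{
        uint64_t prev;

        *counter += hw_delta;           /* perf_event_pause() accumulation */
        prev = *counter & mask;         /* snapshot AFTER the hw delta     */

        *counter = (*counter + emulated) & mask;
        return *counter < prev;         /* wrapped due to emulated events  */
}

int main(void)
{
        uint64_t c;

        c = mask;                       /* hw wrap alone: perf sent the PMI */
        assert(!pause_counter(&c, 1, 0));

        c = mask;                       /* emulated event causes the wrap   */
        assert(pause_counter(&c, 0, 1));
        return 0;
}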
@@ -250,6 +272,51 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
 	return true;
 }
 
+static void pmc_release_perf_event(struct kvm_pmc *pmc)
+{
+	if (pmc->perf_event) {
+		perf_event_release_kernel(pmc->perf_event);
+		pmc->perf_event = NULL;
+		pmc->current_config = 0;
+		pmc_to_pmu(pmc)->event_count--;
+	}
+}
+
+static void pmc_stop_counter(struct kvm_pmc *pmc)
+{
+	if (pmc->perf_event) {
+		pmc->counter = pmc_read_counter(pmc);
+		pmc_release_perf_event(pmc);
+	}
+}
+
+static void pmc_update_sample_period(struct kvm_pmc *pmc)
+{
+	if (!pmc->perf_event || pmc->is_paused ||
+	    !is_sampling_event(pmc->perf_event))
+		return;
+
+	perf_event_period(pmc->perf_event,
+			  get_sample_period(pmc, pmc->counter));
+}
+
+void pmc_write_counter(struct kvm_pmc *pmc, u64 val)
+{
+	/*
+	 * Drop any unconsumed accumulated counts, the WRMSR is a write, not a
+	 * read-modify-write.  Adjust the counter value so that its value is
+	 * relative to the current count, as reading the current count from
+	 * perf is faster than pausing and reprogramming the event in order to
+	 * reset it to '0'.  Note, this very sneakily offsets the accumulated
+	 * emulated count too, by using pmc_read_counter()!
+	 */
+	pmc->emulated_counter = 0;
+	pmc->counter += val - pmc_read_counter(pmc);
+	pmc->counter &= pmc_bitmask(pmc);
+	pmc_update_sample_period(pmc);
+}
+EXPORT_SYMBOL_GPL(pmc_write_counter);
+
 static int filter_cmp(const void *pa, const void *pb, u64 mask)
 {
 	u64 a = *(u64 *)pa & mask;
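The comment in pmc_write_counter() is worth unpacking: instead of pausing perf and zeroing the hardware count, the WRMSR rebases pmc->counter against whatever pmc_read_counter() currently reports, so the masked sum of base, emulated, and live hardware counts lands exactly on 'val'. A toy model of that invariant, with hypothetical names standing in for the real read path:

#include <assert.h>
#include <stdint.h>

/*
 * Toy of the WRMSR rebase trick (hypothetical names; the real read path
 * is the masked sum of pmc->counter, pmc->emulated_counter, and the live
 * perf count).  The write adjusts the stored base so a later read
 * returns exactly 'val', without reprogramming the perf event.
 */
static const uint64_t mask = (1ull << 48) - 1;  /* assumed 48-bit counter */

struct toy_pmc {
        uint64_t counter;       /* KVM's base value           */
        uint64_t emulated;      /* emulated (software) events */
        uint64_t hw;            /* live perf event count      */
};

static uint64_t toy_read(struct toy_pmc *p)
{
        return (p->counter + p->emulated + p->hw) & mask;
}

static void toy_write(struct toy_pmc *p, uint64_t val)
{
        p->emulated = 0;                     /* WRMSR drops unconsumed counts */
        p->counter += val - toy_read(p);     /* rebase against the live count */
        p->counter &= mask;
}

int main(void)
{
        struct toy_pmc p = { .counter = 100, .emulated = 7, .hw = 50 };

        toy_write(&p, 42);
        assert(toy_read(&p) == 42);          /* holds while p.hw is unchanged */
        return 0;
}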
@@ -383,14 +450,15 @@ static void reprogram_counter(struct kvm_pmc *pmc)
 	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
 	u64 eventsel = pmc->eventsel;
 	u64 new_config = eventsel;
+	bool emulate_overflow;
 	u8 fixed_ctr_ctrl;
 
-	pmc_pause_counter(pmc);
+	emulate_overflow = pmc_pause_counter(pmc);
 
 	if (!pmc_event_is_allowed(pmc))
 		goto reprogram_complete;
 
-	if (pmc->counter < pmc->prev_counter)
+	if (emulate_overflow)
 		__kvm_perf_overflow(pmc, false);
 
 	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
@@ -430,7 +498,6 @@ static void reprogram_counter(struct kvm_pmc *pmc)
 
 reprogram_complete:
 	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
-	pmc->prev_counter = 0;
 }
 
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
@@ -639,32 +706,60 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	return 0;
 }
 
-/* refresh PMU settings. This function generally is called when underlying
- * settings are changed (such as changes of PMU CPUID by guest VMs), which
- * should rarely happen.
+static void kvm_pmu_reset(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	struct kvm_pmc *pmc;
+	int i;
+
+	pmu->need_cleanup = false;
+
+	bitmap_zero(pmu->reprogram_pmi, X86_PMC_IDX_MAX);
+
+	for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
+		pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
+		if (!pmc)
+			continue;
+
+		pmc_stop_counter(pmc);
+		pmc->counter = 0;
+		pmc->emulated_counter = 0;
+
+		if (pmc_is_gp(pmc))
+			pmc->eventsel = 0;
+	}
+
+	pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = 0;
+
+	static_call_cond(kvm_x86_pmu_reset)(vcpu);
+}
+
+
+/*
+ * Refresh the PMU configuration for the vCPU, e.g. if userspace changes CPUID
+ * and/or PERF_CAPABILITIES.
  */
 void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
 {
 	if (KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm))
 		return;
 
+	/*
+	 * Stop/release all existing counters/events before realizing the new
+	 * vPMU model.
+	 */
+	kvm_pmu_reset(vcpu);
+
 	bitmap_zero(vcpu_to_pmu(vcpu)->all_valid_pmc_idx, X86_PMC_IDX_MAX);
 	static_call(kvm_x86_pmu_refresh)(vcpu);
 }
 
-void kvm_pmu_reset(struct kvm_vcpu *vcpu)
-{
-	static_call(kvm_x86_pmu_reset)(vcpu);
-}
-
 void kvm_pmu_init(struct kvm_vcpu *vcpu)
{
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 
 	memset(pmu, 0, sizeof(*pmu));
 	static_call(kvm_x86_pmu_init)(vcpu);
-	pmu->event_count = 0;
-	pmu->need_cleanup = false;
 	kvm_pmu_refresh(vcpu);
 }
 
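The refresh path now resets first for a reason: realizing a new vPMU model while the old model's perf events are still live would strand those events, since the new model may expose fewer or different counters and pmu->event_count would never drain. A deliberately tiny model of that ordering, with hypothetical names that bear no resemblance to the real structures:

#include <assert.h>

/*
 * Toy of the reset-before-refresh ordering (hypothetical names).  If
 * refresh realized the new model without first releasing the old
 * model's events, a shrunken model would leak the extra events.
 */
struct toy_pmu {
        int nr_counters;
        int event_count;        /* live perf events backing counters */
};

static void toy_reset(struct toy_pmu *pmu)
{
        pmu->event_count = 0;   /* release every event, old model included */
}

static void toy_refresh(struct toy_pmu *pmu, int new_nr_counters)
{
        toy_reset(pmu);         /* must precede realizing the new model */
        pmu->nr_counters = new_nr_counters;
}

int main(void)
{
        struct toy_pmu pmu = { .nr_counters = 8, .event_count = 8 };

        toy_refresh(&pmu, 4);   /* shrink the vPMU model */
        assert(pmu.event_count == 0);
        return 0;
}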
@@ -700,8 +795,7 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
 
 static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
 {
-	pmc->prev_counter = pmc->counter;
-	pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);
+	pmc->emulated_counter++;
 	kvm_pmu_request_counter_reprogram(pmc);
 }
 
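This last hunk is the payoff: emulated events no longer touch pmc->counter directly, which is what let the earlier hunks delete pmc->prev_counter (and its clearing in reprogram_complete, where a racing reprogram could clobber the snapshot and lose an overflow). kvm_pmu_incr_counter() just bumps emulated_counter and requests reprogramming; the overflow decision is deferred to pmc_pause_counter(), after the hardware count has been accumulated. A compressed toy of the whole pipeline, with hypothetical names and an assumed 48-bit mask:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/*
 * Compressed toy of the emulated-event pipeline (hypothetical names):
 * emulation only bumps 'emulated' and marks the counter; the deferred
 * reprogram folds the pending events in and decides whether a synthetic
 * PMI is owed.
 */
static const uint64_t mask = (1ull << 48) - 1;

struct toy_pmc {
        uint64_t counter;
        uint64_t emulated;
        bool reprogram;         /* stand-in for the reprogram_pmi bit */
};

static void toy_incr(struct toy_pmc *p)        /* kvm_pmu_incr_counter() */
{
        p->emulated++;
        p->reprogram = true;
}

static bool toy_reprogram(struct toy_pmc *p)   /* pause + overflow check */
{
        uint64_t prev = p->counter & mask;

        p->counter = (p->counter + p->emulated) & mask;
        p->emulated = 0;
        p->reprogram = false;
        return p->counter < prev;       /* true => emulate a PMI */
}

int main(void)
{
        struct toy_pmc p = { .counter = mask }; /* one event from wrapping */

        toy_incr(&p);
        assert(p.reprogram);
        assert(toy_reprogram(&p));      /* emulated event wrapped: inject PMI */
        return 0;
}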