@@ -2563,6 +2563,45 @@ static void intel_pmu_disable_fixed(struct perf_event *event)
 	cpuc->fixed_ctrl_val &= ~mask;
 }
 
+static inline void __intel_pmu_update_event_ext(int idx, u64 ext)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	u32 msr;
+
+	if (idx < INTEL_PMC_IDX_FIXED) {
+		msr = MSR_IA32_PMC_V6_GP0_CFG_C +
+		      x86_pmu.addr_offset(idx, false);
+	} else {
+		msr = MSR_IA32_PMC_V6_FX0_CFG_C +
+		      x86_pmu.addr_offset(idx - INTEL_PMC_IDX_FIXED, false);
+	}
+
+	cpuc->cfg_c_val[idx] = ext;
+	wrmsrq(msr, ext);
+}
+
+static void intel_pmu_disable_event_ext(struct perf_event *event)
+{
+	if (!x86_pmu.arch_pebs)
+		return;
+
+	/*
+	 * Only clear the CFG_C MSR for PEBS counter group events.
+	 * This avoids the HW counter's value being incorrectly
+	 * included in other PEBS records after the PEBS counter
+	 * group events are disabled.
+	 *
+	 * For other events it is unnecessary to clear CFG_C, since
+	 * CFG_C has no effect while the counter is disabled; skipping
+	 * the write reduces WRMSR overhead on context switches.
+	 */
+	if (!is_pebs_counter_event_group(event))
+		return;
+
+	__intel_pmu_update_event_ext(event->hw.idx, 0);
+}
+
 static void intel_pmu_disable_event(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
@@ -2571,9 +2610,12 @@ static void intel_pmu_disable_event(struct perf_event *event)
 	switch (idx) {
 	case 0 ... INTEL_PMC_IDX_FIXED - 1:
 		intel_clear_masks(event, idx);
+		intel_pmu_disable_event_ext(event);
 		x86_pmu_disable_event(event);
 		break;
 	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
+		intel_pmu_disable_event_ext(event);
+		fallthrough;
 	case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
 		intel_pmu_disable_fixed(event);
 		break;
@@ -2940,6 +2982,66 @@ static void intel_pmu_enable_acr(struct perf_event *event)
 
 DEFINE_STATIC_CALL_NULL(intel_pmu_enable_acr_event, intel_pmu_enable_acr);
 
+static void intel_pmu_enable_event_ext(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	union arch_pebs_index old, new;
+	struct arch_pebs_cap cap;
+	u64 ext = 0;
+
+	if (!x86_pmu.arch_pebs)
+		return;
+
+	cap = hybrid(cpuc->pmu, arch_pebs_cap);
+
+	if (event->attr.precise_ip) {
+		u64 pebs_data_cfg = intel_get_arch_pebs_data_config(event);
+
+		ext |= ARCH_PEBS_EN;
+		if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD)
+			ext |= (-hwc->sample_period) & ARCH_PEBS_RELOAD;
+
+		if (pebs_data_cfg && cap.caps) {
+			if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
+				ext |= ARCH_PEBS_AUX & cap.caps;
+
+			if (pebs_data_cfg & PEBS_DATACFG_GP)
+				ext |= ARCH_PEBS_GPR & cap.caps;
+
+			if (pebs_data_cfg & PEBS_DATACFG_XMMS)
+				ext |= ARCH_PEBS_VECR_XMM & cap.caps;
+
+			if (pebs_data_cfg & PEBS_DATACFG_LBRS)
+				ext |= ARCH_PEBS_LBR & cap.caps;
+		}
+
+		if (cpuc->n_pebs == cpuc->n_large_pebs)
+			new.thresh = ARCH_PEBS_THRESH_MULTI;
+		else
+			new.thresh = ARCH_PEBS_THRESH_SINGLE;
+
+		rdmsrq(MSR_IA32_PEBS_INDEX, old.whole);
+		if (new.thresh != old.thresh || !old.en) {
+			if (old.thresh == ARCH_PEBS_THRESH_MULTI && old.wr > 0) {
+				/*
+				 * Large PEBS was enabled.
+				 * Drain the PEBS buffer before switching
+				 * to single-record PEBS.
+				 */
+				intel_pmu_drain_pebs_buffer();
+			} else {
+				new.wr = 0;
+				new.full = 0;
+				new.en = 1;
+				wrmsrq(MSR_IA32_PEBS_INDEX, new.whole);
+			}
+		}
+	}
+
+	if (cpuc->cfg_c_val[hwc->idx] != ext)
+		__intel_pmu_update_event_ext(hwc->idx, ext);
+}
+
 static void intel_pmu_enable_event(struct perf_event *event)
 {
 	u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
@@ -2955,10 +3057,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
 			enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
 		intel_set_masks(event, idx);
 		static_call_cond(intel_pmu_enable_acr_event)(event);
+		intel_pmu_enable_event_ext(event);
 		__x86_pmu_enable_event(hwc, enable_mask);
 		break;
 	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
 		static_call_cond(intel_pmu_enable_acr_event)(event);
+		intel_pmu_enable_event_ext(event);
 		fallthrough;
 	case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
 		intel_pmu_enable_fixed(event);
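The subtle part of `intel_pmu_enable_event_ext()` is the `MSR_IA32_PEBS_INDEX` dance: when the interrupt threshold changes, or PEBS is not yet enabled, the code either drains a previously active large-PEBS buffer or re-arms the index from scratch. A stand-alone model of that decision logic follows; the `union arch_pebs_index` field names, widths, and positions are assumptions made purely for illustration (the real layout comes from elsewhere in the patch series), and only the control flow mirrors the patch:

```c
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#define ARCH_PEBS_THRESH_SINGLE	1	/* assumed: interrupt per record */
#define ARCH_PEBS_THRESH_MULTI	2	/* assumed: interrupt near full */

union arch_pebs_index {
	struct {
		uint64_t rsvd:4;
		uint64_t wr:23;		/* records written since last drain */
		uint64_t rsvd2:4;
		uint64_t full:1;	/* buffer-full flag */
		uint64_t en:1;		/* PEBS enabled */
		uint64_t rsvd3:26;
		uint64_t thresh:5;	/* interrupt threshold encoding */
	};
	uint64_t whole;
};

static void rearm_pebs_index(union arch_pebs_index old, bool large_pebs)
{
	union arch_pebs_index new = old;	/* model: start from old state */

	new.thresh = large_pebs ? ARCH_PEBS_THRESH_MULTI
				: ARCH_PEBS_THRESH_SINGLE;

	if (new.thresh == old.thresh && old.en)
		return;			/* nothing to reprogram */

	if (old.thresh == ARCH_PEBS_THRESH_MULTI && old.wr > 0) {
		/* Large PEBS was active with pending records: drain first. */
		printf("drain PEBS buffer\n");
	} else {
		new.wr = 0;
		new.full = 0;
		new.en = 1;
		printf("wrmsr PEBS_INDEX = 0x%llx\n",
		       (unsigned long long)new.whole);
	}
}

int main(void)
{
	union arch_pebs_index old = { .whole = 0 };

	rearm_pebs_index(old, false);	/* first enable: re-arm, single PEBS */
	return 0;
}
```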
@@ -5301,6 +5405,30 @@ static inline bool intel_pmu_broken_perf_cap(void)
 	return false;
 }
 
+static inline void __intel_update_pmu_caps(struct pmu *pmu)
+{
+	struct pmu *dest_pmu = pmu ? pmu : x86_get_pmu(smp_processor_id());
+
+	if (hybrid(pmu, arch_pebs_cap).caps & ARCH_PEBS_VECR_XMM)
+		dest_pmu->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
+}
+
+static inline void __intel_update_large_pebs_flags(struct pmu *pmu)
+{
+	u64 caps = hybrid(pmu, arch_pebs_cap).caps;
+
+	x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
+	if (caps & ARCH_PEBS_LBR)
+		x86_pmu.large_pebs_flags |= PERF_SAMPLE_BRANCH_STACK;
+
+	if (!(caps & ARCH_PEBS_AUX))
+		x86_pmu.large_pebs_flags &= ~PERF_SAMPLE_DATA_SRC;
+	if (!(caps & ARCH_PEBS_GPR)) {
+		x86_pmu.large_pebs_flags &=
+			~(PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER);
+	}
+}
+
 #define counter_mask(_gp, _fixed)	((_gp) | ((u64)(_fixed) << INTEL_PMC_IDX_FIXED))
 
 static void update_pmu_cap(struct pmu *pmu)
@@ -5349,8 +5477,12 @@ static void update_pmu_cap(struct pmu *pmu)
 		hybrid(pmu, arch_pebs_cap).counters = pebs_mask;
 		hybrid(pmu, arch_pebs_cap).pdists = pdists_mask;
 
-		if (WARN_ON((pebs_mask | pdists_mask) & ~cntrs_mask))
+		if (WARN_ON((pebs_mask | pdists_mask) & ~cntrs_mask)) {
 			x86_pmu.arch_pebs = 0;
+		} else {
+			__intel_update_pmu_caps(pmu);
+			__intel_update_large_pebs_flags(pmu);
+		}
 	} else {
 		WARN_ON(x86_pmu.arch_pebs == 1);
 		x86_pmu.arch_pebs = 0;
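`__intel_update_large_pebs_flags()` shrinks or grows the set of sample types allowed in large (multi-record) PEBS to match what the hardware can auto-record; events requesting anything outside that set fall back to single-record PEBS. A stand-alone sketch of the pruning, where the `PERF_SAMPLE_*` constants come from the perf uapi but the `ARCH_PEBS_*` capability bit values are assumptions:

```c
#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>

#define ARCH_PEBS_AUX	(1ULL << 32)	/* assumed capability bits */
#define ARCH_PEBS_GPR	(1ULL << 33)
#define ARCH_PEBS_LBR	(1ULL << 35)

static uint64_t update_large_pebs_flags(uint64_t flags, uint64_t caps)
{
	flags |= PERF_SAMPLE_TIME;	/* arch-PEBS records carry a timestamp */
	if (caps & ARCH_PEBS_LBR)
		flags |= PERF_SAMPLE_BRANCH_STACK;

	/* Strip groups the CPU cannot record into a PEBS record. */
	if (!(caps & ARCH_PEBS_AUX))
		flags &= ~PERF_SAMPLE_DATA_SRC;
	if (!(caps & ARCH_PEBS_GPR))
		flags &= ~(PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER);

	return flags;
}

int main(void)
{
	/* Hardware with GPR support but neither LBR nor memory-aux groups. */
	uint64_t flags = update_large_pebs_flags(
		PERF_SAMPLE_IP | PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_REGS_INTR,
		ARCH_PEBS_GPR);

	printf("large_pebs_flags = 0x%llx\n", (unsigned long long)flags);
	return 0;
}
```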
@@ -5514,6 +5646,8 @@ static void intel_pmu_cpu_starting(int cpu)
 		}
 	}
 
+	__intel_update_pmu_caps(cpuc->pmu);
+
 	if (!cpuc->shared_regs)
 		return;
 
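Calling `__intel_update_pmu_caps()` from `intel_pmu_cpu_starting()` ensures that hotplugged CPUs also advertise `PERF_PMU_CAP_EXTENDED_REGS` when XMM recording is available. From userspace, that capability is what lets `perf_event_open()` accept XMM registers in the sample-regs masks. A minimal sketch of an open that depends on it; the XMM mask-bit layout (`PERF_REG_X86_XMM0` = 32, two mask bits per 128-bit register) follows the existing x86 extended-regs uapi, and error handling beyond a `perror()` is elided:

```c
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

#define PERF_REG_X86_XMM0	32	/* mirrors the uapi value */

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.precise_ip = 2;	/* request a PEBS-backed event */
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_REGS_INTR;
	/* XMM0 spans two 64-bit slots, hence two mask bits. */
	attr.sample_regs_intr = 0x3ULL << PERF_REG_X86_XMM0;

	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		perror("perf_event_open (XMM regs need PERF_PMU_CAP_EXTENDED_REGS)");
	else
		close(fd);
	return 0;
}
```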