Commit 52448a0

Dapeng Mi authored and Peter Zijlstra committed
perf/x86/intel: Setup PEBS data configuration and enable legacy groups
Unlike legacy PEBS, arch-PEBS provides per-counter PEBS data configuration, programmed through the IA32_PMC_GPx/FXx_CFG_C MSRs. This patch obtains the PEBS data configuration from the event attribute, writes it to the corresponding IA32_PMC_GPx/FXx_CFG_C MSR, and enables the corresponding PEBS groups.

Note that this patch only enables XMM SIMD register sampling for arch-PEBS; sampling of the other SIMD registers (OPMASK/YMM/ZMM) will be supported once PMI-based OPMASK/YMM/ZMM sampling is in place.

Co-developed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251029102136.61364-12-dapeng1.mi@linux.intel.com
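For orientation, here is a minimal user-space sketch (not part of this patch) of an event that would exercise the XMM group enabled here: precise_ip selects PEBS, and sampling XMM registers through PERF_SAMPLE_REGS_INTR relies on the ARCH_PEBS_VECR_XMM capability this change advertises. The helper name and register mask are illustrative; XMM0 occupies two u64 slots starting at PERF_REG_X86_XMM0 in the x86 perf_regs UAPI.

	#include <string.h>
	#include <linux/perf_event.h>

	/* Illustrative only: a cycles event sampling XMM0 via PEBS. */
	static void setup_pebs_xmm_attr(struct perf_event_attr *attr)
	{
		memset(attr, 0, sizeof(*attr));
		attr->size = sizeof(*attr);
		attr->type = PERF_TYPE_HARDWARE;
		attr->config = PERF_COUNT_HW_CPU_CYCLES;
		attr->sample_period = 100003;
		attr->precise_ip = 2;	/* request PEBS */
		attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_REGS_INTR;
		attr->sample_regs_intr = 3ULL << 32;	/* both halves of XMM0 */
	}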
1 parent e89c5d1 commit 52448a0

5 files changed

Lines changed: 171 additions & 1 deletion

arch/x86/events/intel/core.c

Lines changed: 135 additions & 1 deletion
@@ -2563,6 +2563,45 @@ static void intel_pmu_disable_fixed(struct perf_event *event)
 	cpuc->fixed_ctrl_val &= ~mask;
 }
 
+static inline void __intel_pmu_update_event_ext(int idx, u64 ext)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	u32 msr;
+
+	if (idx < INTEL_PMC_IDX_FIXED) {
+		msr = MSR_IA32_PMC_V6_GP0_CFG_C +
+		      x86_pmu.addr_offset(idx, false);
+	} else {
+		msr = MSR_IA32_PMC_V6_FX0_CFG_C +
+		      x86_pmu.addr_offset(idx - INTEL_PMC_IDX_FIXED, false);
+	}
+
+	cpuc->cfg_c_val[idx] = ext;
+	wrmsrq(msr, ext);
+}
+
+static void intel_pmu_disable_event_ext(struct perf_event *event)
+{
+	if (!x86_pmu.arch_pebs)
+		return;
+
+	/*
+	 * Only clear the CFG_C MSR for PEBS counter group events.
+	 * This avoids the HW counter's value being added into other
+	 * PEBS records incorrectly after PEBS counter group events
+	 * are disabled.
+	 *
+	 * For other events it's unnecessary to clear the CFG_C MSRs,
+	 * since CFG_C has no effect while the counter is disabled.
+	 * That helps to reduce the WRMSR overhead in context switches.
+	 */
+	if (!is_pebs_counter_event_group(event))
+		return;
+
+	__intel_pmu_update_event_ext(event->hw.idx, 0);
+}
+
 static void intel_pmu_disable_event(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
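As an aside, the CFG_C MSR lookup above is plain stride arithmetic from a general-purpose or fixed-counter base. A hedged sketch of the same math, assuming the PMU v6 convention of a constant stride between per-counter MSRs (the helper name and stride constant are illustrative, not from this patch):

	/* Illustrative stand-in for x86_pmu.addr_offset() with a 4-MSR stride. */
	#define HYP_MSR_STRIDE	4

	static unsigned int hyp_cfg_c_msr(unsigned int gp0_base, unsigned int fx0_base,
					  int idx, int first_fixed_idx)
	{
		if (idx < first_fixed_idx)	/* general-purpose counter */
			return gp0_base + idx * HYP_MSR_STRIDE;

		/* fixed counter: re-base the index into the fixed-counter range */
		return fx0_base + (idx - first_fixed_idx) * HYP_MSR_STRIDE;
	}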
@@ -2571,9 +2610,12 @@ static void intel_pmu_disable_event(struct perf_event *event)
 	switch (idx) {
 	case 0 ... INTEL_PMC_IDX_FIXED - 1:
 		intel_clear_masks(event, idx);
+		intel_pmu_disable_event_ext(event);
 		x86_pmu_disable_event(event);
 		break;
 	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
+		intel_pmu_disable_event_ext(event);
+		fallthrough;
 	case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
 		intel_pmu_disable_fixed(event);
 		break;
@@ -2940,6 +2982,66 @@ static void intel_pmu_enable_acr(struct perf_event *event)
 
 DEFINE_STATIC_CALL_NULL(intel_pmu_enable_acr_event, intel_pmu_enable_acr);
 
+static void intel_pmu_enable_event_ext(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	union arch_pebs_index old, new;
+	struct arch_pebs_cap cap;
+	u64 ext = 0;
+
+	if (!x86_pmu.arch_pebs)
+		return;
+
+	cap = hybrid(cpuc->pmu, arch_pebs_cap);
+
+	if (event->attr.precise_ip) {
+		u64 pebs_data_cfg = intel_get_arch_pebs_data_config(event);
+
+		ext |= ARCH_PEBS_EN;
+		if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD)
+			ext |= (-hwc->sample_period) & ARCH_PEBS_RELOAD;
+
+		if (pebs_data_cfg && cap.caps) {
+			if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
+				ext |= ARCH_PEBS_AUX & cap.caps;
+
+			if (pebs_data_cfg & PEBS_DATACFG_GP)
+				ext |= ARCH_PEBS_GPR & cap.caps;
+
+			if (pebs_data_cfg & PEBS_DATACFG_XMMS)
+				ext |= ARCH_PEBS_VECR_XMM & cap.caps;
+
+			if (pebs_data_cfg & PEBS_DATACFG_LBRS)
+				ext |= ARCH_PEBS_LBR & cap.caps;
+		}
+
+		if (cpuc->n_pebs == cpuc->n_large_pebs)
+			new.thresh = ARCH_PEBS_THRESH_MULTI;
+		else
+			new.thresh = ARCH_PEBS_THRESH_SINGLE;
+
+		rdmsrq(MSR_IA32_PEBS_INDEX, old.whole);
+		if (new.thresh != old.thresh || !old.en) {
+			if (old.thresh == ARCH_PEBS_THRESH_MULTI && old.wr > 0) {
+				/*
+				 * Large PEBS was enabled.
+				 * Drain the PEBS buffer before applying
+				 * the single-record threshold.
+				 */
+				intel_pmu_drain_pebs_buffer();
+			} else {
+				new.wr = 0;
+				new.full = 0;
+				new.en = 1;
+				wrmsrq(MSR_IA32_PEBS_INDEX, new.whole);
+			}
+		}
+	}
+
+	if (cpuc->cfg_c_val[hwc->idx] != ext)
+		__intel_pmu_update_event_ext(hwc->idx, ext);
+}
+
 static void intel_pmu_enable_event(struct perf_event *event)
 {
 	u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
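To make the bit packing in intel_pmu_enable_event_ext() concrete, a worked example with illustrative values: an auto-reload PEBS event with period 0x10000 that samples GPRs and XMM registers, assuming both capability bits are advertised, yields:

	u64 ext = 0;
	const u64 period = 0x10000;

	ext |= ARCH_PEBS_EN;			/* bit 63 */
	ext |= (-period) & ARCH_PEBS_RELOAD;	/* reload in bits 0-31: 0xffff0000 */
	ext |= ARCH_PEBS_GPR;			/* bit 61 */
	ext |= ARCH_PEBS_VECR_XMM;		/* bit 49 */
	/* ext == 0xa0020000ffff0000 */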
@@ -2955,10 +3057,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
 			enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
 		intel_set_masks(event, idx);
 		static_call_cond(intel_pmu_enable_acr_event)(event);
+		intel_pmu_enable_event_ext(event);
 		__x86_pmu_enable_event(hwc, enable_mask);
 		break;
 	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
 		static_call_cond(intel_pmu_enable_acr_event)(event);
+		intel_pmu_enable_event_ext(event);
 		fallthrough;
 	case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
 		intel_pmu_enable_fixed(event);
@@ -5301,6 +5405,30 @@ static inline bool intel_pmu_broken_perf_cap(void)
 	return false;
 }
 
+static inline void __intel_update_pmu_caps(struct pmu *pmu)
+{
+	struct pmu *dest_pmu = pmu ? pmu : x86_get_pmu(smp_processor_id());
+
+	if (hybrid(pmu, arch_pebs_cap).caps & ARCH_PEBS_VECR_XMM)
+		dest_pmu->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
+}
+
+static inline void __intel_update_large_pebs_flags(struct pmu *pmu)
+{
+	u64 caps = hybrid(pmu, arch_pebs_cap).caps;
+
+	x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
+	if (caps & ARCH_PEBS_LBR)
+		x86_pmu.large_pebs_flags |= PERF_SAMPLE_BRANCH_STACK;
+
+	if (!(caps & ARCH_PEBS_AUX))
+		x86_pmu.large_pebs_flags &= ~PERF_SAMPLE_DATA_SRC;
+	if (!(caps & ARCH_PEBS_GPR)) {
+		x86_pmu.large_pebs_flags &=
+			~(PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER);
+	}
+}
+
 #define counter_mask(_gp, _fixed) ((_gp) | ((u64)(_fixed) << INTEL_PMC_IDX_FIXED))
 
 static void update_pmu_cap(struct pmu *pmu)
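For context, large_pebs_flags is the set of sample fields that multi-record (large) PEBS can satisfy from the record alone; an event requesting anything outside this set must fall back to single-record PEBS so the remaining data can be gathered at PMI time. A minimal sketch of that gating, with a hypothetical helper name (the real kernel check also accounts for frequency-mode events and other constraints):

	static bool hyp_can_use_large_pebs(u64 sample_type, u64 large_pebs_flags)
	{
		/* every requested field must come from the PEBS record itself */
		return !(sample_type & ~large_pebs_flags);
	}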
@@ -5349,8 +5477,12 @@ static void update_pmu_cap(struct pmu *pmu)
 		hybrid(pmu, arch_pebs_cap).counters = pebs_mask;
 		hybrid(pmu, arch_pebs_cap).pdists = pdists_mask;
 
-		if (WARN_ON((pebs_mask | pdists_mask) & ~cntrs_mask))
+		if (WARN_ON((pebs_mask | pdists_mask) & ~cntrs_mask)) {
 			x86_pmu.arch_pebs = 0;
+		} else {
+			__intel_update_pmu_caps(pmu);
+			__intel_update_large_pebs_flags(pmu);
+		}
 	} else {
 		WARN_ON(x86_pmu.arch_pebs == 1);
 		x86_pmu.arch_pebs = 0;
@@ -5514,6 +5646,8 @@ static void intel_pmu_cpu_starting(int cpu)
 		}
 	}
 
+	__intel_update_pmu_caps(cpuc->pmu);
+
 	if (!cpuc->shared_regs)
 		return;
 
arch/x86/events/intel/ds.c

Lines changed: 17 additions & 0 deletions
@@ -1528,6 +1528,18 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
 	}
 }
 
+u64 intel_get_arch_pebs_data_config(struct perf_event *event)
+{
+	u64 pebs_data_cfg = 0;
+
+	if (WARN_ON(event->hw.idx < 0 || event->hw.idx >= X86_PMC_IDX_MAX))
+		return 0;
+
+	pebs_data_cfg |= pebs_update_adaptive_cfg(event);
+
+	return pebs_data_cfg;
+}
+
 void intel_pmu_pebs_add(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
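The heavy lifting is delegated to pebs_update_adaptive_cfg(), which derives the PEBS_DATACFG_* bits from the event's sample_type. A simplified sketch of that mapping, not the kernel's exact implementation (which also handles sample weights, XMM-register selection, and LBR depth):

	static u64 hyp_adaptive_cfg(const struct perf_event_attr *attr)
	{
		u64 cfg = 0;

		if (attr->sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC))
			cfg |= PEBS_DATACFG_MEMINFO;	/* memory auxiliary info */
		if (attr->sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER))
			cfg |= PEBS_DATACFG_GP;	/* plus PEBS_DATACFG_XMMS if XMM regs are requested */
		if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK)
			cfg |= PEBS_DATACFG_LBRS;

		return cfg;
	}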
@@ -2947,6 +2959,11 @@ static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
 
 	index.wr = 0;
 	index.full = 0;
+	index.en = 1;
+	if (cpuc->n_pebs == cpuc->n_large_pebs)
+		index.thresh = ARCH_PEBS_THRESH_MULTI;
+	else
+		index.thresh = ARCH_PEBS_THRESH_SINGLE;
 	wrmsrq(MSR_IA32_PEBS_INDEX, index.whole);
 
 	mask = hybrid(cpuc->pmu, arch_pebs_cap).counters & cpuc->pebs_enabled;

arch/x86/events/perf_event.h

Lines changed: 4 additions & 0 deletions
@@ -304,6 +304,8 @@ struct cpu_hw_events {
 	/* Intel ACR configuration */
 	u64				acr_cfg_b[X86_PMC_IDX_MAX];
 	u64				acr_cfg_c[X86_PMC_IDX_MAX];
+	/* Cached CFG_C values */
+	u64				cfg_c_val[X86_PMC_IDX_MAX];
 
 	/*
 	 * Intel LBR bits
@@ -1782,6 +1784,8 @@ void intel_pmu_pebs_data_source_cmt(void);
 
 void intel_pmu_pebs_data_source_lnl(void);
 
+u64 intel_get_arch_pebs_data_config(struct perf_event *event);
+
 int intel_pmu_setup_lbr_filter(struct perf_event *event);
 
 void intel_pt_interrupt(void);

arch/x86/include/asm/intel_ds.h

Lines changed: 7 additions & 0 deletions
@@ -7,6 +7,13 @@
 #define PEBS_BUFFER_SHIFT	4
 #define PEBS_BUFFER_SIZE	(PAGE_SIZE << PEBS_BUFFER_SHIFT)
 
+/*
+ * The largest PEBS record could consume a whole page; make sure at
+ * least one record can still be written after the threshold PMI
+ * triggers.
+ */
+#define ARCH_PEBS_THRESH_MULTI	((PEBS_BUFFER_SIZE - PAGE_SIZE) >> PEBS_BUFFER_SHIFT)
+#define ARCH_PEBS_THRESH_SINGLE	1
+
 /* The maximal number of PEBS events: */
 #define MAX_PEBS_EVENTS_FMT4	8
 #define MAX_PEBS_EVENTS		32
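Plugging in numbers, assuming 4 KiB pages (PAGE_SIZE is configuration dependent, so treat these values as illustrative): PEBS_BUFFER_SIZE = 4096 << 4 = 64 KiB, and ARCH_PEBS_THRESH_MULTI = (65536 - 4096) >> 4 = 3840, i.e. the threshold leaves exactly one page of headroom so a maximal, page-sized record can still land after the threshold PMI fires.

	#include <assert.h>

	int main(void)
	{
		const unsigned long page = 4096;	/* assumed 4 KiB pages */
		const unsigned long buf = page << 4;	/* PEBS_BUFFER_SIZE: 64 KiB */

		/* one page of headroom below the buffer end */
		assert(((buf - page) >> 4) == 3840);	/* ARCH_PEBS_THRESH_MULTI */
		return 0;
	}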

arch/x86/include/asm/msr-index.h

Lines changed: 8 additions & 0 deletions
@@ -333,6 +333,14 @@
 #define ARCH_PEBS_OFFSET_MASK		0x7fffff
 #define ARCH_PEBS_INDEX_WR_SHIFT	4
 
+#define ARCH_PEBS_RELOAD		0xffffffff
+#define ARCH_PEBS_LBR_SHIFT		40
+#define ARCH_PEBS_LBR			(0x3ull << ARCH_PEBS_LBR_SHIFT)
+#define ARCH_PEBS_VECR_XMM		BIT_ULL(49)
+#define ARCH_PEBS_GPR			BIT_ULL(61)
+#define ARCH_PEBS_AUX			BIT_ULL(62)
+#define ARCH_PEBS_EN			BIT_ULL(63)
+
 #define MSR_IA32_RTIT_CTL		0x00000570
 #define RTIT_CTL_TRACEEN		BIT(0)
 #define RTIT_CTL_CYCLEACC		BIT(1)
