Skip to content

Commit bb5f13d

Browse files
Dapeng Mi, Peter Zijlstra
authored and committed
perf/x86/intel: Add counter group support for arch-PEBS
Based on the previous adaptive PEBS counter snapshot support, add counter group support for architectural PEBS. Since arch-PEBS shares the same counter group layout with adaptive PEBS, directly reuse the __setup_pebs_counter_group() helper to process the arch-PEBS counter group. Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://patch.msgid.link/20251029102136.61364-13-dapeng1.mi@linux.intel.com
1 parent 52448a0 commit bb5f13d

4 files changed

Lines changed: 77 additions & 9 deletions

File tree

arch/x86/events/intel/core.c

Lines changed: 35 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3014,6 +3014,17 @@ static void intel_pmu_enable_event_ext(struct perf_event *event)
30143014

30153015
if (pebs_data_cfg & PEBS_DATACFG_LBRS)
30163016
ext |= ARCH_PEBS_LBR & cap.caps;
3017+
3018+
if (pebs_data_cfg &
3019+
(PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT))
3020+
ext |= ARCH_PEBS_CNTR_GP & cap.caps;
3021+
3022+
if (pebs_data_cfg &
3023+
(PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT))
3024+
ext |= ARCH_PEBS_CNTR_FIXED & cap.caps;
3025+
3026+
if (pebs_data_cfg & PEBS_DATACFG_METRICS)
3027+
ext |= ARCH_PEBS_CNTR_METRICS & cap.caps;
30173028
}
30183029

30193030
if (cpuc->n_pebs == cpuc->n_large_pebs)
@@ -3038,6 +3049,9 @@ static void intel_pmu_enable_event_ext(struct perf_event *event)
30383049
}
30393050
}
30403051

3052+
if (is_pebs_counter_event_group(event))
3053+
ext |= ARCH_PEBS_CNTR_ALLOW;
3054+
30413055
if (cpuc->cfg_c_val[hwc->idx] != ext)
30423056
__intel_pmu_update_event_ext(hwc->idx, ext);
30433057
}
@@ -4323,6 +4337,20 @@ static bool intel_pmu_is_acr_group(struct perf_event *event)
43234337
return false;
43244338
}
43254339

4340+
static inline bool intel_pmu_has_pebs_counter_group(struct pmu *pmu)
4341+
{
4342+
u64 caps;
4343+
4344+
if (x86_pmu.intel_cap.pebs_format >= 6 && x86_pmu.intel_cap.pebs_baseline)
4345+
return true;
4346+
4347+
caps = hybrid(pmu, arch_pebs_cap).caps;
4348+
if (x86_pmu.arch_pebs && (caps & ARCH_PEBS_CNTR_MASK))
4349+
return true;
4350+
4351+
return false;
4352+
}
4353+
43264354
static inline void intel_pmu_set_acr_cntr_constr(struct perf_event *event,
43274355
u64 *cause_mask, int *num)
43284356
{
@@ -4471,8 +4499,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
44714499
}
44724500

44734501
if ((event->attr.sample_type & PERF_SAMPLE_READ) &&
4474-
(x86_pmu.intel_cap.pebs_format >= 6) &&
4475-
x86_pmu.intel_cap.pebs_baseline &&
4502+
intel_pmu_has_pebs_counter_group(event->pmu) &&
44764503
is_sampling_event(event) &&
44774504
event->attr.precise_ip)
44784505
event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR;
@@ -5420,6 +5447,8 @@ static inline void __intel_update_large_pebs_flags(struct pmu *pmu)
54205447
x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
54215448
if (caps & ARCH_PEBS_LBR)
54225449
x86_pmu.large_pebs_flags |= PERF_SAMPLE_BRANCH_STACK;
5450+
if (caps & ARCH_PEBS_CNTR_MASK)
5451+
x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;
54235452

54245453
if (!(caps & ARCH_PEBS_AUX))
54255454
x86_pmu.large_pebs_flags &= ~PERF_SAMPLE_DATA_SRC;
@@ -7134,8 +7163,11 @@ __init int intel_pmu_init(void)
71347163
* Many features on and after V6 require dynamic constraint,
71357164
* e.g., Arch PEBS, ACR.
71367165
*/
7137-
if (version >= 6)
7166+
if (version >= 6) {
71387167
x86_pmu.flags |= PMU_FL_DYN_CONSTRAINT;
7168+
x86_pmu.late_setup = intel_pmu_late_setup;
7169+
}
7170+
71397171
/*
71407172
* Install the hw-cache-events table:
71417173
*/

arch/x86/events/intel/ds.c

Lines changed: 26 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1530,13 +1530,20 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
15301530

15311531
u64 intel_get_arch_pebs_data_config(struct perf_event *event)
15321532
{
1533+
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
15331534
u64 pebs_data_cfg = 0;
1535+
u64 cntr_mask;
15341536

15351537
if (WARN_ON(event->hw.idx < 0 || event->hw.idx >= X86_PMC_IDX_MAX))
15361538
return 0;
15371539

15381540
pebs_data_cfg |= pebs_update_adaptive_cfg(event);
15391541

1542+
cntr_mask = (PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT) |
1543+
(PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT) |
1544+
PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS;
1545+
pebs_data_cfg |= cpuc->pebs_data_cfg & cntr_mask;
1546+
15401547
return pebs_data_cfg;
15411548
}
15421549

@@ -2444,6 +2451,24 @@ static void setup_arch_pebs_sample_data(struct perf_event *event,
24442451
}
24452452
}
24462453

2454+
if (header->cntr) {
2455+
struct arch_pebs_cntr_header *cntr = next_record;
2456+
unsigned int nr;
2457+
2458+
next_record += sizeof(struct arch_pebs_cntr_header);
2459+
2460+
if (is_pebs_counter_event_group(event)) {
2461+
__setup_pebs_counter_group(cpuc, event,
2462+
(struct pebs_cntr_header *)cntr, next_record);
2463+
data->sample_flags |= PERF_SAMPLE_READ;
2464+
}
2465+
2466+
nr = hweight32(cntr->cntr) + hweight32(cntr->fixed);
2467+
if (cntr->metrics == INTEL_CNTR_METRICS)
2468+
nr += 2;
2469+
next_record += nr * sizeof(u64);
2470+
}
2471+
24472472
/* Parse followed fragments if there are. */
24482473
if (arch_pebs_record_continued(header)) {
24492474
at = at + header->size;
@@ -3094,10 +3119,8 @@ static void __init intel_ds_pebs_init(void)
30943119
break;
30953120

30963121
case 6:
3097-
if (x86_pmu.intel_cap.pebs_baseline) {
3122+
if (x86_pmu.intel_cap.pebs_baseline)
30983123
x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;
3099-
x86_pmu.late_setup = intel_pmu_late_setup;
3100-
}
31013124
fallthrough;
31023125
case 5:
31033126
x86_pmu.pebs_ept = 1;

arch/x86/include/asm/msr-index.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -334,12 +334,18 @@
334334
#define ARCH_PEBS_INDEX_WR_SHIFT 4
335335

336336
#define ARCH_PEBS_RELOAD 0xffffffff
337+
#define ARCH_PEBS_CNTR_ALLOW BIT_ULL(35)
338+
#define ARCH_PEBS_CNTR_GP BIT_ULL(36)
339+
#define ARCH_PEBS_CNTR_FIXED BIT_ULL(37)
340+
#define ARCH_PEBS_CNTR_METRICS BIT_ULL(38)
337341
#define ARCH_PEBS_LBR_SHIFT 40
338342
#define ARCH_PEBS_LBR (0x3ull << ARCH_PEBS_LBR_SHIFT)
339343
#define ARCH_PEBS_VECR_XMM BIT_ULL(49)
340344
#define ARCH_PEBS_GPR BIT_ULL(61)
341345
#define ARCH_PEBS_AUX BIT_ULL(62)
342346
#define ARCH_PEBS_EN BIT_ULL(63)
347+
#define ARCH_PEBS_CNTR_MASK (ARCH_PEBS_CNTR_GP | ARCH_PEBS_CNTR_FIXED | \
348+
ARCH_PEBS_CNTR_METRICS)
343349

344350
#define MSR_IA32_RTIT_CTL 0x00000570
345351
#define RTIT_CTL_TRACEEN BIT(0)

arch/x86/include/asm/perf_event.h

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -141,16 +141,16 @@
141141
#define ARCH_PERFMON_EVENTS_COUNT 7
142142

143143
#define PEBS_DATACFG_MEMINFO BIT_ULL(0)
144-
#define PEBS_DATACFG_GP BIT_ULL(1)
144+
#define PEBS_DATACFG_GP BIT_ULL(1)
145145
#define PEBS_DATACFG_XMMS BIT_ULL(2)
146146
#define PEBS_DATACFG_LBRS BIT_ULL(3)
147-
#define PEBS_DATACFG_LBR_SHIFT 24
148147
#define PEBS_DATACFG_CNTR BIT_ULL(4)
148+
#define PEBS_DATACFG_METRICS BIT_ULL(5)
149+
#define PEBS_DATACFG_LBR_SHIFT 24
149150
#define PEBS_DATACFG_CNTR_SHIFT 32
150151
#define PEBS_DATACFG_CNTR_MASK GENMASK_ULL(15, 0)
151152
#define PEBS_DATACFG_FIX_SHIFT 48
152153
#define PEBS_DATACFG_FIX_MASK GENMASK_ULL(7, 0)
153-
#define PEBS_DATACFG_METRICS BIT_ULL(5)
154154

155155
/* Steal the highest bit of pebs_data_cfg for SW usage */
156156
#define PEBS_UPDATE_DS_SW BIT_ULL(63)
@@ -603,6 +603,13 @@ struct arch_pebs_lbr_header {
603603
u64 ler_info;
604604
};
605605

606+
struct arch_pebs_cntr_header {
607+
u32 cntr;
608+
u32 fixed;
609+
u32 metrics;
610+
u32 reserved;
611+
};
612+
606613
/*
607614
* AMD Extended Performance Monitoring and Debug cpuid feature detection
608615
*/

0 commit comments

Comments
 (0)