
Commit d243d0b

Dapeng Mi authored and Peter Zijlstra committed
perf/x86/intel: Initialize architectural PEBS
arch-PEBS leverages CPUID.23H sub-leaves 4 and 5 to enumerate the supported arch-PEBS capabilities and counter bitmaps. Parse these two sub-leaves and initialize the arch-PEBS capabilities and the corresponding structures.

Since the IA32_PEBS_ENABLE and MSR_PEBS_DATA_CFG MSRs no longer exist under arch-PEBS, arch-PEBS does not need to manipulate them. Add a simple pair of __intel_pmu_pebs_enable()/__intel_pmu_pebs_disable() callbacks for arch-PEBS instead.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251029102136.61364-6-dapeng1.mi@linux.intel.com
1 parent 5e4e355 commit d243d0b
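
For context, a minimal user-space sketch of the enumeration the patch performs in update_pmu_cap(): probe CPUID.23H.0.EAX bits 4 and 5, then read sub-leaf 4 (capability bits) and sub-leaf 5 (counter bitmaps). It assumes a GCC/Clang toolchain (<cpuid.h>); the register and bit assignments follow the kernel changes below, while the program itself is illustrative only and not part of the patch.

/* arch-PEBS CPUID enumeration sketch (user space, illustrative only). */
#include <cpuid.h>
#include <stdint.h>
#include <stdio.h>

#define ARCH_PERFMON_EXT_LEAF           0x23
#define ARCH_PERFMON_PEBS_CAP_LEAF      0x4
#define ARCH_PERFMON_PEBS_COUNTER_LEAF  0x5

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* Sub-leaf 0: feature bits; bits 4 and 5 flag the two arch-PEBS sub-leaves. */
        if (!__get_cpuid_count(ARCH_PERFMON_EXT_LEAF, 0, &eax, &ebx, &ecx, &edx))
                return 1;
        if (!(eax & (1u << 4)) || !(eax & (1u << 5))) {
                printf("arch-PEBS not enumerated\n");
                return 0;
        }

        /* Sub-leaf 4: arch-PEBS capability bits (EBX, stored as caps << 32 by the patch). */
        __get_cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_PEBS_CAP_LEAF,
                          &eax, &ebx, &ecx, &edx);
        uint64_t caps = (uint64_t)ebx << 32;

        /* Sub-leaf 5: counter bitmaps; EAX/ECX = GP/fixed PEBS counters, EBX/EDX = pdist-capable. */
        __get_cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_PEBS_COUNTER_LEAF,
                          &eax, &ebx, &ecx, &edx);
        uint64_t pebs_gp = eax, pebs_fixed = ecx;
        uint64_t pdist_gp = ebx, pdist_fixed = edx;

        printf("caps=%#llx pebs=%#llx/%#llx pdist=%#llx/%#llx\n",
               (unsigned long long)caps,
               (unsigned long long)pebs_gp, (unsigned long long)pebs_fixed,
               (unsigned long long)pdist_gp, (unsigned long long)pdist_fixed);
        return 0;
}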

5 files changed: 132 additions & 33 deletions

arch/x86/events/core.c

Lines changed: 16 additions & 5 deletions
@@ -554,28 +554,37 @@ static inline int precise_br_compat(struct perf_event *event)
 	return m == b;
 }
 
-int x86_pmu_max_precise(void)
+int x86_pmu_max_precise(struct pmu *pmu)
 {
 	int precise = 0;
 
-	/* Support for constant skid */
 	if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
-		precise++;
+		/* arch PEBS */
+		if (x86_pmu.arch_pebs) {
+			precise = 2;
+			if (hybrid(pmu, arch_pebs_cap).pdists)
+				precise++;
+
+			return precise;
+		}
 
+		/* legacy PEBS - support for constant skid */
+		precise++;
 		/* Support for IP fixup */
 		if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
 			precise++;
 
 		if (x86_pmu.pebs_prec_dist)
 			precise++;
 	}
+
 	return precise;
 }
 
 int x86_pmu_hw_config(struct perf_event *event)
 {
 	if (event->attr.precise_ip) {
-		int precise = x86_pmu_max_precise();
+		int precise = x86_pmu_max_precise(event->pmu);
 
 		if (event->attr.precise_ip > precise)
 			return -EOPNOTSUPP;

@@ -2630,7 +2639,9 @@ static ssize_t max_precise_show(struct device *cdev,
 				struct device_attribute *attr,
 				char *buf)
 {
-	return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise());
+	struct pmu *pmu = dev_get_drvdata(cdev);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise(pmu));
 }
 
 static DEVICE_ATTR_RO(max_precise);
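
With this change max_precise becomes a per-PMU attribute, exposed through each event source's caps directory. Below is a small, hedged user-space sketch of how a profiler might honor it before opening a precise event; the sysfs path and the clamping policy are assumptions for illustration, not something the patch mandates.

/* Clamp perf_event_attr.precise_ip to the PMU's advertised maximum (sketch). */
#include <stdio.h>
#include <linux/perf_event.h>

/* Hypothetical helper: read caps/max_precise for a given PMU sysfs name. */
static int read_max_precise(const char *pmu_name)
{
        char path[256];
        int max = 0;
        FILE *f;

        snprintf(path, sizeof(path),
                 "/sys/bus/event_source/devices/%s/caps/max_precise", pmu_name);
        f = fopen(path, "r");
        if (!f)
                return 0;               /* be conservative if unknown */
        if (fscanf(f, "%d", &max) != 1)
                max = 0;
        fclose(f);
        return max;
}

static void request_precise(struct perf_event_attr *attr, const char *pmu_name,
                            int wanted)
{
        int max = read_max_precise(pmu_name);

        /* The kernel rejects precise_ip above the PMU maximum with -EOPNOTSUPP. */
        attr->precise_ip = wanted > max ? max : wanted;
}

int main(void)
{
        struct perf_event_attr attr = { .size = sizeof(attr) };

        /* "cpu" on non-hybrid parts; cpu_core/cpu_atom on hybrid ones. */
        request_precise(&attr, "cpu", 3);
        printf("using precise_ip=%d\n", (int)attr.precise_ip);
        return 0;
}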

arch/x86/events/intel/core.c

Lines changed: 44 additions & 16 deletions
@@ -5271,34 +5271,59 @@ static inline bool intel_pmu_broken_perf_cap(void)
 	return false;
 }
 
+#define counter_mask(_gp, _fixed) ((_gp) | ((u64)(_fixed) << INTEL_PMC_IDX_FIXED))
+
 static void update_pmu_cap(struct pmu *pmu)
 {
-	unsigned int cntr, fixed_cntr, ecx, edx;
-	union cpuid35_eax eax;
-	union cpuid35_ebx ebx;
+	unsigned int eax, ebx, ecx, edx;
+	union cpuid35_eax eax_0;
+	union cpuid35_ebx ebx_0;
+	u64 cntrs_mask = 0;
+	u64 pebs_mask = 0;
+	u64 pdists_mask = 0;
 
-	cpuid(ARCH_PERFMON_EXT_LEAF, &eax.full, &ebx.full, &ecx, &edx);
+	cpuid(ARCH_PERFMON_EXT_LEAF, &eax_0.full, &ebx_0.full, &ecx, &edx);
 
-	if (ebx.split.umask2)
+	if (ebx_0.split.umask2)
 		hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_UMASK2;
-	if (ebx.split.eq)
+	if (ebx_0.split.eq)
 		hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_EQ;
 
-	if (eax.split.cntr_subleaf) {
+	if (eax_0.split.cntr_subleaf) {
 		cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
-			    &cntr, &fixed_cntr, &ecx, &edx);
-		hybrid(pmu, cntr_mask64) = cntr;
-		hybrid(pmu, fixed_cntr_mask64) = fixed_cntr;
+			    &eax, &ebx, &ecx, &edx);
+		hybrid(pmu, cntr_mask64) = eax;
+		hybrid(pmu, fixed_cntr_mask64) = ebx;
+		cntrs_mask = counter_mask(eax, ebx);
 	}
 
-	if (eax.split.acr_subleaf) {
+	if (eax_0.split.acr_subleaf) {
 		cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_ACR_LEAF,
-			    &cntr, &fixed_cntr, &ecx, &edx);
+			    &eax, &ebx, &ecx, &edx);
 		/* The mask of the counters which can be reloaded */
-		hybrid(pmu, acr_cntr_mask64) = cntr | ((u64)fixed_cntr << INTEL_PMC_IDX_FIXED);
-
+		hybrid(pmu, acr_cntr_mask64) = counter_mask(eax, ebx);
 		/* The mask of the counters which can cause a reload of reloadable counters */
-		hybrid(pmu, acr_cause_mask64) = ecx | ((u64)edx << INTEL_PMC_IDX_FIXED);
+		hybrid(pmu, acr_cause_mask64) = counter_mask(ecx, edx);
+	}
+
+	/* Bits[5:4] should be set simultaneously if arch-PEBS is supported */
+	if (eax_0.split.pebs_caps_subleaf && eax_0.split.pebs_cnts_subleaf) {
+		cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_PEBS_CAP_LEAF,
+			    &eax, &ebx, &ecx, &edx);
+		hybrid(pmu, arch_pebs_cap).caps = (u64)ebx << 32;
+
+		cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_PEBS_COUNTER_LEAF,
+			    &eax, &ebx, &ecx, &edx);
+		pebs_mask = counter_mask(eax, ecx);
+		pdists_mask = counter_mask(ebx, edx);
+		hybrid(pmu, arch_pebs_cap).counters = pebs_mask;
+		hybrid(pmu, arch_pebs_cap).pdists = pdists_mask;
+
+		if (WARN_ON((pebs_mask | pdists_mask) & ~cntrs_mask))
+			x86_pmu.arch_pebs = 0;
+	} else {
+		WARN_ON(x86_pmu.arch_pebs == 1);
+		x86_pmu.arch_pebs = 0;
 	}
 
 	if (!intel_pmu_broken_perf_cap()) {

@@ -6252,7 +6277,7 @@ tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i)
 static umode_t
 pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
 {
-	return x86_pmu.ds_pebs ? attr->mode : 0;
+	return intel_pmu_has_pebs() ? attr->mode : 0;
 }
 
 static umode_t

@@ -7728,6 +7753,9 @@ __init int intel_pmu_init(void)
 	if (!is_hybrid() && boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
 		update_pmu_cap(NULL);
 
+	if (x86_pmu.arch_pebs)
+		pr_cont("Architectural PEBS, ");
+
 	intel_pmu_check_counters_mask(&x86_pmu.cntr_mask64,
 				      &x86_pmu.fixed_cntr_mask64,
 				      &x86_pmu.intel_ctrl);
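
To make the new counter_mask() helper concrete: general-purpose counter bits stay in the low word and fixed-counter bits are shifted up by INTEL_PMC_IDX_FIXED (32 in the kernel headers). A standalone sketch with made-up example bitmaps, not kernel code:

/* Demonstrate how GP and fixed counter bitmaps are folded into one u64 mask. */
#include <stdint.h>
#include <stdio.h>

#define INTEL_PMC_IDX_FIXED 32  /* value used by the kernel's perf_event.h */

#define counter_mask(_gp, _fixed) \
        ((uint64_t)(_gp) | ((uint64_t)(_fixed) << INTEL_PMC_IDX_FIXED))

int main(void)
{
        uint32_t gp    = 0xff;  /* e.g. 8 general-purpose counters (CPUID.23H.1 EAX) */
        uint32_t fixed = 0x07;  /* e.g. 3 fixed counters (CPUID.23H.1 EBX) */

        /* 0x00000007000000ff: GP counters in bits 0-31, fixed counters from bit 32. */
        printf("combined mask = %#018llx\n",
               (unsigned long long)counter_mask(gp, fixed));
        return 0;
}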

arch/x86/events/intel/ds.c

Lines changed: 44 additions & 8 deletions
@@ -1531,6 +1531,15 @@ static inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc)
 		intel_pmu_drain_pebs_buffer();
 }
 
+static void __intel_pmu_pebs_enable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+
+	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
+	cpuc->pebs_enabled |= 1ULL << hwc->idx;
+}
+
 void intel_pmu_pebs_enable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

@@ -1539,9 +1548,7 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	struct debug_store *ds = cpuc->ds;
 	unsigned int idx = hwc->idx;
 
-	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
-
-	cpuc->pebs_enabled |= 1ULL << hwc->idx;
+	__intel_pmu_pebs_enable(event);
 
 	if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
 		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);

@@ -1603,14 +1610,22 @@ void intel_pmu_pebs_del(struct perf_event *event)
 	pebs_update_state(needed_cb, cpuc, event, false);
 }
 
-void intel_pmu_pebs_disable(struct perf_event *event)
+static void __intel_pmu_pebs_disable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 
 	intel_pmu_drain_large_pebs(cpuc);
-
 	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
+	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
+}
+
+void intel_pmu_pebs_disable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+
+	__intel_pmu_pebs_disable(event);
 
 	if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
 	    (x86_pmu.version < 5))

@@ -1622,8 +1637,6 @@ void intel_pmu_pebs_disable(struct perf_event *event)
 
 	if (cpuc->enabled)
 		wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
-
-	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
 }
 
 void intel_pmu_pebs_enable_all(void)

@@ -2669,11 +2682,26 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
 	}
 }
 
+static void __init intel_arch_pebs_init(void)
+{
+	/*
+	 * Current hybrid platforms always both support arch-PEBS or not
+	 * on all kinds of cores. So directly set x86_pmu.arch_pebs flag
+	 * if boot cpu supports arch-PEBS.
+	 */
+	x86_pmu.arch_pebs = 1;
+	x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
+	x86_pmu.pebs_capable = ~0ULL;
+
+	x86_pmu.pebs_enable = __intel_pmu_pebs_enable;
+	x86_pmu.pebs_disable = __intel_pmu_pebs_disable;
+}
+
 /*
  * PEBS probe and setup
  */
 
-void __init intel_pebs_init(void)
+static void __init intel_ds_pebs_init(void)
 {
 	/*
 	 * No support for 32bit formats

@@ -2788,6 +2816,14 @@ void __init intel_pebs_init(void)
 	}
 }
 
+void __init intel_pebs_init(void)
+{
+	if (x86_pmu.intel_cap.pebs_format == 0xf)
+		intel_arch_pebs_init();
+	else
+		intel_ds_pebs_init();
+}
+
 void perf_restore_debug_store(void)
 {
 	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
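
The hunks above split PEBS enable/disable into a common core plus wrappers, and intel_pebs_init() now selects arch-PEBS setup when intel_cap.pebs_format reads 0xf, falling back to the legacy DS path otherwise; arch-PEBS installs only the minimal __intel_pmu_pebs_enable()/__intel_pmu_pebs_disable() pair because IA32_PEBS_ENABLE and MSR_PEBS_DATA_CFG no longer exist. Below is a simplified, self-contained model of that selection pattern; the struct and function names are illustrative, not the kernel's x86_pmu definitions.

/* Simplified model of picking PEBS callbacks at init time (illustrative only). */
#include <stdio.h>

struct pmu_ops {
        void (*pebs_enable)(int idx);
        void (*pebs_disable)(int idx);
};

/* Legacy DS-based PEBS: also has to program IA32_PEBS_ENABLE (not modeled here). */
static void ds_pebs_enable(int idx)  { printf("DS PEBS enable, counter %d\n", idx); }
static void ds_pebs_disable(int idx) { printf("DS PEBS disable, counter %d\n", idx); }

/* arch-PEBS: only per-counter bookkeeping, no IA32_PEBS_ENABLE / MSR_PEBS_DATA_CFG writes. */
static void arch_pebs_enable(int idx)  { printf("arch-PEBS enable, counter %d\n", idx); }
static void arch_pebs_disable(int idx) { printf("arch-PEBS disable, counter %d\n", idx); }

static struct pmu_ops ops;

static void pebs_init(unsigned int pebs_format)
{
        if (pebs_format == 0xf) {               /* arch-PEBS enumerated */
                ops.pebs_enable  = arch_pebs_enable;
                ops.pebs_disable = arch_pebs_disable;
        } else {                                /* legacy DS-based PEBS */
                ops.pebs_enable  = ds_pebs_enable;
                ops.pebs_disable = ds_pebs_disable;
        }
}

int main(void)
{
        pebs_init(0xf);
        ops.pebs_enable(0);
        ops.pebs_disable(0);
        return 0;
}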

arch/x86/events/perf_event.h

Lines changed: 22 additions & 3 deletions
@@ -708,6 +708,12 @@ enum hybrid_pmu_type {
 	hybrid_big_small_tiny = hybrid_big | hybrid_small_tiny,
 };
 
+struct arch_pebs_cap {
+	u64 caps;
+	u64 counters;
+	u64 pdists;
+};
+
 struct x86_hybrid_pmu {
 	struct pmu			pmu;
 	const char			*name;

@@ -752,6 +758,8 @@ struct x86_hybrid_pmu {
 					mid_ack		:1,
 					enabled_ack	:1;
 
+	struct arch_pebs_cap		arch_pebs_cap;
+
 	u64				pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX];
 };
 

@@ -906,7 +914,7 @@ struct x86_pmu {
 	union perf_capabilities intel_cap;
 
 	/*
-	 * Intel DebugStore bits
+	 * Intel DebugStore and PEBS bits
 	 */
 	unsigned int	bts			:1,
 			bts_active		:1,

@@ -917,7 +925,8 @@ struct x86_pmu {
 			pebs_no_tlb		:1,
 			pebs_no_isolation	:1,
 			pebs_block		:1,
-			pebs_ept		:1;
+			pebs_ept		:1,
+			arch_pebs		:1;
 	int		pebs_record_size;
 	int		pebs_buffer_size;
 	u64		pebs_events_mask;

@@ -929,6 +938,11 @@ struct x86_pmu {
 	u64		rtm_abort_event;
 	u64		pebs_capable;
 
+	/*
+	 * Intel Architectural PEBS
+	 */
+	struct arch_pebs_cap	arch_pebs_cap;
+
 	/*
 	 * Intel LBR
 	 */

@@ -1216,7 +1230,7 @@ int x86_reserve_hardware(void);
 
 void x86_release_hardware(void);
 
-int x86_pmu_max_precise(void);
+int x86_pmu_max_precise(struct pmu *pmu);
 
 void hw_perf_lbr_event_destroy(struct perf_event *event);
 

@@ -1791,6 +1805,11 @@ static inline int intel_pmu_max_num_pebs(struct pmu *pmu)
 	return fls((u32)hybrid(pmu, pebs_events_mask));
 }
 
+static inline bool intel_pmu_has_pebs(void)
+{
+	return x86_pmu.ds_pebs || x86_pmu.arch_pebs;
+}
+
 #else /* CONFIG_CPU_SUP_INTEL */
 
 static inline void reserve_ds_buffers(void)
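
arch_pebs_cap is added both to struct x86_pmu and to struct x86_hybrid_pmu because the hybrid(pmu, field) accessor resolves to the per-PMU copy on hybrid parts and to the global x86_pmu copy otherwise. Below is a loose sketch of that lookup; the kernel's hybrid() is a macro with different mechanics, and all names and values here are illustrative.

/* Rough model of per-PMU vs. global capability lookup (illustrative only). */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct arch_pebs_cap { uint64_t caps, counters, pdists; };

struct hybrid_pmu { struct arch_pebs_cap arch_pebs_cap; };
struct global_pmu { struct arch_pebs_cap arch_pebs_cap; };

static struct global_pmu x86_pmu = {
        .arch_pebs_cap = { .counters = 0x7000000ffULL },       /* hypothetical bitmap */
};

/* On hybrid systems each PMU carries its own copy; otherwise fall back to x86_pmu. */
static struct arch_pebs_cap *pebs_cap(struct hybrid_pmu *pmu, bool is_hybrid)
{
        if (is_hybrid && pmu)
                return &pmu->arch_pebs_cap;
        return &x86_pmu.arch_pebs_cap;
}

int main(void)
{
        struct hybrid_pmu ecore = {
                .arch_pebs_cap = { .counters = 0xff },          /* hypothetical E-core bitmap */
        };

        printf("non-hybrid counters: %#llx\n",
               (unsigned long long)pebs_cap(NULL, false)->counters);
        printf("hybrid counters:     %#llx\n",
               (unsigned long long)pebs_cap(&ecore, true)->counters);
        return 0;
}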

arch/x86/include/asm/perf_event.h

Lines changed: 6 additions & 1 deletion
@@ -200,6 +200,8 @@ union cpuid10_edx {
 #define ARCH_PERFMON_EXT_LEAF			0x00000023
 #define ARCH_PERFMON_NUM_COUNTER_LEAF		0x1
 #define ARCH_PERFMON_ACR_LEAF			0x2
+#define ARCH_PERFMON_PEBS_CAP_LEAF		0x4
+#define ARCH_PERFMON_PEBS_COUNTER_LEAF		0x5
 
 union cpuid35_eax {
 	struct {

@@ -210,7 +212,10 @@ union cpuid35_eax {
 		unsigned int	acr_subleaf:1;
 		/* Events Sub-Leaf */
 		unsigned int	events_subleaf:1;
-		unsigned int	reserved:28;
+		/* arch-PEBS Sub-Leaves */
+		unsigned int	pebs_caps_subleaf:1;
+		unsigned int	pebs_cnts_subleaf:1;
+		unsigned int	reserved:26;
 	} split;
 	unsigned int	full;
 };
