Skip to content

Commit 7cd264d

Browse files
Dapeng Mi authored and Peter Zijlstra committed
perf/x86/intel: Add support for PEBS memory auxiliary info field in NVL
Similar to DMR (Panther Cove uarch), both the P-core (Coyote Cove uarch) and the E-core (Arctic Wolf uarch) of NVL adopt the new PEBS memory auxiliary info layout.

The Coyote Cove microarchitecture shares the same PMU capabilities, including the memory auxiliary info layout, with Panther Cove. The Arctic Wolf microarchitecture has a similar layout to Panther Cove, with the only difference being the specific data source encoding for L2 hit cases (up to the L2 cache level). The OMR encoding remains the same as in Panther Cove.

For detailed information on the memory auxiliary info encoding, please refer to section 16.2 "PEBS LOAD LATENCY AND STORE LATENCY FACILITY" in the latest ISE documentation.

This patch defines the Arctic Wolf-specific data source encoding and then adds support for the PEBS memory auxiliary info field on NVL.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260114011750.350569-5-dapeng1.mi@linux.intel.com
1 parent d345b6b commit 7cd264d

2 files changed

Lines changed: 85 additions & 0 deletions

File tree

arch/x86/events/intel/ds.c

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,18 @@ union intel_x86_pebs_dse {
9696
unsigned int pnc_fb_full:1;
9797
unsigned int ld_reserved8:16;
9898
};
99+
/*
 * Arctic Wolf (arw_*) view of the PEBS memory auxiliary info word.
 * The declared widths add up to 32 bits (8 + 8 x 1 + 16).
 */
struct {
	/* Data source encoding; decoded by arw_latency_data(). */
	unsigned int arw_dse:8;
	/* Clear: arw_dse indexes the L2-hit table. Set: arw_dse is OMR-encoded. */
	unsigned int arw_l2_miss:1;
	/* Not read by arw_latency_data(). */
	unsigned int arw_xq_promotion:1;
	/* Not read by arw_latency_data(). */
	unsigned int arw_reissue:1;
	/* Reported as a TLB miss when set, as a TLB hit otherwise. */
	unsigned int arw_stlb_miss:1;
	/* Reported as a locked access when set. */
	unsigned int arw_locked:1;
	/* Reported as a data block when set. */
	unsigned int arw_data_blk:1;
	/* Reported as an address block when set. */
	unsigned int arw_addr_blk:1;
	/* Not read by arw_latency_data(). */
	unsigned int arw_fb_full:1;
	unsigned int ld_reserved9:16;
};
99111
};
100112

101113

@@ -274,6 +286,29 @@ static u64 pnc_pebs_l2_hit_data_source[PNC_PEBS_DATA_SOURCE_MAX] = {
274286
OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */
275287
};
276288

289+
/* Version for Arctic Wolf and later */
290+
291+
/* L2 hit */
292+
#define ARW_PEBS_DATA_SOURCE_MAX 16
293+
static u64 arw_pebs_l2_hit_data_source[ARW_PEBS_DATA_SOURCE_MAX] = {
294+
P(OP, LOAD) | P(LVL, NA) | LEVEL(NA) | P(SNOOP, NA), /* 0x00: non-cache access */
295+
OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 hit */
296+
OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: WCB Hit */
297+
OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x03: L2 Hit Clean */
298+
OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT), /* 0x04: L2 Hit Snoop HIT */
299+
OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, HITM), /* 0x05: L2 Hit Snoop Hit Modified */
300+
OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x06: uncached */
301+
0, /* 0x07: Reserved */
302+
0, /* 0x08: Reserved */
303+
0, /* 0x09: Reserved */
304+
0, /* 0x0a: Reserved */
305+
0, /* 0x0b: Reserved */
306+
0, /* 0x0c: Reserved */
307+
0, /* 0x0d: Reserved */
308+
0, /* 0x0e: Reserved */
309+
0, /* 0x0f: Reserved */
310+
};
311+
277312
/* L2 miss */
278313
#define OMR_DATA_SOURCE_MAX 16
279314
static u64 omr_data_source[OMR_DATA_SOURCE_MAX] = {
@@ -458,6 +493,44 @@ u64 cmt_latency_data(struct perf_event *event, u64 status)
458493
dse.mtl_fwd_blk);
459494
}
460495

496+
/*
 * arw_latency_data - translate an Arctic Wolf PEBS memory auxiliary info
 * word into a perf_mem_data_src value.
 * @event:  sampled event; only hw.flags is inspected, to detect
 *          PERF_X86_EVENT_PEBS_ST_HSW
 * @status: raw auxiliary info, read through the arw_* bitfields of
 *          union intel_x86_pebs_dse
 *
 * Returns the assembled data source value.
 */
static u64 arw_latency_data(struct perf_event *event, u64 status)
{
	union intel_x86_pebs_dse aux;
	union perf_mem_data_src data_src;
	u64 enc;

	aux.val = status;

	/*
	 * On an L2 miss the encoding is handed to the OMR parser; otherwise
	 * its low four bits select an entry of the L2-hit table.
	 */
	if (aux.arw_l2_miss)
		enc = parse_omr_data_source(aux.arw_dse);
	else
		enc = arw_pebs_l2_hit_data_source[aux.arw_dse & 0xf];

	/* A zero decode (e.g. a reserved table entry) falls back to a generic load. */
	if (!enc)
		enc = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA);

	enc |= aux.arw_stlb_miss ?
		(P(TLB, MISS) | P(TLB, L2)) :
		(P(TLB, HIT) | P(TLB, L1) | P(TLB, L2));

	if (aux.arw_locked)
		enc |= P(LOCK, LOCKED);

	/* Both block bits may be set at once; NA is reported only when neither is. */
	if (aux.arw_data_blk || aux.arw_addr_blk) {
		if (aux.arw_data_blk)
			enc |= P(BLK, DATA);
		if (aux.arw_addr_blk)
			enc |= P(BLK, ADDR);
	} else {
		enc |= P(BLK, NA);
	}

	data_src.val = enc;

	/* PERF_X86_EVENT_PEBS_ST_HSW events have their op field replaced by STORE. */
	if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
		data_src.mem_op = P(OP, STORE);

	return data_src.val;
}
533+
461534
static u64 lnc_latency_data(struct perf_event *event, u64 status)
462535
{
463536
union intel_x86_pebs_dse dse;
@@ -551,6 +624,16 @@ u64 pnc_latency_data(struct perf_event *event, u64 status)
551624
return src.val;
552625
}
553626

627+
/*
 * nvl_latency_data - decode PEBS memory auxiliary info on NVL.
 * @event:  sampled event; its PMU determines which decoder is used
 * @status: raw auxiliary info, passed through unchanged
 *
 * A hybrid_small PMU is decoded by arw_latency_data(); every other PMU
 * type is decoded by pnc_latency_data().
 */
u64 nvl_latency_data(struct perf_event *event, u64 status)
{
	if (hybrid_pmu(event->pmu)->pmu_type != hybrid_small)
		return pnc_latency_data(event, status);

	return arw_latency_data(event, status);
}
636+
554637
static u64 load_latency_data(struct perf_event *event, u64 status)
555638
{
556639
union intel_x86_pebs_dse dse;

arch/x86/events/perf_event.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1666,6 +1666,8 @@ u64 arl_h_latency_data(struct perf_event *event, u64 status);
16661666

16671667
u64 pnc_latency_data(struct perf_event *event, u64 status);
16681668

1669+
/* NVL: decode PEBS memory auxiliary info, selecting the decoder by hybrid PMU type. */
u64 nvl_latency_data(struct perf_event *event, u64 status);
1670+
16691671
extern struct event_constraint intel_core2_pebs_event_constraints[];
16701672

16711673
extern struct event_constraint intel_atom_pebs_event_constraints[];

0 commit comments

Comments
 (0)