Commit 167cde7

Dapeng Mi authored and Peter Zijlstra committed
perf/x86/intel/ds: Factor out PEBS group processing code to functions
Adaptive PEBS and arch-PEBS share a large amount of code for processing the PEBS groups, such as the basic, GPR and meminfo groups. Extract this shared code into generic helper functions to avoid duplication.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251029102136.61364-8-dapeng1.mi@linux.intel.com
1 parent 8807d92 commit 167cde7
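
As a rough sketch of the intended reuse, a hypothetical arch-PEBS sample-data path built on the four __setup_*() helpers could look like the following. Arch-PEBS support is added by later patches in this series, so struct arch_pebs_basic and its fields here are illustrative assumptions; only the __setup_*() signatures are taken from the diff below.

/*
 * Hypothetical sketch, not part of this commit: an arch-PEBS
 * sample-data path reusing the factored-out helpers. The
 * struct arch_pebs_basic layout is an assumption made for
 * illustration; the __setup_*() signatures match the diff.
 */
struct arch_pebs_basic {
        u64 ip;                 /* assumed: EventingIP of the record */
        u64 tsc;                /* assumed: record timestamp */
        u16 retire_latency;     /* assumed: retire latency field */
};

static void setup_arch_pebs_sample_data(struct perf_event *event,
                                        struct pt_regs *iregs, void *__pebs,
                                        struct perf_sample_data *data,
                                        struct pt_regs *regs)
{
        struct arch_pebs_basic *basic = __pebs;
        u64 sample_type = event->attr.sample_type;

        /* shared init: perf_sample_data + callchain taken from iregs */
        __setup_perf_sample_data(event, iregs, data);

        *regs = *iregs;

        /* shared basic-group handling: EventingIP, EXACT flag, timestamp */
        __setup_pebs_basic_group(event, regs, data, sample_type,
                                 basic->ip, basic->tsc,
                                 basic->retire_latency);

        /*
         * The GPR and meminfo groups would reuse __setup_pebs_gpr_group()
         * and __setup_pebs_meminfo_group() in the same way.
         */
}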

1 file changed: arch/x86/events/intel/ds.c (104 additions, 66 deletions)
@@ -2072,6 +2072,90 @@ static inline void __setup_pebs_counter_group(struct cpu_hw_events *cpuc,
 
 #define PEBS_LATENCY_MASK            0xffff
 
+static inline void __setup_perf_sample_data(struct perf_event *event,
+                                            struct pt_regs *iregs,
+                                            struct perf_sample_data *data)
+{
+        perf_sample_data_init(data, 0, event->hw.last_period);
+
+        /*
+         * We must however always use iregs for the unwinder to stay sane; the
+         * record BP,SP,IP can point into thin air when the record is from a
+         * previous PMI context or an (I)RET happened between the record and
+         * PMI.
+         */
+        perf_sample_save_callchain(data, event, iregs);
+}
+
+static inline void __setup_pebs_basic_group(struct perf_event *event,
+                                            struct pt_regs *regs,
+                                            struct perf_sample_data *data,
+                                            u64 sample_type, u64 ip,
+                                            u64 tsc, u16 retire)
+{
+        /* The ip in basic is EventingIP */
+        set_linear_ip(regs, ip);
+        regs->flags = PERF_EFLAGS_EXACT;
+        setup_pebs_time(event, data, tsc);
+
+        if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
+                data->weight.var3_w = retire;
+}
+
+static inline void __setup_pebs_gpr_group(struct perf_event *event,
+                                          struct pt_regs *regs,
+                                          struct pebs_gprs *gprs,
+                                          u64 sample_type)
+{
+        if (event->attr.precise_ip < 2) {
+                set_linear_ip(regs, gprs->ip);
+                regs->flags &= ~PERF_EFLAGS_EXACT;
+        }
+
+        if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER))
+                adaptive_pebs_save_regs(regs, gprs);
+}
+
+static inline void __setup_pebs_meminfo_group(struct perf_event *event,
+                                              struct perf_sample_data *data,
+                                              u64 sample_type, u64 latency,
+                                              u16 instr_latency, u64 address,
+                                              u64 aux, u64 tsx_tuning, u64 ax)
+{
+        if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
+                u64 tsx_latency = intel_get_tsx_weight(tsx_tuning);
+
+                data->weight.var2_w = instr_latency;
+
+                /*
+                 * Although meminfo::latency is defined as a u64,
+                 * only the lower 32 bits include the valid data
+                 * in practice on Ice Lake and earlier platforms.
+                 */
+                if (sample_type & PERF_SAMPLE_WEIGHT)
+                        data->weight.full = latency ?: tsx_latency;
+                else
+                        data->weight.var1_dw = (u32)latency ?: tsx_latency;
+
+                data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+        }
+
+        if (sample_type & PERF_SAMPLE_DATA_SRC) {
+                data->data_src.val = get_data_src(event, aux);
+                data->sample_flags |= PERF_SAMPLE_DATA_SRC;
+        }
+
+        if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
+                data->addr = address;
+                data->sample_flags |= PERF_SAMPLE_ADDR;
+        }
+
+        if (sample_type & PERF_SAMPLE_TRANSACTION) {
+                data->txn = intel_get_tsx_transaction(tsx_tuning, ax);
+                data->sample_flags |= PERF_SAMPLE_TRANSACTION;
+        }
+}
+
 /*
  * With adaptive PEBS the layout depends on what fields are configured.
  */
@@ -2081,44 +2165,32 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
                                             struct pt_regs *regs)
 {
         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+        u64 sample_type = event->attr.sample_type;
         struct pebs_basic *basic = __pebs;
         void *next_record = basic + 1;
-        u64 sample_type, format_group;
         struct pebs_meminfo *meminfo = NULL;
         struct pebs_gprs *gprs = NULL;
         struct x86_perf_regs *perf_regs;
+        u64 format_group;
+        u16 retire;
 
         if (basic == NULL)
                 return;
 
         perf_regs = container_of(regs, struct x86_perf_regs, regs);
         perf_regs->xmm_regs = NULL;
 
-        sample_type = event->attr.sample_type;
         format_group = basic->format_group;
-        perf_sample_data_init(data, 0, event->hw.last_period);
 
-        setup_pebs_time(event, data, basic->tsc);
-
-        /*
-         * We must however always use iregs for the unwinder to stay sane; the
-         * record BP,SP,IP can point into thin air when the record is from a
-         * previous PMI context or an (I)RET happened between the record and
-         * PMI.
-         */
-        perf_sample_save_callchain(data, event, iregs);
+        __setup_perf_sample_data(event, iregs, data);
 
         *regs = *iregs;
-        /* The ip in basic is EventingIP */
-        set_linear_ip(regs, basic->ip);
-        regs->flags = PERF_EFLAGS_EXACT;
 
-        if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
-                if (x86_pmu.flags & PMU_FL_RETIRE_LATENCY)
-                        data->weight.var3_w = basic->retire_latency;
-                else
-                        data->weight.var3_w = 0;
-        }
+        /* basic group */
+        retire = x86_pmu.flags & PMU_FL_RETIRE_LATENCY ?
+                 basic->retire_latency : 0;
+        __setup_pebs_basic_group(event, regs, data, sample_type,
+                                 basic->ip, basic->tsc, retire);
 
         /*
          * The record for MEMINFO is in front of GP
@@ -2134,54 +2206,20 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
                 gprs = next_record;
                 next_record = gprs + 1;
 
-                if (event->attr.precise_ip < 2) {
-                        set_linear_ip(regs, gprs->ip);
-                        regs->flags &= ~PERF_EFLAGS_EXACT;
-                }
-
-                if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER))
-                        adaptive_pebs_save_regs(regs, gprs);
+                __setup_pebs_gpr_group(event, regs, gprs, sample_type);
         }
 
         if (format_group & PEBS_DATACFG_MEMINFO) {
-                if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
-                        u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
-                                      meminfo->cache_latency : meminfo->mem_latency;
-
-                        if (x86_pmu.flags & PMU_FL_INSTR_LATENCY)
-                                data->weight.var2_w = meminfo->instr_latency;
-
-                        /*
-                         * Although meminfo::latency is defined as a u64,
-                         * only the lower 32 bits include the valid data
-                         * in practice on Ice Lake and earlier platforms.
-                         */
-                        if (sample_type & PERF_SAMPLE_WEIGHT) {
-                                data->weight.full = latency ?:
-                                        intel_get_tsx_weight(meminfo->tsx_tuning);
-                        } else {
-                                data->weight.var1_dw = (u32)latency ?:
-                                        intel_get_tsx_weight(meminfo->tsx_tuning);
-                        }
-
-                        data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
-                }
-
-                if (sample_type & PERF_SAMPLE_DATA_SRC) {
-                        data->data_src.val = get_data_src(event, meminfo->aux);
-                        data->sample_flags |= PERF_SAMPLE_DATA_SRC;
-                }
-
-                if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
-                        data->addr = meminfo->address;
-                        data->sample_flags |= PERF_SAMPLE_ADDR;
-                }
-
-                if (sample_type & PERF_SAMPLE_TRANSACTION) {
-                        data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
-                                                              gprs ? gprs->ax : 0);
-                        data->sample_flags |= PERF_SAMPLE_TRANSACTION;
-                }
+                u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
+                              meminfo->cache_latency : meminfo->mem_latency;
+                u64 instr_latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
+                                    meminfo->instr_latency : 0;
+                u64 ax = gprs ? gprs->ax : 0;
+
+                __setup_pebs_meminfo_group(event, data, sample_type, latency,
+                                           instr_latency, meminfo->address,
+                                           meminfo->aux, meminfo->tsx_tuning,
+                                           ax);
         }
 
         if (format_group & PEBS_DATACFG_XMMS) {