Skip to content

Commit 811082e

Browse files
captain5050 authored and namhyung committed
perf parse-events: Support user CPUs mixed with threads/processes
Counting events system-wide with a specified CPU prior to this change worked: ``` $ perf stat -e 'msr/tsc/,msr/tsc,cpu=cpu_core/,msr/tsc,cpu=cpu_atom/' -a sleep 1 Performance counter stats for 'system wide': 59,393,419,099 msr/tsc/ 33,927,965,927 msr/tsc,cpu=cpu_core/ 25,465,608,044 msr/tsc,cpu=cpu_atom/ ``` However, when counting with process the counts became system wide: ``` $ perf stat -e 'msr/tsc/,msr/tsc,cpu=cpu_core/,msr/tsc,cpu=cpu_atom/' perf test -F 10 10.1: Basic parsing test : Ok 10.2: Parsing without PMU name : Ok 10.3: Parsing with PMU name : Ok Performance counter stats for 'perf test -F 10': 59,233,549 msr/tsc/ 59,227,556 msr/tsc,cpu=cpu_core/ 59,224,053 msr/tsc,cpu=cpu_atom/ ``` Make the handling of CPU maps with event parsing clearer. When an event is parsed creating an evsel the cpus should be either the PMU's cpumask or user specified CPUs. Update perf_evlist__propagate_maps so that it doesn't clobber the user specified CPUs. Try to make the behavior clearer, firstly fix up missing cpumasks. Next, perform sanity checks and adjustments from the global evlist CPU requests and for the PMU including simplifying to the "any CPU"(-1) value. Finally remove the event if the cpumask is empty. So that events are opened with a CPU and a thread change stat's create_perf_stat_counter to give both. With the change things are fixed: ``` $ perf stat --no-scale -e 'msr/tsc/,msr/tsc,cpu=cpu_core/,msr/tsc,cpu=cpu_atom/' perf test -F 10 10.1: Basic parsing test : Ok 10.2: Parsing without PMU name : Ok 10.3: Parsing with PMU name : Ok Performance counter stats for 'perf test -F 10': 63,704,975 msr/tsc/ 47,060,704 msr/tsc,cpu=cpu_core/ (4.62%) 16,640,591 msr/tsc,cpu=cpu_atom/ (2.18%) ``` However, note the "--no-scale" option is used. This is necessary as the running time for the event on the counter isn't the same as the enabled time because the thread doesn't necessarily run on the CPUs specified for the counter. 
All counter values are scaled with: scaled_value = value * time_enabled / time_running and so without --no-scale the scaled_value becomes very large. This problem already exists on hybrid systems for the same reason. Here are 2 runs of the same code with an instructions event that counts the same on both types of core, there is no real multiplexing happening on the event: ``` $ perf stat -e instructions perf test -F 10 ... Performance counter stats for 'perf test -F 10': 87,896,447 cpu_atom/instructions/ (14.37%) 98,171,964 cpu_core/instructions/ (85.63%) ... $ perf stat --no-scale -e instructions perf test -F 10 ... Performance counter stats for 'perf test -F 10': 13,069,890 cpu_atom/instructions/ (19.32%) 83,460,274 cpu_core/instructions/ (80.68%) ... ``` The scaling has inflated per-PMU instruction counts and the overall count by 2x. To fix this the kernel needs changing when a task+CPU event (or just task event on hybrid) is scheduled out. A fix could be that the state isn't inactive but off for such events, so that time_enabled counts don't accumulate on them. Reviewed-by: Thomas Falcon <thomas.falcon@intel.com> Signed-off-by: Ian Rogers <irogers@google.com> Link: https://lore.kernel.org/r/20250719030517.1990983-13-irogers@google.com Signed-off-by: Namhyung Kim <namhyung@kernel.org>
1 parent e9387ba commit 811082e

3 files changed

Lines changed: 87 additions & 48 deletions

File tree

tools/lib/perf/evlist.c

Lines changed: 81 additions & 38 deletions
Original file line number · Diff line number · Diff line change
@@ -36,49 +36,88 @@ void perf_evlist__init(struct perf_evlist *evlist)
3636
static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
3737
struct perf_evsel *evsel)
3838
{
39-
if (evsel->system_wide) {
40-
/* System wide: set the cpu map of the evsel to all online CPUs. */
41-
perf_cpu_map__put(evsel->cpus);
42-
evsel->cpus = perf_cpu_map__new_online_cpus();
43-
} else if (evlist->has_user_cpus && evsel->is_pmu_core) {
44-
/*
45-
* User requested CPUs on a core PMU, ensure the requested CPUs
46-
* are valid by intersecting with those of the PMU.
47-
*/
39+
if (perf_cpu_map__is_empty(evsel->cpus)) {
40+
if (perf_cpu_map__is_empty(evsel->pmu_cpus)) {
41+
/*
42+
* Assume the unset PMU cpus were for a system-wide
43+
* event, like a software or tracepoint.
44+
*/
45+
evsel->pmu_cpus = perf_cpu_map__new_online_cpus();
46+
}
47+
if (evlist->has_user_cpus && !evsel->system_wide) {
48+
/*
49+
* Use the user CPUs unless the evsel is set to be
50+
* system wide, such as the dummy event.
51+
*/
52+
evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
53+
} else {
54+
/*
55+
* System wide and other modes, assume the cpu map
56+
* should be set to all PMU CPUs.
57+
*/
58+
evsel->cpus = perf_cpu_map__get(evsel->pmu_cpus);
59+
}
60+
}
61+
/*
62+
* Avoid "any CPU"(-1) for uncore and PMUs that require a CPU, even if
63+
* requested.
64+
*/
65+
if (evsel->requires_cpu && perf_cpu_map__has_any_cpu(evsel->cpus)) {
4866
perf_cpu_map__put(evsel->cpus);
49-
evsel->cpus = perf_cpu_map__intersect(evlist->user_requested_cpus, evsel->pmu_cpus);
67+
evsel->cpus = perf_cpu_map__get(evsel->pmu_cpus);
68+
}
5069

51-
/*
52-
* Empty cpu lists would eventually get opened as "any" so remove
53-
* genuinely empty ones before they're opened in the wrong place.
54-
*/
55-
if (perf_cpu_map__is_empty(evsel->cpus)) {
56-
struct perf_evsel *next = perf_evlist__next(evlist, evsel);
57-
58-
perf_evlist__remove(evlist, evsel);
59-
/* Keep idx contiguous */
60-
if (next)
61-
list_for_each_entry_from(next, &evlist->entries, node)
62-
next->idx--;
70+
/*
71+
* Globally requested CPUs replace user requested unless the evsel is
72+
* set to be system wide.
73+
*/
74+
if (evlist->has_user_cpus && !evsel->system_wide) {
75+
assert(!perf_cpu_map__has_any_cpu(evlist->user_requested_cpus));
76+
if (!perf_cpu_map__equal(evsel->cpus, evlist->user_requested_cpus)) {
77+
perf_cpu_map__put(evsel->cpus);
78+
evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
6379
}
64-
} else if (!evsel->pmu_cpus || evlist->has_user_cpus ||
65-
(!evsel->requires_cpu && perf_cpu_map__has_any_cpu(evlist->user_requested_cpus))) {
66-
/*
67-
* The PMU didn't specify a default cpu map, this isn't a core
68-
* event and the user requested CPUs or the evlist user
69-
* requested CPUs have the "any CPU" (aka dummy) CPU value. In
70-
* which case use the user requested CPUs rather than the PMU
71-
* ones.
72-
*/
80+
}
81+
82+
/* Ensure cpus only references valid PMU CPUs. */
83+
if (!perf_cpu_map__has_any_cpu(evsel->cpus) &&
84+
!perf_cpu_map__is_subset(evsel->pmu_cpus, evsel->cpus)) {
85+
struct perf_cpu_map *tmp = perf_cpu_map__intersect(evsel->pmu_cpus, evsel->cpus);
86+
7387
perf_cpu_map__put(evsel->cpus);
74-
evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
75-
} else if (evsel->cpus != evsel->pmu_cpus) {
76-
/*
77-
* No user requested cpu map but the PMU cpu map doesn't match
78-
* the evsel's. Reset it back to the PMU cpu map.
79-
*/
88+
evsel->cpus = tmp;
89+
}
90+
91+
/*
92+
* Was event requested on all the PMU's CPUs but the user requested is
93+
* any CPU (-1)? If so switch to using any CPU (-1) to reduce the number
94+
* of events.
95+
*/
96+
if (!evsel->system_wide &&
97+
!evsel->requires_cpu &&
98+
perf_cpu_map__equal(evsel->cpus, evsel->pmu_cpus) &&
99+
perf_cpu_map__has_any_cpu(evlist->user_requested_cpus)) {
80100
perf_cpu_map__put(evsel->cpus);
81-
evsel->cpus = perf_cpu_map__get(evsel->pmu_cpus);
101+
evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
102+
}
103+
104+
/* Sanity check assert before the evsel is potentially removed. */
105+
assert(!evsel->requires_cpu || !perf_cpu_map__has_any_cpu(evsel->cpus));
106+
107+
/*
108+
* Empty cpu lists would eventually get opened as "any" so remove
109+
* genuinely empty ones before they're opened in the wrong place.
110+
*/
111+
if (perf_cpu_map__is_empty(evsel->cpus)) {
112+
struct perf_evsel *next = perf_evlist__next(evlist, evsel);
113+
114+
perf_evlist__remove(evlist, evsel);
115+
/* Keep idx contiguous */
116+
if (next)
117+
list_for_each_entry_from(next, &evlist->entries, node)
118+
next->idx--;
119+
120+
return;
82121
}
83122

84123
if (evsel->system_wide) {
@@ -98,6 +137,10 @@ static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
98137

99138
evlist->needs_map_propagation = true;
100139

140+
/* Clear the all_cpus set which will be merged into during propagation. */
141+
perf_cpu_map__put(evlist->all_cpus);
142+
evlist->all_cpus = NULL;
143+
101144
list_for_each_entry_safe(evsel, n, &evlist->entries, node)
102145
__perf_evlist__propagate_maps(evlist, evsel);
103146
}

tools/perf/util/parse-events.c

Lines changed: 4 additions & 6 deletions
Original file line number · Diff line number · Diff line change
@@ -310,20 +310,18 @@ __add_event(struct list_head *list, int *idx,
310310
if (pmu) {
311311
is_pmu_core = pmu->is_core;
312312
pmu_cpus = perf_cpu_map__get(pmu->cpus);
313+
if (perf_cpu_map__is_empty(pmu_cpus))
314+
pmu_cpus = cpu_map__online();
313315
} else {
314316
is_pmu_core = (attr->type == PERF_TYPE_HARDWARE ||
315317
attr->type == PERF_TYPE_HW_CACHE);
316318
pmu_cpus = is_pmu_core ? cpu_map__online() : NULL;
317319
}
318320

319-
if (has_user_cpus) {
321+
if (has_user_cpus)
320322
cpus = perf_cpu_map__get(user_cpus);
321-
/* Existing behavior that pmu_cpus matches the given user ones. */
322-
perf_cpu_map__put(pmu_cpus);
323-
pmu_cpus = perf_cpu_map__get(user_cpus);
324-
} else {
323+
else
325324
cpus = perf_cpu_map__get(pmu_cpus);
326-
}
327325

328326
if (init_attr)
329327
event_attr_init(attr);

tools/perf/util/stat.c

Lines changed: 2 additions & 4 deletions
Original file line number · Diff line number · Diff line change
@@ -769,8 +769,6 @@ int create_perf_stat_counter(struct evsel *evsel,
769769
attr->enable_on_exec = 1;
770770
}
771771

772-
if (target__has_cpu(target) && !target__has_per_thread(target))
773-
return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu_map_idx);
774-
775-
return evsel__open_per_thread(evsel, evsel->core.threads);
772+
return evsel__open_per_cpu_and_thread(evsel, evsel__cpus(evsel), cpu_map_idx,
773+
evsel->core.threads);
776774
}

0 commit comments

Comments (0)