Skip to content

Commit 175c852

Browse files
captain5050 authored and namhyung committed
perf tool_pmu: Allow num_cpus(_online) to be specific to a cpumask
For hybrid metrics it is useful to know the number of p-core or e-core CPUs. If a cpumask is specified for the num_cpus or num_cpus_online tool events, compute the value relative to the given mask rather than for the full system.

```
$ sudo /tmp/perf/perf stat -e 'tool/num_cpus/,tool/num_cpus,cpu=cpu_core/,
tool/num_cpus,cpu=cpu_atom/,tool/num_cpus_online/,tool/num_cpus_online,
cpu=cpu_core/,tool/num_cpus_online,cpu=cpu_atom/' true

 Performance counter stats for 'true':

                28      tool/num_cpus/
                16      tool/num_cpus,cpu=cpu_core/
                12      tool/num_cpus,cpu=cpu_atom/
                28      tool/num_cpus_online/
                16      tool/num_cpus_online,cpu=cpu_core/
                12      tool/num_cpus_online,cpu=cpu_atom/

       0.000767205 seconds time elapsed

       0.000938000 seconds user
       0.000000000 seconds sys
```

Reviewed-by: Thomas Falcon <thomas.falcon@intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: James Clark <james.clark@linaro.org>
Link: https://lore.kernel.org/r/20250719030517.1990983-6-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
1 parent bd741d8 commit 175c852

3 files changed

Lines changed: 51 additions & 9 deletions

File tree

tools/perf/util/expr.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx
401401
if (ev != TOOL_PMU__EVENT_NONE) {
402402
u64 count;
403403

404-
if (tool_pmu__read_event(ev, &count))
404+
if (tool_pmu__read_event(ev, /*evsel=*/NULL, &count))
405405
result = count;
406406
else
407407
pr_err("Failure to read '%s'", literal);

tools/perf/util/tool_pmu.c

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ static bool has_pmem(void)
332332
return has_pmem;
333333
}
334334

335-
bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result)
335+
bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *result)
336336
{
337337
const struct cpu_topology *topology;
338338

@@ -347,18 +347,60 @@ bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result)
347347
return true;
348348

349349
case TOOL_PMU__EVENT_NUM_CPUS:
350-
*result = cpu__max_present_cpu().cpu;
350+
if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
351+
/* No evsel to be specific to. */
352+
*result = cpu__max_present_cpu().cpu;
353+
} else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
354+
/* Evsel just has specific CPUs. */
355+
*result = perf_cpu_map__nr(evsel->core.cpus);
356+
} else {
357+
/*
358+
* "Any CPU" event that can be scheduled on any CPU in
359+
* the PMU's cpumask. The PMU cpumask should be saved in
360+
* own_cpus. If not present fall back to max.
361+
*/
362+
if (!perf_cpu_map__is_empty(evsel->core.own_cpus))
363+
*result = perf_cpu_map__nr(evsel->core.own_cpus);
364+
else
365+
*result = cpu__max_present_cpu().cpu;
366+
}
351367
return true;
352368

353369
case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: {
354370
struct perf_cpu_map *online = cpu_map__online();
355371

356-
if (online) {
372+
if (!online)
373+
return false;
374+
375+
if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
376+
/* No evsel to be specific to. */
357377
*result = perf_cpu_map__nr(online);
358-
perf_cpu_map__put(online);
359-
return true;
378+
} else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
379+
/* Evsel just has specific CPUs. */
380+
struct perf_cpu_map *tmp =
381+
perf_cpu_map__intersect(online, evsel->core.cpus);
382+
383+
*result = perf_cpu_map__nr(tmp);
384+
perf_cpu_map__put(tmp);
385+
} else {
386+
/*
387+
* "Any CPU" event that can be scheduled on any CPU in
388+
* the PMU's cpumask. The PMU cpumask should be saved in
389+
* own_cpus, if not present then just the online cpu
390+
* mask.
391+
*/
392+
if (!perf_cpu_map__is_empty(evsel->core.own_cpus)) {
393+
struct perf_cpu_map *tmp =
394+
perf_cpu_map__intersect(online, evsel->core.own_cpus);
395+
396+
*result = perf_cpu_map__nr(tmp);
397+
perf_cpu_map__put(tmp);
398+
} else {
399+
*result = perf_cpu_map__nr(online);
400+
}
360401
}
361-
return false;
402+
perf_cpu_map__put(online);
403+
return true;
362404
}
363405
case TOOL_PMU__EVENT_NUM_DIES:
364406
topology = online_topology();
@@ -417,7 +459,7 @@ int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
417459
old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
418460
val = 0;
419461
if (cpu_map_idx == 0 && thread == 0) {
420-
if (!tool_pmu__read_event(ev, &val)) {
462+
if (!tool_pmu__read_event(ev, evsel, &val)) {
421463
count->lost++;
422464
val = 0;
423465
}

tools/perf/util/tool_pmu.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ enum tool_pmu_event tool_pmu__str_to_event(const char *str);
3434
bool tool_pmu__skip_event(const char *name);
3535
int tool_pmu__num_skip_events(void);
3636

37-
bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result);
37+
bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *result);
3838

3939
u64 tool_pmu__cpu_slots_per_cycle(void);
4040

0 commit comments

Comments
 (0)