Skip to content

Commit ff85481

Browse files
captain5050 authored and acmel committed
perf evlist: Special map propagation for tool events that read on 1 CPU
Tool events like duration_time don't need a perf_cpu_map that contains all online CPUs. Having such a perf_cpu_map causes overheads when iterating between events for CPU affinity. During parsing mark events that just read on a single CPU map index as such, then during map propagation set up the evsel's CPUs and thereby the evlists accordingly. The setting cannot be done early in parsing as user CPUs are only fully known when evlist__create_maps is called. Signed-off-by: Ian Rogers <irogers@google.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Andres Freund <andres@anarazel.de> Cc: Dapeng Mi <dapeng1.mi@linux.intel.com> Cc: Dr. David Alan Gilbert <linux@treblig.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Clark <james.clark@linaro.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Falcon <thomas.falcon@intel.com> Cc: Thomas Richter <tmricht@linux.ibm.com> Cc: Yang Li <yang.lee@linux.alibaba.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
1 parent 63b320a commit ff85481

5 files changed

Lines changed: 48 additions & 4 deletions

File tree

tools/lib/perf/evlist.c

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,28 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
101101
evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
102102
}
103103

104+
/*
105+
* Tool events may only read on the first CPU index to avoid double
106+
* counting things like duration_time. Make the evsel->cpus contain just
107+
* that single entry otherwise we may spend time changing affinity to
108+
* CPUs that just have tool events, etc.
109+
*/
110+
if (evsel->reads_only_on_cpu_idx0 && perf_cpu_map__nr(evsel->cpus) > 0) {
111+
struct perf_cpu_map *srcs[3] = {
112+
evlist->all_cpus,
113+
evlist->user_requested_cpus,
114+
evsel->pmu_cpus,
115+
};
116+
for (size_t i = 0; i < ARRAY_SIZE(srcs); i++) {
117+
if (!srcs[i])
118+
continue;
119+
120+
perf_cpu_map__put(evsel->cpus);
121+
evsel->cpus = perf_cpu_map__new_int(perf_cpu_map__cpu(srcs[i], 0).cpu);
122+
break;
123+
}
124+
}
125+
104126
/* Sanity check assert before the evsel is potentially removed. */
105127
assert(!evsel->requires_cpu || !perf_cpu_map__has_any_cpu(evsel->cpus));
106128

@@ -133,16 +155,22 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
133155

134156
static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
135157
{
136-
struct perf_evsel *evsel, *n;
137-
138158
evlist->needs_map_propagation = true;
139159

140160
/* Clear the all_cpus set which will be merged into during propagation. */
141161
perf_cpu_map__put(evlist->all_cpus);
142162
evlist->all_cpus = NULL;
143163

144-
list_for_each_entry_safe(evsel, n, &evlist->entries, node)
145-
__perf_evlist__propagate_maps(evlist, evsel);
164+
/* 2 rounds so that reads_only_on_cpu_idx0 benefit from knowing the other CPU maps. */
165+
for (int round = 0; round < 2; round++) {
166+
struct perf_evsel *evsel, *n;
167+
168+
list_for_each_entry_safe(evsel, n, &evlist->entries, node) {
169+
if ((!evsel->reads_only_on_cpu_idx0 && round == 0) ||
170+
(evsel->reads_only_on_cpu_idx0 && round == 1))
171+
__perf_evlist__propagate_maps(evlist, evsel);
172+
}
173+
}
146174
}
147175

148176
void perf_evlist__add(struct perf_evlist *evlist,

tools/lib/perf/include/internal/evsel.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,8 @@ struct perf_evsel {
128128
bool requires_cpu;
129129
/** Is the PMU for the event a core one? Affects the handling of own_cpus. */
130130
bool is_pmu_core;
131+
/** Does the evsel only read on the first CPU index, such as tool time events? */
132+
bool reads_only_on_cpu_idx0;
131133
int idx;
132134
};
133135

tools/perf/util/parse-events.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,7 @@ __add_event(struct list_head *list, int *idx,
269269
evsel->core.pmu_cpus = pmu_cpus;
270270
evsel->core.requires_cpu = pmu ? pmu->is_uncore : false;
271271
evsel->core.is_pmu_core = is_pmu_core;
272+
evsel->core.reads_only_on_cpu_idx0 = perf_pmu__reads_only_on_cpu_idx0(attr);
272273
evsel->pmu = pmu;
273274
evsel->alternate_hw_config = alternate_hw_config;
274275
evsel->first_wildcard_match = first_wildcard_match;

tools/perf/util/pmu.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2718,3 +2718,14 @@ const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config)
27182718
}
27192719
return NULL;
27202720
}
2721+
2722+
bool perf_pmu__reads_only_on_cpu_idx0(const struct perf_event_attr *attr)
2723+
{
2724+
enum tool_pmu_event event;
2725+
2726+
if (attr->type != PERF_PMU_TYPE_TOOL)
2727+
return false;
2728+
2729+
event = (enum tool_pmu_event)attr->config;
2730+
return event != TOOL_PMU__EVENT_USER_TIME && event != TOOL_PMU__EVENT_SYSTEM_TIME;
2731+
}

tools/perf/util/pmu.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,8 @@ void perf_pmu__delete(struct perf_pmu *pmu);
350350
const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config);
351351
bool perf_pmu__is_fake(const struct perf_pmu *pmu);
352352

353+
bool perf_pmu__reads_only_on_cpu_idx0(const struct perf_event_attr *attr);
354+
353355
static inline enum pmu_kind perf_pmu__kind(const struct perf_pmu *pmu)
354356
{
355357
__u32 type;

0 commit comments

Comments
 (0)