
Commit e2dec48

Merge tag 'perf-tools-fixes-for-v5.18-2022-04-14' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tools fixes from Arnaldo Carvalho de Melo:

 - 'perf record --per-thread' mode doesn't have the CPU mask set up, so
   it can't use it to figure out the number of mmaps; fix it.

 - Fix segfault accessing the sample_id xyarray out of bounds, noticed
   while using Intel PT, where we have a dummy event to capture text
   poke perf metadata events and we mix up the set of CPUs specified by
   the user with the all-CPUs map needed for text poke.

 - Fix 'perf bench numa' to check if the CPU used to bind a task is
   online.

 - Fix 'perf bench numa' usage of affinity for machines with more than
   1000 CPUs.

 - Fix misleading add event PMU debug message, noticed while using the
   'intel_pt' PMU.

 - Fix error check of the hashmap__new() return value in 'perf stat':
   it must use IS_ERR().

* tag 'perf-tools-fixes-for-v5.18-2022-04-14' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux:
  perf bench: Fix numa bench to fix usage of affinity for machines with #CPUs > 1K
  perf bench: Fix numa testcase to check if CPU used to bind task is online
  perf record: Fix per-thread option
  perf tools: Fix segfault accessing sample_id xyarray
  perf stat: Fix error check return value of hashmap__new(), must use IS_ERR()
  perf tools: Fix misleading add event PMU debug message
2 parents: 028192f + f58faed

7 files changed: 176 additions & 45 deletions


tools/lib/perf/evlist.c

Lines changed: 1 addition & 2 deletions
@@ -577,7 +577,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
 {
 	struct perf_evsel *evsel;
 	const struct perf_cpu_map *cpus = evlist->user_requested_cpus;
-	const struct perf_thread_map *threads = evlist->threads;

 	if (!ops || !ops->get || !ops->mmap)
 		return -EINVAL;
@@ -589,7 +588,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
 	perf_evlist__for_each_entry(evlist, evsel) {
 		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
 		    evsel->sample_id == NULL &&
-		    perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
+		    perf_evsel__alloc_id(evsel, evsel->fd->max_x, evsel->fd->max_y) < 0)
 			return -ENOMEM;
 	}
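The segfault fixed above is a size-vs-index mismatch: each evsel's sample_id array was sized from the evlist-wide user_requested_cpus and thread map, but it is indexed through the per-event fd xyarray, which can be larger (the Intel PT dummy event, for example, is opened on all CPUs rather than only the user-specified ones). Sizing the allocation from the xyarray's own max_x/max_y dimensions keeps allocation and indexing in sync. A minimal standalone sketch of the invariant, using a hypothetical struct rather than perf's real xyarray:

#include <stdlib.h>

/* Hypothetical 2D array indexed by (x, y); a simplified stand-in for
 * perf's xyarray, kept here only to illustrate the invariant. */
struct xy {
	int max_x, max_y;
	long *ents;
};

static struct xy *xy_new(int max_x, int max_y)
{
	struct xy *a = calloc(1, sizeof(*a));

	if (!a)
		return NULL;
	a->max_x = max_x;
	a->max_y = max_y;
	a->ents = calloc((size_t)max_x * max_y, sizeof(long));
	if (!a->ents) {
		free(a);
		return NULL;
	}
	return a;
}

/* Only safe when 'a' was allocated with the same dimensions later used
 * for indexing; the bug was sizing one array from another's dimensions. */
static long *xy_entry(struct xy *a, int x, int y)
{
	return &a->ents[(size_t)x * (size_t)a->max_y + (size_t)y];
}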

tools/perf/bench/numa.c

Lines changed: 101 additions & 35 deletions
@@ -34,6 +34,7 @@
 #include <linux/numa.h>
 #include <linux/zalloc.h>

+#include "../util/header.h"
 #include <numa.h>
 #include <numaif.h>

@@ -54,7 +55,7 @@

 struct thread_data {
 	int curr_cpu;
-	cpu_set_t bind_cpumask;
+	cpu_set_t *bind_cpumask;
 	int bind_node;
 	u8 *process_data;
 	int process_nr;
@@ -266,71 +267,115 @@ static bool node_has_cpus(int node)
 	return ret;
 }

-static cpu_set_t bind_to_cpu(int target_cpu)
+static cpu_set_t *bind_to_cpu(int target_cpu)
 {
-	cpu_set_t orig_mask, mask;
-	int ret;
+	int nrcpus = numa_num_possible_cpus();
+	cpu_set_t *orig_mask, *mask;
+	size_t size;

-	ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
-	BUG_ON(ret);
+	orig_mask = CPU_ALLOC(nrcpus);
+	BUG_ON(!orig_mask);
+	size = CPU_ALLOC_SIZE(nrcpus);
+	CPU_ZERO_S(size, orig_mask);
+
+	if (sched_getaffinity(0, size, orig_mask))
+		goto err_out;
+
+	mask = CPU_ALLOC(nrcpus);
+	if (!mask)
+		goto err_out;

-	CPU_ZERO(&mask);
+	CPU_ZERO_S(size, mask);

 	if (target_cpu == -1) {
 		int cpu;

 		for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
-			CPU_SET(cpu, &mask);
+			CPU_SET_S(cpu, size, mask);
 	} else {
-		BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
-		CPU_SET(target_cpu, &mask);
+		if (target_cpu < 0 || target_cpu >= g->p.nr_cpus)
+			goto err;
+
+		CPU_SET_S(target_cpu, size, mask);
 	}

-	ret = sched_setaffinity(0, sizeof(mask), &mask);
-	BUG_ON(ret);
+	if (sched_setaffinity(0, size, mask))
+		goto err;

 	return orig_mask;
+
+err:
+	CPU_FREE(mask);
+err_out:
+	CPU_FREE(orig_mask);
+
+	/* BUG_ON due to failure in allocation of orig_mask/mask */
+	BUG_ON(-1);
 }

-static cpu_set_t bind_to_node(int target_node)
+static cpu_set_t *bind_to_node(int target_node)
 {
-	cpu_set_t orig_mask, mask;
+	int nrcpus = numa_num_possible_cpus();
+	size_t size;
+	cpu_set_t *orig_mask, *mask;
 	int cpu;
-	int ret;

-	ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
-	BUG_ON(ret);
+	orig_mask = CPU_ALLOC(nrcpus);
+	BUG_ON(!orig_mask);
+	size = CPU_ALLOC_SIZE(nrcpus);
+	CPU_ZERO_S(size, orig_mask);

-	CPU_ZERO(&mask);
+	if (sched_getaffinity(0, size, orig_mask))
+		goto err_out;
+
+	mask = CPU_ALLOC(nrcpus);
+	if (!mask)
+		goto err_out;
+
+	CPU_ZERO_S(size, mask);

 	if (target_node == NUMA_NO_NODE) {
 		for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
-			CPU_SET(cpu, &mask);
+			CPU_SET_S(cpu, size, mask);
 	} else {
 		struct bitmask *cpumask = numa_allocate_cpumask();

-		BUG_ON(!cpumask);
+		if (!cpumask)
+			goto err;
+
 		if (!numa_node_to_cpus(target_node, cpumask)) {
 			for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
 				if (numa_bitmask_isbitset(cpumask, cpu))
-					CPU_SET(cpu, &mask);
+					CPU_SET_S(cpu, size, mask);
 			}
 		}
 		numa_free_cpumask(cpumask);
 	}

-	ret = sched_setaffinity(0, sizeof(mask), &mask);
-	BUG_ON(ret);
+	if (sched_setaffinity(0, size, mask))
+		goto err;

 	return orig_mask;
+
+err:
+	CPU_FREE(mask);
+err_out:
+	CPU_FREE(orig_mask);
+
+	/* BUG_ON due to failure in allocation of orig_mask/mask */
+	BUG_ON(-1);
 }

-static void bind_to_cpumask(cpu_set_t mask)
+static void bind_to_cpumask(cpu_set_t *mask)
 {
 	int ret;
+	size_t size = CPU_ALLOC_SIZE(numa_num_possible_cpus());

-	ret = sched_setaffinity(0, sizeof(mask), &mask);
-	BUG_ON(ret);
+	ret = sched_setaffinity(0, size, mask);
+	if (ret) {
+		CPU_FREE(mask);
+		BUG_ON(ret);
+	}
 }

 static void mempol_restore(void)
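The pattern above replaces glibc's fixed-size cpu_set_t, which tops out at CPU_SETSIZE (1024) CPUs, with dynamically sized sets: CPU_ALLOC() sizes the set for any CPU count, and every subsequent operation must go through the _S variants together with the CPU_ALLOC_SIZE() byte count, since sched_getaffinity() with a set smaller than the kernel's mask fails with EINVAL on such machines. A self-contained sketch of the allocate/query/pin/free cycle (the 2048 bound is an arbitrary stand-in for numa_num_possible_cpus()):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	int nrcpus = 2048;	/* assumed upper bound; the bench uses numa_num_possible_cpus() */
	size_t size = CPU_ALLOC_SIZE(nrcpus);
	cpu_set_t *mask = CPU_ALLOC(nrcpus);

	if (!mask)
		return EXIT_FAILURE;
	CPU_ZERO_S(size, mask);

	/* Query the current affinity into the dynamically sized set. */
	if (sched_getaffinity(0, size, mask)) {
		perror("sched_getaffinity");
		CPU_FREE(mask);
		return EXIT_FAILURE;
	}
	printf("runnable on %d CPUs\n", CPU_COUNT_S(size, mask));

	/* Pin the calling thread to CPU 0, then release the set. */
	CPU_ZERO_S(size, mask);
	CPU_SET_S(0, size, mask);
	if (sched_setaffinity(0, size, mask))
		perror("sched_setaffinity");

	CPU_FREE(mask);
	return 0;
}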
@@ -376,7 +421,7 @@ do { \
 static u8 *alloc_data(ssize_t bytes0, int map_flags,
 		      int init_zero, int init_cpu0, int thp, int init_random)
 {
-	cpu_set_t orig_mask;
+	cpu_set_t *orig_mask = NULL;
 	ssize_t bytes;
 	u8 *buf;
 	int ret;
@@ -434,6 +479,7 @@ static u8 *alloc_data(ssize_t bytes0, int map_flags,
 	/* Restore affinity: */
 	if (init_cpu0) {
 		bind_to_cpumask(orig_mask);
+		CPU_FREE(orig_mask);
 		mempol_restore();
 	}

@@ -585,10 +631,16 @@ static int parse_setup_cpu_list(void)
 		return -1;
 	}

+	if (is_cpu_online(bind_cpu_0) != 1 || is_cpu_online(bind_cpu_1) != 1) {
+		printf("\nTest not applicable, bind_cpu_0 or bind_cpu_1 is offline\n");
+		return -1;
+	}
+
 	BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0);
 	BUG_ON(bind_cpu_0 > bind_cpu_1);

 	for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
+		size_t size = CPU_ALLOC_SIZE(g->p.nr_cpus);
 		int i;

 		for (i = 0; i < mul; i++) {
@@ -608,10 +660,15 @@ static int parse_setup_cpu_list(void)
 				tprintf("%2d", bind_cpu);
 			}

-			CPU_ZERO(&td->bind_cpumask);
+			td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus);
+			BUG_ON(!td->bind_cpumask);
+			CPU_ZERO_S(size, td->bind_cpumask);
 			for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) {
-				BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus);
-				CPU_SET(cpu, &td->bind_cpumask);
+				if (cpu < 0 || cpu >= g->p.nr_cpus) {
+					CPU_FREE(td->bind_cpumask);
+					BUG_ON(-1);
+				}
+				CPU_SET_S(cpu, size, td->bind_cpumask);
 			}
 			t++;
 		}
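is_cpu_online() is perf's own helper from tools/perf/util/header.h (hence the new include at the top of the file); it reports the sysfs hotplug state of a CPU. A simplified standalone stand-in, not the actual tools/perf implementation, assuming a valid CPU number and the usual /sys/devices/system/cpu layout:

#include <stdio.h>

/* Returns 1 if the CPU is online, 0 if offline, -1 on read error.
 * CPUs without hotplug control (typically cpu0) have no "online" file;
 * treat a missing file as online. */
static int cpu_is_online(int cpu)
{
	char path[256];
	char state = 0;
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpu%d/online", cpu);
	f = fopen(path, "r");
	if (!f)
		return 1;	/* no hotplug control file: assume online */
	if (fread(&state, 1, 1, f) != 1) {
		fclose(f);
		return -1;
	}
	fclose(f);
	return state == '1';
}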
@@ -752,8 +809,6 @@ static int parse_nodes_opt(const struct option *opt __maybe_unused,
 	return parse_node_list(arg);
 }

-#define BIT(x) (1ul << x)
-
 static inline uint32_t lfsr_32(uint32_t lfsr)
 {
 	const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31);
@@ -1241,7 +1296,7 @@ static void *worker_thread(void *__tdata)
 		 * by migrating to CPU#0:
 		 */
 		if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) {
-			cpu_set_t orig_mask;
+			cpu_set_t *orig_mask;
 			int target_cpu;
 			int this_cpu;

@@ -1265,6 +1320,7 @@ static void *worker_thread(void *__tdata)
 				printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu);

 			bind_to_cpumask(orig_mask);
+			CPU_FREE(orig_mask);
 		}

 		if (details >= 3) {
@@ -1398,21 +1454,31 @@ static void init_thread_data(void)

 	for (t = 0; t < g->p.nr_tasks; t++) {
 		struct thread_data *td = g->threads + t;
+		size_t cpuset_size = CPU_ALLOC_SIZE(g->p.nr_cpus);
 		int cpu;

 		/* Allow all nodes by default: */
 		td->bind_node = NUMA_NO_NODE;

 		/* Allow all CPUs by default: */
-		CPU_ZERO(&td->bind_cpumask);
+		td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus);
+		BUG_ON(!td->bind_cpumask);
+		CPU_ZERO_S(cpuset_size, td->bind_cpumask);
 		for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
-			CPU_SET(cpu, &td->bind_cpumask);
+			CPU_SET_S(cpu, cpuset_size, td->bind_cpumask);
 	}
 }

 static void deinit_thread_data(void)
 {
 	ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
+	int t;
+
+	/* Free the bind_cpumask allocated for thread_data */
+	for (t = 0; t < g->p.nr_tasks; t++) {
+		struct thread_data *td = g->threads + t;
+		CPU_FREE(td->bind_cpumask);
+	}

 	free_data(g->threads, size);
 }

tools/perf/builtin-record.c

Lines changed: 17 additions & 5 deletions
@@ -989,8 +989,11 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru
 	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
 	struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;

-	thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
-					      thread_data->mask->maps.nbits);
+	if (cpu_map__is_dummy(cpus))
+		thread_data->nr_mmaps = nr_mmaps;
+	else
+		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
+						      thread_data->mask->maps.nbits);
 	if (mmap) {
 		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
 		if (!thread_data->maps)
@@ -1007,16 +1010,17 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru
 		  thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);

 	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
-		if (test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) {
+		if (cpu_map__is_dummy(cpus) ||
+		    test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) {
 			if (thread_data->maps) {
 				thread_data->maps[tm] = &mmap[m];
 				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
-					  thread_data, cpus->map[m].cpu, tm, m);
+					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
 			}
 			if (thread_data->overwrite_maps) {
 				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
 				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
-					  thread_data, cpus->map[m].cpu, tm, m);
+					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
 			}
 			tm++;
 		}
@@ -3329,6 +3333,9 @@ static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_c
 {
 	int c;

+	if (cpu_map__is_dummy(cpus))
+		return;
+
 	for (c = 0; c < cpus->nr; c++)
 		set_bit(cpus->map[c].cpu, mask->bits);
 }
@@ -3680,6 +3687,11 @@ static int record__init_thread_masks(struct record *rec)
 	if (!record__threads_enabled(rec))
 		return record__init_thread_default_masks(rec, cpus);

+	if (cpu_map__is_dummy(cpus)) {
+		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
+		return -EINVAL;
+	}
+
 	switch (rec->opts.threads_spec) {
 	case THREAD_SPEC__CPU:
 		ret = record__init_thread_cpu_masks(rec, cpus);
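In --per-thread mode, evlist->core.user_requested_cpus is the dummy CPU map, so record__mmap_cpu_mask_init() has no real CPU numbers to set and the per-thread mask stays empty; counting its set bits via bitmap_weight() would therefore yield zero mmaps, which is why the dummy case now takes nr_mmaps directly. For reference, bitmap_weight() is simply a population count over the bitmap words; a standalone analogue using a GCC builtin (not the kernel's implementation, and assuming bits past nbits are zero):

#include <limits.h>
#include <stddef.h>

/* Count the set bits in a bitmap of 'nbits' bits stored as unsigned longs.
 * The kernel's bitmap_weight() masks the final partial word; this sketch
 * instead assumes any bits past 'nbits' are already zero. */
static unsigned int bitmap_weight_like(const unsigned long *bits, size_t nbits)
{
	const size_t bits_per_long = sizeof(unsigned long) * CHAR_BIT;
	size_t nlongs = (nbits + bits_per_long - 1) / bits_per_long;
	unsigned int weight = 0;

	for (size_t i = 0; i < nlongs; i++)
		weight += (unsigned int)__builtin_popcountl(bits[i]);
	return weight;
}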
