
Commit f58faed

athira-rajeev authored and acmel committed
perf bench: Fix numa bench to fix usage of affinity for machines with #CPUs > 1K
The 'perf bench numa' testcase fails on systems with more than 1K CPUs.

Testcase: perf bench numa mem -p 1 -t 3 -P 512 -s 100 -zZ0qcm --thp 1

Snippet of the failure:

  <<>>
  perf: bench/numa.c:302: bind_to_node: Assertion `!(ret)' failed.
  Aborted (core dumped)
  <<>>

bind_to_node() uses sched_getaffinity() to save the original cpumask, and this call returns EINVAL (invalid argument). This happens because the default mask size in glibc is 1024 CPUs. To overcome this limitation of cpu_set_t, size the mask dynamically with the CPU_*_S macros: use CPU_ALLOC to allocate the cpumask and CPU_ALLOC_SIZE to obtain its size. Apart from fixing this for "orig_mask", apply the same logic to "mask", which is passed to sched_setaffinity(), so that the mask is large enough to represent the number of possible CPUs in the system.

sched_getaffinity() is used in one more place in perf numa bench: the bind_to_cpu() function. Apply the same logic there as well. Though no failure is currently reported from there, getaffinity should still work on system configurations with more CPUs than the default mask size supported by glibc. Also fix the sched_setaffinity() calls to use a mask size large enough to represent the number of possible CPUs in the system.

Fix all places where "bind_cpumask", which is part of "struct thread_data", is used, so that bind_cpumask works in all configurations.

Reported-by: Disha Goel <disgoel@linux.vnet.ibm.com>
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nageswara R Sastry <rnsastry@linux.ibm.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: https://lore.kernel.org/r/20220412164059.42654-3-atrajeev@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
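[Editor's note] For readers unfamiliar with the glibc dynamic CPU-set API the patch switches to, here is a minimal standalone sketch of the same pattern. It is illustrative only, not part of the patch; it sizes the set via sysconf(_SC_NPROCESSORS_CONF), whereas the bench uses numa_num_possible_cpus():

  #define _GNU_SOURCE
  #include <sched.h>
  #include <stdio.h>
  #include <unistd.h>

  int main(void)
  {
          /* Illustrative stand-in for numa_num_possible_cpus(): */
          int nrcpus = (int)sysconf(_SC_NPROCESSORS_CONF);
          size_t size = CPU_ALLOC_SIZE(nrcpus);
          cpu_set_t *mask = CPU_ALLOC(nrcpus);

          if (!mask)
                  return 1;

          CPU_ZERO_S(size, mask);

          /* Pass the allocated size, not sizeof(cpu_set_t), so the call
           * also works on systems with more than 1024 CPUs: */
          if (sched_getaffinity(0, size, mask)) {
                  perror("sched_getaffinity");
                  CPU_FREE(mask);
                  return 1;
          }

          printf("current affinity spans %d CPU(s)\n", CPU_COUNT_S(size, mask));
          CPU_FREE(mask);
          return 0;
  }

The key point is that CPU_ALLOC_SIZE(nrcpus) yields a byte count that matches the allocated set, which is what avoids the EINVAL behind the assertion quoted above.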
Parent: 8cb7a18

1 file changed: tools/perf/bench/numa.c (95 additions, 33 deletions)
@@ -55,7 +55,7 @@
 
 struct thread_data {
         int             curr_cpu;
-        cpu_set_t       bind_cpumask;
+        cpu_set_t       *bind_cpumask;
         int             bind_node;
         u8              *process_data;
         int             process_nr;
@@ -267,71 +267,115 @@ static bool node_has_cpus(int node)
         return ret;
 }
 
-static cpu_set_t bind_to_cpu(int target_cpu)
+static cpu_set_t *bind_to_cpu(int target_cpu)
 {
-        cpu_set_t orig_mask, mask;
-        int ret;
+        int nrcpus = numa_num_possible_cpus();
+        cpu_set_t *orig_mask, *mask;
+        size_t size;
 
-        ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
-        BUG_ON(ret);
+        orig_mask = CPU_ALLOC(nrcpus);
+        BUG_ON(!orig_mask);
+        size = CPU_ALLOC_SIZE(nrcpus);
+        CPU_ZERO_S(size, orig_mask);
+
+        if (sched_getaffinity(0, size, orig_mask))
+                goto err_out;
+
+        mask = CPU_ALLOC(nrcpus);
+        if (!mask)
+                goto err_out;
 
-        CPU_ZERO(&mask);
+        CPU_ZERO_S(size, mask);
 
         if (target_cpu == -1) {
                 int cpu;
 
                 for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
-                        CPU_SET(cpu, &mask);
+                        CPU_SET_S(cpu, size, mask);
         } else {
-                BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
-                CPU_SET(target_cpu, &mask);
+                if (target_cpu < 0 || target_cpu >= g->p.nr_cpus)
+                        goto err;
+
+                CPU_SET_S(target_cpu, size, mask);
         }
 
-        ret = sched_setaffinity(0, sizeof(mask), &mask);
-        BUG_ON(ret);
+        if (sched_setaffinity(0, size, mask))
+                goto err;
 
         return orig_mask;
+
+err:
+        CPU_FREE(mask);
+err_out:
+        CPU_FREE(orig_mask);
+
+        /* BUG_ON due to failure in allocation of orig_mask/mask */
+        BUG_ON(-1);
 }
 
-static cpu_set_t bind_to_node(int target_node)
+static cpu_set_t *bind_to_node(int target_node)
 {
-        cpu_set_t orig_mask, mask;
+        int nrcpus = numa_num_possible_cpus();
+        size_t size;
+        cpu_set_t *orig_mask, *mask;
         int cpu;
-        int ret;
 
-        ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
-        BUG_ON(ret);
+        orig_mask = CPU_ALLOC(nrcpus);
+        BUG_ON(!orig_mask);
+        size = CPU_ALLOC_SIZE(nrcpus);
+        CPU_ZERO_S(size, orig_mask);
+
+        if (sched_getaffinity(0, size, orig_mask))
+                goto err_out;
+
+        mask = CPU_ALLOC(nrcpus);
+        if (!mask)
+                goto err_out;
 
-        CPU_ZERO(&mask);
+        CPU_ZERO_S(size, mask);
 
         if (target_node == NUMA_NO_NODE) {
                 for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
-                        CPU_SET(cpu, &mask);
+                        CPU_SET_S(cpu, size, mask);
         } else {
                 struct bitmask *cpumask = numa_allocate_cpumask();
 
-                BUG_ON(!cpumask);
+                if (!cpumask)
+                        goto err;
+
                 if (!numa_node_to_cpus(target_node, cpumask)) {
                         for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
                                 if (numa_bitmask_isbitset(cpumask, cpu))
-                                        CPU_SET(cpu, &mask);
+                                        CPU_SET_S(cpu, size, mask);
                         }
                 }
                 numa_free_cpumask(cpumask);
         }
 
-        ret = sched_setaffinity(0, sizeof(mask), &mask);
-        BUG_ON(ret);
+        if (sched_setaffinity(0, size, mask))
+                goto err;
 
         return orig_mask;
+
+err:
+        CPU_FREE(mask);
+err_out:
+        CPU_FREE(orig_mask);
+
+        /* BUG_ON due to failure in allocation of orig_mask/mask */
+        BUG_ON(-1);
 }
 
-static void bind_to_cpumask(cpu_set_t mask)
+static void bind_to_cpumask(cpu_set_t *mask)
 {
         int ret;
+        size_t size = CPU_ALLOC_SIZE(numa_num_possible_cpus());
 
-        ret = sched_setaffinity(0, sizeof(mask), &mask);
-        BUG_ON(ret);
+        ret = sched_setaffinity(0, size, mask);
+        if (ret) {
+                CPU_FREE(mask);
+                BUG_ON(ret);
+        }
 }
 
 static void mempol_restore(void)
@@ -377,7 +421,7 @@ do { \
 static u8 *alloc_data(ssize_t bytes0, int map_flags,
                       int init_zero, int init_cpu0, int thp, int init_random)
 {
-        cpu_set_t orig_mask;
+        cpu_set_t *orig_mask = NULL;
         ssize_t bytes;
         u8 *buf;
         int ret;
@@ -435,6 +479,7 @@ static u8 *alloc_data(ssize_t bytes0, int map_flags,
         /* Restore affinity: */
         if (init_cpu0) {
                 bind_to_cpumask(orig_mask);
+                CPU_FREE(orig_mask);
                 mempol_restore();
         }
 
@@ -595,6 +640,7 @@ static int parse_setup_cpu_list(void)
                 BUG_ON(bind_cpu_0 > bind_cpu_1);
 
                 for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
+                        size_t size = CPU_ALLOC_SIZE(g->p.nr_cpus);
                         int i;
 
                         for (i = 0; i < mul; i++) {
@@ -614,10 +660,15 @@ static int parse_setup_cpu_list(void)
                                         tprintf("%2d", bind_cpu);
                                 }
 
-                                CPU_ZERO(&td->bind_cpumask);
+                                td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus);
+                                BUG_ON(!td->bind_cpumask);
+                                CPU_ZERO_S(size, td->bind_cpumask);
                                 for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) {
-                                        BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus);
-                                        CPU_SET(cpu, &td->bind_cpumask);
+                                        if (cpu < 0 || cpu >= g->p.nr_cpus) {
+                                                CPU_FREE(td->bind_cpumask);
+                                                BUG_ON(-1);
+                                        }
+                                        CPU_SET_S(cpu, size, td->bind_cpumask);
                                 }
                                 t++;
                         }
@@ -1245,7 +1296,7 @@ static void *worker_thread(void *__tdata)
                  * by migrating to CPU#0:
                  */
                 if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) {
-                        cpu_set_t orig_mask;
+                        cpu_set_t *orig_mask;
                         int target_cpu;
                         int this_cpu;
 
@@ -1269,6 +1320,7 @@ static void *worker_thread(void *__tdata)
                                 printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu);
 
                         bind_to_cpumask(orig_mask);
+                        CPU_FREE(orig_mask);
                 }
 
                 if (details >= 3) {
@@ -1402,21 +1454,31 @@ static void init_thread_data(void)
 
         for (t = 0; t < g->p.nr_tasks; t++) {
                 struct thread_data *td = g->threads + t;
+                size_t cpuset_size = CPU_ALLOC_SIZE(g->p.nr_cpus);
                 int cpu;
 
                 /* Allow all nodes by default: */
                 td->bind_node = NUMA_NO_NODE;
 
                 /* Allow all CPUs by default: */
-                CPU_ZERO(&td->bind_cpumask);
+                td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus);
+                BUG_ON(!td->bind_cpumask);
+                CPU_ZERO_S(cpuset_size, td->bind_cpumask);
                 for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
-                        CPU_SET(cpu, &td->bind_cpumask);
+                        CPU_SET_S(cpu, cpuset_size, td->bind_cpumask);
         }
 }
 
 static void deinit_thread_data(void)
 {
         ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
+        int t;
+
+        /* Free the bind_cpumask allocated for thread_data */
+        for (t = 0; t < g->p.nr_tasks; t++) {
+                struct thread_data *td = g->threads + t;
+                CPU_FREE(td->bind_cpumask);
+        }
 
         free_data(g->threads, size);
 }
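[Editor's note] The diff also changes an ownership contract, visible in the alloc_data() and worker_thread() hunks above: bind_to_cpu() and bind_to_node() now return a CPU_ALLOC()'d mask rather than a by-value cpu_set_t, so the caller restores affinity and then frees the mask. A minimal sketch of that caller-side flow (illustrative fragment using the patched helpers; CPU#0 is an arbitrary target):

          cpu_set_t *orig_mask;

          orig_mask = bind_to_cpu(0);   /* saves the old mask, binds to CPU#0 */

          /* ... work pinned to CPU#0 ... */

          bind_to_cpumask(orig_mask);   /* restore the saved affinity */
          CPU_FREE(orig_mask);          /* the saved mask is heap-allocated now */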
