Skip to content

Commit a2b4d0f

Browse files
committed
tools/power turbostat: Favor cpu# over core#
Turbostat collects statistics and outputs results in "topology order", which means it prioritizes the core# over the cpu#. The strategy is to minimize wakeups to a core -- which is important when measuring an idle system. But core order is problematic, because Linux core#'s are physical (within each package), and thus subject to APIC-id scrambling that may be done by the hardware or the BIOS. As a result users may be faced with rows in a confusing order: sudo turbostat -q --show topology,Busy%,CPU%c6,UncMHz sleep 1 Core CPU Busy% CPU%c6 UncMHz - - 1.25 72.18 3400 0 4 7.74 0.00 1 5 1.77 88.59 2 6 0.48 96.73 3 7 0.21 98.34 4 8 0.14 96.85 5 9 0.26 97.55 6 10 0.44 97.24 7 11 0.12 96.18 8 0 5.41 0.31 3400 8 1 0.19 12 2 0.41 0.22 12 3 0.08 32 12 0.04 99.21 33 13 0.25 94.92 Abandon the legacy "core# topology order" in favor of simply ordering by cpu#, with a special case to handle HT siblings that may not have adjacent cpu#'s. sudo ./turbostat -q --show topology,Busy%,CPU%c6,UncMHz sleep 1 1.003001 sec Core CPU Busy% CPU%c6 UncMHz - - 1.38 80.55 1600 8 0 10.94 0.00 1600 8 1 0.53 12 2 2.90 0.45 12 3 0.11 0 4 1.96 91.20 1 5 0.97 96.40 2 6 0.24 94.72 3 7 0.31 98.01 4 8 0.20 98.20 5 9 0.62 96.00 6 10 0.06 98.15 7 11 0.12 99.31 32 12 0.04 99.07 33 13 0.27 95.09 The result is that cpu#'s now take precedence over core#'s. Signed-off-by: Len Brown <len.brown@intel.com>
1 parent 6be5c15 commit a2b4d0f

1 file changed

Lines changed: 69 additions & 54 deletions

File tree

tools/power/x86/turbostat/turbostat.c

Lines changed: 69 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -2187,20 +2187,6 @@ struct pkg_data {
21872187
#define ODD_COUNTERS odd.threads, odd.cores, odd.packages
21882188
#define EVEN_COUNTERS even.threads, even.cores, even.packages
21892189

2190-
#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no) \
2191-
((thread_base) + \
2192-
((pkg_no) * \
2193-
topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
2194-
((node_no) * topo.cores_per_node * topo.threads_per_core) + \
2195-
((core_no) * topo.threads_per_core) + \
2196-
(thread_no))
2197-
2198-
#define GET_CORE(core_base, core_no, node_no, pkg_no) \
2199-
((core_base) + \
2200-
((pkg_no) * topo.nodes_per_pkg * topo.cores_per_node) + \
2201-
((node_no) * topo.cores_per_node) + \
2202-
(core_no))
2203-
22042190
/*
22052191
* The accumulated sum of MSR is defined as a monotonic
22062192
* increasing MSR, it will be accumulated periodically,
@@ -2392,6 +2378,8 @@ struct platform_counters {
23922378
struct rapl_counter energy_psys; /* MSR_PLATFORM_ENERGY_STATUS */
23932379
} platform_counters_odd, platform_counters_even;
23942380

2381+
#define MAX_HT_ID 3 /* support SMT-4 */
2382+
23952383
struct cpu_topology {
23962384
int cpu_id;
23972385
int core_id; /* unique within a package */
@@ -2401,7 +2389,7 @@ struct cpu_topology {
24012389
int physical_node_id;
24022390
int logical_node_id; /* 0-based count within the package */
24032391
int ht_id; /* unique within a core */
2404-
int ht_sibling_cpu_id;
2392+
int ht_sibling_cpu_id[MAX_HT_ID + 1];
24052393
int type;
24062394
cpu_set_t *put_ids; /* Processing Unit/Thread IDs */
24072395
} *cpus;
@@ -2458,27 +2446,38 @@ int cpu_is_not_allowed(int cpu)
24582446
int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *),
24592447
struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
24602448
{
2461-
int retval, pkg_no, core_no, thread_no, node_no;
2449+
int cpu, retval;
24622450

24632451
retval = 0;
24642452

2465-
for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
2466-
for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) {
2467-
for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
2468-
for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
2469-
struct thread_data *t;
2470-
struct core_data *c;
2453+
for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
2454+
struct thread_data *t;
2455+
struct core_data *c;
2456+
struct pkg_data *p;
2457+
2458+
int pkg_id = cpus[cpu].package_id;
2459+
2460+
if (cpu_is_not_allowed(cpu))
2461+
continue;
2462+
2463+
if (cpus[cpu].ht_id > 0) /* skip HT sibling */
2464+
continue;
24712465

2472-
t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
2466+
t = &thread_base[cpu];
2467+
c = &core_base[GLOBAL_CORE_ID(cpus[cpu].core_id, pkg_id)];
2468+
p = &pkg_base[pkg_id];
24732469

2474-
if (cpu_is_not_allowed(t->cpu_id))
2475-
continue;
2470+
retval |= func(t, c, p);
24762471

2477-
c = GET_CORE(core_base, core_no, node_no, pkg_no);
2472+
/* Handle HT sibling now */
2473+
int i;
24782474

2479-
retval |= func(t, c, &pkg_base[pkg_no]);
2480-
}
2481-
}
2475+
for (i = MAX_HT_ID; i > 0; --i) { /* ht_id 0 is self */
2476+
if (cpus[cpu].ht_sibling_cpu_id[i] <= 0)
2477+
continue;
2478+
t = &thread_base[cpus[cpu].ht_sibling_cpu_id[i]];
2479+
2480+
retval |= func(t, c, p);
24822481
}
24832482
}
24842483
return retval;
@@ -6168,7 +6167,7 @@ static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size)
61686167
return 0;
61696168
}
61706169

6171-
int get_thread_siblings(struct cpu_topology *thiscpu)
6170+
int set_thread_siblings(struct cpu_topology *thiscpu)
61726171
{
61736172
char path[80], character;
61746173
FILE *filep;
@@ -6206,8 +6205,11 @@ int get_thread_siblings(struct cpu_topology *thiscpu)
62066205
if (sib_core == thiscpu->core_id) {
62076206
CPU_SET_S(so, size, thiscpu->put_ids);
62086207
if ((so != cpu) && (cpus[so].ht_id < 0)) {
6209-
cpus[so].ht_id = thread_id++;
6210-
cpus[cpu].ht_sibling_cpu_id = so;
6208+
cpus[so].ht_id = thread_id;
6209+
cpus[cpu].ht_sibling_cpu_id[thread_id] = so;
6210+
if (debug)
6211+
fprintf(stderr, "%s: cpu%d.ht_sibling_cpu_id[%d] = %d\n", __func__, cpu, thread_id, so);
6212+
thread_id += 1;
62116213
}
62126214
}
62136215
}
@@ -6229,30 +6231,40 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
62296231
struct core_data *core_base, struct pkg_data *pkg_base,
62306232
struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2)
62316233
{
6232-
int retval, pkg_no, node_no, core_no, thread_no;
6234+
int cpu, retval;
62336235

62346236
retval = 0;
62356237

6236-
for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
6237-
for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
6238-
for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
6239-
for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
6240-
struct thread_data *t, *t2;
6241-
struct core_data *c, *c2;
6238+
for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
6239+
struct thread_data *t, *t2;
6240+
struct core_data *c, *c2;
6241+
struct pkg_data *p, *p2;
62426242

6243-
t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
6243+
if (cpu_is_not_allowed(cpu))
6244+
continue;
62446245

6245-
if (cpu_is_not_allowed(t->cpu_id))
6246-
continue;
6246+
if (cpus[cpu].ht_id > 0) /* skip HT sibling */
6247+
continue;
62476248

6248-
t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no);
6249+
t = &thread_base[cpu];
6250+
t2 = &thread_base2[cpu];
6251+
c = &core_base[GLOBAL_CORE_ID(cpus[cpu].core_id, cpus[cpu].package_id)];
6252+
c2 = &core_base2[GLOBAL_CORE_ID(cpus[cpu].core_id, cpus[cpu].package_id)];
6253+
p = &pkg_base[cpus[cpu].package_id];
6254+
p2 = &pkg_base2[cpus[cpu].package_id];
62496255

6250-
c = GET_CORE(core_base, core_no, node_no, pkg_no);
6251-
c2 = GET_CORE(core_base2, core_no, node_no, pkg_no);
6256+
retval |= func(t, c, p, t2, c2, p2);
62526257

6253-
retval |= func(t, c, &pkg_base[pkg_no], t2, c2, &pkg_base2[pkg_no]);
6254-
}
6255-
}
6258+
/* Handle HT sibling now */
6259+
int i;
6260+
6261+
for (i = MAX_HT_ID; i > 0; --i) { /* ht_id 0 is self */
6262+
if (cpus[cpu].ht_sibling_cpu_id[i] <= 0)
6263+
continue;
6264+
t = &thread_base[cpus[cpu].ht_sibling_cpu_id[i]];
6265+
t2 = &thread_base2[cpus[cpu].ht_sibling_cpu_id[i]];
6266+
6267+
retval |= func(t, c, p, t2, c2, p2);
62566268
}
62576269
}
62586270
return retval;
@@ -6391,10 +6403,13 @@ int mark_cpu_present(int cpu)
63916403
return 0;
63926404
}
63936405

6394-
int init_ht_id(int cpu)
6406+
int clear_ht_id(int cpu)
63956407
{
6408+
int i;
6409+
63966410
cpus[cpu].ht_id = -1;
6397-
cpus[cpu].ht_sibling_cpu_id = -1;
6411+
for (i = 0; i <= MAX_HT_ID; ++i)
6412+
cpus[cpu].ht_sibling_cpu_id[i] = -1;
63986413
return 0;
63996414
}
64006415

@@ -9579,7 +9594,7 @@ void topology_probe(bool startup)
95799594
cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
95809595
CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
95819596

9582-
for_all_proc_cpus(init_ht_id);
9597+
for_all_proc_cpus(clear_ht_id);
95839598

95849599
for_all_proc_cpus(set_cpu_hybrid_type);
95859600

@@ -9624,7 +9639,7 @@ void topology_probe(bool startup)
96249639
max_core_id = cpus[i].core_id;
96259640

96269641
/* get thread information */
9627-
siblings = get_thread_siblings(&cpus[i]);
9642+
siblings = set_thread_siblings(&cpus[i]);
96289643
if (siblings > max_siblings)
96299644
max_siblings = siblings;
96309645
if (cpus[i].ht_id == 0)
@@ -9748,8 +9763,8 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base,
97489763
if (node_id < 0)
97499764
node_id = 0;
97509765

9751-
t = GET_THREAD(thread_base, cpus[cpu_id].ht_id, core_id, node_id, pkg_id);
9752-
c = GET_CORE(core_base, core_id, node_id, pkg_id);
9766+
t = &thread_base[cpu_id];
9767+
c = &core_base[GLOBAL_CORE_ID(core_id, pkg_id)];
97539768

97549769
t->cpu_id = cpu_id;
97559770
if (!cpu_is_not_allowed(cpu_id)) {

0 commit comments

Comments
 (0)