@@ -2187,20 +2187,6 @@ struct pkg_data {
21872187#define ODD_COUNTERS odd.threads, odd.cores, odd.packages
21882188#define EVEN_COUNTERS even.threads, even.cores, even.packages
21892189
2190- #define GET_THREAD (thread_base , thread_no , core_no , node_no , pkg_no ) \
2191- ((thread_base) + \
2192- ((pkg_no) * \
2193- topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
2194- ((node_no) * topo.cores_per_node * topo.threads_per_core) + \
2195- ((core_no) * topo.threads_per_core) + \
2196- (thread_no))
2197-
2198- #define GET_CORE (core_base , core_no , node_no , pkg_no ) \
2199- ((core_base) + \
2200- ((pkg_no) * topo.nodes_per_pkg * topo.cores_per_node) + \
2201- ((node_no) * topo.cores_per_node) + \
2202- (core_no))
2203-
22042190/*
22052191 * The accumulated sum of MSR is defined as a monotonic
22062192 * increasing MSR, it will be accumulated periodically,
@@ -2392,6 +2378,8 @@ struct platform_counters {
23922378 struct rapl_counter energy_psys ; /* MSR_PLATFORM_ENERGY_STATUS */
23932379} platform_counters_odd , platform_counters_even ;
23942380
2381+ #define MAX_HT_ID 3 /* support SMT-4 */
2382+
23952383struct cpu_topology {
23962384 int cpu_id ;
23972385 int core_id ; /* unique within a package */
@@ -2401,7 +2389,7 @@ struct cpu_topology {
24012389 int physical_node_id ;
24022390 int logical_node_id ; /* 0-based count within the package */
24032391 int ht_id ; /* unique within a core */
2404- int ht_sibling_cpu_id ;
2392+ int ht_sibling_cpu_id [ MAX_HT_ID + 1 ] ;
24052393 int type ;
24062394 cpu_set_t * put_ids ; /* Processing Unit/Thread IDs */
24072395} * cpus ;
@@ -2458,27 +2446,38 @@ int cpu_is_not_allowed(int cpu)
24582446int for_all_cpus (int (func ) (struct thread_data * , struct core_data * , struct pkg_data * ),
24592447 struct thread_data * thread_base , struct core_data * core_base , struct pkg_data * pkg_base )
24602448{
2461- int retval , pkg_no , core_no , thread_no , node_no ;
2449+ int cpu , retval ;
24622450
24632451 retval = 0 ;
24642452
2465- for (pkg_no = 0 ; pkg_no < topo .num_packages ; ++ pkg_no ) {
2466- for (node_no = 0 ; node_no < topo .nodes_per_pkg ; node_no ++ ) {
2467- for (core_no = 0 ; core_no < topo .cores_per_node ; ++ core_no ) {
2468- for (thread_no = 0 ; thread_no < topo .threads_per_core ; ++ thread_no ) {
2469- struct thread_data * t ;
2470- struct core_data * c ;
2453+ for (cpu = 0 ; cpu <= topo .max_cpu_num ; ++ cpu ) {
2454+ struct thread_data * t ;
2455+ struct core_data * c ;
2456+ struct pkg_data * p ;
2457+
2458+ int pkg_id = cpus [cpu ].package_id ;
2459+
2460+ if (cpu_is_not_allowed (cpu ))
2461+ continue ;
2462+
2463+ if (cpus [cpu ].ht_id > 0 ) /* skip HT sibling */
2464+ continue ;
24712465
2472- t = GET_THREAD (thread_base , thread_no , core_no , node_no , pkg_no );
2466+ t = & thread_base [cpu ];
2467+ c = & core_base [GLOBAL_CORE_ID (cpus [cpu ].core_id , pkg_id )];
2468+ p = & pkg_base [pkg_id ];
24732469
2474- if (cpu_is_not_allowed (t -> cpu_id ))
2475- continue ;
2470+ retval |= func (t , c , p );
24762471
2477- c = GET_CORE (core_base , core_no , node_no , pkg_no );
2472+ /* Handle HT sibling now */
2473+ int i ;
24782474
2479- retval |= func (t , c , & pkg_base [pkg_no ]);
2480- }
2481- }
2475+ for (i = MAX_HT_ID ; i > 0 ; -- i ) { /* ht_id 0 is self */
2476+ if (cpus [cpu ].ht_sibling_cpu_id [i ] <= 0 )
2477+ continue ;
2478+ t = & thread_base [cpus [cpu ].ht_sibling_cpu_id [i ]];
2479+
2480+ retval |= func (t , c , p );
24822481 }
24832482 }
24842483 return retval ;
@@ -6168,7 +6167,7 @@ static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size)
61686167 return 0 ;
61696168}
61706169
6171- int get_thread_siblings (struct cpu_topology * thiscpu )
6170+ int set_thread_siblings (struct cpu_topology * thiscpu )
61726171{
61736172 char path [80 ], character ;
61746173 FILE * filep ;
@@ -6206,8 +6205,11 @@ int get_thread_siblings(struct cpu_topology *thiscpu)
62066205 if (sib_core == thiscpu -> core_id ) {
62076206 CPU_SET_S (so , size , thiscpu -> put_ids );
62086207 if ((so != cpu ) && (cpus [so ].ht_id < 0 )) {
6209- cpus [so ].ht_id = thread_id ++ ;
6210- cpus [cpu ].ht_sibling_cpu_id = so ;
6208+ cpus [so ].ht_id = thread_id ;
6209+ cpus [cpu ].ht_sibling_cpu_id [thread_id ] = so ;
6210+ if (debug )
6211+ fprintf (stderr , "%s: cpu%d.ht_sibling_cpu_id[%d] = %d\n" , __func__ , cpu , thread_id , so );
6212+ thread_id += 1 ;
62116213 }
62126214 }
62136215 }
@@ -6229,30 +6231,40 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
62296231 struct core_data * core_base , struct pkg_data * pkg_base ,
62306232 struct thread_data * thread_base2 , struct core_data * core_base2 , struct pkg_data * pkg_base2 )
62316233{
6232- int retval , pkg_no , node_no , core_no , thread_no ;
6234+ int cpu , retval ;
62336235
62346236 retval = 0 ;
62356237
6236- for (pkg_no = 0 ; pkg_no < topo .num_packages ; ++ pkg_no ) {
6237- for (node_no = 0 ; node_no < topo .nodes_per_pkg ; ++ node_no ) {
6238- for (core_no = 0 ; core_no < topo .cores_per_node ; ++ core_no ) {
6239- for (thread_no = 0 ; thread_no < topo .threads_per_core ; ++ thread_no ) {
6240- struct thread_data * t , * t2 ;
6241- struct core_data * c , * c2 ;
6238+ for (cpu = 0 ; cpu <= topo .max_cpu_num ; ++ cpu ) {
6239+ struct thread_data * t , * t2 ;
6240+ struct core_data * c , * c2 ;
6241+ struct pkg_data * p , * p2 ;
62426242
6243- t = GET_THREAD (thread_base , thread_no , core_no , node_no , pkg_no );
6243+ if (cpu_is_not_allowed (cpu ))
6244+ continue ;
62446245
6245- if (cpu_is_not_allowed ( t -> cpu_id ))
6246- continue ;
6246+ if (cpus [ cpu ]. ht_id > 0 ) /* skip HT sibling */
6247+ continue ;
62476248
6248- t2 = GET_THREAD (thread_base2 , thread_no , core_no , node_no , pkg_no );
6249+ t = & thread_base [cpu ];
6250+ t2 = & thread_base2 [cpu ];
6251+ c = & core_base [GLOBAL_CORE_ID (cpus [cpu ].core_id , cpus [cpu ].package_id )];
6252+ c2 = & core_base2 [GLOBAL_CORE_ID (cpus [cpu ].core_id , cpus [cpu ].package_id )];
6253+ p = & pkg_base [cpus [cpu ].package_id ];
6254+ p2 = & pkg_base2 [cpus [cpu ].package_id ];
62496255
6250- c = GET_CORE (core_base , core_no , node_no , pkg_no );
6251- c2 = GET_CORE (core_base2 , core_no , node_no , pkg_no );
6256+ retval |= func (t , c , p , t2 , c2 , p2 );
62526257
6253- retval |= func (t , c , & pkg_base [pkg_no ], t2 , c2 , & pkg_base2 [pkg_no ]);
6254- }
6255- }
6258+ /* Handle HT sibling now */
6259+ int i ;
6260+
6261+ for (i = MAX_HT_ID ; i > 0 ; -- i ) { /* ht_id 0 is self */
6262+ if (cpus [cpu ].ht_sibling_cpu_id [i ] <= 0 )
6263+ continue ;
6264+ t = & thread_base [cpus [cpu ].ht_sibling_cpu_id [i ]];
6265+ t2 = & thread_base2 [cpus [cpu ].ht_sibling_cpu_id [i ]];
6266+
6267+ retval |= func (t , c , p , t2 , c2 , p2 );
62566268 }
62576269 }
62586270 return retval ;
@@ -6391,10 +6403,13 @@ int mark_cpu_present(int cpu)
63916403 return 0 ;
63926404}
63936405
6394- int init_ht_id (int cpu )
6406+ int clear_ht_id (int cpu )
63956407{
6408+ int i ;
6409+
63966410 cpus [cpu ].ht_id = -1 ;
6397- cpus [cpu ].ht_sibling_cpu_id = -1 ;
6411+ for (i = 0 ; i <= MAX_HT_ID ; ++ i )
6412+ cpus [cpu ].ht_sibling_cpu_id [i ] = -1 ;
63986413 return 0 ;
63996414}
64006415
@@ -9579,7 +9594,7 @@ void topology_probe(bool startup)
95799594 cpu_affinity_setsize = CPU_ALLOC_SIZE ((topo .max_cpu_num + 1 ));
95809595 CPU_ZERO_S (cpu_affinity_setsize , cpu_affinity_set );
95819596
9582- for_all_proc_cpus (init_ht_id );
9597+ for_all_proc_cpus (clear_ht_id );
95839598
95849599 for_all_proc_cpus (set_cpu_hybrid_type );
95859600
@@ -9624,7 +9639,7 @@ void topology_probe(bool startup)
96249639 max_core_id = cpus [i ].core_id ;
96259640
96269641 /* get thread information */
9627- siblings = get_thread_siblings (& cpus [i ]);
9642+ siblings = set_thread_siblings (& cpus [i ]);
96289643 if (siblings > max_siblings )
96299644 max_siblings = siblings ;
96309645 if (cpus [i ].ht_id == 0 )
@@ -9748,8 +9763,8 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base,
97489763 if (node_id < 0 )
97499764 node_id = 0 ;
97509765
9751- t = GET_THREAD ( thread_base , cpus [cpu_id ]. ht_id , core_id , node_id , pkg_id ) ;
9752- c = GET_CORE ( core_base , core_id , node_id , pkg_id );
9766+ t = & thread_base [cpu_id ];
9767+ c = & core_base [ GLOBAL_CORE_ID ( core_id , pkg_id )] ;
97539768
97549769 t -> cpu_id = cpu_id ;
97559770 if (!cpu_is_not_allowed (cpu_id )) {
0 commit comments