@@ -13,27 +13,181 @@ extern gotoblas_t gotoblas_POWER10;
1313extern void openblas_warning (int verbose , const char * msg );
1414
/*
 * Printable names of the kernel sets this file can dispatch to.
 * The array index is the core id that gotoblas_corename() returns from and
 * that force_coretype() switches on; slot 0 is the "no match" placeholder.
 */
static char *corename[] = {
  [0] = "unknown",
  [1] = "POWER6",
  [2] = "POWER8",
  [3] = "POWER9",
  [4] = "POWER10",
};

/* Number of entries in corename[]; keep in sync with the table above. */
#define NUM_CORETYPES 5
2424
2525char * gotoblas_corename (void ) {
26- if (gotoblas == & gotoblas_POWER6 ) return corename [1 ];
27- if (gotoblas == & gotoblas_POWER8 ) return corename [2 ];
26+ #ifndef C_PGI
27+ if (gotoblas == & gotoblas_POWER6 ) return corename [1 ];
28+ #endif
29+ if (gotoblas == & gotoblas_POWER8 ) return corename [2 ];
2830#if (!defined __GNUC__ ) || ( __GNUC__ >= 6 )
29- if (gotoblas == & gotoblas_POWER9 ) return corename [3 ];
31+ if (gotoblas == & gotoblas_POWER9 ) return corename [3 ];
3032#endif
3133#ifdef HAVE_P10_SUPPORT
32- if (gotoblas == & gotoblas_POWER10 ) return corename [4 ];
34+ if (gotoblas == & gotoblas_POWER10 ) return corename [4 ];
35+ #endif
36+ return corename [0 ];
37+ }
38+
#if defined(__clang__)
/*
 * clang-only replacement for __builtin_cpu_supports(): ignores the requested
 * feature name and always answers "not supported" (0).
 */
static int __builtin_cpu_supports(char *arg)
{
  (void) arg;
  return 0;
}
#endif
45+
46+ #if defined(C_PGI ) || defined(__clang__ )
47+ /*
48+ * NV HPC compilers do not yet implement __builtin_cpu_is().
49+ * Fake a version here for use in the CPU detection code below.
50+ *
51+ * Strategy here is to first check the CPU to see what it actually is,
52+ * and then test the input to see if what the CPU actually is matches
53+ * what was requested.
54+ */
55+
56+ #include <string.h>
57+
58+ /*
59+ * Define POWER processor version table.
60+ *
61+ * NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time
62+ */
63+
/* CPU generation codes stored in pvrPOWER[].cpu_type. */
#define CPU_UNKNOWN  0
#define CPU_POWER5   5
#define CPU_POWER6   6
#define CPU_POWER8   8
#define CPU_POWER9   9
#define CPU_POWER10  10

/*
 * Lookup table for the value read with the `mfpvr` instruction.
 *
 * A row matches when (pvr & pvr_mask) == pvr_value; rows are tried in order
 * and the first match wins.  The last row has a zero mask and value, so it
 * matches every reading and acts as the "unknown CPU" terminator.
 * Note that the POWER7 and POWER7+ rows are classified as CPU_POWER6.
 */
static struct {
  uint32_t    pvr_mask;
  uint32_t    pvr_value;
  const char *cpu_name;
  uint32_t    cpu_type;
} pvrPOWER[] = {
  /* pvr_mask    pvr_value   cpu_name            cpu_type */
  { 0xffffffff, 0x0f000001, "POWER5+",         CPU_POWER5  }, /* POWER6 in P5+ mode; 2.04-compliant processor */
  { 0xffff0000, 0x003e0000, "POWER6 (raw)",    CPU_POWER6  }, /* Power6 aka POWER6X */
  { 0xffff0000, 0x003f0000, "POWER7 (raw)",    CPU_POWER6  }, /* Power7 */
  { 0xffff0000, 0x004A0000, "POWER7+ (raw)",   CPU_POWER6  }, /* Power7+ */
  { 0xffff0000, 0x004b0000, "POWER8E (raw)",   CPU_POWER8  }, /* Power8E */
  { 0xffff0000, 0x004c0000, "POWER8NVL (raw)", CPU_POWER8  }, /* Power8NVL */
  { 0xffff0000, 0x004d0000, "POWER8 (raw)",    CPU_POWER8  }, /* Power8 */
  { 0xffffefff, 0x004e0200, "POWER9 (raw)",    CPU_POWER9  }, /* Power9 DD2.0 */
  { 0xffffefff, 0x004e0201, "POWER9 (raw)",    CPU_POWER9  }, /* Power9 DD2.1 */
  { 0xffff0000, 0x004e0000, "POWER9 (raw)",    CPU_POWER9  }, /* Power9 DD2.2 or later */
  { 0xffff0000, 0x00800000, "POWER10 (raw)",   CPU_POWER10 }, /* Power10 */
  { 0x0,        0x0,        "Unknown",         CPU_UNKNOWN }, /* End of table, pvr_mask and pvr_value must be zero */
};
162+
163+ static int __builtin_cpu_is (const char * cpu ) {
164+ int i ;
165+ uint32_t pvr ;
166+ uint32_t cpu_type ;
167+
168+ asm("mfpvr %0" : "=r" (pvr ));
169+
170+ for (i = 0 ; i < sizeof pvrPOWER / sizeof * pvrPOWER ; ++ i ) {
171+ if ((pvr & pvrPOWER [i ].pvr_mask ) == pvrPOWER [i ].pvr_value ) {
172+ break ;
173+ }
174+ }
175+
176+ #if defined(DEBUG )
177+ printf ("%s: returning CPU=%s, cpu_type=%p\n" , __func__ ,
178+ pvrPOWER [i ].cpu_name , pvrPOWER [i ].cpu_type );
33179#endif
34- return corename [0 ];
180+ cpu_type = pvrPOWER [i ].cpu_type ;
181+
182+ if (!strcmp (cpu , "power8" ))
183+ return cpu_type == CPU_POWER8 ;
184+ if (!strcmp (cpu , "power9" ))
185+ return cpu_type == CPU_POWER9 ;
186+ return 0 ;
35187}
36188
#endif /* C_PGI || __clang__ */
190+
37191#ifdef _AIX
38192#include <sys/systemcfg.h>
39193
@@ -90,98 +244,102 @@ static int __builtin_cpu_supports(char* arg)
90244
91245static gotoblas_t * get_coretype (void ) {
92246
93- if (__builtin_cpu_supports ("power6" ))
94- return & gotoblas_POWER6 ;
95- if (__builtin_cpu_supports ("power8" ))
96- return & gotoblas_POWER8 ;
247+ #ifndef C_PGI
248+ if (__builtin_cpu_is ("power6" ) || __builtin_cpu_is ("power6x" ))
249+ return & gotoblas_POWER6 ;
250+ #endif
251+ if (__builtin_cpu_is ("power8" ))
252+ return & gotoblas_POWER8 ;
97253#if (!defined __GNUC__ ) || ( __GNUC__ >= 6 )
98- if (__builtin_cpu_supports ("power9" ))
99- return & gotoblas_POWER9 ;
254+ if (__builtin_cpu_is ("power9" ))
255+ return & gotoblas_POWER9 ;
100256#endif
101257#ifdef HAVE_P10_SUPPORT
102258#ifdef _AIX
103- if (__builtin_cpu_supports ("power10" ))
259+ if (__builtin_cpu_supports ("power10" ))
104260#else
105- if (__builtin_cpu_supports ("arch_3_1" ) && __builtin_cpu_supports ("mma" ))
106- #endif
107- return & gotoblas_POWER10 ;
261+ if (__builtin_cpu_supports ("arch_3_1" ) && __builtin_cpu_supports ("mma" ))
108262#endif
109- /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */
110- #if (!defined __GNUC__ ) || ( __GNUC__ < 11 ) || (__GNUC__ == 10 && __GNUC_MINOR__ < 2 )
111- if (__builtin_cpu_supports ("power10" ))
112- return & gotoblas_POWER9 ;
263+ return & gotoblas_POWER10 ;
113264#endif
114- return NULL ;
265+ /* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */
266+ #if (!defined __GNUC__ ) || ( __GNUC__ >= 11 ) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2 )
267+ if (__builtin_cpu_is ("power10" ))
268+ return & gotoblas_POWER9 ;
269+ #endif
270+ return NULL ;
115271}
116272
117273static gotoblas_t * force_coretype (char * coretype ) {
118274
119- int i ;
120- int found = -1 ;
121- char message [128 ];
122-
123- for ( i = 0 ; i < NUM_CORETYPES ; i ++ )
124- {
125- if (!strncasecmp (coretype , corename [i ], 20 ))
126- {
127- found = i ;
128- break ;
129- }
130- }
275+ int i ;
276+ int found = -1 ;
277+ char message [128 ];
131278
132- switch (found )
133- {
134- case 1 : return (& gotoblas_POWER6 );
135- case 2 : return (& gotoblas_POWER8 );
279+ for ( i = 0 ; i < NUM_CORETYPES ; i ++ )
280+ {
281+ if (!strncasecmp (coretype , corename [i ], 20 ))
282+ {
283+ found = i ;
284+ break ;
285+ }
286+ }
287+
288+ switch (found )
289+ {
290+ #ifndef C_PGI
291+ case 1 : return (& gotoblas_POWER6 );
292+ #endif
293+ case 2 : return (& gotoblas_POWER8 );
136294#if (!defined __GNUC__ ) || ( __GNUC__ >= 6 )
137- case 3 : return (& gotoblas_POWER9 );
295+ case 3 : return (& gotoblas_POWER9 );
138296#endif
139297#ifdef HAVE_P10_SUPPORT
140- case 4 : return (& gotoblas_POWER10 );
298+ case 4 : return (& gotoblas_POWER10 );
141299#endif
142- default : return NULL ;
143- }
144- snprintf (message , 128 , "Core not found: %s\n" , coretype );
145- openblas_warning (1 , message );
300+ default : return NULL ;
301+ }
302+ snprintf (message , 128 , "Core not found: %s\n" , coretype );
303+ openblas_warning (1 , message );
146304}
147305
148306void gotoblas_dynamic_init (void ) {
149307
150- char coremsg [128 ];
151- char coren [22 ];
152- char * p ;
308+ char coremsg [128 ];
309+ char coren [22 ];
310+ char * p ;
153311
154312
155- if (gotoblas ) return ;
313+ if (gotoblas ) return ;
156314
157- p = getenv ("OPENBLAS_CORETYPE" );
158- if ( p )
159- {
160- gotoblas = force_coretype (p );
161- }
162- else
163- {
164- gotoblas = get_coretype ();
165- }
315+ p = getenv ("OPENBLAS_CORETYPE" );
316+ if ( p )
317+ {
318+ gotoblas = force_coretype (p );
319+ }
320+ else
321+ {
322+ gotoblas = get_coretype ();
323+ }
166324
167- if (gotoblas == NULL )
168- {
169- snprintf (coremsg , 128 , "Falling back to POWER8 core\n" );
170- openblas_warning (1 , coremsg );
171- gotoblas = & gotoblas_POWER8 ;
172- }
325+ if (gotoblas == NULL )
326+ {
327+ snprintf (coremsg , 128 , "Falling back to POWER8 core\n" );
328+ openblas_warning (1 , coremsg );
329+ gotoblas = & gotoblas_POWER8 ;
330+ }
173331
174- if (gotoblas && gotoblas -> init ) {
175- strncpy (coren ,gotoblas_corename (),20 );
176- sprintf (coremsg , "Core: %s\n" ,coren );
177- openblas_warning (2 , coremsg );
178- gotoblas -> init ();
179- } else {
180- openblas_warning (0 , "OpenBLAS : Architecture Initialization failed. No initialization function found.\n" );
181- exit (1 );
182- }
332+ if (gotoblas && gotoblas -> init ) {
333+ strncpy (coren ,gotoblas_corename (),20 );
334+ sprintf (coremsg , "Core: %s\n" ,coren );
335+ openblas_warning (2 , coremsg );
336+ gotoblas -> init ();
337+ } else {
338+ openblas_warning (0 , "OpenBLAS : Architecture Initialization failed. No initialization function found.\n" );
339+ exit (1 );
340+ }
183341}
184342
185343void gotoblas_dynamic_quit (void ) {
186- gotoblas = NULL ;
344+ gotoblas = NULL ;
187345}
0 commit comments