Skip to content

Commit 1021074

Browse files
author
Chip-Kerchner
committed
Revert PGI changes.
1 parent a922a07 commit 1021074

1 file changed

Lines changed: 234 additions & 76 deletions

File tree

driver/others/dynamic_power.c

Lines changed: 234 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -13,27 +13,181 @@ extern gotoblas_t gotoblas_POWER10;
1313
extern void openblas_warning(int verbose, const char *msg);
1414

1515
/*
 * Human-readable core names.  The position of each name is significant:
 * gotoblas_corename() and force_coretype() refer to entries by index.
 */
static char *corename[] = {
  "unknown",   /* 0: no matching core */
  "POWER6",    /* 1 */
  "POWER8",    /* 2 */
  "POWER9",    /* 3 */
  "POWER10"    /* 4 */
};

/* Number of entries in corename[]; keep in sync with the table above. */
#define NUM_CORETYPES 5
2424

2525
char *gotoblas_corename(void) {
26-
if (gotoblas == &gotoblas_POWER6) return corename[1];
27-
if (gotoblas == &gotoblas_POWER8) return corename[2];
26+
#ifndef C_PGI
27+
if (gotoblas == &gotoblas_POWER6) return corename[1];
28+
#endif
29+
if (gotoblas == &gotoblas_POWER8) return corename[2];
2830
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
29-
if (gotoblas == &gotoblas_POWER9) return corename[3];
31+
if (gotoblas == &gotoblas_POWER9) return corename[3];
3032
#endif
3133
#ifdef HAVE_P10_SUPPORT
32-
if (gotoblas == &gotoblas_POWER10) return corename[4];
34+
if (gotoblas == &gotoblas_POWER10) return corename[4];
35+
#endif
36+
return corename[0];
37+
}
38+
39+
#if defined(__clang__)
/*
 * Stand-in for __builtin_cpu_supports() in clang builds.
 *
 * The argument is ignored and 0 is always returned, so every feature
 * query made through this stub reports "not supported".
 */
static int __builtin_cpu_supports(char* arg)
{
  (void)arg;  /* intentionally unused */
  return 0;
}
#endif
45+
46+
#if defined(C_PGI) || defined(__clang__)
47+
/*
48+
* NV HPC compilers do not yet implement __builtin_cpu_is().
49+
* Fake a version here for use in the CPU detection code below.
50+
*
51+
* Strategy here is to first check the CPU to see what it actually is,
52+
* and then test the input to see if what the CPU actually is matches
53+
* what was requested.
54+
*/
55+
56+
#include <string.h>
57+
58+
/*
59+
* Define POWER processor version table.
60+
*
61+
* NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time
62+
*/
63+
64+
/* CPU family codes stored in pvrPOWER[].cpu_type. */
#define CPU_UNKNOWN 0
#define CPU_POWER5 5
#define CPU_POWER6 6
#define CPU_POWER8 8
#define CPU_POWER9 9
#define CPU_POWER10 10

/*
 * Processor version register (PVR) lookup table.
 *
 * An entry matches a PVR value when (pvr & pvr_mask) == pvr_value.  The
 * table is scanned front to back and the first match wins, so entries with
 * a more specific mask must come before the generic entry for the same
 * family (see the POWER9 entries).  The final entry has a zero mask and a
 * zero value, so it matches every PVR and terminates the search.
 *
 * The table is only ever read, hence `const`.
 */
static const struct {
  uint32_t pvr_mask;
  uint32_t pvr_value;
  const char* cpu_name;
  uint32_t cpu_type;
} pvrPOWER [] = {

  { /* POWER6 in P5+ mode; 2.04-compliant processor */
    .pvr_mask = 0xffffffff,
    .pvr_value = 0x0f000001,
    .cpu_name = "POWER5+",
    .cpu_type = CPU_POWER5,
  },

  { /* Power6 aka POWER6X */
    .pvr_mask = 0xffff0000,
    .pvr_value = 0x003e0000,
    .cpu_name = "POWER6 (raw)",
    .cpu_type = CPU_POWER6,
  },

  { /* Power7 (classified as CPU_POWER6) */
    .pvr_mask = 0xffff0000,
    .pvr_value = 0x003f0000,
    .cpu_name = "POWER7 (raw)",
    .cpu_type = CPU_POWER6,
  },

  { /* Power7+ (classified as CPU_POWER6) */
    .pvr_mask = 0xffff0000,
    .pvr_value = 0x004A0000,
    .cpu_name = "POWER7+ (raw)",
    .cpu_type = CPU_POWER6,
  },

  { /* Power8E */
    .pvr_mask = 0xffff0000,
    .pvr_value = 0x004b0000,
    .cpu_name = "POWER8E (raw)",
    .cpu_type = CPU_POWER8,
  },

  { /* Power8NVL */
    .pvr_mask = 0xffff0000,
    .pvr_value = 0x004c0000,
    .cpu_name = "POWER8NVL (raw)",
    .cpu_type = CPU_POWER8,
  },

  { /* Power8 */
    .pvr_mask = 0xffff0000,
    .pvr_value = 0x004d0000,
    .cpu_name = "POWER8 (raw)",
    .cpu_type = CPU_POWER8,
  },

  { /* Power9 DD2.0 */
    .pvr_mask = 0xffffefff,
    .pvr_value = 0x004e0200,
    .cpu_name = "POWER9 (raw)",
    .cpu_type = CPU_POWER9,
  },

  { /* Power9 DD 2.1 */
    .pvr_mask = 0xffffefff,
    .pvr_value = 0x004e0201,
    .cpu_name = "POWER9 (raw)",
    .cpu_type = CPU_POWER9,
  },

  { /* Power9 DD2.2 or later */
    .pvr_mask = 0xffff0000,
    .pvr_value = 0x004e0000,
    .cpu_name = "POWER9 (raw)",
    .cpu_type = CPU_POWER9,
  },

  { /* Power10 */
    .pvr_mask = 0xffff0000,
    .pvr_value = 0x00800000,
    .cpu_name = "POWER10 (raw)",
    .cpu_type = CPU_POWER10,
  },

  { /* End of table, pvr_mask and pvr_value must be zero */
    .pvr_mask = 0x0,
    .pvr_value = 0x0,
    .cpu_name = "Unknown",
    .cpu_type = CPU_UNKNOWN,
  },
};
162+
163+
static int __builtin_cpu_is(const char *cpu) {
164+
int i;
165+
uint32_t pvr;
166+
uint32_t cpu_type;
167+
168+
asm("mfpvr %0" : "=r"(pvr));
169+
170+
for (i = 0 ; i < sizeof pvrPOWER / sizeof *pvrPOWER ; ++i) {
171+
if ((pvr & pvrPOWER[i].pvr_mask) == pvrPOWER[i].pvr_value) {
172+
break;
173+
}
174+
}
175+
176+
#if defined(DEBUG)
177+
printf("%s: returning CPU=%s, cpu_type=%p\n", __func__,
178+
pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type);
33179
#endif
34-
return corename[0];
180+
cpu_type = pvrPOWER[i].cpu_type;
181+
182+
if (!strcmp(cpu, "power8"))
183+
return cpu_type == CPU_POWER8;
184+
if (!strcmp(cpu, "power9"))
185+
return cpu_type == CPU_POWER9;
186+
return 0;
35187
}
36188

189+
#endif /* C_PGI || __clang__ */
190+
37191
#ifdef _AIX
38192
#include <sys/systemcfg.h>
39193

@@ -90,98 +244,102 @@ static int __builtin_cpu_supports(char* arg)
90244

91245
static gotoblas_t *get_coretype(void) {
92246

93-
if (__builtin_cpu_supports("power6"))
94-
return &gotoblas_POWER6;
95-
if (__builtin_cpu_supports("power8"))
96-
return &gotoblas_POWER8;
247+
#ifndef C_PGI
248+
if (__builtin_cpu_is("power6") || __builtin_cpu_is("power6x"))
249+
return &gotoblas_POWER6;
250+
#endif
251+
if (__builtin_cpu_is("power8"))
252+
return &gotoblas_POWER8;
97253
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
98-
if (__builtin_cpu_supports("power9"))
99-
return &gotoblas_POWER9;
254+
if (__builtin_cpu_is("power9"))
255+
return &gotoblas_POWER9;
100256
#endif
101257
#ifdef HAVE_P10_SUPPORT
102258
#ifdef _AIX
103-
if (__builtin_cpu_supports("power10"))
259+
if (__builtin_cpu_supports("power10"))
104260
#else
105-
if (__builtin_cpu_supports("arch_3_1") && __builtin_cpu_supports("mma"))
106-
#endif
107-
return &gotoblas_POWER10;
261+
if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma"))
108262
#endif
109-
/* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */
110-
#if (!defined __GNUC__) || ( __GNUC__ < 11) || (__GNUC__ == 10 && __GNUC_MINOR__ < 2)
111-
if (__builtin_cpu_supports("power10"))
112-
return &gotoblas_POWER9;
263+
return &gotoblas_POWER10;
113264
#endif
114-
return NULL;
265+
/* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */
266+
#if (!defined __GNUC__) || ( __GNUC__ >= 11) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2)
267+
if (__builtin_cpu_is("power10"))
268+
return &gotoblas_POWER9;
269+
#endif
270+
return NULL;
115271
}
116272

117273
static gotoblas_t *force_coretype(char * coretype) {
118274

119-
int i ;
120-
int found = -1;
121-
char message[128];
122-
123-
for ( i = 0 ; i < NUM_CORETYPES; i++)
124-
{
125-
if (!strncasecmp(coretype, corename[i], 20))
126-
{
127-
found = i;
128-
break;
129-
}
130-
}
275+
int i ;
276+
int found = -1;
277+
char message[128];
131278

132-
switch (found)
133-
{
134-
case 1: return (&gotoblas_POWER6);
135-
case 2: return (&gotoblas_POWER8);
279+
for ( i = 0 ; i < NUM_CORETYPES; i++)
280+
{
281+
if (!strncasecmp(coretype, corename[i], 20))
282+
{
283+
found = i;
284+
break;
285+
}
286+
}
287+
288+
switch (found)
289+
{
290+
#ifndef C_PGI
291+
case 1: return (&gotoblas_POWER6);
292+
#endif
293+
case 2: return (&gotoblas_POWER8);
136294
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
137-
case 3: return (&gotoblas_POWER9);
295+
case 3: return (&gotoblas_POWER9);
138296
#endif
139297
#ifdef HAVE_P10_SUPPORT
140-
case 4: return (&gotoblas_POWER10);
298+
case 4: return (&gotoblas_POWER10);
141299
#endif
142-
default: return NULL;
143-
}
144-
snprintf(message, 128, "Core not found: %s\n", coretype);
145-
openblas_warning(1, message);
300+
default: return NULL;
301+
}
302+
snprintf(message, 128, "Core not found: %s\n", coretype);
303+
openblas_warning(1, message);
146304
}
147305

148306
void gotoblas_dynamic_init(void) {
149307

150-
char coremsg[128];
151-
char coren[22];
152-
char *p;
308+
char coremsg[128];
309+
char coren[22];
310+
char *p;
153311

154312

155-
if (gotoblas) return;
313+
if (gotoblas) return;
156314

157-
p = getenv("OPENBLAS_CORETYPE");
158-
if ( p )
159-
{
160-
gotoblas = force_coretype(p);
161-
}
162-
else
163-
{
164-
gotoblas = get_coretype();
165-
}
315+
p = getenv("OPENBLAS_CORETYPE");
316+
if ( p )
317+
{
318+
gotoblas = force_coretype(p);
319+
}
320+
else
321+
{
322+
gotoblas = get_coretype();
323+
}
166324

167-
if (gotoblas == NULL)
168-
{
169-
snprintf(coremsg, 128, "Falling back to POWER8 core\n");
170-
openblas_warning(1, coremsg);
171-
gotoblas = &gotoblas_POWER8;
172-
}
325+
if (gotoblas == NULL)
326+
{
327+
snprintf(coremsg, 128, "Falling back to POWER8 core\n");
328+
openblas_warning(1, coremsg);
329+
gotoblas = &gotoblas_POWER8;
330+
}
173331

174-
if (gotoblas && gotoblas -> init) {
175-
strncpy(coren,gotoblas_corename(),20);
176-
sprintf(coremsg, "Core: %s\n",coren);
177-
openblas_warning(2, coremsg);
178-
gotoblas -> init();
179-
} else {
180-
openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
181-
exit(1);
182-
}
332+
if (gotoblas && gotoblas -> init) {
333+
strncpy(coren,gotoblas_corename(),20);
334+
sprintf(coremsg, "Core: %s\n",coren);
335+
openblas_warning(2, coremsg);
336+
gotoblas -> init();
337+
} else {
338+
openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
339+
exit(1);
340+
}
183341
}
184342

185343
/*
 * Drop the current kernel-table selection by resetting `gotoblas` to NULL,
 * so that a later gotoblas_dynamic_init() call selects a table again.
 */
void gotoblas_dynamic_quit(void) {
  gotoblas = NULL;
}

0 commit comments

Comments
 (0)