Skip to content

Commit a9aeb67

Browse files
authored
Merge pull request #2465 from AGSaidi/neoverse-n1
Add Neoverse-N1 core
2 parents 430ee31 + c623a96 commit a9aeb67

10 files changed

Lines changed: 312 additions & 4 deletions

File tree

Makefile.arm64

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,23 @@ CCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
2424
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
2525
endif
2626

27+
# Neoverse-N1 flags depend on the GCC version: -mtune=neoverse-n1 is used for
28+
# GCC>=9, otherwise cortex-a72 tuning; -march drops to armv8-a when GCC<7
29+
ifeq ($(CORE), NEOVERSEN1)
30+
ifeq ($(GCCVERSIONGTEQ7), 1)
31+
ifeq ($(GCCVERSIONGTEQ9), 1)
32+
CCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
33+
FCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
34+
else
35+
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
36+
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
37+
endif
38+
else
39+
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
40+
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
41+
endif
42+
endif
43+
2744
ifeq ($(CORE), THUNDERX)
2845
CCOMMON_OPT += -march=armv8-a -mtune=thunderx
2946
FCOMMON_OPT += -march=armv8-a -mtune=thunderx

Makefile.system

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,7 @@ ifeq ($(C_COMPILER), GCC)
328328
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
329329
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
330330
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
331+
GCCVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7)
331332
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
332333
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
333334
ifeq ($(GCCVERSIONGT4), 1)
@@ -554,6 +555,7 @@ DYNAMIC_CORE += CORTEXA53
554555
DYNAMIC_CORE += CORTEXA57
555556
DYNAMIC_CORE += CORTEXA72
556557
DYNAMIC_CORE += CORTEXA73
558+
DYNAMIC_CORE += NEOVERSEN1
557559
DYNAMIC_CORE += FALKOR
558560
DYNAMIC_CORE += THUNDERX
559561
DYNAMIC_CORE += THUNDERX2T99

TargetList.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ CORTEXA53
8888
CORTEXA57
8989
CORTEXA72
9090
CORTEXA73
91+
NEOVERSEN1
9192
FALKOR
9293
THUNDERX
9394
THUNDERX2T99

cmake/arch.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ endif ()
4545

4646
if (DYNAMIC_ARCH)
4747
if (ARM64)
48-
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180)
48+
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1)
4949
endif ()
5050

5151
if (POWER)

cmake/prebuild.cmake

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,33 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
229229
set(ZGEMM_UNROLL_M 4)
230230
set(ZGEMM_UNROLL_N 4)
231231
set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "NEOVERSEN1")
  # Static target configuration for Neoverse N1, appended to the generated
  # config header and followed by the GEMM unroll factors for this core.
  # NOTE(review): this arm appears to sit under the CMAKE_CROSSCOMPILING
  # branch, i.e. it stands in for values normally produced by getarch.
  #
  # The cache geometry must stay in sync with the NEOVERSEN1 entries in
  # cpuid_arm64.c and getarch.c; both describe the L1 data cache as 4-way,
  # so L1_DATA_ASSOCIATIVE is 4 here as well (it was 2).
  file(APPEND ${TARGET_CONF_TEMP}
    "#define L1_CODE_SIZE\t65536\n"
    "#define L1_CODE_LINESIZE\t64\n"
    "#define L1_CODE_ASSOCIATIVE\t4\n"
    "#define L1_DATA_SIZE\t65536\n"
    "#define L1_DATA_LINESIZE\t64\n"
    "#define L1_DATA_ASSOCIATIVE\t4\n"
    "#define L2_SIZE\t1048576\n\n"
    "#define L2_LINESIZE\t64\n"
    "#define L2_ASSOCIATIVE\t16\n"
    "#define DTB_DEFAULT_ENTRIES\t64\n"
    "#define DTB_SIZE\t4096\n"
    "#define HAVE_VFPV4\n"
    "#define HAVE_VFPV3\n"
    "#define HAVE_VFP\n"
    "#define HAVE_NEON\n"
    "#define ARMV8\n")
  set(SGEMM_UNROLL_M 16)
  set(SGEMM_UNROLL_N 4)
  set(DGEMM_UNROLL_M 8)
  set(DGEMM_UNROLL_N 4)
  set(CGEMM_UNROLL_M 8)
  set(CGEMM_UNROLL_N 4)
  set(ZGEMM_UNROLL_M 4)
  set(ZGEMM_UNROLL_N 4)
  set(SYMV_P 16)
232259
elseif ("${TCORE}" STREQUAL "FALKOR")
233260
file(APPEND ${TARGET_CONF_TEMP}
234261
"#define L1_CODE_SIZE\t65536\n"

cpuid_arm64.c

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#define CPU_CORTEXA57 3
3535
#define CPU_CORTEXA72 4
3636
#define CPU_CORTEXA73 5
37+
#define CPU_NEOVERSEN1 11
3738
// Qualcomm
3839
#define CPU_FALKOR 6
3940
// Cavium
@@ -55,7 +56,8 @@ static char *cpuname[] = {
5556
"THUNDERX",
5657
"THUNDERX2T99",
5758
"TSV110",
58-
"EMAG8180"
59+
"EMAG8180",
60+
"NEOVERSEN1"
5961
};
6062

6163
static char *cpuname_lower[] = {
@@ -69,7 +71,8 @@ static char *cpuname_lower[] = {
6971
"thunderx",
7072
"thunderx2t99",
7173
"tsv110",
72-
"emag8180"
74+
"emag8180",
75+
"neoversen1"
7376
};
7477

7578
int get_feature(char *search)
@@ -144,6 +147,8 @@ int detect(void)
144147
return CPU_CORTEXA72;
145148
else if (strstr(cpu_part, "0xd09"))
146149
return CPU_CORTEXA73;
150+
else if (strstr(cpu_part, "0xd0c"))
151+
return CPU_NEOVERSEN1;
147152
}
148153
// Qualcomm
149154
else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))
@@ -285,6 +290,20 @@ void get_cpuconfig(void)
285290
printf("#define DTB_DEFAULT_ENTRIES 64\n");
286291
printf("#define DTB_SIZE 4096\n");
287292
break;
293+
case CPU_NEOVERSEN1:
294+
printf("#define %s\n", cpuname[d]);
295+
printf("#define L1_CODE_SIZE 65536\n");
296+
printf("#define L1_CODE_LINESIZE 64\n");
297+
printf("#define L1_CODE_ASSOCIATIVE 4\n");
298+
printf("#define L1_DATA_SIZE 65536\n");
299+
printf("#define L1_DATA_LINESIZE 64\n");
300+
printf("#define L1_DATA_ASSOCIATIVE 4\n");
301+
printf("#define L2_SIZE 1048576\n");
302+
printf("#define L2_LINESIZE 64\n");
303+
printf("#define L2_ASSOCIATIVE 16\n");
304+
printf("#define DTB_DEFAULT_ENTRIES 64\n");
305+
printf("#define DTB_SIZE 4096\n");
306+
break;
288307

289308
case CPU_FALKOR:
290309
printf("#define FALKOR\n");

driver/others/dynamic_arm64.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,11 @@ extern gotoblas_t gotoblas_THUNDERX;
5252
extern gotoblas_t gotoblas_THUNDERX2T99;
5353
extern gotoblas_t gotoblas_TSV110;
5454
extern gotoblas_t gotoblas_EMAG8180;
55+
extern gotoblas_t gotoblas_NEOVERSEN1;
5556

5657
extern void openblas_warning(int verbose, const char * msg);
5758

58-
#define NUM_CORETYPES 10
59+
#define NUM_CORETYPES 11
5960

6061
/*
6162
* In case asm/hwcap.h is outdated on the build system, make sure
@@ -80,6 +81,7 @@ static char *corename[] = {
8081
"thunderx2t99",
8182
"tsv110",
8283
"emag8180",
84+
"neoversen1",
8385
"unknown"
8486
};
8587

@@ -94,6 +96,7 @@ char *gotoblas_corename(void) {
9496
if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 7];
9597
if (gotoblas == &gotoblas_TSV110) return corename[ 8];
9698
if (gotoblas == &gotoblas_EMAG8180) return corename[ 9];
99+
if (gotoblas == &gotoblas_NEOVERSEN1) return corename[10];
97100
return corename[NUM_CORETYPES];
98101
}
99102

@@ -123,6 +126,7 @@ static gotoblas_t *force_coretype(char *coretype) {
123126
case 7: return (&gotoblas_THUNDERX2T99);
124127
case 8: return (&gotoblas_TSV110);
125128
case 9: return (&gotoblas_EMAG8180);
129+
case 10: return (&gotoblas_NEOVERSEN1);
126130
}
127131
snprintf(message, 128, "Core not found: %s\n", coretype);
128132
openblas_warning(1, message);
@@ -168,6 +172,8 @@ static gotoblas_t *get_coretype(void) {
168172
return &gotoblas_CORTEXA72;
169173
case 0xd09: // Cortex A73
170174
return &gotoblas_CORTEXA73;
175+
case 0xd0c: // Neoverse N1
176+
return &gotoblas_NEOVERSEN1;
171177
}
172178
break;
173179
case 0x42: // Broadcom

getarch.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1028,6 +1028,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
10281028
#else
10291029
#endif
10301030

/*
 * Forced-target description for Arm Neoverse N1, active when the build
 * defines FORCE_NEOVERSEN1.
 *
 * ARCHCONFIG is assembled from adjacent string literals, which the compiler
 * concatenates with nothing in between.  Every fragment except the last must
 * therefore end in a space; without it two options fuse into a single bogus
 * token (previously "-DARMV8-march=armv8.2-a", which lost both the ARMV8
 * define and the -march flag).
 */
#ifdef FORCE_NEOVERSEN1
#define FORCE
#define ARCHITECTURE    "ARM64"
#define SUBARCHITECTURE "NEOVERSEN1"
#define SUBDIRNAME      "arm64"
#define ARCHCONFIG   "-DNEOVERSEN1 " \
       "-DL1_CODE_SIZE=65536 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=4 " \
       "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=4 " \
       "-DL2_SIZE=1048576 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \
       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
       "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8 " \
       "-march=armv8.2-a -mtune=cortex-a72"
#define LIBNAME   "neoversen1"
#define CORENAME  "NEOVERSEN1"
#else
#endif
1047+
1048+
10311049
#ifdef FORCE_FALKOR
10321050
#define FORCE
10331051
#define ARCHITECTURE "ARM64"

0 commit comments

Comments
 (0)