File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -712,9 +712,10 @@ fully working OpenBLAS for this platform.
712712
713713Go to the directory where you unpacked OpenBLAS, and enter the following commands:
714714```bash
715- CC=/Applications/Xcode_12.4.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
715+ CC="/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
716716
717- CFLAGS= -O2 -Wno-macro-redefined -isysroot /Applications/Xcode_12.4.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS14.4.sdk -arch arm64 -miphoneos-version-min=10.0
717+ SDKROOT="$(xcrun --sdk iphoneos --show-sdk-path)"
718+ CFLAGS="-O2 -Wno-macro-redefined -isysroot $SDKROOT -arch arm64 -miphoneos-version-min=10.0"
718719
719720make TARGET=ARMV8 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1
720721```
Original file line number Diff line number Diff line change @@ -35,16 +35,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3535#define I x3
3636
3737#if !defined(DOUBLE)
38- #define SSQ s0
39- #define SCALE s1
40- #define REGZERO s5
41- #define REGONE s6
42- #else
38+ #define SSQF s0
39+ #endif
40+
4341#define SSQ d0
4442#define SCALE d1
4543#define REGZERO d5
4644#define REGONE d6
47- #endif
4845
4946/ *******************************************************************************
5047* Macro definitions
@@ -53,22 +50,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5350.macro KERNEL_F1
5451#if !defined(DOUBLE)
5552 ldr s4 , [ X ], # 4
56- fcmp s4 , REGZERO
57- beq 2f / * KERNEL_F1_NEXT_\@ * /
58- fabs s4 , s4
59- fcmp SCALE , s4
60- bge 1f / * KERNEL_F1_SCALE_GE_X_\@ * /
61- fdiv s2 , SCALE , s4
62- fmul s2 , s2 , s2
63- fmul s3 , SSQ , s2
64- fadd SSQ , REGONE , s3
65- fmov SCALE , s4
66- b 2f / * KERNEL_F1_NEXT_\@ * /
67- 1 : / * KERNEL_F1_SCALE_GE_X_\@: * /
68- fdiv s2 , s4 , SCALE
69- fmla SSQ , s2 , v2.s [ 0 ]
53+ fcvt d4 , s4
7054#else
7155 ldr d4 , [ X ], # 8
56+ #endif
7257 fcmp d4 , REGZERO
7358 beq 2f / * KERNEL_F1_NEXT_\@ * /
7459 fabs d4 , d4
@@ -83,29 +68,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
83681 : / * KERNEL_F1_SCALE_GE_X_\@: * /
8469 fdiv d2 , d4 , SCALE
8570 fmla SSQ , d2 , v2.d [ 0 ]
86- #endif
87712 : / * KERNEL_F1_NEXT_\@: * /
8872.endm
8973
9074.macro KERNEL_S1
9175#if !defined(DOUBLE)
9276 ldr s4 , [ X ]
93- fcmp s4 , REGZERO
94- beq KERNEL_S1_NEXT
95- fabs s4 , s4
96- fcmp SCALE , s4
97- bge KERNEL_S1_SCALE_GE_X
98- fdiv s2 , SCALE , s4
99- fmul s2 , s2 , s2
100- fmul s3 , SSQ , s2
101- fadd SSQ , REGONE , s3
102- fmov SCALE , s4
103- b KERNEL_S1_NEXT
104- KERNEL_S1_SCALE_GE_X:
105- fdiv s2 , s4 , SCALE
106- fmla SSQ , s2 , v2.s [ 0 ]
77+ fcvt d4 , s4
10778#else
10879 ldr d4 , [ X ]
80+ #endif
10981 fcmp d4 , REGZERO
11082 beq KERNEL_S1_NEXT
11183 fabs d4 , d4
@@ -120,7 +92,6 @@ KERNEL_S1_SCALE_GE_X:
12092KERNEL_S1_SCALE_GE_X:
12193 fdiv d2 , d4 , SCALE
12294 fmla SSQ , d2 , v2.d [ 0 ]
123- #endif
12495KERNEL_S1_NEXT:
12596 add X , X , INC_X
12697.endm
@@ -218,7 +189,9 @@ KERNEL_S1_NEXT:
218189.Lnrm2_kernel_L999:
219190 fsqrt SSQ , SSQ
220191 fmul SSQ , SCALE , SSQ
221-
192+ #if !defined(DOUBLE)
193+ fcvt SSQF , SSQ
194+ #endif
222195 ret
223196
224197 EPILOGUE
Original file line number Diff line number Diff line change @@ -3778,18 +3778,18 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
37783778#define ZGEMM_DEFAULT_UNROLL_N 4
37793779#define ZGEMM_DEFAULT_UNROLL_MN 16
37803780
3781- #define SGEMM_DEFAULT_P 128
3782- #define DGEMM_DEFAULT_P 160
3781+ #define SGEMM_DEFAULT_P 128
3782+ #define DGEMM_DEFAULT_P 128
37833783#define CGEMM_DEFAULT_P 128
37843784#define ZGEMM_DEFAULT_P 128
37853785
3786- #define SGEMM_DEFAULT_Q 352
3787- #define DGEMM_DEFAULT_Q 128
3786+ #define SGEMM_DEFAULT_Q 896
3787+ #define DGEMM_DEFAULT_Q 448
37883788#define CGEMM_DEFAULT_Q 224
37893789#define ZGEMM_DEFAULT_Q 112
37903790
3791- #define SGEMM_DEFAULT_R 4096
3792- #define DGEMM_DEFAULT_R 4096
3791+ #define SGEMM_DEFAULT_R 3072
3792+ #define DGEMM_DEFAULT_R 3072
37933793#define CGEMM_DEFAULT_R 4096
37943794#define ZGEMM_DEFAULT_R 4096
37953795
You can’t perform that action at this time.
0 commit comments