Skip to content

Commit 6de062c

Browse files
authored
Merge branch 'OpenMathLib:develop' into issue5414
2 parents a9a6eda + 52ec7fa commit 6de062c

3 files changed

Lines changed: 19 additions & 45 deletions

File tree

docs/install.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -712,9 +712,10 @@ fully working OpenBLAS for this platform.
712712
713713
Go to the directory where you unpacked OpenBLAS, and enter the following commands:
714714
```bash
715-
CC=/Applications/Xcode_12.4.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
715+
CC="/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
716716
717-
CFLAGS= -O2 -Wno-macro-redefined -isysroot /Applications/Xcode_12.4.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS14.4.sdk -arch arm64 -miphoneos-version-min=10.0
717+
SDKROOT="$(xcrun --sdk iphoneos --show-sdk-path)"
718+
CFLAGS="-O2 -Wno-macro-redefined -isysroot $SDKROOT -arch arm64 -miphoneos-version-min=10.0"
718719
719720
make TARGET=ARMV8 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1
720721
```

kernel/arm64/nrm2.S

Lines changed: 10 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -35,16 +35,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3535
#define I x3
3636

3737
#if !defined(DOUBLE)
38-
#define SSQ s0
39-
#define SCALE s1
40-
#define REGZERO s5
41-
#define REGONE s6
42-
#else
38+
#define SSQF s0
39+
#endif
40+
4341
#define SSQ d0
4442
#define SCALE d1
4543
#define REGZERO d5
4644
#define REGONE d6
47-
#endif
4845

4946
/*******************************************************************************
5047
* Macro definitions
@@ -53,22 +50,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5350
.macro KERNEL_F1
5451
#if !defined(DOUBLE)
5552
ldr s4, [X], #4
56-
fcmp s4, REGZERO
57-
beq 2f /* KERNEL_F1_NEXT_\@ */
58-
fabs s4, s4
59-
fcmp SCALE, s4
60-
bge 1f /* KERNEL_F1_SCALE_GE_X_\@ */
61-
fdiv s2, SCALE, s4
62-
fmul s2, s2, s2
63-
fmul s3, SSQ, s2
64-
fadd SSQ, REGONE, s3
65-
fmov SCALE, s4
66-
b 2f /* KERNEL_F1_NEXT_\@ */
67-
1: /* KERNEL_F1_SCALE_GE_X_\@: */
68-
fdiv s2, s4, SCALE
69-
fmla SSQ, s2, v2.s[0]
53+
fcvt d4, s4
7054
#else
7155
ldr d4, [X], #8
56+
#endif
7257
fcmp d4, REGZERO
7358
beq 2f /* KERNEL_F1_NEXT_\@ */
7459
fabs d4, d4
@@ -83,29 +68,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
8368
1: /* KERNEL_F1_SCALE_GE_X_\@: */
8469
fdiv d2, d4, SCALE
8570
fmla SSQ, d2, v2.d[0]
86-
#endif
8771
2: /* KERNEL_F1_NEXT_\@: */
8872
.endm
8973

9074
.macro KERNEL_S1
9175
#if !defined(DOUBLE)
9276
ldr s4, [X]
93-
fcmp s4, REGZERO
94-
beq KERNEL_S1_NEXT
95-
fabs s4, s4
96-
fcmp SCALE, s4
97-
bge KERNEL_S1_SCALE_GE_X
98-
fdiv s2, SCALE, s4
99-
fmul s2, s2, s2
100-
fmul s3, SSQ, s2
101-
fadd SSQ, REGONE, s3
102-
fmov SCALE, s4
103-
b KERNEL_S1_NEXT
104-
KERNEL_S1_SCALE_GE_X:
105-
fdiv s2, s4, SCALE
106-
fmla SSQ, s2, v2.s[0]
77+
fcvt d4, s4
10778
#else
10879
ldr d4, [X]
80+
#endif
10981
fcmp d4, REGZERO
11082
beq KERNEL_S1_NEXT
11183
fabs d4, d4
@@ -120,7 +92,6 @@ KERNEL_S1_SCALE_GE_X:
12092
KERNEL_S1_SCALE_GE_X:
12193
fdiv d2, d4, SCALE
12294
fmla SSQ, d2, v2.d[0]
123-
#endif
12495
KERNEL_S1_NEXT:
12596
add X, X, INC_X
12697
.endm
@@ -218,7 +189,9 @@ KERNEL_S1_NEXT:
218189
.Lnrm2_kernel_L999:
219190
fsqrt SSQ, SSQ
220191
fmul SSQ, SCALE, SSQ
221-
192+
#if !defined(DOUBLE)
193+
fcvt SSQF, SSQ
194+
#endif
222195
ret
223196

224197
EPILOGUE

param.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3778,18 +3778,18 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
37783778
#define ZGEMM_DEFAULT_UNROLL_N 4
37793779
#define ZGEMM_DEFAULT_UNROLL_MN 16
37803780

3781-
#define SGEMM_DEFAULT_P 128
3782-
#define DGEMM_DEFAULT_P 160
3781+
#define SGEMM_DEFAULT_P 128
3782+
#define DGEMM_DEFAULT_P 128
37833783
#define CGEMM_DEFAULT_P 128
37843784
#define ZGEMM_DEFAULT_P 128
37853785

3786-
#define SGEMM_DEFAULT_Q 352
3787-
#define DGEMM_DEFAULT_Q 128
3786+
#define SGEMM_DEFAULT_Q 896
3787+
#define DGEMM_DEFAULT_Q 448
37883788
#define CGEMM_DEFAULT_Q 224
37893789
#define ZGEMM_DEFAULT_Q 112
37903790

3791-
#define SGEMM_DEFAULT_R 4096
3792-
#define DGEMM_DEFAULT_R 4096
3791+
#define SGEMM_DEFAULT_R 3072
3792+
#define DGEMM_DEFAULT_R 3072
37933793
#define CGEMM_DEFAULT_R 4096
37943794
#define ZGEMM_DEFAULT_R 4096
37953795

0 commit comments

Comments
 (0)