Skip to content

Commit d2d16d0

Browse files
authored
Merge pull request #1 from xianyi/develop
update
2 parents 4e979bf + b6a6ccb commit d2d16d0

135 files changed

Lines changed: 6185 additions & 1820 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.drone.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ steps:
9292
- mkdir build && cd build
9393
- cmake $CMAKE_FLAGS ..
9494
- make -j
95-
- ctest
95+
- ctest -V
9696

9797
---
9898
kind: pipeline
@@ -116,7 +116,7 @@ steps:
116116
- mkdir build && cd build
117117
- cmake $CMAKE_FLAGS ..
118118
- make -j
119-
- ctest
119+
- ctest -V
120120

121121
---
122122
kind: pipeline
@@ -140,4 +140,4 @@ steps:
140140
- mkdir build && cd build
141141
- cmake $CMAKE_FLAGS ..
142142
- make -j
143-
- ctest
143+
- ctest -V
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# Only the "head" branch of the OpenBLAS package is tested
2+
3+
on:
4+
push:
5+
paths:
6+
- '**/nightly-Homebrew-build.yml'
7+
pull_request:
8+
branches:
9+
- develop
10+
paths:
11+
- '**/nightly-Homebrew-build.yml'
12+
schedule:
13+
- cron: 45 7 * * *
14+
# This is 7:45 AM UTC daily, late at night in the USA
15+
16+
# Since push and pull_request will still always be building and testing the `develop` branch,
17+
# it only makes sense to test if this file has been changed
18+
19+
name: Nightly-Homebrew-Build
20+
jobs:
21+
build-OpenBLAS-with-Homebrew:
22+
runs-on: macos-latest
23+
env:
24+
HOMEBREW_DEVELOPER: "ON"
25+
HOMEBREW_DISPLAY_INSTALL_TIMES: "ON"
26+
HOMEBREW_NO_ANALYTICS: "ON"
27+
HOMEBREW_NO_AUTO_UPDATE: "ON"
28+
HOMEBREW_NO_BOTTLE_SOURCE_FALLBACK: "ON"
29+
HOMEBREW_NO_INSTALL_CLEANUP: "ON"
30+
31+
steps:
32+
- name: Random delay for cron job
33+
run: |
34+
delay=$(( RANDOM % 600 ))
35+
printf 'Delaying for %s seconds on event %s' ${delay} "${{ github.event_name }}"
36+
sleep ${delay}
37+
if: github.event_name == 'schedule'
38+
39+
- uses: actions/checkout@v2
40+
# This isn't even needed, technically. Homebrew will get `develop` via git
41+
42+
- name: Update Homebrew
43+
if: github.event_name != 'pull_request'
44+
run: brew update || true
45+
46+
- name: Install prerequisites
47+
run: brew install --fetch-HEAD --HEAD --only-dependencies --keep-tmp openblas
48+
49+
- name: Install and bottle OpenBLAS
50+
run: brew install --fetch-HEAD --HEAD --build-bottle --keep-tmp openblas
51+
# the HEAD flags tell Homebrew to build the develop branch, fetched via git
52+
53+
- name: Create bottle
54+
run: |
55+
brew bottle -v openblas
56+
mkdir bottles
57+
mv *.bottle.tar.gz bottles
58+
59+
- name: Upload bottle
60+
uses: actions/upload-artifact@v1
61+
with:
62+
name: openblas--HEAD.catalina.bottle.tar.gz
63+
path: bottles
64+
65+
- name: Show linkage
66+
run: brew linkage -v openblas
67+
68+
- name: Test openblas
69+
run: brew test --HEAD --verbose openblas
70+
71+
- name: Audit openblas formula
72+
run: |
73+
brew audit --strict openblas
74+
brew cat openblas
75+
76+
- name: Post logs on failure
77+
if: failure()
78+
run: brew gist-logs --with-hostname -v openblas

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,5 @@ build.*
8787
*.swp
8888
benchmark/*.goto
8989
benchmark/smallscaling
90-
90+
CMakeCache.txt
91+
CMakeFiles/*

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ matrix:
176176
- <<: *test-macos
177177
osx_image: xcode10.1
178178
env:
179-
- CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk"
179+
- CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
180180
- CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0"
181181
- BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang NOFORTRAN=1"
182182

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
66
project(OpenBLAS C ASM)
77
set(OpenBLAS_MAJOR_VERSION 0)
88
set(OpenBLAS_MINOR_VERSION 3)
9-
set(OpenBLAS_PATCH_VERSION 8.dev)
9+
set(OpenBLAS_PATCH_VERSION 9.dev)
1010
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
1111

1212
# Adhere to GNU filesystem layout conventions

Changelog.txt

Lines changed: 133 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,144 @@
11
OpenBLAS ChangeLog
2+
====================================================================
3+
Version 0.3.9
4+
1-Mar-2020
5+
6+
common:
7+
* Fixed a miscompilation of the GETRF functions with CMAKE
8+
* Imported bugfix 390 from LAPACK (missing NaN propagation in xCOMBSSQ)
9+
* The size of the memory buffer used for splitting GEMM tasks across
10+
multiple threads can now be configured in the build system.
11+
12+
POWER:
13+
* Fixed several compilation problems related to endianness
14+
and ELF version on POWER8 and POWER9
15+
* Fixed use of the absolute value IAMIN/IAMAX instead of IMIN/IMAX
16+
* Fixed a race condition in the level3 blas code
17+
18+
MIPS64:
19+
* Fixed use of the absolute value IAMIN/IAMAX instead of IMIN/IMAX
20+
21+
ARMV7:
22+
* Fixed a race condition in the level3 blas code
23+
* Fixed compilation on Android
24+
ARMV8:
25+
* Added support for Ampere EMAG8180
26+
* Added support for Neoverse N1
27+
* Improved performance of the blas_lock function
28+
* Fixed a race condition in the level3 blas code
29+
* Fixed a performance regression on TSV110-based servers
30+
31+
x86_64:
32+
* Fixed a long-standing error with undeclared register overwrites
33+
in the DSCAL microkernel for HASWELL,SKYLAKEX and ZEN
34+
* Fixed a long-standing bug in the SSE implementation of IAMAX
35+
* Fixed a CMAKE build failure with DYNAMIC_ARCH
36+
* Fixed cpu autodetection of Goldmont+, Cannon Lake and Ice Lake
37+
* Fixed a compilation failure on OSX with compiler name containing dash
38+
* Fixed compilation with MinGW on SkylakeX
39+
* Improved speed of the AVX512 GEMM3M kernel on SkylakeX
40+
* Added an AVX512 STRMM kernel for SkylakeX
41+
* Improved GEMM performance on Haswell and Zen
42+
43+
zarch:
44+
* fixed compilation of the DYNAMIC_ARCH code
45+
46+
====================================================================
47+
Version 0.3.8
48+
9-Feb-2020
49+
50+
common:
51+
* LAPACK has been updated to 3.9.0 (plus patches up to
52+
January 2nd, 2020)
53+
* CMAKE support has been improved in several areas including
54+
cross-compilation
55+
* a thread race condition in the GEMM3M kernels was resolved
56+
* the "generic" (plain C) gemm beta kernel used by many targets
57+
has been sped up
58+
* an optimized version of the LAPACK trtrs functions has been added
59+
* an incompatibility between the LAPACK tests and the OpenBLAS
60+
implementation of XERBLA was resolved, removing the numerous
61+
warnings about wrong error exits in the former
62+
* support for NetBSD has been added
63+
* support for compilation with g95 and non-GNU versions of ld
64+
has been improved
65+
* support for compilation with (upcoming) gcc 10 has been added
66+
67+
POWER:
68+
* worked around miscompilation of several POWER8 and POWER9
69+
kernels by older versions of gcc
70+
* added support for big-endian POWER8 and for compilation on AIX
71+
* corrected bugs in the big-endian support for PPC440 and PPC970
72+
* DYNAMIC_ARCH support is now available in CMAKE builds as well
73+
74+
ARMV8:
75+
* performance of DGEMM_BETA and SGEMM_NCOPY has been improved
76+
* compilation for 32bit works again
77+
* performance of the RPCC function has been improved
78+
* improved performance on small systems
79+
* DYNAMIC_ARCH support is now available in CMAKE builds as well
80+
* cross-compilation from OSX to IOS was simplified
81+
82+
x86_64:
83+
* a new AVX512 DGEMM kernel was added and the AVX512 SGEMM kernel
84+
was significantly improved
85+
* optimized AVX512 kernels for CGEMM and ZGEMM have been added
86+
* AVX2 kernels for STRMM, SGEMM, and CGEMM have been significantly
87+
sped up and optimized CGEMM3M and ZGEMM3M kernels have been added
88+
* added support for QEMU virtual cpus
89+
* a compilation problem with PGI and SUN compilers was fixed
90+
* Intel "Goldmont plus" is now autodetected
91+
* a potential crash on program exit on MS Windows has been fixed
92+
93+
x86:
94+
* an unwanted case sensitivity in the implementation of LSAME
95+
on older 32bit AMD cpus was fixed
96+
97+
zarch:
98+
* Z15 is now supported as Z14
99+
* DYNAMIC_ARCH is now available on ZARCH as well
100+
2101
====================================================================
3102
Version 0.3.7
4103
11-Aug-2019
5104

6105
common:
7-
* having the gmake special variables TARGET_ARCH or TARGET_MACH
8-
defined no longer causes build failures in ctest or utest
9-
* defining NO_AFFINITY or USE_TLS to 0 in gmake builds no longer
10-
has the same effect as setting them to 1
11-
* a new test program was added to allow checking the library for
12-
thread safety
13-
* a new option USE_LOCKING was added to ensure thread safety when
14-
OpenBLAS itself is built without multithreading but will be
15-
called from multiple threads.
16-
* a build failure on Linux with glibc versions earlier than 2.5
17-
was fixed
18-
* a runtime error with CPU enumeration (and NO_AFFINITY not set)
19-
on glibc 2.6 was fixed
20-
* NO_AFFINITY was added to the CMAKE options (and defaults to being
21-
active on Linux, as in the gmake builds)
106+
* having the gmake special variables TARGET_ARCH or TARGET_MACH
107+
defined no longer causes build failures in ctest or utest
108+
* defining NO_AFFINITY or USE_TLS to 0 in gmake builds no longer
109+
has the same effect as setting them to 1
110+
* a new test program was added to allow checking the library for
111+
thread safety
112+
* a new option USE_LOCKING was added to ensure thread safety when
113+
OpenBLAS itself is built without multithreading but will be
114+
called from multiple threads.
115+
* a build failure on Linux with glibc versions earlier than 2.5
116+
was fixed
117+
* a runtime error with CPU enumeration (and NO_AFFINITY not set)
118+
on glibc 2.6 was fixed
119+
* NO_AFFINITY was added to the CMAKE options (and defaults to being
120+
active on Linux, as in the gmake builds)
22121

23122
x86_64:
24-
* the build-time logic for detection of AVX512 availability in
25-
the processor and compiler was fixed
26-
* gmake builds on OSX now set the internal name of the library to
27-
libopenblas.0.dylib (consistent with CMAKE)
28-
* the Haswell DGEMM kernel received a significant speedup through
29-
improved prefetch and load instructions
30-
* performance of DGEMM, DTRMM, DTRSM and ZDOT on Zen/Zen2 was markedly
31-
increased by avoiding vpermpd instructions
32-
* the SKYLAKEX (AVX512) DGEMM helper functions have now been disabled
33-
to fix remaining errors in DGEMM, DSYMM and DTRMM
34-
35-
## POWER:
36-
* added support for building on FreeBSD/powerpc64 and FreeBSD/ppc970
37-
* added optimized kernels for POWER9 SGEMM and STRMM
38-
39-
## ARMV7:
40-
* fixed the softfp implementations of xAMAX and IxAMAX
41-
* removed the predefined -march= flags on both ARMV5 and ARMV6 as
42-
they were appropriate for only a subset of platforms
123+
* the build-time logic for detection of AVX512 availability in
124+
the processor and compiler was fixed
125+
* gmake builds on OSX now set the internal name of the library to
126+
libopenblas.0.dylib (consistent with CMAKE)
127+
* the Haswell DGEMM kernel received a significant speedup through
128+
improved prefetch and load instructions
129+
* performance of DGEMM, DTRMM, DTRSM and ZDOT on Zen/Zen2 was markedly
130+
increased by avoiding vpermpd instructions
131+
* the SKYLAKEX (AVX512) DGEMM helper functions have now been disabled
132+
to fix remaining errors in DGEMM, DSYMM and DTRMM
133+
134+
POWER:
135+
* added support for building on FreeBSD/powerpc64 and FreeBSD/ppc970
136+
* added optimized kernels for POWER9 SGEMM and STRMM
137+
138+
ARMV7:
139+
* fixed the softfp implementations of xAMAX and IxAMAX
140+
* removed the predefined -march= flags on both ARMV5 and ARMV6 as
141+
they were appropriate for only a subset of platforms
43142

44143
====================================================================
45144
Version 0.3.6

Makefile

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,21 @@ ifneq ($(INTERFACE64), 0)
5656
@echo " Use 64 bits int (equivalent to \"-i8\" in Fortran) "
5757
endif
5858
endif
59-
60-
@echo " C compiler ... $(C_COMPILER) (command line : $(CC))"
59+
@$(CC) --version > /dev/null 2>&1;\
60+
if [ $$? -eq 0 ]; then \
61+
cverinfo=`$(CC) --version | sed -n '1p'`; \
62+
echo " C compiler ... $(C_COMPILER) (cmd & version : $${cverinfo})";\
63+
else \
64+
echo " C compiler ... $(C_COMPILER) (command line : $(CC))";\
65+
fi
6166
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
62-
@echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))"
67+
@$(FC) --version > /dev/null 2>&1;\
68+
if [ $$? -eq 0 ]; then \
69+
fverinfo=`$(FC) --version | sed -n '1p'`; \
70+
echo " Fortran compiler ... $(F_COMPILER) (cmd & version : $${fverinfo})";\
71+
else \
72+
echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))";\
73+
fi
6374
endif
6475
ifneq ($(OSNAME), AIX)
6576
@echo -n " Library Name ... $(LIBNAME)"
@@ -68,9 +79,9 @@ else
6879
endif
6980

7081
ifndef SMP
71-
@echo " (Single threaded) "
82+
@echo " (Single-threading) "
7283
else
73-
@echo " (Multi threaded; Max num-threads is $(NUM_THREADS))"
84+
@echo " (Multi-threading; Max num-threads is $(NUM_THREADS))"
7485
endif
7586

7687
ifeq ($(USE_OPENMP), 1)
@@ -317,7 +328,7 @@ lapack-test :
317328
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/EIG xeigtstc xeigtstd xeigtsts xeigtstz
318329
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/LIN xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
319330
ifneq ($(CROSS), 1)
320-
( cd $(NETLIB_LAPACK_DIR)/INSTALL; make all; ./testlsame; ./testslamch; ./testdlamch; \
331+
( cd $(NETLIB_LAPACK_DIR)/INSTALL; $(MAKE) all; ./testlsame; ./testslamch; ./testdlamch; \
321332
./testsecond; ./testdsecnd; ./testieee; ./testversion )
322333
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING)
323334
endif

Makefile.arm

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
ifeq ($(CORE), $(filter $(CORE),ARMV7 CORTEXA9 CORTEXA15))
22
ifeq ($(OSNAME), Android)
3-
CCOMMON_OPT += -mfpu=neon
4-
FCOMMON_OPT += -mfpu=neon
3+
CCOMMON_OPT += -mfpu=neon -march=armv7-a
4+
FCOMMON_OPT += -mfpu=neon -march=armv7-a
55
else
66
CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
77
FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a

Makefile.arm64

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,23 @@ CCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
2424
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
2525
endif
2626

27+
# Use a72 tunings because Neoverse-N1 is only available
28+
# in GCC>=9
29+
ifeq ($(CORE), NEOVERSEN1)
30+
ifeq ($(GCCVERSIONGTEQ7), 1)
31+
ifeq ($(GCCVERSIONGTEQ9), 1)
32+
CCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
33+
FCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
34+
else
35+
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
36+
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
37+
endif
38+
else
39+
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
40+
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
41+
endif
42+
endif
43+
2744
ifeq ($(CORE), THUNDERX)
2845
CCOMMON_OPT += -march=armv8-a -mtune=thunderx
2946
FCOMMON_OPT += -march=armv8-a -mtune=thunderx

Makefile.install

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD OpenBSD NetBSD DragonFly))
8282
endif
8383
ifeq ($(OSNAME), Darwin)
8484
@-cp $(LIBDYNNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
85-
@-install_name_tool -id "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)" "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)"
85+
@-install_name_tool -id "$(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).$(MAJOR_VERSION).dylib" "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)"
8686
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
8787
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib ; \
8888
ln -fs $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib

0 commit comments

Comments
 (0)