Skip to content

Commit 71faa1c

Browse files
authored
Merge pull request #24 from xianyi/develop
rebase
2 parents b8f3605 + 8d2a796 commit 71faa1c

30 files changed

Lines changed: 681 additions & 1207 deletions

Makefile.install

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ endif
5151
ifneq ($(OSNAME), AIX)
5252
ifndef NO_LAPACKE
5353
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
54+
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapack.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapack.h"
5455
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
5556
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
5657
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
@@ -100,6 +101,7 @@ else
100101
#install on AIX has different options syntax
101102
ifndef NO_LAPACKE
102103
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
104+
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapack.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapack.h"
103105
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
104106
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
105107
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"

cmake/cc.cmake

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,3 +96,10 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "SUN")
9696
endif ()
9797
endif ()
9898

99+
if (${CORE} STREQUAL "SKYLAKEX")
100+
if (NOT DYNAMIC_ARCH)
101+
if (NOT NO_AVX512)
102+
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
103+
endif ()
104+
endif ()
105+
endif ()

driver/level3/level3_gemm3m_thread.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ typedef struct {
104104
#define BETA_OPERATION(M_FROM, M_TO, N_FROM, N_TO, BETA, C, LDC) \
105105
GEMM_BETA((M_TO) - (M_FROM), (N_TO - N_FROM), 0, \
106106
BETA[0], BETA[1], NULL, 0, NULL, 0, \
107-
(FLOAT *)(C) + (M_FROM) + (N_FROM) * (LDC) * COMPSIZE, LDC)
107+
(FLOAT *)(C) + ((M_FROM) + (N_FROM) * (LDC)) * COMPSIZE, LDC)
108108
#endif
109109

110110
#ifndef ICOPYB_OPERATION
@@ -414,7 +414,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
414414

415415
for(jjs = xxx; jjs < MIN(n_to, xxx + div_n); jjs += min_jj){
416416
min_jj = MIN(n_to, xxx + div_n) - jjs;
417-
if (min_jj > GEMM3M_UNROLL_N) min_jj = GEMM3M_UNROLL_N;
417+
if (min_jj > GEMM3M_UNROLL_N*3) min_jj = GEMM3M_UNROLL_N*3;
418418

419419
START_RPCC();
420420

@@ -550,7 +550,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
550550

551551
for(jjs = xxx; jjs < MIN(n_to, xxx + div_n); jjs += min_jj){
552552
min_jj = MIN(n_to, xxx + div_n) - jjs;
553-
if (min_jj > GEMM3M_UNROLL_N) min_jj = GEMM3M_UNROLL_N;
553+
if (min_jj > GEMM3M_UNROLL_N*3) min_jj = GEMM3M_UNROLL_N*3;
554554

555555
START_RPCC();
556556

@@ -687,7 +687,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
687687

688688
for(jjs = xxx; jjs < MIN(n_to, xxx + div_n); jjs += min_jj){
689689
min_jj = MIN(n_to, xxx + div_n) - jjs;
690-
if (min_jj > GEMM3M_UNROLL_N) min_jj = GEMM3M_UNROLL_N;
690+
if (min_jj > GEMM3M_UNROLL_N*3) min_jj = GEMM3M_UNROLL_N*3;
691691

692692
START_RPCC();
693693

driver/others/memory.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -822,7 +822,7 @@ static void *alloc_qalloc(void *address){
822822

823823
static void alloc_windows_free(struct alloc_t *alloc_info){
824824

825-
VirtualFree(alloc_info, allocation_block_size, MEM_DECOMMIT);
825+
VirtualFree(alloc_info, 0, MEM_RELEASE);
826826

827827
}
828828

@@ -935,7 +935,7 @@ static void alloc_hugetlb_free(struct alloc_t *alloc_info){
935935

936936
#ifdef OS_WINDOWS
937937

938-
VirtualFree(alloc_info, allocation_block_size, MEM_LARGE_PAGES | MEM_DECOMMIT);
938+
VirtualFree(alloc_info, 0, MEM_RELEASE);
939939

940940
#endif
941941

@@ -2310,7 +2310,7 @@ static void *alloc_qalloc(void *address){
23102310

23112311
static void alloc_windows_free(struct release_t *release){
23122312

2313-
VirtualFree(release -> address, BUFFER_SIZE, MEM_DECOMMIT);
2313+
VirtualFree(release -> address, 0, MEM_RELEASE);
23142314

23152315
}
23162316

@@ -2432,7 +2432,7 @@ static void alloc_hugetlb_free(struct release_t *release){
24322432

24332433
#ifdef OS_WINDOWS
24342434

2435-
VirtualFree(release -> address, BUFFER_SIZE, MEM_LARGE_PAGES | MEM_DECOMMIT);
2435+
VirtualFree(release -> address, 0, MEM_RELEASE);
24362436

24372437
#endif
24382438

exports/dllinit.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,10 @@ BOOL APIENTRY DllMain(HINSTANCE hInst, DWORD reason, LPVOID reserved) {
5050
gotoblas_init();
5151
break;
5252
case DLL_PROCESS_DETACH:
53-
gotoblas_quit();
53+
// If the process is about to exit (reserved is non-NULL), don't bother releasing any resources;
54+
// the kernel is much better at bulk-releasing them at that point.
55+
if (!reserved)
56+
gotoblas_quit();
5457
break;
5558
case DLL_THREAD_ATTACH:
5659
break;

interface/lapack/gesv.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,19 +44,19 @@
4444

4545
#ifndef COMPLEX
4646
#ifdef XDOUBLE
47-
#define ERROR_NAME "QGESV "
47+
#define ERROR_NAME "QGESV"
4848
#elif defined(DOUBLE)
49-
#define ERROR_NAME "DGESV "
49+
#define ERROR_NAME "DGESV"
5050
#else
51-
#define ERROR_NAME "SGESV "
51+
#define ERROR_NAME "SGESV"
5252
#endif
5353
#else
5454
#ifdef XDOUBLE
55-
#define ERROR_NAME "XGESV "
55+
#define ERROR_NAME "XGESV"
5656
#elif defined(DOUBLE)
57-
#define ERROR_NAME "ZGESV "
57+
#define ERROR_NAME "ZGESV"
5858
#else
59-
#define ERROR_NAME "CGESV "
59+
#define ERROR_NAME "CGESV"
6060
#endif
6161
#endif
6262

@@ -89,7 +89,7 @@ int NAME(blasint *N, blasint *NRHS, FLOAT *a, blasint *ldA, blasint *ipiv,
8989
if (args.m < 0) info = 1;
9090

9191
if (info) {
92-
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
92+
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME) - 1);
9393
*Info = - info;
9494
return 0;
9595
}

interface/lapack/getf2.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ int NAME(blasint *M, blasint *N, FLOAT *a, blasint *ldA, blasint *ipiv, blasint
7474
if (args.n < 0) info = 2;
7575
if (args.m < 0) info = 1;
7676
if (info) {
77-
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
77+
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME) - 1);
7878
*Info = - info;
7979
return 0;
8080
}

interface/lapack/getrf.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ int NAME(blasint *M, blasint *N, FLOAT *a, blasint *ldA, blasint *ipiv, blasint
7474
if (args.n < 0) info = 2;
7575
if (args.m < 0) info = 1;
7676
if (info) {
77-
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
77+
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME) - 1);
7878
*Info = - info;
7979
return 0;
8080
}

interface/lapack/getrs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ int NAME(char *TRANS, blasint *N, blasint *NRHS, FLOAT *a, blasint *ldA,
102102
if (trans < 0) info = 1;
103103

104104
if (info != 0) {
105-
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
105+
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME) - 1);
106106
return 0;
107107
}
108108

interface/lapack/lauu2.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ int NAME(char *UPLO, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){
9090
if (args.n < 0) info = 2;
9191
if (uplo < 0) info = 1;
9292
if (info) {
93-
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
93+
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME) - 1);
9494
*Info = - info;
9595
return 0;
9696
}

0 commit comments

Comments
 (0)