Skip to content

Commit 67ddda3

Browse files
committed
Merge branch 'develop' into addRVVVectorizedFP16Packing
2 parents 07d0e74 + b27a118 commit 67ddda3

53 files changed

Lines changed: 672 additions & 126 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

cblas.h

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -456,6 +456,14 @@ void cblas_cgemm_batch(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enu
456456
void cblas_zgemm_batch(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransA_array, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransB_array, OPENBLAS_CONST blasint * M_array, OPENBLAS_CONST blasint * N_array, OPENBLAS_CONST blasint * K_array,
457457
OPENBLAS_CONST void * alpha_array, OPENBLAS_CONST void ** A_array, OPENBLAS_CONST blasint * lda_array, OPENBLAS_CONST void ** B_array, OPENBLAS_CONST blasint * ldb_array, OPENBLAS_CONST void * beta_array, void ** C_array, OPENBLAS_CONST blasint * ldc_array, OPENBLAS_CONST blasint group_count, OPENBLAS_CONST blasint * group_size);
458458

459+
void cblas_sgemm_batch_strided(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float * A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST blasint stridea, OPENBLAS_CONST float * B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST blasint strideb, OPENBLAS_CONST float beta, float * C, OPENBLAS_CONST blasint ldc, OPENBLAS_CONST blasint stridec, OPENBLAS_CONST blasint group_size);
460+
461+
void cblas_dgemm_batch_strided(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double * A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST blasint stridea, OPENBLAS_CONST double * B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST blasint strideb, OPENBLAS_CONST double beta, double * C, OPENBLAS_CONST blasint ldc, OPENBLAS_CONST blasint stridec, OPENBLAS_CONST blasint group_size);
462+
463+
void cblas_cgemm_batch_strided(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void * alpha, OPENBLAS_CONST void * A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST blasint stridea, OPENBLAS_CONST void * B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST blasint strideb, OPENBLAS_CONST void * beta, void * C, OPENBLAS_CONST blasint ldc, OPENBLAS_CONST blasint stridec, OPENBLAS_CONST blasint group_size);
464+
465+
void cblas_zgemm_batch_strided(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void * alpha, OPENBLAS_CONST void * A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST blasint stridea, OPENBLAS_CONST void * B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST blasint strideb, OPENBLAS_CONST void * beta, void * C, OPENBLAS_CONST blasint ldc, OPENBLAS_CONST blasint stridec, OPENBLAS_CONST blasint group_size);
466+
459467
/*** BFLOAT16 and INT8 extensions ***/
460468
/* convert float array to BFLOAT16 array by rounding */
461469
void cblas_sbstobf16(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *in, OPENBLAS_CONST blasint incin, bfloat16 *out, OPENBLAS_CONST blasint incout);
@@ -477,6 +485,7 @@ void cblas_sbgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum C
477485
void cblas_sbgemm_batch(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransA_array, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransB_array, OPENBLAS_CONST blasint * M_array, OPENBLAS_CONST blasint * N_array, OPENBLAS_CONST blasint * K_array,
478486
OPENBLAS_CONST float * alpha_array, OPENBLAS_CONST bfloat16 ** A_array, OPENBLAS_CONST blasint * lda_array, OPENBLAS_CONST bfloat16 ** B_array, OPENBLAS_CONST blasint * ldb_array, OPENBLAS_CONST float * beta_array, float ** C_array, OPENBLAS_CONST blasint * ldc_array, OPENBLAS_CONST blasint group_count, OPENBLAS_CONST blasint * group_size);
479487

488+
void cblas_sbgemm_batch_strided(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST bfloat16 * A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST blasint stridea, OPENBLAS_CONST bfloat16 * B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST blasint strideb, OPENBLAS_CONST float beta, float * C, OPENBLAS_CONST blasint ldc, OPENBLAS_CONST blasint stridec, OPENBLAS_CONST blasint group_size);
480489
/*** FLOAT16 extensions ***/
481490
void cblas_shgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
482491
OPENBLAS_CONST float alpha, OPENBLAS_CONST hfloat16 *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST hfloat16 *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);

cmake/utils.cmake

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,9 @@ function(GenerateNamedObjects sources_in)
427427
endif ()
428428
if (${float_type} STREQUAL "BFLOAT16")
429429
list(APPEND obj_defines "BFLOAT16")
430+
endif ()
431+
if (${float_type} STREQUAL "HFLOAT16")
432+
list(APPEND obj_defines "HFLOAT16")
430433
endif ()
431434
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
432435
list(APPEND obj_defines "COMPLEX")

cpuid_arm64.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ int detect(void)
346346
else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001"))
347347
return CPU_A64FX;
348348
// Apple
349-
else if (strstr(cpu_implementer, "0x61") && strstr(cpu_part, "0x022"))
349+
else if (strstr(cpu_implementer, "0x61") /* && strstr(cpu_part, "0x022")*/)
350350
return CPU_VORTEX;
351351
// Phytium
352352
else if (strstr(cpu_implementer, "0x70") && (strstr(cpu_part, "0x660") || strstr(cpu_part, "0x661")

interface/CMakeLists.txt

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -124,10 +124,9 @@ foreach (CBLAS_FLAG ${CBLAS_FLAGS})
124124
#sdsdot, dsdot
125125
if (BUILD_SINGLE OR BUILD_DOUBLE)
126126
GenerateNamedObjects("sdsdot.c" "" "sdsdot" ${CBLAS_FLAG} "" "" true "SINGLE")
127-
if(CBLAS_FLAG EQUAL 1)
128-
GenerateNamedObjects("gemm_batch.c" "" "gemm_batch" ${CBLAS_FLAG} "" "" false)
129-
endif ()
130-
endif ()
127+
GenerateNamedObjects("gemm_batch.c" "" "gemm_batch" ${CBLAS_FLAG} "" "" false)
128+
GenerateNamedObjects("gemm_batch_strided.c" "" "gemm_batch_strided" ${CBLAS_FLAG} "" "" false)
129+
endif ()
131130
if (BUILD_DOUBLE)
132131
GenerateNamedObjects("dsdot.c" "" "dsdot" ${CBLAS_FLAG} "" "" true "SINGLE")
133132
endif ()
@@ -162,9 +161,8 @@ if (BUILD_BFLOAT16)
162161
GenerateNamedObjects("tobf16.c" "DOUBLE_PREC" "sbdtobf16" ${CBLAS_FLAG} "" "" true "BFLOAT16")
163162
GenerateNamedObjects("bf16to.c" "SINGLE_PREC" "sbf16tos" ${CBLAS_FLAG} "" "" true "BFLOAT16")
164163
GenerateNamedObjects("bf16to.c" "DOUBLE_PREC" "dbf16tod" ${CBLAS_FLAG} "" "" true "BFLOAT16")
165-
if(CBLAS_FLAG EQUAL 1)
166164
GenerateNamedObjects("gemm_batch.c" "" "sbgemm_batch" ${CBLAS_FLAG} "" "" true "BFLOAT16")
167-
endif ()
165+
GenerateNamedObjects("gemm_batch_strided.c" "" "sbgemm_batch_strided" ${CBLAS_FLAG} "" "" true "BFLOAT16")
168166
endif ()
169167
if (BUILD_HFLOAT16)
170168
GenerateNamedObjects("gemm.c" "" "shgemm" ${CBLAS_FLAG} "" "" true "HFLOAT16")
@@ -197,9 +195,8 @@ foreach (float_type ${FLOAT_TYPES})
197195
GenerateNamedObjects("max.c" "USE_ABS" "scamax" ${CBLAS_FLAG} "" "" true "COMPLEX")
198196
GenerateNamedObjects("asum.c" "" "scasum" ${CBLAS_FLAG} "" "" true "COMPLEX")
199197
GenerateNamedObjects("sum.c" "" "scsum" ${CBLAS_FLAG} "" "" true "COMPLEX")
200-
if(CBLAS_FLAG EQUAL 1)
201-
GenerateNamedObjects("gemm_batch.c" "" "cgemm_batch" ${CBLAS_FLAG} "" "" true "COMPLEX")
202-
endif ()
198+
GenerateNamedObjects("gemm_batch.c" "" "cgemm_batch" ${CBLAS_FLAG} "" "" true "COMPLEX")
199+
GenerateNamedObjects("gemm_batch_strided.c" "" "cgemm_batch_strided" ${CBLAS_FLAG} "" "" true "COMPLEX")
203200
endif ()
204201
if (${float_type} STREQUAL "ZCOMPLEX")
205202
GenerateNamedObjects("zscal.c" "SSCAL" "dscal" ${CBLAS_FLAG} "" "" false "ZCOMPLEX")
@@ -209,9 +206,8 @@ foreach (float_type ${FLOAT_TYPES})
209206
GenerateNamedObjects("max.c" "USE_ABS" "dzamax" ${CBLAS_FLAG} "" "" true "ZCOMPLEX")
210207
GenerateNamedObjects("asum.c" "" "dzasum" ${CBLAS_FLAG} "" "" true "ZCOMPLEX")
211208
GenerateNamedObjects("sum.c" "" "dzsum" ${CBLAS_FLAG} "" "" true "ZCOMPLEX")
212-
if(CBLAS_FLAG EQUAL 1)
213-
GenerateNamedObjects("gemm_batch.c" "" "zgemm_batch" ${CBLAS_FLAG} "" "" true "ZCOMPLEX")
214-
endif ()
209+
GenerateNamedObjects("gemm_batch.c" "" "zgemm_batch" ${CBLAS_FLAG} "" "" true "ZCOMPLEX")
210+
GenerateNamedObjects("gemm_batch_strided.c" "" "zgemm_batch_strided" ${CBLAS_FLAG} "" "" true "ZCOMPLEX")
215211
endif ()
216212
endforeach ()
217213

@@ -262,7 +258,8 @@ if ( BUILD_COMPLEX AND NOT BUILD_SINGLE)
262258
GenerateNamedObjects("nrm2.c" "" "nrm2" 0 "" "" false "SINGLE")
263259
GenerateNamedObjects("gemv.c" "" "gemv" 0 "" "" false "SINGLE")
264260
GenerateNamedObjects("gemm.c" "" "gemm" 0 "" "" false "SINGLE")
265-
GenerateNamedObjects("gemm_batch.c" "" "gemm_batch" 1 "" "" false "SINGLE")
261+
GenerateNamedObjects("gemm_batch.c" "" "gemm_batch" 0 "" "" false "SINGLE")
262+
GenerateNamedObjects("gemm_batch_strided.c" "" "gemm_batch_strided" 0 "" "" false "SINGLE")
266263
GenerateNamedObjects("asum.c" "" "asum" 0 "" "" false "SINGLE")
267264
GenerateNamedObjects("swap.c" "" "swap" 0 "" "" false "SINGLE")
268265
GenerateNamedObjects("axpy.c" "" "axpy" 0 "" "" false "SINGLE")
@@ -276,7 +273,8 @@ if ( BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
276273
GenerateNamedObjects("nrm2.c" "" "nrm2" 0 "" "" false "DOUBLE")
277274
GenerateNamedObjects("gemv.c" "" "gemv" 0 "" "" false "DOUBLE")
278275
GenerateNamedObjects("gemm.c" "" "gemm" 0 "" "" false "DOUBLE")
279-
GenerateNamedObjects("gemm_batch.c" "" "gemm_batch" 1 "" "" false "DOUBLE")
276+
GenerateNamedObjects("gemm_batch.c" "" "gemm_batch" 0 "" "" false "DOUBLE")
277+
GenerateNamedObjects("gemm_batch_strided.c" "" "gemm_batch_strided" 0 "" "" false "DOUBLE")
280278
GenerateNamedObjects("asum.c" "" "asum" 0 "" "" false "DOUBLE")
281279
GenerateNamedObjects("swap.c" "" "swap" 0 "" "" false "DOUBLE")
282280
GenerateNamedObjects("axpy.c" "" "axpy" 0 "" "" false "DOUBLE")

interface/Makefile

Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -72,15 +72,16 @@ SBLAS3OBJS = \
7272
sgemm.$(SUFFIX) ssymm.$(SUFFIX) strmm.$(SUFFIX) \
7373
strsm.$(SUFFIX) ssyrk.$(SUFFIX) ssyr2k.$(SUFFIX) \
7474
somatcopy.$(SUFFIX) simatcopy.$(SUFFIX)\
75-
sgeadd.$(SUFFIX) sgemmt.$(SUFFIX) sgemmtr.$(SUFFIX)
75+
sgeadd.$(SUFFIX) sgemmt.$(SUFFIX) sgemmtr.$(SUFFIX) \
76+
sgemm_batch.$(SUFFIX) sgemm_batch_strided.$(SUFFIX)
7677

7778
ifeq ($(BUILD_BFLOAT16),1)
7879
BBLAS3OBJS = bgemm.$(SUFFIX)
7980
BBLAS2OBJS = bgemv.$(SUFFIX)
8081
BBLAS1OBJS = bscal.$(SUFFIX)
8182
SBBLAS1OBJS = sbdot.$(SUFFIX)
8283
SBBLAS2OBJS = sbgemv.$(SUFFIX)
83-
SBBLAS3OBJS = sbgemm.$(SUFFIX) sbgemmt.$(SUFFIX) sbgemmtr.$(SUFFIX)
84+
SBBLAS3OBJS = sbgemm.$(SUFFIX) sbgemmt.$(SUFFIX) sbgemmtr.$(SUFFIX) sbgemm_batch.$(SUFFIX) sbgemm_batch_strided.$(SUFFIX)
8485
SBEXTOBJS = sbstobf16.$(SUFFIX) sbdtobf16.$(SUFFIX) sbf16tos.$(SUFFIX) dbf16tod.$(SUFFIX)
8586
endif
8687

@@ -111,7 +112,8 @@ DBLAS3OBJS = \
111112
dgemm.$(SUFFIX) dsymm.$(SUFFIX) dtrmm.$(SUFFIX) \
112113
dtrsm.$(SUFFIX) dsyrk.$(SUFFIX) dsyr2k.$(SUFFIX) \
113114
domatcopy.$(SUFFIX) dimatcopy.$(SUFFIX)\
114-
dgeadd.$(SUFFIX) dgemmt.$(SUFFIX) dgemmtr.$(SUFFIX)
115+
dgeadd.$(SUFFIX) dgemmt.$(SUFFIX) dgemmtr.$(SUFFIX) \
116+
dgemm_batch.$(SUFFIX) dgemm_batch_strided.$(SUFFIX)
115117

116118
CBLAS1OBJS = \
117119
caxpy.$(SUFFIX) caxpyc.$(SUFFIX) cswap.$(SUFFIX) \
@@ -140,7 +142,8 @@ CBLAS3OBJS = \
140142
ctrsm.$(SUFFIX) csyrk.$(SUFFIX) csyr2k.$(SUFFIX) \
141143
chemm.$(SUFFIX) cherk.$(SUFFIX) cher2k.$(SUFFIX) \
142144
comatcopy.$(SUFFIX) cimatcopy.$(SUFFIX)\
143-
cgeadd.$(SUFFIX) cgemmt.$(SUFFIX) cgemmtr.$(SUFFIX)
145+
cgeadd.$(SUFFIX) cgemmt.$(SUFFIX) cgemmtr.$(SUFFIX) \
146+
cgemm_batch.$(SUFFIX) cgemm_batch_strided.$(SUFFIX)
144147

145148
ZBLAS1OBJS = \
146149
zaxpy.$(SUFFIX) zaxpyc.$(SUFFIX) zswap.$(SUFFIX) \
@@ -169,7 +172,8 @@ ZBLAS3OBJS = \
169172
ztrsm.$(SUFFIX) zsyrk.$(SUFFIX) zsyr2k.$(SUFFIX) \
170173
zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \
171174
zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX)\
172-
zgeadd.$(SUFFIX) zgemmt.$(SUFFIX) zgemmtr.$(SUFFIX)
175+
zgeadd.$(SUFFIX) zgemmt.$(SUFFIX) zgemmtr.$(SUFFIX) \
176+
zgemm_batch.$(SUFFIX) zgemm_batch_strided.$(SUFFIX)
173177

174178
ifeq ($(SUPPORT_GEMM3M), 1)
175179

@@ -317,15 +321,15 @@ CSBLAS2OBJS = \
317321
CSBLAS3OBJS = \
318322
cblas_sgemm.$(SUFFIX) cblas_ssymm.$(SUFFIX) cblas_strmm.$(SUFFIX) cblas_strsm.$(SUFFIX) \
319323
cblas_ssyrk.$(SUFFIX) cblas_ssyr2k.$(SUFFIX) cblas_somatcopy.$(SUFFIX) cblas_simatcopy.$(SUFFIX)\
320-
cblas_sgeadd.$(SUFFIX) cblas_sgemmt.$(SUFFIX) cblas_sgemmtr.$(SUFFIX) cblas_sgemm_batch.$(SUFFIX)
324+
cblas_sgeadd.$(SUFFIX) cblas_sgemmt.$(SUFFIX) cblas_sgemmtr.$(SUFFIX) cblas_sgemm_batch.$(SUFFIX) cblas_sgemm_batch_strided.$(SUFFIX)
321325

322326
ifeq ($(BUILD_BFLOAT16),1)
323327
CBBLAS3OBJS = cblas_bgemm.$(SUFFIX)
324328
CBBLAS2OBJS = cblas_bgemv.$(SUFFIX)
325329
CBBLAS1OBJS = cblas_bscal.$(SUFFIX)
326330
CSBBLAS1OBJS = cblas_sbdot.$(SUFFIX)
327331
CSBBLAS2OBJS = cblas_sbgemv.$(SUFFIX)
328-
CSBBLAS3OBJS = cblas_sbgemm.$(SUFFIX) cblas_sbgemmt.$(SUFFIX) cblas_sbgemmtr.$(SUFFIX) cblas_sbgemm_batch.$(SUFFIX)
332+
CSBBLAS3OBJS = cblas_sbgemm.$(SUFFIX) cblas_sbgemmt.$(SUFFIX) cblas_sbgemmtr.$(SUFFIX) cblas_sbgemm_batch.$(SUFFIX) cblas_sbgemm_batch_strided.$(SUFFIX)
329333
CSBEXTOBJS = cblas_sbstobf16.$(SUFFIX) cblas_sbdtobf16.$(SUFFIX) cblas_sbf16tos.$(SUFFIX) cblas_dbf16tod.$(SUFFIX)
330334
ifeq ($(ONLY_CBLAS),1)
331335
CSBEXTOBJS += sbstobf16.$(SUFFIX) sbdtobf16.$(SUFFIX) sbf16tos.$(SUFFIX) dbf16tod.$(SUFFIX)
@@ -353,7 +357,7 @@ CDBLAS2OBJS = \
353357
CDBLAS3OBJS += \
354358
cblas_dgemm.$(SUFFIX) cblas_dsymm.$(SUFFIX) cblas_dtrmm.$(SUFFIX) cblas_dtrsm.$(SUFFIX) \
355359
cblas_dsyrk.$(SUFFIX) cblas_dsyr2k.$(SUFFIX) cblas_domatcopy.$(SUFFIX) cblas_dimatcopy.$(SUFFIX) \
356-
cblas_dgeadd.$(SUFFIX) cblas_dgemmt.$(SUFFIX) cblas_dgemmtr.$(SUFFIX) cblas_dgemm_batch.$(SUFFIX)
360+
cblas_dgeadd.$(SUFFIX) cblas_dgemmt.$(SUFFIX) cblas_dgemmtr.$(SUFFIX) cblas_dgemm_batch.$(SUFFIX) cblas_dgemm_batch_strided.$(SUFFIX)
357361

358362
CCBLAS1OBJS = \
359363
cblas_icamax.$(SUFFIX) cblas_icamin.$(SUFFIX) cblas_scasum.$(SUFFIX) cblas_caxpy.$(SUFFIX) \
@@ -378,7 +382,7 @@ CCBLAS3OBJS = \
378382
cblas_csyrk.$(SUFFIX) cblas_csyr2k.$(SUFFIX) \
379383
cblas_chemm.$(SUFFIX) cblas_cherk.$(SUFFIX) cblas_cher2k.$(SUFFIX) \
380384
cblas_comatcopy.$(SUFFIX) cblas_cimatcopy.$(SUFFIX)\
381-
cblas_cgeadd.$(SUFFIX) cblas_cgemmt.$(SUFFIX) cblas_cgemmtr.$(SUFFIX) cblas_cgemm_batch.$(SUFFIX)
385+
cblas_cgeadd.$(SUFFIX) cblas_cgemmt.$(SUFFIX) cblas_cgemmtr.$(SUFFIX) cblas_cgemm_batch.$(SUFFIX) cblas_cgemm_batch_strided.$(SUFFIX)
382386

383387
CXERBLAOBJ = \
384388
cblas_xerbla.$(SUFFIX)
@@ -409,7 +413,7 @@ CZBLAS3OBJS = \
409413
cblas_zsyrk.$(SUFFIX) cblas_zsyr2k.$(SUFFIX) \
410414
cblas_zhemm.$(SUFFIX) cblas_zherk.$(SUFFIX) cblas_zher2k.$(SUFFIX)\
411415
cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX) \
412-
cblas_zgeadd.$(SUFFIX) cblas_zgemmt.$(SUFFIX) cblas_zgemmtr.$(SUFFIX) cblas_zgemm_batch.$(SUFFIX)
416+
cblas_zgeadd.$(SUFFIX) cblas_zgemmt.$(SUFFIX) cblas_zgemmtr.$(SUFFIX) cblas_zgemm_batch.$(SUFFIX) cblas_zgemm_batch_strided.$(SUFFIX)
413417

414418

415419
ifeq ($(SUPPORT_GEMM3M), 1)
@@ -2539,3 +2543,48 @@ cblas_cgemm_batch.$(SUFFIX) cblas_cgemm_batch.$(PSUFFIX) : gemm_batch.c ../param
25392543

25402544
cblas_zgemm_batch.$(SUFFIX) cblas_zgemm_batch.$(PSUFFIX) : gemm_batch.c ../param.h
25412545
$(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F)
2546+
2547+
cblas_sbgemm_batch_strided.$(SUFFIX) cblas_sbgemm_batch_strided.$(PSUFFIX) : gemm_batch_strided.c ../param.h
2548+
$(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F)
2549+
2550+
cblas_sgemm_batch_strided.$(SUFFIX) cblas_sgemm_batch_strided.$(PSUFFIX) : gemm_batch_strided.c ../param.h
2551+
$(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F)
2552+
2553+
cblas_dgemm_batch_strided.$(SUFFIX) cblas_dgemm_batch_strided.$(PSUFFIX) : gemm_batch_strided.c ../param.h
2554+
$(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F)
2555+
2556+
cblas_cgemm_batch_strided.$(SUFFIX) cblas_cgemm_batch_strided.$(PSUFFIX) : gemm_batch_strided.c ../param.h
2557+
$(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F)
2558+
2559+
cblas_zgemm_batch_strided.$(SUFFIX) cblas_zgemm_batch_strided.$(PSUFFIX) : gemm_batch_strided.c ../param.h
2560+
$(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F)
2561+
2562+
sbgemm_batch.$(SUFFIX) sbgemm_batch.$(PSUFFIX) : gemm_batch.c ../param.h
2563+
$(CC) -c $(CFLAGS) -UCBLAS $< -o $(@F)
2564+
2565+
sgemm_batch.$(SUFFIX) sgemm_batch.$(PSUFFIX) : gemm_batch.c ../param.h
2566+
$(CC) -c $(CFLAGS) -UCBLAS $< -o $(@F)
2567+
2568+
dgemm_batch.$(SUFFIX) dgemm_batch.$(PSUFFIX) : gemm_batch.c ../param.h
2569+
$(CC) -c $(CFLAGS) -UCBLAS $< -o $(@F)
2570+
2571+
cgemm_batch.$(SUFFIX) cgemm_batch.$(PSUFFIX) : gemm_batch.c ../param.h
2572+
$(CC) -c $(CFLAGS) -UCBLAS $< -o $(@F)
2573+
2574+
zgemm_batch.$(SUFFIX) zgemm_batch.$(PSUFFIX) : gemm_batch.c ../param.h
2575+
$(CC) -c $(CFLAGS) -UCBLAS $< -o $(@F)
2576+
2577+
sbgemm_batch_strided.$(SUFFIX) sbgemm_batch_strided.$(PSUFFIX) : gemm_batch_strided.c ../param.h
2578+
$(CC) -c $(CFLAGS) -UCBLAS $< -o $(@F)
2579+
2580+
sgemm_batch_strided.$(SUFFIX) sgemm_batch_strided.$(PSUFFIX) : gemm_batch_strided.c ../param.h
2581+
$(CC) -c $(CFLAGS) -UCBLAS $< -o $(@F)
2582+
2583+
dgemm_batch_strided.$(SUFFIX) dgemm_batch_strided.$(PSUFFIX) : gemm_batch_strided.c ../param.h
2584+
$(CC) -c $(CFLAGS) -UCBLAS $< -o $(@F)
2585+
2586+
cgemm_batch_strided.$(SUFFIX) cgemm_batch_strided.$(PSUFFIX) : gemm_batch_strided.c ../param.h
2587+
$(CC) -c $(CFLAGS) -UCBLAS $< -o $(@F)
2588+
2589+
zgemm_batch_strided.$(SUFFIX) zgemm_batch_strided.$(PSUFFIX) : gemm_batch_strided.c ../param.h
2590+
$(CC) -c $(CFLAGS) -UCBLAS $< -o $(@F)

0 commit comments

Comments (0)