Skip to content

Commit 9f5a74f

Browse files
authored
Merge pull request #2486 from qqqil/develop
add benchmark for trsv
2 parents 2afb109 + 233838b commit 9f5a74f

2 files changed

Lines changed: 309 additions & 0 deletions

File tree

benchmark/Makefile

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
7575
cher2k.goto zher2k.goto \
7676
sgemv.goto dgemv.goto cgemv.goto zgemv.goto \
7777
strmv.goto dtrmv.goto ctrmv.goto ztrmv.goto \
78+
strsv.goto dtrsv.goto ctrsv.goto ztrsv.goto \
7879
sgeev.goto dgeev.goto cgeev.goto zgeev.goto \
7980
sgesv.goto dgesv.goto cgesv.goto zgesv.goto \
8081
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \
@@ -104,6 +105,7 @@ acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
104105
cher2k.acml zher2k.acml \
105106
sgemv.acml dgemv.acml cgemv.acml zgemv.acml \
106107
strmv.acml dtrmv.acml ctrmv.acml ztrmv.acml \
108+
strsv.acml dtrsv.acml ctrsv.acml ztrsv.acml \
107109
sgeev.acml dgeev.acml cgeev.acml zgeev.acml \
108110
sgesv.acml dgesv.acml cgesv.acml zgesv.acml \
109111
sgetri.acml dgetri.acml cgetri.acml zgetri.acml \
@@ -134,6 +136,7 @@ atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \
134136
cher2k.atlas zher2k.atlas \
135137
sgemv.atlas dgemv.atlas cgemv.atlas zgemv.atlas \
136138
strmv.atlas dtrmv.atlas ctrmv.atlas ztrmv.atlas \
139+
strsv.atlas dtrsv.atlas ctrsv.atlas ztrsv.atlas \
137140
sgeev.atlas dgeev.atlas cgeev.atlas zgeev.atlas \
138141
sgesv.atlas dgesv.atlas cgesv.atlas zgesv.atlas \
139142
sgetri.atlas dgetri.atlas cgetri.atlas zgetri.atlas \
@@ -163,6 +166,7 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
163166
cher2k.mkl zher2k.mkl \
164167
sgemv.mkl dgemv.mkl cgemv.mkl zgemv.mkl \
165168
strmv.mkl dtrmv.mkl ctrmv.mkl ztrmv.mkl \
169+
strsv.mkl dtrsv.mkl ctrsv.mkl ztrsv.mkl \
166170
sgeev.mkl dgeev.mkl cgeev.mkl zgeev.mkl \
167171
sgesv.mkl dgesv.mkl cgesv.mkl zgesv.mkl \
168172
sgetri.mkl dgetri.mkl cgetri.mkl zgetri.mkl \
@@ -193,6 +197,7 @@ goto :: sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
193197
cher2k.goto zher2k.goto \
194198
sgemv.goto dgemv.goto cgemv.goto zgemv.goto \
195199
strmv.goto dtrmv.goto ctrmv.goto ztrmv.goto \
200+
strsv.goto dtrsv.goto ctrsv.goto ztrsv.goto \
196201
ssymm.goto dsymm.goto csymm.goto zsymm.goto \
197202
smallscaling \
198203
isamax.goto idamax.goto icamax.goto izamax.goto \
@@ -221,6 +226,7 @@ acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
221226
cher2k.acml zher2k.acml \
222227
sgemv.acml dgemv.acml cgemv.acml zgemv.acml \
223228
strmv.acml dtrmv.acml ctrmv.acml ztrmv.acml \
229+
strsv.acml dtrsv.acml ctrsv.acml ztrsv.acml \
224230
sgeev.acml dgeev.acml cgeev.acml zgeev.acml \
225231
sgesv.acml dgesv.acml cgesv.acml zgesv.acml \
226232
sgetri.acml dgetri.acml cgetri.acml zgetri.acml \
@@ -251,6 +257,7 @@ atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \
251257
cher2k.atlas zher2k.atlas \
252258
sgemv.atlas dgemv.atlas cgemv.atlas zgemv.atlas \
253259
strmv.atlas dtrmv.atlas ctrmv.atlas ztrmv.atlas \
260+
strsv.atlas dtrsv.atlas ctrsv.atlas ztrsv.atlas \
254261
sgeev.atlas dgeev.atlas cgeev.atlas zgeev.atlas \
255262
sgesv.atlas dgesv.atlas cgesv.atlas zgesv.atlas \
256263
sgetri.atlas dgetri.atlas cgetri.atlas zgetri.atlas \
@@ -282,6 +289,7 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
282289
cher2k.mkl zher2k.mkl \
283290
sgemv.mkl dgemv.mkl cgemv.mkl zgemv.mkl \
284291
strmv.mkl dtrmv.mkl ctrmv.mkl ztrmv.mkl \
292+
strsv.mkl dtrsv.mkl ctrsv.mkl ztrsv.mkl \
285293
sgeev.mkl dgeev.mkl cgeev.mkl zgeev.mkl \
286294
sgesv.mkl dgesv.mkl cgesv.mkl zgesv.mkl \
287295
sgetri.mkl dgetri.mkl cgetri.mkl zgetri.mkl \
@@ -322,6 +330,7 @@ veclib :: slinpack.veclib dlinpack.veclib clinpack.veclib zlinpack.veclib \
322330
cher2k.veclib zher2k.veclib \
323331
sgemv.veclib dgemv.veclib cgemv.veclib zgemv.veclib \
324332
strmv.veclib dtrmv.veclib ctrmv.veclib ztrmv.veclib \
333+
strsv.veclib dtrsv.veclib ctrsv.veclib ztrsv.veclib \
325334
sgeev.veclib dgeev.veclib cgeev.veclib zgeev.veclib \
326335
sgesv.veclib dgesv.veclib cgesv.veclib zgesv.veclib \
327336
sgetri.veclib dgetri.veclib cgetri.veclib zgetri.veclib \
@@ -1222,6 +1231,72 @@ ztrmv.mkl : ztrmv.$(SUFFIX)
12221231
ztrmv.veclib : ztrmv.$(SUFFIX)
12231232
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
12241233

1234+
##################################### Strsv ####################################################
# Link rules for the single-precision real TRSV benchmark, one executable per
# BLAS backend.  Every rule links the prerequisites ($^) into a file named
# after the target ($(@F)).
#
# strsv.goto links against the library given as prerequisite ../$(LIBNAME);
# its recipe has no '-' prefix, so a link failure stops make.
# The .acml/.atlas/.mkl/.veclib recipes start with '-', so make ignores a
# failing link there (presumably because those vendor libraries are optional
# on the build host -- confirm against the rest of this Makefile).
1235+
strsv.goto : strsv.$(SUFFIX) ../$(LIBNAME)
1236+
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
1237+
1238+
strsv.acml : strsv.$(SUFFIX)
1239+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
1240+
1241+
strsv.atlas : strsv.$(SUFFIX)
1242+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
1243+
1244+
strsv.mkl : strsv.$(SUFFIX)
1245+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
1246+
1247+
strsv.veclib : strsv.$(SUFFIX)
1248+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
1249+
1250+
##################################### Dtrsv ####################################################
# Link rules for the double-precision real TRSV benchmark, one executable per
# BLAS backend.  Every rule links the prerequisites ($^) into a file named
# after the target ($(@F)).
#
# dtrsv.goto links against the library given as prerequisite ../$(LIBNAME);
# its recipe has no '-' prefix, so a link failure stops make.
# The .acml/.atlas/.mkl/.veclib recipes start with '-', so make ignores a
# failing link there (presumably because those vendor libraries are optional
# on the build host -- confirm against the rest of this Makefile).
1251+
dtrsv.goto : dtrsv.$(SUFFIX) ../$(LIBNAME)
1252+
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
1253+
1254+
dtrsv.acml : dtrsv.$(SUFFIX)
1255+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
1256+
1257+
dtrsv.atlas : dtrsv.$(SUFFIX)
1258+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
1259+
1260+
dtrsv.mkl : dtrsv.$(SUFFIX)
1261+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
1262+
1263+
dtrsv.veclib : dtrsv.$(SUFFIX)
1264+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
1265+
1266+
##################################### Ctrsv ####################################################
# Link rules for the single-precision complex TRSV benchmark, one executable
# per BLAS backend.  Every rule links the prerequisites ($^) into a file named
# after the target ($(@F)).
#
# ctrsv.goto links against the library given as prerequisite ../$(LIBNAME);
# its recipe has no '-' prefix, so a link failure stops make.
# The .acml/.atlas/.mkl/.veclib recipes start with '-', so make ignores a
# failing link there (presumably because those vendor libraries are optional
# on the build host -- confirm against the rest of this Makefile).
1267+
1268+
ctrsv.goto : ctrsv.$(SUFFIX) ../$(LIBNAME)
1269+
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
1270+
1271+
ctrsv.acml : ctrsv.$(SUFFIX)
1272+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
1273+
1274+
ctrsv.atlas : ctrsv.$(SUFFIX)
1275+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
1276+
1277+
ctrsv.mkl : ctrsv.$(SUFFIX)
1278+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
1279+
1280+
ctrsv.veclib : ctrsv.$(SUFFIX)
1281+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
1282+
1283+
##################################### Ztrsv ####################################################
# Link rules for the double-precision complex TRSV benchmark, one executable
# per BLAS backend.  Every rule links the prerequisites ($^) into a file named
# after the target ($(@F)).
#
# ztrsv.goto links against the library given as prerequisite ../$(LIBNAME);
# its recipe has no '-' prefix, so a link failure stops make.
# The .acml/.atlas/.mkl/.veclib recipes start with '-', so make ignores a
# failing link there (presumably because those vendor libraries are optional
# on the build host -- confirm against the rest of this Makefile).
1284+
1285+
ztrsv.goto : ztrsv.$(SUFFIX) ../$(LIBNAME)
1286+
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
1287+
1288+
ztrsv.acml : ztrsv.$(SUFFIX)
1289+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
1290+
1291+
ztrsv.atlas : ztrsv.$(SUFFIX)
1292+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
1293+
1294+
ztrsv.mkl : ztrsv.$(SUFFIX)
1295+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
1296+
1297+
ztrsv.veclib : ztrsv.$(SUFFIX)
1298+
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
1299+
12251300
##################################### Sger ####################################################
12261301
sger.goto : sger.$(SUFFIX) ../$(LIBNAME)
12271302
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
@@ -2309,6 +2384,18 @@ ctrmv.$(SUFFIX) : trmv.c
23092384
ztrmv.$(SUFFIX) : trmv.c
23102385
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
23112386

2387+
# Object files for the four TRSV benchmark variants.  All four are compiled
# from the same source, trsv.c; the -D/-U pairs for COMPLEX and DOUBLE decide
# which routine trsv.c binds its TRSV macro to:
#   -UCOMPLEX -UDOUBLE -> strsv      -UCOMPLEX -DDOUBLE -> dtrsv
#   -DCOMPLEX -UDOUBLE -> ctrsv      -DCOMPLEX -DDOUBLE -> ztrsv
# $(@F) is the file part of the target, $^ is the prerequisite list (trsv.c).
strsv.$(SUFFIX) : trsv.c
2388+
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
2389+
2390+
dtrsv.$(SUFFIX) : trsv.c
2391+
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
2392+
2393+
ctrsv.$(SUFFIX) : trsv.c
2394+
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
2395+
2396+
ztrsv.$(SUFFIX) : trsv.c
2397+
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
2398+
23122399
sger.$(SUFFIX) : ger.c
23132400
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
23142401

benchmark/trsv.c

Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
/***************************************************************************
2+
Copyright (c) 2014, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*****************************************************************************/
27+
28+
#include <stdio.h>
29+
#include <stdlib.h>
30+
#ifdef __CYGWIN32__
31+
#include <sys/time.h>
32+
#endif
33+
#include <time.h>
34+
#include "common.h"
35+
36+
37+
/* Discard any GEMV/TRSV macro that an earlier header may have defined, so
   that TRSV is bound only by the selection below. */
#undef GEMV
#undef TRSV

/* Bind TRSV to the BLAS routine that matches the build flags:
 *
 *     COMPLEX   DOUBLE    routine
 *     -------   ------    -------
 *     unset     unset     strsv
 *     unset     set       dtrsv
 *     set       unset     ctrsv
 *     set       set       ztrsv
 */
#if defined(COMPLEX)
#  if defined(DOUBLE)
#    define TRSV BLASFUNC(ztrsv)
#  else
#    define TRSV BLASFUNC(ctrsv)
#  endif
#else
#  if defined(DOUBLE)
#    define TRSV BLASFUNC(dtrsv)
#  else
#    define TRSV BLASFUNC(strsv)
#  endif
#endif
57+
58+
#if defined(__WIN32__) || defined(__WIN64__)

/* Distance between the FILETIME epoch (1601-01-01) and the Unix epoch
   (1970-01-01), in microseconds. */
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() substitute for Windows builds.
 *
 * tv : receives the current wall-clock time as seconds / microseconds since
 *      the Unix epoch; may be NULL, in which case nothing is written.
 * tz : accepted for signature compatibility only; never read.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres;

  (void)tz;   /* time-zone information is not supported */

  if (NULL != tv)
    {
      GetSystemTimeAsFileTime(&ft);

      /* Assemble the two 32-bit halves into one 64-bit count of 100 ns ticks. */
      tmpres = ((unsigned __int64)ft.dwHighDateTime << 32)
             |  (unsigned __int64)ft.dwLowDateTime;

      /* converting file time to unix epoch */
      tmpres /= 10;                         /* 100 ns ticks -> microseconds */
      tmpres -= DELTA_EPOCH_IN_MICROSECS;   /* rebase 1601 -> 1970 */

      tv->tv_sec  = (long)(tmpres / 1000000UL);
      tv->tv_usec = (long)(tmpres % 1000000UL);
    }

  return 0;
}

#endif
89+
90+
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

/* NOTE: the trailing "&& 0" in the condition above keeps this whole section
   out of the build; it is retained as an optional allocator only. */

/* Fallback value for systems whose headers do not provide SHM_HUGETLB. */
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

/*
 * Allocate `size` bytes from a System V shared-memory segment requested with
 * SHM_HUGETLB.  The request is rounded with HUGE_PAGESIZE before being passed
 * to shmget().  The segment is marked for removal (IPC_RMID) right after it
 * has been attached.
 *
 * Returns the attached address.  Prints a message and calls exit(1) when
 * either shmget() or shmat() fails, so it never returns NULL.
 */
static void *huge_malloc(BLASLONG size){
  int   segment;
  void *mapped;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT |0600);

  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment, NULL, SHM_RND);

  /* shmat() signals failure with (void *)-1 rather than NULL. */
  if ((BLASLONG)mapped == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(segment, IPC_RMID, 0);

  return mapped;
}

/* Route every malloc() call that follows in this file through huge_malloc(). */
#define malloc huge_malloc

#endif
122+
123+
int main(int argc, char *argv[]){
124+
125+
FLOAT *a, *x;
126+
blasint n = 0, i, j;
127+
blasint inc_x=1;
128+
int loops = 1;
129+
int l;
130+
char *p;
131+
132+
int from = 1;
133+
int to = 200;
134+
int step = 1;
135+
136+
struct timespec time_start, time_end;
137+
time_t seconds = 0;
138+
139+
double time1,timeg;
140+
long long nanos = 0;
141+
142+
argc--;argv++;
143+
144+
if (argc > 0) { from = atol(*argv); argc--; argv++;}
145+
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
146+
if (argc > 0) { step = atol(*argv); argc--; argv++;}
147+
148+
char uplo ='L';
149+
char transa = 'N';
150+
char diag ='U';
151+
152+
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
153+
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
154+
if ((p = getenv("OPENBLAS_TRANSA"))) transa=*p;
155+
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
156+
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
157+
158+
fprintf(stderr, "From : %3d To : %3d Step = %3d Transa = '%c' Inc_x = %d uplo=%c diag=%c loop = %d\n", from, to, step,transa,inc_x,
159+
uplo,diag,loops);
160+
161+
162+
#ifdef linux
163+
srandom(getpid());
164+
#endif
165+
166+
fprintf(stderr, " SIZE Flops\n");
167+
fprintf(stderr, "============================================\n");
168+
169+
for(n = from; n <= to; n += step)
170+
{
171+
timeg=0;
172+
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * n * n * COMPSIZE)) == NULL){
173+
fprintf(stderr,"Out of Memory!!\n");exit(1);
174+
}
175+
176+
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * n * abs(inc_x) * COMPSIZE)) == NULL){
177+
fprintf(stderr,"Out of Memory!!\n");exit(1);
178+
}
179+
180+
for(j = 0; j < n; j++){
181+
for(i = 0; i < n * COMPSIZE; i++){
182+
a[i + j * n * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
183+
}
184+
}
185+
186+
for(i = 0; i < n * COMPSIZE * abs(inc_x); i++){
187+
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
188+
}
189+
190+
for(l =0;l< loops;l++){
191+
192+
clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&time_start);
193+
194+
TRSV(&uplo,&transa,&diag,&n,a,&n,x,&inc_x);
195+
196+
clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&time_end);
197+
nanos = time_end.tv_nsec - time_start.tv_nsec;
198+
seconds = time_end.tv_sec - time_start.tv_sec;
199+
200+
time1 = seconds + nanos /1.e9;
201+
timeg += time1;
202+
}
203+
204+
205+
timeg /= loops;
206+
long long muls = n*(n+1)/2.0;
207+
long long adds = (n - 1.0)*n/2.0;
208+
209+
fprintf(stderr, "%10d %10.2f MFlops %10.6f sec\n", n,(muls+adds) / timeg * 1.e-6, timeg);
210+
if(a != NULL){
211+
free(a);
212+
}
213+
214+
if( x != NULL){
215+
free(x);
216+
}
217+
218+
}
219+
220+
return 0;
221+
}
222+

0 commit comments

Comments
 (0)