Skip to content

Commit de74e11

Browse files
committed
add benchmark for trsv
1 parent ad9e531 commit de74e11

2 files changed

Lines changed: 309 additions & 0 deletions

File tree

benchmark/Makefile

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
7474
cher2k.goto zher2k.goto \
7575
sgemv.goto dgemv.goto cgemv.goto zgemv.goto \
7676
strmv.goto dtrmv.goto ctrmv.goto ztrmv.goto \
77+
strsv.goto dtrsv.goto ctrsv.goto ztrsv.goto \
7778
sgeev.goto dgeev.goto cgeev.goto zgeev.goto \
7879
sgesv.goto dgesv.goto cgesv.goto zgesv.goto \
7980
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \
@@ -102,6 +103,7 @@ acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
102103
cher2k.acml zher2k.acml \
103104
sgemv.acml dgemv.acml cgemv.acml zgemv.acml \
104105
strmv.acml dtrmv.acml ctrmv.acml ztrmv.acml \
106+
strsv.acml dtrsv.acml ctrsv.acml ztrsv.acml \
105107
sgeev.acml dgeev.acml cgeev.acml zgeev.acml \
106108
sgesv.acml dgesv.acml cgesv.acml zgesv.acml \
107109
sgetri.acml dgetri.acml cgetri.acml zgetri.acml \
@@ -131,6 +133,7 @@ atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \
131133
cher2k.atlas zher2k.atlas \
132134
sgemv.atlas dgemv.atlas cgemv.atlas zgemv.atlas \
133135
strmv.atlas dtrmv.atlas ctrmv.atlas ztrmv.atlas \
136+
strsv.atlas dtrsv.atlas ctrsv.atlas ztrsv.atlas \
134137
sgeev.atlas dgeev.atlas cgeev.atlas zgeev.atlas \
135138
sgesv.atlas dgesv.atlas cgesv.atlas zgesv.atlas \
136139
sgetri.atlas dgetri.atlas cgetri.atlas zgetri.atlas \
@@ -159,6 +162,7 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
159162
cher2k.mkl zher2k.mkl \
160163
sgemv.mkl dgemv.mkl cgemv.mkl zgemv.mkl \
161164
strmv.mkl dtrmv.mkl ctrmv.mkl ztrmv.mkl \
165+
strsv.mkl dtrsv.mkl ctrsv.mkl ztrsv.mkl \
162166
sgeev.mkl dgeev.mkl cgeev.mkl zgeev.mkl \
163167
sgesv.mkl dgesv.mkl cgesv.mkl zgesv.mkl \
164168
sgetri.mkl dgetri.mkl cgetri.mkl zgetri.mkl \
@@ -188,6 +192,7 @@ goto :: sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
188192
cher2k.goto zher2k.goto \
189193
sgemv.goto dgemv.goto cgemv.goto zgemv.goto \
190194
strmv.goto dtrmv.goto ctrmv.goto ztrmv.goto \
195+
strsv.goto dtrsv.goto ctrsv.goto ztrsv.goto \
191196
ssymm.goto dsymm.goto csymm.goto zsymm.goto \
192197
smallscaling \
193198
isamax.goto idamax.goto icamax.goto izamax.goto \
@@ -215,6 +220,7 @@ acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
215220
cher2k.acml zher2k.acml \
216221
sgemv.acml dgemv.acml cgemv.acml zgemv.acml \
217222
strmv.acml dtrmv.acml ctrmv.acml ztrmv.acml \
223+
strsv.acml dtrsv.acml ctrsv.acml ztrsv.acml \
218224
sgeev.acml dgeev.acml cgeev.acml zgeev.acml \
219225
sgesv.acml dgesv.acml cgesv.acml zgesv.acml \
220226
sgetri.acml dgetri.acml cgetri.acml zgetri.acml \
@@ -244,6 +250,7 @@ atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \
244250
cher2k.atlas zher2k.atlas \
245251
sgemv.atlas dgemv.atlas cgemv.atlas zgemv.atlas \
246252
strmv.atlas dtrmv.atlas ctrmv.atlas ztrmv.atlas \
253+
strsv.atlas dtrsv.atlas ctrsv.atlas ztrsv.atlas \
247254
sgeev.atlas dgeev.atlas cgeev.atlas zgeev.atlas \
248255
sgesv.atlas dgesv.atlas cgesv.atlas zgesv.atlas \
249256
sgetri.atlas dgetri.atlas cgetri.atlas zgetri.atlas \
@@ -274,6 +281,7 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
274281
cher2k.mkl zher2k.mkl \
275282
sgemv.mkl dgemv.mkl cgemv.mkl zgemv.mkl \
276283
strmv.mkl dtrmv.mkl ctrmv.mkl ztrmv.mkl \
284+
strsv.mkl dtrsv.mkl ctrsv.mkl ztrsv.mkl \
277285
sgeev.mkl dgeev.mkl cgeev.mkl zgeev.mkl \
278286
sgesv.mkl dgesv.mkl cgesv.mkl zgesv.mkl \
279287
sgetri.mkl dgetri.mkl cgetri.mkl zgetri.mkl \
@@ -313,6 +321,7 @@ veclib :: slinpack.veclib dlinpack.veclib clinpack.veclib zlinpack.veclib \
313321
cher2k.veclib zher2k.veclib \
314322
sgemv.veclib dgemv.veclib cgemv.veclib zgemv.veclib \
315323
strmv.veclib dtrmv.veclib ctrmv.veclib ztrmv.veclib \
324+
strsv.veclib dtrsv.veclib ctrsv.veclib ztrsv.veclib \
316325
sgeev.veclib dgeev.veclib cgeev.veclib zgeev.veclib \
317326
sgesv.veclib dgesv.veclib cgesv.veclib zgesv.veclib \
318327
sgetri.veclib dgetri.veclib cgetri.veclib zgetri.veclib \
@@ -1183,6 +1192,72 @@ ztrmv.mkl : ztrmv.$(SUFFIX)
11831192
# Link the ztrmv benchmark object against $(LIBVECLIB).
# The leading '-' makes make ignore a failure of this command.
ztrmv.veclib : ztrmv.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
11851194

1195+
##################################### Strsv ####################################################
# Link rules for the strsv benchmark, one executable per BLAS implementation.
# The .goto variant links the library given as ../$(LIBNAME) (it is a
# prerequisite, so it is part of $^) plus -lm; the other variants link the
# library named by the corresponding LIB* variable.
# A leading '-' makes make ignore a failure of that command -- presumably so
# that an unavailable vendor library does not stop the whole build.
strsv.goto : strsv.$(SUFFIX) ../$(LIBNAME)
	$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm

strsv.acml : strsv.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

strsv.atlas : strsv.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

strsv.mkl : strsv.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

strsv.veclib : strsv.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
1210+
1211+
##################################### Dtrsv ####################################################
# Link rules for the dtrsv benchmark, one executable per BLAS implementation.
# The .goto variant links the library given as ../$(LIBNAME) (it is a
# prerequisite, so it is part of $^) plus -lm; the other variants link the
# library named by the corresponding LIB* variable.
# A leading '-' makes make ignore a failure of that command -- presumably so
# that an unavailable vendor library does not stop the whole build.
dtrsv.goto : dtrsv.$(SUFFIX) ../$(LIBNAME)
	$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm

dtrsv.acml : dtrsv.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

dtrsv.atlas : dtrsv.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

dtrsv.mkl : dtrsv.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

dtrsv.veclib : dtrsv.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
1226+
1227+
##################################### Ctrsv ####################################################
# Link rules for the ctrsv benchmark, one executable per BLAS implementation.
# The .goto variant links the library given as ../$(LIBNAME) (it is a
# prerequisite, so it is part of $^) plus -lm; the other variants link the
# library named by the corresponding LIB* variable.
# A leading '-' makes make ignore a failure of that command -- presumably so
# that an unavailable vendor library does not stop the whole build.

ctrsv.goto : ctrsv.$(SUFFIX) ../$(LIBNAME)
	$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm

ctrsv.acml : ctrsv.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

ctrsv.atlas : ctrsv.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

ctrsv.mkl : ctrsv.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

ctrsv.veclib : ctrsv.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
1243+
1244+
##################################### Ztrsv ####################################################
# Link rules for the ztrsv benchmark, one executable per BLAS implementation.
# The .goto variant links the library given as ../$(LIBNAME) (it is a
# prerequisite, so it is part of $^) plus -lm; the other variants link the
# library named by the corresponding LIB* variable.
# A leading '-' makes make ignore a failure of that command -- presumably so
# that an unavailable vendor library does not stop the whole build.

ztrsv.goto : ztrsv.$(SUFFIX) ../$(LIBNAME)
	$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm

ztrsv.acml : ztrsv.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

ztrsv.atlas : ztrsv.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

ztrsv.mkl : ztrsv.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)

ztrsv.veclib : ztrsv.$(SUFFIX)
	-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
1260+
11861261
##################################### Sger ####################################################
11871262
# Link the sger benchmark object with ../$(LIBNAME) (a prerequisite, hence
# part of $^) and the math library.
sger.goto : sger.$(SUFFIX) ../$(LIBNAME)
	$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
@@ -2264,6 +2339,18 @@ ctrmv.$(SUFFIX) : trmv.c
22642339
# Compile trmv.c with both COMPLEX and DOUBLE defined to produce the ztrmv object.
ztrmv.$(SUFFIX) : trmv.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
22662341

2342+
# Object files for the trsv benchmark. The single source trsv.c is compiled
# four times; the COMPLEX / DOUBLE preprocessor symbols (defined with -D,
# explicitly undefined with -U) choose which of strsv / dtrsv / ctrsv / ztrsv
# the program calls.
strsv.$(SUFFIX) : trsv.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^

dtrsv.$(SUFFIX) : trsv.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^

ctrsv.$(SUFFIX) : trsv.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^

ztrsv.$(SUFFIX) : trsv.c
	$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
2353+
22672354
# Compile ger.c with COMPLEX and DOUBLE both undefined to produce the sger object.
sger.$(SUFFIX) : ger.c
	$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
22692356

benchmark/trsv.c

Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
/***************************************************************************
2+
Copyright (c) 2014, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*****************************************************************************/
27+
28+
#include <stdio.h>
29+
#include <stdlib.h>
30+
#ifdef __CYGWIN32__
31+
#include <sys/time.h>
32+
#endif
33+
#include <time.h>
34+
#include "common.h"
35+
36+
37+
/*
 * Bind TRSV to the BLAS routine matching the build flags:
 *
 *   COMPLEX  DOUBLE   routine
 *   -------  ------   -------
 *     no       no     strsv
 *     no       yes    dtrsv
 *     yes      no     ctrsv
 *     yes      yes    ztrsv
 *
 * Any earlier definitions of GEMV and TRSV are discarded first.
 */
#undef GEMV
#undef TRSV

#if defined(COMPLEX) && defined(DOUBLE)
#define TRSV BLASFUNC(ztrsv)
#elif defined(COMPLEX)
#define TRSV BLASFUNC(ctrsv)
#elif defined(DOUBLE)
#define TRSV BLASFUNC(dtrsv)
#else
#define TRSV BLASFUNC(strsv)
#endif
57+
58+
#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * gettimeofday() replacement for Windows builds.
 *
 * tv  receives the current system time as seconds and microseconds relative
 *     to the Unix epoch; nothing is written when tv is NULL.
 * tz  ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME now;
  unsigned __int64 stamp;

  if (tv == NULL) return 0;

  GetSystemTimeAsFileTime(&now);

  /* Combine the two 32-bit halves of the file time into one 64-bit value. */
  stamp = ((unsigned __int64)now.dwHighDateTime << 32) | now.dwLowDateTime;

  /* Scale to microseconds, then shift from the file-time epoch to the Unix epoch. */
  stamp = stamp / 10 - DELTA_EPOCH_IN_MICROSECS;

  tv->tv_sec  = (long)(stamp / 1000000UL);
  tv->tv_usec = (long)(stamp % 1000000UL);

  return 0;
}

#endif
89+
90+
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

/*
 * Allocate `size` bytes from a SHM_HUGETLB shared-memory segment.
 *
 * The trailing "&& 0" in the guard above keeps this section out of every
 * build; when enabled, the #define at the end routes malloc() calls in the
 * rest of the file through this function.
 *
 * The requested segment length is (size + HUGE_PAGESIZE) with the low bits
 * masked off by ~(HUGE_PAGESIZE - 1).
 *
 * Prints a message and exits with status 1 if shmget() or shmat() fails.
 * The segment is marked for removal (IPC_RMID) right after it is attached.
 */
static void *huge_malloc(BLASLONG size){
  int   segment;
  void *mapped;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT |0600);

  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment, NULL, SHM_RND);

  if ((BLASLONG)mapped == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(segment, IPC_RMID, 0);

  return mapped;
}

#define malloc huge_malloc

#endif
122+
123+
int main(int argc, char *argv[]){
124+
125+
FLOAT *a, *x;
126+
blasint n = 0, i, j;
127+
blasint inc_x=1;
128+
int loops = 1;
129+
int l;
130+
char *p;
131+
132+
int from = 1;
133+
int to = 200;
134+
int step = 1;
135+
136+
struct timespec time_start, time_end;
137+
time_t seconds = 0;
138+
139+
double time1,timeg;
140+
long long nanos = 0;
141+
142+
argc--;argv++;
143+
144+
if (argc > 0) { from = atol(*argv); argc--; argv++;}
145+
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
146+
if (argc > 0) { step = atol(*argv); argc--; argv++;}
147+
148+
char uplo ='L';
149+
char transa = 'N';
150+
char diag ='U';
151+
152+
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
153+
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
154+
if ((p = getenv("OPENBLAS_TRANSA"))) transa=*p;
155+
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
156+
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
157+
158+
fprintf(stderr, "From : %3d To : %3d Step = %3d Transa = '%c' Inc_x = %d uplo=%c diag=%c loop = %d\n", from, to, step,transa,inc_x,
159+
uplo,diag,loops);
160+
161+
162+
#ifdef linux
163+
srandom(getpid());
164+
#endif
165+
166+
fprintf(stderr, " SIZE Flops\n");
167+
fprintf(stderr, "============================================\n");
168+
169+
for(n = from; n <= to; n += step)
170+
{
171+
timeg=0;
172+
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * n * n * COMPSIZE)) == NULL){
173+
fprintf(stderr,"Out of Memory!!\n");exit(1);
174+
}
175+
176+
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * n * abs(inc_x) * COMPSIZE)) == NULL){
177+
fprintf(stderr,"Out of Memory!!\n");exit(1);
178+
}
179+
180+
for(j = 0; j < n; j++){
181+
for(i = 0; i < n * COMPSIZE; i++){
182+
a[i + j * n * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
183+
}
184+
}
185+
186+
for(i = 0; i < n * COMPSIZE * abs(inc_x); i++){
187+
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
188+
}
189+
190+
for(l =0;l< loops;l++){
191+
192+
clock_gettime(CLOCK_REALTIME,&time_start);
193+
194+
TRSV(&uplo,&transa,&diag,&n,a,&n,x,&inc_x);
195+
196+
clock_gettime(CLOCK_REALTIME,&time_end);
197+
nanos = time_end.tv_nsec - time_start.tv_nsec;
198+
seconds = time_end.tv_sec - time_start.tv_sec;
199+
200+
time1 = seconds + nanos /1.e9;
201+
timeg += time1;
202+
}
203+
204+
205+
timeg /= loops;
206+
long long muls = n*(n+1)/2.0;
207+
long long adds = (n - 1.0)*n/2.0;
208+
209+
fprintf(stderr, "%10d %10.2f MFlops %10.6f sec\n", n,(muls+adds) / timeg * 1.e-6, timeg);
210+
if(a != NULL){
211+
free(a);
212+
}
213+
214+
if( x != NULL){
215+
free(x);
216+
}
217+
218+
}
219+
220+
return 0;
221+
}
222+

0 commit comments

Comments
 (0)