Skip to content

Commit 588f0e8

Browse files
committed
Add SBGEMV and SHGEMV routines to RISC-V.
1 parent 67ddda3 commit 588f0e8

3 files changed

Lines changed: 224 additions & 0 deletions

File tree

kernel/riscv64/KERNEL.RISCV64_ZVL256B

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,8 @@ SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX)
283283
ifndef SBGEMM_BETA
284284
SBGEMM_BETA = gemm_beta_rvv.c
285285
endif
286+
SBGEMVNKERNEL = sbgemv_n_vector.c
287+
SBGEMVTKERNEL = sbgemv_t_vector.c
286288
endif
287289

288290
SAXPBYKERNEL = axpby_vector_v2.c

kernel/riscv64/sbgemv_n_vector.c

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/***************************************************************************
2+
Copyright (c) 2020, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*****************************************************************************/
27+
28+
#include "common.h"

/* Output/accumulator vectors are fp32 at LMUL=8. */
#define FLOAT_V_T vfloat32m8_t
#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m8)     /* unit-stride fp32 load   */
#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m8)   /* strided fp32 load       */
#define VSEV_FLOAT RISCV_RVV(vse32_v_f32m8)     /* unit-stride fp32 store  */
#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m8)   /* strided fp32 store      */

/* vl is chosen from the 16-bit input element type (e16/m4) so that one m4
   input vector widens exactly into one fp32 m8 accumulator vector. */
#define VSETVL(n) RISCV_RVV(vsetvl_e16m4)(n)

#if defined(HFLOAT16)
/* IEEE fp16 inputs: widening scalar multiply-accumulate into fp32. */
#define IFLOAT_V_T vfloat16m4_t
#define VLEV_IFLOAT RISCV_RVV(vle16_v_f16m4)
#define VFMACCVF_FLOAT RISCV_RVV(vfwmacc_vf_f32m8)
#else
/* bfloat16 inputs: widening bf16 multiply-accumulate (Zvfbfwma) into fp32. */
#define IFLOAT_V_T vbfloat16m4_t
#define VLEV_IFLOAT RISCV_RVV(vle16_v_bf16m4)
#define VFMACCVF_FLOAT RISCV_RVV(vfwmaccbf16_vf_f32m8)
#endif
48+
/* y := y + alpha * A * x  (non-transposed gemv, 16-bit inputs, fp32 output).
 *
 * m      rows of A (length of each column and of y)
 * n      columns of A (length of x)
 * alpha  fp32 scalar
 * a      column-major matrix of IFLOAT (fp16 or bf16), leading dimension lda
 * x      input vector of IFLOAT, element stride inc_x
 * y      in/out vector of FLOAT, element stride inc_y
 *
 * Walks A column by column; each column contributes scale * A[:,j] to y,
 * where scale = alpha * x[j] narrowed back to the 16-bit input type so the
 * widening scalar multiply-accumulate can consume it. */
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
{
	BLASLONG j;

	if (n < 0) return(0);

	if (inc_y == 1) {
		/* Contiguous y: unit-stride vector loads/stores. */
		for (j = 0; j < n; j++) {
			IFLOAT scale = (IFLOAT)(alpha * (FLOAT)(x[0]));
			IFLOAT *col = a;
			FLOAT *yp = y;
			BLASLONG remain = m;
			while (remain > 0) {
				BLASLONG vl = VSETVL(remain);
				FLOAT_V_T acc = VLEV_FLOAT(yp, vl);
				IFLOAT_V_T av = VLEV_IFLOAT(col, vl);
				acc = VFMACCVF_FLOAT(acc, scale, av, vl);
				VSEV_FLOAT(yp, acc, vl);
				yp += vl;
				col += vl;
				remain -= vl;
			}
			x += inc_x;
			a += lda;
		}
	} else {
		/* Strided y: byte stride for the 32-bit strided loads/stores. */
		BLASLONG stride_y = inc_y * sizeof(FLOAT);
		for (j = 0; j < n; j++) {
			IFLOAT scale = (IFLOAT)(alpha * (FLOAT)(x[0]));
			IFLOAT *col = a;
			FLOAT *yp = y;
			BLASLONG remain = m;
			while (remain > 0) {
				BLASLONG vl = VSETVL(remain);
				FLOAT_V_T acc = VLSEV_FLOAT(yp, stride_y, vl);
				IFLOAT_V_T av = VLEV_IFLOAT(col, vl);
				acc = VFMACCVF_FLOAT(acc, scale, av, vl);
				VSSEV_FLOAT(yp, stride_y, acc, vl);
				yp += vl * inc_y;
				col += vl;
				remain -= vl;
			}
			x += inc_x;
			a += lda;
		}
	}
	return(0);
}

kernel/riscv64/sbgemv_t_vector.c

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
/***************************************************************************
2+
Copyright (c) 2013, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*****************************************************************************/
27+
28+
#include "common.h"

/* Accumulation happens in fp32: working vectors at LMUL=8, and the sum
   reduction result lands in a single m1 register. */
#define FLOAT_V_T vfloat32m8_t
#define FLOAT_V_T_M1 vfloat32m1_t
#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m8)     /* unit-stride fp32 load */
#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m8)   /* strided fp32 load     */

/* vl is derived from the 16-bit input element type (e16/m4): one m4 input
   vector widens exactly into one fp32 m8 accumulator vector. */
#define VSETVL(n) RISCV_RVV(vsetvl_e16m4)(n)

#if defined(HFLOAT16)
/* IEEE fp16 inputs: widening vector-vector multiply-accumulate into fp32. */
#define IFLOAT_V_T vfloat16m4_t
#define VLEV_IFLOAT RISCV_RVV(vle16_v_f16m4)
#define VLSEV_IFLOAT RISCV_RVV(vlse16_v_f16m4)
#define VFMACCVV_FLOAT RISCV_RVV(vfwmacc_vv_f32m8)
#else
/* bfloat16 inputs: widening bf16 multiply-accumulate (Zvfbfwma) into fp32. */
#define IFLOAT_V_T vbfloat16m4_t
#define VLEV_IFLOAT RISCV_RVV(vle16_v_bf16m4)
#define VLSEV_IFLOAT RISCV_RVV(vlse16_v_bf16m4)
#define VFMACCVV_FLOAT RISCV_RVV(vfwmaccbf16_vv_f32m8)
#endif

/* Unordered single-sum reduction; the 0.10 intrinsics spelled the
   destination operand explicitly, hence the wrapper macro. */
#ifdef RISCV_0p10_INTRINSICS
#define VFREDSUM_FLOAT(va, vb, gvl) vfredusum_vs_f32m8_f32m1(v_res, va, vb, gvl)
#else
#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f32m8_f32m1)
#endif
#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m8)
#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
57+
/* y := y + alpha * A^T * x  (transposed gemv, 16-bit inputs, fp32 output).
 *
 * m      rows of A (length of each column and of x)
 * n      columns of A (length of y)
 * alpha  fp32 scalar
 * a      column-major matrix of IFLOAT (fp16 or bf16), leading dimension lda
 * x      input vector of IFLOAT, element stride inc_x
 * y      in/out vector of FLOAT, element stride inc_y
 *
 * Each column of A is reduced to one fp32 dot product with x via widening
 * multiply-accumulate followed by a per-chunk sum reduction. */
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
{
	BLASLONG i = 0, j = 0, k = 0;
	BLASLONG ix = 0, iy = 0;
	IFLOAT *a_ptr = a;
	FLOAT temp;

	IFLOAT_V_T va, vx;
	FLOAT_V_T vr, vz;
	BLASLONG gvl = 0;
	FLOAT_V_T_M1 v_res;

	if (inc_x == 1) {
		for (i = 0; i < n; i++) {
			v_res = VFMVVF_FLOAT_M1(0, 1);
			gvl = VSETVL(m);
			j = 0;
			vz = VFMVVF_FLOAT(0, gvl);
			for (k = 0; k < m/gvl; k++) {
				va = VLEV_IFLOAT(&a_ptr[j], gvl);
				vx = VLEV_IFLOAT(&x[j], gvl);
				vr = VFMACCVV_FLOAT(vz, va, vx, gvl); // could vfmacc here and reduce outside loop
				v_res = VFREDSUM_FLOAT(vr, v_res, gvl); // but that reordering diverges far enough from scalar path to make tests fail
				j += gvl;
			}
			if (j < m) {
				/* tail: fewer than gvl elements remain */
				gvl = VSETVL(m-j);
				va = VLEV_IFLOAT(&a_ptr[j], gvl);
				vx = VLEV_IFLOAT(&x[j], gvl);
				vr = VFMACCVV_FLOAT(vz, va, vx, gvl);
				v_res = VFREDSUM_FLOAT(vr, v_res, gvl);
			}
			temp = (FLOAT)EXTRACT_FLOAT(v_res);
			y[iy] += alpha * temp;

			iy += inc_y;
			a_ptr += lda;
		}
	} else {
		/* BUGFIX: x holds 16-bit IFLOAT elements and is read with the
		 * 16-bit strided load VLSEV_IFLOAT, so the byte stride must be
		 * based on sizeof(IFLOAT), not sizeof(FLOAT).  The previous
		 * value strode over x at twice the intended distance, and was
		 * also inconsistent with the element-wise index advance
		 * `ix += inc_x * gvl` below. */
		BLASLONG stride_x = inc_x * sizeof(IFLOAT);
		for (i = 0; i < n; i++) {
			v_res = VFMVVF_FLOAT_M1(0, 1);
			gvl = VSETVL(m);
			j = 0;
			ix = 0;
			vz = VFMVVF_FLOAT(0, gvl);
			for (k = 0; k < m/gvl; k++) {
				va = VLEV_IFLOAT(&a_ptr[j], gvl);
				vx = VLSEV_IFLOAT(&x[ix], stride_x, gvl);
				vr = VFMACCVV_FLOAT(vz, va, vx, gvl);
				v_res = VFREDSUM_FLOAT(vr, v_res, gvl);
				j += gvl;
				ix += inc_x * gvl;
			}
			if (j < m) {
				/* tail: fewer than gvl elements remain */
				gvl = VSETVL(m-j);
				va = VLEV_IFLOAT(&a_ptr[j], gvl);
				vx = VLSEV_IFLOAT(&x[ix], stride_x, gvl);
				vr = VFMACCVV_FLOAT(vz, va, vx, gvl);
				v_res = VFREDSUM_FLOAT(vr, v_res, gvl);
			}
			temp = (FLOAT)EXTRACT_FLOAT(v_res);
			y[iy] += alpha * temp;

			iy += inc_y;
			a_ptr += lda;
		}
	}

	return (0);
}

0 commit comments

Comments
 (0)