Skip to content

Commit aecb7f9

Browse files
committed
Change signature of SBGEMV.
1 parent 809e1cb commit aecb7f9

2 files changed

Lines changed: 39 additions & 4 deletions

File tree

kernel/riscv64/sbgemv_n_vector.c

Lines changed: 36 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3232
#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m8)
3333
#define VSEV_FLOAT RISCV_RVV(vse32_v_f32m8)
3434
#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m8)
35+
#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f32m8)
36+
#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m8)
3537

3638
#define VSETVL(n) RISCV_RVV(vsetvl_e16m4)(n)
3739

@@ -45,7 +47,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4547
#define VFMACCVF_FLOAT RISCV_RVV(vfwmaccbf16_vf_f32m8)
4648
#endif
4749

48-
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
50+
int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
4951
{
5052
if (n < 0) return(0);
5153

@@ -55,7 +57,24 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, IFLOAT *a, BLASL
5557
IFLOAT_V_T va;
5658
FLOAT_V_T vy;
5759

60+
y_ptr = y;
5861
if (inc_y == 1) {
62+
if (beta == 0.0) {
63+
for (i = m; i > 0; i -= vl) {
64+
vl = VSETVL(i);
65+
vy = VFMVVF_FLOAT(0.0, vl);
66+
VSEV_FLOAT(y_ptr, vy, vl);
67+
y_ptr += vl;
68+
}
69+
} else if (beta != 1.0) {
70+
for (i = m; i > 0; i -= vl) {
71+
vl = VSETVL(i);
72+
vy = VLEV_FLOAT(y_ptr, vl);
73+
vy = VFMULVF_FLOAT(vy, beta, vl);
74+
VSEV_FLOAT(y_ptr, vy, vl);
75+
y_ptr += vl;
76+
}
77+
}
5978
for (j = 0; j < n; j++) {
6079
temp = (IFLOAT)(alpha * (FLOAT)(x[0]));
6180
y_ptr = y;
@@ -74,6 +93,22 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, IFLOAT *a, BLASL
7493
}
7594
} else {
7695
BLASLONG stride_y = inc_y * sizeof(FLOAT);
96+
if (beta == 0.0) {
97+
for (i = m; i > 0; i -= vl) {
98+
vl = VSETVL(i);
99+
vy = VFMVVF_FLOAT(0.0, vl);
100+
VSSEV_FLOAT(y_ptr, stride_y, vy, vl);
101+
y_ptr += vl * inc_y;
102+
}
103+
} else if (beta != 1.0) {
104+
for (i = m; i > 0; i -= vl) {
105+
vl = VSETVL(i);
106+
vy = VLSEV_FLOAT(y_ptr, stride_y, vl);
107+
vy = VFMULVF_FLOAT(vy, beta, vl);
108+
VSSEV_FLOAT(y_ptr, stride_y, vy, vl);
109+
y_ptr += vl * inc_y;
110+
}
111+
}
77112
for (j = 0; j < n; j++) {
78113
temp = (IFLOAT)(alpha * (FLOAT)(x[0]));
79114
y_ptr = y;

kernel/riscv64/sbgemv_t_vector.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5454
#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m8)
5555
#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
5656

57-
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
57+
int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
5858
{
5959
BLASLONG i = 0, j = 0, k = 0;
6060
BLASLONG ix = 0, iy = 0;
@@ -92,7 +92,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, IFLOAT *a, BLASL
9292
v_res = VFREDSUM_FLOAT(vr, v_res, gvl);
9393
}
9494
temp = (FLOAT)EXTRACT_FLOAT(v_res);
95-
y[iy] += alpha * temp;
95+
y[iy] = y[iy] * beta + alpha * temp;
9696

9797
iy += inc_y;
9898
a_ptr += lda;
@@ -123,7 +123,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, IFLOAT *a, BLASL
123123
v_res = VFREDSUM_FLOAT(vr, v_res, gvl);
124124
}
125125
temp = (FLOAT)EXTRACT_FLOAT(v_res);
126-
y[iy] += alpha * temp;
126+
y[iy] = y[iy] * beta + alpha * temp;
127127

128128
iy += inc_y;
129129
a_ptr += lda;

0 commit comments

Comments
 (0)