@@ -32,6 +32,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3232#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m8)
3333#define VSEV_FLOAT RISCV_RVV(vse32_v_f32m8)
3434#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m8)
35+ #define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f32m8)
36+ #define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m8)
3537
3638#define VSETVL (n ) RISCV_RVV(vsetvl_e16m4)(n)
3739
@@ -45,7 +47,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4547#define VFMACCVF_FLOAT RISCV_RVV(vfwmaccbf16_vf_f32m8)
4648#endif
4749
48- int CNAME (BLASLONG m , BLASLONG n , BLASLONG dummy1 , FLOAT alpha , IFLOAT * a , BLASLONG lda , IFLOAT * x , BLASLONG inc_x , FLOAT * y , BLASLONG inc_y , FLOAT * buffer )
50+ int CNAME (BLASLONG m , BLASLONG n , FLOAT alpha , IFLOAT * a , BLASLONG lda , IFLOAT * x , BLASLONG inc_x , FLOAT beta , FLOAT * y , BLASLONG inc_y )
4951{
5052 if (n < 0 ) return (0 );
5153
@@ -55,7 +57,24 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, IFLOAT *a, BLASL
5557 IFLOAT_V_T va ;
5658 FLOAT_V_T vy ;
5759
60+ y_ptr = y ;
5861 if (inc_y == 1 ) {
62+ if (beta == 0.0 ) {
63+ for (i = m ; i > 0 ; i -= vl ) {
64+ vl = VSETVL (i );
65+ vy = VFMVVF_FLOAT (0.0 , vl );
66+ VSEV_FLOAT (y_ptr , vy , vl );
67+ y_ptr += vl ;
68+ }
69+ } else if (beta != 1.0 ) {
70+ for (i = m ; i > 0 ; i -= vl ) {
71+ vl = VSETVL (i );
72+ vy = VLEV_FLOAT (y_ptr , vl );
73+ vy = VFMULVF_FLOAT (vy , beta , vl );
74+ VSEV_FLOAT (y_ptr , vy , vl );
75+ y_ptr += vl ;
76+ }
77+ }
5978 for (j = 0 ; j < n ; j ++ ) {
6079 temp = (IFLOAT )(alpha * (FLOAT )(x [0 ]));
6180 y_ptr = y ;
@@ -74,6 +93,22 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, IFLOAT *a, BLASL
7493 }
7594 } else {
7695 BLASLONG stride_y = inc_y * sizeof (FLOAT );
96+ if (beta == 0.0 ) {
97+ for (i = m ; i > 0 ; i -= vl ) {
98+ vl = VSETVL (i );
99+ vy = VFMVVF_FLOAT (0.0 , vl );
100+ VSSEV_FLOAT (y_ptr , stride_y , vy , vl );
101+ y_ptr += vl * inc_y ;
102+ }
103+ } else if (beta != 1.0 ) {
104+ for (i = m ; i > 0 ; i -= vl ) {
105+ vl = VSETVL (i );
106+ vy = VLSEV_FLOAT (y_ptr , stride_y , vl );
107+ vy = VFMULVF_FLOAT (vy , beta , vl );
108+ VSSEV_FLOAT (y_ptr , stride_y , vy , vl );
109+ y_ptr += vl * inc_y ;
110+ }
111+ }
77112 for (j = 0 ; j < n ; j ++ ) {
78113 temp = (IFLOAT )(alpha * (FLOAT )(x [0 ]));
79114 y_ptr = y ;
0 commit comments