@@ -29,8 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2929#include <math.h>
3030
3131#if defined(DOUBLE )
32-
33- #define ABS fabs
// VFMVFS_FLOAT names the intrinsic the kernel body uses to read the scalar
// result out of v_res; it takes the place of a pointer-cast read of v_res.
32+ #define VFMVFS_FLOAT vfmv_f_s_f64m1_f64
// NOTE(review): VSETVL is built on the e64m8 variant while VSETVL_MAX uses
// e64m1 -- confirm the differing register grouping is intentional.
3433#define VSETVL (n ) vsetvl_e64m8(n)
3534#define VSETVL_MAX vsetvlmax_e64m1()
3635#define FLOAT_V_T vfloat64m8_t
@@ -54,8 +53,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5453#define VADDVX_UINT vadd_vx_u64m8
5554#define VMVVX_UINT vmv_v_x_u64m8
5655#else
57-
58- #define ABS fabsf
// Single-precision counterpart: VFMVFS_FLOAT names the intrinsic the kernel
// body uses to read the scalar result out of v_res.
56+ #define VFMVFS_FLOAT vfmv_f_s_f32m1_f32
// NOTE(review): VSETVL is built on the e32m8 variant while VSETVL_MAX uses
// e32m1 -- confirm the differing register grouping is intentional.
5957#define VSETVL (n ) vsetvl_e32m8(n)
6058#define VSETVL_MAX vsetvlmax_e32m1()
6159#define FLOAT_V_T vfloat32m8_t
@@ -85,7 +83,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
8583{
8684 BLASLONG i = 0 , j = 0 ;
8785 FLOAT maxf = 0.0 ;
// max_index is declared with the same type that v_max_index is later read
// through: BLASLONG in the DOUBLE build, unsigned int otherwise.
86+ #ifdef DOUBLE
87+ BLASLONG max_index = 0 ;
88+ #else
8889 unsigned int max_index = 0 ;
90+ #endif
// A non-positive n or inc_x returns max_index, which is still 0 at this point.
8991 if (n <= 0 || inc_x <= 0 ) return (max_index );
9092
9193 FLOAT_V_T vx , v_max ;
@@ -117,11 +119,14 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
117119 j += gvl ;
118120 }
119121 v_res = VFREDMAXVS_FLOAT (v_res , v_max , v_z0 , gvl );
120- maxf = * (( FLOAT * ) & v_res );
// The reduction result is read through the VFMVFS_FLOAT intrinsic instead of
// by casting the address of v_res to FLOAT *.
122+ maxf = VFMVFS_FLOAT ( v_res );
121123 mask = VMFGEVF_FLOAT (v_max , maxf , gvl );
// NOTE(review): the VMFIRSTM result is used directly as an element offset into
// v_max_index below; presumably at least one mask lane is always set here --
// confirm (e.g. for NaN inputs).
122124 max_index = VMFIRSTM (mask ,gvl );
123- max_index = * ((unsigned int * )& v_max_index + max_index );
124-
// Fetch the index stored at that lane, reading v_max_index as BLASLONG
// elements in the DOUBLE build and as unsigned int elements otherwise.
125+ #ifdef DOUBLE
126+ max_index = * ((BLASLONG * )& v_max_index + max_index );
127+ #else
128+ max_index = * ((unsigned int * )& v_max_index + max_index );
129+ #endif
// Elements not yet consumed (j < n) are processed with a vector length
// requested for the remaining n - j items.
125130 if (j < n ){
126131 gvl = VSETVL (n - j );
127132 vx = VLEV_FLOAT (& x [j ], gvl );
@@ -130,15 +135,19 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
130135 v_max = VFRSUBVF_MASK_FLOAT (mask , vx , vx , 0 , gvl );
131136
132137 v_res = VFREDMAXVS_FLOAT (v_res , v_max , v_z0 , gvl );
133- FLOAT cur_maxf = * (( FLOAT * ) & v_res );
// The tail's reduction result is read through the VFMVFS_FLOAT intrinsic.
138+ FLOAT cur_maxf = VFMVFS_FLOAT ( v_res );
// Only a strictly larger tail value replaces the index chosen earlier.
134139 if (cur_maxf > maxf ){
135140 //tail index
// Rebuild the index vector for the tail: VIDV_UINT output with j added to
// every lane (presumably lane ids offset by j -- confirm VIDV_UINT semantics).
136141 v_max_index = VIDV_UINT (gvl );
137142 v_max_index = VADDVX_UINT (v_max_index , j , gvl );
138143
139144 mask = VMFGEVF_FLOAT (v_max , cur_maxf , gvl );
140145 max_index = VMFIRSTM (mask ,gvl );
// Read the selected lane of v_max_index with the element type that matches
// max_index for this build.
146+ #ifdef DOUBLE
147+ max_index = * ((BLASLONG * )& v_max_index + max_index );
148+ #else
141149 max_index = * ((unsigned int * )& v_max_index + max_index );
150+ #endif
142151 }
143152 }
144153 }else {
144153 }else {
@@ -165,11 +174,14 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
165174 idx += inc_v ;
166175 }
167176 v_res = VFREDMAXVS_FLOAT (v_res , v_max , v_z0 , gvl );
168- maxf = * (( FLOAT * ) & v_res );
// The reduction result is read through the VFMVFS_FLOAT intrinsic instead of
// by casting the address of v_res to FLOAT *.
177+ maxf = VFMVFS_FLOAT ( v_res );
169178 mask = VMFGEVF_FLOAT (v_max , maxf , gvl );
// NOTE(review): the VMFIRSTM result is used directly as an element offset into
// v_max_index below; presumably at least one mask lane is always set here --
// confirm (e.g. for NaN inputs).
170179 max_index = VMFIRSTM (mask ,gvl );
// Fetch the index stored at that lane, reading v_max_index as BLASLONG
// elements in the DOUBLE build and as unsigned int elements otherwise.
180+ #ifdef DOUBLE
181+ max_index = * ((BLASLONG * )& v_max_index + max_index );
182+ #else
171183 max_index = * ((unsigned int * )& v_max_index + max_index );
172-
184+ #endif
// Elements not yet consumed (j < n) are loaded with stride_x starting at
// x[idx], using a vector length requested for the remaining n - j items.
173185 if (j < n ){
174186 gvl = VSETVL (n - j );
175187 vx = VLSEV_FLOAT (& x [idx ], stride_x , gvl );
@@ -178,19 +190,21 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
178190 v_max = VFRSUBVF_MASK_FLOAT (mask , vx , vx , 0 , gvl );
179191
180192 v_res = VFREDMAXVS_FLOAT (v_res , v_max , v_z0 , gvl );
181- FLOAT cur_maxf = * (( FLOAT * ) & v_res );
// The tail's reduction result is read through the VFMVFS_FLOAT intrinsic.
193+ FLOAT cur_maxf = VFMVFS_FLOAT ( v_res );
// Only a strictly larger tail value replaces the index chosen earlier.
182194 if (cur_maxf > maxf ){
183195 //tail index
// Rebuild the index vector for the tail: VIDV_UINT output with j added to
// every lane (presumably lane ids offset by j -- confirm VIDV_UINT semantics).
184196 v_max_index = VIDV_UINT (gvl );
185197 v_max_index = VADDVX_UINT (v_max_index , j , gvl );
186198
187199 mask = VMFGEVF_FLOAT (v_max , cur_maxf , gvl );
188200 max_index = VMFIRSTM (mask ,gvl );
// Read the selected lane of v_max_index with the element type that matches
// max_index for this build.
201+ #ifdef DOUBLE
202+ max_index = * ((BLASLONG * )& v_max_index + max_index );
203+ #else
189204 max_index = * ((unsigned int * )& v_max_index + max_index );
205+ #endif
190206 }
191207 }
192208 }
// The value returned is the selected index plus one.
193209 return (max_index + 1 );
194210}
195-
196-
0 commit comments