@@ -53,8 +53,8 @@ PROLOGUE
5353#endif
5454
5555 /* init $f8 and $f9 to zero */
56- SUB s1 , s1 , s1
57- SUB s2 , s2 , s2
56+ xvxor.v $xr8 , $xr8 , $xr8
57+ xvxor.v $xr9 , $xr9 , $xr9
5858 slli.d INCX, INCX, BASE_SHIFT
5959 li.d TEMP, SIZE
6060 slli.d INCY, INCY, BASE_SHIFT
@@ -64,20 +64,6 @@ PROLOGUE
6464
6565 /* !((inc_x == 1) && (inc_y == 1)) */
6666
67- /* init $xr8 and $xr9 to zero */
68- #ifdef DOUBLE
69- xvldrepl.d $xr0, X, 0
70- #else
71- xvldrepl.w $xr0, X, 0
72- #endif
73- #ifdef DSDOT
74- xvfcvtl.d.s $xr0, $xr0
75- xvfsub.d $xr8, $xr0, $xr0
76- xvfsub.d $xr9, $xr0, $xr0
77- #else
78- XVFSUB $xr8, $xr0, $xr0
79- XVFSUB $xr9, $xr0, $xr0
80- #endif
8167
8268#ifdef DOUBLE
8369 srai.d I, N, 4
@@ -99,31 +85,31 @@ PROLOGUE
9985 addi.w I, I, -1
10086 addi.d X, X, 128
10187 addi.d Y, Y, 128
102- #ifdef DSDOT
88+ #ifndef DOUBLE
10389 xvfcvtl.d.s $xr10, $xr0
10490 xvfcvtl.d.s $xr11, $xr4
10591 xvfcvth.d.s $xr12, $xr0
10692 xvfcvth.d.s $xr13, $xr4
107- xvfmadd.d $xr8, $xr10, $xr12 , $xr8
108- xvfmadd.d $xr9, $xr11 , $xr13, $xr9
93+ xvfmadd.d $xr8, $xr10, $xr11 , $xr8
94+ xvfmadd.d $xr9, $xr12 , $xr13, $xr9
10995 xvfcvtl.d.s $xr10, $xr1
11096 xvfcvtl.d.s $xr11, $xr5
11197 xvfcvth.d.s $xr12, $xr1
11298 xvfcvth.d.s $xr13, $xr5
113- xvfmadd.d $xr8, $xr10, $xr12 , $xr8
114- xvfmadd.d $xr9, $xr11 , $xr13, $xr9
99+ xvfmadd.d $xr8, $xr10, $xr11 , $xr8
100+ xvfmadd.d $xr9, $xr12 , $xr13, $xr9
115101 xvfcvtl.d.s $xr10, $xr2
116102 xvfcvtl.d.s $xr11, $xr6
117103 xvfcvth.d.s $xr12, $xr2
118104 xvfcvth.d.s $xr13, $xr6
119- xvfmadd.d $xr8, $xr10, $xr12 , $xr8
120- xvfmadd.d $xr9, $xr11 , $xr13, $xr9
105+ xvfmadd.d $xr8, $xr10, $xr11 , $xr8
106+ xvfmadd.d $xr9, $xr12 , $xr13, $xr9
121107 xvfcvtl.d.s $xr10, $xr3
122108 xvfcvtl.d.s $xr11, $xr7
123109 xvfcvth.d.s $xr12, $xr3
124110 xvfcvth.d.s $xr13, $xr7
125- xvfmadd.d $xr8, $xr10, $xr12 , $xr8
126- xvfmadd.d $xr9, $xr11 , $xr13, $xr9
111+ xvfmadd.d $xr8, $xr10, $xr11 , $xr8
112+ xvfmadd.d $xr9, $xr12 , $xr13, $xr9
127113#else
128114 XVFMADD $xr8, $xr0, $xr4, $xr8
129115 XVFMADD $xr9, $xr1, $xr5, $xr9
@@ -149,41 +135,26 @@ PROLOGUE
149135 addi.w I, I, -1
150136 addi.d X, X, 32
151137 addi.d Y, Y, 32
152- #ifdef DSDOT
138+ #ifndef DOUBLE
153139 xvfcvtl.d.s $xr10, $xr0
154140 xvfcvtl.d.s $xr11, $xr4
155141 xvfcvth.d.s $xr12, $xr0
156142 xvfcvth.d.s $xr13, $xr4
157- xvfmadd.d $xr8, $xr10, $xr12 , $xr8
158- xvfmadd.d $xr9, $xr11 , $xr13, $xr9
143+ xvfmadd.d $xr8, $xr10, $xr11 , $xr8
144+ xvfmadd.d $xr9, $xr12 , $xr13, $xr9
159145#else
160146 XVFMADD $xr8, $xr0, $xr4, $xr8
161147#endif
162148 bnez I, .L13
163149 .align 3
164150.L14:
165151 /* store dot in s1 $f8 */
166- #ifdef DSDOT
167152 xvfadd.d $xr8, $xr8, $xr9
168- fsub .s s2, s2, s2 /* set s2 to 0.0 */
153+ movgr2fr.d s2, $r0 /* set s2 to +0.0 bitwise; s2 - s2 would give NaN if s2 held Inf/NaN and poison the final s1 + s2 */
169154 xvpermi.q $xr0, $xr8, 0x1
170155 vfadd.d $vr8, $vr8, $vr0
171156 vpackod.d $vr0, $vr8, $vr8
172157 vfadd.d $vr8, $vr8, $vr0
173- #else
174- XVFADD $xr8, $xr8, $xr9
175- SUB s2, s2, s2 /* set s2 to 0.0 */
176- xvpermi.q $xr0, $xr8, 0x1
177- VFADD $vr8, $vr8, $vr0
178- vpackod.d $vr0, $vr8, $vr8
179- #ifdef DOUBLE
180- VFADD $vr8, $vr8, $vr0
181- #else
182- VFADD $vr8, $vr8, $vr0
183- vpackod.w $vr0, $vr8, $vr8
184- VFADD $vr8, $vr8, $vr0
185- #endif /* defined DOUBLE */
186- #endif /* defined DSDOT */
187158 .align 3
188159.L15:
189160#ifdef DOUBLE
@@ -197,7 +168,7 @@ PROLOGUE
197168 /* FLOAT: 1~7 ; DOUBLE: 1~3 */
198169 LD a1, X, 0
199170 LD b1, Y, 0
200- #ifdef DSDOT
171+ #ifndef DOUBLE
201172 fcvt.d.s a1, a1
202173 fcvt.d.s b1, b1
203174 fmadd.d s1, b1, a1, s1
@@ -240,7 +211,7 @@ PROLOGUE
240211 add .d X, X, INCX
241212 LD b1, Y, 0 * SIZE
242213 add .d Y, Y, INCY
243- #ifdef DSDOT
214+ #ifndef DOUBLE
244215 fcvt.d.s a1, a1
245216 fcvt.d.s b1, b1
246217 fmadd.d s1, b1, a1, s1
@@ -252,7 +223,7 @@ PROLOGUE
252223 add .d X, X, INCX
253224 LD b1, Y, 0 * SIZE
254225 add .d Y, Y, INCY
255- #ifdef DSDOT
226+ #ifndef DOUBLE
256227 fcvt.d.s a1, a1
257228 fcvt.d.s b1, b1
258229 fmadd.d s2, b1, a1, s2
@@ -264,7 +235,7 @@ PROLOGUE
264235 add .d X, X, INCX
265236 LD b1, Y, 0 * SIZE
266237 add .d Y, Y, INCY
267- #ifdef DSDOT
238+ #ifndef DOUBLE
268239 fcvt.d.s a1, a1
269240 fcvt.d.s b1, b1
270241 fmadd.d s1, b1, a1, s1
@@ -276,7 +247,7 @@ PROLOGUE
276247 add .d X, X, INCX
277248 LD b1, Y, 0 * SIZE
278249 add .d Y, Y, INCY
279- #ifdef DSDOT
250+ #ifndef DOUBLE
280251 fcvt.d.s a1, a1
281252 fcvt.d.s b1, b1
282253 fmadd.d s2, b1, a1, s2
@@ -288,7 +259,7 @@ PROLOGUE
288259 add .d X, X, INCX
289260 LD b1, Y, 0 * SIZE
290261 add .d Y, Y, INCY
291- #ifdef DSDOT
262+ #ifndef DOUBLE
292263 fcvt.d.s a1, a1
293264 fcvt.d.s b1, b1
294265 fmadd.d s1, b1, a1, s1
@@ -300,7 +271,7 @@ PROLOGUE
300271 add .d X, X, INCX
301272 LD b1, Y, 0 * SIZE
302273 add .d Y, Y, INCY
303- #ifdef DSDOT
274+ #ifndef DOUBLE
304275 fcvt.d.s a1, a1
305276 fcvt.d.s b1, b1
306277 fmadd.d s2, b1, a1, s2
@@ -312,7 +283,7 @@ PROLOGUE
312283 add .d X, X, INCX
313284 LD b1, Y, 0 * SIZE
314285 add .d Y, Y, INCY
315- #ifdef DSDOT
286+ #ifndef DOUBLE
316287 fcvt.d.s a1, a1
317288 fcvt.d.s b1, b1
318289 fmadd.d s1, b1, a1, s1
@@ -325,7 +296,7 @@ PROLOGUE
325296 LD b1, Y, 0 * SIZE
326297 add .d Y, Y, INCY
327298 addi.d I, I, -1
328- #ifdef DSDOT
299+ #ifndef DOUBLE
329300 fcvt.d.s a1, a1
330301 fcvt.d.s b1, b1
331302 fmadd.d s2, b1, a1, s2
@@ -346,7 +317,7 @@ PROLOGUE
346317 LD b1, Y, 0 * SIZE
347318 add .d Y, Y, INCY
348319 addi.d I, I, -1
349- #ifdef DSDOT
320+ #ifndef DOUBLE
350321 fcvt.d.s a1, a1
351322 fcvt.d.s b1, b1
352323 fmadd.d s1, b1, a1, s1
@@ -357,12 +328,13 @@ PROLOGUE
357328 .align 3
358329
359330.L999:
360- #ifdef DSDOT
361331 fadd .d $f0, s1, s2
332+ move $r4, $r17
333+ #if defined(DOUBLE)
334+ #elif defined(DSDOT)
362335#else
363- ADD $f0, s1, s2
336+ fcvt.s.d $f0, $f0
364337#endif
365- move $r4, $r17
366338 jirl $r0, $r1, 0x0
367339
368340EPILOGUE
0 commit comments