@@ -63,42 +63,60 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6363 bge $r0, N, .L999
6464 bge $r0, INCX, .L999
6565 li.d TEMP, 1
66- li.w I, -1
6766 slli.d TEMP, TEMP, ZBASE_SHIFT
6867 slli.d INCX, INCX, ZBASE_SHIFT
69- xvreplgr2vr.w neg1, I
70- xvffint.s.w neg1, neg1
7168 srai.d I, N, 3
7269 bne INCX, TEMP, .L20
7370 bge $r0, I, .L23
7471 .align 3
7572
7673.L10:
77- xvld VX0, X, 0 * SIZE
78- xvld VX1, X, 8 * SIZE
79- addi.d I, I, -1
74+ xvld VX0, X, 0
75+ xvld VX1, X, 32
76+ #ifdef DOUBLE
77+ xvpickev.d x1, VX1, VX0
78+ xvpickod.d x2, VX1, VX0
79+ #else
8080 xvpickev.w x1, VX1, VX0
8181 xvpickod.w x2, VX1, VX0
82- xvfmul.s x3, neg1, x1
83- xvfmul.s x4, neg1, x2
84- xvfcmp.clt.s VT0, x1, res0
85- xvfcmp.clt.s VT1, x2, res0
86- xvbitsel.v x1, x1, x3, VT0
87- xvbitsel.v x2, x2, x4, VT1
82+ #endif
83+ XVFSUB x3, res0, x1
84+ XVFSUB x4, res0, x2
85+ XVFMAX x1, x1, x3
86+ XVFMAX x2, x2, x4
87+ XVFADD VM1, x1, x2
88+ XVFMAX VM0, VM0, VM1
89+ #ifdef DOUBLE
90+ xvld VX0, X, 64
91+ xvld VX1, X, 96
92+ xvpickev.d x1, VX1, VX0
93+ xvpickod.d x2, VX1, VX0
94+ XVFSUB x3, res0, x1
95+ XVFSUB x4, res0, x2
96+ XVFMAX x1, x1, x3
97+ XVFMAX x2, x2, x4
98+ XVFADD VM1, x1, x2
99+ XVFMAX VM0, VM0, VM1
100+ #endif
101+ addi.d I, I, -1
88102 addi.d X, X, 16 * SIZE
89- xvfadd.s VM1, x1, x2
90- xvfmax.s VM0, VM0, VM1
91103 blt $r0, I, .L10
92104 .align 3
93105
94106.L11:
107+ #ifdef DOUBLE
108+ xvpickve.d x1, VM0, 0
109+ xvpickve.d x2, VM0, 1
110+ XVFMAX VM0, x1, x2
111+ #else
95112 xvpickve.w x1, VM0, 0
96113 xvpickve.w x2, VM0, 1
97114 xvpickve.w x3, VM0, 2
98115 xvpickve.w x4, VM0, 3
99- xvfmax.s VM1, x1, x2
100- xvfmax.s VM0, x3, x4
101- xvfmax.s VM0, VM0, VM1
116+ XVFMAX VM0, x1, x2
117+ XVFMAX VM1, x3, x4
118+ XVFMAX VM0, VM0, VM1
119+ #endif
102120 b .L23
103121 .align 3
104122
@@ -107,66 +125,66 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
107125 .align 3
108126
109127.L21:
110- fld .s t1, X, 0 * SIZE
111- fld .s t2, X, 1 * SIZE
128+ LD t1, X, 0 * SIZE
129+ LD t2, X, 1 * SIZE
112130 add .d X, X, INCX
113- fld .s t3, X, 0 * SIZE
114- fld .s t4, X, 1 * SIZE
131+ LD t3, X, 0 * SIZE
132+ LD t4, X, 1 * SIZE
115133 add .d X, X, INCX
116- fabs .s t1, t1
117- fabs .s t2, t2
118- fabs .s t3, t3
119- fabs .s t4, t4
120- fadd .s t1, t1, t2
121- fadd .s t3, t3, t4
122- fmax.s s1, t1, t3
123- fld .s t1, X, 0 * SIZE
124- fld .s t2, X, 1 * SIZE
134+ FABS t1, t1
135+ FABS t2, t2
136+ FABS t3, t3
137+ FABS t4, t4
138+ ADD t1, t1, t2
139+ ADD t3, t3, t4
140+ FMAX s1, t1, t3
141+ LD t1, X, 0 * SIZE
142+ LD t2, X, 1 * SIZE
125143 add .d X, X, INCX
126- fld .s t3, X, 0 * SIZE
127- fld .s t4, X, 1 * SIZE
144+ LD t3, X, 0 * SIZE
145+ LD t4, X, 1 * SIZE
128146 add .d X, X, INCX
129- fabs .s t1, t1
130- fabs .s t2, t2
131- fabs .s t3, t3
132- fabs .s t4, t4
133- fadd .s t1, t1, t2
134- fadd .s t3, t3, t4
135- fmax.s s1, t1, t3
136- fld .s t1, X, 0 * SIZE
137- fld .s t2, X, 1 * SIZE
147+ FABS t1, t1
148+ FABS t2, t2
149+ FABS t3, t3
150+ FABS t4, t4
151+ ADD t1, t1, t2
152+ ADD t3, t3, t4
153+ FMAX s1, t1, t3
154+ LD t1, X, 0 * SIZE
155+ LD t2, X, 1 * SIZE
138156 add .d X, X, INCX
139- fld .s t3, X, 0 * SIZE
140- fld .s t4, X, 1 * SIZE
157+ LD t3, X, 0 * SIZE
158+ LD t4, X, 1 * SIZE
141159 add .d X, X, INCX
142- fabs .s t1, t1
143- fabs .s t2, t2
144- fabs .s t3, t3
145- fabs .s t4, t4
160+ FABS t1, t1
161+ FABS t2, t2
162+ FABS t3, t3
163+ FABS t4, t4
146164 addi.d I, I, -1
147- fadd .s t1, t1, t2
148- fadd .s t3, t3, t4
149- fmax.s s3, t1, t3
150- fld .s t1, X, 0 * SIZE
151- fld .s t2, X, 1 * SIZE
165+ ADD t1, t1, t2
166+ ADD t3, t3, t4
167+ FMAX s3, t1, t3
168+ LD t1, X, 0 * SIZE
169+ LD t2, X, 1 * SIZE
152170 add .d X, X, INCX
153- fld .s t3, X, 0 * SIZE
154- fld .s t4, X, 1 * SIZE
171+ LD t3, X, 0 * SIZE
172+ LD t4, X, 1 * SIZE
155173 add .d X, X, INCX
156- fabs .s t1, t1
157- fabs .s t2, t2
158- fabs .s t3, t3
159- fabs .s t4, t4
160- fadd .s t1, t1, t2
161- fadd .s t3, t3, t4
162- fmax.s s4, t1, t3
174+ FABS t1, t1
175+ FABS t2, t2
176+ FABS t3, t3
177+ FABS t4, t4
178+ ADD t1, t1, t2
179+ ADD t3, t3, t4
180+ FMAX s4, t1, t3
163181 blt $r0, I, .L21
164182 .align 3
165183
166184.L22:
167- fmax.s s1, s1, s2
168- fmax.s s3, s3, s4
169- fmax.s s1, s1, s3
185+ FMAX s1, s1, s2
186+ FMAX s3, s3, s4
187+ FMAX s1, s1, s3
170188 .align 3
171189
172190.L23: //N<8
@@ -182,12 +200,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
182200 FABS a1, a1
183201 ADD a0, a0, a1
184202 add .d X, X, INCX
185- fmax.s s1, a0, s1
203+ FMAX s1, a0, s1
186204 blt $r0, I, .L24
187205 .align 3
188206
189207.L999:
190- fmov.s $f0, $f22
208+ MOV $f0, $f22
191209 jirl $r0, $r1, 0x0
192210 .align 3
193211
0 commit comments