Skip to content

Commit 6159cff

Browse files
committed
loongarch: Fixed i{s/c/z}amin LASX opt
1 parent 7d75591 commit 6159cff

2 files changed

Lines changed: 141 additions & 39 deletions

File tree

kernel/loongarch64/iamin_lasx.S

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
144144
xvfmina.d VM1, VM0, VM1
145145
#else
146146
addi.d I, I, -1
147-
xvadd.w VI2, VI1, VINC8
147+
xvadd.w VI1, VI1, VINC8
148+
xvor.v VI2, VI1, VI1
148149
xvfmina.s VM1, VX0, VM0
149150
#endif
150151
XVCMPEQ VT0, VM0, VM1
@@ -189,6 +190,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
189190
XVFMINA VM0, VM0, VM1
190191
XVCMPEQ VT0, VM0, VM1
191192
xvbitsel.v VI0, VINC8, VINC4, VT0
193+
// $f9: x1
192194
fcmp.ceq.d $fcc0, $f15, $f9
193195
bceqz $fcc0, .L26
194196
XVCMPLT VT0, VI1, VI0
@@ -357,7 +359,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
357359
xvinsgr2vr.w VX0, t2, 5
358360
xvinsgr2vr.w VX0, t3, 6
359361
xvinsgr2vr.w VX0, t4, 7
360-
xvadd.w VI2, VI1, VINC8
362+
xvadd.w VI1, VI1, VINC8
363+
xvor.v VI2, VI1, VI1
361364
xvfmina.s VM1, VX0, VM0
362365
xvfcmp.ceq.s VT0, VM1, VM0
363366
#endif
@@ -393,7 +396,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
393396
movfr2gr.d i0, $f20
394397
.align 3
395398
#else
396-
fmov.s $f16, $f20
399+
fmov.s $f7, $f20
397400
.align 3
398401

399402
.L252:
@@ -449,9 +452,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
449452
.L292:
450453
xvfmina.s VM0, VX0, VM0
451454
xvfcmp.ceq.s VT0, VM0, VX0
452-
xvbitsel.v VI0, VI0, VI1, VT0
455+
xvbitsel.v VI0, VI0, $xr7, VT0
453456
movfr2gr.s i0, $f20
454-
455457
#endif
456458

457459
.L21: // N<8

kernel/loongarch64/icamin_lasx.S

Lines changed: 134 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -72,12 +72,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
7272
FABS a1, a1
7373
ADD s1, a1, a0
7474
#ifdef DOUBLE
75-
xvreplve0.d VM0, VM0
7675
xvxor.v VI3, VI3, VI3 // 0
7776
li.d I, -1
7877
xvreplgr2vr.d VI4, I
7978
xvffint.d.l VI4, VI4 // -1
8079
bne INCX, TEMP, .L20
80+
// Init VM0
81+
xvreplve0.d VM0, VM0
82+
xvld VX0, X, 0 * SIZE
83+
xvld VX1, X, 4 * SIZE
84+
xvpickev.d x1, VX1, VX0
85+
xvpickod.d x2, VX1, VX0
86+
xvfmul.d x3, VI4, x1
87+
xvfmul.d x4, VI4, x2
88+
xvfcmp.clt.d VT0, x1, VI3
89+
xvfcmp.clt.d VINC8, x2, VI3
90+
xvbitsel.v x1, x1, x3, VT0
91+
xvbitsel.v x2, x2, x4, VINC8
92+
xvfadd.d VM0, x1, x2
93+
8194
addi.d i0, i0, 1
8295
srai.d I, N, 2
8396
bge $r0, I, .L21
@@ -100,12 +113,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
100113
addi.d i0, i0, 2
101114
xvinsgr2vr.d VI0, i0, 3 //4
102115
#else
103-
xvreplve0.w VM0, VM0
104116
xvxor.v VI3, VI3, VI3 // 0
105117
li.w I, -1
106118
xvreplgr2vr.w VI4, I
107119
xvffint.s.w VI4, VI4 // -1
108120
bne INCX, TEMP, .L20
121+
// Init VM0
122+
xvld VX0, X, 0 * SIZE
123+
xvld VX1, X, 8 * SIZE
124+
xvpickev.w x1, VX1, VX0
125+
xvpickod.w x2, VX1, VX0
126+
xvfmul.s x3, VI4, x1
127+
xvfmul.s x4, VI4, x2
128+
xvfcmp.clt.s VT0, x1, VI3
129+
xvfcmp.clt.s VINC4, x2, VI3
130+
xvbitsel.v x1, x1, x3, VT0
131+
xvbitsel.v x2, x2, x4, VINC4
132+
xvfadd.s VM0, x1, x2
133+
109134
addi.w i0, i0, 1
110135
srai.d I, N, 3
111136
bge $r0, I, .L21
@@ -160,6 +185,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
160185
xvfcmp.clt.d VINC8, x2, VI3
161186
xvbitsel.v x1, x1, x3, VT0
162187
xvbitsel.v x2, x2, x4, VINC8
188+
addi.d X, X, 8 * SIZE
163189
#else
164190
xvadd.w VI1, VI1, VINC8
165191
xvld VX1, X, 8 * SIZE
@@ -172,11 +198,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
172198
xvfcmp.clt.s VINC4, x2, VI3
173199
xvbitsel.v x1, x1, x3, VT0
174200
xvbitsel.v x2, x2, x4, VINC4
201+
addi.d X, X, 16 * SIZE
175202
#endif
176203
XVFADD x1, x1, x2
177204
XVFMIN x3, VM0, x1
178205
XVCMPEQ VT0, x3, VM0
179-
addi.d X, X, 8 * SIZE
180206
xvbitsel.v VM0, x3, VM0, VT0
181207
xvbitsel.v VI0, VI1, VI0, VT0
182208
blt $r0, I, .L10
@@ -214,13 +240,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
214240
xvpickve.w x2, VM0, 1
215241
xvpickve.w x3, VM0, 2
216242
xvpickve.w x4, VM0, 3
217-
xvfcmp.clt.s VT0, x1, x2
243+
xvfcmp.clt.s VT0, x2, x1
218244
xvbitsel.v VM1, x1, x2, VT0
219245
xvbitsel.v VINC4, VI1, VI2, VT0
220-
xvfcmp.clt.s VT0, x3, x4
246+
xvfcmp.clt.s VT0, x4, x3
221247
xvbitsel.v VM0, x3, x4, VT0
222248
xvbitsel.v VINC8, VI3, VI4, VT0
223-
xvfcmp.clt.s VT0, VM0, VM1
249+
xvfcmp.clt.s VT0, VM1, VM0
224250
xvbitsel.v VM0, VM0, VM1, VT0
225251
xvbitsel.v VI0, VINC8, VINC4, VT0
226252
#endif
@@ -233,28 +259,105 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
233259

234260
.L20: // INCX!=1
235261
#ifdef DOUBLE
262+
// Init VM0
263+
ld.d t1, X, 0 * SIZE
264+
ld.d t2, X, 1 * SIZE
265+
add.d i1, X, INCX
266+
ld.d t3, i1, 0 * SIZE
267+
ld.d t4, i1, 1 * SIZE
268+
add.d i1, i1, INCX
269+
xvinsgr2vr.d x1, t1, 0
270+
xvinsgr2vr.d x2, t2, 0
271+
xvinsgr2vr.d x1, t3, 1
272+
xvinsgr2vr.d x2, t4, 1
273+
ld.d t1, i1, 0 * SIZE
274+
ld.d t2, i1, 1 * SIZE
275+
add.d i1, i1, INCX
276+
ld.d t3, i1, 0 * SIZE
277+
ld.d t4, i1, 1 * SIZE
278+
xvinsgr2vr.d x1, t1, 2
279+
xvinsgr2vr.d x2, t2, 2
280+
xvinsgr2vr.d x1, t3, 3
281+
xvinsgr2vr.d x2, t4, 3
282+
xvfmul.d x3, VI4, x1
283+
xvfmul.d x4, VI4, x2
284+
xvfcmp.clt.d VT0, x1, VI3
285+
xvfcmp.clt.d VINC8, x2, VI3
286+
xvbitsel.v x1, x1, x3, VT0
287+
xvbitsel.v x2, x2, x4, VINC8
288+
xvfadd.d VM0, x1, x2
289+
236290
addi.d i0, i0, 1
237291
srai.d I, N, 2
238292
bge $r0, I, .L21
239293
slli.d i0, i0, 2 //4
240294
xvreplgr2vr.d VINC4, i0
241295
addi.d i0, i0, -7
242296
xvinsgr2vr.d VI1, i0, 0 //initialize the index value for vectorization
243-
addi.d i0, i0, 2
297+
addi.d i0, i0, 1
244298
xvinsgr2vr.d VI1, i0, 1
245-
addi.d i0, i0, -1
299+
addi.d i0, i0, 1
246300
xvinsgr2vr.d VI1, i0, 2
247-
addi.d i0, i0, 2
301+
addi.d i0, i0, 1
248302
xvinsgr2vr.d VI1, i0, 3
249303
addi.d i0, i0, 1
250304
xvinsgr2vr.d VI0, i0, 0 //1
251-
addi.d i0, i0, 2
252-
xvinsgr2vr.d VI0, i0, 1 //3
253-
addi.d i0, i0, -1
254-
xvinsgr2vr.d VI0, i0, 2 //2
255-
addi.d i0, i0, 2
305+
addi.d i0, i0, 1
306+
xvinsgr2vr.d VI0, i0, 1 //2
307+
addi.d i0, i0, 1
308+
xvinsgr2vr.d VI0, i0, 2 //3
309+
addi.d i0, i0, 1
256310
xvinsgr2vr.d VI0, i0, 3 //4
257311
#else
312+
// Init VM0
313+
ld.w t1, X, 0 * SIZE
314+
ld.w t2, X, 1 * SIZE
315+
add.d i1, X, INCX
316+
ld.w t3, i1, 0 * SIZE
317+
ld.w t4, i1, 1 * SIZE
318+
add.d i1, i1, INCX
319+
xvinsgr2vr.w x1, t1, 0
320+
xvinsgr2vr.w x2, t2, 0
321+
xvinsgr2vr.w x1, t3, 1
322+
xvinsgr2vr.w x2, t4, 1
323+
ld.w t1, i1, 0 * SIZE
324+
ld.w t2, i1, 1 * SIZE
325+
add.d i1, i1, INCX
326+
ld.w t3, i1, 0 * SIZE
327+
ld.w t4, i1, 1 * SIZE
328+
add.d i1, i1, INCX
329+
xvinsgr2vr.w x1, t1, 2
330+
xvinsgr2vr.w x2, t2, 2
331+
xvinsgr2vr.w x1, t3, 3
332+
xvinsgr2vr.w x2, t4, 3
333+
ld.w t1, i1, 0 * SIZE
334+
ld.w t2, i1, 1 * SIZE
335+
add.d i1, i1, INCX
336+
ld.w t3, i1, 0 * SIZE
337+
ld.w t4, i1, 1 * SIZE
338+
add.d i1, i1, INCX
339+
xvinsgr2vr.w x1, t1, 4
340+
xvinsgr2vr.w x2, t2, 4
341+
xvinsgr2vr.w x1, t3, 5
342+
xvinsgr2vr.w x2, t4, 5
343+
ld.w t1, i1, 0 * SIZE
344+
ld.w t2, i1, 1 * SIZE
345+
add.d i1, i1, INCX
346+
ld.w t3, i1, 0 * SIZE
347+
ld.w t4, i1, 1 * SIZE
348+
add.d i1, i1, INCX
349+
xvinsgr2vr.w x1, t1, 6
350+
xvinsgr2vr.w x2, t2, 6
351+
xvinsgr2vr.w x1, t3, 7
352+
xvinsgr2vr.w x2, t4, 7
353+
xvfmul.s x3, VI4, x1
354+
xvfmul.s x4, VI4, x2
355+
xvfcmp.clt.s VT0, x1, VI3
356+
xvfcmp.clt.s VINC8, x2, VI3
357+
xvbitsel.v x1, x1, x3, VT0
358+
xvbitsel.v x2, x2, x4, VINC8
359+
xvfadd.s VM0, x1, x2
360+
258361
addi.w i0, i0, 1
259362
srai.d I, N, 3
260363
bge $r0, I, .L21
@@ -264,31 +367,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
264367
xvinsgr2vr.w VI1, i0, 0 //initialize the index value for vectorization
265368
addi.w i0, i0, 1
266369
xvinsgr2vr.w VI1, i0, 1
267-
addi.w i0, i0, 3
370+
addi.w i0, i0, 1
268371
xvinsgr2vr.w VI1, i0, 2
269372
addi.w i0, i0, 1
270373
xvinsgr2vr.w VI1, i0, 3
271-
addi.w i0, i0, -3
374+
addi.w i0, i0, 1
272375
xvinsgr2vr.w VI1, i0, 4
273376
addi.w i0, i0, 1
274377
xvinsgr2vr.w VI1, i0, 5
275-
addi.w i0, i0, 3
378+
addi.w i0, i0, 1
276379
xvinsgr2vr.w VI1, i0, 6
277380
addi.w i0, i0, 1
278381
xvinsgr2vr.w VI1, i0, 7
279382
addi.w i0, i0, 1
280383
xvinsgr2vr.w VI0, i0, 0 //1
281384
addi.w i0, i0, 1
282385
xvinsgr2vr.w VI0, i0, 1 //2
283-
addi.w i0, i0, 3
284-
xvinsgr2vr.w VI0, i0, 2 //5
285386
addi.w i0, i0, 1
286-
xvinsgr2vr.w VI0, i0, 3 //6
287-
addi.w i0, i0, -3
288-
xvinsgr2vr.w VI0, i0, 4 //3
387+
xvinsgr2vr.w VI0, i0, 2 //3
388+
addi.w i0, i0, 1
389+
xvinsgr2vr.w VI0, i0, 3 //4
390+
addi.w i0, i0, 1
391+
xvinsgr2vr.w VI0, i0, 4 //5
392+
addi.w i0, i0, 1
393+
xvinsgr2vr.w VI0, i0, 5 //6
289394
addi.w i0, i0, 1
290-
xvinsgr2vr.w VI0, i0, 5 //4
291-
addi.w i0, i0, 3
292395
xvinsgr2vr.w VI0, i0, 6 //7
293396
addi.w i0, i0, 1
294397
xvinsgr2vr.w VI0, i0, 7 //8
@@ -350,7 +453,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
350453
xvinsgr2vr.w x2, t2, 4
351454
xvinsgr2vr.w x1, t3, 5
352455
xvinsgr2vr.w x2, t4, 5
353-
xvadd.w VI1, VI1, VINC8
354456
ld.w t1, X, 0 * SIZE
355457
ld.w t2, X, 1 * SIZE
356458
add.d X, X, INCX
@@ -361,8 +463,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
361463
xvinsgr2vr.w x2, t2, 6
362464
xvinsgr2vr.w x1, t3, 7
363465
xvinsgr2vr.w x2, t4, 7
364-
xvpickev.w x1, VX1, VX0
365-
xvpickod.w x2, VX1, VX0
366466
#endif
367467
addi.d I, I, -1
368468
XVFMUL x3, VI4, x1
@@ -410,13 +510,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
410510
xvpickve.w x2, VM0, 1
411511
xvpickve.w x3, VM0, 2
412512
xvpickve.w x4, VM0, 3
413-
xvfcmp.clt.s VT0, x1, x2
513+
xvfcmp.clt.s VT0, x2, x1
414514
xvbitsel.v VM1, x1, x2, VT0
415515
xvbitsel.v VINC4, VI1, VI2, VT0
416-
xvfcmp.clt.s VT0, x3, x4
516+
xvfcmp.clt.s VT0, x4, x3
417517
xvbitsel.v VM0, x3, x4, VT0
418518
xvbitsel.v VINC8, VI3, VI4, VT0
419-
xvfcmp.clt.s VT0, VM0, VM1
519+
xvfcmp.clt.s VT0, VM1, VM0
420520
xvbitsel.v VM0, VM0, VM1, VT0
421521
#endif
422522
xvbitsel.v VI0, VINC8, VINC4, VT0
@@ -475,13 +575,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
475575
xvpickve.w x2, VM0, 5
476576
xvpickve.w x3, VM0, 6
477577
xvpickve.w x4, VM0, 7
478-
xvfcmp.clt.s VT0, x1, x2
578+
xvfcmp.clt.s VT0, x2, x1
479579
xvbitsel.v x1, x1, x2, VT0
480580
xvbitsel.v VINC4, VI1, VI2, VT0
481-
xvfcmp.clt.s VT0, x3, x4
581+
xvfcmp.clt.s VT0, x4, x3
482582
xvbitsel.v VM0, x3, x4, VT0
483583
xvbitsel.v VINC8, VI3, VI4, VT0
484-
xvfcmp.clt.s VT0, VM0, x1
584+
xvfcmp.clt.s VT0, x1, VM0
485585
xvbitsel.v VM0, VM0, x1, VT0
486586
xvbitsel.v VI0, VINC8, VINC4, VT0
487587
fcmp.ceq.d $fcc0, $f15, $f9
@@ -512,7 +612,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
512612
.align 3
513613

514614
.L292:
515-
fcmp.clt.s $fcc0, $f15, $f13
615+
fcmp.clt.s $fcc0, $f13, $f15
516616
fsel $f15, $f15, $f13, $fcc0
517617
fsel $f20, $f20, $f16, $fcc0
518618
movfr2gr.s i0, $f20

0 commit comments

Comments (0)