Skip to content

Commit ac460eb

Browse files
committed
loongarch: Fixed i{c/z}amin LSX opt
1 parent 56d114b commit ac460eb

1 file changed

Lines changed: 99 additions & 17 deletions

File tree

kernel/loongarch64/icamin_lsx.S

Lines changed: 99 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -70,18 +70,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
7070
LD a1, X, 1 * SIZE
7171
FABS a0, a0
7272
FABS a1, a1
73-
ADD s1, a1, a0
74-
vreplvei.w VM0, VM0, 0
73+
ADD s1, a1, a0 // Initialization value
7574
vxor.v VI3, VI3, VI3 // 0
7675
#ifdef DOUBLE
7776
li.d I, -1
7877
vreplgr2vr.d VI4, I
7978
vffint.d.l VI4, VI4 // -1
80-
bne INCX, TEMP, .L20
79+
bne INCX, TEMP, .L20 // incx != 1
80+
81+
// Init Index
8182
addi.d i0, i0, 1
82-
srai.d I, N, 2
83-
bge $r0, I, .L21
84-
slli.d i0, i0, 1 //2
83+
slli.d i0, i0, 1 // 2
8584
vreplgr2vr.d VINC4, i0
8685
addi.d i0, i0, -3
8786
vinsgr2vr.d VI1, i0, 0 //initialize the index value for vectorization
@@ -91,14 +90,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
9190
vinsgr2vr.d VI0, i0, 0 //1
9291
addi.d i0, i0, 1
9392
vinsgr2vr.d VI0, i0, 1 //2
93+
94+
srai.d I, N, 2
95+
bge $r0, I, .L21
96+
97+
// Init VM0: seed the running minimum with |re| + |im| of the first two complex elements (unit stride, double).
// VI3 holds 0 and VI4 holds -1.0; both are set up before this section.
98+
vld VX0, X, 0 * SIZE // VX0 = X[0..1]
99+
vld VX1, X, 2 * SIZE // VX1 = X[2..3]
100+
vpickev.d x1, VX1, VX0 // x1 = even lanes of VX0/VX1 (real parts, assuming interleaved re/im layout)
101+
vpickod.d x2, VX1, VX0 // x2 = odd lanes of VX0/VX1 (imaginary parts, same assumption)
102+
vfmul.d x3, VI4, x1 // x3 = -x1
103+
vfmul.d x4, VI4, x2 // x4 = -x2
104+
vfcmp.clt.d VT0, x1, VI3 // VT0 = lane mask where x1 < 0
105+
vfcmp.clt.d VINC8, x2, VI3 // VINC8 is used here as the lane mask where x2 < 0
106+
vbitsel.v x1, x1, x3, VT0 // x1 = |x1|: take the negated lane where the mask is set
107+
vbitsel.v x2, x2, x4, VINC8 // x2 = |x2|
108+
vfadd.d VM0, x1, x2 // VM0 = |x1| + |x2| per lane
94109
#else
95110
li.w I, -1
96111
vreplgr2vr.w VI4, I
97112
vffint.s.w VI4, VI4 // -1
98-
bne INCX, TEMP, .L20
113+
bne INCX, TEMP, .L20 // incx != 1
114+
115+
// Init Index
99116
addi.w i0, i0, 1
100-
srai.d I, N, 2
101-
bge $r0, I, .L21
102117
slli.w i0, i0, 2 //4
103118
vreplgr2vr.w VINC4, i0
104119
addi.w i0, i0, -7
@@ -117,6 +132,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
117132
vinsgr2vr.w VI0, i0, 2 //3
118133
addi.w i0, i0, 1
119134
vinsgr2vr.w VI0, i0, 3 //4
135+
136+
srai.d I, N, 2
137+
bge $r0, I, .L21
138+
139+
// Init VM0: seed the running minimum with |re| + |im| of the first four complex elements (unit stride, single).
// VI3 holds 0 and VI4 holds -1.0; both are set up before this section.
140+
vld VX0, X, 0 * SIZE // VX0 = X[0..3]
141+
vld VX1, X, 4 * SIZE // VX1 = X[4..7]
142+
vpickev.w x1, VX1, VX0 // x1 = even lanes of VX0/VX1 (real parts, assuming interleaved re/im layout)
143+
vpickod.w x2, VX1, VX0 // x2 = odd lanes of VX0/VX1 (imaginary parts, same assumption)
144+
vfmul.s x3, VI4, x1 // x3 = -x1
145+
vfmul.s x4, VI4, x2 // x4 = -x2
146+
vfcmp.clt.s VT0, x1, VI3 // VT0 = lane mask where x1 < 0
147+
vfcmp.clt.s VINC8, x2, VI3 // VINC8 is used here as the lane mask where x2 < 0
148+
vbitsel.v x1, x1, x3, VT0 // x1 = |x1|: take the negated lane where the mask is set
149+
vbitsel.v x2, x2, x4, VINC8 // x2 = |x2|
150+
vfadd.s VM0, x1, x2 // VM0 = |x1| + |x2| per lane
120151
#endif
121152
.align 3
122153

@@ -139,6 +170,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
139170
vfcmp.ceq.d VT0, x3, VM0
140171
vbitsel.v VM0, x3, VM0, VT0
141172
vbitsel.v VI0, VI1, VI0, VT0
173+
142174
vld VX0, X, 4 * SIZE
143175
vadd.d VI1, VI1, VINC4
144176
vld VX1, X, 6 * SIZE
@@ -206,9 +238,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
206238
.L20: // INCX!=1
207239
#ifdef DOUBLE
208240
addi.d i0, i0, 1
209-
srai.d I, N, 2
210-
bge $r0, I, .L21
211-
slli.d i0, i0, 1 //2
241+
// Init index
242+
slli.d i0, i0, 1 //2
212243
vreplgr2vr.d VINC4, i0
213244
addi.d i0, i0, -3
214245
vinsgr2vr.d VI1, i0, 0 //initialize the index value for vectorization
@@ -218,10 +249,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
218249
vinsgr2vr.d VI0, i0, 0 //1
219250
addi.d i0, i0, 1
220251
vinsgr2vr.d VI0, i0, 1 //2
252+
253+
srai.d I, N, 2
254+
bge $r0, I, .L21 // N < 4
255+
256+
// Init VM0: seed the running minimum with |re| + |im| of the first two complex elements (INCX != 1, double).
// VI3 holds 0 and VI4 holds -1.0; both are set up before the branch to .L20.
257+
ld.d t1, X, 0 * SIZE // first value at X (real part, assuming interleaved re/im layout)
258+
ld.d t2, X, 1 * SIZE // second value at X (imaginary part, same assumption)
259+
add.d i1, X, INCX // i1 = X + INCX; INCX is presumably already scaled to bytes - TODO confirm
260+
ld.d t3, i1, 0 * SIZE // first value of the next element
261+
ld.d t4, i1, 1 * SIZE // second value of the next element
262+
add.d i1, i1, INCX // advance i1 once more; i1 is not read again in this section
263+
vinsgr2vr.d x1, t1, 0 // x1 lane 0 = t1
264+
vinsgr2vr.d x2, t2, 0 // x2 lane 0 = t2
265+
vinsgr2vr.d x1, t3, 1 // x1 lane 1 = t3
266+
vinsgr2vr.d x2, t4, 1 // x2 lane 1 = t4
267+
vfmul.d x3, VI4, x1 // x3 = -x1
268+
vfmul.d x4, VI4, x2 // x4 = -x2
269+
vfcmp.clt.d VT0, x1, VI3 // VT0 = lane mask where x1 < 0
270+
vfcmp.clt.d VINC8, x2, VI3 // VINC8 is used here as the lane mask where x2 < 0
271+
vbitsel.v x1, x1, x3, VT0 // x1 = |x1|: take the negated lane where the mask is set
272+
vbitsel.v x2, x2, x4, VINC8 // x2 = |x2|
273+
vfadd.d VM0, x1, x2 // VM0 = |x1| + |x2| per lane
221274
#else
222275
addi.w i0, i0, 1
223-
srai.d I, N, 2
224-
bge $r0, I, .L21
276+
277+
// Init index
225278
slli.w i0, i0, 2 //4
226279
vreplgr2vr.w VINC4, i0
227280
addi.w i0, i0, -7
@@ -240,6 +293,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
240293
vinsgr2vr.w VI0, i0, 2 //3
241294
addi.w i0, i0, 1
242295
vinsgr2vr.w VI0, i0, 3 //4
296+
297+
srai.d I, N, 2
298+
bge $r0, I, .L21 // N < 4
299+
300+
// Init VM0: seed the running minimum with |re| + |im| of the first four complex elements (INCX != 1, single).
// VI3 holds 0 and VI4 holds -1.0; both are set up before the branch to .L20.
301+
ld.w t1, X, 0 * SIZE // first value at X (real part, assuming interleaved re/im layout)
302+
ld.w t2, X, 1 * SIZE // second value at X (imaginary part, same assumption)
303+
add.d i1, X, INCX // i1 = X + INCX; INCX is presumably already scaled to bytes - TODO confirm
304+
ld.w t3, i1, 0 * SIZE
305+
ld.w t4, i1, 1 * SIZE
306+
add.d i1, i1, INCX
307+
vinsgr2vr.w x1, t1, 0 // x1 collects the first value of each element
308+
vinsgr2vr.w x2, t2, 0 // x2 collects the second value of each element
309+
vinsgr2vr.w x1, t3, 1
310+
vinsgr2vr.w x2, t4, 1
311+
ld.w t1, i1, 0 * SIZE
312+
ld.w t2, i1, 1 * SIZE
313+
add.d i1, i1, INCX
314+
ld.w t3, i1, 0 * SIZE
315+
ld.w t4, i1, 1 * SIZE
316+
add.d i1, i1, INCX // advance i1 once more; i1 is not read again in this section
317+
vinsgr2vr.w x1, t1, 2
318+
vinsgr2vr.w x2, t2, 2
319+
vinsgr2vr.w x1, t3, 3
320+
vinsgr2vr.w x2, t4, 3
// Fix: x3/x4 must hold the negated lanes before the vbitsel below selects from
// them; the other three Init VM0 sections compute them the same way.
vfmul.s x3, VI4, x1 // x3 = -x1
vfmul.s x4, VI4, x2 // x4 = -x2
321+
vfcmp.clt.s VT0, x1, VI3 // VT0 = lane mask where x1 < 0
322+
vfcmp.clt.s VINC8, x2, VI3 // VINC8 is used here as the lane mask where x2 < 0
323+
vbitsel.v x1, x1, x3, VT0 // x1 = |x1|: take the negated lane where the mask is set
324+
vbitsel.v x2, x2, x4, VINC8 // x2 = |x2|
325+
vfadd.s VM0, x1, x2 // VM0 = |x1| + |x2| per lane
243326
#endif
244327
.align 3
245328

@@ -300,8 +383,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
300383
vinsgr2vr.w x2, t2, 2
301384
vinsgr2vr.w x1, t3, 3
302385
vinsgr2vr.w x2, t4, 3
303-
vpickev.w x1, VX1, VX0
304-
vpickod.w x2, VX1, VX0
305386
#endif
306387
addi.d I, I, -1
307388
VFMUL x3, VI4, x1
@@ -358,12 +439,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
358439
#ifdef DOUBLE
359440
vfmina.d VM0, x1, x2
360441
vfcmp.ceq.d VT0, x1, VM0
442+
vbitsel.v VI0, VI2, VI1, VT0
361443
#else
362444
fcmp.ceq.d $fcc0, $f15, $f10
363445
bceqz $fcc0, .L27
364446
vfcmp.clt.s VT0, VI2, VI0
365-
#endif
366447
vbitsel.v VI0, VI0, VI2, VT0
448+
#endif
367449
.align 3
368450

369451
.L27:

0 commit comments

Comments
 (0)