Skip to content

Commit dbf4ed8

Browse files
committed
xtensa: add helpers for division, remainder and shifts
Don't rely on libgcc being present; build the kernel's own versions of the helpers with the correct ABI.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
1 parent 8c9ab55 commit dbf4ed8

10 files changed

Lines changed: 539 additions & 0 deletions

File tree

arch/xtensa/include/asm/asmmacro.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,40 @@
191191
#endif
192192
.endm
193193

194+
/*
 * do_nsau cnt, val, tmp, a
 *
 * Leave in cnt the number of leading zero bits of the 32-bit value in val.
 *
 * With XCHAL_HAVE_NSA this is the single nsau instruction and neither tmp
 * nor a is touched.  Otherwise the count is built in software: val is
 * copied into a, 16 and then 8 is added to cnt while the upper half / top
 * byte of a is zero (a is shifted up each time), and the count for the
 * remaining top byte is read from the byte table __nsau_data.  In that
 * case tmp and a are both clobbered and __nsau_data has to be linked in
 * (this commit emits it from arch/xtensa/lib/modsi3.S).
 *
 * The numeric labels 0: and 1: are local labels, so the macro can be
 * expanded more than once in the same function.
 */
.macro do_nsau cnt, val, tmp, a
195+
#if XCHAL_HAVE_NSA
196+
nsau \cnt, \val
197+
#else
198+
mov \a, \val /* work on a copy of val */
199+
movi \cnt, 0
200+
extui \tmp, \a, 16, 16 /* tmp = upper 16 bits */
201+
bnez \tmp, 0f /* a bit is set up there: no adjustment */
202+
movi \cnt, 16 /* upper half is all zero */
203+
slli \a, \a, 16 /* bring the lower half to the top */
204+
0:
205+
extui \tmp, \a, 24, 8 /* tmp = top byte */
206+
bnez \tmp, 1f
207+
addi \cnt, \cnt, 8 /* top byte is zero as well */
208+
slli \a, \a, 8
209+
1:
210+
movi \tmp, __nsau_data /* table base address */
211+
extui \a, \a, 24, 8 /* table index = top byte */
212+
add \tmp, \tmp, \a
213+
l8ui \tmp, \tmp, 0 /* leading zeros within that byte */
214+
add \cnt, \cnt, \tmp
215+
#endif /* !XCHAL_HAVE_NSA */
216+
.endm
217+
218+
/*
 * do_abs dst, src, tmp
 *
 * dst = absolute value of the signed 32-bit value in src.
 *
 * Uses the abs instruction when XCHAL_HAVE_ABS is set; tmp is then unused.
 * The fallback negates src into tmp, puts src back into tmp if src was
 * already non-negative, and only then writes dst.  Because src is not
 * modified before that final move, the sequence stays correct when dst
 * and src name the same register.  tmp is clobbered by the fallback.
 */
.macro do_abs dst, src, tmp
219+
#if XCHAL_HAVE_ABS
220+
abs \dst, \src
221+
#else
222+
neg \tmp, \src /* tmp = -src */
223+
movgez \tmp, \src, \src /* if (src >= 0) tmp = src */
224+
mov \dst, \tmp
225+
#endif
226+
.endm
227+
194228
#define XTENSA_STACK_ALIGNMENT 16
195229

196230
#if defined(__XTENSA_WINDOWED_ABI__)

arch/xtensa/lib/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,7 @@
44
#
55

66
# Objects that are always built into the xtensa library.  The second and
# third lines list the 64-bit shift helpers and the 32-bit divide/remainder
# helpers (plus mulsi3.o) that this commit adds so the kernel does not have
# to rely on libgcc being present.
lib-y += memcopy.o memset.o checksum.o \
7+
ashldi3.o ashrdi3.o lshrdi3.o \
8+
divsi3.o udivsi3.o modsi3.o umodsi3.o mulsi3.o \
79
usercopy.o strncpy_user.o strnlen_user.o
810
lib-$(CONFIG_PCI) += pci-auto.o

arch/xtensa/lib/ashldi3.S

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
2+
#include <linux/linkage.h>
3+
#include <asm/asmmacro.h>
4+
#include <asm/core.h>
5+
6+
/*
 * __ashldi3 - 64-bit left shift helper.
 *
 * The 64-bit operand is taken from a2/a3 and the shift count from a4; the
 * shifted value is left in a2/a3.  xh / xl name the register that holds
 * the upper / lower 32 bits, which depends on the byte order.
 *
 * NOTE(review): only the low five bits of a4 reach the shift amount
 * register and the word selection is a signed compare against 32, so the
 * count is presumably expected to be in 0..63 - confirm against callers.
 */
#ifdef __XTENSA_EB__
#define xh	a2
#define xl	a3
#else
#define xh	a3
#define xl	a2
#endif /* __XTENSA_EB__ */

ENTRY(__ashldi3)

	abi_entry_default
	ssl	a4			/* set up SAR for a left shift by a4 */
	blti	a4, 32, .Lboth_words

	/* count >= 32: every surviving bit comes from the low word */
	sll	xh, xl
	movi	xl, 0
	abi_ret_default

.Lboth_words:
	/* count < 32: the high word also receives bits from the low word */
	src	xh, xh, xl
	sll	xl, xl
	abi_ret_default

ENDPROC(__ashldi3)

arch/xtensa/lib/ashrdi3.S

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
2+
#include <linux/linkage.h>
3+
#include <asm/asmmacro.h>
4+
#include <asm/core.h>
5+
6+
/*
 * __ashrdi3 - 64-bit arithmetic (sign-propagating) right shift helper.
 *
 * The 64-bit operand is taken from a2/a3 and the shift count from a4; the
 * shifted value is left in a2/a3.  xh / xl name the register that holds
 * the upper / lower 32 bits, which depends on the byte order.
 *
 * NOTE(review): only the low five bits of a4 reach the shift amount
 * register and the word selection is a signed compare against 32, so the
 * count is presumably expected to be in 0..63 - confirm against callers.
 */
#ifdef __XTENSA_EB__
#define xh	a2
#define xl	a3
#else
#define xh	a3
#define xl	a2
#endif /* __XTENSA_EB__ */

ENTRY(__ashrdi3)

	abi_entry_default
	ssr	a4			/* set up SAR for a right shift by a4 */
	blti	a4, 32, .Lboth_words

	/* count >= 32: the low word is taken from the high word ... */
	sra	xl, xh
	/* ... and the high word becomes 32 copies of the sign bit */
	srai	xh, xh, 31
	abi_ret_default

.Lboth_words:
	/* count < 32: the low word also receives bits from the high word */
	src	xl, xh, xl
	sra	xh, xh
	abi_ret_default

ENDPROC(__ashrdi3)

arch/xtensa/lib/divsi3.S

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
2+
#include <linux/linkage.h>
3+
#include <asm/asmmacro.h>
4+
#include <asm/core.h>
5+
6+
/*
 * __divsi3 - signed 32-bit division helper: a2 = a2 / a3.
 *
 * With XCHAL_HAVE_DIV32 this is a single quos instruction.  Otherwise the
 * quotient is computed on the absolute values with a test-subtract-and-
 * shift loop, and the sign (taken from dividend ^ divisor) is applied at
 * the end, so the software result is truncated toward zero.
 *
 * Register use in the software path:
 *   a2  quotient (scratch for do_nsau before the loop starts)
 *   a3  udivisor; shifted left by count, then walked back down
 *   a4  scratch for do_abs, then the loop count
 *   a5  nsau (udividend); later the negated quotient
 *   a6  udividend / running remainder
 *   a7  dividend ^ divisor; only its sign is tested
 *   a8  scratch for do_nsau
 *
 * A zero divisor reaches .Lerror, which executes an illegal instruction.
 */
ENTRY(__divsi3)
7+
8+
abi_entry_default
9+
#if XCHAL_HAVE_DIV32
10+
quos a2, a2, a3 /* hardware signed quotient */
11+
#else
12+
xor a7, a2, a3 /* sign = dividend ^ divisor */
13+
do_abs a6, a2, a4 /* udividend = abs (dividend) */
14+
do_abs a3, a3, a4 /* udivisor = abs (divisor) */
15+
bltui a3, 2, .Lle_one /* check if udivisor <= 1 */
16+
do_nsau a5, a6, a2, a8 /* udividend_shift = nsau (udividend) */
17+
do_nsau a4, a3, a2, a8 /* udivisor_shift = nsau (udivisor) */
18+
bgeu a5, a4, .Lspecial /* udividend has no more significant bits than udivisor: quotient is 0 or 1 */
19+
20+
sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */
21+
ssl a4 /* set up SAR so that the sll below shifts left by count */
22+
sll a3, a3 /* udivisor <<= count */
23+
movi a2, 0 /* quotient = 0 */
24+
25+
/* test-subtract-and-shift loop; one quotient bit on each iteration */
26+
#if XCHAL_HAVE_LOOPS
27+
loopnez a4, .Lloopend /* hardware loop: run the body count times */
28+
#endif /* XCHAL_HAVE_LOOPS */
29+
.Lloop:
30+
bltu a6, a3, .Lzerobit /* remainder < shifted udivisor: this bit is 0 */
31+
sub a6, a6, a3
32+
addi a2, a2, 1 /* this quotient bit is 1 */
33+
.Lzerobit:
34+
slli a2, a2, 1 /* make room for the next quotient bit */
35+
srli a3, a3, 1 /* step udivisor down one bit position */
36+
#if !XCHAL_HAVE_LOOPS
37+
addi a4, a4, -1 /* software loop counter; count is >= 1 on entry because a5 >= a4 went to .Lspecial */
38+
bnez a4, .Lloop
39+
#endif /* !XCHAL_HAVE_LOOPS */
40+
.Lloopend:
41+
42+
bltu a6, a3, .Lreturn /* last bit: a3 has been shifted back down by count */
43+
addi a2, a2, 1 /* increment if udividend >= udivisor */
44+
.Lreturn:
45+
neg a5, a2
46+
movltz a2, a5, a7 /* return (sign < 0) ? -quotient : quotient */
47+
abi_ret_default
48+
49+
.Lle_one:
50+
beqz a3, .Lerror /* udivisor == 0 */
51+
neg a2, a6 /* if udivisor == 1, then return... */
52+
movgez a2, a6, a7 /* (sign < 0) ? -udividend : udividend */
53+
abi_ret_default
54+
55+
.Lspecial:
56+
bltu a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */
57+
movi a2, 1
58+
movi a4, -1
59+
movltz a2, a4, a7 /* else return (sign < 0) ? -1 : 1 */
60+
abi_ret_default
61+
62+
.Lerror:
63+
/* Divide by zero: Use an illegal instruction to force an exception.
64+
The subsequent "DIV0" string can be recognized by the exception
65+
handler to identify the real cause of the exception. */
66+
ill
67+
.ascii "DIV0"
68+
69+
.Lreturn0:
70+
movi a2, 0
71+
#endif /* XCHAL_HAVE_DIV32 */
72+
abi_ret_default /* return for the quos path and for .Lreturn0 */
73+
74+
ENDPROC(__divsi3)

arch/xtensa/lib/lshrdi3.S

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
2+
#include <linux/linkage.h>
3+
#include <asm/asmmacro.h>
4+
#include <asm/core.h>
5+
6+
/*
 * __lshrdi3 - 64-bit logical (zero-filling) right shift helper.
 *
 * The 64-bit operand is taken from a2/a3 and the shift count from a4; the
 * shifted value is left in a2/a3.  xh / xl name the register that holds
 * the upper / lower 32 bits, which depends on the byte order.
 *
 * NOTE(review): only the low five bits of a4 reach the shift amount
 * register and the word selection is a signed compare against 32, so the
 * count is presumably expected to be in 0..63 - confirm against callers.
 */
#ifdef __XTENSA_EB__
#define xh	a2
#define xl	a3
#else
#define xh	a3
#define xl	a2
#endif /* __XTENSA_EB__ */

ENTRY(__lshrdi3)

	abi_entry_default
	ssr	a4			/* set up SAR for a right shift by a4 */
	blti	a4, 32, .Lboth_words

	/* count >= 32: the low word is taken from the high word ... */
	srl	xl, xh
	/* ... and the high word is cleared */
	movi	xh, 0
	abi_ret_default

.Lboth_words:
	/* count < 32: the low word also receives bits from the high word */
	src	xl, xh, xl
	srl	xh, xh
	abi_ret_default

ENDPROC(__lshrdi3)

arch/xtensa/lib/modsi3.S

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
2+
#include <linux/linkage.h>
3+
#include <asm/asmmacro.h>
4+
#include <asm/core.h>
5+
6+
/*
 * __modsi3 - signed 32-bit remainder helper: a2 = a2 % a3.
 *
 * With XCHAL_HAVE_DIV32 this is a single rems instruction.  Otherwise the
 * remainder is computed on the absolute values with a test-subtract-and-
 * shift loop and is negated at the end when the original dividend (kept
 * in a7) was negative; the sign of the divisor does not affect the result.
 *
 * Register use in the software path:
 *   a2  udividend / running remainder
 *   a3  udivisor; shifted left by count, then walked back down
 *   a4  scratch for do_abs, then the loop count
 *   a5  nsau (udividend)
 *   a6  scratch for do_nsau
 *   a7  original signed dividend
 *   a8  scratch for do_nsau
 *
 * udivisor == 1 returns 0; a zero divisor executes an illegal instruction.
 */
ENTRY(__modsi3)
7+
8+
abi_entry_default
9+
#if XCHAL_HAVE_DIV32
10+
rems a2, a2, a3 /* hardware signed remainder */
11+
#else
12+
mov a7, a2 /* save original (signed) dividend */
13+
do_abs a2, a2, a4 /* udividend = abs (dividend) */
14+
do_abs a3, a3, a4 /* udivisor = abs (divisor) */
15+
bltui a3, 2, .Lle_one /* check if udivisor <= 1 */
16+
do_nsau a5, a2, a6, a8 /* udividend_shift = nsau (udividend) */
17+
do_nsau a4, a3, a6, a8 /* udivisor_shift = nsau (udivisor) */
18+
bgeu a5, a4, .Lspecial /* udividend has no more significant bits than udivisor: at most one subtraction needed */
19+
20+
sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */
21+
ssl a4 /* set up SAR so that the sll below shifts left by count */
22+
sll a3, a3 /* udivisor <<= count */
23+
24+
/* test-subtract-and-shift loop */
25+
#if XCHAL_HAVE_LOOPS
26+
loopnez a4, .Lloopend /* hardware loop: run the body count times */
27+
#endif /* XCHAL_HAVE_LOOPS */
28+
.Lloop:
29+
bltu a2, a3, .Lzerobit /* remainder < shifted udivisor: nothing to subtract */
30+
sub a2, a2, a3
31+
.Lzerobit:
32+
srli a3, a3, 1 /* step udivisor down one bit position */
33+
#if !XCHAL_HAVE_LOOPS
34+
addi a4, a4, -1 /* software loop counter; count is >= 1 on entry because a5 >= a4 went to .Lspecial */
35+
bnez a4, .Lloop
36+
#endif /* !XCHAL_HAVE_LOOPS */
37+
.Lloopend:
38+
39+
.Lspecial: /* reached both by the early branch and by falling out of the loop */
40+
bltu a2, a3, .Lreturn
41+
sub a2, a2, a3 /* subtract again if udividend >= udivisor */
42+
.Lreturn:
43+
bgez a7, .Lpositive
44+
neg a2, a2 /* if (dividend < 0), return -udividend */
45+
.Lpositive:
46+
abi_ret_default
47+
48+
.Lle_one:
49+
bnez a3, .Lreturn0 /* udivisor == 1: the remainder is 0 */
50+
51+
/* Divide by zero: Use an illegal instruction to force an exception.
52+
The subsequent "DIV0" string can be recognized by the exception
53+
handler to identify the real cause of the exception. */
54+
ill
55+
.ascii "DIV0"
56+
57+
.Lreturn0:
58+
movi a2, 0
59+
#endif /* XCHAL_HAVE_DIV32 */
60+
abi_ret_default /* return for the rems path and for .Lreturn0 */
61+
62+
ENDPROC(__modsi3)
63+
64+
/*
 * __nsau_data - 256-entry byte table, emitted only when the core lacks
 * the NSA option.  Entry i holds the number of leading zero bits of the
 * 8-bit value i: 8 for 0, 7 for 1, ... and 0 for 128..255.  It is read by
 * the software fallback of the do_nsau macro.
 *
 * The table is written as run lengths; each .fill emits <count> bytes of
 * size 1 with the given value.
 */
#if !XCHAL_HAVE_NSA
	.section .rodata
	.align	4
	.global	__nsau_data
	.type	__nsau_data, @object
__nsau_data:
	.byte	8			/* index 0 */
	.byte	7			/* index 1 */
	.fill	2, 1, 6			/* index 2..3 */
	.fill	4, 1, 5			/* index 4..7 */
	.fill	8, 1, 4			/* index 8..15 */
	.fill	16, 1, 3		/* index 16..31 */
	.fill	32, 1, 2		/* index 32..63 */
	.fill	64, 1, 1		/* index 64..127 */
	.fill	128, 1, 0		/* index 128..255 */
	.size	__nsau_data, . - __nsau_data
#endif /* !XCHAL_HAVE_NSA */

0 commit comments

Comments
 (0)