Skip to content

Commit 75ab93a

Browse files
Merge patch series "Zbb string optimizations"
Heiko Stuebner <heiko@sntech.de> says: From: Heiko Stuebner <heiko.stuebner@vrull.eu> This series still tries to allow optimized string functions for specific extensions. The last approach of using an inline base function to hold the alternative calls did cause some issues in a number of places. So instead of that we're now just using an alternative j at the beginning of the generic function to jump to a separate place inside the function itself. * b4-shazam-merge: RISC-V: add zbb support to string functions RISC-V: add infrastructure to allow different str* implementations Link: https://lore.kernel.org/r/20230113212301.3534711-1-heiko@sntech.de Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2 parents 285b6a1 + b6fcdb1 commit 75ab93a

12 files changed

Lines changed: 468 additions & 1 deletion

File tree

arch/riscv/Kconfig

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,30 @@ config RISCV_ISA_SVPBMT
416416

417417
If you don't know what to do here, say Y.
418418

419+
config TOOLCHAIN_HAS_ZBB
420+
bool
421+
default y
422+
depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zbb)
423+
depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zbb)
424+
depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
425+
depends on AS_IS_GNU
426+
427+
config RISCV_ISA_ZBB
428+
bool "Zbb extension support for bit manipulation instructions"
429+
depends on TOOLCHAIN_HAS_ZBB
430+
depends on !XIP_KERNEL && MMU
431+
select RISCV_ALTERNATIVE
432+
default y
433+
help
434+
Adds support to dynamically detect the presence of the ZBB
435+
extension (basic bit manipulation) and enable its usage.
436+
437+
The Zbb extension provides instructions to accelerate a number
438+
of bit-specific operations (count bit population, sign extending,
439+
bit rotation, etc).
440+
441+
If you don't know what to do here, say Y.
442+
419443
config TOOLCHAIN_HAS_ZICBOM
420444
bool
421445
default y

arch/riscv/include/asm/errata_list.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@
2424

2525
#define CPUFEATURE_SVPBMT 0
2626
#define CPUFEATURE_ZICBOM 1
27-
#define CPUFEATURE_NUMBER 2
27+
#define CPUFEATURE_ZBB 2
28+
#define CPUFEATURE_NUMBER 3
2829

2930
#ifdef __ASSEMBLY__
3031

arch/riscv/include/asm/hwcap.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ enum riscv_isa_ext_id {
5858
RISCV_ISA_EXT_SSTC,
5959
RISCV_ISA_EXT_SVINVAL,
6060
RISCV_ISA_EXT_SVPBMT,
61+
RISCV_ISA_EXT_ZBB,
6162
RISCV_ISA_EXT_ZICBOM,
6263
RISCV_ISA_EXT_ZIHINTPAUSE,
6364
RISCV_ISA_EXT_ID_MAX

arch/riscv/include/asm/string.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,16 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t);
1818
#define __HAVE_ARCH_MEMMOVE
1919
extern asmlinkage void *memmove(void *, const void *, size_t);
2020
extern asmlinkage void *__memmove(void *, const void *, size_t);
21+
22+
#define __HAVE_ARCH_STRCMP
23+
extern asmlinkage int strcmp(const char *cs, const char *ct);
24+
25+
#define __HAVE_ARCH_STRLEN
26+
extern asmlinkage __kernel_size_t strlen(const char *);
27+
28+
#define __HAVE_ARCH_STRNCMP
29+
extern asmlinkage int strncmp(const char *cs, const char *ct, size_t count);
30+
2131
/* For those files which don't want to check by kasan. */
2232
#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
2333
#define memcpy(dst, src, len) __memcpy(dst, src, len)

arch/riscv/kernel/cpu.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ arch_initcall(riscv_cpuinfo_init);
185185
* New entries to this struct should follow the ordering rules described above.
186186
*/
187187
static struct riscv_isa_ext_data isa_ext_arr[] = {
188+
__RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
188189
__RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
189190
__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
190191
__RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),

arch/riscv/kernel/cpufeature.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ void __init riscv_fill_hwcap(void)
227227
SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC);
228228
SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL);
229229
SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
230+
SET_ISA_EXT_MAP("zbb", RISCV_ISA_EXT_ZBB);
230231
SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM);
231232
SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE);
232233
}
@@ -302,6 +303,20 @@ static bool __init_or_module cpufeature_probe_zicbom(unsigned int stage)
302303
return true;
303304
}
304305

306+
/*
 * Decide whether the Zbb alternatives should be requested for the given
 * patching stage.
 *
 * @stage: alternatives patching stage the caller is currently running.
 *
 * Returns true only when all of the following hold:
 *  - the kernel was built with CONFIG_RISCV_ISA_ZBB,
 *  - @stage is not RISCV_ALTERNATIVES_EARLY_BOOT,
 *  - riscv_isa_extension_available() reports the ZBB extension.
 * Otherwise returns false.
 *
 * NOTE(review): early-boot patching is declined unconditionally; presumably
 * the ISA extension information is not yet usable at that stage - confirm.
 */
static bool __init_or_module cpufeature_probe_zbb(unsigned int stage)
307+
{
308+
if (!IS_ENABLED(CONFIG_RISCV_ISA_ZBB))
309+
return false;
310+
311+
if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
312+
return false;
313+
314+
if (!riscv_isa_extension_available(NULL, ZBB))
315+
return false;
316+
317+
return true;
318+
}
319+
305320
/*
306321
* Probe presence of individual extensions.
307322
*
@@ -320,6 +335,9 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage)
320335
if (cpufeature_probe_zicbom(stage))
321336
cpu_req_feature |= BIT(CPUFEATURE_ZICBOM);
322337

338+
if (cpufeature_probe_zbb(stage))
339+
cpu_req_feature |= BIT(CPUFEATURE_ZBB);
340+
323341
return cpu_req_feature;
324342
}
325343

arch/riscv/kernel/riscv_ksyms.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
EXPORT_SYMBOL(memset);
1313
EXPORT_SYMBOL(memcpy);
1414
EXPORT_SYMBOL(memmove);
15+
EXPORT_SYMBOL(strcmp);
16+
EXPORT_SYMBOL(strlen);
17+
EXPORT_SYMBOL(strncmp);
1518
EXPORT_SYMBOL(__memset);
1619
EXPORT_SYMBOL(__memcpy);
1720
EXPORT_SYMBOL(__memmove);

arch/riscv/lib/Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@ lib-y += delay.o
33
lib-y += memcpy.o
44
lib-y += memset.o
55
lib-y += memmove.o
6+
lib-y += strcmp.o
7+
lib-y += strlen.o
8+
lib-y += strncmp.o
69
lib-$(CONFIG_MMU) += uaccess.o
710
lib-$(CONFIG_64BIT) += tishift.o
811

arch/riscv/lib/strcmp.S

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
/* SPDX-License-Identifier: GPL-2.0-only */
2+
3+
#include <linux/linkage.h>
4+
#include <asm/asm.h>
5+
#include <asm-generic/export.h>
6+
#include <asm/alternative-macros.h>
7+
#include <asm/errata_list.h>
8+
9+
/* int strcmp(const char *cs, const char *ct) */
10+
SYM_FUNC_START(strcmp)
11+
12+
/*
 * Alternative patch site: the default instruction is "nop", so execution
 * falls through to the generic byte loop below. The replacement
 * instruction is "j strcmp_zbb", selected by CPUFEATURE_ZBB.
 */
ALTERNATIVE("nop", "j strcmp_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB)
13+
14+
/*
15+
* Returns
16+
* a0 - comparison result, value like strcmp
17+
*
18+
* Parameters
19+
* a0 - string1
20+
* a1 - string2
21+
*
22+
* Clobbers
23+
* t0, t1
24+
*/
25+
/* Generic path: compare one byte from each string per iteration. */
1:
26+
lbu t0, 0(a0)
27+
lbu t1, 0(a1)
28+
addi a0, a0, 1
29+
addi a1, a1, 1
30+
bne t0, t1, 2f
31+
bnez t0, 1b
32+
/* Bytes were equal and t0 was NUL: both strings ended together. */
li a0, 0
33+
ret
34+
2:
35+
/*
36+
* strcmp only needs to return (< 0, 0, > 0) values
37+
* not necessarily -1, 0, +1
38+
*/
39+
sub a0, t0, t1
40+
ret
41+
42+
/*
43+
* Variant of strcmp using the ZBB extension if available
44+
*/
45+
#ifdef CONFIG_RISCV_ISA_ZBB
46+
strcmp_zbb:
47+
48+
.option push
49+
.option arch,+zbb
50+
51+
/*
52+
* Returns
53+
* a0 - comparison result, value like strcmp
54+
*
55+
* Parameters
56+
* a0 - string1
57+
* a1 - string2
58+
*
59+
* Clobbers
60+
* t0, t1, t2, t3, t4
61+
*/
62+
63+
/*
 * Use the word loop only when both pointers are SZREG-aligned (low bits
 * of a0|a1 are zero); otherwise go to the byte loop at 3. t4 is set to
 * all-ones, the value orc.b produces for a word with no NUL byte.
 */
or t2, a0, a1
64+
li t4, -1
65+
and t2, t2, SZREG-1
66+
bnez t2, 3f
67+
68+
/* Main loop for aligned string. */
69+
.p2align 3
70+
1:
71+
REG_L t0, 0(a0)
72+
REG_L t1, 0(a1)
73+
orc.b t3, t0
74+
/* t3 != all-ones means the word loaded from string1 contains a NUL. */
bne t3, t4, 2f
75+
addi a0, a0, SZREG
76+
addi a1, a1, SZREG
77+
beq t0, t1, 1b
78+
79+
/*
80+
* Words don't match, and no null byte in the first
81+
* word. Get bytes in big-endian order and compare.
82+
*/
83+
#ifndef CONFIG_CPU_BIG_ENDIAN
84+
rev8 t0, t0
85+
rev8 t1, t1
86+
#endif
87+
88+
/* Synthesize (t0 >= t1) ? 1 : -1 in a branchless sequence. */
89+
sltu a0, t0, t1
90+
neg a0, a0
91+
ori a0, a0, 1
92+
ret
93+
94+
2:
95+
/*
96+
* Found a null byte.
97+
* If words don't match, fall back to simple loop.
98+
*/
99+
/*
 * a0/a1 have not been advanced past this word yet, so the byte loop
 * re-reads it from its first byte.
 */
bne t0, t1, 3f
100+
101+
/* Otherwise, strings are equal. */
102+
li a0, 0
103+
ret
104+
105+
/* Simple loop for misaligned strings. */
106+
.p2align 3
107+
3:
108+
lbu t0, 0(a0)
109+
lbu t1, 0(a1)
110+
addi a0, a0, 1
111+
addi a1, a1, 1
112+
bne t0, t1, 4f
113+
bnez t0, 3b
114+
115+
/* Also reached by fall-through when both bytes are NUL; t0 - t1 is then 0. */
4:
116+
sub a0, t0, t1
117+
ret
118+
119+
.option pop
120+
#endif
121+
SYM_FUNC_END(strcmp)

arch/riscv/lib/strlen.S

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
/* SPDX-License-Identifier: GPL-2.0-only */
2+
3+
#include <linux/linkage.h>
4+
#include <asm/asm.h>
5+
#include <asm-generic/export.h>
6+
#include <asm/alternative-macros.h>
7+
#include <asm/errata_list.h>
8+
9+
/* size_t strlen(const char *s) */
10+
SYM_FUNC_START(strlen)
11+
12+
/*
 * Alternative patch site: the default instruction is "nop", so execution
 * falls through to the generic byte loop below. The replacement
 * instruction is "j strlen_zbb", selected by CPUFEATURE_ZBB.
 */
ALTERNATIVE("nop", "j strlen_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB)
13+
14+
/*
15+
* Returns
16+
* a0 - string length
17+
*
18+
* Parameters
19+
* a0 - String to measure
20+
*
21+
* Clobbers:
22+
* t0, t1
23+
*/
24+
/* t1 is the scan cursor; a0 keeps the start address for the final subtraction. */
mv t1, a0
25+
1:
26+
lbu t0, 0(t1)
27+
beqz t0, 2f
28+
addi t1, t1, 1
29+
j 1b
30+
2:
31+
sub a0, t1, a0
32+
ret
33+
34+
/*
35+
* Variant of strlen using the ZBB extension if available
36+
*/
37+
#ifdef CONFIG_RISCV_ISA_ZBB
38+
strlen_zbb:
39+
40+
#ifdef CONFIG_CPU_BIG_ENDIAN
41+
# define CZ clz
42+
# define SHIFT sll
43+
#else
44+
# define CZ ctz
45+
# define SHIFT srl
46+
#endif
47+
48+
.option push
49+
.option arch,+zbb
50+
51+
/*
52+
* Returns
53+
* a0 - string length
54+
*
55+
* Parameters
56+
* a0 - String to measure
57+
*
58+
* Clobbers
59+
* t0, t1, t2, t3
60+
*/
61+
62+
/* Number of irrelevant bytes in the first word. */
63+
andi t2, a0, SZREG-1
64+
65+
/* Align pointer. */
66+
andi t0, a0, -SZREG
67+
68+
/*
 * t3 = SZREG minus the misalignment, i.e. the number of string bytes
 * held in the first aligned word. t2 is then scaled from bytes to bits
 * so it can be used as the SHIFT amount.
 */
li t3, SZREG
69+
sub t3, t3, t2
70+
slli t2, t2, 3
71+
72+
/* Get the first word. */
73+
REG_L t1, 0(t0)
74+
75+
/*
76+
* Shift away the partial data we loaded to remove the irrelevant bytes
77+
* preceding the string with the effect of adding NUL bytes at the
78+
* end of the string's first word.
79+
*/
80+
SHIFT t1, t1, t2
81+
82+
/* Convert non-NUL into 0xff and NUL into 0x00. */
83+
orc.b t1, t1
84+
85+
/* Convert non-NUL into 0x00 and NUL into 0xff. */
86+
not t1, t1
87+
88+
/*
89+
* Search for the first set bit (corresponding to a NUL byte in the
90+
* original chunk).
91+
*/
92+
CZ t1, t1
93+
94+
/*
95+
* The first chunk is special: compare against the number
96+
* of valid bytes in this chunk.
97+
*/
98+
srli a0, t1, 3
99+
bgtu t3, a0, 3f
100+
101+
/* Prepare for the word comparison loop. */
102+
addi t2, t0, SZREG
103+
/* t3 = all-ones, the orc.b result for a word containing no NUL byte. */
li t3, -1
104+
105+
/*
106+
* Our critical loop is 4 instructions and processes data in
107+
* 4 byte or 8 byte chunks.
108+
*/
109+
.p2align 3
110+
1:
111+
REG_L t1, SZREG(t0)
112+
addi t0, t0, SZREG
113+
orc.b t1, t1
114+
beq t1, t3, 1b
115+
2:
116+
not t1, t1
117+
CZ t1, t1
118+
119+
/*
 * Get the byte count of the whole words scanned after the first one
 * (t2 holds the address of the second word, t0 the word with the NUL).
 */
120+
sub t2, t0, t2
121+
122+
/* Add number of characters in the first word. */
123+
add a0, a0, t2
124+
srli t1, t1, 3
125+
126+
/* Add number of characters in the last word. */
127+
add a0, a0, t1
128+
/* a0 holds the string length on both paths that reach this label. */
3:
129+
ret
130+
131+
.option pop
132+
#endif
133+
SYM_FUNC_END(strlen)

0 commit comments

Comments
 (0)