Skip to content

Commit b6fcdb1

Browse files
Heiko Stuebner authored and palmer-dabbelt committed
RISC-V: add zbb support to string functions
Add handling for ZBB extension and add support for using it as a
variant for optimized string functions.

Support for the Zbb-str-variants is limited to the GNU-assembler
for now, as LLVM has not yet acquired the functionality to
selectively change the arch option in assembler code.
This is still under review at
https://reviews.llvm.org/D123515

Co-developed-by: Christoph Muellner <christoph.muellner@vrull.eu>
Signed-off-by: Christoph Muellner <christoph.muellner@vrull.eu>
Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20230113212301.3534711-3-heiko@sntech.de
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
1 parent 56e0790 commit b6fcdb1

8 files changed

Lines changed: 334 additions & 1 deletion

File tree

arch/riscv/Kconfig

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,30 @@ config RISCV_ISA_SVPBMT
416416

417417
If you don't know what to do here, say Y.
418418

419+
# Promptless helper symbol: y only when the toolchain can build Zbb code.
# The compiler must accept a "_zbb" -march string for the configured word
# size (the 64BIT / 32BIT lines), the linker must satisfy one of the two
# version checks, and the assembler must be GNU as. The GNU as requirement
# exists because the Zbb string routines switch the target arch inside the
# assembly source (".option arch,+zbb"), which the LLVM assembler could not
# do when this symbol was added.
config TOOLCHAIN_HAS_ZBB
420+
bool
421+
default y
422+
depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zbb)
423+
depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zbb)
424+
depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900
425+
depends on AS_IS_GNU
426+
427+
# User-visible switch for runtime Zbb support. RISCV_ALTERNATIVE is selected
# because the Zbb string routines are entered through ALTERNATIVE() patch
# sites keyed on CONFIG_RISCV_ISA_ZBB.
# NOTE(review): the reason for "!XIP_KERNEL && MMU" is not visible here;
# presumably it is tied to runtime patching of kernel text -- confirm.
config RISCV_ISA_ZBB
428+
bool "Zbb extension support for bit manipulation instructions"
429+
depends on TOOLCHAIN_HAS_ZBB
430+
depends on !XIP_KERNEL && MMU
431+
select RISCV_ALTERNATIVE
432+
default y
433+
help
434+
Adds support to dynamically detect the presence of the ZBB
435+
extension (basic bit manipulation) and enable its usage.
436+
437+
The Zbb extension provides instructions to accelerate a number
438+
of bit-specific operations (count bit population, sign extending,
439+
bitrotation, etc).
440+
441+
If you don't know what to do here, say Y.
442+
419443
config TOOLCHAIN_HAS_ZICBOM
420444
bool
421445
default y

arch/riscv/include/asm/errata_list.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@
2424

2525
#define CPUFEATURE_SVPBMT 0
2626
#define CPUFEATURE_ZICBOM 1
27-
#define CPUFEATURE_NUMBER 2
27+
#define CPUFEATURE_ZBB 2
28+
#define CPUFEATURE_NUMBER 3
2829

2930
#ifdef __ASSEMBLY__
3031

arch/riscv/include/asm/hwcap.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ enum riscv_isa_ext_id {
5858
RISCV_ISA_EXT_SSTC,
5959
RISCV_ISA_EXT_SVINVAL,
6060
RISCV_ISA_EXT_SVPBMT,
61+
RISCV_ISA_EXT_ZBB,
6162
RISCV_ISA_EXT_ZICBOM,
6263
RISCV_ISA_EXT_ZIHINTPAUSE,
6364
RISCV_ISA_EXT_ID_MAX

arch/riscv/kernel/cpu.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ arch_initcall(riscv_cpuinfo_init);
185185
* New entries to this struct should follow the ordering rules described above.
186186
*/
187187
static struct riscv_isa_ext_data isa_ext_arr[] = {
188+
__RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
188189
__RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
189190
__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
190191
__RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),

arch/riscv/kernel/cpufeature.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ void __init riscv_fill_hwcap(void)
227227
SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC);
228228
SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL);
229229
SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
230+
SET_ISA_EXT_MAP("zbb", RISCV_ISA_EXT_ZBB);
230231
SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM);
231232
SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE);
232233
}
@@ -302,6 +303,20 @@ static bool __init_or_module cpufeature_probe_zicbom(unsigned int stage)
302303
return true;
303304
}
304305

306+
/*
 * cpufeature_probe_zbb() - decide whether the Zbb feature bit may be set.
 * @stage: alternatives patching stage currently being processed.
 *
 * Returns true only when all three checks below pass: the kernel was built
 * with CONFIG_RISCV_ISA_ZBB, @stage is not RISCV_ALTERNATIVES_EARLY_BOOT,
 * and riscv_isa_extension_available() reports ZBB. Returns false otherwise.
 * cpufeature_probe() turns a true result into BIT(CPUFEATURE_ZBB).
 */
static bool __init_or_module cpufeature_probe_zbb(unsigned int stage)
307+
{
308+
if (!IS_ENABLED(CONFIG_RISCV_ISA_ZBB)) /* Zbb support compiled out */
309+
return false;
310+
311+
if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) /* NOTE(review): presumably the ISA info is not populated this early -- confirm */
312+
return false;
313+
314+
if (!riscv_isa_extension_available(NULL, ZBB)) /* NOTE(review): NULL presumably selects the system-wide ISA bitmap -- confirm */
315+
return false;
316+
317+
return true;
318+
}
319+
305320
/*
306321
* Probe presence of individual extensions.
307322
*
@@ -320,6 +335,9 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage)
320335
if (cpufeature_probe_zicbom(stage))
321336
cpu_req_feature |= BIT(CPUFEATURE_ZICBOM);
322337

338+
if (cpufeature_probe_zbb(stage))
339+
cpu_req_feature |= BIT(CPUFEATURE_ZBB);
340+
323341
return cpu_req_feature;
324342
}
325343

arch/riscv/lib/strcmp.S

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,14 @@
33
#include <linux/linkage.h>
44
#include <asm/asm.h>
55
#include <asm-generic/export.h>
6+
#include <asm/alternative-macros.h>
7+
#include <asm/errata_list.h>
68

79
/* int strcmp(const char *cs, const char *ct) */
810
SYM_FUNC_START(strcmp)
11+
12+
ALTERNATIVE("nop", "j strcmp_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB)
13+
914
/*
1015
* Returns
1116
* a0 - comparison result, value like strcmp
@@ -33,4 +38,84 @@ SYM_FUNC_START(strcmp)
3338
*/
3439
sub a0, t0, t1
3540
ret
41+
42+
/*
43+
* Variant of strcmp using the ZBB extension if available
44+
*/
45+
#ifdef CONFIG_RISCV_ISA_ZBB
46+
strcmp_zbb:
47+
48+
.option push /* save assembler options; undone by the .option pop below */
49+
.option arch,+zbb /* accept Zbb mnemonics (orc.b, rev8) until .option pop */
50+
51+
/*
52+
* Returns
53+
* a0 - comparison result, value like strcmp
54+
*
55+
* Parameters
56+
* a0 - string1
57+
* a1 - string2
58+
*
59+
* Clobbers
60+
* t0, t1, t2, t3, t4, t5
* NOTE: t5 is listed but never written by the code below; a1 is
* also modified (advanced while scanning).
61+
*/
62+
63+
or t2, a0, a1 /* combine both pointers so one test checks both */
64+
li t4, -1 /* all-ones: orc.b result for a word with no NUL byte */
65+
and t2, t2, SZREG-1 /* keep only the sub-word address bits */
66+
bnez t2, 3f /* either pointer is not SZREG-aligned: bytewise loop */
67+
68+
/* Main loop for aligned string. */
69+
.p2align 3
70+
1:
71+
REG_L t0, 0(a0)
72+
REG_L t1, 0(a1)
73+
orc.b t3, t0 /* each byte of t3: 0xff if that byte of t0 is non-zero, else 0x00 */
74+
bne t3, t4, 2f /* t0 contains a NUL byte (a0/a1 not yet advanced) */
75+
addi a0, a0, SZREG
76+
addi a1, a1, SZREG
77+
beq t0, t1, 1b /* words equal and NUL-free: compare the next word */
78+
79+
/*
80+
* Words don't match, and no null byte in the first
81+
* word. Get bytes in big-endian order and compare.
82+
*/
83+
#ifndef CONFIG_CPU_BIG_ENDIAN
84+
rev8 t0, t0 /* byte-reverse so an unsigned word compare follows string order */
85+
rev8 t1, t1
86+
#endif
87+
88+
/* Synthesize (t0 >= t1) ? 1 : -1 in a branchless sequence. */
89+
sltu a0, t0, t1 /* a0 = 1 if t0 < t1 (unsigned), else 0 */
90+
neg a0, a0 /* a0 = -1 or 0 */
91+
ori a0, a0, 1 /* a0 = -1 or 1 */
92+
ret
93+
94+
2:
95+
/*
96+
* Found a null byte.
97+
* If words don't match, fall back to simple loop.
98+
*/
99+
bne t0, t1, 3f /* a0/a1 still point at this word, so 3: rescans it bytewise */
100+
101+
/* Otherwise, strings are equal. */
102+
li a0, 0
103+
ret
104+
105+
/* Simple loop for misaligned strings. */
106+
.p2align 3
107+
3:
108+
lbu t0, 0(a0)
109+
lbu t1, 0(a1)
110+
addi a0, a0, 1
111+
addi a1, a1, 1
112+
bne t0, t1, 4f /* bytes differ: return their difference */
113+
bnez t0, 3b /* equal and not NUL: keep going; equal NUL falls through */
114+
115+
4:
116+
sub a0, t0, t1 /* difference of the last bytes read (0 when both were NUL) */
117+
ret
118+
119+
.option pop
120+
#endif
36121
SYM_FUNC_END(strcmp)

arch/riscv/lib/strlen.S

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,14 @@
33
#include <linux/linkage.h>
44
#include <asm/asm.h>
55
#include <asm-generic/export.h>
6+
#include <asm/alternative-macros.h>
7+
#include <asm/errata_list.h>
68

79
/* int strlen(const char *s) */
810
SYM_FUNC_START(strlen)
11+
12+
ALTERNATIVE("nop", "j strlen_zbb", 0, CPUFEATURE_ZBB, CONFIG_RISCV_ISA_ZBB)
13+
914
/*
1015
* Returns
1116
* a0 - string length
@@ -25,4 +30,104 @@ SYM_FUNC_START(strlen)
2530
2:
2631
sub a0, t1, a0
2732
ret
33+
34+
/*
35+
* Variant of strlen using the ZBB extension if available
36+
*/
37+
#ifdef CONFIG_RISCV_ISA_ZBB
38+
strlen_zbb:
39+
40+
#ifdef CONFIG_CPU_BIG_ENDIAN
41+
# define CZ clz
42+
# define SHIFT sll
43+
#else
44+
# define CZ ctz
45+
# define SHIFT srl
46+
#endif /* CZ counts zero bits up to the first string-order NUL marker; SHIFT drops the bytes that precede the string */
47+
48+
.option push /* save assembler options; undone by the .option pop below */
49+
.option arch,+zbb /* accept Zbb mnemonics (orc.b, ctz/clz) until .option pop */
50+
51+
/*
52+
* Returns
53+
* a0 - string length
54+
*
55+
* Parameters
56+
* a0 - String to measure
57+
*
58+
* Clobbers
59+
* t0, t1, t2, t3
60+
*/
61+
62+
/* Number of irrelevant bytes in the first word. */
63+
andi t2, a0, SZREG-1
64+
65+
/* Align pointer. */
66+
andi t0, a0, -SZREG
67+
68+
li t3, SZREG
69+
sub t3, t3, t2 /* t3 = number of string bytes held in the first aligned word */
70+
slli t2, t2, 3 /* byte count -> bit count, used as the SHIFT amount */
71+
72+
/* Get the first word. */
73+
REG_L t1, 0(t0)
74+
75+
/*
76+
* Shift away the partial data we loaded to remove the irrelevant bytes
77+
* preceding the string with the effect of adding NUL bytes at the
78+
* end of the string's first word.
79+
*/
80+
SHIFT t1, t1, t2
81+
82+
/* Convert non-NUL into 0xff and NUL into 0x00. */
83+
orc.b t1, t1
84+
85+
/* Convert non-NUL into 0x00 and NUL into 0xff. */
86+
not t1, t1
87+
88+
/*
89+
* Search for the first set bit (corresponding to a NUL byte in the
90+
* original chunk).
91+
*/
92+
CZ t1, t1
93+
94+
/*
95+
* The first chunk is special: compare against the number
96+
* of valid bytes in this chunk.
97+
*/
98+
srli a0, t1, 3 /* bit index -> byte index of the first NUL in the shifted word */
99+
bgtu t3, a0, 3f /* NUL is among the valid bytes: a0 already holds the length */
100+
101+
/* Prepare for the word comparison loop. */
102+
addi t2, t0, SZREG /* t2 = address of the second word; used after the loop to count bytes */
103+
li t3, -1 /* all-ones: orc.b result for a word with no NUL byte */
104+
105+
/*
106+
* Our critical loop is 4 instructions and processes data in
107+
* 4 byte or 8 byte chunks.
108+
*/
109+
.p2align 3
110+
1:
111+
REG_L t1, SZREG(t0)
112+
addi t0, t0, SZREG /* t0 now addresses the word just loaded */
113+
orc.b t1, t1
114+
beq t1, t3, 1b /* no NUL in this word: load the next one */
115+
2: /* not referenced by any branch in this block */
116+
not t1, t1
117+
CZ t1, t1
118+
119+
/* Get number of processed words. */
120+
sub t2, t0, t2 /* t2 = bytes from the second word up to the word holding the NUL */
121+
122+
/* Add number of characters in the first word. */
123+
add a0, a0, t2 /* a0 (first-word byte count) += bytes in the NUL-free words */
124+
srli t1, t1, 3 /* bit index -> byte index of the NUL in the last word */
125+
126+
/* Add number of characters in the last word. */
127+
add a0, a0, t1
128+
3:
129+
ret
130+
131+
.option pop
132+
#endif
28133
SYM_FUNC_END(strlen)

0 commit comments

Comments
 (0)