Skip to content

Commit ab0f774

Browse files
Andrew Jones authored and palmer-dabbelt committed
RISC-V: Use Zicboz in clear_page when available
Using memset() to zero a 4K page takes 563 total instructions, where 20 are branches. clear_page(), with Zicboz and a 64-byte block size, takes 169 total instructions, where 4 are branches and 33 are nops.

Even though the block size is a variable, thanks to alternatives, we can still implement a Duff device without having to do any preliminary calculations. This is achieved by using the alternatives' cpufeature value (the upper 16 bits of patch_id). The value used is the maximum Zicboz block size order accepted at the patch site. This enables us to stop patching / unrolling when 4K bytes have been zeroed (we would loop and continue after 4K if the page size were larger).

For 4K pages, unrolling 16 times allows block sizes of 64 and 128 to only loop a few times and larger block sizes to not loop at all. Since cbo.zero doesn't take an offset, we also need an 'add' after each instruction, making the loop body 112 to 160 bytes. Hopefully this is small enough to not cause icache misses.

Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20230224162631.405473-7-ajones@ventanamicro.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
1 parent d25f256 commit ab0f774

6 files changed

Lines changed: 108 additions & 1 deletion

File tree

arch/riscv/Kconfig

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,19 @@ config RISCV_ISA_ZICBOM
457457

458458
If you don't know what to do here, say Y.
459459

460+
config RISCV_ISA_ZICBOZ
461+
bool "Zicboz extension support for faster zeroing of memory"
462+
depends on !XIP_KERNEL && MMU
463+
select RISCV_ALTERNATIVE
464+
default y
465+
help
466+
Enable the use of the ZICBOZ extension (cbo.zero instruction)
467+
when available.
468+
469+
The Zicboz extension is used for faster zeroing of memory.
470+
471+
If you don't know what to do here, say Y.
472+
460473
config TOOLCHAIN_HAS_ZIHINTPAUSE
461474
bool
462475
default y

arch/riscv/include/asm/insn-def.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,4 +192,8 @@
192192
INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \
193193
RS1(base), SIMM12(2))
194194

195+
#define CBO_zero(base) \
196+
INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \
197+
RS1(base), SIMM12(4))
198+
195199
#endif /* __ASM_INSN_DEF_H */

arch/riscv/include/asm/page.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,14 @@
4949

5050
#ifndef __ASSEMBLY__
5151

52+
#ifdef CONFIG_RISCV_ISA_ZICBOZ
53+
void clear_page(void *page);
54+
#else
5255
#define clear_page(pgaddr) memset((pgaddr), 0, PAGE_SIZE)
56+
#endif
5357
#define copy_page(to, from) memcpy((to), (from), PAGE_SIZE)
5458

55-
#define clear_user_page(pgaddr, vaddr, page) memset((pgaddr), 0, PAGE_SIZE)
59+
#define clear_user_page(pgaddr, vaddr, page) clear_page(pgaddr)
5660
#define copy_user_page(vto, vfrom, vaddr, topg) \
5761
memcpy((vto), (vfrom), PAGE_SIZE)
5862

arch/riscv/kernel/cpufeature.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,17 @@ static bool riscv_cpufeature_patch_check(u16 id, u16 value)
293293
if (!value)
294294
return true;
295295

296+
switch (id) {
297+
case RISCV_ISA_EXT_ZICBOZ:
298+
/*
299+
* Zicboz alternative applications provide the maximum
300+
* supported block size order, or zero when it doesn't
301+
* matter. If the current block size exceeds the maximum,
302+
* then the alternative cannot be applied.
303+
*/
304+
return riscv_cboz_block_size <= (1U << value);
305+
}
306+
296307
return false;
297308
}
298309

arch/riscv/lib/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,6 @@ lib-y += strlen.o
88
lib-y += strncmp.o
99
lib-$(CONFIG_MMU) += uaccess.o
1010
lib-$(CONFIG_64BIT) += tishift.o
11+
lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
1112

1213
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o

arch/riscv/lib/clear_page.S

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/* SPDX-License-Identifier: GPL-2.0-only */
2+
/*
3+
* Copyright (c) 2023 Ventana Micro Systems Inc.
4+
*/
5+
6+
#include <linux/linkage.h>
7+
#include <asm/asm.h>
8+
#include <asm/alternative-macros.h>
9+
#include <asm-generic/export.h>
10+
#include <asm/hwcap.h>
11+
#include <asm/insn-def.h>
12+
#include <asm/page.h>
13+
14+
#define CBOZ_ALT(order, old, new) \
15+
ALTERNATIVE(old, new, 0, \
16+
((order) << 16) | RISCV_ISA_EXT_ZICBOZ, \
17+
CONFIG_RISCV_ISA_ZICBOZ)
18+
19+
/* void clear_page(void *page) */
20+
SYM_FUNC_START(clear_page)
21+
li a2, PAGE_SIZE
22+
23+
/*
24+
* If Zicboz isn't present, or somehow has a block
25+
* size larger than 4K, then fall back to memset.
26+
*/
27+
CBOZ_ALT(12, "j .Lno_zicboz", "nop")
28+
29+
lw a1, riscv_cboz_block_size
30+
add a2, a0, a2
31+
.Lzero_loop:
32+
CBO_zero(a0)
33+
add a0, a0, a1
34+
CBOZ_ALT(11, "bltu a0, a2, .Lzero_loop; ret", "nop; nop")
35+
CBO_zero(a0)
36+
add a0, a0, a1
37+
CBOZ_ALT(10, "bltu a0, a2, .Lzero_loop; ret", "nop; nop")
38+
CBO_zero(a0)
39+
add a0, a0, a1
40+
CBO_zero(a0)
41+
add a0, a0, a1
42+
CBOZ_ALT(9, "bltu a0, a2, .Lzero_loop; ret", "nop; nop")
43+
CBO_zero(a0)
44+
add a0, a0, a1
45+
CBO_zero(a0)
46+
add a0, a0, a1
47+
CBO_zero(a0)
48+
add a0, a0, a1
49+
CBO_zero(a0)
50+
add a0, a0, a1
51+
CBOZ_ALT(8, "bltu a0, a2, .Lzero_loop; ret", "nop; nop")
52+
CBO_zero(a0)
53+
add a0, a0, a1
54+
CBO_zero(a0)
55+
add a0, a0, a1
56+
CBO_zero(a0)
57+
add a0, a0, a1
58+
CBO_zero(a0)
59+
add a0, a0, a1
60+
CBO_zero(a0)
61+
add a0, a0, a1
62+
CBO_zero(a0)
63+
add a0, a0, a1
64+
CBO_zero(a0)
65+
add a0, a0, a1
66+
CBO_zero(a0)
67+
add a0, a0, a1
68+
bltu a0, a2, .Lzero_loop
69+
ret
70+
.Lno_zicboz:
71+
li a1, 0
72+
tail __memset
73+
SYM_FUNC_END(clear_page)
74+
EXPORT_SYMBOL(clear_page)

0 commit comments

Comments
 (0)