
Commit 344323e

rmurphy-arm authored and willdeacon committed
arm64: Rewrite __arch_clear_user()
Now that we're always using STTR variants rather than abstracting two different addressing modes, the user_ldst macro here is frankly more obfuscating than helpful. Rewrite __arch_clear_user() with regular USER() annotations so that it's clearer what's going on, and take the opportunity to minimise the branchiness in the most common paths, while also allowing the exception fixup to return an accurate result.

Apparently some folks examine large reads from /dev/zero closely enough to notice the loop being hot, so align it per the other critical loops (presumably around a typical instruction fetch granularity).

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/1cbd78b12c076a8ad4656a345811cfb9425df0b3.1622128527.git.robin.murphy@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
1 parent 9e51caf · commit 344323e

1 file changed

arch/arm64/lib/clear_user.S: 27 additions & 20 deletions
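The "accurate result" part of the commit message works because, as the diff below shows, the entry code now computes the one-past-the-end address (add x2, x0, x1) instead of stashing the original size, and x0 advances as data is cleared. On a fault, the number of bytes NOT cleared is simply the end address minus the position reached, with fixup labels 7: and 8: nudging that position for the two overlapping tail stores. A minimal C sketch of the label-9 arithmetic, with made-up names and purely for illustration:

#include <stddef.h>
#include <stdint.h>

/*
 * Hypothetical illustration of fixup label 9 ("sub x0, x2, x0") in the
 * rewritten assembly: 'end' mirrors x2 (addr + size) and 'pos' mirrors
 * x0 (how far clearing had progressed when the store faulted).
 */
static size_t bytes_not_cleared(uintptr_t end, uintptr_t pos)
{
	return (size_t)(end - pos);	/* bytes left uncleared */
}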
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -1,12 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Based on arch/arm/lib/clear_user.S
- *
- * Copyright (C) 2012 ARM Ltd.
+ * Copyright (C) 2021 Arm Ltd.
  */
-#include <linux/linkage.h>
 
-#include <asm/asm-uaccess.h>
+#include <linux/linkage.h>
 #include <asm/assembler.h>
 
 	.text
@@ -19,32 +16,42 @@
  *
  * Alignment fixed up by hardware.
  */
+
+	.p2align 4
+	// Alignment is for the loop, but since the prologue (including BTI)
+	// is also 16 bytes we can keep any padding outside the function
 SYM_FUNC_START(__arch_clear_user)
-	mov	x2, x1			// save the size for fixup return
+	add	x2, x0, x1
 	subs	x1, x1, #8
 	b.mi	2f
 1:
-user_ldst 9f, sttr, xzr, x0, 8
+USER(9f, sttr	xzr, [x0])
+	add	x0, x0, #8
 	subs	x1, x1, #8
-	b.pl	1b
-2:	adds	x1, x1, #4
-	b.mi	3f
-user_ldst 9f, sttr, wzr, x0, 4
-	sub	x1, x1, #4
-3:	adds	x1, x1, #2
-	b.mi	4f
-user_ldst 9f, sttrh, wzr, x0, 2
-	sub	x1, x1, #2
-4:	adds	x1, x1, #1
-	b.mi	5f
-user_ldst 9f, sttrb, wzr, x0, 0
+	b.hi	1b
+USER(9f, sttr	xzr, [x2, #-8])
+	mov	x0, #0
+	ret
+
+2:	tbz	x1, #2, 3f
+USER(9f, sttr	wzr, [x0])
+USER(8f, sttr	wzr, [x2, #-4])
+	mov	x0, #0
+	ret
+
+3:	tbz	x1, #1, 4f
+USER(9f, sttrh	wzr, [x0])
+4:	tbz	x1, #0, 5f
+USER(7f, sttrb	wzr, [x2, #-1])
 5:	mov	x0, #0
 	ret
 SYM_FUNC_END(__arch_clear_user)
 EXPORT_SYMBOL(__arch_clear_user)
 
 	.section .fixup,"ax"
 	.align	2
-9:	mov	x0, x2			// return the original size
+7:	sub	x0, x2, #5	// Adjust for faulting on the final byte...
+8:	add	x0, x0, #4	// ...or the second word of the 4-7 byte case
+9:	sub	x0, x2, x0
 	ret
 	.previous
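For readers who would rather see the control flow in C: the rewrite handles every size of 8 bytes or more with one store loop plus a final, possibly overlapping, doubleword store at end - 8, and the sub-8-byte tail with tbz bit tests on the length and overlapping stores anchored at the start and end of the buffer (the assembly tests the same low bits of x1, which at that point holds size - 8). The following is a rough, purely illustrative sketch of that store pattern; the real code uses unprivileged STTR stores with exception-table fixups, which memset() obviously does not model:

#include <stddef.h>
#include <string.h>

/* Illustrative sketch of the clearing pattern; not the kernel code. */
static void clear_pattern(char *p, size_t n)
{
	char *end = p + n;

	if (n >= 8) {
		do {				/* 1: ... b.hi 1b */
			memset(p, 0, 8);
			p += 8;
		} while (end - p > 8);
		memset(end - 8, 0, 8);		/* final store may overlap */
		return;
	}
	if (n & 4) {				/* 2: tbz x1, #2, 3f */
		memset(p, 0, 4);		/* two overlapping word stores */
		memset(end - 4, 0, 4);		/* cover any 4-7 byte tail */
		return;
	}
	if (n & 2)				/* 3: tbz x1, #1, 4f */
		memset(p, 0, 2);
	if (n & 1)				/* 4: tbz x1, #0, 5f */
		end[-1] = 0;
}

The overlapping stores are what let each size class finish with at most a couple of branches, compared with the old fall-through chain of separate word, halfword and byte stores.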
