Skip to content

Commit 1547db7

Browse files
Xiu Jianfeng authored and mpe committed
powerpc: Move system_call_exception() to syscall.c
This is a lead-up patch to enable syscall stack randomization, which uses alloca() and makes the compiler add unconditional stack canaries on syscall entry. In order to avoid triggering needless checks and slowing down the entry path, the feature needs to disable stack protector at the compilation unit level as there is no general way to control stack protector coverage with a function attribute.

So move system_call_exception() to syscall.c to avoid affecting other functions in interrupt.c.

Suggested-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220701082435.126596-2-xiujianfeng@huawei.com
1 parent 978030f commit 1547db7

3 files changed

Lines changed: 174 additions & 162 deletions

File tree

arch/powerpc/kernel/Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ obj-y := cputable.o syscalls.o \
6262
udbg.o misc.o io.o misc_$(BITS).o \
6363
of_platform.o prom_parse.o firmware.o \
6464
hw_breakpoint_constraints.o interrupt.o \
65-
kdebugfs.o stacktrace.o
65+
kdebugfs.o stacktrace.o syscall.o
6666
obj-y += ptrace/
6767
obj-$(CONFIG_PPC64) += setup_64.o irq_64.o\
6868
paca.o nvram_64.o note.o

arch/powerpc/kernel/interrupt.c

Lines changed: 0 additions & 161 deletions
Original file line number | Diff line number | Diff line change
@@ -24,8 +24,6 @@
2424
unsigned long global_dbcr0[NR_CPUS];
2525
#endif
2626

27-
typedef long (*syscall_fn)(long, long, long, long, long, long);
28-
2927
#ifdef CONFIG_PPC_BOOK3S_64
3028
DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
3129
static inline bool exit_must_hard_disable(void)
@@ -73,165 +71,6 @@ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
7371
return true;
7472
}
7573

76-
/* Has to run notrace because it is entered not completely "reconciled" */
77-
notrace long system_call_exception(long r3, long r4, long r5,
78-
long r6, long r7, long r8,
79-
unsigned long r0, struct pt_regs *regs)
80-
{
81-
syscall_fn f;
82-
83-
kuap_lock();
84-
85-
regs->orig_gpr3 = r3;
86-
87-
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
88-
BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
89-
90-
trace_hardirqs_off(); /* finish reconciling */
91-
92-
CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
93-
user_exit_irqoff();
94-
95-
BUG_ON(regs_is_unrecoverable(regs));
96-
BUG_ON(!(regs->msr & MSR_PR));
97-
BUG_ON(arch_irq_disabled_regs(regs));
98-
99-
#ifdef CONFIG_PPC_PKEY
100-
if (mmu_has_feature(MMU_FTR_PKEY)) {
101-
unsigned long amr, iamr;
102-
bool flush_needed = false;
103-
/*
104-
* When entering from userspace we mostly have the AMR/IAMR
105-
* different from kernel default values. Hence don't compare.
106-
*/
107-
amr = mfspr(SPRN_AMR);
108-
iamr = mfspr(SPRN_IAMR);
109-
regs->amr = amr;
110-
regs->iamr = iamr;
111-
if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
112-
mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
113-
flush_needed = true;
114-
}
115-
if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
116-
mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
117-
flush_needed = true;
118-
}
119-
if (flush_needed)
120-
isync();
121-
} else
122-
#endif
123-
kuap_assert_locked();
124-
125-
booke_restore_dbcr0();
126-
127-
account_cpu_user_entry();
128-
129-
account_stolen_time();
130-
131-
/*
132-
* This is not required for the syscall exit path, but makes the
133-
* stack frame look nicer. If this was initialised in the first stack
134-
* frame, or if the unwinder was taught the first stack frame always
135-
* returns to user with IRQS_ENABLED, this store could be avoided!
136-
*/
137-
irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
138-
139-
/*
140-
* If system call is called with TM active, set _TIF_RESTOREALL to
141-
* prevent RFSCV being used to return to userspace, because POWER9
142-
* TM implementation has problems with this instruction returning to
143-
* transactional state. Final register values are not relevant because
144-
* the transaction will be aborted upon return anyway. Or in the case
145-
* of unsupported_scv SIGILL fault, the return state does not much
146-
* matter because it's an edge case.
147-
*/
148-
if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
149-
unlikely(MSR_TM_TRANSACTIONAL(regs->msr)))
150-
set_bits(_TIF_RESTOREALL, &current_thread_info()->flags);
151-
152-
/*
153-
* If the system call was made with a transaction active, doom it and
154-
* return without performing the system call. Unless it was an
155-
* unsupported scv vector, in which case it's treated like an illegal
156-
* instruction.
157-
*/
158-
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
159-
if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) &&
160-
!trap_is_unsupported_scv(regs)) {
161-
/* Enable TM in the kernel, and disable EE (for scv) */
162-
hard_irq_disable();
163-
mtmsr(mfmsr() | MSR_TM);
164-
165-
/* tabort, this dooms the transaction, nothing else */
166-
asm volatile(".long 0x7c00071d | ((%0) << 16)"
167-
:: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT));
168-
169-
/*
170-
* Userspace will never see the return value. Execution will
171-
* resume after the tbegin. of the aborted transaction with the
172-
* checkpointed register state. A context switch could occur
173-
* or signal delivered to the process before resuming the
174-
* doomed transaction context, but that should all be handled
175-
* as expected.
176-
*/
177-
return -ENOSYS;
178-
}
179-
#endif // CONFIG_PPC_TRANSACTIONAL_MEM
180-
181-
local_irq_enable();
182-
183-
if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) {
184-
if (unlikely(trap_is_unsupported_scv(regs))) {
185-
/* Unsupported scv vector */
186-
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
187-
return regs->gpr[3];
188-
}
189-
/*
190-
* We use the return value of do_syscall_trace_enter() as the
191-
* syscall number. If the syscall was rejected for any reason
192-
* do_syscall_trace_enter() returns an invalid syscall number
193-
* and the test against NR_syscalls will fail and the return
194-
* value to be used is in regs->gpr[3].
195-
*/
196-
r0 = do_syscall_trace_enter(regs);
197-
if (unlikely(r0 >= NR_syscalls))
198-
return regs->gpr[3];
199-
r3 = regs->gpr[3];
200-
r4 = regs->gpr[4];
201-
r5 = regs->gpr[5];
202-
r6 = regs->gpr[6];
203-
r7 = regs->gpr[7];
204-
r8 = regs->gpr[8];
205-
206-
} else if (unlikely(r0 >= NR_syscalls)) {
207-
if (unlikely(trap_is_unsupported_scv(regs))) {
208-
/* Unsupported scv vector */
209-
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
210-
return regs->gpr[3];
211-
}
212-
return -ENOSYS;
213-
}
214-
215-
/* May be faster to do array_index_nospec? */
216-
barrier_nospec();
217-
218-
if (unlikely(is_compat_task())) {
219-
f = (void *)compat_sys_call_table[r0];
220-
221-
r3 &= 0x00000000ffffffffULL;
222-
r4 &= 0x00000000ffffffffULL;
223-
r5 &= 0x00000000ffffffffULL;
224-
r6 &= 0x00000000ffffffffULL;
225-
r7 &= 0x00000000ffffffffULL;
226-
r8 &= 0x00000000ffffffffULL;
227-
228-
} else {
229-
f = (void *)sys_call_table[r0];
230-
}
231-
232-
return f(r3, r4, r5, r6, r7, r8);
233-
}
234-
23574
static notrace void booke_load_dbcr0(void)
23675
{
23776
#ifdef CONFIG_PPC_ADV_DEBUG_REGS

arch/powerpc/kernel/syscall.c

Lines changed: 173 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,173 @@
1+
// SPDX-License-Identifier: GPL-2.0-or-later
2+
3+
#include <linux/compat.h>
4+
#include <linux/context_tracking.h>
5+
6+
#include <asm/interrupt.h>
7+
#include <asm/kup.h>
8+
#include <asm/syscall.h>
9+
#include <asm/time.h>
10+
#include <asm/tm.h>
11+
#include <asm/unistd.h>
12+
13+
14+
typedef long (*syscall_fn)(long, long, long, long, long, long);
15+
16+
/* Has to run notrace because it is entered not completely "reconciled" */
17+
notrace long system_call_exception(long r3, long r4, long r5,
18+
long r6, long r7, long r8,
19+
unsigned long r0, struct pt_regs *regs)
20+
{
21+
syscall_fn f;
22+
23+
kuap_lock();
24+
25+
regs->orig_gpr3 = r3;
26+
27+
if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
28+
BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
29+
30+
trace_hardirqs_off(); /* finish reconciling */
31+
32+
CT_WARN_ON(ct_state() == CONTEXT_KERNEL);
33+
user_exit_irqoff();
34+
35+
BUG_ON(regs_is_unrecoverable(regs));
36+
BUG_ON(!(regs->msr & MSR_PR));
37+
BUG_ON(arch_irq_disabled_regs(regs));
38+
39+
#ifdef CONFIG_PPC_PKEY
40+
if (mmu_has_feature(MMU_FTR_PKEY)) {
41+
unsigned long amr, iamr;
42+
bool flush_needed = false;
43+
/*
44+
* When entering from userspace we mostly have the AMR/IAMR
45+
* different from kernel default values. Hence don't compare.
46+
*/
47+
amr = mfspr(SPRN_AMR);
48+
iamr = mfspr(SPRN_IAMR);
49+
regs->amr = amr;
50+
regs->iamr = iamr;
51+
if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) {
52+
mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
53+
flush_needed = true;
54+
}
55+
if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
56+
mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
57+
flush_needed = true;
58+
}
59+
if (flush_needed)
60+
isync();
61+
} else
62+
#endif
63+
kuap_assert_locked();
64+
65+
booke_restore_dbcr0();
66+
67+
account_cpu_user_entry();
68+
69+
account_stolen_time();
70+
71+
/*
72+
* This is not required for the syscall exit path, but makes the
73+
* stack frame look nicer. If this was initialised in the first stack
74+
* frame, or if the unwinder was taught the first stack frame always
75+
* returns to user with IRQS_ENABLED, this store could be avoided!
76+
*/
77+
irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
78+
79+
/*
80+
* If system call is called with TM active, set _TIF_RESTOREALL to
81+
* prevent RFSCV being used to return to userspace, because POWER9
82+
* TM implementation has problems with this instruction returning to
83+
* transactional state. Final register values are not relevant because
84+
* the transaction will be aborted upon return anyway. Or in the case
85+
* of unsupported_scv SIGILL fault, the return state does not much
86+
* matter because it's an edge case.
87+
*/
88+
if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
89+
unlikely(MSR_TM_TRANSACTIONAL(regs->msr)))
90+
set_bits(_TIF_RESTOREALL, &current_thread_info()->flags);
91+
92+
/*
93+
* If the system call was made with a transaction active, doom it and
94+
* return without performing the system call. Unless it was an
95+
* unsupported scv vector, in which case it's treated like an illegal
96+
* instruction.
97+
*/
98+
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
99+
if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) &&
100+
!trap_is_unsupported_scv(regs)) {
101+
/* Enable TM in the kernel, and disable EE (for scv) */
102+
hard_irq_disable();
103+
mtmsr(mfmsr() | MSR_TM);
104+
105+
/* tabort, this dooms the transaction, nothing else */
106+
asm volatile(".long 0x7c00071d | ((%0) << 16)"
107+
:: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT));
108+
109+
/*
110+
* Userspace will never see the return value. Execution will
111+
* resume after the tbegin. of the aborted transaction with the
112+
* checkpointed register state. A context switch could occur
113+
* or signal delivered to the process before resuming the
114+
* doomed transaction context, but that should all be handled
115+
* as expected.
116+
*/
117+
return -ENOSYS;
118+
}
119+
#endif // CONFIG_PPC_TRANSACTIONAL_MEM
120+
121+
local_irq_enable();
122+
123+
if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) {
124+
if (unlikely(trap_is_unsupported_scv(regs))) {
125+
/* Unsupported scv vector */
126+
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
127+
return regs->gpr[3];
128+
}
129+
/*
130+
* We use the return value of do_syscall_trace_enter() as the
131+
* syscall number. If the syscall was rejected for any reason
132+
* do_syscall_trace_enter() returns an invalid syscall number
133+
* and the test against NR_syscalls will fail and the return
134+
* value to be used is in regs->gpr[3].
135+
*/
136+
r0 = do_syscall_trace_enter(regs);
137+
if (unlikely(r0 >= NR_syscalls))
138+
return regs->gpr[3];
139+
r3 = regs->gpr[3];
140+
r4 = regs->gpr[4];
141+
r5 = regs->gpr[5];
142+
r6 = regs->gpr[6];
143+
r7 = regs->gpr[7];
144+
r8 = regs->gpr[8];
145+
146+
} else if (unlikely(r0 >= NR_syscalls)) {
147+
if (unlikely(trap_is_unsupported_scv(regs))) {
148+
/* Unsupported scv vector */
149+
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
150+
return regs->gpr[3];
151+
}
152+
return -ENOSYS;
153+
}
154+
155+
/* May be faster to do array_index_nospec? */
156+
barrier_nospec();
157+
158+
if (unlikely(is_compat_task())) {
159+
f = (void *)compat_sys_call_table[r0];
160+
161+
r3 &= 0x00000000ffffffffULL;
162+
r4 &= 0x00000000ffffffffULL;
163+
r5 &= 0x00000000ffffffffULL;
164+
r6 &= 0x00000000ffffffffULL;
165+
r7 &= 0x00000000ffffffffULL;
166+
r8 &= 0x00000000ffffffffULL;
167+
168+
} else {
169+
f = (void *)sys_call_table[r0];
170+
}
171+
172+
return f(r3, r4, r5, r6, r7, r8);
173+
}

0 commit comments

Comments
 (0)