Skip to content

Commit 5c4378b

Browse files
author
Thomas Gleixner
committed
Merge branch 'core/entry' into sched/core
Pull the entry update to avoid merge conflicts with the time slice extension changes. Signed-off-by: Thomas Gleixner <tglx@kernel.org>
2 parents 377521a + 31c9387 commit 5c4378b

4 files changed

Lines changed: 158 additions & 122 deletions

File tree

include/linux/entry-common.h

Lines changed: 148 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#ifndef __LINUX_ENTRYCOMMON_H
33
#define __LINUX_ENTRYCOMMON_H
44

5+
#include <linux/audit.h>
56
#include <linux/irq-entry-common.h>
67
#include <linux/livepatch.h>
78
#include <linux/ptrace.h>
@@ -45,7 +46,84 @@
4546
SYSCALL_WORK_SYSCALL_EXIT_TRAP | \
4647
ARCH_SYSCALL_WORK_EXIT)
4748

48-
long syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long work);
49+
/**
 * arch_ptrace_report_syscall_entry - Architecture specific ptrace_report_syscall_entry() wrapper
 *
 * Invoked from syscall_trace_enter() to wrap ptrace_report_syscall_entry().
 *
 * This allows architecture specific ptrace_report_syscall_entry()
 * implementations. If not defined by the architecture this falls back
 * to ptrace_report_syscall_entry().
 */
static __always_inline int arch_ptrace_report_syscall_entry(struct pt_regs *regs);

#ifndef arch_ptrace_report_syscall_entry
/* Generic fallback when the architecture does not provide an override */
static __always_inline int arch_ptrace_report_syscall_entry(struct pt_regs *regs)
{
	return ptrace_report_syscall_entry(regs);
}
#endif
66+
67+
/* Out of line helpers, implemented in kernel/entry/ */
bool syscall_user_dispatch(struct pt_regs *regs);
long trace_syscall_enter(struct pt_regs *regs, long syscall);
void trace_syscall_exit(struct pt_regs *regs, long ret);
70+
71+
static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
72+
{
73+
if (unlikely(audit_context())) {
74+
unsigned long args[6];
75+
76+
syscall_get_arguments(current, regs, args);
77+
audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
78+
}
79+
}
80+
81+
/*
 * syscall_trace_enter - Handle syscall entry work
 * @regs:	Pointer to current pt_regs
 * @work:	Current thread syscall work (SYSCALL_WORK_* bits)
 *
 * Returns the (possibly rewritten) syscall number to invoke, or -1L when
 * the syscall must be skipped (user dispatch, ptrace abort, SYSEMU,
 * seccomp deny). The ordering of the work items below is ABI relevant
 * and must not be changed.
 */
static __always_inline long syscall_trace_enter(struct pt_regs *regs, unsigned long work)
{
	long syscall, ret = 0;

	/*
	 * Handle Syscall User Dispatch. This must come first, since
	 * the ABI here can be something that doesn't make sense for
	 * other syscall_work features.
	 */
	if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
		if (syscall_user_dispatch(regs))
			return -1L;
	}

	/*
	 * User space got a time slice extension granted and relinquishes
	 * the CPU. The work stops the slice timer to avoid an extra round
	 * through hrtimer_interrupt().
	 */
	if (work & SYSCALL_WORK_SYSCALL_RSEQ_SLICE)
		rseq_syscall_enter_work(syscall_get_nr(current, regs));

	/* Handle ptrace */
	if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) {
		ret = arch_ptrace_report_syscall_entry(regs);
		/* SYSEMU: the syscall is emulated by the tracer, never run it */
		if (ret || (work & SYSCALL_WORK_SYSCALL_EMU))
			return -1L;
	}

	/* Do seccomp after ptrace, to catch any tracer changes. */
	if (work & SYSCALL_WORK_SECCOMP) {
		ret = __secure_computing();
		if (ret == -1L)
			return ret;
	}

	/* Either of the above might have changed the syscall number */
	syscall = syscall_get_nr(current, regs);

	/* Tracepoint probes/BPF may rewrite the syscall number as well */
	if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT))
		syscall = trace_syscall_enter(regs, syscall);

	syscall_enter_audit(regs, syscall);

	return ret ? : syscall;
}
49127

50128
/**
51129
* syscall_enter_from_user_mode_work - Check and handle work before invoking
@@ -75,7 +153,7 @@ static __always_inline long syscall_enter_from_user_mode_work(struct pt_regs *re
75153
unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
76154

77155
if (work & SYSCALL_WORK_ENTER)
78-
syscall = syscall_trace_enter(regs, syscall, work);
156+
syscall = syscall_trace_enter(regs, work);
79157

80158
return syscall;
81159
}
@@ -112,27 +190,78 @@ static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, l
112190
return ret;
113191
}
114192

193+
/*
194+
* If SYSCALL_EMU is set, then the only reason to report is when
195+
* SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
196+
* instruction has been already reported in syscall_enter_from_user_mode().
197+
*/
198+
static __always_inline bool report_single_step(unsigned long work)
199+
{
200+
if (work & SYSCALL_WORK_SYSCALL_EMU)
201+
return false;
202+
203+
return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
204+
}
205+
206+
/**
 * arch_ptrace_report_syscall_exit - Architecture specific ptrace_report_syscall_exit()
 *
 * This allows architecture specific ptrace_report_syscall_exit()
 * implementations. If not defined by the architecture this falls back
 * to ptrace_report_syscall_exit().
 */
static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs,
							    int step);

#ifndef arch_ptrace_report_syscall_exit
/* Generic fallback when the architecture does not provide an override */
static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs,
							    int step)
{
	ptrace_report_syscall_exit(regs, step);
}
#endif
223+
115224
/**
 * syscall_exit_work - Handle work before returning to user mode
 * @regs:	Pointer to current pt_regs
 * @work:	Current thread syscall work
 *
 * Do one-time syscall specific work.
 */
static __always_inline void syscall_exit_work(struct pt_regs *regs, unsigned long work)
{
	bool step;

	/*
	 * If the syscall was rolled back due to syscall user dispatching,
	 * then the tracers below are not invoked for the same reason as
	 * the entry side was not invoked in syscall_trace_enter(): The ABI
	 * of these syscalls is unknown.
	 */
	if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
		if (unlikely(current->syscall_dispatch.on_dispatch)) {
			current->syscall_dispatch.on_dispatch = false;
			return;
		}
	}

	audit_syscall_exit(regs);

	if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
		trace_syscall_exit(regs, syscall_get_return_value(current, regs));

	/* Report to ptrace for single-step and/or plain syscall tracing */
	step = report_single_step(work);
	if (step || work & SYSCALL_WORK_SYSCALL_TRACE)
		arch_ptrace_report_syscall_exit(regs, step);
}
123257

124258
/**
125-
* syscall_exit_to_user_mode_work - Handle work before returning to user mode
259+
* syscall_exit_to_user_mode_work - Handle one time work before returning to user mode
126260
* @regs: Pointer to currents pt_regs
127261
*
128-
* Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling
129-
* exit_to_user_mode() to perform the final transition to user mode.
262+
* Step 1 of syscall_exit_to_user_mode() with the same calling convention.
130263
*
131-
* Calling convention is the same as for syscall_exit_to_user_mode() and it
132-
* returns with all work handled and interrupts disabled. The caller must
133-
* invoke exit_to_user_mode() before actually switching to user mode to
134-
* make the final state transitions. Interrupts must stay disabled between
135-
* return from this function and the invocation of exit_to_user_mode().
264+
* The caller must invoke steps 2-3 of syscall_exit_to_user_mode() afterwards.
136265
*/
137266
static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
138267
{
@@ -155,15 +284,13 @@ static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
155284
*/
156285
if (unlikely(work & SYSCALL_WORK_EXIT))
157286
syscall_exit_work(regs, work);
158-
local_irq_disable_exit_to_user();
159-
syscall_exit_to_user_mode_prepare(regs);
160287
}
161288

162289
/**
163290
* syscall_exit_to_user_mode - Handle work before returning to user mode
164291
* @regs: Pointer to currents pt_regs
165292
*
166-
* Invoked with interrupts enabled and fully valid regs. Returns with all
293+
* Invoked with interrupts enabled and fully valid @regs. Returns with all
167294
* work handled, interrupts disabled such that the caller can immediately
168295
* switch to user mode. Called from architecture specific syscall and ret
169296
* from fork code.
@@ -176,6 +303,7 @@ static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
176303
* - ptrace (single stepping)
177304
*
178305
* 2) Preparatory work
306+
* - Disable interrupts
179307
* - Exit to user mode loop (common TIF handling). Invokes
180308
* arch_exit_to_user_mode_work() for architecture specific TIF work
181309
* - Architecture specific one time work arch_exit_to_user_mode_prepare()
@@ -184,14 +312,17 @@ static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
184312
* 3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the
185313
* functionality in exit_to_user_mode().
186314
*
187-
* This is a combination of syscall_exit_to_user_mode_work() (1,2) and
188-
* exit_to_user_mode(). This function is preferred unless there is a
189-
* compelling architectural reason to use the separate functions.
315+
* This is a combination of syscall_exit_to_user_mode_work() (1), disabling
316+
* interrupts followed by syscall_exit_to_user_mode_prepare() (2) and
317+
* exit_to_user_mode() (3). This function is preferred unless there is a
318+
* compelling architectural reason to invoke the functions separately.
190319
*/
191320
static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs)
{
	instrumentation_begin();
	/* Step 1: One-time syscall exit work (audit, tracepoint, ptrace) */
	syscall_exit_to_user_mode_work(regs);
	/* Step 2: Disable interrupts and run the exit-to-user preparatory work */
	local_irq_disable_exit_to_user();
	syscall_exit_to_user_mode_prepare(regs);
	instrumentation_end();
	/* Step 3: Final transition (lockdep, tracing, context tracking, RCU) */
	exit_to_user_mode();
}

kernel/entry/common.h

Lines changed: 0 additions & 7 deletions
This file was deleted.

kernel/entry/syscall-common.c

Lines changed: 8 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -1,111 +1,23 @@
11
// SPDX-License-Identifier: GPL-2.0
22

3-
#include <linux/audit.h>
43
#include <linux/entry-common.h>
5-
#include "common.h"
64

75
#define CREATE_TRACE_POINTS
86
#include <trace/events/syscalls.h>
97

10-
static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
11-
{
12-
if (unlikely(audit_context())) {
13-
unsigned long args[6];
14-
15-
syscall_get_arguments(current, regs, args);
16-
audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
17-
}
18-
}
8+
/* Out of line to prevent tracepoint code duplication */
199

long trace_syscall_enter(struct pt_regs *regs, long syscall)
{
	trace_sys_enter(regs, syscall);
	/*
	 * Probes or BPF hooks in the tracepoint may have changed the
	 * system call number. Reread it.
	 */
	return syscall_get_nr(current, regs);
}
8519

/* Invoked from syscall_exit_work() when SYSCALL_WORK_SYSCALL_TRACEPOINT is set */
void trace_syscall_exit(struct pt_regs *regs, long ret)
{
	trace_sys_exit(regs, ret);
}

kernel/entry/syscall_user_dispatch.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
/*
33
* Copyright (C) 2020 Collabora Ltd.
44
*/
5+
6+
#include <linux/entry-common.h>
57
#include <linux/sched.h>
68
#include <linux/prctl.h>
79
#include <linux/ptrace.h>
@@ -15,8 +17,6 @@
1517

1618
#include <asm/syscall.h>
1719

18-
#include "common.h"
19-
2020
static void trigger_sigsys(struct pt_regs *regs)
2121
{
2222
struct kernel_siginfo info;

0 commit comments

Comments
 (0)