@@ -2,6 +2,7 @@
 #ifndef __LINUX_ENTRYCOMMON_H
 #define __LINUX_ENTRYCOMMON_H
 
+#include <linux/audit.h>
 #include <linux/irq-entry-common.h>
 #include <linux/livepatch.h>
 #include <linux/ptrace.h>
@@ -45,7 +46,84 @@
 				 SYSCALL_WORK_SYSCALL_EXIT_TRAP	| \
 				 ARCH_SYSCALL_WORK_EXIT)
 
-long syscall_trace_enter(struct pt_regs *regs, long syscall, unsigned long work);
+/**
+ * arch_ptrace_report_syscall_entry - Architecture specific ptrace_report_syscall_entry() wrapper
+ *
+ * Invoked from syscall_trace_enter() to wrap ptrace_report_syscall_entry().
+ *
+ * This allows architecture specific ptrace_report_syscall_entry()
+ * implementations. If not defined by the architecture, this falls back to
+ * ptrace_report_syscall_entry().
+ */
+static __always_inline int arch_ptrace_report_syscall_entry(struct pt_regs *regs);
+
+#ifndef arch_ptrace_report_syscall_entry
+static __always_inline int arch_ptrace_report_syscall_entry(struct pt_regs *regs)
+{
+	return ptrace_report_syscall_entry(regs);
+}
+#endif
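
The #ifndef above follows the usual kernel override convention: an architecture that wants its own wrapper defines the function (typically in its asm/entry-common.h, pulled in before this point) and defines a preprocessor symbol of the same name so the generic fallback is compiled out. A minimal sketch of such an override; arch_fixup_trace_regs() is a hypothetical helper, not a real API:

	#define arch_ptrace_report_syscall_entry arch_ptrace_report_syscall_entry
	static __always_inline int arch_ptrace_report_syscall_entry(struct pt_regs *regs)
	{
		/* Hypothetical arch-specific register fixup before the tracer sees them */
		arch_fixup_trace_regs(regs);
		return ptrace_report_syscall_entry(regs);
	}
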
+
+bool syscall_user_dispatch(struct pt_regs *regs);
+long trace_syscall_enter(struct pt_regs *regs, long syscall);
+void trace_syscall_exit(struct pt_regs *regs, long ret);
+
+static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
+{
+	if (unlikely(audit_context())) {
+		unsigned long args[6];
+
+		syscall_get_arguments(current, regs, args);
+		audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
+	}
+}
+
+static __always_inline long syscall_trace_enter(struct pt_regs *regs, unsigned long work)
+{
+	long syscall, ret = 0;
+
+	/*
+	 * Handle Syscall User Dispatch. This must come first, since
+	 * the ABI here can be something that doesn't make sense for
+	 * other syscall_work features.
+	 */
+	if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
+		if (syscall_user_dispatch(regs))
+			return -1L;
+	}
+
+	/*
+	 * User space got a time slice extension granted and relinquishes
+	 * the CPU. The work stops the slice timer to avoid an extra round
+	 * through hrtimer_interrupt().
+	 */
+	if (work & SYSCALL_WORK_SYSCALL_RSEQ_SLICE)
+		rseq_syscall_enter_work(syscall_get_nr(current, regs));
+
+	/* Handle ptrace */
+	if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) {
+		ret = arch_ptrace_report_syscall_entry(regs);
+		if (ret || (work & SYSCALL_WORK_SYSCALL_EMU))
+			return -1L;
+	}
+
+	/* Do seccomp after ptrace, to catch any tracer changes. */
+	if (work & SYSCALL_WORK_SECCOMP) {
+		ret = __secure_computing();
+		if (ret == -1L)
+			return ret;
+	}
+
+	/* Either of the above might have changed the syscall number */
+	syscall = syscall_get_nr(current, regs);
+
+	if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT))
+		syscall = trace_syscall_enter(regs, syscall);
+
+	syscall_enter_audit(regs, syscall);
+
+	return ret ? : syscall;
+}
 
 /**
  * syscall_enter_from_user_mode_work - Check and handle work before invoking
@@ -75,7 +153,7 @@ static __always_inline long syscall_enter_from_user_mode_work(struct pt_regs *re
 	unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
 
 	if (work & SYSCALL_WORK_ENTER)
-		syscall = syscall_trace_enter(regs, syscall, work);
+		syscall = syscall_trace_enter(regs, work);
 
 	return syscall;
 }
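
As a usage sketch (not part of this patch): an architecture's C syscall entry point built on these helpers could look roughly like this. arch_do_syscall(), arch_syscall_nr() and arch_invoke_syscall() are placeholders for whatever entry point, accessors and dispatch the architecture actually provides:

	static void arch_do_syscall(struct pt_regs *regs)
	{
		/* Placeholder accessor for the arch's syscall number register */
		long nr = syscall_enter_from_user_mode(regs, arch_syscall_nr(regs));

		/* Negative means the syscall was skipped or rolled back by entry work */
		if (nr >= 0 && nr < NR_syscalls)
			arch_invoke_syscall(regs, nr);	/* placeholder dispatcher */

		syscall_exit_to_user_mode(regs);
	}
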
@@ -112,27 +190,78 @@ static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, l
 	return ret;
 }
 
+/*
+ * If SYSCALL_EMU is set, then the only reason to report is when
+ * SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
+ * instruction has already been reported in syscall_enter_from_user_mode().
+ */
+static __always_inline bool report_single_step(unsigned long work)
+{
+	if (work & SYSCALL_WORK_SYSCALL_EMU)
+		return false;
+
+	return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
+}
+
+/**
+ * arch_ptrace_report_syscall_exit - Architecture specific ptrace_report_syscall_exit() wrapper
+ *
+ * This allows architecture specific ptrace_report_syscall_exit()
+ * implementations. If not defined by the architecture, this falls back to
+ * ptrace_report_syscall_exit().
+ */
+static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs,
+							    int step);
+
+#ifndef arch_ptrace_report_syscall_exit
+static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs,
+							     int step)
+{
+	ptrace_report_syscall_exit(regs, step);
+}
+#endif
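
Mirroring the entry side, an architecture can override the exit wrapper with the same define-the-symbol convention. A purely illustrative sketch, for a hypothetical architecture that has to clear a hardware single-step flag before reporting; arch_clear_single_step() is an assumed helper, not a real API:

	#define arch_ptrace_report_syscall_exit arch_ptrace_report_syscall_exit
	static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs,
								     int step)
	{
		if (step)
			arch_clear_single_step(regs);	/* assumed helper, illustrative only */
		ptrace_report_syscall_exit(regs, step);
	}
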
+
 /**
  * syscall_exit_work - Handle work before returning to user mode
  * @regs: Pointer to current pt_regs
  * @work: Current thread syscall work
  *
  * Do one-time syscall specific work.
  */
-void syscall_exit_work(struct pt_regs *regs, unsigned long work);
+static __always_inline void syscall_exit_work(struct pt_regs *regs, unsigned long work)
+{
+	bool step;
+
+	/*
+	 * If the syscall was rolled back due to syscall user dispatching,
+	 * then the tracers below are not invoked for the same reason as
+	 * the entry side was not invoked in syscall_trace_enter(): The ABI
+	 * of these syscalls is unknown.
+	 */
+	if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
+		if (unlikely(current->syscall_dispatch.on_dispatch)) {
+			current->syscall_dispatch.on_dispatch = false;
+			return;
+		}
+	}
+
+	audit_syscall_exit(regs);
+
+	if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
+		trace_syscall_exit(regs, syscall_get_return_value(current, regs));
+
+	step = report_single_step(work);
+	if (step || work & SYSCALL_WORK_SYSCALL_TRACE)
+		arch_ptrace_report_syscall_exit(regs, step);
+}
 
 /**
- * syscall_exit_to_user_mode_work - Handle work before returning to user mode
+ * syscall_exit_to_user_mode_work - Handle one-time work before returning to user mode
  * @regs: Pointer to currents pt_regs
  *
- * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling
- * exit_to_user_mode() to perform the final transition to user mode.
+ * Step 1 of syscall_exit_to_user_mode() with the same calling convention.
  *
- * Calling convention is the same as for syscall_exit_to_user_mode() and it
- * returns with all work handled and interrupts disabled. The caller must
- * invoke exit_to_user_mode() before actually switching to user mode to
- * make the final state transitions. Interrupts must stay disabled between
- * return from this function and the invocation of exit_to_user_mode().
+ * The caller must invoke steps 2-3 of syscall_exit_to_user_mode() afterwards.
  */
 static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
 {
@@ -155,15 +284,13 @@ static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
 	 */
 	if (unlikely(work & SYSCALL_WORK_EXIT))
 		syscall_exit_work(regs, work);
-	local_irq_disable_exit_to_user();
-	syscall_exit_to_user_mode_prepare(regs);
 }
 
 /**
  * syscall_exit_to_user_mode - Handle work before returning to user mode
  * @regs: Pointer to currents pt_regs
  *
- * Invoked with interrupts enabled and fully valid regs. Returns with all
+ * Invoked with interrupts enabled and fully valid @regs. Returns with all
  * work handled, interrupts disabled such that the caller can immediately
  * switch to user mode. Called from architecture specific syscall and ret
  * from fork code.
@@ -176,6 +303,7 @@ static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
  *  - ptrace (single stepping)
  *
  * 2) Preparatory work
+ *  - Disable interrupts
  *  - Exit to user mode loop (common TIF handling). Invokes
  *    arch_exit_to_user_mode_work() for architecture specific TIF work
  *  - Architecture specific one time work arch_exit_to_user_mode_prepare()
@@ -184,14 +312,17 @@ static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs)
  * 3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the
  *    functionality in exit_to_user_mode().
  *
- * This is a combination of syscall_exit_to_user_mode_work() (1,2) and
- * exit_to_user_mode(). This function is preferred unless there is a
- * compelling architectural reason to use the separate functions.
+ * This is a combination of syscall_exit_to_user_mode_work() (1), disabling
+ * interrupts followed by syscall_exit_to_user_mode_prepare() (2) and
+ * exit_to_user_mode() (3). This function is preferred unless there is a
+ * compelling architectural reason to invoke the functions separately.
  */
 static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs)
 {
 	instrumentation_begin();
 	syscall_exit_to_user_mode_work(regs);
+	local_irq_disable_exit_to_user();
+	syscall_exit_to_user_mode_prepare(regs);
 	instrumentation_end();
 	exit_to_user_mode();
 }
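
For an architecture that does have a compelling reason to split the sequence, the three steps can be open-coded in its exit path. This sketch simply unrolls syscall_exit_to_user_mode() above, leaving room for arch-specific work between the steps:

	instrumentation_begin();
	syscall_exit_to_user_mode_work(regs);		/* 1) one-time syscall work */
	local_irq_disable_exit_to_user();		/* 2) disable interrupts ... */
	syscall_exit_to_user_mode_prepare(regs);	/*    ... and run the exit-to-user loop */
	instrumentation_end();
	/* Architecture specific work can go here, with interrupts still disabled */
	exit_to_user_mode();				/* 3) final transition */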