Skip to content

Commit 31c9387

Browse files
ruanjinjie-eng authored and Thomas Gleixner committed
entry: Inline syscall_exit_work() and syscall_trace_enter()
After switching ARM64 to the generic entry code, syscall_exit_work() appeared as a profiling hotspot because it is not inlined.

Inlining both syscall_trace_enter() and syscall_exit_work() provides a performance gain when any of the work items is enabled. With audit enabled this results in a ~4% performance gain for perf bench basic syscall on a kunpeng920 system:

| Metric     | Baseline    | Inlined     | Change |
| ---------- | ----------- | ----------- | ------ |
| Total time | 2.353 [sec] | 2.264 [sec] | ↓3.8%  |
| usecs/op   | 0.235374    | 0.226472    | ↓3.8%  |
| ops/sec    | 4,248,588   | 4,415,554   | ↑3.9%  |

Small gains can be observed on x86 as well, though the generated code optimizes for the work case, which is counterproductive for high performance scenarios where such entry/exit work is usually avoided.

Avoid this by marking the work check in syscall_enter_from_user_mode_work() unlikely, which is what the corresponding check in the exit path does already.

[ tglx: Massage changelog and add the unlikely() ]

Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/20260128031934.3906955-14-ruanjinjie@huawei.com
1 parent 578b21f commit 31c9387

4 files changed

Lines changed: 102 additions & 99 deletions

File tree

include/linux/entry-common.h

Lines changed: 92 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
#ifndef __LINUX_ENTRYCOMMON_H
33
#define __LINUX_ENTRYCOMMON_H
44

5+
#include <linux/audit.h>
56
#include <linux/irq-entry-common.h>
67
#include <linux/livepatch.h>
78
#include <linux/ptrace.h>
@@ -63,7 +64,58 @@ static __always_inline int arch_ptrace_report_syscall_entry(struct pt_regs *regs
6364
}
6465
#endif
6566

66-
long syscall_trace_enter(struct pt_regs *regs, unsigned long work);
67+
bool syscall_user_dispatch(struct pt_regs *regs);
68+
long trace_syscall_enter(struct pt_regs *regs, long syscall);
69+
void trace_syscall_exit(struct pt_regs *regs, long ret);
70+
71+
/*
 * Forward a syscall entry to audit_syscall_entry().
 *
 * Nothing happens unless audit_context() returns a non-zero value; that
 * case is annotated unlikely(). The local argument array has room for six
 * entries, but only the syscall number and args[0] through args[3] are
 * handed to audit_syscall_entry().
 */
static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
72+
{
73+
if (unlikely(audit_context())) {
74+
unsigned long args[6];
75+
76+
syscall_get_arguments(current, regs, args);
77+
audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
78+
}
79+
}
80+
81+
/*
 * syscall_trace_enter - Run the syscall entry work selected by @work
 * @regs:	Register state handed on to the individual work handlers
 * @work:	Bitmask of SYSCALL_WORK_* flags selecting which handlers run
 *
 * The handlers run in a fixed order: syscall user dispatch, ptrace,
 * seccomp, tracepoint and finally audit. The audit step is not gated by
 * @work; syscall_enter_audit() is always called once the earlier steps
 * did not bail out.
 *
 * Returns -1L when syscall_user_dispatch() returns true, when the ptrace
 * entry report returns non-zero or SYSCALL_WORK_SYSCALL_EMU is set, or when
 * __secure_computing() returns -1. Otherwise returns the value left in
 * 'ret' if it is non-zero, else the syscall number re-read after the
 * handlers ran.
 */
static __always_inline long syscall_trace_enter(struct pt_regs *regs, unsigned long work)
82+
{
83+
long syscall, ret = 0;
84+
85+
/*
86+
* Handle Syscall User Dispatch. This must come first, since
87+
* the ABI here can be something that doesn't make sense for
88+
* other syscall_work features.
89+
*/
90+
if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
91+
if (syscall_user_dispatch(regs))
92+
return -1L;
93+
}
94+
95+
/* Handle ptrace */
96+
if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) {
97+
ret = arch_ptrace_report_syscall_entry(regs);
98+
if (ret || (work & SYSCALL_WORK_SYSCALL_EMU))
99+
return -1L;
100+
}
101+
102+
/* Do seccomp after ptrace, to catch any tracer changes. */
103+
if (work & SYSCALL_WORK_SECCOMP) {
104+
ret = __secure_computing();
105+
if (ret == -1L)
106+
return ret;
107+
}
108+
109+
/* Either of the above might have changed the syscall number */
110+
syscall = syscall_get_nr(current, regs);
111+
112+
if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT))
113+
syscall = trace_syscall_enter(regs, syscall);
114+
115+
syscall_enter_audit(regs, syscall);
116+
117+
/* GNU "?:" with omitted middle operand: ret if non-zero, else syscall */
return ret ? : syscall;
118+
}
67119

68120
/**
69121
* syscall_enter_from_user_mode_work - Check and handle work before invoking
@@ -130,6 +182,19 @@ static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, l
130182
return ret;
131183
}
132184

185+
/*
186+
* If SYSCALL_EMU is set, then the only reason to report is when
187+
* SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
188+
* instruction has been already reported in syscall_enter_from_user_mode().
189+
*/
190+
static __always_inline bool report_single_step(unsigned long work)
191+
{
192+
/* SYSCALL_EMU takes precedence: never report a step when it is set */
if (work & SYSCALL_WORK_SYSCALL_EMU)
193+
return false;
194+
195+
/* The masked bit is converted to bool: true iff SYSCALL_EXIT_TRAP is set */
return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
196+
}
197+
133198
/**
134199
* arch_ptrace_report_syscall_exit - Architecture specific ptrace_report_syscall_exit()
135200
*
@@ -155,7 +220,32 @@ static __always_inline void arch_ptrace_report_syscall_exit(struct pt_regs *regs
155220
*
156221
* Do one-time syscall specific work.
157222
*/
158-
void syscall_exit_work(struct pt_regs *regs, unsigned long work);
223+
static __always_inline void syscall_exit_work(struct pt_regs *regs, unsigned long work)
224+
{
225+
bool step;
226+
227+
/*
228+
* If the syscall was rolled back due to syscall user dispatching,
229+
* then the tracers below are not invoked for the same reason as
230+
* the entry side was not invoked in syscall_trace_enter(): The ABI
231+
* of these syscalls is unknown.
232+
*/
233+
if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
234+
if (unlikely(current->syscall_dispatch.on_dispatch)) {
235+
/* Clear the marker so it is not seen again on a later exit */
current->syscall_dispatch.on_dispatch = false;
236+
return;
237+
}
238+
}
239+
240+
/* Not gated by a @work bit: called on every pass that gets here */
audit_syscall_exit(regs);
241+
242+
if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
243+
trace_syscall_exit(regs, syscall_get_return_value(current, regs));
244+
245+
step = report_single_step(work);
246+
/* '&' binds tighter than '||': step || (work & SYSCALL_WORK_SYSCALL_TRACE) */
if (step || work & SYSCALL_WORK_SYSCALL_TRACE)
247+
arch_ptrace_report_syscall_exit(regs, step);
248+
}
159249

160250
/**
161251
* syscall_exit_to_user_mode_work - Handle one time work before returning to user mode

kernel/entry/common.h

Lines changed: 0 additions & 7 deletions
This file was deleted.

kernel/entry/syscall-common.c

Lines changed: 8 additions & 88 deletions
Original file line number | Diff line number | Diff line change
@@ -1,103 +1,23 @@
11
// SPDX-License-Identifier: GPL-2.0
22

3-
#include <linux/audit.h>
43
#include <linux/entry-common.h>
5-
#include "common.h"
64

75
#define CREATE_TRACE_POINTS
86
#include <trace/events/syscalls.h>
97

10-
static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
11-
{
12-
if (unlikely(audit_context())) {
13-
unsigned long args[6];
14-
15-
syscall_get_arguments(current, regs, args);
16-
audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
17-
}
18-
}
8+
/* Out of line to prevent tracepoint code duplication */
199

20-
long syscall_trace_enter(struct pt_regs *regs, unsigned long work)
10+
long trace_syscall_enter(struct pt_regs *regs, long syscall)
2111
{
22-
long syscall, ret = 0;
23-
12+
trace_sys_enter(regs, syscall);
2413
/*
25-
* Handle Syscall User Dispatch. This must comes first, since
26-
* the ABI here can be something that doesn't make sense for
27-
* other syscall_work features.
14+
* Probes or BPF hooks in the tracepoint may have changed the
15+
* system call number. Reread it.
2816
*/
29-
if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
30-
if (syscall_user_dispatch(regs))
31-
return -1L;
32-
}
33-
34-
/* Handle ptrace */
35-
if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) {
36-
ret = arch_ptrace_report_syscall_entry(regs);
37-
if (ret || (work & SYSCALL_WORK_SYSCALL_EMU))
38-
return -1L;
39-
}
40-
41-
/* Do seccomp after ptrace, to catch any tracer changes. */
42-
if (work & SYSCALL_WORK_SECCOMP) {
43-
ret = __secure_computing();
44-
if (ret == -1L)
45-
return ret;
46-
}
47-
48-
/* Either of the above might have changed the syscall number */
49-
syscall = syscall_get_nr(current, regs);
50-
51-
if (unlikely(work & SYSCALL_WORK_SYSCALL_TRACEPOINT)) {
52-
trace_sys_enter(regs, syscall);
53-
/*
54-
* Probes or BPF hooks in the tracepoint may have changed the
55-
* system call number as well.
56-
*/
57-
syscall = syscall_get_nr(current, regs);
58-
}
59-
60-
syscall_enter_audit(regs, syscall);
61-
62-
return ret ? : syscall;
17+
return syscall_get_nr(current, regs);
6318
}
6419

65-
/*
66-
* If SYSCALL_EMU is set, then the only reason to report is when
67-
* SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
68-
* instruction has been already reported in syscall_enter_from_user_mode().
69-
*/
70-
static inline bool report_single_step(unsigned long work)
20+
void trace_syscall_exit(struct pt_regs *regs, long ret)
7121
{
72-
if (work & SYSCALL_WORK_SYSCALL_EMU)
73-
return false;
74-
75-
return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP;
76-
}
77-
78-
void syscall_exit_work(struct pt_regs *regs, unsigned long work)
79-
{
80-
bool step;
81-
82-
/*
83-
* If the syscall was rolled back due to syscall user dispatching,
84-
* then the tracers below are not invoked for the same reason as
85-
* the entry side was not invoked in syscall_trace_enter(): The ABI
86-
* of these syscalls is unknown.
87-
*/
88-
if (work & SYSCALL_WORK_SYSCALL_USER_DISPATCH) {
89-
if (unlikely(current->syscall_dispatch.on_dispatch)) {
90-
current->syscall_dispatch.on_dispatch = false;
91-
return;
92-
}
93-
}
94-
95-
audit_syscall_exit(regs);
96-
97-
if (work & SYSCALL_WORK_SYSCALL_TRACEPOINT)
98-
trace_sys_exit(regs, syscall_get_return_value(current, regs));
99-
100-
step = report_single_step(work);
101-
if (step || work & SYSCALL_WORK_SYSCALL_TRACE)
102-
arch_ptrace_report_syscall_exit(regs, step);
22+
trace_sys_exit(regs, ret);
10323
}

kernel/entry/syscall_user_dispatch.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,8 @@
22
/*
33
* Copyright (C) 2020 Collabora Ltd.
44
*/
5+
6+
#include <linux/entry-common.h>
57
#include <linux/sched.h>
68
#include <linux/prctl.h>
79
#include <linux/ptrace.h>
@@ -15,8 +17,6 @@
1517

1618
#include <asm/syscall.h>
1719

18-
#include "common.h"
19-
2020
static void trigger_sigsys(struct pt_regs *regs)
2121
{
2222
struct kernel_siginfo info;

0 commit comments

Comments
 (0)