Skip to content

Commit 5c2938f

Browse files
captain5050 authored and namhyung committed
perf syscalltbl: Remove struct syscalltbl
The syscalltbl held entries of system call name and number pairs, generated from a native syscalltbl at start up. As there are gaps in the system call number there is a notion of index into the table. Going forward we want the system call table to be identifiable by a machine type, for example, i386 vs x86-64. Change the interface to the syscalltbl so (1) a (currently unused machine type of EM_HOST) is passed (2) the index to syscall number and system call name mapping is computed at build time. Two tables are used for this, an array of system call number to name, an array of system call numbers sorted by the system call name. The sorted array doesn't store strings in part to save memory and relocations. The index notion is carried forward and is an index into the sorted array of system call numbers, the data structures are opaque (held only in syscalltbl.c), and so the number of indices for a machine type is exposed as a new API. The arrays are computed in the syscalltbl.sh script and so no start-up time computation and storage is necessary. Signed-off-by: Ian Rogers <irogers@google.com> Reviewed-by: Howard Chu <howardchu95@gmail.com> Reviewed-by: Charlie Jenkins <charlie@rivosinc.com> Reviewed-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Arnaldo Carvalho de Melo <acme@kernel.org> Link: https://lore.kernel.org/r/20250319050741.269828-6-irogers@google.com Signed-off-by: Namhyung Kim <namhyung@kernel.org>
1 parent 3d94b84 commit 5c2938f

4 files changed

Lines changed: 117 additions & 160 deletions

File tree

tools/perf/builtin-trace.c

Lines changed: 64 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,6 @@ enum summary_mode {
149149

150150
struct trace {
151151
struct perf_tool tool;
152-
struct syscalltbl *sctbl;
153152
struct {
154153
/** Sorted sycall numbers used by the trace. */
155154
struct syscall *table;
@@ -188,6 +187,14 @@ struct trace {
188187
pid_t *entries;
189188
struct bpf_map *map;
190189
} filter_pids;
190+
/*
191+
* TODO: The map is from an ID (aka system call number) to struct
192+
* syscall_stats. If there is >1 e_machine, such as i386 and x86-64
193+
* processes, then the stats here will gather wrong the statistics for
194+
* the non EM_HOST system calls. A fix would be to add the e_machine
195+
* into the key, but this would make the code inconsistent with the
196+
* per-thread version.
197+
*/
191198
struct hashmap *syscall_stats;
192199
double duration_filter;
193200
double runtime_ms;
@@ -2141,7 +2148,7 @@ static int syscall__read_info(struct syscall *sc, struct trace *trace)
21412148
return 0;
21422149
}
21432150

2144-
name = syscalltbl__name(trace->sctbl, sc->id);
2151+
name = syscalltbl__name(sc->e_machine, sc->id);
21452152
if (name == NULL) {
21462153
sc->nonexistent = true;
21472154
return -EEXIST;
@@ -2241,10 +2248,14 @@ static int trace__validate_ev_qualifier(struct trace *trace)
22412248

22422249
strlist__for_each_entry(pos, trace->ev_qualifier) {
22432250
const char *sc = pos->s;
2244-
int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
2251+
/*
2252+
* TODO: Assume more than the validation/warnings are all for
2253+
* the same binary type as perf.
2254+
*/
2255+
int id = syscalltbl__id(EM_HOST, sc), match_next = -1;
22452256

22462257
if (id < 0) {
2247-
id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
2258+
id = syscalltbl__strglobmatch_first(EM_HOST, sc, &match_next);
22482259
if (id >= 0)
22492260
goto matches;
22502261

@@ -2264,7 +2275,7 @@ static int trace__validate_ev_qualifier(struct trace *trace)
22642275
continue;
22652276

22662277
while (1) {
2267-
id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
2278+
id = syscalltbl__strglobmatch_next(EM_HOST, sc, &match_next);
22682279
if (id < 0)
22692280
break;
22702281
if (nr_allocated == nr_used) {
@@ -2722,6 +2733,7 @@ static int trace__sys_enter(struct trace *trace, struct evsel *evsel,
27222733
int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
27232734
int augmented_args_size = 0;
27242735
void *augmented_args = NULL;
2736+
/* TODO: get e_machine from thread. */
27252737
struct syscall *sc = trace__syscall_info(trace, evsel, EM_HOST, id);
27262738
struct thread_trace *ttrace;
27272739

@@ -2796,6 +2808,7 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel,
27962808
struct thread_trace *ttrace;
27972809
struct thread *thread;
27982810
int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2811+
/* TODO: get e_machine from thread. */
27992812
struct syscall *sc = trace__syscall_info(trace, evsel, EM_HOST, id);
28002813
char msg[1024];
28012814
void *args, *augmented_args = NULL;
@@ -2871,6 +2884,7 @@ static int trace__sys_exit(struct trace *trace, struct evsel *evsel,
28712884
struct thread *thread;
28722885
int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0, printed = 0;
28732886
int alignment = trace->args_alignment;
2887+
/* TODO: get e_machine from thread. */
28742888
struct syscall *sc = trace__syscall_info(trace, evsel, EM_HOST, id);
28752889
struct thread_trace *ttrace;
28762890

@@ -3224,6 +3238,7 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel,
32243238

32253239
if (evsel == trace->syscalls.events.bpf_output) {
32263240
int id = perf_evsel__sc_tp_uint(evsel, id, sample);
3241+
/* TODO: get e_machine from thread. */
32273242
struct syscall *sc = trace__syscall_info(trace, evsel, EM_HOST, id);
32283243

32293244
if (sc) {
@@ -3731,9 +3746,9 @@ static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, str
37313746
return trace->skel->progs.syscall_unaugmented;
37323747
}
37333748

3734-
static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
3749+
static void trace__init_syscall_bpf_progs(struct trace *trace, int e_machine, int id)
37353750
{
3736-
struct syscall *sc = trace__syscall_info(trace, NULL, EM_HOST, id);
3751+
struct syscall *sc = trace__syscall_info(trace, NULL, e_machine, id);
37373752

37383753
if (sc == NULL)
37393754
return;
@@ -3742,22 +3757,22 @@ static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
37423757
sc->bpf_prog.sys_exit = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_exit : NULL, "exit");
37433758
}
37443759

3745-
static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id)
3760+
static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int e_machine, int id)
37463761
{
3747-
struct syscall *sc = trace__syscall_info(trace, NULL, EM_HOST, id);
3762+
struct syscall *sc = trace__syscall_info(trace, NULL, e_machine, id);
37483763
return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->skel->progs.syscall_unaugmented);
37493764
}
37503765

3751-
static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
3766+
static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int e_machine, int id)
37523767
{
3753-
struct syscall *sc = trace__syscall_info(trace, NULL, EM_HOST, id);
3768+
struct syscall *sc = trace__syscall_info(trace, NULL, e_machine, id);
37543769
return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->skel->progs.syscall_unaugmented);
37553770
}
37563771

3757-
static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int key, unsigned int *beauty_array)
3772+
static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int e_machine, int key, unsigned int *beauty_array)
37583773
{
37593774
struct tep_format_field *field;
3760-
struct syscall *sc = trace__syscall_info(trace, NULL, EM_HOST, key);
3775+
struct syscall *sc = trace__syscall_info(trace, NULL, e_machine, key);
37613776
const struct btf_type *bt;
37623777
char *struct_offset, *tmp, name[32];
37633778
bool can_augment = false;
@@ -3854,8 +3869,8 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace
38543869
return NULL;
38553870

38563871
try_to_find_pair:
3857-
for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) {
3858-
int id = syscalltbl__id_at_idx(trace->sctbl, i);
3872+
for (int i = 0, num_idx = syscalltbl__num_idx(sc.e_machine); i < num_idx; ++i) {
3873+
int id = syscalltbl__id_at_idx(sc.e_machine, i);
38593874
/* calling trace__syscall_info() may invalidate '_sc' */
38603875
struct syscall *pair = trace__syscall_info(trace, NULL, sc.e_machine, id);
38613876
struct bpf_program *pair_prog;
@@ -3941,35 +3956,35 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace
39413956
return NULL;
39423957
}
39433958

3944-
static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
3959+
static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace, int e_machine)
39453960
{
39463961
int map_enter_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_enter);
39473962
int map_exit_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_exit);
39483963
int beauty_map_fd = bpf_map__fd(trace->skel->maps.beauty_map_enter);
39493964
int err = 0;
39503965
unsigned int beauty_array[6];
39513966

3952-
for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) {
3953-
int prog_fd, key = syscalltbl__id_at_idx(trace->sctbl, i);
3967+
for (int i = 0, num_idx = syscalltbl__num_idx(e_machine); i < num_idx; ++i) {
3968+
int prog_fd, key = syscalltbl__id_at_idx(e_machine, i);
39543969

39553970
if (!trace__syscall_enabled(trace, key))
39563971
continue;
39573972

3958-
trace__init_syscall_bpf_progs(trace, key);
3973+
trace__init_syscall_bpf_progs(trace, e_machine, key);
39593974

39603975
// It'll get at least the "!raw_syscalls:unaugmented"
3961-
prog_fd = trace__bpf_prog_sys_enter_fd(trace, key);
3976+
prog_fd = trace__bpf_prog_sys_enter_fd(trace, e_machine, key);
39623977
err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY);
39633978
if (err)
39643979
break;
3965-
prog_fd = trace__bpf_prog_sys_exit_fd(trace, key);
3980+
prog_fd = trace__bpf_prog_sys_exit_fd(trace, e_machine, key);
39663981
err = bpf_map_update_elem(map_exit_fd, &key, &prog_fd, BPF_ANY);
39673982
if (err)
39683983
break;
39693984

39703985
/* use beauty_map to tell BPF how many bytes to collect, set beauty_map's value here */
39713986
memset(beauty_array, 0, sizeof(beauty_array));
3972-
err = trace__bpf_sys_enter_beauty_map(trace, key, (unsigned int *)beauty_array);
3987+
err = trace__bpf_sys_enter_beauty_map(trace, e_machine, key, (unsigned int *)beauty_array);
39733988
if (err)
39743989
continue;
39753990
err = bpf_map_update_elem(beauty_map_fd, &key, beauty_array, BPF_ANY);
@@ -4005,9 +4020,9 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
40054020
* first and second arg (this one on the raw_syscalls:sys_exit prog
40064021
* array tail call, then that one will be used.
40074022
*/
4008-
for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) {
4009-
int key = syscalltbl__id_at_idx(trace->sctbl, i);
4010-
struct syscall *sc = trace__syscall_info(trace, NULL, EM_HOST, key);
4023+
for (int i = 0, num_idx = syscalltbl__num_idx(e_machine); i < num_idx; ++i) {
4024+
int key = syscalltbl__id_at_idx(e_machine, i);
4025+
struct syscall *sc = trace__syscall_info(trace, NULL, e_machine, key);
40114026
struct bpf_program *pair_prog;
40124027
int prog_fd;
40134028

@@ -4032,7 +4047,7 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace)
40324047
* Get syscall info again as find usable entry above might
40334048
* modify the syscall table and shuffle it.
40344049
*/
4035-
sc = trace__syscall_info(trace, NULL, EM_HOST, key);
4050+
sc = trace__syscall_info(trace, NULL, e_machine, key);
40364051
sc->bpf_prog.sys_enter = pair_prog;
40374052

40384053
/*
@@ -4457,8 +4472,13 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
44574472
goto out_error_mem;
44584473

44594474
#ifdef HAVE_BPF_SKEL
4460-
if (trace->skel && trace->skel->progs.sys_enter)
4461-
trace__init_syscalls_bpf_prog_array_maps(trace);
4475+
if (trace->skel && trace->skel->progs.sys_enter) {
4476+
/*
4477+
* TODO: Initialize for all host binary machine types, not just
4478+
* those matching the perf binary.
4479+
*/
4480+
trace__init_syscalls_bpf_prog_array_maps(trace, EM_HOST);
4481+
}
44624482
#endif
44634483

44644484
if (trace->ev_qualifier_ids.nr > 0) {
@@ -4483,7 +4503,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
44834503
* So just disable this beautifier (SCA_FD, SCA_FDAT) when 'close' is
44844504
* not in use.
44854505
*/
4486-
trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(trace->sctbl, "close"));
4506+
/* TODO: support for more than just perf binary machine type close. */
4507+
trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(EM_HOST, "close"));
44874508

44884509
err = trace__expand_filters(trace, &evsel);
44894510
if (err)
@@ -4796,7 +4817,7 @@ static struct syscall_entry *syscall__sort_stats(struct hashmap *syscall_stats)
47964817
return entry;
47974818
}
47984819

4799-
static size_t syscall__dump_stats(struct trace *trace, FILE *fp,
4820+
static size_t syscall__dump_stats(struct trace *trace, int e_machine, FILE *fp,
48004821
struct hashmap *syscall_stats)
48014822
{
48024823
size_t printed = 0;
@@ -4827,7 +4848,7 @@ static size_t syscall__dump_stats(struct trace *trace, FILE *fp,
48274848
pct = avg ? 100.0 * stddev_stats(&stats->stats) / avg : 0.0;
48284849
avg /= NSEC_PER_MSEC;
48294850

4830-
sc = trace__syscall_info(trace, /*evsel=*/NULL, EM_HOST, entry->syscall);
4851+
sc = trace__syscall_info(trace, /*evsel=*/NULL, e_machine, entry->syscall);
48314852
if (!sc)
48324853
continue;
48334854

@@ -4854,14 +4875,14 @@ static size_t syscall__dump_stats(struct trace *trace, FILE *fp,
48544875
}
48554876

48564877
static size_t thread__dump_stats(struct thread_trace *ttrace,
4857-
struct trace *trace, FILE *fp)
4878+
struct trace *trace, int e_machine, FILE *fp)
48584879
{
4859-
return syscall__dump_stats(trace, fp, ttrace->syscall_stats);
4880+
return syscall__dump_stats(trace, e_machine, fp, ttrace->syscall_stats);
48604881
}
48614882

4862-
static size_t system__dump_stats(struct trace *trace, FILE *fp)
4883+
static size_t system__dump_stats(struct trace *trace, int e_machine, FILE *fp)
48634884
{
4864-
return syscall__dump_stats(trace, fp, trace->syscall_stats);
4885+
return syscall__dump_stats(trace, e_machine, fp, trace->syscall_stats);
48654886
}
48664887

48674888
static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
@@ -4887,7 +4908,8 @@ static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trac
48874908
else if (fputc('\n', fp) != EOF)
48884909
++printed;
48894910

4890-
printed += thread__dump_stats(ttrace, trace, fp);
4911+
/* TODO: get e_machine from thread. */
4912+
printed += thread__dump_stats(ttrace, trace, EM_HOST, fp);
48914913

48924914
return printed;
48934915
}
@@ -4948,7 +4970,8 @@ static size_t trace__fprintf_total_summary(struct trace *trace, FILE *fp)
49484970
else if (fputc('\n', fp) != EOF)
49494971
++printed;
49504972

4951-
printed += system__dump_stats(trace, fp);
4973+
/* TODO: get all system e_machines. */
4974+
printed += system__dump_stats(trace, EM_HOST, fp);
49524975

49534976
return printed;
49544977
}
@@ -5140,8 +5163,9 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
51405163
*sep = '\0';
51415164

51425165
list = 0;
5143-
if (syscalltbl__id(trace->sctbl, s) >= 0 ||
5144-
syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
5166+
/* TODO: support for more than just perf binary machine type syscalls. */
5167+
if (syscalltbl__id(EM_HOST, s) >= 0 ||
5168+
syscalltbl__strglobmatch_first(EM_HOST, s, &idx) >= 0) {
51455169
list = 1;
51465170
goto do_concat;
51475171
}
@@ -5294,7 +5318,6 @@ static void trace__exit(struct trace *trace)
52945318
syscall__exit(&trace->syscalls.table[i]);
52955319
zfree(&trace->syscalls.table);
52965320
}
5297-
syscalltbl__delete(trace->sctbl);
52985321
zfree(&trace->perfconfig_events);
52995322
}
53005323

@@ -5443,9 +5466,8 @@ int cmd_trace(int argc, const char **argv)
54435466
sigaction(SIGCHLD, &sigchld_act, NULL);
54445467

54455468
trace.evlist = evlist__new();
5446-
trace.sctbl = syscalltbl__new();
54475469

5448-
if (trace.evlist == NULL || trace.sctbl == NULL) {
5470+
if (trace.evlist == NULL) {
54495471
pr_err("Not enough memory to run!\n");
54505472
err = -ENOMEM;
54515473
goto out;

tools/perf/scripts/syscalltbl.sh

Lines changed: 13 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -50,37 +50,27 @@ fi
5050
infile="$1"
5151
outfile="$2"
5252

53-
nxt=0
54-
55-
syscall_macro() {
56-
nr="$1"
57-
name="$2"
58-
59-
echo " [$nr] = \"$name\","
60-
}
61-
62-
emit() {
63-
nr="$1"
64-
entry="$2"
65-
66-
syscall_macro "$nr" "$entry"
67-
}
68-
69-
echo "static const char *const syscalltbl[] = {" > $outfile
70-
7153
sorted_table=$(mktemp /tmp/syscalltbl.XXXXXX)
7254
grep -E "^[0-9]+[[:space:]]+$abis" "$infile" | sort -n > $sorted_table
7355

74-
max_nr=0
56+
echo "static const char *const syscall_num_to_name[] = {" > $outfile
7557
# the params are: nr abi name entry compat
7658
# use _ for intentionally unused variables according to SC2034
7759
while read nr _ name _ _; do
78-
emit "$nr" "$name" >> $outfile
79-
max_nr=$nr
60+
echo " [$nr] = \"$name\"," >> $outfile
8061
done < $sorted_table
62+
echo "};" >> $outfile
8163

82-
rm -f $sorted_table
64+
echo "static const uint16_t syscall_sorted_names[] = {" >> $outfile
8365

66+
# When sorting by name, add a suffix of 0s upto 20 characters so that system
67+
# calls that differ with a numerical suffix don't sort before those
68+
# without. This default behavior of sort differs from that of strcmp used at
69+
# runtime. Use sed to strip the trailing 0s suffix afterwards.
70+
grep -E "^[0-9]+[[:space:]]+$abis" "$infile" | awk '{printf $3; for (i = length($3); i < 20; i++) { printf "0"; }; print " " $1}'| sort | sed 's/\([a-zA-Z1-9]\+\)0\+ \([0-9]\+\)/\1 \2/' > $sorted_table
71+
while read name nr; do
72+
echo " $nr, /* $name */" >> $outfile
73+
done < $sorted_table
8474
echo "};" >> $outfile
8575

86-
echo "#define SYSCALLTBL_MAX_ID ${max_nr}" >> $outfile
76+
rm -f $sorted_table

0 commit comments

Comments (0)