Skip to content

Commit 10742d0

Browse files
namhyung authored and acmel committed
perf record: Implement basic filtering for off-cpu
It should honor cpu and task filtering with -a, -C or -p, -t options.

Committer testing:

  # perf record --off-cpu --cpu 1 perf bench sched messaging -l 1000
  # Running 'sched/messaging' benchmark:
  # 20 sender and receiver processes per group
  # 10 groups == 400 processes run

  Total time: 1.722 [sec]
  [ perf record: Woken up 2 times to write data ]
  [ perf record: Captured and wrote 1.446 MB perf.data (7248 samples) ]
  #
  # perf script | head -20
  perf 97164 [001] 38287.696761: 1 cycles: ffffffffb6070174 native_write_msr+0x4 (vmlinux)
  perf 97164 [001] 38287.696764: 1 cycles: ffffffffb6070174 native_write_msr+0x4 (vmlinux)
  perf 97164 [001] 38287.696765: 9 cycles: ffffffffb6070174 native_write_msr+0x4 (vmlinux)
  perf 97164 [001] 38287.696767: 212 cycles: ffffffffb6070176 native_write_msr+0x6 (vmlinux)
  perf 97164 [001] 38287.696768: 5130 cycles: ffffffffb6070176 native_write_msr+0x6 (vmlinux)
  perf 97164 [001] 38287.696770: 123063 cycles: ffffffffb6e0011e syscall_return_via_sysret+0x38 (vmlinux)
  perf 97164 [001] 38287.696803: 2292748 cycles: ffffffffb636c82d __fput+0xad (vmlinux)
  swapper 0 [001] 38287.702852: 1927474 cycles: ffffffffb6761378 mwait_idle_with_hints.constprop.0+0x48 (vmlinux)
  :97513 97513 [001] 38287.767207: 1172536 cycles: ffffffffb612ff65 newidle_balance+0x5 (vmlinux)
  swapper 0 [001] 38287.769567: 1073081 cycles: ffffffffb618216d ktime_get_mono_fast_ns+0xd (vmlinux)
  :97533 97533 [001] 38287.770962: 984460 cycles: ffffffffb65b2900 selinux_socket_sendmsg+0x0 (vmlinux)
  :97540 97540 [001] 38287.772242: 883462 cycles: ffffffffb6d0bf59 irqentry_exit_to_user_mode+0x9 (vmlinux)
  swapper 0 [001] 38287.773633: 741963 cycles: ffffffffb6761378 mwait_idle_with_hints.constprop.0+0x48 (vmlinux)
  :97552 97552 [001] 38287.774539: 606680 cycles: ffffffffb62eda0a page_add_file_rmap+0x7a (vmlinux)
  :97556 97556 [001] 38287.775333: 502254 cycles: ffffffffb634f964 get_obj_cgroup_from_current+0xc4 (vmlinux)
  :97561 97561 [001] 38287.776163: 427891 cycles: ffffffffb61b1522 cgroup_rstat_updated+0x22 (vmlinux)
  swapper 0 [001] 38287.776854: 359030 cycles: ffffffffb612fc5e load_balance+0x9ce (vmlinux)
  :97567 97567 [001] 38287.777312: 330371 cycles: ffffffffb6a8d8d0 skb_set_owner_w+0x0 (vmlinux)
  :97566 97566 [001] 38287.777589: 311622 cycles: ffffffffb614a7a8 native_queued_spin_lock_slowpath+0x148 (vmlinux)
  :97512 97512 [001] 38287.777671: 307851 cycles: ffffffffb62e0f35 find_vma+0x55 (vmlinux)
  #
  # perf record --off-cpu --cpu 4 perf bench sched messaging -l 1000
  # Running 'sched/messaging' benchmark:
  # 20 sender and receiver processes per group
  # 10 groups == 400 processes run

  Total time: 1.613 [sec]
  [ perf record: Woken up 2 times to write data ]
  [ perf record: Captured and wrote 1.415 MB perf.data (6729 samples) ]
  # perf script | head -20
  perf 97650 [004] 38323.728036: 1 cycles: ffffffffb6070174 native_write_msr+0x4 (vmlinux)
  perf 97650 [004] 38323.728040: 1 cycles: ffffffffb6070174 native_write_msr+0x4 (vmlinux)
  perf 97650 [004] 38323.728041: 9 cycles: ffffffffb6070174 native_write_msr+0x4 (vmlinux)
  perf 97650 [004] 38323.728042: 208 cycles: ffffffffb6070176 native_write_msr+0x6 (vmlinux)
  perf 97650 [004] 38323.728044: 5026 cycles: ffffffffb6070176 native_write_msr+0x6 (vmlinux)
  perf 97650 [004] 38323.728046: 119970 cycles: ffffffffb6d0bebc syscall_exit_to_user_mode+0x1c (vmlinux)
  perf 97650 [004] 38323.728078: 2190103 cycles: 54b756 perf_tool__process_synth_event+0x16 (/home/acme/bin/perf)
  swapper 0 [004] 38323.783357: 1593139 cycles: ffffffffb6761378 mwait_idle_with_hints.constprop.0+0x48 (vmlinux)
  swapper 0 [004] 38323.785352: 1593139 cycles: ffffffffb6761378 mwait_idle_with_hints.constprop.0+0x48 (vmlinux)
  swapper 0 [004] 38323.797330: 1418936 cycles: ffffffffb6761378 mwait_idle_with_hints.constprop.0+0x48 (vmlinux)
  swapper 0 [004] 38323.802350: 1418936 cycles: ffffffffb6761378 mwait_idle_with_hints.constprop.0+0x48 (vmlinux)
  swapper 0 [004] 38323.806333: 1418936 cycles: ffffffffb6761378 mwait_idle_with_hints.constprop.0+0x48 (vmlinux)
  :97996 97996 [004] 38323.807145: 1418936 cycles: 7f5db9be6917 [unknown] ([unknown])
  :97959 97959 [004] 38323.807730: 1445074 cycles: ffffffffb6329d36 memcg_slab_post_alloc_hook+0x146 (vmlinux)
  :97959 97959 [004] 38323.808103: 1341584 cycles: ffffffffb62fd90f get_page_from_freelist+0x112f (vmlinux)
  :97959 97959 [004] 38323.808451: 1227537 cycles: ffffffffb65b2905 selinux_socket_sendmsg+0x5 (vmlinux)
  :97959 97959 [004] 38323.808768: 1184321 cycles: ffffffffb6d1ba35 _raw_spin_lock_irqsave+0x15 (vmlinux)
  :97959 97959 [004] 38323.809073: 1153017 cycles: ffffffffb6a8d92d skb_set_owner_w+0x5d (vmlinux)
  :97959 97959 [004] 38323.809402: 1126875 cycles: ffffffffb6329c64 memcg_slab_post_alloc_hook+0x74 (vmlinux)
  :97959 97959 [004] 38323.809695: 1073248 cycles: ffffffffb6e0001d entry_SYSCALL_64+0x1d (vmlinux)
  #

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Blake Jones <blakejones@google.com>
Cc: Hao Luo <haoluo@google.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20220518224725.742882-4-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
1 parent edc41a1 commit 10742d0

4 files changed

Lines changed: 123 additions & 15 deletions

File tree

tools/perf/builtin-record.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -892,7 +892,7 @@ static int record__config_text_poke(struct evlist *evlist)
892892

893893
static int record__config_off_cpu(struct record *rec)
894894
{
895-
return off_cpu_prepare(rec->evlist);
895+
return off_cpu_prepare(rec->evlist, &rec->opts.target);
896896
}
897897

898898
static bool record__kcore_readable(struct machine *machine)

tools/perf/util/bpf_off_cpu.c

Lines changed: 70 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,9 @@
66
#include "util/off_cpu.h"
77
#include "util/perf-hooks.h"
88
#include "util/session.h"
9+
#include "util/target.h"
10+
#include "util/cpumap.h"
11+
#include "util/thread_map.h"
912
#include <bpf/bpf.h>
1013

1114
#include "bpf_skel/off_cpu.skel.h"
@@ -60,8 +63,23 @@ static int off_cpu_config(struct evlist *evlist)
6063
return 0;
6164
}
6265

63-
static void off_cpu_start(void *arg __maybe_unused)
66+
static void off_cpu_start(void *arg)
6467
{
68+
struct evlist *evlist = arg;
69+
70+
/* update task filter for the given workload */
71+
if (!skel->bss->has_cpu && !skel->bss->has_task &&
72+
perf_thread_map__pid(evlist->core.threads, 0) != -1) {
73+
int fd;
74+
u32 pid;
75+
u8 val = 1;
76+
77+
skel->bss->has_task = 1;
78+
fd = bpf_map__fd(skel->maps.task_filter);
79+
pid = perf_thread_map__pid(evlist->core.threads, 0);
80+
bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
81+
}
82+
6583
skel->bss->enabled = 1;
6684
}
6785

@@ -71,31 +89,75 @@ static void off_cpu_finish(void *arg __maybe_unused)
7189
off_cpu_bpf__destroy(skel);
7290
}
7391

74-
int off_cpu_prepare(struct evlist *evlist)
92+
int off_cpu_prepare(struct evlist *evlist, struct target *target)
7593
{
76-
int err;
94+
int err, fd, i;
95+
int ncpus = 1, ntasks = 1;
7796

7897
if (off_cpu_config(evlist) < 0) {
7998
pr_err("Failed to config off-cpu BPF event\n");
8099
return -1;
81100
}
82101

83-
set_max_rlimit();
84-
85-
skel = off_cpu_bpf__open_and_load();
102+
skel = off_cpu_bpf__open();
86103
if (!skel) {
87104
pr_err("Failed to open off-cpu BPF skeleton\n");
88105
return -1;
89106
}
90107

108+
/* don't need to set cpu filter for system-wide mode */
109+
if (target->cpu_list) {
110+
ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
111+
bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
112+
}
113+
114+
if (target__has_task(target)) {
115+
ntasks = perf_thread_map__nr(evlist->core.threads);
116+
bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
117+
}
118+
119+
set_max_rlimit();
120+
121+
err = off_cpu_bpf__load(skel);
122+
if (err) {
123+
pr_err("Failed to load off-cpu skeleton\n");
124+
goto out;
125+
}
126+
127+
if (target->cpu_list) {
128+
u32 cpu;
129+
u8 val = 1;
130+
131+
skel->bss->has_cpu = 1;
132+
fd = bpf_map__fd(skel->maps.cpu_filter);
133+
134+
for (i = 0; i < ncpus; i++) {
135+
cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, i).cpu;
136+
bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
137+
}
138+
}
139+
140+
if (target__has_task(target)) {
141+
u32 pid;
142+
u8 val = 1;
143+
144+
skel->bss->has_task = 1;
145+
fd = bpf_map__fd(skel->maps.task_filter);
146+
147+
for (i = 0; i < ntasks; i++) {
148+
pid = perf_thread_map__pid(evlist->core.threads, i);
149+
bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
150+
}
151+
}
152+
91153
err = off_cpu_bpf__attach(skel);
92154
if (err) {
93155
pr_err("Failed to attach off-cpu BPF skeleton\n");
94156
goto out;
95157
}
96158

97-
if (perf_hooks__set_hook("record_start", off_cpu_start, NULL) ||
98-
perf_hooks__set_hook("record_end", off_cpu_finish, NULL)) {
159+
if (perf_hooks__set_hook("record_start", off_cpu_start, evlist) ||
160+
perf_hooks__set_hook("record_end", off_cpu_finish, evlist)) {
99161
pr_err("Failed to attach off-cpu skeleton\n");
100162
goto out;
101163
}

tools/perf/util/bpf_skel/off_cpu.bpf.c

Lines changed: 48 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -49,12 +49,28 @@ struct {
4949
__uint(max_entries, MAX_ENTRIES);
5050
} off_cpu SEC(".maps");
5151

52+
struct {
53+
__uint(type, BPF_MAP_TYPE_HASH);
54+
__uint(key_size, sizeof(__u32));
55+
__uint(value_size, sizeof(__u8));
56+
__uint(max_entries, 1);
57+
} cpu_filter SEC(".maps");
58+
59+
struct {
60+
__uint(type, BPF_MAP_TYPE_HASH);
61+
__uint(key_size, sizeof(__u32));
62+
__uint(value_size, sizeof(__u8));
63+
__uint(max_entries, 1);
64+
} task_filter SEC(".maps");
65+
5266
/* old kernel task_struct definition */
5367
struct task_struct___old {
5468
long state;
5569
} __attribute__((preserve_access_index));
5670

5771
int enabled = 0;
72+
int has_cpu = 0;
73+
int has_task = 0;
5874

5975
/*
6076
* Old kernel used to call it task_struct->state and now it's '__state'.
@@ -74,6 +90,37 @@ static inline int get_task_state(struct task_struct *t)
7490
return BPF_CORE_READ(t_old, state);
7591
}
7692

93+
static inline int can_record(struct task_struct *t, int state)
94+
{
95+
/* kernel threads don't have user stack */
96+
if (t->flags & PF_KTHREAD)
97+
return 0;
98+
99+
if (state != TASK_INTERRUPTIBLE &&
100+
state != TASK_UNINTERRUPTIBLE)
101+
return 0;
102+
103+
if (has_cpu) {
104+
__u32 cpu = bpf_get_smp_processor_id();
105+
__u8 *ok;
106+
107+
ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
108+
if (!ok)
109+
return 0;
110+
}
111+
112+
if (has_task) {
113+
__u8 *ok;
114+
__u32 pid = t->pid;
115+
116+
ok = bpf_map_lookup_elem(&task_filter, &pid);
117+
if (!ok)
118+
return 0;
119+
}
120+
121+
return 1;
122+
}
123+
77124
SEC("tp_btf/sched_switch")
78125
int on_switch(u64 *ctx)
79126
{
@@ -92,10 +139,7 @@ int on_switch(u64 *ctx)
92139

93140
ts = bpf_ktime_get_ns();
94141

95-
if (prev->flags & PF_KTHREAD)
96-
goto next;
97-
if (state != TASK_INTERRUPTIBLE &&
98-
state != TASK_UNINTERRUPTIBLE)
142+
if (!can_record(prev, state))
99143
goto next;
100144

101145
stack_id = bpf_get_stackid(ctx, &stacks,

tools/perf/util/off_cpu.h

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,15 +2,17 @@
22
#define PERF_UTIL_OFF_CPU_H
33

44
struct evlist;
5+
struct target;
56
struct perf_session;
67

78
#define OFFCPU_EVENT "offcpu-time"
89

910
#ifdef HAVE_BPF_SKEL
10-
int off_cpu_prepare(struct evlist *evlist);
11+
int off_cpu_prepare(struct evlist *evlist, struct target *target);
1112
int off_cpu_write(struct perf_session *session);
1213
#else
13-
static inline int off_cpu_prepare(struct evlist *evlist __maybe_unused)
14+
static inline int off_cpu_prepare(struct evlist *evlist __maybe_unused,
15+
struct target *target __maybe_unused)
1416
{
1517
return -1;
1618
}

0 commit comments

Comments
 (0)