Skip to content

Commit c948d9f

Browse files
committed
sched_ext: Add scx_cpu0 example scheduler
Add scx_cpu0, a simple scheduler that queues all tasks to a single DSQ and only dispatches them from CPU0 in FIFO order. This is useful for testing bypass behavior when many tasks are concentrated on a single CPU. If the load balancer doesn't work, bypass mode can trigger task hangs or RCU stalls as the queue is long and there's only one CPU working on it. v2: Check whether task is on CPU0 at enqueue using scx_bpf_task_cpu() instead of nr_cpus_allowed (Andrea Righi). Cc: Dan Schatzberg <schatzberg.dan@gmail.com> Cc: Emil Tsalapatis <etsal@meta.com> Reviewed-by: Andrea Righi <arighi@nvidia.com> Signed-off-by: Tejun Heo <tj@kernel.org>
1 parent 582f700 commit c948d9f

3 files changed

Lines changed: 195 additions & 1 deletion

File tree

tools/sched_ext/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ $(INCLUDE_DIR)/%.bpf.skel.h: $(SCXOBJ_DIR)/%.bpf.o $(INCLUDE_DIR)/vmlinux.h $(BP
187187

188188
SCX_COMMON_DEPS := include/scx/common.h include/scx/user_exit_info.h | $(BINDIR)
189189

190-
c-sched-targets = scx_simple scx_qmap scx_central scx_flatcg
190+
c-sched-targets = scx_simple scx_cpu0 scx_qmap scx_central scx_flatcg
191191

192192
$(addprefix $(BINDIR)/,$(c-sched-targets)): \
193193
$(BINDIR)/%: \

tools/sched_ext/scx_cpu0.bpf.c

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/*
3+
* A CPU0 scheduler.
4+
*
5+
* This scheduler queues all tasks to a shared DSQ and only dispatches them on
6+
* CPU0 in FIFO order. This is useful for testing bypass behavior when many
7+
* tasks are concentrated on a single CPU. If the load balancer doesn't work,
8+
* bypass mode can trigger task hangs or RCU stalls as the queue is long and
9+
* there's only one CPU working on it.
10+
*
11+
* - Statistics tracking how many tasks are queued to local and CPU0 DSQs.
12+
* - Termination notification for userspace.
13+
*
14+
* Copyright (c) 2025 Meta Platforms, Inc. and affiliates.
15+
* Copyright (c) 2025 Tejun Heo <tj@kernel.org>
16+
*/
17+
#include <scx/common.bpf.h>
18+
19+
char _license[] SEC("license") = "GPL";
20+
21+
const volatile u32 nr_cpus = 32; /* !0 for veristat, set during init */
22+
23+
UEI_DEFINE(uei);
24+
25+
/*
26+
* We create a custom DSQ with ID 0 that we dispatch to and consume from on
27+
* CPU0.
28+
*/
29+
#define DSQ_CPU0 0
30+
31+
/*
 * Per-CPU array of two u64 counters, bumped through stat_inc():
 * index 0 counts inserts into the local DSQ, index 1 counts inserts into
 * DSQ_CPU0.
 */
struct {
32+
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
33+
__uint(key_size, sizeof(u32));
34+
__uint(value_size, sizeof(u64));
35+
__uint(max_entries, 2); /* [local, cpu0] */
36+
} stats SEC(".maps");
37+
38+
/*
 * Bump the counter stored at slot @idx of the per-CPU stats map.
 * A failed lookup is ignored and leaves the counters untouched.
 */
static void stat_inc(u32 idx)
{
	u64 *slot;

	slot = bpf_map_lookup_elem(&stats, &idx);
	if (!slot)
		return;

	*slot += 1;
}
44+
45+
/*
 * CPU selection callback: always nominate CPU 0, regardless of @p,
 * @prev_cpu or @wake_flags.
 */
s32 BPF_STRUCT_OPS(cpu0_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags)
{
	const s32 target_cpu = 0;

	return target_cpu;
}
49+
50+
/*
 * Enqueue callback. A task that scx_bpf_task_cpu() reports as being on CPU 0
 * is inserted into the shared DSQ_CPU0 queue with the caller's @enq_flags;
 * any other task is inserted into the local DSQ.
 */
void BPF_STRUCT_OPS(cpu0_enqueue, struct task_struct *p, u64 enq_flags)
{
	if (scx_bpf_task_cpu(p) == 0) {
		stat_inc(1);	/* count cpu0 queueing */
		scx_bpf_dsq_insert(p, DSQ_CPU0, SCX_SLICE_DFL, enq_flags);
		return;
	}

	/*
	 * select_cpu() always picks CPU0. If @p is not on CPU0, it can't run on
	 * CPU 0. Queue on whichever CPU it's currently on. Note that @enq_flags
	 * is not forwarded on this path; 0 is passed instead.
	 */
	stat_inc(0);	/* count local queueing */
	scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
}
65+
66+
/*
 * Dispatch callback. Only CPU 0 pulls work out of DSQ_CPU0; on every other
 * CPU this returns without moving anything. @prev is unused.
 */
void BPF_STRUCT_OPS(cpu0_dispatch, s32 cpu, struct task_struct *prev)
{
	if (cpu != 0)
		return;

	scx_bpf_dsq_move_to_local(DSQ_CPU0);
}
71+
72+
s32 BPF_STRUCT_OPS_SLEEPABLE(cpu0_init)
73+
{
74+
return scx_bpf_create_dsq(DSQ_CPU0, -1);
75+
}
76+
77+
/*
 * Exit callback: pass the exit info @ei to UEI_RECORD() together with uei,
 * the object declared by UEI_DEFINE(uei) earlier in this file.
 */
void BPF_STRUCT_OPS(cpu0_exit, struct scx_exit_info *ei)
78+
{
79+
UEI_RECORD(uei, ei);
80+
}
81+
82+
/*
 * Ops table named "cpu0", wiring up the select_cpu, enqueue, dispatch, init
 * and exit callbacks defined in this file.
 */
SCX_OPS_DEFINE(cpu0_ops,
83+
.select_cpu = (void *)cpu0_select_cpu,
84+
.enqueue = (void *)cpu0_enqueue,
85+
.dispatch = (void *)cpu0_dispatch,
86+
.init = (void *)cpu0_init,
87+
.exit = (void *)cpu0_exit,
88+
.name = "cpu0");

tools/sched_ext/scx_cpu0.c

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/*
3+
* Copyright (c) 2025 Meta Platforms, Inc. and affiliates.
4+
* Copyright (c) 2025 Tejun Heo <tj@kernel.org>
5+
*/
6+
#include <stdio.h>
7+
#include <unistd.h>
8+
#include <signal.h>
9+
#include <assert.h>
10+
#include <libgen.h>
11+
#include <bpf/bpf.h>
12+
#include <scx/common.h>
13+
#include "scx_cpu0.bpf.skel.h"
14+
15+
/*
 * Usage text printed by main() for -h or an unrecognized option. The single
 * %s conversion is filled with the program's basename.
 */
const char help_fmt[] =
16+
"A cpu0 sched_ext scheduler.\n"
17+
"\n"
18+
"See the top-level comment in .bpf.c for more details.\n"
19+
"\n"
20+
"Usage: %s [-v]\n"
21+
"\n"
22+
" -v Print libbpf debug messages\n"
23+
" -h Display this help and exit\n";
24+
25+
/* Set by the -v option; when false, libbpf_print_fn() drops LIBBPF_DEBUG output. */
static bool verbose;
26+
/* Set to 1 by sigint_handler(); polled by the stats loop in main(). */
static volatile int exit_req;
27+
28+
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
29+
{
30+
if (level == LIBBPF_DEBUG && !verbose)
31+
return 0;
32+
return vfprintf(stderr, format, args);
33+
}
34+
35+
static void sigint_handler(int sig)
36+
{
37+
exit_req = 1;
38+
}
39+
40+
/*
 * Sum each slot of the stats map across all possible CPUs.
 *
 * @skel:  skeleton whose stats map is read
 * @stats: output array of two totals; a slot whose map lookup fails is
 *         reported as zero
 */
static void read_stats(struct scx_cpu0 *skel, __u64 *stats)
{
	int nr_cpus = libbpf_num_possible_cpus();
	assert(nr_cpus > 0);
	/* one value per possible CPU, reused for both slots */
	__u64 percpu[nr_cpus];
	int map_fd = bpf_map__fd(skel->maps.stats);
	__u32 slot;

	for (slot = 0; slot < 2; slot++) {
		__u64 total = 0;
		int cpu;

		if (bpf_map_lookup_elem(map_fd, &slot, percpu) >= 0) {
			for (cpu = 0; cpu < nr_cpus; cpu++)
				total += percpu[cpu];
		}
		stats[slot] = total;
	}
}
60+
61+
int main(int argc, char **argv)
62+
{
63+
struct scx_cpu0 *skel;
64+
struct bpf_link *link;
65+
__u32 opt;
66+
__u64 ecode;
67+
68+
libbpf_set_print(libbpf_print_fn);
69+
signal(SIGINT, sigint_handler);
70+
signal(SIGTERM, sigint_handler);
71+
restart:
72+
skel = SCX_OPS_OPEN(cpu0_ops, scx_cpu0);
73+
74+
skel->rodata->nr_cpus = libbpf_num_possible_cpus();
75+
76+
while ((opt = getopt(argc, argv, "vh")) != -1) {
77+
switch (opt) {
78+
case 'v':
79+
verbose = true;
80+
break;
81+
default:
82+
fprintf(stderr, help_fmt, basename(argv[0]));
83+
return opt != 'h';
84+
}
85+
}
86+
87+
SCX_OPS_LOAD(skel, cpu0_ops, scx_cpu0, uei);
88+
link = SCX_OPS_ATTACH(skel, cpu0_ops, scx_cpu0);
89+
90+
while (!exit_req && !UEI_EXITED(skel, uei)) {
91+
__u64 stats[2];
92+
93+
read_stats(skel, stats);
94+
printf("local=%llu cpu0=%llu\n", stats[0], stats[1]);
95+
fflush(stdout);
96+
sleep(1);
97+
}
98+
99+
bpf_link__destroy(link);
100+
ecode = UEI_REPORT(skel, uei);
101+
scx_cpu0__destroy(skel);
102+
103+
if (UEI_ECODE_RESTART(ecode))
104+
goto restart;
105+
return 0;
106+
}

0 commit comments

Comments
 (0)