Skip to content

Commit 4cf4465

Browse files
committed
Merge tag 'sched_ext-for-7.0-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext
Pull sched_ext fixes from Tejun Heo:

 - Various bug fixes for the example schedulers and selftests

* tag 'sched_ext-for-7.0-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  tools/sched_ext: fix getopt not re-parsed on restart
  tools/sched_ext: scx_userland: fix data races on shared counters
  tools/sched_ext: scx_pair: fix stride == 0 crash on single-CPU systems
  tools/sched_ext: scx_central: fix CPU_SET and skeleton leak on early exit
  tools/sched_ext: scx_userland: fix stale data on restart
  tools/sched_ext: scx_flatcg: fix potential stack overflow from VLA in fcg_read_stats
  selftests/sched_ext: Fix rt_stall flaky failure
  tools/sched_ext: scx_userland: fix restart and stats thread lifecycle bugs
  tools/sched_ext: scx_central: fix sched_setaffinity() call with the set size
  tools/sched_ext: scx_flatcg: zero-initialize stats counter array
2 parents 8eb604d + 640c9dc commit 4cf4465

8 files changed

Lines changed: 96 additions & 18 deletions

File tree

tools/sched_ext/scx_central.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,13 @@ int main(int argc, char **argv)
5050
__u64 seq = 0, ecode;
5151
__s32 opt;
5252
cpu_set_t *cpuset;
53+
size_t cpuset_size;
5354

5455
libbpf_set_print(libbpf_print_fn);
5556
signal(SIGINT, sigint_handler);
5657
signal(SIGTERM, sigint_handler);
5758
restart:
59+
optind = 1;
5860
skel = SCX_OPS_OPEN(central_ops, scx_central);
5961

6062
skel->rodata->central_cpu = 0;
@@ -73,6 +75,7 @@ int main(int argc, char **argv)
7375
u32 central_cpu = strtoul(optarg, NULL, 0);
7476
if (central_cpu >= skel->rodata->nr_cpu_ids) {
7577
fprintf(stderr, "invalid central CPU id value, %u given (%u max)\n", central_cpu, skel->rodata->nr_cpu_ids);
78+
scx_central__destroy(skel);
7679
return -1;
7780
}
7881
skel->rodata->central_cpu = (s32)central_cpu;
@@ -106,9 +109,10 @@ int main(int argc, char **argv)
106109
*/
107110
cpuset = CPU_ALLOC(skel->rodata->nr_cpu_ids);
108111
SCX_BUG_ON(!cpuset, "Failed to allocate cpuset");
109-
CPU_ZERO_S(CPU_ALLOC_SIZE(skel->rodata->nr_cpu_ids), cpuset);
110-
CPU_SET(skel->rodata->central_cpu, cpuset);
111-
SCX_BUG_ON(sched_setaffinity(0, sizeof(*cpuset), cpuset),
112+
cpuset_size = CPU_ALLOC_SIZE(skel->rodata->nr_cpu_ids);
113+
CPU_ZERO_S(cpuset_size, cpuset);
114+
CPU_SET_S(skel->rodata->central_cpu, cpuset_size, cpuset);
115+
SCX_BUG_ON(sched_setaffinity(0, cpuset_size, cpuset),
112116
"Failed to affinitize to central CPU %d (max %d)",
113117
skel->rodata->central_cpu, skel->rodata->nr_cpu_ids - 1);
114118
CPU_FREE(cpuset);

tools/sched_ext/scx_cpu0.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ int main(int argc, char **argv)
6969
signal(SIGINT, sigint_handler);
7070
signal(SIGTERM, sigint_handler);
7171
restart:
72+
optind = 1;
7273
skel = SCX_OPS_OPEN(cpu0_ops, scx_cpu0);
7374

7475
skel->rodata->nr_cpus = libbpf_num_possible_cpus();

tools/sched_ext/scx_flatcg.c

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -102,21 +102,27 @@ static float read_cpu_util(__u64 *last_sum, __u64 *last_idle)
102102

103103
static void fcg_read_stats(struct scx_flatcg *skel, __u64 *stats)
104104
{
105-
__u64 cnts[FCG_NR_STATS][skel->rodata->nr_cpus];
105+
__u64 *cnts;
106106
__u32 idx;
107107

108+
cnts = calloc(skel->rodata->nr_cpus, sizeof(__u64));
109+
if (!cnts)
110+
return;
111+
108112
memset(stats, 0, sizeof(stats[0]) * FCG_NR_STATS);
109113

110114
for (idx = 0; idx < FCG_NR_STATS; idx++) {
111115
int ret, cpu;
112116

113117
ret = bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats),
114-
&idx, cnts[idx]);
118+
&idx, cnts);
115119
if (ret < 0)
116120
continue;
117121
for (cpu = 0; cpu < skel->rodata->nr_cpus; cpu++)
118-
stats[idx] += cnts[idx][cpu];
122+
stats[idx] += cnts[cpu];
119123
}
124+
125+
free(cnts);
120126
}
121127

122128
int main(int argc, char **argv)
@@ -135,6 +141,7 @@ int main(int argc, char **argv)
135141
signal(SIGINT, sigint_handler);
136142
signal(SIGTERM, sigint_handler);
137143
restart:
144+
optind = 1;
138145
skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg);
139146

140147
skel->rodata->nr_cpus = libbpf_num_possible_cpus();

tools/sched_ext/scx_pair.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,10 @@ int main(int argc, char **argv)
5353
signal(SIGINT, sigint_handler);
5454
signal(SIGTERM, sigint_handler);
5555
restart:
56+
optind = 1;
5657
skel = SCX_OPS_OPEN(pair_ops, scx_pair);
5758

5859
skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus();
59-
assert(skel->rodata->nr_cpu_ids > 0);
6060
skel->rodata->pair_batch_dur_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
6161

6262
/* pair up the earlier half to the latter by default, override with -s */
@@ -76,6 +76,12 @@ int main(int argc, char **argv)
7676
}
7777
}
7878

79+
/* Stride must be positive to pair distinct CPUs. */
80+
if (stride <= 0) {
81+
fprintf(stderr, "Invalid stride %d, must be positive\n", stride);
82+
scx_pair__destroy(skel);
83+
return -1;
84+
}
7985
bpf_map__set_max_entries(skel->maps.pair_ctx, skel->rodata->nr_cpu_ids / 2);
8086

8187
/* Resize arrays so their element count is equal to cpu count. */

tools/sched_ext/scx_sdt.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ int main(int argc, char **argv)
5151
signal(SIGINT, sigint_handler);
5252
signal(SIGTERM, sigint_handler);
5353
restart:
54+
optind = 1;
5455
skel = SCX_OPS_OPEN(sdt_ops, scx_sdt);
5556

5657
while ((opt = getopt(argc, argv, "fvh")) != -1) {

tools/sched_ext/scx_simple.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ int main(int argc, char **argv)
7171
signal(SIGINT, sigint_handler);
7272
signal(SIGTERM, sigint_handler);
7373
restart:
74+
optind = 1;
7475
skel = SCX_OPS_OPEN(simple_ops, scx_simple);
7576

7677
while ((opt = getopt(argc, argv, "fvh")) != -1) {

tools/sched_ext/scx_userland.c

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ static bool verbose;
5454
static volatile int exit_req;
5555
static int enqueued_fd, dispatched_fd;
5656

57+
static pthread_t stats_printer;
5758
static struct scx_userland *skel;
5859
static struct bpf_link *ops_link;
5960

@@ -156,9 +157,9 @@ static int dispatch_task(__s32 pid)
156157

157158
err = bpf_map_update_elem(dispatched_fd, NULL, &pid, 0);
158159
if (err) {
159-
nr_vruntime_failed++;
160+
__atomic_add_fetch(&nr_vruntime_failed, 1, __ATOMIC_RELAXED);
160161
} else {
161-
nr_vruntime_dispatches++;
162+
__atomic_add_fetch(&nr_vruntime_dispatches, 1, __ATOMIC_RELAXED);
162163
}
163164

164165
return err;
@@ -201,8 +202,8 @@ static int vruntime_enqueue(const struct scx_userland_enqueued_task *bpf_task)
201202
return ENOENT;
202203

203204
update_enqueued(curr, bpf_task);
204-
nr_vruntime_enqueues++;
205-
nr_curr_enqueued++;
205+
__atomic_add_fetch(&nr_vruntime_enqueues, 1, __ATOMIC_RELAXED);
206+
__atomic_add_fetch(&nr_curr_enqueued, 1, __ATOMIC_RELAXED);
206207

207208
/*
208209
* Enqueue the task in a vruntime-sorted list. A more optimal data
@@ -278,9 +279,9 @@ static void dispatch_batch(void)
278279
LIST_INSERT_HEAD(&vruntime_head, task, entries);
279280
break;
280281
}
281-
nr_curr_enqueued--;
282+
__atomic_sub_fetch(&nr_curr_enqueued, 1, __ATOMIC_RELAXED);
282283
}
283-
skel->bss->nr_scheduled = nr_curr_enqueued;
284+
skel->bss->nr_scheduled = __atomic_load_n(&nr_curr_enqueued, __ATOMIC_RELAXED);
284285
}
285286

286287
static void *run_stats_printer(void *arg)
@@ -305,9 +306,9 @@ static void *run_stats_printer(void *arg)
305306
printf("|-----------------------|\n");
306307
printf("| VRUNTIME / USER |\n");
307308
printf("|-----------------------|\n");
308-
printf("| enq: %10llu |\n", nr_vruntime_enqueues);
309-
printf("| disp: %10llu |\n", nr_vruntime_dispatches);
310-
printf("| failed: %10llu |\n", nr_vruntime_failed);
309+
printf("| enq: %10llu |\n", __atomic_load_n(&nr_vruntime_enqueues, __ATOMIC_RELAXED));
310+
printf("| disp: %10llu |\n", __atomic_load_n(&nr_vruntime_dispatches, __ATOMIC_RELAXED));
311+
printf("| failed: %10llu |\n", __atomic_load_n(&nr_vruntime_failed, __ATOMIC_RELAXED));
311312
printf("o-----------------------o\n");
312313
printf("\n\n");
313314
fflush(stdout);
@@ -319,8 +320,6 @@ static void *run_stats_printer(void *arg)
319320

320321
static int spawn_stats_thread(void)
321322
{
322-
pthread_t stats_printer;
323-
324323
return pthread_create(&stats_printer, NULL, run_stats_printer, NULL);
325324
}
326325

@@ -375,6 +374,15 @@ static void pre_bootstrap(int argc, char **argv)
375374

376375
static void bootstrap(char *comm)
377376
{
377+
exit_req = 0;
378+
min_vruntime = 0.0;
379+
__atomic_store_n(&nr_vruntime_enqueues, 0, __ATOMIC_RELAXED);
380+
__atomic_store_n(&nr_vruntime_dispatches, 0, __ATOMIC_RELAXED);
381+
__atomic_store_n(&nr_vruntime_failed, 0, __ATOMIC_RELAXED);
382+
__atomic_store_n(&nr_curr_enqueued, 0, __ATOMIC_RELAXED);
383+
memset(tasks, 0, pid_max * sizeof(*tasks));
384+
LIST_INIT(&vruntime_head);
385+
378386
skel = SCX_OPS_OPEN(userland_ops, scx_userland);
379387

380388
skel->rodata->num_possible_cpus = libbpf_num_possible_cpus();
@@ -428,6 +436,7 @@ int main(int argc, char **argv)
428436

429437
exit_req = 1;
430438
bpf_link__destroy(ops_link);
439+
pthread_join(stats_printer, NULL);
431440
ecode = UEI_REPORT(skel, uei);
432441
scx_userland__destroy(skel);
433442

tools/testing/selftests/sched_ext/rt_stall.c

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,30 @@
2323
#define CORE_ID 0 /* CPU to pin tasks to */
2424
#define RUN_TIME 5 /* How long to run the test in seconds */
2525

26+
/* Signal the parent that setup is complete by writing to a pipe */
27+
static void signal_ready(int fd)
28+
{
29+
char c = 1;
30+
31+
if (write(fd, &c, 1) != 1) {
32+
perror("write to ready pipe");
33+
exit(EXIT_FAILURE);
34+
}
35+
close(fd);
36+
}
37+
38+
/* Wait for a child to signal readiness via a pipe */
39+
static void wait_ready(int fd)
40+
{
41+
char c;
42+
43+
if (read(fd, &c, 1) != 1) {
44+
perror("read from ready pipe");
45+
exit(EXIT_FAILURE);
46+
}
47+
close(fd);
48+
}
49+
2650
/* Simple busy-wait function for test tasks */
2751
static void process_func(void)
2852
{
@@ -122,14 +146,24 @@ static bool sched_stress_test(bool is_ext)
122146

123147
float ext_runtime, rt_runtime, actual_ratio;
124148
int ext_pid, rt_pid;
149+
int ext_ready[2], rt_ready[2];
125150

126151
ksft_print_header();
127152
ksft_set_plan(1);
128153

154+
if (pipe(ext_ready) || pipe(rt_ready)) {
155+
perror("pipe");
156+
ksft_exit_fail();
157+
}
158+
129159
/* Create and set up an EXT task */
130160
ext_pid = fork();
131161
if (ext_pid == 0) {
162+
close(ext_ready[0]);
163+
close(rt_ready[0]);
164+
close(rt_ready[1]);
132165
set_affinity(CORE_ID);
166+
signal_ready(ext_ready[1]);
133167
process_func();
134168
exit(0);
135169
} else if (ext_pid < 0) {
@@ -140,15 +174,30 @@ static bool sched_stress_test(bool is_ext)
140174
/* Create an RT task */
141175
rt_pid = fork();
142176
if (rt_pid == 0) {
177+
close(ext_ready[0]);
178+
close(ext_ready[1]);
179+
close(rt_ready[0]);
143180
set_affinity(CORE_ID);
144181
set_sched(SCHED_FIFO, 50);
182+
signal_ready(rt_ready[1]);
145183
process_func();
146184
exit(0);
147185
} else if (rt_pid < 0) {
148186
perror("fork for RT task");
149187
ksft_exit_fail();
150188
}
151189

190+
/*
191+
* Wait for both children to complete their setup (affinity and
192+
* scheduling policy) before starting the measurement window.
193+
* This prevents flaky failures caused by the RT child's setup
194+
* time eating into the measurement period.
195+
*/
196+
close(ext_ready[1]);
197+
close(rt_ready[1]);
198+
wait_ready(ext_ready[0]);
199+
wait_ready(rt_ready[0]);
200+
152201
/* Let the processes run for the specified time */
153202
sleep(RUN_TIME);
154203

0 commit comments

Comments
 (0)