
Commit 4af5266

Merge branch 'bpf-fix-conditions-when-timer-wq-can-be-called'
Alexei Starovoitov says:

====================
bpf: Fix conditions when timer/wq can be called

From: Alexei Starovoitov <ast@kernel.org>

v2->v3:
- Add missing refcount_put
- Detect recursion of individual async_cb
v2: https://lore.kernel.org/bpf/20260204040834.22263-4-alexei.starovoitov@gmail.com/

v1->v2:
- Add a recursion check
v1: https://lore.kernel.org/bpf/20260204030927.171-1-alexei.starovoitov@gmail.com/
====================

Link: https://patch.msgid.link/20260204055147.54960-1-alexei.starovoitov@gmail.com
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
2 parents 5e6e1dc + 6e65cf8 commit 4af5266

3 files changed: 132 additions & 6 deletions


kernel/bpf/helpers.c

Lines changed: 25 additions & 6 deletions
@@ -1427,10 +1427,22 @@ static int bpf_async_update_prog_callback(struct bpf_async_cb *cb,
 	return 0;
 }
 
+static DEFINE_PER_CPU(struct bpf_async_cb *, async_cb_running);
+
 static int bpf_async_schedule_op(struct bpf_async_cb *cb, enum bpf_async_op op,
 				 u64 nsec, u32 timer_mode)
 {
-	WARN_ON_ONCE(!in_hardirq());
+	/*
+	 * Do not schedule another operation on this cpu if it's in irq_work
+	 * callback that is processing async_cmds queue. Otherwise the following
+	 * loop is possible:
+	 * bpf_timer_start() -> bpf_async_schedule_op() -> irq_work_queue().
+	 * irqrestore -> bpf_async_irq_worker() -> tracepoint -> bpf_timer_start().
+	 */
+	if (this_cpu_read(async_cb_running) == cb) {
+		bpf_async_refcount_put(cb);
+		return -EDEADLK;
+	}
 
 	struct bpf_async_cmd *cmd = kmalloc_nolock(sizeof(*cmd), 0, NUMA_NO_NODE);
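
The guard above is the heart of the fix: bpf_async_schedule_op() now refuses to queue another command for an async callback that this CPU is already draining, and returns -EDEADLK instead of re-queueing forever. A minimal userspace sketch of the same reentrancy-guard pattern, with a thread-local pointer standing in for the per-CPU async_cb_running (all names below are illustrative, not kernel API):

#include <errno.h>
#include <stdio.h>

struct async_cb { int id; };

/* Thread-local stand-in for DEFINE_PER_CPU(struct bpf_async_cb *, async_cb_running). */
static _Thread_local struct async_cb *cb_running;

/* Stand-in for bpf_async_schedule_op(): reject ops for a cb we are draining. */
static int schedule_op(struct async_cb *cb)
{
	if (cb_running == cb)
		return -EDEADLK;
	printf("queued op for cb %d\n", cb->id);
	return 0;
}

/* Stand-in for bpf_async_irq_worker(): drain queued commands for one cb. */
static void drain(struct async_cb *cb)
{
	cb_running = cb;
	/* A re-entrant schedule_op(cb) from here (think: a tracepoint
	 * firing inside the drain loop) fails fast instead of looping. */
	if (schedule_op(cb) == -EDEADLK)
		printf("recursion on cb %d rejected\n", cb->id);
	cb_running = NULL;
}

int main(void)
{
	struct async_cb cb = { .id = 1 };

	schedule_op(&cb);	/* succeeds: nothing is being drained */
	drain(&cb);		/* the nested call is rejected */
	return 0;
}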

@@ -1473,6 +1485,11 @@ static const struct bpf_func_proto bpf_timer_set_callback_proto = {
 	.arg2_type	= ARG_PTR_TO_FUNC,
 };
 
+static bool defer_timer_wq_op(void)
+{
+	return in_hardirq() || irqs_disabled();
+}
+
 BPF_CALL_3(bpf_timer_start, struct bpf_async_kern *, async, u64, nsecs, u64, flags)
 {
 	struct bpf_hrtimer *t;
@@ -1500,7 +1517,7 @@ BPF_CALL_3(bpf_timer_start, struct bpf_async_kern *, async, u64, nsecs, u64, fla
 	if (!refcount_inc_not_zero(&t->cb.refcnt))
 		return -ENOENT;
 
-	if (!in_hardirq()) {
+	if (!defer_timer_wq_op()) {
 		hrtimer_start(&t->timer, ns_to_ktime(nsecs), mode);
 		bpf_async_refcount_put(&t->cb);
 		return 0;
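
Note how defer_timer_wq_op() broadens the old test: operations were previously executed inline unless the caller was in hardirq context, whereas now they are also deferred to irq_work whenever interrupts are disabled. That covers cases like the one in the selftest below, where a tracepoint runs under hrtimer internals with IRQs off and calling hrtimer_start() synchronously could deadlock. The same predicate replaces the bare in_hardirq() checks in bpf_timer_cancel(), bpf_async_cancel_and_free(), bpf_wq_start(), and bpf_timer_cancel_async() in the remaining hunks.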
@@ -1524,7 +1541,7 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, async)
 	bool inc = false;
 	int ret = 0;
 
-	if (in_hardirq())
+	if (defer_timer_wq_op())
 		return -EOPNOTSUPP;
 
 	t = READ_ONCE(async->timer);
@@ -1625,13 +1642,15 @@ static void bpf_async_irq_worker(struct irq_work *work)
 		return;
 
 	list = llist_reverse_order(list);
+	this_cpu_write(async_cb_running, cb);
 	llist_for_each_safe(pos, n, list) {
 		struct bpf_async_cmd *cmd;
 
 		cmd = container_of(pos, struct bpf_async_cmd, node);
 		bpf_async_process_op(cb, cmd->op, cmd->nsec, cmd->mode);
 		kfree_nolock(cmd);
 	}
+	this_cpu_write(async_cb_running, NULL);
 }
 
 static void bpf_async_cancel_and_free(struct bpf_async_kern *async)
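
bpf_async_irq_worker() publishes the callback it is draining in async_cb_running for the duration of the loop and clears it afterwards. Together with the -EDEADLK check in bpf_async_schedule_op(), this closes the recursion window where a BPF program triggered from inside the drain loop (e.g. via a tracepoint) would re-queue work for the same callback on the same CPU.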
@@ -1650,7 +1669,7 @@ static void bpf_async_cancel_and_free(struct bpf_async_kern *async)
 	 * refcnt. Either synchronously or asynchronously in irq_work.
 	 */
 
-	if (!in_hardirq()) {
+	if (!defer_timer_wq_op()) {
 		bpf_async_process_op(cb, BPF_ASYNC_CANCEL, 0, 0);
 	} else {
 		(void)bpf_async_schedule_op(cb, BPF_ASYNC_CANCEL, 0, 0);
@@ -3161,7 +3180,7 @@ __bpf_kfunc int bpf_wq_start(struct bpf_wq *wq, unsigned int flags)
 	if (!refcount_inc_not_zero(&w->cb.refcnt))
 		return -ENOENT;
 
-	if (!in_hardirq()) {
+	if (!defer_timer_wq_op()) {
 		schedule_work(&w->work);
 		bpf_async_refcount_put(&w->cb);
 		return 0;
@@ -4461,7 +4480,7 @@ __bpf_kfunc int bpf_timer_cancel_async(struct bpf_timer *timer)
 	if (!refcount_inc_not_zero(&cb->refcnt))
 		return -ENOENT;
 
-	if (!in_hardirq()) {
+	if (!defer_timer_wq_op()) {
 		struct bpf_hrtimer *t = container_of(cb, struct bpf_hrtimer, cb);
 
 		ret = hrtimer_try_to_cancel(&t->timer);
tools/testing/selftests/bpf/prog_tests/timer_start_deadlock.c

Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "timer_start_deadlock.skel.h"
+
+void test_timer_start_deadlock(void)
+{
+	struct timer_start_deadlock *skel;
+	int err, prog_fd;
+	LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+	skel = timer_start_deadlock__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+		return;
+
+	err = timer_start_deadlock__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto cleanup;
+
+	prog_fd = bpf_program__fd(skel->progs.start_timer);
+
+	/*
+	 * Run the syscall program that attempts to deadlock.
+	 * If the kernel deadlocks, this call will never return.
+	 */
+	err = bpf_prog_test_run_opts(prog_fd, &opts);
+	ASSERT_OK(err, "prog_test_run");
+	ASSERT_EQ(opts.retval, 0, "prog_retval");
+
+	ASSERT_EQ(skel->bss->tp_called, 1, "tp_called");
+cleanup:
+	timer_start_deadlock__destroy(skel);
+}
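
Assuming the standard BPF selftest harness, this test should be runnable on its own with ./test_progs -t timer_start_deadlock from tools/testing/selftests/bpf.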
tools/testing/selftests/bpf/progs/timer_start_deadlock.c

Lines changed: 74 additions & 0 deletions
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define CLOCK_MONOTONIC 1
+
+char _license[] SEC("license") = "GPL";
+
+struct elem {
+	struct bpf_timer timer;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, struct elem);
+} timer_map SEC(".maps");
+
+volatile int in_timer_start;
+volatile int tp_called;
+
+static int timer_cb(void *map, int *key, struct elem *value)
+{
+	return 0;
+}
+
+SEC("tp_btf/hrtimer_cancel")
+int BPF_PROG(tp_hrtimer_cancel, struct hrtimer *hrtimer)
+{
+	struct bpf_timer *timer;
+	int key = 0;
+
+	if (!in_timer_start)
+		return 0;
+
+	tp_called = 1;
+	timer = bpf_map_lookup_elem(&timer_map, &key);
+	if (!timer)
+		return 0;
+
+	/*
+	 * Call bpf_timer_start() from the tracepoint within hrtimer logic
+	 * on the same timer to make sure it doesn't deadlock.
+	 */
+	bpf_timer_start(timer, 1000000000, 0);
+	return 0;
+}
+
+SEC("syscall")
+int start_timer(void *ctx)
+{
+	struct bpf_timer *timer;
+	int key = 0;
+
+	timer = bpf_map_lookup_elem(&timer_map, &key);
+	/* The verifier requires a NULL check before using the map value. */
+	if (!timer)
+		return 0;
+	bpf_timer_init(timer, &timer_map, CLOCK_MONOTONIC);
+	bpf_timer_set_callback(timer, timer_cb);
+
+	/*
+	 * Call bpf_timer_start() twice, so that the 2nd call does
+	 * remove_hrtimer() and fires the trace_hrtimer_cancel() tracepoint.
+	 */
+	in_timer_start = 1;
+	bpf_timer_start(timer, 1000000000, 0);
+	bpf_timer_start(timer, 1000000000, 0);
+	in_timer_start = 0;
+	return 0;
+}
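
The trick in start_timer() is the back-to-back bpf_timer_start() calls: re-arming an already-pending timer makes the kernel cancel it first via remove_hrtimer(), which fires the hrtimer_cancel tracepoint while hrtimer internals hold their locks with IRQs disabled. The attached tp_btf program then calls bpf_timer_start() on the very same timer. Before the fix this could loop: bpf_timer_start() -> bpf_async_schedule_op() -> irq_work_queue(), then on irq restore bpf_async_irq_worker() -> tracepoint -> bpf_timer_start() again. With the fix the re-entrant call is deferred safely or rejected with -EDEADLK, and the test only needs to verify that bpf_prog_test_run_opts() returns at all and that the tracepoint fired (tp_called == 1).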
