Skip to content

Commit a69040e

Browse files
committed
sched_ext: Simplify breather mechanism with scx_aborting flag
The breather mechanism was introduced in 62dcbab ("sched_ext: Avoid live-locking bypass mode switching") and e32c260 ("sched_ext: Enable the ops breather and eject BPF scheduler on softlockup") to prevent live-locks by injecting delays when CPUs are trapped in dispatch paths. Currently, it uses scx_breather_depth (atomic_t) and scx_in_softlockup (unsigned long) with separate increment/decrement and cleanup operations.

The breather is only activated when aborting, so tie it directly to the exit mechanism. Replace both variables with an scx_aborting flag that is set when exit is claimed and cleared after bypass is enabled. Introduce scx_claim_exit() to consolidate exit_kind claiming and breather enablement. This eliminates scx_clear_softlockup() and simplifies scx_softlockup() and scx_bypass().

The breather mechanism will be replaced by a different abort mechanism in a future patch. This simplification prepares for that change.

Reviewed-by: Dan Schatzberg <schatzberg.dan@gmail.com>
Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
Acked-by: Andrea Righi <arighi@nvidia.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
1 parent 61debc2 commit a69040e

1 file changed

Lines changed: 25 additions & 29 deletions

File tree

kernel/sched/ext.c

Lines changed: 25 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -33,9 +33,8 @@ static DEFINE_MUTEX(scx_enable_mutex);
3333
DEFINE_STATIC_KEY_FALSE(__scx_enabled);
3434
DEFINE_STATIC_PERCPU_RWSEM(scx_fork_rwsem);
3535
static atomic_t scx_enable_state_var = ATOMIC_INIT(SCX_DISABLED);
36-
static unsigned long scx_in_softlockup;
37-
static atomic_t scx_breather_depth = ATOMIC_INIT(0);
3836
static int scx_bypass_depth;
37+
static bool scx_aborting;
3938
static bool scx_init_task_enabled;
4039
static bool scx_switching_all;
4140
DEFINE_STATIC_KEY_FALSE(__scx_switched_all);
@@ -1831,7 +1830,7 @@ static void scx_breather(struct rq *rq)
18311830

18321831
lockdep_assert_rq_held(rq);
18331832

1834-
if (likely(!atomic_read(&scx_breather_depth)))
1833+
if (likely(!READ_ONCE(scx_aborting)))
18351834
return;
18361835

18371836
raw_spin_rq_unlock(rq);
@@ -1840,9 +1839,9 @@ static void scx_breather(struct rq *rq)
18401839

18411840
do {
18421841
int cnt = 1024;
1843-
while (atomic_read(&scx_breather_depth) && --cnt)
1842+
while (READ_ONCE(scx_aborting) && --cnt)
18441843
cpu_relax();
1845-
} while (atomic_read(&scx_breather_depth) &&
1844+
} while (READ_ONCE(scx_aborting) &&
18461845
time_before64(ktime_get_ns(), until));
18471846

18481847
raw_spin_rq_lock(rq);
@@ -3741,30 +3740,14 @@ void scx_softlockup(u32 dur_s)
37413740
goto out_unlock;
37423741
}
37433742

3744-
/* allow only one instance, cleared at the end of scx_bypass() */
3745-
if (test_and_set_bit(0, &scx_in_softlockup))
3746-
goto out_unlock;
3747-
37483743
printk_deferred(KERN_ERR "sched_ext: Soft lockup - CPU%d stuck for %us, disabling \"%s\"\n",
37493744
smp_processor_id(), dur_s, scx_root->ops.name);
37503745

3751-
/*
3752-
* Some CPUs may be trapped in the dispatch paths. Enable breather
3753-
* immediately; otherwise, we might even be able to get to scx_bypass().
3754-
*/
3755-
atomic_inc(&scx_breather_depth);
3756-
37573746
scx_error(sch, "soft lockup - CPU#%d stuck for %us", smp_processor_id(), dur_s);
37583747
out_unlock:
37593748
rcu_read_unlock();
37603749
}
37613750

3762-
static void scx_clear_softlockup(void)
3763-
{
3764-
if (test_and_clear_bit(0, &scx_in_softlockup))
3765-
atomic_dec(&scx_breather_depth);
3766-
}
3767-
37683751
/**
37693752
* scx_bypass - [Un]bypass scx_ops and guarantee forward progress
37703753
* @bypass: true for bypass, false for unbypass
@@ -3827,8 +3810,6 @@ static void scx_bypass(bool bypass)
38273810
ktime_get_ns() - bypass_timestamp);
38283811
}
38293812

3830-
atomic_inc(&scx_breather_depth);
3831-
38323813
/*
38333814
* No task property is changing. We just need to make sure all currently
38343815
* queued tasks are re-queued according to the new scx_rq_bypassing()
@@ -3884,10 +3865,8 @@ static void scx_bypass(bool bypass)
38843865
raw_spin_rq_unlock(rq);
38853866
}
38863867

3887-
atomic_dec(&scx_breather_depth);
38883868
unlock:
38893869
raw_spin_unlock_irqrestore(&bypass_lock, flags);
3890-
scx_clear_softlockup();
38913870
}
38923871

38933872
static void free_exit_info(struct scx_exit_info *ei)
@@ -3982,6 +3961,7 @@ static void scx_disable_workfn(struct kthread_work *work)
39823961

39833962
/* guarantee forward progress by bypassing scx_ops */
39843963
scx_bypass(true);
3964+
WRITE_ONCE(scx_aborting, false);
39853965

39863966
switch (scx_set_enable_state(SCX_DISABLING)) {
39873967
case SCX_DISABLING:
@@ -4104,9 +4084,24 @@ static void scx_disable_workfn(struct kthread_work *work)
41044084
scx_bypass(false);
41054085
}
41064086

4107-
static void scx_disable(enum scx_exit_kind kind)
4087+
static bool scx_claim_exit(struct scx_sched *sch, enum scx_exit_kind kind)
41084088
{
41094089
int none = SCX_EXIT_NONE;
4090+
4091+
if (!atomic_try_cmpxchg(&sch->exit_kind, &none, kind))
4092+
return false;
4093+
4094+
/*
4095+
* Some CPUs may be trapped in the dispatch paths. Enable breather
4096+
* immediately; otherwise, we might not even be able to get to
4097+
* scx_bypass().
4098+
*/
4099+
WRITE_ONCE(scx_aborting, true);
4100+
return true;
4101+
}
4102+
4103+
static void scx_disable(enum scx_exit_kind kind)
4104+
{
41104105
struct scx_sched *sch;
41114106

41124107
if (WARN_ON_ONCE(kind == SCX_EXIT_NONE || kind == SCX_EXIT_DONE))
@@ -4115,7 +4110,7 @@ static void scx_disable(enum scx_exit_kind kind)
41154110
rcu_read_lock();
41164111
sch = rcu_dereference(scx_root);
41174112
if (sch) {
4118-
atomic_try_cmpxchg(&sch->exit_kind, &none, kind);
4113+
scx_claim_exit(sch, kind);
41194114
kthread_queue_work(sch->helper, &sch->disable_work);
41204115
}
41214116
rcu_read_unlock();
@@ -4436,9 +4431,8 @@ static void scx_vexit(struct scx_sched *sch,
44364431
const char *fmt, va_list args)
44374432
{
44384433
struct scx_exit_info *ei = sch->exit_info;
4439-
int none = SCX_EXIT_NONE;
44404434

4441-
if (!atomic_try_cmpxchg(&sch->exit_kind, &none, kind))
4435+
if (!scx_claim_exit(sch, kind))
44424436
return;
44434437

44444438
ei->exit_code = exit_code;
@@ -4654,6 +4648,8 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
46544648
*/
46554649
WARN_ON_ONCE(scx_set_enable_state(SCX_ENABLING) != SCX_DISABLED);
46564650
WARN_ON_ONCE(scx_root);
4651+
if (WARN_ON_ONCE(READ_ONCE(scx_aborting)))
4652+
WRITE_ONCE(scx_aborting, false);
46574653

46584654
atomic_long_set(&scx_nr_rejected, 0);
46594655

0 commit comments

Comments (0)