Skip to content

Commit 837c818

Browse files
committed
Merge tag 'sched-urgent-2026-01-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Ingo Molnar: "Misc deadline scheduler fixes, mainly for a new category of bugs that were discovered and fixed recently: - Fix a race condition in the DL server - Fix a DL server bug which can result in incorrectly going idle when there's work available - Fix a DL server bug which triggers a WARN() due to broken get_prio_dl() logic and subsequent misbehavior - Fix double update_rq_clock() calls - Fix setscheduler() assumption about static priorities - Make sure balancing callbacks are always called - Plus a handful of preparatory commits for the fixes" * tag 'sched-urgent-2026-01-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: sched/deadline: Use ENQUEUE_MOVE to allow priority change sched: Deadline has dynamic priority sched: Audit MOVE vs balance_callbacks sched: Fold rq-pin swizzle into __balance_callbacks() sched/deadline: Avoid double update_rq_clock() sched/deadline: Ensure get_prio_dl() is up-to-date sched/deadline: Fix server stopping with runnable tasks sched: Provide idle_rq() helper sched/deadline: Fix potential race in dl_add_task_root_domain() sched/deadline: Remove unnecessary comment in dl_add_task_root_domain()
2 parents cee4757 + 627cc25 commit 837c818

6 files changed

Lines changed: 59 additions & 56 deletions

File tree

include/linux/sched.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1874,7 +1874,6 @@ static inline int task_nice(const struct task_struct *p)
18741874
extern int can_nice(const struct task_struct *p, const int nice);
18751875
extern int task_curr(const struct task_struct *p);
18761876
extern int idle_cpu(int cpu);
1877-
extern int available_idle_cpu(int cpu);
18781877
extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
18791878
extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
18801879
extern void sched_set_fifo(struct task_struct *p);

kernel/sched/core.c

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4950,9 +4950,13 @@ struct balance_callback *splice_balance_callbacks(struct rq *rq)
49504950
return __splice_balance_callbacks(rq, true);
49514951
}
49524952

4953-
static void __balance_callbacks(struct rq *rq)
4953+
void __balance_callbacks(struct rq *rq, struct rq_flags *rf)
49544954
{
4955+
if (rf)
4956+
rq_unpin_lock(rq, rf);
49554957
do_balance_callbacks(rq, __splice_balance_callbacks(rq, false));
4958+
if (rf)
4959+
rq_repin_lock(rq, rf);
49564960
}
49574961

49584962
void balance_callbacks(struct rq *rq, struct balance_callback *head)
@@ -4991,7 +4995,7 @@ static inline void finish_lock_switch(struct rq *rq)
49914995
* prev into current:
49924996
*/
49934997
spin_acquire(&__rq_lockp(rq)->dep_map, 0, 0, _THIS_IP_);
4994-
__balance_callbacks(rq);
4998+
__balance_callbacks(rq, NULL);
49954999
raw_spin_rq_unlock_irq(rq);
49965000
}
49975001

@@ -6867,7 +6871,7 @@ static void __sched notrace __schedule(int sched_mode)
68676871
proxy_tag_curr(rq, next);
68686872

68696873
rq_unpin_lock(rq, &rf);
6870-
__balance_callbacks(rq);
6874+
__balance_callbacks(rq, NULL);
68716875
raw_spin_rq_unlock_irq(rq);
68726876
}
68736877
trace_sched_exit_tp(is_switch);
@@ -7316,7 +7320,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
73167320
trace_sched_pi_setprio(p, pi_task);
73177321
oldprio = p->prio;
73187322

7319-
if (oldprio == prio)
7323+
if (oldprio == prio && !dl_prio(prio))
73207324
queue_flag &= ~DEQUEUE_MOVE;
73217325

73227326
prev_class = p->sched_class;
@@ -7362,9 +7366,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
73627366
out_unlock:
73637367
/* Caller holds task_struct::pi_lock, IRQs are still disabled */
73647368

7365-
rq_unpin_lock(rq, &rf);
7366-
__balance_callbacks(rq);
7367-
rq_repin_lock(rq, &rf);
7369+
__balance_callbacks(rq, &rf);
73687370
__task_rq_unlock(rq, p, &rf);
73697371
}
73707372
#endif /* CONFIG_RT_MUTEXES */
@@ -9124,6 +9126,8 @@ void sched_move_task(struct task_struct *tsk, bool for_autogroup)
91249126

91259127
if (resched)
91269128
resched_curr(rq);
9129+
9130+
__balance_callbacks(rq, &rq_guard.rf);
91279131
}
91289132

91299133
static struct cgroup_subsys_state *

kernel/sched/deadline.c

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -752,8 +752,6 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
752752
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
753753
struct rq *rq = rq_of_dl_rq(dl_rq);
754754

755-
update_rq_clock(rq);
756-
757755
WARN_ON(is_dl_boosted(dl_se));
758756
WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
759757

@@ -1420,7 +1418,7 @@ update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se, int
14201418

14211419
static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec)
14221420
{
1423-
bool idle = rq->curr == rq->idle;
1421+
bool idle = idle_rq(rq);
14241422
s64 scaled_delta_exec;
14251423

14261424
if (unlikely(delta_exec <= 0)) {
@@ -1603,8 +1601,8 @@ void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
16031601
* | 8 | B:zero_laxity-wait | | |
16041602
* | | | <---+ |
16051603
* | +--------------------------------+ |
1606-
* | | ^ ^ 2 |
1607-
* | | 7 | 2 +--------------------+
1604+
* | | ^ ^ 2 |
1605+
* | | 7 | 2, 1 +----------------+
16081606
* | v |
16091607
* | +-------------+ |
16101608
* +-- | C:idle-wait | -+
@@ -1649,8 +1647,11 @@ void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
16491647
* dl_defer_idle = 0
16501648
*
16511649
*
1652-
* [1] A->B, A->D
1650+
* [1] A->B, A->D, C->B
16531651
* dl_server_start()
1652+
* dl_defer_idle = 0;
1653+
* if (dl_server_active)
1654+
* return; // [B]
16541655
* dl_server_active = 1;
16551656
* enqueue_dl_entity()
16561657
* update_dl_entity(WAKEUP)
@@ -1759,6 +1760,7 @@ void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
17591760
* "B:zero_laxity-wait" -> "C:idle-wait" [label="7:dl_server_update_idle"]
17601761
* "B:zero_laxity-wait" -> "D:running" [label="3:dl_server_timer"]
17611762
* "C:idle-wait" -> "A:init" [label="8:dl_server_timer"]
1763+
* "C:idle-wait" -> "B:zero_laxity-wait" [label="1:dl_server_start"]
17621764
* "C:idle-wait" -> "B:zero_laxity-wait" [label="2:dl_server_update"]
17631765
* "C:idle-wait" -> "C:idle-wait" [label="7:dl_server_update_idle"]
17641766
* "D:running" -> "A:init" [label="4:pick_task_dl"]
@@ -1784,6 +1786,7 @@ void dl_server_start(struct sched_dl_entity *dl_se)
17841786
{
17851787
struct rq *rq = dl_se->rq;
17861788

1789+
dl_se->dl_defer_idle = 0;
17871790
if (!dl_server(dl_se) || dl_se->dl_server_active)
17881791
return;
17891792

@@ -1834,6 +1837,7 @@ void sched_init_dl_servers(void)
18341837
rq = cpu_rq(cpu);
18351838

18361839
guard(rq_lock_irq)(rq);
1840+
update_rq_clock(rq);
18371841

18381842
dl_se = &rq->fair_server;
18391843

@@ -2210,7 +2214,7 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
22102214
update_dl_entity(dl_se);
22112215
} else if (flags & ENQUEUE_REPLENISH) {
22122216
replenish_dl_entity(dl_se);
2213-
} else if ((flags & ENQUEUE_RESTORE) &&
2217+
} else if ((flags & ENQUEUE_MOVE) &&
22142218
!is_dl_boosted(dl_se) &&
22152219
dl_time_before(dl_se->deadline, rq_clock(rq_of_dl_se(dl_se)))) {
22162220
setup_new_dl_entity(dl_se);
@@ -3154,28 +3158,20 @@ void dl_add_task_root_domain(struct task_struct *p)
31543158
struct rq *rq;
31553159
struct dl_bw *dl_b;
31563160
unsigned int cpu;
3157-
struct cpumask *msk = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
3161+
struct cpumask *msk;
31583162

31593163
raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
31603164
if (!dl_task(p) || dl_entity_is_special(&p->dl)) {
31613165
raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags);
31623166
return;
31633167
}
31643168

3165-
/*
3166-
* Get an active rq, whose rq->rd traces the correct root
3167-
* domain.
3168-
* Ideally this would be under cpuset reader lock until rq->rd is
3169-
* fetched. However, sleepable locks cannot nest inside pi_lock, so we
3170-
* rely on the caller of dl_add_task_root_domain() holds 'cpuset_mutex'
3171-
* to guarantee the CPU stays in the cpuset.
3172-
*/
3169+
msk = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
31733170
dl_get_task_effective_cpus(p, msk);
31743171
cpu = cpumask_first_and(cpu_active_mask, msk);
31753172
BUG_ON(cpu >= nr_cpu_ids);
31763173
rq = cpu_rq(cpu);
31773174
dl_b = &rq->rd->dl_bw;
3178-
/* End of fetching rd */
31793175

31803176
raw_spin_lock(&dl_b->lock);
31813177
__dl_add(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));
@@ -3299,6 +3295,12 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
32993295

33003296
static u64 get_prio_dl(struct rq *rq, struct task_struct *p)
33013297
{
3298+
/*
3299+
* Make sure to update current so we don't return a stale value.
3300+
*/
3301+
if (task_current_donor(rq, p))
3302+
update_curr_dl(rq);
3303+
33023304
return p->dl.deadline;
33033305
}
33043306

kernel/sched/ext.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,7 @@ static void scx_task_iter_start(struct scx_task_iter *iter)
545545
static void __scx_task_iter_rq_unlock(struct scx_task_iter *iter)
546546
{
547547
if (iter->locked_task) {
548+
__balance_callbacks(iter->rq, &iter->rf);
548549
task_rq_unlock(iter->rq, iter->locked_task, &iter->rf);
549550
iter->locked_task = NULL;
550551
}

kernel/sched/sched.h

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1364,6 +1364,28 @@ static inline u32 sched_rng(void)
13641364
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
13651365
#define raw_rq() raw_cpu_ptr(&runqueues)
13661366

1367+
static inline bool idle_rq(struct rq *rq)
1368+
{
1369+
return rq->curr == rq->idle && !rq->nr_running && !rq->ttwu_pending;
1370+
}
1371+
1372+
/**
1373+
* available_idle_cpu - is a given CPU idle for enqueuing work.
1374+
* @cpu: the CPU in question.
1375+
*
1376+
* Return: 1 if the CPU is currently idle. 0 otherwise.
1377+
*/
1378+
static inline bool available_idle_cpu(int cpu)
1379+
{
1380+
if (!idle_rq(cpu_rq(cpu)))
1381+
return 0;
1382+
1383+
if (vcpu_is_preempted(cpu))
1384+
return 0;
1385+
1386+
return 1;
1387+
}
1388+
13671389
#ifdef CONFIG_SCHED_PROXY_EXEC
13681390
static inline void rq_set_donor(struct rq *rq, struct task_struct *t)
13691391
{
@@ -2366,7 +2388,8 @@ extern const u32 sched_prio_to_wmult[40];
23662388
* should preserve as much state as possible.
23672389
*
23682390
* MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
2369-
* in the runqueue.
2391+
* in the runqueue. IOW the priority is allowed to change. Callers
2392+
* must expect to deal with balance callbacks.
23702393
*
23712394
* NOCLOCK - skip the update_rq_clock() (avoids double updates)
23722395
*
@@ -3947,6 +3970,8 @@ extern void enqueue_task(struct rq *rq, struct task_struct *p, int flags);
39473970
extern bool dequeue_task(struct rq *rq, struct task_struct *p, int flags);
39483971

39493972
extern struct balance_callback *splice_balance_callbacks(struct rq *rq);
3973+
3974+
extern void __balance_callbacks(struct rq *rq, struct rq_flags *rf);
39503975
extern void balance_callbacks(struct rq *rq, struct balance_callback *head);
39513976

39523977
/*

kernel/sched/syscalls.c

Lines changed: 2 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -180,35 +180,7 @@ int task_prio(const struct task_struct *p)
180180
*/
181181
int idle_cpu(int cpu)
182182
{
183-
struct rq *rq = cpu_rq(cpu);
184-
185-
if (rq->curr != rq->idle)
186-
return 0;
187-
188-
if (rq->nr_running)
189-
return 0;
190-
191-
if (rq->ttwu_pending)
192-
return 0;
193-
194-
return 1;
195-
}
196-
197-
/**
198-
* available_idle_cpu - is a given CPU idle for enqueuing work.
199-
* @cpu: the CPU in question.
200-
*
201-
* Return: 1 if the CPU is currently idle. 0 otherwise.
202-
*/
203-
int available_idle_cpu(int cpu)
204-
{
205-
if (!idle_cpu(cpu))
206-
return 0;
207-
208-
if (vcpu_is_preempted(cpu))
209-
return 0;
210-
211-
return 1;
183+
return idle_rq(cpu_rq(cpu));
212184
}
213185

214186
/**
@@ -667,7 +639,7 @@ int __sched_setscheduler(struct task_struct *p,
667639
* itself.
668640
*/
669641
newprio = rt_effective_prio(p, newprio);
670-
if (newprio == oldprio)
642+
if (newprio == oldprio && !dl_prio(newprio))
671643
queue_flags &= ~DEQUEUE_MOVE;
672644
}
673645

0 commit comments

Comments
 (0)