Skip to content

Commit f5a538c

Browse files
author
Peter Zijlstra
committed
sched/deadline: Fix dl_server stop condition
Gabriele reported that the dl_server doesn't stop as expected.

The problem was found to be the fact that idle time and fair runtime are treated equally. Both will count towards dl_server runtime and push the activation forwards when it is in the zero-laxity wait state.

Notably:

  dl_server_update_idle()
    update_curr_dl_se()
      if (dl_defer && dl_throttled && dl_runtime_exceeded())
        hrtimer_try_to_cancel();          // stop timer
        replenish_dl_new_period()
          deadline = now + dl_deadline;   // fwd period
          runtime = dl_runtime;
        start_dl_timer();                 // restart timer

And while we do want idle time accounted towards the *current* activation of the dl_server -- after all, a fair task could have run if we had any -- we don't necessarily want idle time to cause or push forward an activation.

Introduce dl_defer_idle to make this distinction. It will be set once idle time has pushed the activation forward; once set, idle time will only be allowed to consume any runtime but not push the activation.

This will then cause dl_server_timer() to fire, which will stop the dl_server.

Any non-idle time accounting during this phase will clear dl_defer_idle, so only a full period of idle will cause the dl_server to stop.

Reported-by: Gabriele Monaco <gmonaco@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251101000057.GA2184199@noisy.programming.kicks-ass.net
1 parent e636ffb commit f5a538c

2 files changed

Lines changed: 47 additions & 8 deletions

File tree

include/linux/sched.h

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -685,20 +685,22 @@ struct sched_dl_entity {
685685
*
686686
* @dl_server tells if this is a server entity.
687687
*
688-
* @dl_defer tells if this is a deferred or regular server. For
689-
* now only defer server exists.
690-
*
691-
* @dl_defer_armed tells if the deferrable server is waiting
692-
* for the replenishment timer to activate it.
693-
*
694688
* @dl_server_active tells if the dlserver is active (started).
695689
* dlserver is started on first cfs enqueue on an idle runqueue
696690
* and is stopped when a dequeue results in 0 cfs tasks on the
697691
* runqueue. In other words, dlserver is active only when cpu's
698692
* runqueue has at least one cfs task.
699693
*
694+
* @dl_defer tells if this is a deferred or regular server. For
695+
* now only defer server exists.
696+
*
697+
* @dl_defer_armed tells if the deferrable server is waiting
698+
* for the replenishment timer to activate it.
699+
*
700700
* @dl_defer_running tells if the deferrable server is actually
701701
* running, skipping the defer phase.
702+
*
703+
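* @dl_defer_idle tells if idle time has already pushed the deferred server's activation forward; while set, idle time only consumes runtime and the next timer expiry stops the server. Cleared by any non-idle time accounting.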
702704
*/
703705
unsigned int dl_throttled : 1;
704706
unsigned int dl_yielded : 1;
@@ -709,6 +711,7 @@ struct sched_dl_entity {
709711
unsigned int dl_defer : 1;
710712
unsigned int dl_defer_armed : 1;
711713
unsigned int dl_defer_running : 1;
714+
unsigned int dl_defer_idle : 1;
712715

713716
/*
714717
* Bandwidth enforcement timer. Each -deadline task has its

kernel/sched/deadline.c

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1173,6 +1173,11 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_
11731173
*/
11741174
rq->donor->sched_class->update_curr(rq);
11751175

1176+
if (dl_se->dl_defer_idle) {
1177+
dl_server_stop(dl_se);
1178+
return HRTIMER_NORESTART;
1179+
}
1180+
11761181
if (dl_se->dl_defer_armed) {
11771182
/*
11781183
* First check if the server could consume runtime in background.
@@ -1420,10 +1425,11 @@ s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta
14201425
}
14211426

14221427
static inline void
1423-
update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
1424-
int flags);
1428+
update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se, int flags);
1429+
14251430
static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec)
14261431
{
1432+
bool idle = rq->curr == rq->idle;
14271433
s64 scaled_delta_exec;
14281434

14291435
if (unlikely(delta_exec <= 0)) {
@@ -1444,6 +1450,9 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
14441450

14451451
dl_se->runtime -= scaled_delta_exec;
14461452

1453+
if (dl_se->dl_defer_idle && !idle)
1454+
dl_se->dl_defer_idle = 0;
1455+
14471456
/*
14481457
* The fair server can consume its runtime while throttled (not queued/
14491458
* running as regular CFS).
@@ -1453,6 +1462,29 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
14531462
* starting a new period, pushing the activation.
14541463
*/
14551464
if (dl_se->dl_defer && dl_se->dl_throttled && dl_runtime_exceeded(dl_se)) {
1465+
/*
1466+
* Non-servers would never get time accounted while throttled.
1467+
*/
1468+
WARN_ON_ONCE(!dl_server(dl_se));
1469+
1470+
/*
1471+
* While the server is marked idle, do not push out the
1472+
* activation further, instead wait for the period timer
1473+
* to lapse and stop the server.
1474+
*/
1475+
if (dl_se->dl_defer_idle && idle) {
1476+
/*
1477+
* The timer is at the zero-laxity point, this means
1478+
* dl_server_stop() / dl_server_start() can happen
1479+
* while now < deadline. This means update_dl_entity()
1480+
* will not replenish. Additionally start_dl_timer()
1481+
* will be set for 'deadline - runtime'. Negative
1482+
* runtime will not do.
1483+
*/
1484+
dl_se->runtime = 0;
1485+
return;
1486+
}
1487+
14561488
/*
14571489
* If the server was previously activated - the starving condition
14581490
* took place, at this point it went away because the fair scheduler
@@ -1465,6 +1497,9 @@ static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64
14651497

14661498
replenish_dl_new_period(dl_se, dl_se->rq);
14671499

1500+
if (idle)
1501+
dl_se->dl_defer_idle = 1;
1502+
14681503
/*
14691504
* Not being able to start the timer seems problematic. If it could not
14701505
* be started for whatever reason, we need to "unthrottle" the DL server
@@ -1590,6 +1625,7 @@ void dl_server_stop(struct sched_dl_entity *dl_se)
15901625
hrtimer_try_to_cancel(&dl_se->dl_timer);
15911626
dl_se->dl_defer_armed = 0;
15921627
dl_se->dl_throttled = 0;
1628+
dl_se->dl_defer_idle = 0;
15931629
dl_se->dl_server_active = 0;
15941630
}
15951631

0 commit comments

Comments
 (0)