Skip to content

Commit 9269d27

Browse files
committed
Merge tag 'timers-nohz-2021-06-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timers/nohz updates from Ingo Molnar:

 - Micro-optimize tick_nohz_full_cpu()

 - Optimize idle exit tick restarts to be less eager

 - Optimize tick_nohz_dep_set_task() to only wake up a single CPU.
   This reduces IPIs and interruptions on nohz_full CPUs.

 - Optimize tick_nohz_dep_set_signal() in a similar fashion.

 - Skip IPIs in tick_nohz_kick_task() when trying to kick a
   non-running task.

 - Micro-optimize tick_nohz_task_switch() IRQ flags handling to
   reduce context switching costs.

 - Misc cleanups and fixes

* tag 'timers-nohz-2021-06-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  MAINTAINERS: Add myself as context tracking maintainer
  tick/nohz: Call tick_nohz_task_switch() with interrupts disabled
  tick/nohz: Kick only _queued_ task whose tick dependency is updated
  tick/nohz: Change signal tick dependency to wake up CPUs of member tasks
  tick/nohz: Only wake up a single target cpu when kicking a task
  tick/nohz: Update nohz_full Kconfig help
  tick/nohz: Update idle_exittime on actual idle exit
  tick/nohz: Remove superflous check for CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
  tick/nohz: Conditionally restart tick on idle exit
  tick/nohz: Evaluate the CPU expression after the static key
2 parents 54a728d + 09fe880 commit 9269d27

7 files changed

Lines changed: 125 additions & 60 deletions

File tree

MAINTAINERS

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4610,6 +4610,12 @@ S: Supported
46104610
F: drivers/video/console/
46114611
F: include/linux/console*
46124612

4613+
CONTEXT TRACKING
4614+
M: Frederic Weisbecker <frederic@kernel.org>
4615+
S: Maintained
4616+
F: kernel/context_tracking.c
4617+
F: include/linux/context_tracking*
4618+
46134619
CONTROL GROUP (CGROUP)
46144620
M: Tejun Heo <tj@kernel.org>
46154621
M: Zefan Li <lizefan.x@bytedance.com>

include/linux/sched.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2028,6 +2028,8 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
20282028

20292029
#endif /* CONFIG_SMP */
20302030

2031+
extern bool sched_task_on_rq(struct task_struct *p);
2032+
20312033
/*
20322034
* In order to reduce various lock holder preemption latencies provide an
20332035
* interface to see if a vCPU is currently running or not.

include/linux/tick.h

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -186,13 +186,17 @@ static inline bool tick_nohz_full_enabled(void)
186186
return tick_nohz_full_running;
187187
}
188188

189-
static inline bool tick_nohz_full_cpu(int cpu)
190-
{
191-
if (!tick_nohz_full_enabled())
192-
return false;
193-
194-
return cpumask_test_cpu(cpu, tick_nohz_full_mask);
195-
}
189+
/*
190+
* Check if a CPU is part of the nohz_full subset. Arrange for evaluating
191+
* the cpu expression (typically smp_processor_id()) _after_ the static
192+
* key.
193+
*/
194+
#define tick_nohz_full_cpu(_cpu) ({ \
195+
bool __ret = false; \
196+
if (tick_nohz_full_enabled()) \
197+
__ret = cpumask_test_cpu((_cpu), tick_nohz_full_mask); \
198+
__ret; \
199+
})
196200

197201
static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask)
198202
{
@@ -208,7 +212,7 @@ extern void tick_nohz_dep_set_task(struct task_struct *tsk,
208212
enum tick_dep_bits bit);
209213
extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
210214
enum tick_dep_bits bit);
211-
extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
215+
extern void tick_nohz_dep_set_signal(struct task_struct *tsk,
212216
enum tick_dep_bits bit);
213217
extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
214218
enum tick_dep_bits bit);
@@ -253,11 +257,11 @@ static inline void tick_dep_clear_task(struct task_struct *tsk,
253257
if (tick_nohz_full_enabled())
254258
tick_nohz_dep_clear_task(tsk, bit);
255259
}
256-
static inline void tick_dep_set_signal(struct signal_struct *signal,
260+
static inline void tick_dep_set_signal(struct task_struct *tsk,
257261
enum tick_dep_bits bit)
258262
{
259263
if (tick_nohz_full_enabled())
260-
tick_nohz_dep_set_signal(signal, bit);
264+
tick_nohz_dep_set_signal(tsk, bit);
261265
}
262266
static inline void tick_dep_clear_signal(struct signal_struct *signal,
263267
enum tick_dep_bits bit)
@@ -285,7 +289,7 @@ static inline void tick_dep_set_task(struct task_struct *tsk,
285289
enum tick_dep_bits bit) { }
286290
static inline void tick_dep_clear_task(struct task_struct *tsk,
287291
enum tick_dep_bits bit) { }
288-
static inline void tick_dep_set_signal(struct signal_struct *signal,
292+
static inline void tick_dep_set_signal(struct task_struct *tsk,
289293
enum tick_dep_bits bit) { }
290294
static inline void tick_dep_clear_signal(struct signal_struct *signal,
291295
enum tick_dep_bits bit) { }

kernel/sched/core.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1928,6 +1928,11 @@ static inline void uclamp_post_fork(struct task_struct *p) { }
19281928
static inline void init_uclamp(void) { }
19291929
#endif /* CONFIG_UCLAMP_TASK */
19301930

1931+
bool sched_task_on_rq(struct task_struct *p)
1932+
{
1933+
return task_on_rq_queued(p);
1934+
}
1935+
19311936
static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
19321937
{
19331938
if (!(flags & ENQUEUE_NOCLOCK))
@@ -4546,6 +4551,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
45464551
vtime_task_switch(prev);
45474552
perf_event_task_sched_in(prev, current);
45484553
finish_task(prev);
4554+
tick_nohz_task_switch();
45494555
finish_lock_switch(rq);
45504556
finish_arch_post_lock_switch();
45514557
kcov_finish_switch(current);
@@ -4591,7 +4597,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
45914597
put_task_struct_rcu_user(prev);
45924598
}
45934599

4594-
tick_nohz_task_switch();
45954600
return rq;
45964601
}
45974602

kernel/time/Kconfig

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -117,13 +117,14 @@ config NO_HZ_FULL
117117
the task mostly runs in userspace and has few kernel activity.
118118

119119
You need to fill up the nohz_full boot parameter with the
120-
desired range of dynticks CPUs.
120+
desired range of dynticks CPUs to use it. This is implemented at
121+
the expense of some overhead in user <-> kernel transitions:
122+
syscalls, exceptions and interrupts.
121123

122-
This is implemented at the expense of some overhead in user <-> kernel
123-
transitions: syscalls, exceptions and interrupts. Even when it's
124-
dynamically off.
124+
By default, without passing the nohz_full parameter, this behaves just
125+
like NO_HZ_IDLE.
125126

126-
Say N.
127+
If you're a distro say Y.
127128

128129
endchoice
129130

kernel/time/posix-cpu-timers.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,7 @@ static void arm_timer(struct k_itimer *timer, struct task_struct *p)
523523
if (CPUCLOCK_PERTHREAD(timer->it_clock))
524524
tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
525525
else
526-
tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
526+
tick_dep_set_signal(p, TICK_DEP_BIT_POSIX_TIMER);
527527
}
528528

529529
/*
@@ -1358,7 +1358,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid,
13581358
if (*newval < *nextevt)
13591359
*nextevt = *newval;
13601360

1361-
tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
1361+
tick_dep_set_signal(tsk, TICK_DEP_BIT_POSIX_TIMER);
13621362
}
13631363

13641364
static int do_cpu_nanosleep(const clockid_t which_clock, int flags,

kernel/time/tick-sched.c

Lines changed: 88 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,46 @@ void tick_nohz_full_kick_cpu(int cpu)
323323
irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
324324
}
325325

326+
static void tick_nohz_kick_task(struct task_struct *tsk)
327+
{
328+
int cpu;
329+
330+
/*
331+
* If the task is not running, run_posix_cpu_timers()
332+
* has nothing to elapse, IPI can then be spared.
333+
*
334+
* activate_task() STORE p->tick_dep_mask
335+
* STORE p->on_rq
336+
* __schedule() (switch to task 'p') smp_mb() (atomic_fetch_or())
337+
* LOCK rq->lock LOAD p->on_rq
338+
* smp_mb__after_spin_lock()
339+
* tick_nohz_task_switch()
340+
* LOAD p->tick_dep_mask
341+
*/
342+
if (!sched_task_on_rq(tsk))
343+
return;
344+
345+
/*
346+
* If the task concurrently migrates to another CPU,
347+
* we guarantee it sees the new tick dependency upon
348+
* schedule.
349+
*
350+
* set_task_cpu(p, cpu);
351+
* STORE p->cpu = @cpu
352+
* __schedule() (switch to task 'p')
353+
* LOCK rq->lock
354+
* smp_mb__after_spin_lock() STORE p->tick_dep_mask
355+
* tick_nohz_task_switch() smp_mb() (atomic_fetch_or())
356+
* LOAD p->tick_dep_mask LOAD p->cpu
357+
*/
358+
cpu = task_cpu(tsk);
359+
360+
preempt_disable();
361+
if (cpu_online(cpu))
362+
tick_nohz_full_kick_cpu(cpu);
363+
preempt_enable();
364+
}
365+
326366
/*
327367
* Kick all full dynticks CPUs in order to force these to re-evaluate
328368
* their dependency on the tick and restart it if necessary.
@@ -405,19 +445,8 @@ EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu);
405445
*/
406446
void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
407447
{
408-
if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask)) {
409-
if (tsk == current) {
410-
preempt_disable();
411-
tick_nohz_full_kick();
412-
preempt_enable();
413-
} else {
414-
/*
415-
* Some future tick_nohz_full_kick_task()
416-
* should optimize this.
417-
*/
418-
tick_nohz_full_kick_all();
419-
}
420-
}
448+
if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask))
449+
tick_nohz_kick_task(tsk);
421450
}
422451
EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task);
423452

@@ -431,9 +460,20 @@ EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task);
431460
* Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
432461
* per process timers.
433462
*/
434-
void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
463+
void tick_nohz_dep_set_signal(struct task_struct *tsk,
464+
enum tick_dep_bits bit)
435465
{
436-
tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
466+
int prev;
467+
struct signal_struct *sig = tsk->signal;
468+
469+
prev = atomic_fetch_or(BIT(bit), &sig->tick_dep_mask);
470+
if (!prev) {
471+
struct task_struct *t;
472+
473+
lockdep_assert_held(&tsk->sighand->siglock);
474+
__for_each_thread(sig, t)
475+
tick_nohz_kick_task(t);
476+
}
437477
}
438478

439479
void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
@@ -448,13 +488,10 @@ void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bi
448488
*/
449489
void __tick_nohz_task_switch(void)
450490
{
451-
unsigned long flags;
452491
struct tick_sched *ts;
453492

454-
local_irq_save(flags);
455-
456493
if (!tick_nohz_full_cpu(smp_processor_id()))
457-
goto out;
494+
return;
458495

459496
ts = this_cpu_ptr(&tick_cpu_sched);
460497

@@ -463,8 +500,6 @@ void __tick_nohz_task_switch(void)
463500
atomic_read(&current->signal->tick_dep_mask))
464501
tick_nohz_full_kick();
465502
}
466-
out:
467-
local_irq_restore(flags);
468503
}
469504

470505
/* Get the boot-time nohz CPU list from the kernel parameters. */
@@ -922,27 +957,31 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
922957
* Cancel the scheduled timer and restore the tick
923958
*/
924959
ts->tick_stopped = 0;
925-
ts->idle_exittime = now;
926-
927960
tick_nohz_restart(ts, now);
928961
}
929962

930-
static void tick_nohz_full_update_tick(struct tick_sched *ts)
963+
static void __tick_nohz_full_update_tick(struct tick_sched *ts,
964+
ktime_t now)
931965
{
932966
#ifdef CONFIG_NO_HZ_FULL
933967
int cpu = smp_processor_id();
934968

935-
if (!tick_nohz_full_cpu(cpu))
969+
if (can_stop_full_tick(cpu, ts))
970+
tick_nohz_stop_sched_tick(ts, cpu);
971+
else if (ts->tick_stopped)
972+
tick_nohz_restart_sched_tick(ts, now);
973+
#endif
974+
}
975+
976+
static void tick_nohz_full_update_tick(struct tick_sched *ts)
977+
{
978+
if (!tick_nohz_full_cpu(smp_processor_id()))
936979
return;
937980

938981
if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
939982
return;
940983

941-
if (can_stop_full_tick(cpu, ts))
942-
tick_nohz_stop_sched_tick(ts, cpu);
943-
else if (ts->tick_stopped)
944-
tick_nohz_restart_sched_tick(ts, ktime_get());
945-
#endif
984+
__tick_nohz_full_update_tick(ts, ktime_get());
946985
}
947986

948987
static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
@@ -1189,11 +1228,13 @@ unsigned long tick_nohz_get_idle_calls(void)
11891228
return ts->idle_calls;
11901229
}
11911230

1192-
static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
1231+
static void tick_nohz_account_idle_time(struct tick_sched *ts,
1232+
ktime_t now)
11931233
{
1194-
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
11951234
unsigned long ticks;
11961235

1236+
ts->idle_exittime = now;
1237+
11971238
if (vtime_accounting_enabled_this_cpu())
11981239
return;
11991240
/*
@@ -1207,21 +1248,27 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
12071248
*/
12081249
if (ticks && ticks < LONG_MAX)
12091250
account_idle_ticks(ticks);
1210-
#endif
12111251
}
12121252

1213-
static void __tick_nohz_idle_restart_tick(struct tick_sched *ts, ktime_t now)
1253+
void tick_nohz_idle_restart_tick(void)
12141254
{
1215-
tick_nohz_restart_sched_tick(ts, now);
1216-
tick_nohz_account_idle_ticks(ts);
1255+
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1256+
1257+
if (ts->tick_stopped) {
1258+
ktime_t now = ktime_get();
1259+
tick_nohz_restart_sched_tick(ts, now);
1260+
tick_nohz_account_idle_time(ts, now);
1261+
}
12171262
}
12181263

1219-
void tick_nohz_idle_restart_tick(void)
1264+
static void tick_nohz_idle_update_tick(struct tick_sched *ts, ktime_t now)
12201265
{
1221-
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
1266+
if (tick_nohz_full_cpu(smp_processor_id()))
1267+
__tick_nohz_full_update_tick(ts, now);
1268+
else
1269+
tick_nohz_restart_sched_tick(ts, now);
12221270

1223-
if (ts->tick_stopped)
1224-
__tick_nohz_idle_restart_tick(ts, ktime_get());
1271+
tick_nohz_account_idle_time(ts, now);
12251272
}
12261273

12271274
/**
@@ -1253,7 +1300,7 @@ void tick_nohz_idle_exit(void)
12531300
tick_nohz_stop_idle(ts, now);
12541301

12551302
if (tick_stopped)
1256-
__tick_nohz_idle_restart_tick(ts, now);
1303+
tick_nohz_idle_update_tick(ts, now);
12571304

12581305
local_irq_enable();
12591306
}

0 commit comments

Comments (0)