
Commit e3ff7c6

jpoimboe authored and Peter Zijlstra committed
livepatch,sched: Add livepatch task switching to cond_resched()
There have been reports [1][2] of live patches failing to complete within a reasonable amount of time due to CPU-bound kthreads.

Fix it by patching tasks in cond_resched().

There are four different flavors of cond_resched(), depending on the kernel configuration. Hook into all of them.

A more elegant solution might be to use a preempt notifier. However, non-ORC unwinders can't unwind a preempted task reliably.

[1] https://lore.kernel.org/lkml/20220507174628.2086373-1-song@kernel.org/
[2] https://lkml.kernel.org/lkml/20230120-vhost-klp-switching-v1-0-7c2b65519c43@kernel.org

Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Tested-by: Seth Forshee (DigitalOcean) <sforshee@kernel.org>
Link: https://lore.kernel.org/r/4ae981466b7814ec221014fc2554b2f86f3fb70b.1677257135.git.jpoimboe@kernel.org
1 parent: 383439d

6 files changed: 194 additions & 28 deletions
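Before the per-file diffs, a minimal user-space sketch of the core idea may help: a default-off livepatch check is gated into the hot cond_resched() path, so the common case costs only a predicted-not-taken branch. This is a distilled illustration, not the kernel code; C11 atomics stand in for the kernel's static key / static call machinery, and all names are hypothetical.

```c
#include <stdatomic.h>
#include <stdio.h>

static atomic_bool transition_in_progress;  /* stands in for the static key */

static void try_switch_current(void)        /* stands in for __klp_sched_try_switch() */
{
	/* slow path, only during a transition: check whether the current
	 * task can be safely switched to the patched state */
	puts("livepatch: checking current task");
}

static void my_cond_resched(void)           /* stands in for cond_resched() */
{
	if (atomic_load_explicit(&transition_in_progress, memory_order_acquire))
		try_switch_current();
	/* ... the usual voluntary-reschedule logic would follow here ... */
}

int main(void)
{
	my_cond_resched();                         /* fast path: nothing extra */
	atomic_store(&transition_in_progress, 1);  /* a livepatch transition begins */
	my_cond_resched();                         /* the hook now fires */
	return 0;
}
```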


include/linux/livepatch.h

Lines changed: 1 addition & 0 deletions
```diff
@@ -13,6 +13,7 @@
 #include <linux/ftrace.h>
 #include <linux/completion.h>
 #include <linux/list.h>
+#include <linux/livepatch_sched.h>

 #if IS_ENABLED(CONFIG_LIVEPATCH)
```

include/linux/livepatch_sched.h

Lines changed: 29 additions & 0 deletions
```diff
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _LINUX_LIVEPATCH_SCHED_H_
+#define _LINUX_LIVEPATCH_SCHED_H_
+
+#include <linux/jump_label.h>
+#include <linux/static_call_types.h>
+
+#ifdef CONFIG_LIVEPATCH
+
+void __klp_sched_try_switch(void);
+
+#if !defined(CONFIG_PREEMPT_DYNAMIC) || !defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
+
+DECLARE_STATIC_KEY_FALSE(klp_sched_try_switch_key);
+
+static __always_inline void klp_sched_try_switch(void)
+{
+	if (static_branch_unlikely(&klp_sched_try_switch_key))
+		__klp_sched_try_switch();
+}
+
+#endif /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
+
+#else /* !CONFIG_LIVEPATCH */
+static inline void klp_sched_try_switch(void) {}
+static inline void __klp_sched_try_switch(void) {}
+#endif /* CONFIG_LIVEPATCH */
+
+#endif /* _LINUX_LIVEPATCH_SCHED_H_ */
```
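For context on why this hook matters, consider a CPU-bound kthread. The sketch below is illustrative kernel-style pseudocode, not part of this commit; do_some_work() is a hypothetical placeholder.

```c
/* Illustrative only: a hypothetical CPU-bound kthread. Before this
 * commit, such a thread could stall a livepatch transition forever
 * because it never sleeps; now every cond_resched() also runs
 * klp_sched_try_switch() (or the klp_cond_resched() static call),
 * giving livepatch a chance to transition the task.
 */
static int busy_kthread_fn(void *data)
{
	while (!kthread_should_stop()) {
		do_some_work();   /* hypothetical CPU-bound work */
		cond_resched();   /* now also a livepatch switch point */
	}
	return 0;
}
```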

include/linux/sched.h

Lines changed: 15 additions & 5 deletions
```diff
@@ -36,6 +36,7 @@
 #include <linux/seqlock.h>
 #include <linux/kcsan.h>
 #include <linux/rv.h>
+#include <linux/livepatch_sched.h>
 #include <asm/kmap_size.h>

 /* task_struct member predeclarations (sorted alphabetically): */
@@ -2070,6 +2071,9 @@ extern int __cond_resched(void);

 #if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)

+void sched_dynamic_klp_enable(void);
+void sched_dynamic_klp_disable(void);
+
 DECLARE_STATIC_CALL(cond_resched, __cond_resched);

 static __always_inline int _cond_resched(void)
@@ -2078,27 +2082,33 @@ static __always_inline int _cond_resched(void)
 }

 #elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
+
 extern int dynamic_cond_resched(void);

 static __always_inline int _cond_resched(void)
 {
 	return dynamic_cond_resched();
 }

-#else
+#else /* !CONFIG_PREEMPTION */

 static inline int _cond_resched(void)
 {
+	klp_sched_try_switch();
 	return __cond_resched();
 }

-#endif /* CONFIG_PREEMPT_DYNAMIC */
+#endif /* PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */

-#else
+#else /* CONFIG_PREEMPTION && !CONFIG_PREEMPT_DYNAMIC */

-static inline int _cond_resched(void) { return 0; }
+static inline int _cond_resched(void)
+{
+	klp_sched_try_switch();
+	return 0;
+}

-#endif /* !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) */
+#endif /* !CONFIG_PREEMPTION || CONFIG_PREEMPT_DYNAMIC */

 #define cond_resched() ({			\
 	__might_resched(__FILE__, __LINE__, 0);	\
```

kernel/livepatch/core.c

Lines changed: 1 addition & 0 deletions
```diff
@@ -33,6 +33,7 @@
  *
  * - klp_ftrace_handler()
  * - klp_update_patch_state()
+ * - __klp_sched_try_switch()
  */
 DEFINE_MUTEX(klp_mutex);
```

kernel/livepatch/transition.c

Lines changed: 92 additions & 15 deletions
```diff
@@ -9,6 +9,7 @@

 #include <linux/cpu.h>
 #include <linux/stacktrace.h>
+#include <linux/static_call.h>
 #include "core.h"
 #include "patch.h"
 #include "transition.h"
@@ -26,6 +27,25 @@ static int klp_target_state = KLP_UNDEFINED;

 static unsigned int klp_signals_cnt;

+/*
+ * When a livepatch is in progress, enable klp stack checking in
+ * cond_resched(). This helps CPU-bound kthreads get patched.
+ */
+#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
+
+#define klp_cond_resched_enable() sched_dynamic_klp_enable()
+#define klp_cond_resched_disable() sched_dynamic_klp_disable()
+
+#else /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
+
+DEFINE_STATIC_KEY_FALSE(klp_sched_try_switch_key);
+EXPORT_SYMBOL(klp_sched_try_switch_key);
+
+#define klp_cond_resched_enable() static_branch_enable(&klp_sched_try_switch_key)
+#define klp_cond_resched_disable() static_branch_disable(&klp_sched_try_switch_key)
+
+#endif /* CONFIG_PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
+
 /*
  * This work can be performed periodically to finish patching or unpatching any
  * "straggler" tasks which failed to transition in the first attempt.
@@ -174,8 +194,8 @@ void klp_update_patch_state(struct task_struct *task)
 	 * barrier (smp_rmb) for two cases:
 	 *
 	 * 1) Enforce the order of the TIF_PATCH_PENDING read and the
-	 *    klp_target_state read. The corresponding write barrier is in
-	 *    klp_init_transition().
+	 *    klp_target_state read. The corresponding write barriers are in
+	 *    klp_init_transition() and klp_reverse_transition().
 	 *
 	 * 2) Enforce the order of the TIF_PATCH_PENDING read and a future read
 	 *    of func->transition, if klp_ftrace_handler() is called later on
@@ -343,6 +363,44 @@ static bool klp_try_switch_task(struct task_struct *task)
 	return !ret;
 }

+void __klp_sched_try_switch(void)
+{
+	if (likely(!klp_patch_pending(current)))
+		return;
+
+	/*
+	 * This function is called from cond_resched() which is called in many
+	 * places throughout the kernel. Using the klp_mutex here might
+	 * deadlock.
+	 *
+	 * Instead, disable preemption to prevent racing with other callers of
+	 * klp_try_switch_task(). Thanks to task_call_func() they won't be
+	 * able to switch this task while it's running.
+	 */
+	preempt_disable();
+
+	/*
+	 * Make sure current didn't get patched between the above check and
+	 * preempt_disable().
+	 */
+	if (unlikely(!klp_patch_pending(current)))
+		goto out;
+
+	/*
+	 * Enforce the order of the TIF_PATCH_PENDING read above and the
+	 * klp_target_state read in klp_try_switch_task(). The corresponding
+	 * write barriers are in klp_init_transition() and
+	 * klp_reverse_transition().
+	 */
+	smp_rmb();
+
+	klp_try_switch_task(current);
+
+out:
+	preempt_enable();
+}
+EXPORT_SYMBOL(__klp_sched_try_switch);
+
 /*
  * Sends a fake signal to all non-kthread tasks with TIF_PATCH_PENDING set.
  * Kthreads with TIF_PATCH_PENDING set are woken up.
@@ -449,7 +507,8 @@ void klp_try_complete_transition(void)
 		return;
 	}

-	/* we're done, now cleanup the data structures */
+	/* Done! Now cleanup the data structures. */
+	klp_cond_resched_disable();
 	patch = klp_transition_patch;
 	klp_complete_transition();
@@ -501,6 +560,8 @@ void klp_start_transition(void)
 			set_tsk_thread_flag(task, TIF_PATCH_PENDING);
 	}

+	klp_cond_resched_enable();
+
 	klp_signals_cnt = 0;
 }
@@ -556,8 +617,9 @@ void klp_init_transition(struct klp_patch *patch, int state)
 	 * see a func in transition with a task->patch_state of KLP_UNDEFINED.
 	 *
 	 * Also enforce the order of the klp_target_state write and future
-	 * TIF_PATCH_PENDING writes to ensure klp_update_patch_state() doesn't
-	 * set a task->patch_state to KLP_UNDEFINED.
+	 * TIF_PATCH_PENDING writes to ensure klp_update_patch_state() and
+	 * __klp_sched_try_switch() don't set a task->patch_state to
+	 * KLP_UNDEFINED.
 	 */
 	smp_wmb();
@@ -593,14 +655,10 @@ void klp_reverse_transition(void)
 		  klp_target_state == KLP_PATCHED ? "patching to unpatching" :
 		  "unpatching to patching");

-	klp_transition_patch->enabled = !klp_transition_patch->enabled;
-
-	klp_target_state = !klp_target_state;
-
 	/*
 	 * Clear all TIF_PATCH_PENDING flags to prevent races caused by
-	 * klp_update_patch_state() running in parallel with
-	 * klp_start_transition().
+	 * klp_update_patch_state() or __klp_sched_try_switch() running in
+	 * parallel with the reverse transition.
 	 */
 	read_lock(&tasklist_lock);
 	for_each_process_thread(g, task)
@@ -610,9 +668,28 @@ void klp_reverse_transition(void)
 	for_each_possible_cpu(cpu)
 		clear_tsk_thread_flag(idle_task(cpu), TIF_PATCH_PENDING);

-	/* Let any remaining calls to klp_update_patch_state() complete */
+	/*
+	 * Make sure all existing invocations of klp_update_patch_state() and
+	 * __klp_sched_try_switch() see the cleared TIF_PATCH_PENDING before
+	 * starting the reverse transition.
+	 */
 	klp_synchronize_transition();

+	/*
+	 * All patching has stopped, now re-initialize the global variables to
+	 * prepare for the reverse transition.
+	 */
+	klp_transition_patch->enabled = !klp_transition_patch->enabled;
+	klp_target_state = !klp_target_state;
+
+	/*
+	 * Enforce the order of the klp_target_state write and the
+	 * TIF_PATCH_PENDING writes in klp_start_transition() to ensure
+	 * klp_update_patch_state() and __klp_sched_try_switch() don't set
+	 * task->patch_state to the wrong value.
+	 */
+	smp_wmb();
+
 	klp_start_transition();
 }
@@ -626,9 +703,9 @@ void klp_copy_process(struct task_struct *child)
 	 * the task flag up to date with the parent here.
 	 *
 	 * The operation is serialized against all klp_*_transition()
-	 * operations by the tasklist_lock. The only exception is
-	 * klp_update_patch_state(current), but we cannot race with
-	 * that because we are current.
+	 * operations by the tasklist_lock. The only exceptions are
+	 * klp_update_patch_state(current) and __klp_sched_try_switch(), but we
+	 * cannot race with them because we are current.
 	 */
 	if (test_tsk_thread_flag(current, TIF_PATCH_PENDING))
 		set_tsk_thread_flag(child, TIF_PATCH_PENDING);
```
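The smp_wmb() in klp_init_transition()/klp_reverse_transition() pairs with the smp_rmb() in klp_update_patch_state() and __klp_sched_try_switch() in the classic publish/consume shape: write the state, then set the flag; observe the flag, then read the state. A distilled user-space sketch of that pairing, with C11 release/acquire standing in for the kernel barriers (all names hypothetical):

```c
#include <stdatomic.h>
#include <stdio.h>

static int target_state;            /* stands in for klp_target_state  */
static atomic_bool patch_pending;   /* stands in for TIF_PATCH_PENDING */

/* writer side: like klp_init_transition()/klp_reverse_transition() */
static void start_transition(int state)
{
	target_state = state;                 /* 1: write the state          */
	atomic_store_explicit(&patch_pending, /* 2+3: barrier + set flag,    */
			      1, memory_order_release); /* as a release store */
}

/* reader side: like klp_update_patch_state()/__klp_sched_try_switch() */
static void try_switch(void)
{
	if (!atomic_load_explicit(&patch_pending, memory_order_acquire))
		return;                       /* flag read ordered before... */
	printf("switching task to state %d\n", target_state); /* ...state read */
}

int main(void)
{
	start_transition(1);
	try_switch();
	return 0;
}
```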

kernel/sched/core.c

Lines changed: 56 additions & 8 deletions
```diff
@@ -8525,6 +8525,7 @@ EXPORT_STATIC_CALL_TRAMP(might_resched);
 static DEFINE_STATIC_KEY_FALSE(sk_dynamic_cond_resched);
 int __sched dynamic_cond_resched(void)
 {
+	klp_sched_try_switch();
 	if (!static_branch_unlikely(&sk_dynamic_cond_resched))
 		return 0;
 	return __cond_resched();
@@ -8673,50 +8674,97 @@ int sched_dynamic_mode(const char *str)
 #error "Unsupported PREEMPT_DYNAMIC mechanism"
 #endif

-void sched_dynamic_update(int mode)
+DEFINE_MUTEX(sched_dynamic_mutex);
+static bool klp_override;
+
+static void __sched_dynamic_update(int mode)
 {
 	/*
 	 * Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in
 	 * the ZERO state, which is invalid.
 	 */
-	preempt_dynamic_enable(cond_resched);
+	if (!klp_override)
+		preempt_dynamic_enable(cond_resched);
 	preempt_dynamic_enable(might_resched);
 	preempt_dynamic_enable(preempt_schedule);
 	preempt_dynamic_enable(preempt_schedule_notrace);
 	preempt_dynamic_enable(irqentry_exit_cond_resched);

 	switch (mode) {
 	case preempt_dynamic_none:
-		preempt_dynamic_enable(cond_resched);
+		if (!klp_override)
+			preempt_dynamic_enable(cond_resched);
 		preempt_dynamic_disable(might_resched);
 		preempt_dynamic_disable(preempt_schedule);
 		preempt_dynamic_disable(preempt_schedule_notrace);
 		preempt_dynamic_disable(irqentry_exit_cond_resched);
-		pr_info("Dynamic Preempt: none\n");
+		if (mode != preempt_dynamic_mode)
+			pr_info("Dynamic Preempt: none\n");
 		break;

 	case preempt_dynamic_voluntary:
-		preempt_dynamic_enable(cond_resched);
+		if (!klp_override)
+			preempt_dynamic_enable(cond_resched);
 		preempt_dynamic_enable(might_resched);
 		preempt_dynamic_disable(preempt_schedule);
 		preempt_dynamic_disable(preempt_schedule_notrace);
 		preempt_dynamic_disable(irqentry_exit_cond_resched);
-		pr_info("Dynamic Preempt: voluntary\n");
+		if (mode != preempt_dynamic_mode)
+			pr_info("Dynamic Preempt: voluntary\n");
 		break;

 	case preempt_dynamic_full:
-		preempt_dynamic_disable(cond_resched);
+		if (!klp_override)
+			preempt_dynamic_disable(cond_resched);
 		preempt_dynamic_disable(might_resched);
 		preempt_dynamic_enable(preempt_schedule);
 		preempt_dynamic_enable(preempt_schedule_notrace);
 		preempt_dynamic_enable(irqentry_exit_cond_resched);
-		pr_info("Dynamic Preempt: full\n");
+		if (mode != preempt_dynamic_mode)
+			pr_info("Dynamic Preempt: full\n");
 		break;
 	}

 	preempt_dynamic_mode = mode;
 }

+void sched_dynamic_update(int mode)
+{
+	mutex_lock(&sched_dynamic_mutex);
+	__sched_dynamic_update(mode);
+	mutex_unlock(&sched_dynamic_mutex);
+}
+
+#ifdef CONFIG_HAVE_PREEMPT_DYNAMIC_CALL
+
+static int klp_cond_resched(void)
+{
+	__klp_sched_try_switch();
+	return __cond_resched();
+}
+
+void sched_dynamic_klp_enable(void)
+{
+	mutex_lock(&sched_dynamic_mutex);
+
+	klp_override = true;
+	static_call_update(cond_resched, klp_cond_resched);
+
+	mutex_unlock(&sched_dynamic_mutex);
+}
+
+void sched_dynamic_klp_disable(void)
+{
+	mutex_lock(&sched_dynamic_mutex);
+
+	klp_override = false;
+	__sched_dynamic_update(preempt_dynamic_mode);
+
+	mutex_unlock(&sched_dynamic_mutex);
+}
+
+#endif /* CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */
+
 static int __init setup_preempt_mode(char *str)
 {
 	int mode = sched_dynamic_mode(str);
```
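The enable/disable pair above repoints the cond_resched static call at a livepatch-aware wrapper for the duration of a transition, then lets __sched_dynamic_update() restore whatever the current preemption mode wants. A rough user-space analogue of that override flow, with a plain function pointer standing in for the static call (all names hypothetical):

```c
#include <stdio.h>

static int plain_cond_resched(void) { puts("reschedule check"); return 0; }
static int noop_cond_resched(void)  { return 0; }  /* "full" preemption mode */

static int (*cond_resched_fn)(void) = plain_cond_resched; /* the "static call" */
static int klp_override_flag;                             /* like klp_override */

static int klp_cond_resched_wrapper(void)  /* like klp_cond_resched() */
{
	puts("livepatch: try to switch current task"); /* __klp_sched_try_switch() */
	return plain_cond_resched();
}

static void update_mode(int full_preempt)  /* like __sched_dynamic_update() */
{
	/* leave the klp wrapper in place while an override is active */
	if (!klp_override_flag)
		cond_resched_fn = full_preempt ? noop_cond_resched
					       : plain_cond_resched;
}

static void klp_enable(void)               /* like sched_dynamic_klp_enable() */
{
	klp_override_flag = 1;
	cond_resched_fn = klp_cond_resched_wrapper; /* like static_call_update() */
}

static void klp_disable(int full_preempt)  /* like sched_dynamic_klp_disable() */
{
	klp_override_flag = 0;
	update_mode(full_preempt);                  /* restore the mode's choice */
}

int main(void)
{
	cond_resched_fn();  /* normal behavior */
	klp_enable();
	cond_resched_fn();  /* livepatch-aware behavior during a transition */
	klp_disable(0);
	cond_resched_fn();  /* restored to the current preemption mode */
	return 0;
}
```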
