Skip to content

Commit 32034df

Browse files
KAGA-KOKO authored and ingomolnar committed
rseq: Switch to TIF_RSEQ if supported
TIF_NOTIFY_RESUME is a multiplexing TIF bit, which is suboptimal especially with the RSEQ fast path depending on it, but not really handling it.

Define a separate TIF_RSEQ in the generic TIF space and enable the full separation of fast and slow path for architectures which utilize that.

That avoids the hassle with invocations of resume_user_mode_work() from hypervisors, which clear TIF_NOTIFY_RESUME. It makes the therefore required re-evaluation at the end of vcpu_run() a NOOP on architectures which utilize the generic TIF space and have a separate TIF_RSEQ.

The hypervisor TIF handling does not include the separate TIF_RSEQ as there is no point in doing so. The guest neither knows nor cares about the VMM host application's RSEQ state. That state is only relevant when the ioctl() returns to user space.

The fastpath implementation still utilizes TIF_NOTIFY_RESUME for failure handling, but this only happens within exit_to_user_mode_loop(), so arguably the hypervisor ioctl() code is long done when this happens.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Link: https://patch.msgid.link/20251027084307.903622031@linutronix.de
1 parent 7a5201e commit 32034df

6 files changed

Lines changed: 61 additions & 13 deletions

File tree

include/asm-generic/thread_info_tif.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,4 +45,7 @@
4545
# define _TIF_RESTORE_SIGMASK BIT(TIF_RESTORE_SIGMASK)
4646
#endif
4747

48+
#define TIF_RSEQ 11 // Run RSEQ fast path
49+
#define _TIF_RSEQ BIT(TIF_RSEQ)
50+
4851
#endif /* _ASM_GENERIC_THREAD_INFO_TIF_H_ */

include/linux/irq-entry-common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
#define EXIT_TO_USER_MODE_WORK \
3131
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
3232
_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
33-
_TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
33+
_TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | _TIF_RSEQ | \
3434
ARCH_EXIT_TO_USER_MODE_WORK)
3535

3636
/**

include/linux/rseq.h

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *reg
4242

4343
static inline void rseq_raise_notify_resume(struct task_struct *t)
4444
{
45-
set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
45+
set_tsk_thread_flag(t, TIF_RSEQ);
4646
}
4747

4848
/* Invoked from context switch to force evaluation on exit to user */
@@ -114,17 +114,25 @@ static inline void rseq_force_update(void)
114114

115115
/*
116116
* KVM/HYPERV invoke resume_user_mode_work() before entering guest mode,
117-
* which clears TIF_NOTIFY_RESUME. To avoid updating user space RSEQ in
118-
* that case just to do it eventually again before returning to user space,
119-
* the entry resume_user_mode_work() invocation is ignored as the register
120-
* argument is NULL.
117+
* which clears TIF_NOTIFY_RESUME on architectures that don't use the
118+
* generic TIF bits and therefore can't provide a separate TIF_RSEQ flag.
121119
*
122-
* After returning from guest mode, they have to invoke this function to
123-
* re-raise TIF_NOTIFY_RESUME if necessary.
120+
* To avoid updating user space RSEQ in that case just to do it eventually
121+
* again before returning to user space, __rseq_handle_slowpath() does
122+
* nothing when invoked with NULL register state.
123+
*
124+
* After returning from guest mode, before exiting to userspace, hypervisors
125+
* must invoke this function to re-raise TIF_NOTIFY_RESUME if necessary.
124126
*/
125127
static inline void rseq_virt_userspace_exit(void)
126128
{
127129
if (current->rseq.event.sched_switch)
130+
/*
131+
* The generic optimization for deferring RSEQ updates until the next
132+
* exit relies on having a dedicated TIF_RSEQ.
133+
*/
134+
if (!IS_ENABLED(CONFIG_HAVE_GENERIC_TIF_BITS) &&
135+
current->rseq.event.sched_switch)
128136
rseq_raise_notify_resume(current);
129137
}
130138

include/linux/rseq_entry.h

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -507,18 +507,44 @@ static __always_inline bool __rseq_exit_to_user_mode_restart(struct pt_regs *reg
507507
return false;
508508
}
509509

510-
static __always_inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs)
510+
/* Required to allow conversion to GENERIC_ENTRY w/o GENERIC_TIF_BITS */
511+
#ifdef CONFIG_HAVE_GENERIC_TIF_BITS
512+
static __always_inline bool test_tif_rseq(unsigned long ti_work)
511513
{
514+
return ti_work & _TIF_RSEQ;
515+
}
516+
517+
static __always_inline void clear_tif_rseq(void)
518+
{
519+
static_assert(TIF_RSEQ != TIF_NOTIFY_RESUME);
520+
clear_thread_flag(TIF_RSEQ);
521+
}
522+
#else
523+
static __always_inline bool test_tif_rseq(unsigned long ti_work) { return true; }
524+
static __always_inline void clear_tif_rseq(void) { }
525+
#endif
526+
527+
static __always_inline bool
528+
rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned long ti_work)
529+
{
530+
if (likely(!test_tif_rseq(ti_work)))
531+
return false;
532+
512533
if (unlikely(__rseq_exit_to_user_mode_restart(regs))) {
513534
current->rseq.event.slowpath = true;
514535
set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
515536
return true;
516537
}
538+
539+
clear_tif_rseq();
517540
return false;
518541
}
519542

520543
#else /* CONFIG_GENERIC_ENTRY */
521-
static inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs) { return false; }
544+
static inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned long ti_work)
545+
{
546+
return false;
547+
}
522548
#endif /* !CONFIG_GENERIC_ENTRY */
523549

524550
static __always_inline void rseq_syscall_exit_to_user_mode(void)
@@ -577,7 +603,7 @@ static inline void rseq_debug_syscall_return(struct pt_regs *regs)
577603
}
578604
#else /* CONFIG_RSEQ */
579605
static inline void rseq_note_user_irq_entry(void) { }
580-
static inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs)
606+
static inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned long ti_work)
581607
{
582608
return false;
583609
}

include/linux/thread_info.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,11 @@ enum syscall_work_bit {
6767
#define _TIF_NEED_RESCHED_LAZY _TIF_NEED_RESCHED
6868
#endif
6969

70+
#ifndef TIF_RSEQ
71+
# define TIF_RSEQ TIF_NOTIFY_RESUME
72+
# define _TIF_RSEQ _TIF_NOTIFY_RESUME
73+
#endif
74+
7075
#ifdef __KERNEL__
7176

7277
#ifndef arch_set_restart_data

kernel/entry/common.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,20 @@
1111
/* Workaround to allow gradual conversion of architecture code */
1212
void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }
1313

14+
#ifdef CONFIG_HAVE_GENERIC_TIF_BITS
15+
#define EXIT_TO_USER_MODE_WORK_LOOP (EXIT_TO_USER_MODE_WORK & ~_TIF_RSEQ)
16+
#else
17+
#define EXIT_TO_USER_MODE_WORK_LOOP (EXIT_TO_USER_MODE_WORK)
18+
#endif
19+
1420
static __always_inline unsigned long __exit_to_user_mode_loop(struct pt_regs *regs,
1521
unsigned long ti_work)
1622
{
1723
/*
1824
* Before returning to user space ensure that all pending work
1925
* items have been completed.
2026
*/
21-
while (ti_work & EXIT_TO_USER_MODE_WORK) {
27+
while (ti_work & EXIT_TO_USER_MODE_WORK_LOOP) {
2228

2329
local_irq_enable_exit_to_user(ti_work);
2430

@@ -68,7 +74,7 @@ __always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
6874
for (;;) {
6975
ti_work = __exit_to_user_mode_loop(regs, ti_work);
7076

71-
if (likely(!rseq_exit_to_user_mode_restart(regs)))
77+
if (likely(!rseq_exit_to_user_mode_restart(regs, ti_work)))
7278
return ti_work;
7379
ti_work = read_thread_flags();
7480
}

0 commit comments

Comments
 (0)