Skip to content

Commit bf5835b

Browse files
Peter Zijlstra authored and suryasaimadhu committed
intel_idle: Disable IBRS during long idle
Having IBRS enabled while the SMT sibling is idle unnecessarily slows down the running sibling. OTOH, disabling IBRS around idle takes two MSR writes, which will increase the idle latency. Therefore, only disable IBRS around deeper idle states. Shallow idle states are bounded by the tick in duration, since NOHZ is not allowed for them by virtue of their short target residency. Only do this for mwait-driven idle, since that keeps interrupts disabled across idle, which makes disabling IBRS vs IRQ-entry a non-issue. Note: C6 is a random threshold, most importantly C1 probably shouldn't disable IBRS, benchmarking needed. Suggested-by: Tim Chen <tim.c.chen@linux.intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Borislav Petkov <bp@suse.de> Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> Signed-off-by: Borislav Petkov <bp@suse.de>
1 parent 6ad0ad2 commit bf5835b

3 files changed

Lines changed: 45 additions & 6 deletions

File tree

arch/x86/include/asm/nospec-branch.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,7 @@ static inline void indirect_branch_prediction_barrier(void)
255255
/* The Intel SPEC CTRL MSR base value cache */
256256
extern u64 x86_spec_ctrl_base;
257257
extern void write_spec_ctrl_current(u64 val, bool force);
258+
extern u64 spec_ctrl_current(void);
258259

259260
/*
260261
* With retpoline, we must use IBRS to restrict branch prediction

arch/x86/kernel/cpu/bugs.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,12 @@ void write_spec_ctrl_current(u64 val, bool force)
7979
wrmsrl(MSR_IA32_SPEC_CTRL, val);
8080
}
8181

82+
u64 spec_ctrl_current(void)
83+
{
84+
return this_cpu_read(x86_spec_ctrl_current);
85+
}
86+
EXPORT_SYMBOL_GPL(spec_ctrl_current);
87+
8288
/*
8389
* The vendor and possibly platform specific bits which can be modified in
8490
* x86_spec_ctrl_base.

drivers/idle/intel_idle.c

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,13 @@
4747
#include <linux/tick.h>
4848
#include <trace/events/power.h>
4949
#include <linux/sched.h>
50+
#include <linux/sched/smt.h>
5051
#include <linux/notifier.h>
5152
#include <linux/cpu.h>
5253
#include <linux/moduleparam.h>
5354
#include <asm/cpu_device_id.h>
5455
#include <asm/intel-family.h>
56+
#include <asm/nospec-branch.h>
5557
#include <asm/mwait.h>
5658
#include <asm/msr.h>
5759

@@ -105,6 +107,12 @@ static unsigned int mwait_substates __initdata;
105107
*/
106108
#define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15)
107109

110+
/*
111+
* Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
112+
* above.
113+
*/
114+
#define CPUIDLE_FLAG_IBRS BIT(16)
115+
108116
/*
109117
* MWAIT takes an 8-bit "hint" in EAX "suggesting"
110118
* the C-state (top nibble) and sub-state (bottom nibble)
@@ -159,6 +167,24 @@ static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
159167
return ret;
160168
}
161169

170+
static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
171+
struct cpuidle_driver *drv, int index)
172+
{
173+
bool smt_active = sched_smt_active();
174+
u64 spec_ctrl = spec_ctrl_current();
175+
int ret;
176+
177+
if (smt_active)
178+
wrmsrl(MSR_IA32_SPEC_CTRL, 0);
179+
180+
ret = __intel_idle(dev, drv, index);
181+
182+
if (smt_active)
183+
wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
184+
185+
return ret;
186+
}
187+
162188
/**
163189
* intel_idle_s2idle - Ask the processor to enter the given idle state.
164190
* @dev: cpuidle device of the target CPU.
@@ -680,39 +706,39 @@ static struct cpuidle_state skl_cstates[] __initdata = {
680706
{
681707
.name = "C6",
682708
.desc = "MWAIT 0x20",
683-
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
709+
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
684710
.exit_latency = 85,
685711
.target_residency = 200,
686712
.enter = &intel_idle,
687713
.enter_s2idle = intel_idle_s2idle, },
688714
{
689715
.name = "C7s",
690716
.desc = "MWAIT 0x33",
691-
.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
717+
.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
692718
.exit_latency = 124,
693719
.target_residency = 800,
694720
.enter = &intel_idle,
695721
.enter_s2idle = intel_idle_s2idle, },
696722
{
697723
.name = "C8",
698724
.desc = "MWAIT 0x40",
699-
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
725+
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
700726
.exit_latency = 200,
701727
.target_residency = 800,
702728
.enter = &intel_idle,
703729
.enter_s2idle = intel_idle_s2idle, },
704730
{
705731
.name = "C9",
706732
.desc = "MWAIT 0x50",
707-
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
733+
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
708734
.exit_latency = 480,
709735
.target_residency = 5000,
710736
.enter = &intel_idle,
711737
.enter_s2idle = intel_idle_s2idle, },
712738
{
713739
.name = "C10",
714740
.desc = "MWAIT 0x60",
715-
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
741+
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
716742
.exit_latency = 890,
717743
.target_residency = 5000,
718744
.enter = &intel_idle,
@@ -741,7 +767,7 @@ static struct cpuidle_state skx_cstates[] __initdata = {
741767
{
742768
.name = "C6",
743769
.desc = "MWAIT 0x20",
744-
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
770+
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
745771
.exit_latency = 133,
746772
.target_residency = 600,
747773
.enter = &intel_idle,
@@ -1819,6 +1845,12 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
18191845
if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE)
18201846
drv->states[drv->state_count].enter = intel_idle_irq;
18211847

1848+
if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
1849+
cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
1850+
WARN_ON_ONCE(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE);
1851+
drv->states[drv->state_count].enter = intel_idle_ibrs;
1852+
}
1853+
18221854
if ((disabled_states_mask & BIT(drv->state_count)) ||
18231855
((icpu->use_acpi || force_use_acpi) &&
18241856
intel_idle_off_by_default(mwait_hint) &&

0 commit comments

Comments (0)