Skip to content

Commit 82721d8

Browse files
kirylhansendc
authored and committed
x86/mm: Handle LAM on context switch
Linear Address Masking mode for userspace pointers encoded in CR3 bits. The mode is selected per-process and stored in mm_context_t. switch_mm_irqs_off() now respects selected LAM mode and constructs CR3 accordingly. The active LAM mode gets recorded in the tlb_state. Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Tested-by: Alexander Potapenko <glider@google.com> Link: https://lore.kernel.org/all/20230312112612.31869-5-kirill.shutemov%40linux.intel.com
1 parent 6449dcb commit 82721d8

4 files changed

Lines changed: 103 additions & 17 deletions

File tree

arch/x86/include/asm/mmu.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ typedef struct {
4242
unsigned long flags;
4343
#endif
4444

45+
#ifdef CONFIG_ADDRESS_MASKING
46+
/* Active LAM mode: X86_CR3_LAM_U48 or X86_CR3_LAM_U57 or 0 (disabled) */
47+
unsigned long lam_cr3_mask;
48+
#endif
49+
4550
struct mutex lock;
4651
void __user *vdso; /* vdso base address */
4752
const struct vdso_image *vdso_image; /* vdso image in use */

arch/x86/include/asm/mmu_context.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,29 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
9292
}
9393
#endif
9494

95+
#ifdef CONFIG_ADDRESS_MASKING
/*
 * Return the LAM bits to be OR'ed into CR3 for @mm:
 * X86_CR3_LAM_U48, X86_CR3_LAM_U57, or 0 when LAM is disabled.
 */
static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
{
	return mm->context.lam_cr3_mask;
}

/* Copy the LAM mode from the parent mm into a newly duplicated mm. */
static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
{
	mm->context.lam_cr3_mask = oldmm->context.lam_cr3_mask;
}

#else

static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
{
	return 0;
}

static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
{
}
#endif
117+
95118
#define enter_lazy_tlb enter_lazy_tlb
96119
extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
97120

@@ -169,6 +192,7 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
169192
{
170193
arch_dup_pkeys(oldmm, mm);
171194
paravirt_arch_dup_mmap(oldmm, mm);
195+
dup_lam(oldmm, mm);
172196
return ldt_dup_context(oldmm, mm);
173197
}
174198

arch/x86/include/asm/tlbflush.h

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#ifndef _ASM_X86_TLBFLUSH_H
33
#define _ASM_X86_TLBFLUSH_H
44

5-
#include <linux/mm.h>
5+
#include <linux/mm_types.h>
66
#include <linux/sched.h>
77

88
#include <asm/processor.h>
@@ -12,6 +12,7 @@
1212
#include <asm/invpcid.h>
1313
#include <asm/pti.h>
1414
#include <asm/processor-flags.h>
15+
#include <asm/pgtable.h>
1516

1617
void __flush_tlb_all(void);
1718

@@ -101,6 +102,16 @@ struct tlb_state {
101102
*/
102103
bool invalidate_other;
103104

105+
#ifdef CONFIG_ADDRESS_MASKING
106+
/*
107+
* Active LAM mode.
108+
*
109+
* X86_CR3_LAM_U57/U48 shifted right by X86_CR3_LAM_U57_BIT or 0 if LAM
110+
* disabled.
111+
*/
112+
u8 lam;
113+
#endif
114+
104115
/*
105116
* Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
106117
* the corresponding user PCID needs a flush next time we
@@ -357,6 +368,31 @@ static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
357368
}
358369
#define huge_pmd_needs_flush huge_pmd_needs_flush
359370

371+
#ifdef CONFIG_ADDRESS_MASKING
372+
static inline u64 tlbstate_lam_cr3_mask(void)
373+
{
374+
u64 lam = this_cpu_read(cpu_tlbstate.lam);
375+
376+
return lam << X86_CR3_LAM_U57_BIT;
377+
}
378+
379+
static inline void set_tlbstate_lam_mode(struct mm_struct *mm)
380+
{
381+
this_cpu_write(cpu_tlbstate.lam,
382+
mm->context.lam_cr3_mask >> X86_CR3_LAM_U57_BIT);
383+
}
384+
385+
#else
386+
387+
static inline u64 tlbstate_lam_cr3_mask(void)
388+
{
389+
return 0;
390+
}
391+
392+
static inline void set_tlbstate_lam_mode(struct mm_struct *mm)
393+
{
394+
}
395+
#endif
360396
#endif /* !MODULE */
361397

362398
static inline void __native_tlb_flush_global(unsigned long cr4)

arch/x86/mm/tlb.c

Lines changed: 37 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -154,26 +154,30 @@ static inline u16 user_pcid(u16 asid)
154154
return ret;
155155
}
156156

157-
static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
157+
static inline unsigned long build_cr3(pgd_t *pgd, u16 asid, unsigned long lam)
158158
{
159+
unsigned long cr3 = __sme_pa(pgd) | lam;
160+
159161
if (static_cpu_has(X86_FEATURE_PCID)) {
160-
return __sme_pa(pgd) | kern_pcid(asid);
162+
VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
163+
cr3 |= kern_pcid(asid);
161164
} else {
162165
VM_WARN_ON_ONCE(asid != 0);
163-
return __sme_pa(pgd);
164166
}
167+
168+
return cr3;
165169
}
166170

167-
static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
171+
static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid,
172+
unsigned long lam)
168173
{
169-
VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
170174
/*
171175
* Use boot_cpu_has() instead of this_cpu_has() as this function
172176
* might be called during early boot. This should work even after
173177
* boot because all CPU's the have same capabilities:
174178
*/
175179
VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID));
176-
return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
180+
return build_cr3(pgd, asid, lam) | CR3_NOFLUSH;
177181
}
178182

179183
/*
@@ -274,15 +278,16 @@ static inline void invalidate_user_asid(u16 asid)
274278
(unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
275279
}
276280

277-
static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
281+
static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, unsigned long lam,
282+
bool need_flush)
278283
{
279284
unsigned long new_mm_cr3;
280285

281286
if (need_flush) {
282287
invalidate_user_asid(new_asid);
283-
new_mm_cr3 = build_cr3(pgdir, new_asid);
288+
new_mm_cr3 = build_cr3(pgdir, new_asid, lam);
284289
} else {
285-
new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
290+
new_mm_cr3 = build_cr3_noflush(pgdir, new_asid, lam);
286291
}
287292

288293
/*
@@ -491,6 +496,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
491496
{
492497
struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
493498
u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
499+
unsigned long new_lam = mm_lam_cr3_mask(next);
494500
bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
495501
unsigned cpu = smp_processor_id();
496502
u64 next_tlb_gen;
@@ -520,7 +526,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
520526
* isn't free.
521527
*/
522528
#ifdef CONFIG_DEBUG_VM
523-
if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
529+
if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid,
530+
tlbstate_lam_cr3_mask()))) {
524531
/*
525532
* If we were to BUG here, we'd be very likely to kill
526533
* the system so hard that we don't see the call trace.
@@ -552,9 +559,15 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
552559
* instruction.
553560
*/
554561
if (real_prev == next) {
562+
/* Not actually switching mm's */
555563
VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
556564
next->context.ctx_id);
557565

566+
/*
567+
* If this races with another thread that enables lam, 'new_lam'
568+
* might not match tlbstate_lam_cr3_mask().
569+
*/
570+
558571
/*
559572
* Even in lazy TLB mode, the CPU should stay set in the
560573
* mm_cpumask. The TLB shootdown code can figure out from
@@ -622,15 +635,16 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
622635
barrier();
623636
}
624637

638+
set_tlbstate_lam_mode(next);
625639
if (need_flush) {
626640
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
627641
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
628-
load_new_mm_cr3(next->pgd, new_asid, true);
642+
load_new_mm_cr3(next->pgd, new_asid, new_lam, true);
629643

630644
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
631645
} else {
632646
/* The new ASID is already up to date. */
633-
load_new_mm_cr3(next->pgd, new_asid, false);
647+
load_new_mm_cr3(next->pgd, new_asid, new_lam, false);
634648

635649
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
636650
}
@@ -691,6 +705,10 @@ void initialize_tlbstate_and_flush(void)
691705
/* Assert that CR3 already references the right mm. */
692706
WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
693707

708+
/* LAM expected to be disabled */
709+
WARN_ON(cr3 & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57));
710+
WARN_ON(mm_lam_cr3_mask(mm));
711+
694712
/*
695713
* Assert that CR4.PCIDE is set if needed. (CR4.PCIDE initialization
696714
* doesn't work like other CR4 bits because it can only be set from
@@ -699,15 +717,16 @@ void initialize_tlbstate_and_flush(void)
699717
WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
700718
!(cr4_read_shadow() & X86_CR4_PCIDE));
701719

702-
/* Force ASID 0 and force a TLB flush. */
703-
write_cr3(build_cr3(mm->pgd, 0));
720+
/* Disable LAM, force ASID 0 and force a TLB flush. */
721+
write_cr3(build_cr3(mm->pgd, 0, 0));
704722

705723
/* Reinitialize tlbstate. */
706724
this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT);
707725
this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
708726
this_cpu_write(cpu_tlbstate.next_asid, 1);
709727
this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
710728
this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
729+
set_tlbstate_lam_mode(mm);
711730

712731
for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
713732
this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
@@ -1071,8 +1090,10 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
10711090
*/
10721091
unsigned long __get_current_cr3_fast(void)
10731092
{
1074-
unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
1075-
this_cpu_read(cpu_tlbstate.loaded_mm_asid));
1093+
unsigned long cr3 =
1094+
build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
1095+
this_cpu_read(cpu_tlbstate.loaded_mm_asid),
1096+
tlbstate_lam_cr3_mask());
10761097

10771098
/* For now, be very restrictive about when this can be called. */
10781099
VM_WARN_ON(in_nmi() || preemptible());

0 commit comments

Comments
 (0)