Skip to content

Commit 3b299b9

Browse files
yosrym93 authored and hansendc committed
x86/mm: Use IPIs to synchronize LAM enablement
LAM can only be enabled when a process is single-threaded. But _kernel_ threads can temporarily use a single-threaded process's mm.

If LAM is enabled by a userspace process while a kthread is using its mm, the kthread will not observe LAM enablement (i.e. LAM will be disabled in CR3). This could be fine for the kthread itself, as LAM only affects userspace addresses. However, if the kthread context switches to a thread in the same userspace process, CR3 may or may not be updated because the mm_struct doesn't change (based on pending TLB flushes). If CR3 is not updated, the userspace thread will run incorrectly with LAM disabled, which may cause page faults when using tagged addresses.

Example scenario:

  CPU 1                                   CPU 2
  /* kthread */
  kthread_use_mm()
                                          /* user thread */
                                          prctl_enable_tagged_addr()
                                          /* LAM enabled on CPU 2 */
  /* LAM disabled on CPU 1 */
                                          context_switch() /* to CPU 1 */
  /* Switching to user thread */
  switch_mm_irqs_off()
  /* CR3 not updated */
  /* LAM is still disabled on CPU 1 */

Synchronize LAM enablement by sending an IPI to all CPUs running with the mm_struct to enable LAM. This makes sure LAM is enabled on CPU 1 in the above scenario before prctl_enable_tagged_addr() returns and userspace starts using tagged addresses, and before it's possible to run the userspace process on CPU 1.

In switch_mm_irqs_off(), move reading the LAM mask until after mm_cpumask() is updated. This ensures that if an outdated LAM mask is written to CR3, an IPI is received to update it right after IRQs are re-enabled.

[ dhansen: Add a LAM enabling helper and comment it ]

Fixes: 82721d8 ("x86/mm: Handle LAM on context switch")
Suggested-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Link: https://lore.kernel.org/all/20240702132139.3332013-2-yosryahmed%40google.com
1 parent 22a40d1 commit 3b299b9

2 files changed

Lines changed: 29 additions & 7 deletions

File tree

arch/x86/kernel/process_64.c

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -798,6 +798,27 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
798798

799799
#define LAM_U57_BITS 6
800800

801+
static void enable_lam_func(void *__mm)
802+
{
803+
struct mm_struct *mm = __mm;
804+
805+
if (this_cpu_read(cpu_tlbstate.loaded_mm) == mm) {
806+
write_cr3(__read_cr3() | mm->context.lam_cr3_mask);
807+
set_tlbstate_lam_mode(mm);
808+
}
809+
}
810+
811+
static void mm_enable_lam(struct mm_struct *mm)
812+
{
813+
/*
814+
* Even though the process must still be single-threaded at this
815+
* point, kernel threads may be using the mm. IPI those kernel
816+
* threads if they exist.
817+
*/
818+
on_each_cpu_mask(mm_cpumask(mm), enable_lam_func, mm, true);
819+
set_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags);
820+
}
821+
801822
static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
802823
{
803824
if (!cpu_feature_enabled(X86_FEATURE_LAM))
@@ -814,6 +835,10 @@ static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
814835
if (mmap_write_lock_killable(mm))
815836
return -EINTR;
816837

838+
/*
839+
* MM_CONTEXT_LOCK_LAM is set on clone. Prevent LAM from
840+
* being enabled unless the process is single threaded:
841+
*/
817842
if (test_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags)) {
818843
mmap_write_unlock(mm);
819844
return -EBUSY;
@@ -830,9 +855,7 @@ static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
830855
return -EINVAL;
831856
}
832857

833-
write_cr3(__read_cr3() | mm->context.lam_cr3_mask);
834-
set_tlbstate_lam_mode(mm);
835-
set_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags);
858+
mm_enable_lam(mm);
836859

837860
mmap_write_unlock(mm);
838861

arch/x86/mm/tlb.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -503,9 +503,9 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
503503
{
504504
struct mm_struct *prev = this_cpu_read(cpu_tlbstate.loaded_mm);
505505
u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
506-
unsigned long new_lam = mm_lam_cr3_mask(next);
507506
bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
508507
unsigned cpu = smp_processor_id();
508+
unsigned long new_lam;
509509
u64 next_tlb_gen;
510510
bool need_flush;
511511
u16 new_asid;
@@ -619,9 +619,7 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
619619
cpumask_clear_cpu(cpu, mm_cpumask(prev));
620620
}
621621

622-
/*
623-
* Start remote flushes and then read tlb_gen.
624-
*/
622+
/* Start receiving IPIs and then read tlb_gen (and LAM below) */
625623
if (next != &init_mm)
626624
cpumask_set_cpu(cpu, mm_cpumask(next));
627625
next_tlb_gen = atomic64_read(&next->context.tlb_gen);
@@ -633,6 +631,7 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
633631
barrier();
634632
}
635633

634+
new_lam = mm_lam_cr3_mask(next);
636635
set_tlbstate_lam_mode(next);
637636
if (need_flush) {
638637
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);

0 commit comments

Comments
 (0)