@@ -154,26 +154,30 @@ static inline u16 user_pcid(u16 asid)
 	return ret;
 }
 
-static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
+static inline unsigned long build_cr3(pgd_t *pgd, u16 asid, unsigned long lam)
 {
+	unsigned long cr3 = __sme_pa(pgd) | lam;
+
 	if (static_cpu_has(X86_FEATURE_PCID)) {
-		return __sme_pa(pgd) | kern_pcid(asid);
+		VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+		cr3 |= kern_pcid(asid);
 	} else {
 		VM_WARN_ON_ONCE(asid != 0);
-		return __sme_pa(pgd);
 	}
+
+	return cr3;
 }
 
-static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
+static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid,
+					      unsigned long lam)
 {
-	VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
 	/*
 	 * Use boot_cpu_has() instead of this_cpu_has() as this function
 	 * might be called during early boot. This should work even after
 	 * boot because all CPU's the have same capabilities:
	 */
	VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID));
-	return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
+	return build_cr3(pgd, asid, lam) | CR3_NOFLUSH;
 }
@@ -274,15 +278,16 @@ static inline void invalidate_user_asid(u16 asid)
 		(unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
 }
 
-static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
+static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, unsigned long lam,
+			    bool need_flush)
 {
 	unsigned long new_mm_cr3;
 
 	if (need_flush) {
 		invalidate_user_asid(new_asid);
-		new_mm_cr3 = build_cr3(pgdir, new_asid);
+		new_mm_cr3 = build_cr3(pgdir, new_asid, lam);
 	} else {
-		new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
+		new_mm_cr3 = build_cr3_noflush(pgdir, new_asid, lam);
 	}
 
 	/*
@@ -491,6 +496,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 {
 	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
 	u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+	unsigned long new_lam = mm_lam_cr3_mask(next);
 	bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
 	unsigned cpu = smp_processor_id();
 	u64 next_tlb_gen;
@@ -520,7 +526,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * isn't free.
 	 */
 #ifdef CONFIG_DEBUG_VM
-	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
+	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid,
+						   tlbstate_lam_cr3_mask()))) {
 		/*
 		 * If we were to BUG here, we'd be very likely to kill
 		 * the system so hard that we don't see the call trace.
@@ -552,9 +559,15 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * instruction.
 	 */
 	if (real_prev == next) {
+		/* Not actually switching mm's */
 		VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
 			   next->context.ctx_id);
 
+		/*
+		 * If this races with another thread that enables lam, 'new_lam'
+		 * might not match tlbstate_lam_cr3_mask().
+		 */
+
 		/*
 		 * Even in lazy TLB mode, the CPU should stay set in the
 		 * mm_cpumask. The TLB shootdown code can figure out from
@@ -622,15 +635,16 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		barrier();
 	}
 
+	set_tlbstate_lam_mode(next);
 	if (need_flush) {
 		this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
 		this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-		load_new_mm_cr3(next->pgd, new_asid, true);
+		load_new_mm_cr3(next->pgd, new_asid, new_lam, true);
 
 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
 	} else {
 		/* The new ASID is already up to date. */
-		load_new_mm_cr3(next->pgd, new_asid, false);
+		load_new_mm_cr3(next->pgd, new_asid, new_lam, false);
 
 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
 	}
@@ -691,6 +705,10 @@ void initialize_tlbstate_and_flush(void)
 	/* Assert that CR3 already references the right mm. */
 	WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
 
+	/* LAM expected to be disabled */
+	WARN_ON(cr3 & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57));
+	WARN_ON(mm_lam_cr3_mask(mm));
+
 	/*
 	 * Assert that CR4.PCIDE is set if needed. (CR4.PCIDE initialization
 	 * doesn't work like other CR4 bits because it can only be set from
@@ -699,15 +717,16 @@ void initialize_tlbstate_and_flush(void)
 	WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
 		!(cr4_read_shadow() & X86_CR4_PCIDE));
 
-	/* Force ASID 0 and force a TLB flush. */
-	write_cr3(build_cr3(mm->pgd, 0));
+	/* Disable LAM, force ASID 0 and force a TLB flush. */
+	write_cr3(build_cr3(mm->pgd, 0, 0));
 
 	/* Reinitialize tlbstate. */
 	this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT);
 	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
 	this_cpu_write(cpu_tlbstate.next_asid, 1);
 	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
 	this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
+	set_tlbstate_lam_mode(mm);
 
 	for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
 		this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
@@ -1071,8 +1090,10 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
  */
 unsigned long __get_current_cr3_fast(void)
 {
-	unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
-		this_cpu_read(cpu_tlbstate.loaded_mm_asid));
+	unsigned long cr3 =
+		build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
+			  this_cpu_read(cpu_tlbstate.loaded_mm_asid),
+			  tlbstate_lam_cr3_mask());
 
 	/* For now, be very restrictive about when this can be called. */
 	VM_WARN_ON(in_nmi() || preemptible());