Skip to content

Commit 9f0a7ab

Browse files
LuBaolu
authored and gregkh committed
iommu/sva: invalidate stale IOTLB entries for kernel address space
commit e37d5a2 upstream. Introduce a new IOMMU interface to flush IOTLB paging cache entries for the CPU kernel address space. This interface is invoked from the x86 architecture code that manages combined user and kernel page tables, specifically before any kernel page table page is freed and reused. This addresses the main issue with vfree() which is a common occurrence and can be triggered by unprivileged users. While this resolves the primary problem, it doesn't address some extremely rare case related to memory unplug of memory that was present as reserved memory at boot, which cannot be triggered by unprivileged users. The discussion can be found at the link below. Enable SVA on x86 architecture since the IOMMU can now receive notification to flush the paging cache before freeing the CPU kernel page table pages. Link: https://lkml.kernel.org/r/20251022082635.2462433-9-baolu.lu@linux.intel.com Link: https://lore.kernel.org/linux-iommu/04983c62-3b1d-40d4-93ae-34ca04b827e5@intel.com/ Co-developed-by: Jason Gunthorpe <jgg@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com> Suggested-by: Jann Horn <jannh@google.com> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> Reviewed-by: Vasant Hegde <vasant.hegde@amd.com> Reviewed-by: Kevin Tian <kevin.tian@intel.com> Cc: Alistair Popple <apopple@nvidia.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Borislav Betkov <bp@alien8.de> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Hildenbrand <david@redhat.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jean-Philippe Brucker <jean-philippe@linaro.org> Cc: Joerg Roedel <joro@8bytes.org> Cc: Liam Howlett <liam.howlett@oracle.com> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Michal Hocko <mhocko@kernel.org> Cc: Mike Rapoport (Microsoft) <rppt@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Robin Murohy <robin.murphy@arm.com> Cc: Thomas Gleinxer 
<tglx@linutronix.de> Cc: "Uladzislau Rezki (Sony)" <urezki@gmail.com> Cc: Vinicius Costa Gomes <vinicius.gomes@intel.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Will Deacon <will@kernel.org> Cc: Yi Lai <yi1.lai@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent b3039c5 commit 9f0a7ab

4 files changed

Lines changed: 35 additions & 4 deletions

File tree

arch/x86/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,7 @@ config X86
279279
select HAVE_PCI
280280
select HAVE_PERF_REGS
281281
select HAVE_PERF_USER_STACK_DUMP
282+
select ASYNC_KERNEL_PGTABLE_FREE if IOMMU_SVA
282283
select MMU_GATHER_RCU_TABLE_FREE
283284
select MMU_GATHER_MERGE_VMAS
284285
select HAVE_POSIX_CPU_TIMERS_TASK_WORK

drivers/iommu/iommu-sva.c

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
#include "iommu-priv.h"
1111

1212
static DEFINE_MUTEX(iommu_sva_lock);
13+
static bool iommu_sva_present;
14+
static LIST_HEAD(iommu_sva_mms);
1315
static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
1416
struct mm_struct *mm);
1517

@@ -42,6 +44,7 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de
4244
return ERR_PTR(-ENOSPC);
4345
}
4446
iommu_mm->pasid = pasid;
47+
iommu_mm->mm = mm;
4548
INIT_LIST_HEAD(&iommu_mm->sva_domains);
4649
/*
4750
* Make sure the write to mm->iommu_mm is not reordered in front of
@@ -77,9 +80,6 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
7780
if (!group)
7881
return ERR_PTR(-ENODEV);
7982

80-
if (IS_ENABLED(CONFIG_X86))
81-
return ERR_PTR(-EOPNOTSUPP);
82-
8383
mutex_lock(&iommu_sva_lock);
8484

8585
/* Allocate mm->pasid if necessary. */
@@ -135,8 +135,13 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
135135
if (ret)
136136
goto out_free_domain;
137137
domain->users = 1;
138-
list_add(&domain->next, &mm->iommu_mm->sva_domains);
139138

139+
if (list_empty(&iommu_mm->sva_domains)) {
140+
if (list_empty(&iommu_sva_mms))
141+
iommu_sva_present = true;
142+
list_add(&iommu_mm->mm_list_elm, &iommu_sva_mms);
143+
}
144+
list_add(&domain->next, &iommu_mm->sva_domains);
140145
out:
141146
refcount_set(&handle->users, 1);
142147
mutex_unlock(&iommu_sva_lock);
@@ -178,6 +183,13 @@ void iommu_sva_unbind_device(struct iommu_sva *handle)
178183
list_del(&domain->next);
179184
iommu_domain_free(domain);
180185
}
186+
187+
if (list_empty(&iommu_mm->sva_domains)) {
188+
list_del(&iommu_mm->mm_list_elm);
189+
if (list_empty(&iommu_sva_mms))
190+
iommu_sva_present = false;
191+
}
192+
181193
mutex_unlock(&iommu_sva_lock);
182194
kfree(handle);
183195
}
@@ -315,3 +327,15 @@ static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
315327

316328
return domain;
317329
}
330+
331+
void iommu_sva_invalidate_kva_range(unsigned long start, unsigned long end)
332+
{
333+
struct iommu_mm_data *iommu_mm;
334+
335+
guard(mutex)(&iommu_sva_lock);
336+
if (!iommu_sva_present)
337+
return;
338+
339+
list_for_each_entry(iommu_mm, &iommu_sva_mms, mm_list_elm)
340+
mmu_notifier_arch_invalidate_secondary_tlbs(iommu_mm->mm, start, end);
341+
}

include/linux/iommu.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1134,7 +1134,9 @@ struct iommu_sva {
11341134

11351135
struct iommu_mm_data {
11361136
u32 pasid;
1137+
struct mm_struct *mm;
11371138
struct list_head sva_domains;
1139+
struct list_head mm_list_elm;
11381140
};
11391141

11401142
int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode);
@@ -1615,6 +1617,7 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev,
16151617
struct mm_struct *mm);
16161618
void iommu_sva_unbind_device(struct iommu_sva *handle);
16171619
u32 iommu_sva_get_pasid(struct iommu_sva *handle);
1620+
void iommu_sva_invalidate_kva_range(unsigned long start, unsigned long end);
16181621
#else
16191622
static inline struct iommu_sva *
16201623
iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
@@ -1639,6 +1642,7 @@ static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm)
16391642
}
16401643

16411644
static inline void mm_pasid_drop(struct mm_struct *mm) {}
1645+
static inline void iommu_sva_invalidate_kva_range(unsigned long start, unsigned long end) {}
16421646
#endif /* CONFIG_IOMMU_SVA */
16431647

16441648
#ifdef CONFIG_IOMMU_IOPF

mm/pgtable-generic.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <linux/swap.h>
1414
#include <linux/swapops.h>
1515
#include <linux/mm_inline.h>
16+
#include <linux/iommu.h>
1617
#include <asm/pgalloc.h>
1718
#include <asm/tlb.h>
1819

@@ -430,6 +431,7 @@ static void kernel_pgtable_work_func(struct work_struct *work)
430431
list_splice_tail_init(&kernel_pgtable_work.list, &page_list);
431432
spin_unlock(&kernel_pgtable_work.lock);
432433

434+
iommu_sva_invalidate_kva_range(PAGE_OFFSET, TLB_FLUSH_ALL);
433435
list_for_each_entry_safe(pt, next, &page_list, pt_list)
434436
__pagetable_free(pt);
435437
}

0 commit comments

Comments
 (0)