 
 #include <linux/adreno-smmu-priv.h>
 #include <linux/io-pgtable.h>
+#include <linux/kmemleak.h>
 #include "msm_drv.h"
 #include "msm_mmu.h"
 
@@ -14,6 +15,8 @@ struct msm_iommu {
 	struct iommu_domain *domain;
 	atomic_t pagetables;
 	struct page *prr_page;
+
+	struct kmem_cache *pt_cache;
 };
 
 #define to_msm_iommu(x) container_of(x, struct msm_iommu, base)
@@ -27,6 +30,9 @@ struct msm_iommu_pagetable {
 	unsigned long pgsize_bitmap;	/* Bitmap of page sizes in use */
 	phys_addr_t ttbr;
 	u32 asid;
+
+	/** @root_page_table: Stores the root page table pointer. */
+	void *root_page_table;
 };
 static struct msm_iommu_pagetable *to_pagetable(struct msm_mmu *mmu)
 {
@@ -282,7 +288,145 @@ msm_iommu_pagetable_walk(struct msm_mmu *mmu, unsigned long iova, uint64_t ptes[
 	return 0;
 }
 
+static void
+msm_iommu_pagetable_prealloc_count(struct msm_mmu *mmu, struct msm_mmu_prealloc *p,
+				   uint64_t iova, size_t len)
+{
+	u64 pt_count;
+
+	/*
+	 * L1, L2 and L3 page tables.
+	 *
+	 * We could optimize L3 allocation by iterating over the sgt and merging
+	 * 2M contiguous blocks, but it's simpler to over-provision and return
+	 * the pages if they're not used.
+	 *
+	 * The first level descriptor (v8 / v7-lpae page table format) encodes
+	 * 30 bits of address.  The second level encodes 29.  For the 3rd it is
+	 * 39.
+	 *
+	 * https://developer.arm.com/documentation/ddi0406/c/System-Level-Architecture/Virtual-Memory-System-Architecture--VMSA-/Long-descriptor-translation-table-format/Long-descriptor-translation-table-format-descriptors?lang=en#BEIHEFFB
+	 */
+	pt_count = ((ALIGN(iova + len, 1ull << 39) - ALIGN_DOWN(iova, 1ull << 39)) >> 39) +
+		   ((ALIGN(iova + len, 1ull << 30) - ALIGN_DOWN(iova, 1ull << 30)) >> 30) +
+		   ((ALIGN(iova + len, 1ull << 21) - ALIGN_DOWN(iova, 1ull << 21)) >> 21);
+
+	p->count += pt_count;
+}
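
As a worked example of the over-provisioning (illustrative note, not part of the patch): for a 4 MiB mapping at iova = 0x100ff000, iova + len = 0x104ff000 and the three terms evaluate to

	(ALIGN(0x104ff000, 1ull << 39) - ALIGN_DOWN(0x100ff000, 1ull << 39)) >> 39 = 1
	(ALIGN(0x104ff000, 1ull << 30) - ALIGN_DOWN(0x100ff000, 1ull << 30)) >> 30 = 1
	(ALIGN(0x104ff000, 1ull << 21) - ALIGN_DOWN(0x100ff000, 1ull << 21)) >> 21 = 3

so p->count grows by 5: one table for the 512 GiB region touched, one for the 1 GiB region, and three for the 2 MiB regions the range straddles. Any tables that end up unused are handed back to the cache by the prealloc_cleanup() hook below.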
+
+static struct kmem_cache *
+get_pt_cache(struct msm_mmu *mmu)
+{
+	struct msm_iommu_pagetable *pagetable = to_pagetable(mmu);
+	return to_msm_iommu(pagetable->parent)->pt_cache;
+}
+
+static int
+msm_iommu_pagetable_prealloc_allocate(struct msm_mmu *mmu, struct msm_mmu_prealloc *p)
+{
+	struct kmem_cache *pt_cache = get_pt_cache(mmu);
+	int ret;
+
+	p->pages = kvmalloc_array(p->count, sizeof(p->pages), GFP_KERNEL);
+	if (!p->pages)
+		return -ENOMEM;
+
+	ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, p->count, p->pages);
+	if (ret != p->count) {
+		p->count = ret;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void
+msm_iommu_pagetable_prealloc_cleanup(struct msm_mmu *mmu, struct msm_mmu_prealloc *p)
+{
+	struct kmem_cache *pt_cache = get_pt_cache(mmu);
+	uint32_t remaining_pt_count = p->count - p->ptr;
+
+	kmem_cache_free_bulk(pt_cache, remaining_pt_count, &p->pages[p->ptr]);
+	kvfree(p->pages);
+}
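
For context, a minimal sketch of how a VM_BIND-style caller would be expected to drive these three hooks around a map operation (illustrative only, not part of this patch; the map() argument list and the handoff through mmu->prealloc are assumptions based on the fields used by the custom allocator below):

	struct msm_mmu_prealloc p = {};
	int ret;

	/* Worst-case number of page tables needed for [iova, iova + len): */
	mmu->funcs->prealloc_count(mmu, &p, iova, len);

	/* Bulk-allocate them up front, outside the fence-signaling path: */
	ret = mmu->funcs->prealloc_allocate(mmu, &p);
	if (!ret) {
		mmu->prealloc = &p;	/* alloc_pt() consumes p.pages[p.ptr++] */
		ret = mmu->funcs->map(mmu, iova, sgt, 0, len, prot);
		mmu->prealloc = NULL;
	}

	/* Return any unused tables to the cache and free the array: */
	mmu->funcs->prealloc_cleanup(mmu, &p);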
+
+/**
+ * msm_iommu_pagetable_alloc_pt() - Custom page table allocator
+ * @cookie: Cookie passed at page table allocation time.
+ * @size: Size of the page table. This size should be fixed,
+ * and determined at creation time based on the granule size.
+ * @gfp: GFP flags.
+ *
+ * We want a custom allocator so we can use a cache for page table
+ * allocations and amortize the cost of the over-reservation that's
+ * done to allow asynchronous VM operations.
+ *
+ * Return: non-NULL on success, NULL if the allocation failed for any
+ * reason.
+ */
+static void *
+msm_iommu_pagetable_alloc_pt(void *cookie, size_t size, gfp_t gfp)
+{
+	struct msm_iommu_pagetable *pagetable = cookie;
+	struct msm_mmu_prealloc *p = pagetable->base.prealloc;
+	void *page;
+
+	/* Allocation of the root page table happening during init. */
+	if (unlikely(!pagetable->root_page_table)) {
+		struct page *p;
+
+		p = alloc_pages_node(dev_to_node(pagetable->iommu_dev),
+				     gfp | __GFP_ZERO, get_order(size));
+		page = p ? page_address(p) : NULL;
+		pagetable->root_page_table = page;
+		return page;
+	}
+
+	if (WARN_ON(!p) || WARN_ON(p->ptr >= p->count))
+		return NULL;
+
+	page = p->pages[p->ptr++];
+	memset(page, 0, size);
+
+	/*
+	 * Page table entries don't use virtual addresses, which trips out
+	 * kmemleak. kmemleak_alloc_phys() might work, but physical addresses
+	 * are mixed with other fields, and I fear kmemleak won't detect that
+	 * either.
+	 *
+	 * Let's just ignore memory passed to the page-table driver for now.
+	 */
+	kmemleak_ignore(page);
+
+	return page;
+}
+
+/**
+ * msm_iommu_pagetable_free_pt() - Custom page table free function
+ * @cookie: Cookie passed at page table allocation time.
+ * @data: Page table to free.
+ * @size: Size of the page table. This size should be fixed,
+ * and determined at creation time based on the granule size.
+ */
+static void
+msm_iommu_pagetable_free_pt(void *cookie, void *data, size_t size)
+{
+	struct msm_iommu_pagetable *pagetable = cookie;
+
+	if (unlikely(pagetable->root_page_table == data)) {
+		free_pages((unsigned long)data, get_order(size));
+		pagetable->root_page_table = NULL;
+		return;
+	}
+
+	kmem_cache_free(get_pt_cache(&pagetable->base), data);
+}
+
 static const struct msm_mmu_funcs pagetable_funcs = {
+		.prealloc_count = msm_iommu_pagetable_prealloc_count,
+		.prealloc_allocate = msm_iommu_pagetable_prealloc_allocate,
+		.prealloc_cleanup = msm_iommu_pagetable_prealloc_cleanup,
 		.map = msm_iommu_pagetable_map,
 		.unmap = msm_iommu_pagetable_unmap,
 		.destroy = msm_iommu_pagetable_destroy,
@@ -333,6 +477,17 @@ static const struct iommu_flush_ops tlb_ops = {
 static int msm_gpu_fault_handler(struct iommu_domain *domain, struct device *dev,
 		unsigned long iova, int flags, void *arg);
 
+static size_t get_tblsz(const struct io_pgtable_cfg *cfg)
+{
+	int pg_shift, bits_per_level;
+
+	pg_shift = __ffs(cfg->pgsize_bitmap);
+	/* arm_lpae_iopte is u64: */
+	bits_per_level = pg_shift - ilog2(sizeof(u64));
+
+	return sizeof(u64) << bits_per_level;
+}
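
Worked numbers (not part of the patch): with a 4 KiB granule, pg_shift = __ffs(pgsize_bitmap) = 12 and bits_per_level = 12 - ilog2(sizeof(u64)) = 9, so get_tblsz() returns 8 << 9 = 4096, i.e. each page table holds 512 64-bit descriptors and occupies exactly one page. A 16 KiB granule would give 8 << 11 = 16 KiB per table.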
+
 struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent, bool kernel_managed)
 {
 	struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(parent->dev);
@@ -369,8 +524,34 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent, bool kernel_m
 
 	if (!kernel_managed) {
 		ttbr0_cfg.quirks |= IO_PGTABLE_QUIRK_NO_WARN;
+
+		/*
+		 * With userspace managed VM (aka VM_BIND), we need to pre-
+		 * allocate pages ahead of time for map/unmap operations,
+		 * handing them to io-pgtable via custom alloc/free ops as
+		 * needed:
+		 */
+		ttbr0_cfg.alloc = msm_iommu_pagetable_alloc_pt;
+		ttbr0_cfg.free = msm_iommu_pagetable_free_pt;
+
+		/*
+		 * Restrict to single page granules.  Otherwise we may run
+		 * into a situation where userspace wants to unmap/remap
+		 * only a part of a larger block mapping, which is not
+		 * possible without unmapping the entire block.  Which in
+		 * turn could cause faults if the GPU is accessing other
+		 * parts of the block mapping.
+		 *
+		 * Note that prior to commit 33729a5fc0ca ("iommu/io-pgtable-arm:
+		 * Remove split on unmap behavior") this was handled in
+		 * io-pgtable-arm.  But this apparently does not work
+		 * correctly on SMMUv3.
+		 */
+		WARN_ON(!(ttbr0_cfg.pgsize_bitmap & PAGE_SIZE));
+		ttbr0_cfg.pgsize_bitmap = PAGE_SIZE;
 	}
 
+	pagetable->iommu_dev = ttbr1_cfg->iommu_dev;
 	pagetable->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1,
 			&ttbr0_cfg, pagetable);
 
@@ -414,7 +595,6 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent, bool kernel_m
 	/* Needed later for TLB flush */
 	pagetable->parent = parent;
 	pagetable->tlb = ttbr1_cfg->tlb;
-	pagetable->iommu_dev = ttbr1_cfg->iommu_dev;
 	pagetable->pgsize_bitmap = ttbr0_cfg.pgsize_bitmap;
 	pagetable->ttbr = ttbr0_cfg.arm_lpae_s1_cfg.ttbr;
 
@@ -510,6 +690,7 @@ static void msm_iommu_destroy(struct msm_mmu *mmu)
 {
 	struct msm_iommu *iommu = to_msm_iommu(mmu);
 	iommu_domain_free(iommu->domain);
+	kmem_cache_destroy(iommu->pt_cache);
 	kfree(iommu);
 }
 
@@ -583,6 +764,14 @@ struct msm_mmu *msm_iommu_gpu_new(struct device *dev, struct msm_gpu *gpu, unsig
 		return mmu;
 
 	iommu = to_msm_iommu(mmu);
+	if (adreno_smmu && adreno_smmu->cookie) {
+		const struct io_pgtable_cfg *cfg =
+			adreno_smmu->get_ttbr1_cfg(adreno_smmu->cookie);
+		size_t tblsz = get_tblsz(cfg);
+
+		iommu->pt_cache =
+			kmem_cache_create("msm-mmu-pt", tblsz, tblsz, 0, NULL);
+	}
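
Passing tblsz as both the object size and the alignment to kmem_cache_create() means every object handed out by kmem_cache_alloc_bulk() is naturally aligned to the table size, which the LPAE format expects for table base addresses. As a rough illustration (assumed numbers, not part of the patch), with a 4 KiB granule this evaluates to kmem_cache_create("msm-mmu-pt", 4096, 4096, 0, NULL), i.e. page-sized, page-aligned objects, one per page table.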
 	iommu_set_fault_handler(iommu->domain, msm_gpu_fault_handler, iommu);
 
 	/* Enable stall on iommu fault: */