@@ -822,6 +822,35 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
 	return PAGE_SIZE;
 }
 
+static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
+{
+	unsigned long pa;
+
+	if (is_vm_hugetlb_page(vma) && !(vma->vm_flags & VM_PFNMAP))
+		return huge_page_shift(hstate_vma(vma));
+
+	if (!(vma->vm_flags & VM_PFNMAP))
+		return PAGE_SHIFT;
+
+	VM_BUG_ON(is_vm_hugetlb_page(vma));
+
+	pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start);
+
+#ifndef __PAGETABLE_PMD_FOLDED
+	if ((hva & (PUD_SIZE - 1)) == (pa & (PUD_SIZE - 1)) &&
+	    ALIGN_DOWN(hva, PUD_SIZE) >= vma->vm_start &&
+	    ALIGN(hva, PUD_SIZE) <= vma->vm_end)
+		return PUD_SHIFT;
+#endif
+
+	if ((hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1)) &&
+	    ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start &&
+	    ALIGN(hva, PMD_SIZE) <= vma->vm_end)
+		return PMD_SHIFT;
+
+	return PAGE_SHIFT;
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  unsigned long fault_status)
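To make the alignment test above concrete: a block mapping of size S placed at ALIGN_DOWN(hva, S) can only stand in for a page mapping when hva and pa agree in their low S-1 bits (so a single block translation yields the right physical offset for every address inside it) and the whole S-sized block lies inside the VMA. Below is a minimal standalone sketch of that test in userspace C, with made-up VMA bounds and an assumed 2MiB PMD_SIZE; an illustration, not kernel code:

	#include <stdio.h>

	#define PMD_SIZE	 (1UL << 21)	/* assumed: 2MiB, as with 4K pages */
	#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))

	int main(void)
	{
		/* Hypothetical PFNMAP VMA [vm_start, vm_end) backed from pa_base. */
		unsigned long vm_start = 0x40000000UL;
		unsigned long vm_end   = 0x40800000UL;
		unsigned long pa_base  = 0x10200000UL;
		unsigned long hva      = 0x40300000UL;	/* faulting address */
		unsigned long pa       = pa_base + (hva - vm_start);

		/* Same offset within a 2MiB block on both sides? */
		int congruent = (hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1));
		/* Does the 2MiB block around hva stay inside the VMA? */
		int contained = ALIGN_DOWN(hva, PMD_SIZE) >= vm_start &&
				ALIGN_DOWN(hva, PMD_SIZE) + PMD_SIZE <= vm_end;

		printf("%s mapping\n", (congruent && contained) ? "PMD block" : "page");
		return 0;
	}

With these numbers both tests pass (hva and pa each sit 1MiB into their respective 2MiB blocks), so get_vma_page_shift() would return PMD_SHIFT; shifting pa_base by 4KiB would break the congruence and force PAGE_SHIFT.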
@@ -853,7 +882,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 	}
 
-	/* Let's check if we will get back a huge page backed by hugetlbfs */
+	/*
+	 * Let's check if we will get back a huge page backed by hugetlbfs, or
+	 * get block mapping for device MMIO region.
+	 */
 	mmap_read_lock(current->mm);
 	vma = find_vma_intersection(current->mm, hva, hva + 1);
 	if (unlikely(!vma)) {
@@ -862,15 +894,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 	}
 
-	if (is_vm_hugetlb_page(vma))
-		vma_shift = huge_page_shift(hstate_vma(vma));
-	else
-		vma_shift = PAGE_SHIFT;
-
-	if (logging_active ||
-	    (vma->vm_flags & VM_PFNMAP)) {
+	/*
+	 * logging_active is guaranteed to never be true for VM_PFNMAP
+	 * memslots.
+	 */
+	if (logging_active) {
 		force_pte = true;
 		vma_shift = PAGE_SHIFT;
+	} else {
+		vma_shift = get_vma_page_shift(vma, hva);
 	}
 
 	switch (vma_shift) {
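(The new comment states an invariant rather than creating one: as the final hunk of this patch shows, kvm_arch_prepare_memory_region() rejects KVM_MEM_LOG_DIRTY_PAGES with -EINVAL when the memslot sits on a VM_PFNMAP VMA, so dirty logging can never be active for such a region and the else branch may safely derive a block size from the VMA.)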
@@ -943,8 +975,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 
 	if (kvm_is_device_pfn(pfn)) {
+		/*
+		 * If the page was identified as device early by looking at
+		 * the VMA flags, vma_pagesize is already representing the
+		 * largest quantity we can map. If instead it was mapped
+		 * via gfn_to_pfn_prot(), vma_pagesize is set to PAGE_SIZE
+		 * and must not be upgraded.
+		 *
+		 * In both cases, we don't let transparent_hugepage_adjust()
+		 * change things at the last minute.
+		 */
 		device = true;
-		force_pte = true;
 	} else if (logging_active && !write_fault) {
 		/*
 		 * Only actually map the page as writable if this was a write
@@ -965,7 +1006,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 * If we are not forced to use page mapping, check if we are
 	 * backed by a THP and thus use block mapping if possible.
 	 */
-	if (vma_pagesize == PAGE_SIZE && !force_pte)
+	if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
 		vma_pagesize = transparent_hugepage_adjust(memslot, hva,
 							   &pfn, &fault_ipa);
 	if (writable)
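Together with the removal of force_pte in the device path above, the guard now reads as: only normal memory, currently mapped at page granularity, and not pinned to PTEs by dirty logging, may be upgraded by transparent_hugepage_adjust(). A throwaway restatement of that predicate as a standalone helper (hypothetical name, assumed 4K PAGE_SIZE; a sketch, not kernel code):

	#include <stdbool.h>
	#include <stddef.h>

	#define PAGE_SIZE 4096UL	/* assumption: 4K base pages */

	/* May the fault handler still try a THP block upgrade? */
	static inline bool can_try_thp(size_t vma_pagesize, bool force_pte,
				       bool device)
	{
		return vma_pagesize == PAGE_SIZE && !(force_pte || device);
	}

Keeping device out of force_pte leaves force_pte meaning exactly "dirty logging wants PTEs", which is what lets the hunk above drop the now-redundant assignment.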
@@ -1346,7 +1387,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 {
 	hva_t hva = mem->userspace_addr;
 	hva_t reg_end = hva + mem->memory_size;
-	bool writable = !(mem->flags & KVM_MEM_READONLY);
 	int ret = 0;
 
 	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
@@ -1363,8 +1403,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	mmap_read_lock(current->mm);
 	/*
 	 * A memory region could potentially cover multiple VMAs, and any holes
-	 * between them, so iterate over all of them to find out if we can map
-	 * any of them right now.
+	 * between them, so iterate over all of them.
 	 *
 	 *     +--------------------------------------------+
 	 * +---------------+----------------+   +----------------+
@@ -1375,51 +1414,21 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	 */
 	do {
 		struct vm_area_struct *vma;
-		hva_t vm_start, vm_end;
 
 		vma = find_vma_intersection(current->mm, hva, reg_end);
 		if (!vma)
 			break;
 
-		/*
-		 * Take the intersection of this VMA with the memory region
-		 */
-		vm_start = max(hva, vma->vm_start);
-		vm_end = min(reg_end, vma->vm_end);
-
 		if (vma->vm_flags & VM_PFNMAP) {
-			gpa_t gpa = mem->guest_phys_addr +
-				    (vm_start - mem->userspace_addr);
-			phys_addr_t pa;
-
-			pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
-			pa += vm_start - vma->vm_start;
-
 			/* IO region dirty page logging not allowed */
 			if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
 				ret = -EINVAL;
-				goto out;
-			}
-
-			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
-						    vm_end - vm_start,
-						    writable);
-			if (ret)
 				break;
+			}
 		}
-		hva = vm_end;
+		hva = min(reg_end, vma->vm_end);
 	} while (hva < reg_end);
 
-	if (change == KVM_MR_FLAGS_ONLY)
-		goto out;
-
-	spin_lock(&kvm->mmu_lock);
-	if (ret)
-		unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size);
-	else if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
-		stage2_flush_memslot(kvm, memslot);
-	spin_unlock(&kvm->mmu_lock);
-out:
 	mmap_read_unlock(current->mm);
 	return ret;
 }
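Net effect of the kvm_arch_prepare_memory_region() hunks: the eager kvm_phys_addr_ioremap() mapping of VM_PFNMAP regions at memslot registration is gone, and the function now only walks the covered VMAs to refuse dirty logging on I/O regions. The actual stage 2 mapping, now possibly block-sized, is installed lazily by user_mem_abort() via get_vma_page_shift() when the guest first accesses the region. With no mapping created here, the local writable flag, the KVM_MR_FLAGS_ONLY short-circuit, and the unmap/flush cleanup under mmu_lock have nothing left to act on, which is why they can be deleted outright rather than relocated.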