
Commit 32ab5a5

Marc Zyngier committed
Merge branch kvm-arm64/mmu/MMIO-block-mapping into kvmarm-master/next
MMIO block mapping support from Keqian Zhu, allowing larger (and lazy)
mappings for devices assigned to guests.

* kvm-arm64/mmu/MMIO-block-mapping:
  KVM: arm64: Try stage2 block mapping for host device MMIO
  KVM: arm64: Remove the creation time's mapping of MMIO regions
2 parents 32e92b7 + 2aa53d6

1 file changed: arch/arm64/kvm/mmu.c (54 additions, 45 deletions)
@@ -822,6 +822,35 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
 	return PAGE_SIZE;
 }
 
+static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
+{
+	unsigned long pa;
+
+	if (is_vm_hugetlb_page(vma) && !(vma->vm_flags & VM_PFNMAP))
+		return huge_page_shift(hstate_vma(vma));
+
+	if (!(vma->vm_flags & VM_PFNMAP))
+		return PAGE_SHIFT;
+
+	VM_BUG_ON(is_vm_hugetlb_page(vma));
+
+	pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start);
+
+#ifndef __PAGETABLE_PMD_FOLDED
+	if ((hva & (PUD_SIZE - 1)) == (pa & (PUD_SIZE - 1)) &&
+	    ALIGN_DOWN(hva, PUD_SIZE) >= vma->vm_start &&
+	    ALIGN(hva, PUD_SIZE) <= vma->vm_end)
+		return PUD_SHIFT;
+#endif
+
+	if ((hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1)) &&
+	    ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start &&
+	    ALIGN(hva, PMD_SIZE) <= vma->vm_end)
+		return PMD_SHIFT;
+
+	return PAGE_SHIFT;
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  unsigned long fault_status)
@@ -853,7 +882,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 	}
 
-	/* Let's check if we will get back a huge page backed by hugetlbfs */
+	/*
+	 * Let's check if we will get back a huge page backed by hugetlbfs, or
+	 * get block mapping for device MMIO region.
+	 */
 	mmap_read_lock(current->mm);
 	vma = find_vma_intersection(current->mm, hva, hva + 1);
 	if (unlikely(!vma)) {
@@ -862,15 +894,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 	}
 
-	if (is_vm_hugetlb_page(vma))
-		vma_shift = huge_page_shift(hstate_vma(vma));
-	else
-		vma_shift = PAGE_SHIFT;
-
-	if (logging_active ||
-	    (vma->vm_flags & VM_PFNMAP)) {
+	/*
+	 * logging_active is guaranteed to never be true for VM_PFNMAP
+	 * memslots.
+	 */
+	if (logging_active) {
 		force_pte = true;
 		vma_shift = PAGE_SHIFT;
+	} else {
+		vma_shift = get_vma_page_shift(vma, hva);
 	}
 
 	switch (vma_shift) {
@@ -943,8 +975,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 
 	if (kvm_is_device_pfn(pfn)) {
+		/*
+		 * If the page was identified as device early by looking at
+		 * the VMA flags, vma_pagesize is already representing the
+		 * largest quantity we can map. If instead it was mapped
+		 * via gfn_to_pfn_prot(), vma_pagesize is set to PAGE_SIZE
+		 * and must not be upgraded.
+		 *
+		 * In both cases, we don't let transparent_hugepage_adjust()
+		 * change things at the last minute.
+		 */
 		device = true;
-		force_pte = true;
 	} else if (logging_active && !write_fault) {
 		/*
 		 * Only actually map the page as writable if this was a write
@@ -965,7 +1006,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 * If we are not forced to use page mapping, check if we are
 	 * backed by a THP and thus use block mapping if possible.
 	 */
-	if (vma_pagesize == PAGE_SIZE && !force_pte)
+	if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
 		vma_pagesize = transparent_hugepage_adjust(memslot, hva,
 							   &pfn, &fault_ipa);
 	if (writable)
@@ -1346,7 +1387,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 {
 	hva_t hva = mem->userspace_addr;
 	hva_t reg_end = hva + mem->memory_size;
-	bool writable = !(mem->flags & KVM_MEM_READONLY);
 	int ret = 0;
 
 	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
@@ -1363,8 +1403,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	mmap_read_lock(current->mm);
 	/*
 	 * A memory region could potentially cover multiple VMAs, and any holes
-	 * between them, so iterate over all of them to find out if we can map
-	 * any of them right now.
+	 * between them, so iterate over all of them.
 	 *
 	 *     +--------------------------------------------+
 	 * +---------------+----------------+   +----------------+
@@ -1375,51 +1414,21 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	 */
 	do {
 		struct vm_area_struct *vma;
-		hva_t vm_start, vm_end;
 
 		vma = find_vma_intersection(current->mm, hva, reg_end);
 		if (!vma)
 			break;
 
-		/*
-		 * Take the intersection of this VMA with the memory region
-		 */
-		vm_start = max(hva, vma->vm_start);
-		vm_end = min(reg_end, vma->vm_end);
-
 		if (vma->vm_flags & VM_PFNMAP) {
-			gpa_t gpa = mem->guest_phys_addr +
-				    (vm_start - mem->userspace_addr);
-			phys_addr_t pa;
-
-			pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
-			pa += vm_start - vma->vm_start;
-
 			/* IO region dirty page logging not allowed */
 			if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
 				ret = -EINVAL;
-				goto out;
-			}
-
-			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
-						    vm_end - vm_start,
-						    writable);
-			if (ret)
 				break;
+			}
 		}
-		hva = vm_end;
+		hva = min(reg_end, vma->vm_end);
 	} while (hva < reg_end);
 
-	if (change == KVM_MR_FLAGS_ONLY)
-		goto out;
-
-	spin_lock(&kvm->mmu_lock);
-	if (ret)
-		unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size);
-	else if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
-		stage2_flush_memslot(kvm, memslot);
-	spin_unlock(&kvm->mmu_lock);
-out:
 	mmap_read_unlock(current->mm);
 	return ret;
 }
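
To make the new fault-path policy concrete, here is a minimal userspace sketch of the size selection that get_vma_page_shift() performs. It is a sketch under stated assumptions: mock_vma, block_fits(), mock_vma_page_shift() and the 4KiB-page arm64 constants are illustrative stand-ins, not kernel API, and block_fits() expresses the intent of the ALIGN_DOWN()/ALIGN() tests in the diff above (the VA and PA must share the same offset within the block, and the whole block must sit inside the VMA).

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* arm64 geometry with 4KiB pages; illustrative constants, not kernel headers. */
#define PAGE_SHIFT	12
#define PMD_SHIFT	21	/* 2MiB block */
#define PUD_SHIFT	30	/* 1GiB block */

/* A mock VMA carrying only the fields the policy inspects. */
struct mock_vma {
	uint64_t vm_start, vm_end;	/* host VA range */
	uint64_t vm_pgoff;		/* first backing PFN (VM_PFNMAP) */
	bool	 pfnmap;		/* VM_PFNMAP set? */
	bool	 hugetlb;		/* hugetlbfs backing? */
	int	 huge_shift;		/* huge_page_shift() stand-in */
};

/* VA/PA co-aligned at this block size, and the whole block inside the VMA? */
static bool block_fits(const struct mock_vma *vma, uint64_t hva,
		       uint64_t pa, int shift)
{
	uint64_t size = 1ULL << shift;
	uint64_t start = hva & ~(size - 1);

	return (hva & (size - 1)) == (pa & (size - 1)) &&
	       start >= vma->vm_start && start + size <= vma->vm_end;
}

/* Mirrors the decision order of get_vma_page_shift() in the diff. */
static int mock_vma_page_shift(const struct mock_vma *vma, uint64_t hva)
{
	uint64_t pa;

	if (vma->hugetlb && !vma->pfnmap)
		return vma->huge_shift;
	if (!vma->pfnmap)
		return PAGE_SHIFT;

	pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start);

	if (block_fits(vma, hva, pa, PUD_SHIFT))
		return PUD_SHIFT;
	if (block_fits(vma, hva, pa, PMD_SHIFT))
		return PMD_SHIFT;
	return PAGE_SHIFT;
}

int main(void)
{
	/* A 1GiB device BAR mapped at a 1GiB-co-aligned host VA. */
	struct mock_vma bar = {
		.vm_start = 0x7f0040000000ULL,
		.vm_end   = 0x7f0080000000ULL,
		.vm_pgoff = 0x8000000000ULL >> PAGE_SHIFT,
		.pfnmap   = true,
	};

	/* VA and PA co-aligned at 1GiB: expect 30 (PUD block). */
	printf("shift=%d\n", mock_vma_page_shift(&bar, 0x7f0040000000ULL));

	/* Offset the PA by 2MiB: 1GiB co-alignment breaks, expect 21. */
	bar.vm_pgoff = 0x8000200000ULL >> PAGE_SHIFT;
	printf("shift=%d\n", mock_vma_page_shift(&bar, 0x7f0040000000ULL));
	return 0;
}

The other half of the merge is visible in the last hunk: VM_PFNMAP regions are no longer eagerly mapped with kvm_phys_addr_ioremap() when the memslot is registered. They now fault in lazily through user_mem_abort(), which can install 2MiB or 1GiB stage-2 blocks whenever the host VA and backing PA are suitably co-aligned.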
