@@ -1477,13 +1477,56 @@ static bool kvm_vma_is_cacheable(struct vm_area_struct *vma)
14771477 }
14781478}
14791479
1480+ static int prepare_mmu_memcache (struct kvm_vcpu * vcpu , bool topup_memcache ,
1481+ void * * memcache )
1482+ {
1483+ int min_pages ;
1484+
1485+ if (!is_protected_kvm_enabled ())
1486+ * memcache = & vcpu -> arch .mmu_page_cache ;
1487+ else
1488+ * memcache = & vcpu -> arch .pkvm_memcache ;
1489+
1490+ if (!topup_memcache )
1491+ return 0 ;
1492+
1493+ min_pages = kvm_mmu_cache_min_pages (vcpu -> arch .hw_mmu );
1494+
1495+ if (!is_protected_kvm_enabled ())
1496+ return kvm_mmu_topup_memory_cache (* memcache , min_pages );
1497+
1498+ return topup_hyp_memcache (* memcache , min_pages );
1499+ }
1500+
1501+ /*
1502+ * Potentially reduce shadow S2 permissions to match the guest's own S2. For
1503+ * exec faults, we'd only reach this point if the guest actually allowed it (see
1504+ * kvm_s2_handle_perm_fault).
1505+ *
1506+ * Also encode the level of the original translation in the SW bits of the leaf
1507+ * entry as a proxy for the span of that translation. This will be retrieved on
1508+ * TLB invalidation from the guest and used to limit the invalidation scope if a
1509+ * TTL hint or a range isn't provided.
1510+ */
1511+ static void adjust_nested_fault_perms (struct kvm_s2_trans * nested ,
1512+ enum kvm_pgtable_prot * prot ,
1513+ bool * writable )
1514+ {
1515+ * writable &= kvm_s2_trans_writable (nested );
1516+ if (!kvm_s2_trans_readable (nested ))
1517+ * prot &= ~KVM_PGTABLE_PROT_R ;
1518+
1519+ * prot |= kvm_encode_nested_level (nested );
1520+ }
1521+
14801522static int user_mem_abort (struct kvm_vcpu * vcpu , phys_addr_t fault_ipa ,
14811523 struct kvm_s2_trans * nested ,
14821524 struct kvm_memory_slot * memslot , unsigned long hva ,
14831525 bool fault_is_perm )
14841526{
14851527 int ret = 0 ;
1486- bool write_fault , writable , force_pte = false;
1528+ bool topup_memcache ;
1529+ bool write_fault , writable ;
14871530 bool exec_fault , mte_allowed , is_vma_cacheable ;
14881531 bool s2_force_noncacheable = false, vfio_allow_any_uc = false;
14891532 unsigned long mmu_seq ;
@@ -1495,6 +1538,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
14951538 gfn_t gfn ;
14961539 kvm_pfn_t pfn ;
14971540 bool logging_active = memslot_is_logging (memslot );
1541+ bool force_pte = logging_active ;
14981542 long vma_pagesize , fault_granule ;
14991543 enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R ;
15001544 struct kvm_pgtable * pgt ;
@@ -1506,35 +1550,18 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
15061550 fault_granule = kvm_vcpu_trap_get_perm_fault_granule (vcpu );
15071551 write_fault = kvm_is_write_fault (vcpu );
15081552 exec_fault = kvm_vcpu_trap_is_exec_fault (vcpu );
1509- VM_BUG_ON (write_fault && exec_fault );
1510-
1511- if (fault_is_perm && !write_fault && !exec_fault ) {
1512- kvm_err ("Unexpected L2 read permission error\n" );
1513- return - EFAULT ;
1514- }
1515-
1516- if (!is_protected_kvm_enabled ())
1517- memcache = & vcpu -> arch .mmu_page_cache ;
1518- else
1519- memcache = & vcpu -> arch .pkvm_memcache ;
1553+ VM_WARN_ON_ONCE (write_fault && exec_fault );
15201554
15211555 /*
15221556 * Permission faults just need to update the existing leaf entry,
15231557 * and so normally don't require allocations from the memcache. The
15241558 * only exception to this is when dirty logging is enabled at runtime
15251559 * and a write fault needs to collapse a block entry into a table.
15261560 */
1527- if (!fault_is_perm || (logging_active && write_fault )) {
1528- int min_pages = kvm_mmu_cache_min_pages (vcpu -> arch .hw_mmu );
1529-
1530- if (!is_protected_kvm_enabled ())
1531- ret = kvm_mmu_topup_memory_cache (memcache , min_pages );
1532- else
1533- ret = topup_hyp_memcache (memcache , min_pages );
1534-
1535- if (ret )
1536- return ret ;
1537- }
1561+ topup_memcache = !fault_is_perm || (logging_active && write_fault );
1562+ ret = prepare_mmu_memcache (vcpu , topup_memcache , & memcache );
1563+ if (ret )
1564+ return ret ;
15381565
15391566 /*
15401567 * Let's check if we will get back a huge page backed by hugetlbfs, or
@@ -1548,16 +1575,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
15481575 return - EFAULT ;
15491576 }
15501577
1551- /*
1552- * logging_active is guaranteed to never be true for VM_PFNMAP
1553- * memslots.
1554- */
1555- if (logging_active ) {
1556- force_pte = true;
1578+ if (force_pte )
15571579 vma_shift = PAGE_SHIFT ;
1558- } else {
1580+ else
15591581 vma_shift = get_vma_page_shift (vma , hva );
1560- }
15611582
15621583 switch (vma_shift ) {
15631584#ifndef __PAGETABLE_PMD_FOLDED
@@ -1609,7 +1630,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
16091630 max_map_size = PAGE_SIZE ;
16101631
16111632 force_pte = (max_map_size == PAGE_SIZE );
1612- vma_pagesize = min ( vma_pagesize , ( long ) max_map_size );
1633+ vma_pagesize = min_t ( long , vma_pagesize , max_map_size );
16131634 }
16141635
16151636 /*
@@ -1642,7 +1663,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
16421663 * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs
16431664 * with the smp_wmb() in kvm_mmu_invalidate_end().
16441665 */
1645- mmu_seq = vcpu -> kvm -> mmu_invalidate_seq ;
1666+ mmu_seq = kvm -> mmu_invalidate_seq ;
16461667 mmap_read_unlock (current -> mm );
16471668
16481669 pfn = __kvm_faultin_pfn (memslot , gfn , write_fault ? FOLL_WRITE : 0 ,
@@ -1698,24 +1719,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
16981719 if (exec_fault && s2_force_noncacheable )
16991720 return - ENOEXEC ;
17001721
1701- /*
1702- * Potentially reduce shadow S2 permissions to match the guest's own
1703- * S2. For exec faults, we'd only reach this point if the guest
1704- * actually allowed it (see kvm_s2_handle_perm_fault).
1705- *
1706- * Also encode the level of the original translation in the SW bits
1707- * of the leaf entry as a proxy for the span of that translation.
1708- * This will be retrieved on TLB invalidation from the guest and
1709- * used to limit the invalidation scope if a TTL hint or a range
1710- * isn't provided.
1711- */
1712- if (nested ) {
1713- writable &= kvm_s2_trans_writable (nested );
1714- if (!kvm_s2_trans_readable (nested ))
1715- prot &= ~KVM_PGTABLE_PROT_R ;
1716-
1717- prot |= kvm_encode_nested_level (nested );
1718- }
1722+ if (nested )
1723+ adjust_nested_fault_perms (nested , & prot , & writable );
17191724
17201725 kvm_fault_lock (kvm );
17211726 pgt = vcpu -> arch .hw_mmu -> pgt ;
@@ -1981,6 +1986,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
19811986 goto out_unlock ;
19821987 }
19831988
1989+ VM_WARN_ON_ONCE (kvm_vcpu_trap_is_permission_fault (vcpu ) &&
1990+ !write_fault && !kvm_vcpu_trap_is_exec_fault (vcpu ));
1991+
19841992 ret = user_mem_abort (vcpu , fault_ipa , nested , memslot , hva ,
19851993 esr_fsc_is_permission_fault (esr ));
19861994 if (ret == 0 )
0 commit comments