Skip to content

Commit 9a96770

Browse files
Lai Jiangshan authored
and bonzini committed
KVM: x86/mmu: Remove FNAME(is_self_change_mapping)
Drop FNAME(is_self_change_mapping) and instead rely on kvm_mmu_hugepage_adjust() to adjust the hugepage accordingly. Prior to commit 4cd071d ("KVM: x86/mmu: Move calls to thp_adjust() down a level"), the hugepage adjustment was done before allocating new shadow pages, i.e. failed to restrict the hugepage sizes if a new shadow page resulted in account_shadowed() changing the disallowed hugepage tracking. Removing FNAME(is_self_change_mapping) fixes a bug reported by Huang Hang where KVM unnecessarily forces a 4KiB page. FNAME(is_self_change_mapping) has a defect in that it blindly disables _all_ hugepage mappings rather than trying to reduce the size of the hugepage. If the guest is writing to a 1GiB page and the 1GiB is self-referential but a 2MiB page is not, then KVM can and should create a 2MiB mapping. Add a comment above the call to kvm_mmu_hugepage_adjust() to call out the new dependency on adjusting the hugepage size after walking indirect PTEs. Reported-by: Huang Hang <hhuang@linux.alibaba.com> Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com> Link: https://lore.kernel.org/r/20221213125538.81209-1-jiangshanlai@gmail.com [sean: rework changelog after separating out the emulator change] Signed-off-by: Sean Christopherson <seanjc@google.com> Message-Id: <20230202182817.407394-4-seanjc@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent 39fda5d commit 9a96770

1 file changed

Lines changed: 7 additions & 44 deletions

File tree

arch/x86/kvm/mmu/paging_tmpl.h

Lines changed: 7 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -690,6 +690,12 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
690690
fault->write_fault_to_shadow_pgtable = true;
691691
}
692692

693+
/*
694+
* Adjust the hugepage size _after_ resolving indirect shadow pages.
695+
* KVM doesn't support mapping hugepages into the guest for gfns that
696+
* are being shadowed by KVM, i.e. allocating a new shadow page may
697+
* affect the allowed hugepage size.
698+
*/
693699
kvm_mmu_hugepage_adjust(vcpu, fault);
694700

695701
trace_kvm_mmu_spte_requested(fault);
@@ -734,41 +740,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
734740
return RET_PF_RETRY;
735741
}
736742

737-
/*
738-
* To see whether the mapped gfn can write its page table in the current
739-
* mapping.
740-
*
741-
* It is the helper function of FNAME(page_fault). When guest uses large page
742-
* size to map the writable gfn which is used as current page table, we should
743-
* force kvm to use small page size to map it because new shadow page will be
744-
* created when kvm establishes shadow page table that stop kvm using large
745-
* page size. Do it early can avoid unnecessary #PF and emulation.
746-
*
747-
* Note: the PDPT page table is not checked for PAE-32 bit guest. It is ok
748-
* since the PDPT is always shadowed, that means, we can not use large page
749-
* size to map the gfn which is used as PDPT.
750-
*/
751-
static bool
752-
FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
753-
struct guest_walker *walker, bool user_fault)
754-
{
755-
int level;
756-
gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1);
757-
bool self_changed = false;
758-
759-
if (!(walker->pte_access & ACC_WRITE_MASK ||
760-
(!is_cr0_wp(vcpu->arch.mmu) && !user_fault)))
761-
return false;
762-
763-
for (level = walker->level; level <= walker->max_level; level++) {
764-
gfn_t gfn = walker->gfn ^ walker->table_gfn[level - 1];
765-
766-
self_changed |= !(gfn & mask);
767-
}
768-
769-
return self_changed;
770-
}
771-
772743
/*
773744
* Page fault handler. There are several causes for a page fault:
774745
* - there is no shadow pte for the guest pte
@@ -787,7 +758,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
787758
{
788759
struct guest_walker walker;
789760
int r;
790-
bool is_self_change_mapping;
791761

792762
pgprintk("%s: addr %lx err %x\n", __func__, fault->addr, fault->error_code);
793763
WARN_ON_ONCE(fault->is_tdp);
@@ -812,6 +782,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
812782
}
813783

814784
fault->gfn = walker.gfn;
785+
fault->max_level = walker.level;
815786
fault->slot = kvm_vcpu_gfn_to_memslot(vcpu, fault->gfn);
816787

817788
if (page_fault_handle_page_track(vcpu, fault)) {
@@ -823,14 +794,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
823794
if (r)
824795
return r;
825796

826-
is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
827-
&walker, fault->user);
828-
829-
if (is_self_change_mapping)
830-
fault->max_level = PG_LEVEL_4K;
831-
else
832-
fault->max_level = walker.level;
833-
834797
r = kvm_faultin_pfn(vcpu, fault, walker.pte_access);
835798
if (r != RET_PF_CONTINUE)
836799
return r;

0 commit comments

Comments
 (0)