Skip to content

Commit 7cd138d

Browse files
sean-jc authored and bonzini committed
KVM: x86/mmu: Optimize and clean up so called "last nonleaf level" logic
Drop the pre-computed last_nonleaf_level, which is arguably wrong and at best confusing. Per the comment: Can have large pages at levels 2..last_nonleaf_level-1. the intent of the variable would appear to be to track what levels can _legally_ have large pages, but that intent doesn't align with reality. The computed value will be wrong for 5-level paging, or if 1gb pages are not supported. The flawed code is not a problem in practice, because except for 32-bit PSE paging, bit 7 is reserved if large pages aren't supported at the level. Take advantage of this invariant and simply omit the level magic math for 64-bit page tables (including PAE). For 32-bit paging (non-PAE), the adjustments are needed purely because bit 7 is ignored if PSE=0. Retain that logic as is, but make is_last_gpte() unique per PTTYPE so that the PSE check is avoided for PAE and EPT paging. In the spirit of avoiding branches, bump the "last nonleaf level" for 32-bit PSE paging by adding the PSE bit itself. Note, bit 7 is ignored or has other meaning in CR3/EPTP, but despite FNAME(walk_addr_generic) briefly grabbing CR3/EPTP in "pte", they are not PTEs and will blow up all the other gpte helpers. Signed-off-by: Sean Christopherson <seanjc@google.com> Message-Id: <20210622175739.3610207-51-seanjc@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent 616007c commit 7cd138d

3 files changed

Lines changed: 30 additions & 35 deletions

File tree

arch/x86/include/asm/kvm_host.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -457,9 +457,6 @@ struct kvm_mmu {
457457

458458
struct rsvd_bits_validate guest_rsvd_check;
459459

460-
/* Can have large pages at levels 2..last_nonleaf_level-1. */
461-
u8 last_nonleaf_level;
462-
463460
u64 pdptrs[4]; /* pae */
464461
};
465462

arch/x86/kvm/mmu/mmu.c

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -4071,26 +4071,6 @@ static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
40714071
return false;
40724072
}
40734073

4074-
static inline bool is_last_gpte(struct kvm_mmu *mmu,
4075-
unsigned level, unsigned gpte)
4076-
{
4077-
/*
4078-
* The RHS has bit 7 set iff level < mmu->last_nonleaf_level.
4079-
* If it is clear, there are no large pages at this level, so clear
4080-
* PT_PAGE_SIZE_MASK in gpte if that is the case.
4081-
*/
4082-
gpte &= level - mmu->last_nonleaf_level;
4083-
4084-
/*
4085-
* PG_LEVEL_4K always terminates. The RHS has bit 7 set
4086-
* iff level <= PG_LEVEL_4K, which for our purpose means
4087-
* level == PG_LEVEL_4K; set PT_PAGE_SIZE_MASK in gpte then.
4088-
*/
4089-
gpte |= level - PG_LEVEL_4K - 1;
4090-
4091-
return gpte & PT_PAGE_SIZE_MASK;
4092-
}
4093-
40944074
#define PTTYPE_EPT 18 /* arbitrary */
40954075
#define PTTYPE PTTYPE_EPT
40964076
#include "paging_tmpl.h"
@@ -4491,15 +4471,6 @@ static void update_pkru_bitmask(struct kvm_mmu *mmu)
44914471
}
44924472
}
44934473

4494-
static void update_last_nonleaf_level(struct kvm_mmu *mmu)
4495-
{
4496-
unsigned root_level = mmu->root_level;
4497-
4498-
mmu->last_nonleaf_level = root_level;
4499-
if (root_level == PT32_ROOT_LEVEL && is_cr4_pse(mmu))
4500-
mmu->last_nonleaf_level++;
4501-
}
4502-
45034474
static void reset_guest_paging_metadata(struct kvm_vcpu *vcpu,
45044475
struct kvm_mmu *mmu)
45054476
{
@@ -4509,7 +4480,6 @@ static void reset_guest_paging_metadata(struct kvm_vcpu *vcpu,
45094480
reset_rsvds_bits_mask(vcpu, mmu);
45104481
update_permission_bitmask(mmu, false);
45114482
update_pkru_bitmask(mmu);
4512-
update_last_nonleaf_level(mmu);
45134483
}
45144484

45154485
static void paging64_init_context(struct kvm_mmu *context)
@@ -4783,7 +4753,6 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
47834753
context->direct_map = false;
47844754

47854755
update_permission_bitmask(context, true);
4786-
update_last_nonleaf_level(context);
47874756
update_pkru_bitmask(context);
47884757
reset_rsvds_bits_mask_ept(vcpu, context, execonly);
47894758
reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);

arch/x86/kvm/mmu/paging_tmpl.h

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,35 @@ static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte)
305305
return pkeys;
306306
}
307307

308+
static inline bool FNAME(is_last_gpte)(struct kvm_mmu *mmu,
309+
unsigned int level, unsigned int gpte)
310+
{
311+
/*
312+
* For EPT and PAE paging (both variants), bit 7 is either reserved at
313+
all levels or indicates a huge page (ignoring CR3/EPTP). In either
314+
* case, bit 7 being set terminates the walk.
315+
*/
316+
#if PTTYPE == 32
317+
/*
318+
* 32-bit paging requires special handling because bit 7 is ignored if
319+
* CR4.PSE=0, not reserved. Clear bit 7 in the gpte if the level is
320+
* greater than the last level for which bit 7 is the PAGE_SIZE bit.
321+
*
322+
* The RHS has bit 7 set iff level < (2 + PSE). If it is clear, bit 7
323+
* is not reserved and does not indicate a large page at this level,
324+
* so clear PT_PAGE_SIZE_MASK in gpte if that is the case.
325+
*/
326+
gpte &= level - (PT32_ROOT_LEVEL + mmu->mmu_role.ext.cr4_pse);
327+
#endif
328+
/*
329+
* PG_LEVEL_4K always terminates. The RHS has bit 7 set
330+
* iff level <= PG_LEVEL_4K, which for our purpose means
331+
* level == PG_LEVEL_4K; set PT_PAGE_SIZE_MASK in gpte then.
332+
*/
333+
gpte |= level - PG_LEVEL_4K - 1;
334+
335+
return gpte & PT_PAGE_SIZE_MASK;
336+
}
308337
/*
309338
* Fetch a guest pte for a guest virtual address, or for an L2's GPA.
310339
*/
@@ -421,7 +450,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
421450

422451
/* Convert to ACC_*_MASK flags for struct guest_walker. */
423452
walker->pt_access[walker->level - 1] = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
424-
} while (!is_last_gpte(mmu, walker->level, pte));
453+
} while (!FNAME(is_last_gpte)(mmu, walker->level, pte));
425454

426455
pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
427456
accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;

0 commit comments

Comments
 (0)