Skip to content

Commit 5ba7c4c

Browse files
Ben Gardon and bonzini
authored and committed
KVM: x86/MMU: Zap non-leaf SPTEs when disabling dirty logging
Currently disabling dirty logging with the TDP MMU is extremely slow. On a 96 vCPU / 96G VM backed with gigabyte pages, it takes ~200 seconds to disable dirty logging with the TDP MMU, as opposed to ~4 seconds with the shadow MMU. When disabling dirty logging, zap non-leaf parent entries to allow replacement with huge pages instead of recursing and zapping all of the child, leaf entries. This reduces the number of TLB flushes required and reduces the disable dirty log time with the TDP MMU to ~3 seconds. Opportunistically add a WARN() to catch GFNs that are mapped at a higher level than their max level. Signed-off-by: Ben Gardon <bgardon@google.com> Message-Id: <20220525230904.1584480-1-bgardon@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent 1df931d commit 5ba7c4c

3 files changed

Lines changed: 42 additions & 6 deletions

File tree

arch/x86/kvm/mmu/tdp_iter.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,15 @@ static bool try_step_up(struct tdp_iter *iter)
145145
return true;
146146
}
147147

148+
/*
149+
* Step the iterator back up a level in the paging structure. Should only be
150+
* used when the iterator is below the root level.
151+
*/
152+
void tdp_iter_step_up(struct tdp_iter *iter)
153+
{
154+
WARN_ON(!try_step_up(iter));
155+
}
156+
148157
/*
149158
* Step to the next SPTE in a pre-order traversal of the paging structure.
150159
* To get to the next SPTE, the iterator either steps down towards the goal

arch/x86/kvm/mmu/tdp_iter.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,5 +114,6 @@ void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
114114
int min_level, gfn_t next_last_level_gfn);
115115
void tdp_iter_next(struct tdp_iter *iter);
116116
void tdp_iter_restart(struct tdp_iter *iter);
117+
void tdp_iter_step_up(struct tdp_iter *iter);
117118

118119
#endif /* __KVM_X86_MMU_TDP_ITER_H */

arch/x86/kvm/mmu/tdp_mmu.c

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1742,28 +1742,54 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
17421742
gfn_t start = slot->base_gfn;
17431743
gfn_t end = start + slot->npages;
17441744
struct tdp_iter iter;
1745+
int max_mapping_level;
17451746
kvm_pfn_t pfn;
17461747

17471748
rcu_read_lock();
17481749

17491750
tdp_root_for_each_pte(iter, root, start, end) {
1750-
retry:
17511751
if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
17521752
continue;
17531753

17541754
if (!is_shadow_present_pte(iter.old_spte) ||
17551755
!is_last_spte(iter.old_spte, iter.level))
17561756
continue;
17571757

1758+
/*
1759+
* This is a leaf SPTE. Check if the PFN it maps can
1760+
* be mapped at a higher level.
1761+
*/
17581762
pfn = spte_to_pfn(iter.old_spte);
1759-
if (kvm_is_reserved_pfn(pfn) ||
1760-
iter.level >= kvm_mmu_max_mapping_level(kvm, slot, iter.gfn,
1761-
pfn, PG_LEVEL_NUM))
1763+
1764+
if (kvm_is_reserved_pfn(pfn))
17621765
continue;
17631766

1767+
max_mapping_level = kvm_mmu_max_mapping_level(kvm, slot,
1768+
iter.gfn, pfn, PG_LEVEL_NUM);
1769+
1770+
WARN_ON(max_mapping_level < iter.level);
1771+
1772+
/*
1773+
* If this page is already mapped at the highest
1774+
* viable level, there's nothing more to do.
1775+
*/
1776+
if (max_mapping_level == iter.level)
1777+
continue;
1778+
1779+
/*
1780+
* The page can be remapped at a higher level, so step
1781+
* up to zap the parent SPTE.
1782+
*/
1783+
while (max_mapping_level > iter.level)
1784+
tdp_iter_step_up(&iter);
1785+
17641786
/* Note, a successful atomic zap also does a remote TLB flush. */
1765-
if (tdp_mmu_zap_spte_atomic(kvm, &iter))
1766-
goto retry;
1787+
tdp_mmu_zap_spte_atomic(kvm, &iter);
1788+
1789+
/*
1790+
* If the atomic zap fails, the iter will recurse back into
1791+
* the same subtree to retry.
1792+
*/
17671793
}
17681794

17691795
rcu_read_unlock();

0 commit comments

Comments
 (0)