@@ -530,6 +530,31 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
530530 kvm_set_pfn_accessed (spte_to_pfn (old_spte ));
531531}
532532
533+ static inline int __tdp_mmu_set_spte_atomic (struct tdp_iter * iter , u64 new_spte )
534+ {
535+ u64 * sptep = rcu_dereference (iter -> sptep );
536+
537+ /*
538+ * The caller is responsible for ensuring the old SPTE is not a REMOVED
539+ * SPTE. KVM should never attempt to zap or manipulate a REMOVED SPTE,
540+ * and pre-checking before inserting a new SPTE is advantageous as it
541+ * avoids unnecessary work.
542+ */
543+ WARN_ON_ONCE (iter -> yielded || is_removed_spte (iter -> old_spte ));
544+
545+ /*
546+ * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
547+ * does not hold the mmu_lock. On failure, i.e. if a different logical
548+ * CPU modified the SPTE, try_cmpxchg64() updates iter->old_spte with
549+ * the current value, so the caller operates on fresh data, e.g. if it
550+ * retries tdp_mmu_set_spte_atomic()
551+ */
552+ if (!try_cmpxchg64 (sptep , & iter -> old_spte , new_spte ))
553+ return - EBUSY ;
554+
555+ return 0 ;
556+ }
557+
533558/*
534559 * tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically
535560 * and handle the associated bookkeeping. Do not mark the page dirty
@@ -551,27 +576,13 @@ static inline int tdp_mmu_set_spte_atomic(struct kvm *kvm,
551576 struct tdp_iter * iter ,
552577 u64 new_spte )
553578{
554- u64 * sptep = rcu_dereference (iter -> sptep );
555-
556- /*
557- * The caller is responsible for ensuring the old SPTE is not a REMOVED
558- * SPTE. KVM should never attempt to zap or manipulate a REMOVED SPTE,
559- * and pre-checking before inserting a new SPTE is advantageous as it
560- * avoids unnecessary work.
561- */
562- WARN_ON_ONCE (iter -> yielded || is_removed_spte (iter -> old_spte ));
579+ int ret ;
563580
564581 lockdep_assert_held_read (& kvm -> mmu_lock );
565582
566- /*
567- * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
568- * does not hold the mmu_lock. On failure, i.e. if a different logical
569- * CPU modified the SPTE, try_cmpxchg64() updates iter->old_spte with
570- * the current value, so the caller operates on fresh data, e.g. if it
571- * retries tdp_mmu_set_spte_atomic()
572- */
573- if (!try_cmpxchg64 (sptep , & iter -> old_spte , new_spte ))
574- return - EBUSY ;
583+ ret = __tdp_mmu_set_spte_atomic (iter , new_spte );
584+ if (ret )
585+ return ret ;
575586
576587 handle_changed_spte (kvm , iter -> as_id , iter -> gfn , iter -> old_spte ,
577588 new_spte , iter -> level , true);
@@ -584,13 +595,17 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
584595{
585596 int ret ;
586597
598+ lockdep_assert_held_read (& kvm -> mmu_lock );
599+
587600 /*
588- * Freeze the SPTE by setting it to a special,
589- * non-present value. This will stop other threads from
590- * immediately installing a present entry in its place
591- * before the TLBs are flushed.
601+ * Freeze the SPTE by setting it to a special, non-present value. This
602+ * will stop other threads from immediately installing a present entry
603+ * in its place before the TLBs are flushed.
604+ *
605+ * Delay processing of the zapped SPTE until after TLBs are flushed and
606+ * the REMOVED_SPTE is replaced (see below).
592607 */
593- ret = tdp_mmu_set_spte_atomic ( kvm , iter , REMOVED_SPTE );
608+ ret = __tdp_mmu_set_spte_atomic ( iter , REMOVED_SPTE );
594609 if (ret )
595610 return ret ;
596611
@@ -599,12 +614,20 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
599614 /*
600615 * No other thread can overwrite the removed SPTE as they must either
601616 * wait on the MMU lock or use tdp_mmu_set_spte_atomic() which will not
602- * overwrite the special removed SPTE value. No bookkeeping is needed
603- * here since the SPTE is going from non-present to non-present. Use
604- * the raw write helper to avoid an unnecessary check on volatile bits.
617+ * overwrite the special removed SPTE value. Use the raw write helper to
618+ * avoid an unnecessary check on volatile bits.
605619 */
606620 __kvm_tdp_mmu_write_spte (iter -> sptep , SHADOW_NONPRESENT_VALUE );
607621
622+ /*
623+ * Process the zapped SPTE after flushing TLBs, and after replacing
624+ * REMOVED_SPTE with 0. This minimizes the amount of time vCPUs are
625+ * blocked by the REMOVED_SPTE and reduces contention on the child
626+ * SPTEs.
627+ */
628+ handle_changed_spte (kvm , iter -> as_id , iter -> gfn , iter -> old_spte ,
629+ 0 , iter -> level , true);
630+
608631 return 0 ;
609632}
610633
0 commit comments