@@ -139,13 +139,18 @@ void recalc_intercepts(struct vcpu_svm *svm)
139139
140140 if (g -> int_ctl & V_INTR_MASKING_MASK ) {
141141 /*
142- * Once running L2 with HF_VINTR_MASK, EFLAGS.IF and CR8
143- * does not affect any interrupt we may want to inject;
144- * therefore, writes to CR8 are irrelevant to L0, as are
145- * interrupt window vmexits.
142+ * If L2 is active and V_INTR_MASKING is enabled in vmcb12,
143+ * disable intercept of CR8 writes as L2's CR8 does not affect
144+ * any interrupt KVM may want to inject.
145+ *
146+ * Similarly, disable intercept of virtual interrupts (used to
147+ * detect interrupt windows) if the saved RFLAGS.IF is '0', as
148+ * the effective RFLAGS.IF for L1 interrupts will never be set
149+ * while L2 is running (L2's RFLAGS.IF doesn't affect L1 IRQs).
146150 */
147151 vmcb_clr_intercept (c , INTERCEPT_CR8_WRITE );
148- vmcb_clr_intercept (c , INTERCEPT_VINTR );
152+ if (!(svm -> vmcb01 .ptr -> save .rflags & X86_EFLAGS_IF ))
153+ vmcb_clr_intercept (c , INTERCEPT_VINTR );
149154 }
150155
151156 /*
@@ -276,6 +281,11 @@ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
276281 if (CC (!nested_svm_check_tlb_ctl (vcpu , control -> tlb_ctl )))
277282 return false;
278283
284+ if (CC ((control -> int_ctl & V_NMI_ENABLE_MASK ) &&
285+ !vmcb12_is_intercept (control , INTERCEPT_NMI ))) {
286+ return false;
287+ }
288+
279289 return true;
280290}
281291
@@ -416,22 +426,24 @@ void nested_sync_control_from_vmcb02(struct vcpu_svm *svm)
416426
417427 /* Only a few fields of int_ctl are written by the processor. */
418428 mask = V_IRQ_MASK | V_TPR_MASK ;
419- if (!( svm -> nested . ctl . int_ctl & V_INTR_MASKING_MASK ) &&
420- svm_is_intercept ( svm , INTERCEPT_VINTR )) {
421- /*
422- * In order to request an interrupt window, L0 is usurping
423- * svm->vmcb->control.int_ctl and possibly setting V_IRQ
424- * even if it was clear in L1's VMCB. Restoring it would be
425- * wrong. However, in this case V_IRQ will remain true until
426- * interrupt_window_interception calls svm_clear_vintr and
427- * restores int_ctl. We can just leave it aside.
428- */
429+ /*
430+ * Don't sync vmcb02 V_IRQ back to vmcb12 if KVM (L0) is intercepting
431+ * virtual interrupts in order to request an interrupt window, as KVM
432+ * has usurped vmcb02's int_ctl. If an interrupt window opens before
433+ * the next VM-Exit, svm_clear_vintr() will restore vmcb12's int_ctl.
434+ * If no window opens, V_IRQ will be correctly preserved in vmcb12's
435+ * int_ctl (because it was never recognized while L2 was running).
436+ */
437+ if ( svm_is_intercept ( svm , INTERCEPT_VINTR ) &&
438+ ! test_bit ( INTERCEPT_VINTR , ( unsigned long * ) svm -> nested . ctl . intercepts ))
429439 mask &= ~V_IRQ_MASK ;
430- }
431440
432441 if (nested_vgif_enabled (svm ))
433442 mask |= V_GIF_MASK ;
434443
444+ if (nested_vnmi_enabled (svm ))
445+ mask |= V_NMI_BLOCKING_MASK | V_NMI_PENDING_MASK ;
446+
435447 svm -> nested .ctl .int_ctl &= ~mask ;
436448 svm -> nested .ctl .int_ctl |= svm -> vmcb -> control .int_ctl & mask ;
437449}
@@ -651,6 +663,17 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
651663 else
652664 int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK );
653665
666+ if (vnmi ) {
667+ if (vmcb01 -> control .int_ctl & V_NMI_PENDING_MASK ) {
668+ svm -> vcpu .arch .nmi_pending ++ ;
669+ kvm_make_request (KVM_REQ_EVENT , & svm -> vcpu );
670+ }
671+ if (nested_vnmi_enabled (svm ))
672+ int_ctl_vmcb12_bits |= (V_NMI_PENDING_MASK |
673+ V_NMI_ENABLE_MASK |
674+ V_NMI_BLOCKING_MASK );
675+ }
676+
654677 /* Copied from vmcb01. msrpm_base can be overwritten later. */
655678 vmcb02 -> control .nested_ctl = vmcb01 -> control .nested_ctl ;
656679 vmcb02 -> control .iopm_base_pa = vmcb01 -> control .iopm_base_pa ;
@@ -1021,6 +1044,28 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
10211044
10221045 svm_switch_vmcb (svm , & svm -> vmcb01 );
10231046
1047+ /*
1048+ * Rules for synchronizing int_ctl bits from vmcb02 to vmcb01:
1049+ *
1050+ * V_IRQ, V_IRQ_VECTOR, V_INTR_PRIO_MASK, V_IGN_TPR: If L1 doesn't
1051+ * intercept interrupts, then KVM will use vmcb02's V_IRQ (and related
1052+ * flags) to detect interrupt windows for L1 IRQs (even if L1 uses
1053+ * virtual interrupt masking). Raise KVM_REQ_EVENT to ensure that
1054+ * KVM re-requests an interrupt window if necessary, which implicitly
1055+ * copies these bits from vmcb02 to vmcb01.
1056+ *
1057+ * V_TPR: If L1 doesn't use virtual interrupt masking, then L1's vTPR
1058+ * is stored in vmcb02, but its value doesn't need to be copied from/to
1059+ * vmcb01 because it is copied from/to the virtual APIC's TPR register
1060+ * on each VM entry/exit.
1061+ *
1062+ * V_GIF: If nested vGIF is not used, KVM uses vmcb02's V_GIF for L1's
1063+ * V_GIF. However, GIF is architecturally clear on each VM exit, thus
1064+ * there is no need to copy V_GIF from vmcb02 to vmcb01.
1065+ */
1066+ if (!nested_exit_on_intr (svm ))
1067+ kvm_make_request (KVM_REQ_EVENT , & svm -> vcpu );
1068+
10241069 if (unlikely (svm -> lbrv_enabled && (svm -> nested .ctl .virt_ext & LBR_CTL_ENABLE_MASK ))) {
10251070 svm_copy_lbrs (vmcb12 , vmcb02 );
10261071 svm_update_lbrv (vcpu );
@@ -1029,6 +1074,20 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
10291074 svm_update_lbrv (vcpu );
10301075 }
10311076
1077+ if (vnmi ) {
1078+ if (vmcb02 -> control .int_ctl & V_NMI_BLOCKING_MASK )
1079+ vmcb01 -> control .int_ctl |= V_NMI_BLOCKING_MASK ;
1080+ else
1081+ vmcb01 -> control .int_ctl &= ~V_NMI_BLOCKING_MASK ;
1082+
1083+ if (vcpu -> arch .nmi_pending ) {
1084+ vcpu -> arch .nmi_pending -- ;
1085+ vmcb01 -> control .int_ctl |= V_NMI_PENDING_MASK ;
1086+ } else {
1087+ vmcb01 -> control .int_ctl &= ~V_NMI_PENDING_MASK ;
1088+ }
1089+ }
1090+
10321091 /*
10331092 * On vmexit the GIF is set to false and
10341093 * no event can be injected in L1.
0 commit comments