Skip to content

Commit 4a5fd41

Browse files
committed
Merge tag 'kvm-x86-svm-6.4' of https://github.com/kvm-x86/linux into HEAD
KVM SVM changes for 6.4: - Add support for virtual NMIs - Fixes for edge cases related to virtual interrupts
2 parents c21775a + c0d0ce9 commit 4a5fd41

8 files changed

Lines changed: 292 additions & 58 deletions

File tree

arch/x86/include/asm/cpufeatures.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -226,10 +226,9 @@
226226

227227
/* Virtualization flags: Linux defined, word 8 */
228228
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
229-
#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
230-
#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
231-
#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
232-
#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
229+
#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* Intel FlexPriority */
230+
#define X86_FEATURE_EPT ( 8*32+ 2) /* Intel Extended Page Table */
231+
#define X86_FEATURE_VPID ( 8*32+ 3) /* Intel Virtual Processor ID */
233232

234233
#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */
235234
#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
@@ -370,6 +369,7 @@
370369
#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
371370
#define X86_FEATURE_X2AVIC (15*32+18) /* Virtual x2apic */
372371
#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */
372+
#define X86_FEATURE_VNMI (15*32+25) /* Virtual NMI */
373373
#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */
374374

375375
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */

arch/x86/include/asm/kvm-x86-ops.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ KVM_X86_OP(get_interrupt_shadow)
6868
KVM_X86_OP(patch_hypercall)
6969
KVM_X86_OP(inject_irq)
7070
KVM_X86_OP(inject_nmi)
71+
KVM_X86_OP_OPTIONAL_RET0(is_vnmi_pending)
72+
KVM_X86_OP_OPTIONAL_RET0(set_vnmi_pending)
7173
KVM_X86_OP(inject_exception)
7274
KVM_X86_OP(cancel_injection)
7375
KVM_X86_OP(interrupt_allowed)

arch/x86/include/asm/kvm_host.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -874,7 +874,8 @@ struct kvm_vcpu_arch {
874874
u64 tsc_scaling_ratio; /* current scaling ratio */
875875

876876
atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
877-
unsigned nmi_pending; /* NMI queued after currently running handler */
877+
/* Number of NMIs pending injection, not including hardware vNMIs. */
878+
unsigned int nmi_pending;
878879
bool nmi_injected; /* Trying to inject an NMI this entry */
879880
bool smi_pending; /* SMI queued after currently running handler */
880881
u8 handling_intr_from_guest;
@@ -1619,6 +1620,13 @@ struct kvm_x86_ops {
16191620
int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
16201621
bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
16211622
void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
1623+
/* Whether or not a virtual NMI is pending in hardware. */
1624+
bool (*is_vnmi_pending)(struct kvm_vcpu *vcpu);
1625+
/*
1626+
 * Attempt to pend a virtual NMI in hardware. Returns %true on success
1627+
* to allow using static_call_ret0 as the fallback.
1628+
*/
1629+
bool (*set_vnmi_pending)(struct kvm_vcpu *vcpu);
16221630
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
16231631
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
16241632
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
@@ -1999,6 +2007,7 @@ int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
19992007
void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
20002008

20012009
void kvm_inject_nmi(struct kvm_vcpu *vcpu);
2010+
int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu);
20022011

20032012
void kvm_update_dr7(struct kvm_vcpu *vcpu);
20042013

arch/x86/include/asm/svm.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,12 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
183183
#define V_GIF_SHIFT 9
184184
#define V_GIF_MASK (1 << V_GIF_SHIFT)
185185

186+
#define V_NMI_PENDING_SHIFT 11
187+
#define V_NMI_PENDING_MASK (1 << V_NMI_PENDING_SHIFT)
188+
189+
#define V_NMI_BLOCKING_SHIFT 12
190+
#define V_NMI_BLOCKING_MASK (1 << V_NMI_BLOCKING_SHIFT)
191+
186192
#define V_INTR_PRIO_SHIFT 16
187193
#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
188194

@@ -197,6 +203,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
197203
#define V_GIF_ENABLE_SHIFT 25
198204
#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT)
199205

206+
#define V_NMI_ENABLE_SHIFT 26
207+
#define V_NMI_ENABLE_MASK (1 << V_NMI_ENABLE_SHIFT)
208+
200209
#define AVIC_ENABLE_SHIFT 31
201210
#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
202211

@@ -278,7 +287,6 @@ static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_
278287
static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID);
279288

280289
#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
281-
#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
282290

283291

284292
struct vmcb_seg {

arch/x86/kvm/svm/nested.c

Lines changed: 75 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -139,13 +139,18 @@ void recalc_intercepts(struct vcpu_svm *svm)
139139

140140
if (g->int_ctl & V_INTR_MASKING_MASK) {
141141
/*
142-
* Once running L2 with HF_VINTR_MASK, EFLAGS.IF and CR8
143-
* does not affect any interrupt we may want to inject;
144-
* therefore, writes to CR8 are irrelevant to L0, as are
145-
* interrupt window vmexits.
142+
* If L2 is active and V_INTR_MASKING is enabled in vmcb12,
143+
* disable intercept of CR8 writes as L2's CR8 does not affect
144+
* any interrupt KVM may want to inject.
145+
*
146+
* Similarly, disable intercept of virtual interrupts (used to
147+
* detect interrupt windows) if the saved RFLAGS.IF is '0', as
148+
* the effective RFLAGS.IF for L1 interrupts will never be set
149+
* while L2 is running (L2's RFLAGS.IF doesn't affect L1 IRQs).
146150
*/
147151
vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE);
148-
vmcb_clr_intercept(c, INTERCEPT_VINTR);
152+
if (!(svm->vmcb01.ptr->save.rflags & X86_EFLAGS_IF))
153+
vmcb_clr_intercept(c, INTERCEPT_VINTR);
149154
}
150155

151156
/*
@@ -276,6 +281,11 @@ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
276281
if (CC(!nested_svm_check_tlb_ctl(vcpu, control->tlb_ctl)))
277282
return false;
278283

284+
if (CC((control->int_ctl & V_NMI_ENABLE_MASK) &&
285+
!vmcb12_is_intercept(control, INTERCEPT_NMI))) {
286+
return false;
287+
}
288+
279289
return true;
280290
}
281291

@@ -416,22 +426,24 @@ void nested_sync_control_from_vmcb02(struct vcpu_svm *svm)
416426

417427
/* Only a few fields of int_ctl are written by the processor. */
418428
mask = V_IRQ_MASK | V_TPR_MASK;
419-
if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) &&
420-
svm_is_intercept(svm, INTERCEPT_VINTR)) {
421-
/*
422-
* In order to request an interrupt window, L0 is usurping
423-
* svm->vmcb->control.int_ctl and possibly setting V_IRQ
424-
* even if it was clear in L1's VMCB. Restoring it would be
425-
* wrong. However, in this case V_IRQ will remain true until
426-
* interrupt_window_interception calls svm_clear_vintr and
427-
* restores int_ctl. We can just leave it aside.
428-
*/
429+
/*
430+
* Don't sync vmcb02 V_IRQ back to vmcb12 if KVM (L0) is intercepting
431+
* virtual interrupts in order to request an interrupt window, as KVM
432+
* has usurped vmcb02's int_ctl. If an interrupt window opens before
433+
* the next VM-Exit, svm_clear_vintr() will restore vmcb12's int_ctl.
434+
* If no window opens, V_IRQ will be correctly preserved in vmcb12's
435+
* int_ctl (because it was never recognized while L2 was running).
436+
*/
437+
if (svm_is_intercept(svm, INTERCEPT_VINTR) &&
438+
!test_bit(INTERCEPT_VINTR, (unsigned long *)svm->nested.ctl.intercepts))
429439
mask &= ~V_IRQ_MASK;
430-
}
431440

432441
if (nested_vgif_enabled(svm))
433442
mask |= V_GIF_MASK;
434443

444+
if (nested_vnmi_enabled(svm))
445+
mask |= V_NMI_BLOCKING_MASK | V_NMI_PENDING_MASK;
446+
435447
svm->nested.ctl.int_ctl &= ~mask;
436448
svm->nested.ctl.int_ctl |= svm->vmcb->control.int_ctl & mask;
437449
}
@@ -651,6 +663,17 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
651663
else
652664
int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
653665

666+
if (vnmi) {
667+
if (vmcb01->control.int_ctl & V_NMI_PENDING_MASK) {
668+
svm->vcpu.arch.nmi_pending++;
669+
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
670+
}
671+
if (nested_vnmi_enabled(svm))
672+
int_ctl_vmcb12_bits |= (V_NMI_PENDING_MASK |
673+
V_NMI_ENABLE_MASK |
674+
V_NMI_BLOCKING_MASK);
675+
}
676+
654677
/* Copied from vmcb01. msrpm_base can be overwritten later. */
655678
vmcb02->control.nested_ctl = vmcb01->control.nested_ctl;
656679
vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa;
@@ -1021,6 +1044,28 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
10211044

10221045
svm_switch_vmcb(svm, &svm->vmcb01);
10231046

1047+
/*
1048+
* Rules for synchronizing int_ctl bits from vmcb02 to vmcb01:
1049+
*
1050+
* V_IRQ, V_IRQ_VECTOR, V_INTR_PRIO_MASK, V_IGN_TPR: If L1 doesn't
1051+
* intercept interrupts, then KVM will use vmcb02's V_IRQ (and related
1052+
* flags) to detect interrupt windows for L1 IRQs (even if L1 uses
1053+
* virtual interrupt masking). Raise KVM_REQ_EVENT to ensure that
1054+
* KVM re-requests an interrupt window if necessary, which implicitly
1055+
* copies this bits from vmcb02 to vmcb01.
1056+
*
1057+
* V_TPR: If L1 doesn't use virtual interrupt masking, then L1's vTPR
1058+
* is stored in vmcb02, but its value doesn't need to be copied from/to
1059+
* vmcb01 because it is copied from/to the virtual APIC's TPR register
1060+
* on each VM entry/exit.
1061+
*
1062+
* V_GIF: If nested vGIF is not used, KVM uses vmcb02's V_GIF for L1's
1063+
* V_GIF. However, GIF is architecturally clear on each VM exit, thus
1064+
* there is no need to copy V_GIF from vmcb02 to vmcb01.
1065+
*/
1066+
if (!nested_exit_on_intr(svm))
1067+
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
1068+
10241069
if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
10251070
svm_copy_lbrs(vmcb12, vmcb02);
10261071
svm_update_lbrv(vcpu);
@@ -1029,6 +1074,20 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
10291074
svm_update_lbrv(vcpu);
10301075
}
10311076

1077+
if (vnmi) {
1078+
if (vmcb02->control.int_ctl & V_NMI_BLOCKING_MASK)
1079+
vmcb01->control.int_ctl |= V_NMI_BLOCKING_MASK;
1080+
else
1081+
vmcb01->control.int_ctl &= ~V_NMI_BLOCKING_MASK;
1082+
1083+
if (vcpu->arch.nmi_pending) {
1084+
vcpu->arch.nmi_pending--;
1085+
vmcb01->control.int_ctl |= V_NMI_PENDING_MASK;
1086+
} else {
1087+
vmcb01->control.int_ctl &= ~V_NMI_PENDING_MASK;
1088+
}
1089+
}
1090+
10321091
/*
10331092
* On vmexit the GIF is set to false and
10341093
* no event can be injected in L1.

0 commit comments

Comments
 (0)