Skip to content

Commit 76f598b

Browse files
committed
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
"PPC:
- Hide KVM_CAP_IRQFD_RESAMPLE if XIVE is enabled

s390:
- Fix handling of external interrupts in protected guests

x86:
- Resample the pending state of IOAPIC interrupts when unmasking them
- Fix usage of Hyper-V "enlightened TLB" on AMD
- Small fixes to real mode exceptions
- Suppress pending MMIO write exits if emulator detects exception

Documentation:
- Fix rST syntax"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
docs: kvm: x86: Fix broken field list
KVM: PPC: Make KVM_CAP_IRQFD_RESAMPLE platform dependent
KVM: s390: pv: fix external interruption loop not always detected
KVM: nVMX: Do not report error code when synthesizing VM-Exit from Real Mode
KVM: x86: Clear "has_error_code", not "error_code", for RM exception injection
KVM: x86: Suppress pending MMIO write exits if emulator detects exception
KVM: x86/ioapic: Resample the pending state of an IRQ when unmasking
KVM: irqfd: Make resampler_list an RCU list
KVM: SVM: Flush Hyper-V TLB when required
2 parents ceeea1b + fb5015b commit 76f598b

15 files changed

Lines changed: 192 additions & 29 deletions

File tree

Documentation/virt/kvm/api.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8296,11 +8296,11 @@ ENOSYS for the others.
82968296
8.35 KVM_CAP_PMU_CAPABILITY
82978297
---------------------------
82988298

8299-
:Capability KVM_CAP_PMU_CAPABILITY
8299+
:Capability: KVM_CAP_PMU_CAPABILITY
83008300
:Architectures: x86
83018301
:Type: vm
83028302
:Parameters: arg[0] is bitmask of PMU virtualization capabilities.
8303-
:Returns 0 on success, -EINVAL when arg[0] contains invalid bits
8303+
:Returns: 0 on success, -EINVAL when arg[0] contains invalid bits
83048304

83058305
This capability alters PMU virtualization in KVM.
83068306

arch/arm64/kvm/arm.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
220220
case KVM_CAP_VCPU_ATTRIBUTES:
221221
case KVM_CAP_PTP_KVM:
222222
case KVM_CAP_ARM_SYSTEM_SUSPEND:
223+
case KVM_CAP_IRQFD_RESAMPLE:
223224
r = 1;
224225
break;
225226
case KVM_CAP_SET_GUEST_DEBUG2:

arch/powerpc/kvm/powerpc.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
576576
break;
577577
#endif
578578

579+
#ifdef CONFIG_HAVE_KVM_IRQFD
580+
case KVM_CAP_IRQFD_RESAMPLE:
581+
r = !xive_enabled();
582+
break;
583+
#endif
584+
579585
case KVM_CAP_PPC_ALLOC_HTAB:
580586
r = hv_enabled;
581587
break;

arch/s390/kvm/intercept.c

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -271,10 +271,18 @@ static int handle_prog(struct kvm_vcpu *vcpu)
271271
* handle_external_interrupt - used for external interruption interceptions
272272
* @vcpu: virtual cpu
273273
*
274-
* This interception only occurs if the CPUSTAT_EXT_INT bit was set, or if
275-
* the new PSW does not have external interrupts disabled. In the first case,
276-
* we've got to deliver the interrupt manually, and in the second case, we
277-
* drop to userspace to handle the situation there.
274+
* This interception occurs if:
275+
* - the CPUSTAT_EXT_INT bit was already set when the external interrupt
276+
* occurred. In this case, the interrupt needs to be injected manually to
277+
* preserve interrupt priority.
278+
* - the external new PSW has external interrupts enabled, which will cause an
279+
* interruption loop. We drop to userspace in this case.
280+
*
281+
* The latter case can be detected by inspecting the external mask bit in the
282+
* external new PSW.
283+
*
284+
* Under PV, only the latter case can occur, since interrupt priorities are
285+
* handled in the ultravisor.
278286
*/
279287
static int handle_external_interrupt(struct kvm_vcpu *vcpu)
280288
{
@@ -285,10 +293,18 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu)
285293

286294
vcpu->stat.exit_external_interrupt++;
287295

288-
rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t));
289-
if (rc)
290-
return rc;
291-
/* We can not handle clock comparator or timer interrupt with bad PSW */
296+
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
297+
newpsw = vcpu->arch.sie_block->gpsw;
298+
} else {
299+
rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t));
300+
if (rc)
301+
return rc;
302+
}
303+
304+
/*
305+
* Clock comparator or timer interrupt with external interrupt enabled
306+
* will cause an interrupt loop. Drop to userspace.
307+
*/
292308
if ((eic == EXT_IRQ_CLK_COMP || eic == EXT_IRQ_CPU_TIMER) &&
293309
(newpsw.mask & PSW_MASK_EXT))
294310
return -EOPNOTSUPP;

arch/s390/kvm/kvm-s390.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
573573
case KVM_CAP_S390_VCPU_RESETS:
574574
case KVM_CAP_SET_GUEST_DEBUG:
575575
case KVM_CAP_S390_DIAG318:
576+
case KVM_CAP_IRQFD_RESAMPLE:
576577
r = 1;
577578
break;
578579
case KVM_CAP_SET_GUEST_DEBUG2:

arch/x86/kvm/ioapic.c

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -368,9 +368,39 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
368368
mask_after = e->fields.mask;
369369
if (mask_before != mask_after)
370370
kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
371-
if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
372-
&& ioapic->irr & (1 << index))
373-
ioapic_service(ioapic, index, false);
371+
if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG &&
372+
ioapic->irr & (1 << index) && !e->fields.mask && !e->fields.remote_irr) {
373+
/*
374+
* Pending status in irr may be outdated: the IRQ line may have
375+
* already been deasserted by a device while the IRQ was masked.
376+
* This occurs, for instance, if the interrupt is handled in a
377+
* Linux guest as a oneshot interrupt (IRQF_ONESHOT). In this
378+
* case the guest acknowledges the interrupt to the device in
379+
* its threaded irq handler, i.e. after the EOI but before
380+
* unmasking, so at the time of unmasking the IRQ line is
381+
* already down but our pending irr bit is still set. In such
382+
* cases, injecting this pending interrupt to the guest is
383+
* buggy: the guest will receive an extra unwanted interrupt.
384+
*
385+
* So we need to check here if the IRQ is actually still pending.
386+
* As we are generally not able to probe the IRQ line status
387+
* directly, we do it through the irqfd resampler. Namely, we clear
388+
* the pending status and notify the resampler that this interrupt
389+
* is done, without actually injecting it into the guest. If the
390+
* IRQ line is actually already deasserted, we are done. If it is
391+
* still asserted, a new interrupt will be shortly triggered
392+
* through irqfd and injected into the guest.
393+
*
394+
* If, however, it's not possible to resample (no irqfd resampler
395+
* registered for this irq), then unconditionally inject this
396+
* pending interrupt into the guest, so the guest will not miss
397+
* an interrupt, although it may get an extra unwanted interrupt.
398+
*/
399+
if (kvm_notify_irqfd_resampler(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index))
400+
ioapic->irr &= ~(1 << index);
401+
else
402+
ioapic_service(ioapic, index, false);
403+
}
374404
if (e->fields.delivery_mode == APIC_DM_FIXED) {
375405
struct kvm_lapic_irq irq;
376406

arch/x86/kvm/kvm_onhyperv.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@ int hv_remote_flush_tlb_with_range(struct kvm *kvm,
1212
int hv_remote_flush_tlb(struct kvm *kvm);
1313
void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp);
1414
#else /* !CONFIG_HYPERV */
15+
static inline int hv_remote_flush_tlb(struct kvm *kvm)
16+
{
17+
return -EOPNOTSUPP;
18+
}
19+
1520
static inline void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp)
1621
{
1722
}

arch/x86/kvm/svm/svm.c

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3729,7 +3729,7 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
37293729
svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
37303730
}
37313731

3732-
static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
3732+
static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu)
37333733
{
37343734
struct vcpu_svm *svm = to_svm(vcpu);
37353735

@@ -3753,6 +3753,37 @@ static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
37533753
svm->current_vmcb->asid_generation--;
37543754
}
37553755

3756+
static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
3757+
{
3758+
hpa_t root_tdp = vcpu->arch.mmu->root.hpa;
3759+
3760+
/*
3761+
* When running on Hyper-V with EnlightenedNptTlb enabled, explicitly
3762+
* flush the NPT mappings via hypercall as flushing the ASID only
3763+
* affects virtual to physical mappings; it does not invalidate guest
3764+
* physical to host physical mappings.
3765+
*/
3766+
if (svm_hv_is_enlightened_tlb_enabled(vcpu) && VALID_PAGE(root_tdp))
3767+
hyperv_flush_guest_mapping(root_tdp);
3768+
3769+
svm_flush_tlb_asid(vcpu);
3770+
}
3771+
3772+
static void svm_flush_tlb_all(struct kvm_vcpu *vcpu)
3773+
{
3774+
/*
3775+
* When running on Hyper-V with EnlightenedNptTlb enabled, remote TLB
3776+
* flushes should be routed to hv_remote_flush_tlb() without requesting
3777+
* a "regular" remote flush. Reaching this point means either there's
3778+
* a KVM bug or a prior hv_remote_flush_tlb() call failed, both of
3779+
* which might be fatal to the guest. Yell, but try to recover.
3780+
*/
3781+
if (WARN_ON_ONCE(svm_hv_is_enlightened_tlb_enabled(vcpu)))
3782+
hv_remote_flush_tlb(vcpu->kvm);
3783+
3784+
svm_flush_tlb_asid(vcpu);
3785+
}
3786+
37563787
static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
37573788
{
37583789
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4745,10 +4776,10 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
47454776
.set_rflags = svm_set_rflags,
47464777
.get_if_flag = svm_get_if_flag,
47474778

4748-
.flush_tlb_all = svm_flush_tlb_current,
4779+
.flush_tlb_all = svm_flush_tlb_all,
47494780
.flush_tlb_current = svm_flush_tlb_current,
47504781
.flush_tlb_gva = svm_flush_tlb_gva,
4751-
.flush_tlb_guest = svm_flush_tlb_current,
4782+
.flush_tlb_guest = svm_flush_tlb_asid,
47524783

47534784
.vcpu_pre_run = svm_vcpu_pre_run,
47544785
.vcpu_run = svm_vcpu_run,

arch/x86/kvm/svm/svm_onhyperv.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
#ifndef __ARCH_X86_KVM_SVM_ONHYPERV_H__
77
#define __ARCH_X86_KVM_SVM_ONHYPERV_H__
88

9+
#include <asm/mshyperv.h>
10+
911
#if IS_ENABLED(CONFIG_HYPERV)
1012

1113
#include "kvm_onhyperv.h"
@@ -15,6 +17,14 @@ static struct kvm_x86_ops svm_x86_ops;
1517

1618
int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu);
1719

20+
static inline bool svm_hv_is_enlightened_tlb_enabled(struct kvm_vcpu *vcpu)
21+
{
22+
struct hv_vmcb_enlightenments *hve = &to_svm(vcpu)->vmcb->control.hv_enlightenments;
23+
24+
return ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB &&
25+
!!hve->hv_enlightenments_control.enlightened_npt_tlb;
26+
}
27+
1828
static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
1929
{
2030
struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments;
@@ -80,6 +90,11 @@ static inline void svm_hv_update_vp_id(struct vmcb *vmcb, struct kvm_vcpu *vcpu)
8090
}
8191
#else
8292

93+
static inline bool svm_hv_is_enlightened_tlb_enabled(struct kvm_vcpu *vcpu)
94+
{
95+
return false;
96+
}
97+
8398
static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
8499
{
85100
}

arch/x86/kvm/vmx/nested.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3868,7 +3868,12 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu)
38683868
exit_qual = 0;
38693869
}
38703870

3871-
if (ex->has_error_code) {
3871+
/*
3872+
* Unlike AMD's Paged Real Mode, which reports an error code on #PF
3873+
* VM-Exits even if the CPU is in Real Mode, Intel VMX never sets the
3874+
* "has error code" flags on VM-Exit if the CPU is in Real Mode.
3875+
*/
3876+
if (ex->has_error_code && is_protmode(vcpu)) {
38723877
/*
38733878
* Intel CPUs do not generate error codes with bits 31:16 set,
38743879
* and more importantly VMX disallows setting bits 31:16 in the

0 commit comments

Comments
 (0)