Skip to content

Commit f02b1bc

Browse files
committed
Merge tag 'kvm-x86-irqs-6.17' of https://github.com/kvm-x86/linux into HEAD
KVM IRQ changes for 6.17 - Rework irqbypass to track/match producers and consumers via an xarray instead of a linked list. Using a linked list leads to O(n^2) insertion times, which is hugely problematic for use cases that create large numbers of VMs. Such use cases typically don't actually use irqbypass, but eliminating the pointless registration is a future problem to solve as it likely requires new uAPI. - Track irqbypass's "token" as "struct eventfd_ctx *" instead of a "void *", to avoid making a simple concept unnecessarily difficult to understand. - Add CONFIG_KVM_IOAPIC for x86 to allow disabling support for I/O APIC, PIC, and PIT emulation at compile time. - Drop x86's irq_comm.c, and move a pile of IRQ related code into irq.c. - Fix a variety of flaws and bugs in the AVIC device posted IRQ code. - Inhibit AVIC if a vCPU's ID is too big (relative to what hardware supports) instead of rejecting vCPU creation. - Extend enable_ipiv module param support to SVM, by simply leaving IsRunning clear in the vCPU's physical ID table entry. - Disable IPI virtualization, via enable_ipiv, if the CPU is affected by erratum #1235, to allow (safely) enabling AVIC on such CPUs. - Dedup x86's device posted IRQ code, as the vast majority of functionality can be shared verbatim between SVM and VMX. - Harden the device posted IRQ code against bugs and runtime errors. - Use vcpu_idx, not vcpu_id, for GA log tag/metadata, to make lookups O(1) instead of O(n). - Generate GA Log interrupts if and only if the target vCPU is blocking, i.e. only if KVM needs a notification in order to wake the vCPU. - Decouple device posted IRQs from VFIO device assignment, as binding a VM to a VFIO group is not a requirement for enabling device posted IRQs. - Clean up and document/comment the irqfd assignment code. - Disallow binding multiple irqfds to an eventfd with a priority waiter, i.e. ensure an eventfd is bound to at most one irqfd through the entire host, and add a selftest to verify eventfd:irqfd bindings are globally unique.
2 parents 65164fd + 81bf24f commit f02b1bc

56 files changed

Lines changed: 1841 additions & 1759 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

arch/arm64/kvm/arm.c

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2765,27 +2765,23 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
27652765
kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq);
27662766
}
27672767

2768-
bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
2769-
struct kvm_kernel_irq_routing_entry *new)
2768+
void kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd,
2769+
struct kvm_kernel_irq_routing_entry *old,
2770+
struct kvm_kernel_irq_routing_entry *new)
27702771
{
2771-
if (old->type != KVM_IRQ_ROUTING_MSI ||
2772-
new->type != KVM_IRQ_ROUTING_MSI)
2773-
return true;
2774-
2775-
return memcmp(&old->msi, &new->msi, sizeof(new->msi));
2776-
}
2772+
if (old->type == KVM_IRQ_ROUTING_MSI &&
2773+
new->type == KVM_IRQ_ROUTING_MSI &&
2774+
!memcmp(&old->msi, &new->msi, sizeof(new->msi)))
2775+
return;
27772776

2778-
int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
2779-
uint32_t guest_irq, bool set)
2780-
{
27812777
/*
27822778
* Remapping the vLPI requires taking the its_lock mutex to resolve
27832779
* the new translation. We're in spinlock land at this point, so no
27842780
* chance of resolving the translation.
27852781
*
27862782
* Unmap the vLPI and fall back to software LPI injection.
27872783
*/
2788-
return kvm_vgic_v4_unset_forwarding(kvm, host_irq);
2784+
return kvm_vgic_v4_unset_forwarding(irqfd->kvm, irqfd->producer->irq);
27892785
}
27902786

27912787
void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)

arch/arm64/kvm/vgic/vgic-its.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -758,7 +758,7 @@ static void its_free_ite(struct kvm *kvm, struct its_ite *ite)
758758
if (irq) {
759759
scoped_guard(raw_spinlock_irqsave, &irq->irq_lock) {
760760
if (irq->hw)
761-
WARN_ON(its_unmap_vlpi(ite->irq->host_irq));
761+
its_unmap_vlpi(ite->irq->host_irq);
762762

763763
irq->hw = false;
764764
}

arch/arm64/kvm/vgic/vgic-v4.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -527,28 +527,26 @@ static struct vgic_irq *__vgic_host_irq_get_vlpi(struct kvm *kvm, int host_irq)
527527
return NULL;
528528
}
529529

530-
int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq)
530+
void kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq)
531531
{
532532
struct vgic_irq *irq;
533533
unsigned long flags;
534-
int ret = 0;
535534

536535
if (!vgic_supports_direct_msis(kvm))
537-
return 0;
536+
return;
538537

539538
irq = __vgic_host_irq_get_vlpi(kvm, host_irq);
540539
if (!irq)
541-
return 0;
540+
return;
542541

543542
raw_spin_lock_irqsave(&irq->irq_lock, flags);
544543
WARN_ON(irq->hw && irq->host_irq != host_irq);
545544
if (irq->hw) {
546545
atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count);
547546
irq->hw = false;
548-
ret = its_unmap_vlpi(host_irq);
547+
its_unmap_vlpi(host_irq);
549548
}
550549

551550
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
552551
vgic_put_irq(kvm, irq);
553-
return ret;
554552
}

arch/x86/include/asm/irq_remapping.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,22 @@ enum {
2626
IRQ_REMAP_X2APIC_MODE,
2727
};
2828

29-
struct vcpu_data {
29+
/*
30+
* This is mainly used to communicate information back-and-forth
31+
* between SVM and IOMMU for setting up and tearing down posted
32+
* interrupt
33+
*/
34+
struct amd_iommu_pi_data {
35+
u64 vapic_addr; /* Physical address of the vCPU's vAPIC. */
36+
u32 ga_tag;
37+
u32 vector; /* Guest vector of the interrupt */
38+
int cpu;
39+
bool ga_log_intr;
40+
bool is_guest_mode;
41+
void *ir_data;
42+
};
43+
44+
struct intel_iommu_pi_data {
3045
u64 pi_desc_addr; /* Physical address of PI Descriptor */
3146
u32 vector; /* Guest vector of the interrupt */
3247
};

arch/x86/include/asm/kvm-x86-ops.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging)
112112
KVM_X86_OP_OPTIONAL(vcpu_blocking)
113113
KVM_X86_OP_OPTIONAL(vcpu_unblocking)
114114
KVM_X86_OP_OPTIONAL(pi_update_irte)
115-
KVM_X86_OP_OPTIONAL(pi_start_assignment)
115+
KVM_X86_OP_OPTIONAL(pi_start_bypass)
116116
KVM_X86_OP_OPTIONAL(apicv_pre_state_restore)
117117
KVM_X86_OP_OPTIONAL(apicv_post_state_restore)
118118
KVM_X86_OP_OPTIONAL_RET0(dy_apicv_has_pending_interrupt)

arch/x86/include/asm/kvm_host.h

Lines changed: 18 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,7 @@ enum x86_intercept_stage;
297297
*/
298298
#define KVM_APIC_PV_EOI_PENDING 1
299299

300+
struct kvm_kernel_irqfd;
300301
struct kvm_kernel_irq_routing_entry;
301302

302303
/*
@@ -1320,6 +1321,12 @@ enum kvm_apicv_inhibit {
13201321
*/
13211322
APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED,
13221323

1324+
/*
1325+
* AVIC is disabled because the vCPU's APIC ID is beyond the max
1326+
* supported by AVIC/x2AVIC, i.e. the vCPU is unaddressable.
1327+
*/
1328+
APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG,
1329+
13231330
NR_APICV_INHIBIT_REASONS,
13241331
};
13251332

@@ -1338,7 +1345,8 @@ enum kvm_apicv_inhibit {
13381345
__APICV_INHIBIT_REASON(IRQWIN), \
13391346
__APICV_INHIBIT_REASON(PIT_REINJ), \
13401347
__APICV_INHIBIT_REASON(SEV), \
1341-
__APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED)
1348+
__APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED), \
1349+
__APICV_INHIBIT_REASON(PHYSICAL_ID_TOO_BIG)
13421350

13431351
struct kvm_arch {
13441352
unsigned long n_used_mmu_pages;
@@ -1381,9 +1389,13 @@ struct kvm_arch {
13811389
atomic_t noncoherent_dma_count;
13821390
#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE
13831391
atomic_t assigned_device_count;
1392+
unsigned long nr_possible_bypass_irqs;
1393+
1394+
#ifdef CONFIG_KVM_IOAPIC
13841395
struct kvm_pic *vpic;
13851396
struct kvm_ioapic *vioapic;
13861397
struct kvm_pit *vpit;
1398+
#endif
13871399
atomic_t vapics_in_nmi_mode;
13881400
struct mutex apic_map_lock;
13891401
struct kvm_apic_map __rcu *apic_map;
@@ -1403,7 +1415,6 @@ struct kvm_arch {
14031415
bool pause_in_guest;
14041416
bool cstate_in_guest;
14051417

1406-
unsigned long irq_sources_bitmap;
14071418
s64 kvmclock_offset;
14081419

14091420
/*
@@ -1432,9 +1443,6 @@ struct kvm_arch {
14321443
struct delayed_work kvmclock_update_work;
14331444
struct delayed_work kvmclock_sync_work;
14341445

1435-
/* reads protected by irq_srcu, writes by irq_lock */
1436-
struct hlist_head mask_notifier_list;
1437-
14381446
#ifdef CONFIG_KVM_HYPERV
14391447
struct kvm_hv hyperv;
14401448
#endif
@@ -1853,9 +1861,10 @@ struct kvm_x86_ops {
18531861
void (*vcpu_blocking)(struct kvm_vcpu *vcpu);
18541862
void (*vcpu_unblocking)(struct kvm_vcpu *vcpu);
18551863

1856-
int (*pi_update_irte)(struct kvm *kvm, unsigned int host_irq,
1857-
uint32_t guest_irq, bool set);
1858-
void (*pi_start_assignment)(struct kvm *kvm);
1864+
int (*pi_update_irte)(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
1865+
unsigned int host_irq, uint32_t guest_irq,
1866+
struct kvm_vcpu *vcpu, u32 vector);
1867+
void (*pi_start_bypass)(struct kvm *kvm);
18591868
void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu);
18601869
void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
18611870
bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);
@@ -1950,6 +1959,7 @@ struct kvm_arch_async_pf {
19501959
extern u32 __read_mostly kvm_nr_uret_msrs;
19511960
extern bool __read_mostly allow_smaller_maxphyaddr;
19521961
extern bool __read_mostly enable_apicv;
1962+
extern bool __read_mostly enable_ipiv;
19531963
extern bool __read_mostly enable_device_posted_irqs;
19541964
extern struct kvm_x86_ops kvm_x86_ops;
19551965

@@ -2044,19 +2054,6 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
20442054
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
20452055
const void *val, int bytes);
20462056

2047-
struct kvm_irq_mask_notifier {
2048-
void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
2049-
int irq;
2050-
struct hlist_node link;
2051-
};
2052-
2053-
void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
2054-
struct kvm_irq_mask_notifier *kimn);
2055-
void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
2056-
struct kvm_irq_mask_notifier *kimn);
2057-
void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
2058-
bool mask);
2059-
20602057
extern bool tdp_enabled;
20612058

20622059
u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
@@ -2215,9 +2212,6 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state,
22152212
return !!(*irq_state);
22162213
}
22172214

2218-
int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
2219-
void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
2220-
22212215
void kvm_inject_nmi(struct kvm_vcpu *vcpu);
22222216
int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu);
22232217

@@ -2394,9 +2388,6 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
23942388
bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
23952389
struct kvm_vcpu **dest_vcpu);
23962390

2397-
void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
2398-
struct kvm_lapic_irq *irq);
2399-
24002391
static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
24012392
{
24022393
/* We can only post Fixed and LowPrio IRQs */

arch/x86/include/asm/svm.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -252,16 +252,21 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
252252
#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
253253
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
254254

255+
/*
256+
* GA_LOG_INTR is a synthetic flag that's never propagated to hardware-visible
257+
* tables. GA_LOG_INTR is set if the vCPU needs device posted IRQs to generate
258+
* GA log interrupts to wake the vCPU (because it's blocking or about to block).
259+
*/
260+
#define AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR BIT_ULL(61)
261+
255262
#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
256-
#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
263+
#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK GENMASK_ULL(51, 12)
257264
#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
258265
#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
259266
#define AVIC_PHYSICAL_ID_TABLE_SIZE_MASK (0xFFULL)
260267

261268
#define AVIC_DOORBELL_PHYSICAL_ID_MASK GENMASK_ULL(11, 0)
262269

263-
#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
264-
265270
#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1
266271
#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0
267272
#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF
@@ -290,8 +295,6 @@ enum avic_ipi_failure_cause {
290295
static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_PHYSICAL_ID);
291296
static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID);
292297

293-
#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
294-
295298
#define SVM_SEV_FEAT_SNP_ACTIVE BIT(0)
296299
#define SVM_SEV_FEAT_RESTRICTED_INJECTION BIT(3)
297300
#define SVM_SEV_FEAT_ALTERNATE_INJECTION BIT(4)

arch/x86/kvm/Kconfig

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,16 @@ config KVM_AMD_SEV
166166
Encrypted State (SEV-ES), and Secure Encrypted Virtualization with
167167
Secure Nested Paging (SEV-SNP) technologies on AMD processors.
168168

169+
config KVM_IOAPIC
170+
bool "I/O APIC, PIC, and PIT emulation"
171+
default y
172+
depends on KVM
173+
help
174+
Provides support for KVM to emulate an I/O APIC, PIC, and PIT, i.e.
175+
for full in-kernel APIC emulation.
176+
177+
If unsure, say Y.
178+
169179
config KVM_SMM
170180
bool "System Management Mode emulation"
171181
default y

arch/x86/kvm/Makefile

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,11 @@ ccflags-$(CONFIG_KVM_WERROR) += -Werror
55

66
include $(srctree)/virt/kvm/Makefile.kvm
77

8-
kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
9-
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
10-
debugfs.o mmu/mmu.o mmu/page_track.o \
11-
mmu/spte.o
8+
kvm-y += x86.o emulate.o irq.o lapic.o cpuid.o pmu.o mtrr.o \
9+
debugfs.o mmu/mmu.o mmu/page_track.o mmu/spte.o
1210

1311
kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
12+
kvm-$(CONFIG_KVM_IOAPIC) += i8259.o i8254.o ioapic.o
1413
kvm-$(CONFIG_KVM_HYPERV) += hyperv.o
1514
kvm-$(CONFIG_KVM_XEN) += xen.o
1615
kvm-$(CONFIG_KVM_SMM) += smm.o

arch/x86/kvm/hyperv.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -497,15 +497,19 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
497497
return ret;
498498
}
499499

500-
int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
500+
int kvm_hv_synic_set_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
501+
int irq_source_id, int level, bool line_status)
501502
{
502503
struct kvm_vcpu_hv_synic *synic;
503504

504-
synic = synic_get(kvm, vpidx);
505+
if (!level)
506+
return -1;
507+
508+
synic = synic_get(kvm, e->hv_sint.vcpu);
505509
if (!synic)
506510
return -EINVAL;
507511

508-
return synic_set_irq(synic, sint);
512+
return synic_set_irq(synic, e->hv_sint.sint);
509513
}
510514

511515
void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)

0 commit comments

Comments
 (0)