Skip to content

Commit 5f8ca05

Browse files
committed
KVM: Add irqfd to KVM's list via the vfs_poll() callback
Add the irqfd structure to KVM's list of irqfds in kvm_irqfd_register(), i.e. via the vfs_poll() callback. This will allow taking irqfds.lock across the entire registration sequence (add to waitqueue, add to list), and more importantly will allow inserting into KVM's list if and only if adding to the waitqueue succeeds (spoiler alert), without needing to juggle return codes in weird ways. Tested-by: K Prateek Nayak <kprateek.nayak@amd.com> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lore.kernel.org/r/20250522235223.3178519-5-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc@google.com>
1 parent b5c5435 commit 5f8ca05

1 file changed

Lines changed: 55 additions & 43 deletions

File tree

virt/kvm/eventfd.c

Lines changed: 55 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -245,16 +245,58 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
245245
return ret;
246246
}
247247

248+
/*
 * Re-resolve the IRQ routing for @irqfd's GSI and publish the result to
 * irqfd->irq_entry under the irq_entry seqcount, so concurrent readers
 * observe either the old entry or the new one, never a torn update.
 *
 * Caller must hold kvm->irqfds.lock (asserted below).
 */
static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
{
	struct kvm_kernel_irq_routing_entry *e;
	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
	int n_entries;

	lockdep_assert_held(&kvm->irqfds.lock);

	n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);

	write_seqcount_begin(&irqfd->irq_entry_sc);

	e = entries;
	/*
	 * Cache the routing entry only if the GSI maps to exactly one
	 * entry; otherwise clear the cached type so readers treat the
	 * cache as invalid.
	 */
	if (n_entries == 1)
		irqfd->irq_entry = *e;
	else
		irqfd->irq_entry.type = 0;

	write_seqcount_end(&irqfd->irq_entry_sc);
}
268+
248269
/*
 * Context handed to kvm_irqfd_register() through the poll_table, so the
 * vfs_poll() callback can link the irqfd into the VM's list and report
 * the registration result back to kvm_irqfd_assign().
 */
struct kvm_irqfd_pt {
	struct kvm_kernel_irqfd *irqfd;	/* irqfd being registered */
	struct kvm *kvm;		/* VM the irqfd is being added to */
	poll_table pt;			/* passed to vfs_poll() on the eventfd */
	int ret;			/* 0 on success, -EBUSY if the eventfd
					 * is already bound to another irqfd */
};
252275

253276
static void kvm_irqfd_register(struct file *file, wait_queue_head_t *wqh,
254277
poll_table *pt)
255278
{
256279
struct kvm_irqfd_pt *p = container_of(pt, struct kvm_irqfd_pt, pt);
257280
struct kvm_kernel_irqfd *irqfd = p->irqfd;
281+
struct kvm_kernel_irqfd *tmp;
282+
struct kvm *kvm = p->kvm;
283+
284+
spin_lock_irq(&kvm->irqfds.lock);
285+
286+
list_for_each_entry(tmp, &kvm->irqfds.items, list) {
287+
if (irqfd->eventfd != tmp->eventfd)
288+
continue;
289+
/* This fd is used for another irq already. */
290+
p->ret = -EBUSY;
291+
spin_unlock_irq(&kvm->irqfds.lock);
292+
return;
293+
}
294+
295+
irqfd_update(kvm, irqfd);
296+
297+
list_add_tail(&irqfd->list, &kvm->irqfds.items);
298+
299+
spin_unlock_irq(&kvm->irqfds.lock);
258300

259301
/*
260302
* Add the irqfd as a priority waiter on the eventfd, with a custom
@@ -264,26 +306,7 @@ static void kvm_irqfd_register(struct file *file, wait_queue_head_t *wqh,
264306
init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
265307

266308
add_wait_queue_priority(wqh, &irqfd->wait);
267-
}
268-
269-
/* Must be called under irqfds.lock */
270-
static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
271-
{
272-
struct kvm_kernel_irq_routing_entry *e;
273-
struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
274-
int n_entries;
275-
276-
n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);
277-
278-
write_seqcount_begin(&irqfd->irq_entry_sc);
279-
280-
e = entries;
281-
if (n_entries == 1)
282-
irqfd->irq_entry = *e;
283-
else
284-
irqfd->irq_entry.type = 0;
285-
286-
write_seqcount_end(&irqfd->irq_entry_sc);
309+
p->ret = 0;
287310
}
288311

289312
#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
@@ -308,7 +331,7 @@ void __weak kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd,
308331
static int
309332
kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
310333
{
311-
struct kvm_kernel_irqfd *irqfd, *tmp;
334+
struct kvm_kernel_irqfd *irqfd;
312335
struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
313336
struct kvm_irqfd_pt irqfd_pt;
314337
int ret;
@@ -407,32 +430,22 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
407430
*/
408431
idx = srcu_read_lock(&kvm->irq_srcu);
409432

410-
spin_lock_irq(&kvm->irqfds.lock);
411-
412-
ret = 0;
413-
list_for_each_entry(tmp, &kvm->irqfds.items, list) {
414-
if (irqfd->eventfd != tmp->eventfd)
415-
continue;
416-
/* This fd is used for another irq already. */
417-
ret = -EBUSY;
418-
goto fail_duplicate;
419-
}
420-
421-
irqfd_update(kvm, irqfd);
422-
423-
list_add_tail(&irqfd->list, &kvm->irqfds.items);
424-
425-
spin_unlock_irq(&kvm->irqfds.lock);
426-
427433
/*
428-
* Register the irqfd with the eventfd by polling on the eventfd. If
429-
* there was en event pending on the eventfd prior to registering,
430-
* manually trigger IRQ injection.
434+
* Register the irqfd with the eventfd by polling on the eventfd, and
435+
* simultaneously add the irqfd to KVM's list. If there was an event
436+
* pending on the eventfd prior to registering, manually trigger IRQ
437+
* injection.
431438
*/
432439
irqfd_pt.irqfd = irqfd;
440+
irqfd_pt.kvm = kvm;
433441
init_poll_funcptr(&irqfd_pt.pt, kvm_irqfd_register);
434442

435443
events = vfs_poll(fd_file(f), &irqfd_pt.pt);
444+
445+
ret = irqfd_pt.ret;
446+
if (ret)
447+
goto fail_poll;
448+
436449
if (events & EPOLLIN)
437450
schedule_work(&irqfd->inject);
438451

@@ -452,8 +465,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
452465
srcu_read_unlock(&kvm->irq_srcu, idx);
453466
return 0;
454467

455-
fail_duplicate:
456-
spin_unlock_irq(&kvm->irqfds.lock);
468+
fail_poll:
457469
srcu_read_unlock(&kvm->irq_srcu, idx);
458470
fail:
459471
if (irqfd->resampler)

0 commit comments

Comments
 (0)