Skip to content

Commit a12578e

Browse files
Fuad Tabba authored and bonzini committed
KVM: guest_memfd: Add plumbing to host to map guest_memfd pages
Introduce the core infrastructure to enable host userspace to mmap() guest_memfd-backed memory. This is needed for several evolving KVM use cases:

* Non-CoCo VM backing: Allows VMMs like Firecracker to run guests entirely backed by guest_memfd, even for non-CoCo VMs [1]. This provides a unified memory management model and simplifies guest memory handling.

* Direct map removal for enhanced security: This is an important step for direct map removal of guest memory [2]. By allowing host userspace to fault in guest_memfd pages directly, we can avoid maintaining host kernel direct maps of guest memory. This provides additional hardening against Spectre-like transient execution attacks by removing a potential attack surface within the kernel.

* Future guest_memfd features: This also lays the groundwork for future enhancements to guest_memfd, such as supporting huge pages and enabling in-place sharing of guest memory with the host for CoCo platforms that permit it [3].

Enable the basic mmap and fault handling logic within guest_memfd, but hold off on allowing userspace to actually do mmap() until the architecture support is also in place.

[1] https://github.com/firecracker-microvm/firecracker/tree/feature/secret-hiding
[2] https://lore.kernel.org/linux-mm/cc1bb8e9bc3e1ab637700a4d3defeec95b55060a.camel@amazon.com
[3] https://lore.kernel.org/all/c1c9591d-218a-495c-957b-ba356c8f8e09@redhat.com/T/#u

Reviewed-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Shivank Garg <shivankg@amd.com>
Acked-by: David Hildenbrand <david@redhat.com>
Co-developed-by: Ackerley Tng <ackerleytng@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-ID: <20250729225455.670324-11-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent d1e54dd commit a12578e

3 files changed

Lines changed: 85 additions & 0 deletions

File tree

arch/x86/kvm/x86.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13521,6 +13521,16 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
1352113521
}
1352213522
EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
1352313523

13524+
#ifdef CONFIG_KVM_GUEST_MEMFD
13525+
/*
13526+
* KVM doesn't yet support mmap() on guest_memfd for VMs with private memory
13527+
* (the private vs. shared tracking needs to be moved into guest_memfd).
13528+
*
* Returns true only when kvm_arch_has_private_mem() reports false for @kvm.
*/
13529+
bool kvm_arch_supports_gmem_mmap(struct kvm *kvm)
13530+
{
13531+
return !kvm_arch_has_private_mem(kvm);
13532+
}
13533+
1352413534
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
1352513535
int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order)
1352613536
{
@@ -13534,6 +13544,7 @@ void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)
1353413544
kvm_x86_call(gmem_invalidate)(start, end);
1353513545
}
1353613546
#endif
13547+
#endif
1353713548

1353813549
int kvm_spec_ctrl_test_value(u64 value)
1353913550
{

include/linux/kvm_host.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,10 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
726726
}
727727
#endif
728728

729+
#ifdef CONFIG_KVM_GUEST_MEMFD
730+
/* Arch hook: reports whether mmap() of guest_memfd is supported for @kvm. */
bool kvm_arch_supports_gmem_mmap(struct kvm *kvm);
731+
#endif
732+
729733
#ifndef kvm_arch_has_readonly_mem
730734
static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
731735
{

virt/kvm/guest_memfd.c

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,72 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
312312
return gfn - slot->base_gfn + slot->gmem.pgoff;
313313
}
314314

315+
/*
 * Report whether mmap() is permitted on the guest_memfd behind @inode.
 *
 * Unconditionally false for now, so kvm_gmem_mmap() fails every request with
 * -ENODEV; @inode is not consulted yet.
 */
static bool kvm_gmem_supports_mmap(struct inode *inode)
316+
{
317+
return false;
318+
}
319+
320+
/*
 * Page-fault handler for userspace mappings of a guest_memfd file.
 *
 * Resolves the faulting page offset (vmf->pgoff) to a folio of the file that
 * backs the VMA and hands the corresponding page back through vmf->page.
 *
 * Returns:
 *   VM_FAULT_LOCKED  - success; vmf->page is set and the folio is neither
 *                      unlocked nor put by this function.
 *   VM_FAULT_SIGBUS  - the offset lies at or beyond i_size, or the folio is
 *                      unexpectedly a large folio.
 *   VM_FAULT_RETRY   - kvm_gmem_get_folio() failed with -EAGAIN.
 *   vmf_error(err)   - any other kvm_gmem_get_folio() failure.
 */
static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
321+
{
322+
struct inode *inode = file_inode(vmf->vma->vm_file);
323+
struct folio *folio;
324+
vm_fault_t ret = VM_FAULT_LOCKED;
325+
326+
/* Reject faults whose byte offset is at or past the end of the file. */
if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
327+
return VM_FAULT_SIGBUS;
328+
329+
/*
 * NOTE(review): the folio_unlock()/folio_put() pair at out_folio implies
 * the folio comes back locked with a reference held - confirm against
 * kvm_gmem_get_folio().
 */
folio = kvm_gmem_get_folio(inode, vmf->pgoff);
330+
if (IS_ERR(folio)) {
331+
int err = PTR_ERR(folio);
332+
333+
/* -EAGAIN is reported as a retryable fault rather than an error. */
if (err == -EAGAIN)
334+
return VM_FAULT_RETRY;
335+
336+
return vmf_error(err);
337+
}
338+
339+
/* Large folios are not expected here: warn once and fail the fault. */
if (WARN_ON_ONCE(folio_test_large(folio))) {
340+
ret = VM_FAULT_SIGBUS;
341+
goto out_folio;
342+
}
343+
344+
/* Not yet uptodate: zero the folio's first page, then mark it prepared. */
if (!folio_test_uptodate(folio)) {
345+
clear_highpage(folio_page(folio, 0));
346+
kvm_gmem_mark_prepared(folio);
347+
}
348+
349+
vmf->page = folio_file_page(folio, vmf->pgoff);
350+
351+
out_folio:
352+
/* Only failure paths drop the lock and reference taken on the folio. */
if (ret != VM_FAULT_LOCKED) {
353+
folio_unlock(folio);
354+
folio_put(folio);
355+
}
356+
357+
return ret;
358+
}
359+
360+
/*
 * VMA operations that kvm_gmem_mmap() installs on guest_memfd mappings; only
 * a fault handler is provided.
 */
static const struct vm_operations_struct kvm_gmem_vm_ops = {
361+
.fault = kvm_gmem_fault_user_mapping,
362+
};
363+
364+
/*
 * mmap() handler for guest_memfd files.
 *
 * Returns -ENODEV when kvm_gmem_supports_mmap() rejects the file's inode,
 * -EINVAL when the mapping is not a shared one, and 0 after installing
 * kvm_gmem_vm_ops on @vma.
 */
static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
365+
{
366+
if (!kvm_gmem_supports_mmap(file_inode(file)))
367+
return -ENODEV;
368+
369+
/* Both VM_SHARED and VM_MAYSHARE must be set on the VMA. */
if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) !=
370+
(VM_SHARED | VM_MAYSHARE)) {
371+
return -EINVAL;
372+
}
373+
374+
vma->vm_ops = &kvm_gmem_vm_ops;
375+
376+
return 0;
377+
}
378+
315379
static struct file_operations kvm_gmem_fops = {
380+
.mmap = kvm_gmem_mmap,
316381
.open = generic_file_open,
317382
.release = kvm_gmem_release,
318383
.fallocate = kvm_gmem_fallocate,
@@ -391,6 +456,11 @@ static const struct inode_operations kvm_gmem_iops = {
391456
.setattr = kvm_gmem_setattr,
392457
};
393458

459+
/*
 * Default (__weak) implementation: report guest_memfd mmap() as supported for
 * every VM. An architecture may provide its own non-weak definition to
 * override this, as the x86 code in this commit does.
 */
bool __weak kvm_arch_supports_gmem_mmap(struct kvm *kvm)
460+
{
461+
return true;
462+
}
463+
394464
static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
395465
{
396466
const char *anon_name = "[kvm-gmem]";

0 commit comments

Comments
 (0)