Skip to content

Commit 557dbdf

Browse files
Alex Mastro authored and awilliam committed
vfio: selftests: Align BAR mmaps for efficient IOMMU mapping
Update vfio_pci_bar_map() to align BAR mmaps for efficient huge page mappings. The manual mmap alignment can be removed once mmap(!MAP_FIXED) on vfio device fds improves to automatically return well-aligned addresses.

Also add MADV_HUGEPAGE, which encourages the kernel to use huge pages (e.g. when /sys/kernel/mm/transparent_hugepage/enabled is set to "madvise").

Drop MAP_FILE from mmap(). It is an ignored compatibility flag.

Signed-off-by: Alex Mastro <amastro@fb.com>
Reviewed-by: David Matlack <dmatlack@google.com>
Tested-by: David Matlack <dmatlack@google.com>
Link: https://lore.kernel.org/r/20260114-map-mmio-test-v3-2-44e036d95e64@fb.com
Signed-off-by: Alex Williamson <alex@shazbot.org>
1 parent 03b7c2d commit 557dbdf

3 files changed

Lines changed: 57 additions & 1 deletion

File tree

tools/testing/selftests/vfio/lib/include/libvfio.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,13 @@
2323
const char *vfio_selftests_get_bdf(int *argc, char *argv[]);
2424
char **vfio_selftests_get_bdfs(int *argc, char *argv[], int *nr_bdfs);
2525

26+
/*
27+
* Reserve size bytes of virtual address space at an address vaddr satisfying
28+
* (vaddr % align) == offset.
29+
*
* align must be greater than offset; the implementation asserts this. The
* reservation is created as a PROT_NONE private anonymous mapping.
*
30+
* Returns the reserved vaddr. The caller is responsible for unmapping
31+
* the returned region.
32+
*/
33+
void *mmap_reserve(size_t size, size_t align, size_t offset);
34+
2635
#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_H */

tools/testing/selftests/vfio/lib/libvfio.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
#include <stdio.h>
44
#include <stdlib.h>
5+
#include <sys/mman.h>
6+
7+
#include <linux/align.h>
58

69
#include "../../../kselftest.h"
710
#include <libvfio.h>
@@ -76,3 +79,25 @@ const char *vfio_selftests_get_bdf(int *argc, char *argv[])
7679

7780
return vfio_selftests_get_bdfs(argc, argv, &nr_bdfs)[0];
7881
}
82+
83+
/*
 * Reserve size bytes of address space whose start satisfies
 * (vaddr % align) == offset. This is done by creating a PROT_NONE private
 * anonymous mapping of size + align bytes and unmapping the unused slack on
 * either side of the chosen range.
 *
 * Asserts that align > offset and that the mmap()/munmap() calls succeed.
 *
 * NOTE(review): ALIGN() comes from <linux/align.h>; presumably it rounds up
 * to a multiple of align and expects align to be a power of two -- confirm.
 * Nothing in this function checks that align is a power of two.
 *
 * Returns the start of the reserved range. This function does not unmap it.
 */
void *mmap_reserve(size_t size, size_t align, size_t offset)
84+
{
85+
void *map_base, *map_align;
86+
size_t delta;
87+
88+
VFIO_ASSERT_GT(align, offset);
89+
/* map_align + delta is the address that gets aligned below. */
delta = align - offset;
90+
91+
/* Over-allocate by align bytes so the wanted address fits inside. */
map_base = mmap(NULL, size + align, PROT_NONE,
92+
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
93+
VFIO_ASSERT_NE(map_base, MAP_FAILED);
94+
95+
/* Align (map_base + delta) with ALIGN(), then step back by delta. */
map_align = (void *)(ALIGN((uintptr_t)map_base + delta, align) - delta);
96+
97+
/* Drop the unused head of the over-allocation, if there is one. */
if (map_align > map_base)
98+
VFIO_ASSERT_EQ(munmap(map_base, map_align - map_base), 0);
99+
100+
/* Drop the tail; head and tail slack together equal the extra align bytes. */
VFIO_ASSERT_EQ(munmap(map_align + size, map_base + align - map_align), 0);
101+
102+
return map_align;
103+
}

tools/testing/selftests/vfio/lib/vfio_pci_device.c

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,14 @@
1111
#include <sys/ioctl.h>
1212
#include <sys/mman.h>
1313

14+
#include <linux/align.h>
1415
#include <linux/iommufd.h>
16+
#include <linux/kernel.h>
1517
#include <linux/limits.h>
18+
#include <linux/log2.h>
1619
#include <linux/mman.h>
1720
#include <linux/overflow.h>
21+
#include <linux/sizes.h>
1822
#include <linux/types.h>
1923
#include <linux/vfio.h>
2024

@@ -123,20 +127,38 @@ static void vfio_pci_region_get(struct vfio_pci_device *device, int index,
123127
/*
 * mmap() the BAR at index from device->fd and store the resulting address
 * in bar->vaddr.
 *
 * Asserts that index is below PCI_STD_NUM_BARS, that the BAR is not already
 * mapped (bar->vaddr is NULL), that the region advertises
 * VFIO_REGION_INFO_FLAG_MMAP, that its size is a power of two, and that the
 * final mmap() does not return MAP_FAILED.
 */
static void vfio_pci_bar_map(struct vfio_pci_device *device, int index)
124128
{
125129
struct vfio_pci_bar *bar = &device->bars[index];
130+
size_t align, size;
126131
int prot = 0;
132+
void *vaddr;
127133

128134
VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
129135
VFIO_ASSERT_NULL(bar->vaddr);
130136
VFIO_ASSERT_TRUE(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP);
137+
VFIO_ASSERT_TRUE(is_power_of_2(bar->info.size));
131138

132139
/* Request only the access rights the region flags advertise. */
if (bar->info.flags & VFIO_REGION_INFO_FLAG_READ)
133140
prot |= PROT_READ;
134141
if (bar->info.flags & VFIO_REGION_INFO_FLAG_WRITE)
135142
prot |= PROT_WRITE;
136143

137-
bar->vaddr = mmap(NULL, bar->info.size, prot, MAP_FILE | MAP_SHARED,
144+
size = bar->info.size;
145+
146+
/*
147+
* Align BAR mmaps to improve page fault granularity during potential
148+
* subsequent IOMMU mapping of these BAR vaddr. 1G for x86 is the
149+
* largest hugepage size across any architecture, so no benefit from
150+
* larger alignment. BARs smaller than 1G will be aligned by their
151+
* power-of-two size, guaranteeing sufficient alignment for smaller
152+
* hugepages, if present.
153+
*/
154+
align = min_t(size_t, size, SZ_1G);
155+
156+
/*
 * Reserve an aligned range first, then place the BAR mapping on top of
 * that reservation with MAP_FIXED.
 */
vaddr = mmap_reserve(size, align, 0);
157+
bar->vaddr = mmap(vaddr, size, prot, MAP_SHARED | MAP_FIXED,
138158
device->fd, bar->info.offset);
139159
VFIO_ASSERT_NE(bar->vaddr, MAP_FAILED);
160+
161+
/* Hint only: the madvise() return value is not checked. */
madvise(bar->vaddr, size, MADV_HUGEPAGE);
140162
}
141163

142164
static void vfio_pci_bar_unmap(struct vfio_pci_device *device, int index)

0 commit comments

Comments
 (0)