Skip to content

Commit d721f52

Browse files
dmatlack and awilliam
authored and committed
vfio: selftests: Add vfio_pci_device_init_perf_test
Add a new VFIO selftest for measuring the time it takes to run vfio_pci_device_init() in parallel for one or more devices.

This test serves as a manual regression test for the performance improvement of commit e908f58 ("vfio/pci: Separate SR-IOV VF dev_set"). For example, when running this test with 64 VFs under the same PF:

Before:

  $ ./vfio_pci_device_init_perf_test -r vfio_pci_device_init_perf_test.iommufd.init 0000:1a:00.0 0000:1a:00.1 ...
  ...
  Wall time: 6.653234463s
  Min init time (per device): 0.101215344s
  Max init time (per device): 6.652755941s
  Avg init time (per device): 3.377609608s

After:

  $ ./vfio_pci_device_init_perf_test -r vfio_pci_device_init_perf_test.iommufd.init 0000:1a:00.0 0000:1a:00.1 ...
  ...
  Wall time: 0.122978332s
  Min init time (per device): 0.108121915s
  Max init time (per device): 0.122762761s
  Avg init time (per device): 0.113816748s

This test does not make any assertions about performance, since any such assertion is likely to be flaky due to system differences and random noise. However, this test can be fed into automation to detect regressions, and can be used by developers in the future to measure performance optimizations.

Suggested-by: Aaron Lewis <aaronlewis@google.com>
Reviewed-by: Alex Mastro <amastro@fb.com>
Tested-by: Alex Mastro <amastro@fb.com>
Reviewed-by: Raghavendra Rao Ananta <rananta@google.com>
Signed-off-by: David Matlack <dmatlack@google.com>
Link: https://lore.kernel.org/r/20251126231733.3302983-19-dmatlack@google.com
Signed-off-by: Alex Williamson <alex@shazbot.org>
1 parent b8e96c8 commit d721f52

2 files changed

Lines changed: 171 additions & 0 deletions

File tree

tools/testing/selftests/vfio/Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ CFLAGS = $(KHDR_INCLUDES)
22
TEST_GEN_PROGS += vfio_dma_mapping_test
33
TEST_GEN_PROGS += vfio_iommufd_setup_test
44
TEST_GEN_PROGS += vfio_pci_device_test
5+
TEST_GEN_PROGS += vfio_pci_device_init_perf_test
56
TEST_GEN_PROGS += vfio_pci_driver_test
67

78
TEST_FILES += scripts/cleanup.sh
@@ -16,6 +17,8 @@ CFLAGS += -I$(top_srcdir)/tools/include
1617
CFLAGS += -MD
1718
CFLAGS += $(EXTRA_CFLAGS)
1819

20+
LDFLAGS += -pthread
21+
1922
$(TEST_GEN_PROGS): %: %.o $(LIBVFIO_O)
2023
$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $< $(LIBVFIO_O) $(LDLIBS) -o $@
2124

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
#include <pthread.h>
3+
#include <sys/ioctl.h>
4+
#include <sys/mman.h>
5+
6+
#include <linux/sizes.h>
7+
#include <linux/time64.h>
8+
#include <linux/vfio.h>
9+
10+
#include <libvfio.h>
11+
12+
#include "../kselftest_harness.h"
13+
14+
static char **device_bdfs;
15+
static int nr_devices;
16+
17+
struct thread_args {
18+
struct iommu *iommu;
19+
int device_index;
20+
struct timespec start;
21+
struct timespec end;
22+
pthread_barrier_t *barrier;
23+
};
24+
25+
FIXTURE(vfio_pci_device_init_perf_test) {
26+
pthread_t *threads;
27+
pthread_barrier_t barrier;
28+
struct thread_args *thread_args;
29+
struct iommu *iommu;
30+
};
31+
32+
/*
 * Variant parameters.
 *
 * @iommu_mode: mode string handed to iommu_init() during fixture setup
 */
FIXTURE_VARIANT(vfio_pci_device_init_perf_test) {
	const char *iommu_mode;
};
35+
36+
/*
 * Register one fixture variant named after @_iommu_mode; the stringified
 * name is stored as that variant's iommu_mode.
 */
#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode)			\
FIXTURE_VARIANT_ADD(vfio_pci_device_init_perf_test, _iommu_mode) {	\
	.iommu_mode = #_iommu_mode,					\
}

/*
 * NOTE(review): this macro is defined outside this file; presumably it
 * invokes FIXTURE_VARIANT_ADD_IOMMU_MODE() once per supported IOMMU mode.
 * Confirm against its definition.
 */
FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES();
42+
43+
FIXTURE_SETUP(vfio_pci_device_init_perf_test)
44+
{
45+
int i;
46+
47+
self->iommu = iommu_init(variant->iommu_mode);
48+
self->threads = calloc(nr_devices, sizeof(self->threads[0]));
49+
self->thread_args = calloc(nr_devices, sizeof(self->thread_args[0]));
50+
51+
pthread_barrier_init(&self->barrier, NULL, nr_devices);
52+
53+
for (i = 0; i < nr_devices; i++) {
54+
self->thread_args[i].iommu = self->iommu;
55+
self->thread_args[i].barrier = &self->barrier;
56+
self->thread_args[i].device_index = i;
57+
}
58+
}
59+
60+
FIXTURE_TEARDOWN(vfio_pci_device_init_perf_test)
61+
{
62+
iommu_cleanup(self->iommu);
63+
free(self->threads);
64+
free(self->thread_args);
65+
}
66+
67+
static s64 to_ns(struct timespec ts)
68+
{
69+
return (s64)ts.tv_nsec + NSEC_PER_SEC * (s64)ts.tv_sec;
70+
}
71+
72+
/* Split a nanosecond count into whole seconds and leftover nanoseconds. */
static struct timespec to_timespec(s64 ns)
{
	struct timespec result = {0};

	result.tv_sec = ns / NSEC_PER_SEC;
	result.tv_nsec = ns % NSEC_PER_SEC;

	return result;
}
81+
82+
static struct timespec timespec_sub(struct timespec a, struct timespec b)
83+
{
84+
return to_timespec(to_ns(a) - to_ns(b));
85+
}
86+
87+
/* Return the earlier/smaller of two timespecs; b is returned on a tie. */
static struct timespec timespec_min(struct timespec a, struct timespec b)
{
	if (to_ns(a) < to_ns(b))
		return a;

	return b;
}
91+
92+
/* Return the later/larger of two timespecs; b is returned on a tie. */
static struct timespec timespec_max(struct timespec a, struct timespec b)
{
	if (to_ns(a) > to_ns(b))
		return a;

	return b;
}
96+
97+
static void *thread_main(void *__args)
98+
{
99+
struct thread_args *args = __args;
100+
struct vfio_pci_device *device;
101+
102+
pthread_barrier_wait(args->barrier);
103+
104+
clock_gettime(CLOCK_MONOTONIC, &args->start);
105+
device = vfio_pci_device_init(device_bdfs[args->device_index], args->iommu);
106+
clock_gettime(CLOCK_MONOTONIC, &args->end);
107+
108+
pthread_barrier_wait(args->barrier);
109+
110+
vfio_pci_device_cleanup(device);
111+
return NULL;
112+
}
113+
114+
TEST_F(vfio_pci_device_init_perf_test, init)
115+
{
116+
struct timespec start = to_timespec(INT64_MAX), end = {};
117+
struct timespec min = to_timespec(INT64_MAX);
118+
struct timespec max = {};
119+
struct timespec avg = {};
120+
struct timespec wall_time;
121+
s64 thread_ns = 0;
122+
int i;
123+
124+
for (i = 0; i < nr_devices; i++) {
125+
pthread_create(&self->threads[i], NULL, thread_main,
126+
&self->thread_args[i]);
127+
}
128+
129+
for (i = 0; i < nr_devices; i++) {
130+
struct thread_args *args = &self->thread_args[i];
131+
struct timespec init_time;
132+
133+
pthread_join(self->threads[i], NULL);
134+
135+
start = timespec_min(start, args->start);
136+
end = timespec_max(end, args->end);
137+
138+
init_time = timespec_sub(args->end, args->start);
139+
min = timespec_min(min, init_time);
140+
max = timespec_max(max, init_time);
141+
thread_ns += to_ns(init_time);
142+
}
143+
144+
avg = to_timespec(thread_ns / nr_devices);
145+
wall_time = timespec_sub(end, start);
146+
147+
printf("Wall time: %lu.%09lus\n",
148+
wall_time.tv_sec, wall_time.tv_nsec);
149+
printf("Min init time (per device): %lu.%09lus\n",
150+
min.tv_sec, min.tv_nsec);
151+
printf("Max init time (per device): %lu.%09lus\n",
152+
max.tv_sec, max.tv_nsec);
153+
printf("Avg init time (per device): %lu.%09lus\n",
154+
avg.tv_sec, avg.tv_nsec);
155+
}
156+
157+
int main(int argc, char *argv[])
158+
{
159+
int i;
160+
161+
device_bdfs = vfio_selftests_get_bdfs(&argc, argv, &nr_devices);
162+
163+
printf("Testing parallel initialization of %d devices:\n", nr_devices);
164+
for (i = 0; i < nr_devices; i++)
165+
printf(" %s\n", device_bdfs[i]);
166+
167+
return test_harness_run(argc, argv);
168+
}

0 commit comments

Comments
 (0)