1313#include <stdio.h>
1414#include <stdlib.h>
1515#include <time.h>
16- #include <poll.h>
1716#include <pthread.h>
1817#include <linux/userfaultfd.h>
1918#include <sys/syscall.h>
@@ -77,8 +76,20 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
7776 copy .mode = 0 ;
7877
7978 r = ioctl (uffd , UFFDIO_COPY , & copy );
80- if (r == -1 ) {
81- pr_info ("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n" ,
79+ /*
80+ * When multiple vCPU threads fault on a single page and there are
81+ * multiple readers for the UFFD, at least one of the UFFDIO_COPYs
82+ * will fail with EEXIST: handle that case without signaling an
83+ * error.
84+ *
85+ * Note that this also suppresses any EEXISTs occurring from,
86+ * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never
87+ * happens here, but a realistic VMM might potentially maintain
88+ * some external state to correctly surface EEXISTs to userspace
89+ * (or prevent duplicate COPY/CONTINUEs in the first place).
90+ */
91+ if (r == -1 && errno != EEXIST ) {
92+ pr_info ("Failed UFFDIO_COPY in 0x%lx from thread %d, errno = %d\n" ,
8293 addr , tid , errno );
8394 return r ;
8495 }
@@ -89,8 +100,20 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
89100 cont .range .len = demand_paging_size ;
90101
91102 r = ioctl (uffd , UFFDIO_CONTINUE , & cont );
92- if (r == -1 ) {
93- pr_info ("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n" ,
103+ /*
104+ * When multiple vCPU threads fault on a single page and there are
105+ * multiple readers for the UFFD, at least one of the UFFDIO_CONTINUEs
106+ * will fail with EEXIST: handle that case without signaling an
107+ * error.
108+ *
109+ * Note that this also suppresses any EEXISTs occurring from,
110+ * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never
111+ * happens here, but a realistic VMM might potentially maintain
112+ * some external state to correctly surface EEXISTs to userspace
113+ * (or prevent duplicate COPY/CONTINUEs in the first place).
114+ */
115+ if (r == -1 && errno != EEXIST ) {
116+ pr_info ("Failed UFFDIO_CONTINUE in 0x%lx, thread %d, errno = %d\n" ,
94117 addr , tid , errno );
95118 return r ;
96119 }
@@ -110,7 +133,9 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
110133
111134struct test_params { /* Per-run options: declared in main() and set from the command line, read by run_test(). */
112135 int uffd_mode ; /* UFFD registration mode chosen with -u; 0 leaves userfaultfd unused. */
136+ bool single_uffd ; /* -a (also set by -o): one uffd for all guest memory instead of one per vCPU. */
113137 useconds_t uffd_delay ; /* -d: delay in usec handed to uffd_setup_demand_paging(). */
138+ int readers_per_uffd ; /* -r: reader threads per uffd; main() asserts it is >= 1. */
114139 enum vm_mem_backing_src_type src_type ; /* Passed to memstress_create_vm(). */
115140 bool partition_vcpu_memory_access ; /* Passed to memstress_create_vm(); -o clears it. */
116141};
@@ -131,10 +156,12 @@ static void run_test(enum vm_guest_mode mode, void *arg)
131156 struct memstress_vcpu_args * vcpu_args ;
132157 struct test_params * p = arg ;
133158 struct uffd_desc * * uffd_descs = NULL ;
159+ uint64_t uffd_region_size ;
134160 struct timespec start ;
135161 struct timespec ts_diff ;
162+ double vcpu_paging_rate ;
136163 struct kvm_vm * vm ;
137- int i ;
164+ int i , num_uffds = 0 ;
138165
139166 vm = memstress_create_vm (mode , nr_vcpus , guest_percpu_mem_size , 1 ,
140167 p -> src_type , p -> partition_vcpu_memory_access );
@@ -147,17 +174,22 @@ static void run_test(enum vm_guest_mode mode, void *arg)
147174 memset (guest_data_prototype , 0xAB , demand_paging_size );
148175
149176 if (p -> uffd_mode == UFFDIO_REGISTER_MODE_MINOR ) {
150- for (i = 0 ; i < nr_vcpus ; i ++ ) {
177+ num_uffds = p -> single_uffd ? 1 : nr_vcpus ;
178+ for (i = 0 ; i < num_uffds ; i ++ ) {
151179 vcpu_args = & memstress_args .vcpu_args [i ];
152180 prefault_mem (addr_gpa2alias (vm , vcpu_args -> gpa ),
153181 vcpu_args -> pages * memstress_args .guest_page_size );
154182 }
155183 }
156184
157185 if (p -> uffd_mode ) {
158- uffd_descs = malloc (nr_vcpus * sizeof (struct uffd_desc * ));
186+ num_uffds = p -> single_uffd ? 1 : nr_vcpus ;
187+ uffd_region_size = nr_vcpus * guest_percpu_mem_size / num_uffds ;
188+
189+ uffd_descs = malloc (num_uffds * sizeof (struct uffd_desc * ));
159190 TEST_ASSERT (uffd_descs , "Memory allocation failed" );
160- for (i = 0 ; i < nr_vcpus ; i ++ ) {
191+ for (i = 0 ; i < num_uffds ; i ++ ) {
192+ struct memstress_vcpu_args * vcpu_args ;
161193 void * vcpu_hva ;
162194
163195 vcpu_args = & memstress_args .vcpu_args [i ];
@@ -170,7 +202,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
170202 */
171203 uffd_descs [i ] = uffd_setup_demand_paging (
172204 p -> uffd_mode , p -> uffd_delay , vcpu_hva ,
173- vcpu_args -> pages * memstress_args .guest_page_size ,
205+ uffd_region_size ,
206+ p -> readers_per_uffd ,
174207 & handle_uffd_page_request );
175208 }
176209 }
@@ -187,15 +220,19 @@ static void run_test(enum vm_guest_mode mode, void *arg)
187220
188221 if (p -> uffd_mode ) {
189222 /* Tell the user fault fd handler threads to quit */
190- for (i = 0 ; i < nr_vcpus ; i ++ )
223+ for (i = 0 ; i < num_uffds ; i ++ )
191224 uffd_stop_demand_paging (uffd_descs [i ]);
192225 }
193226
194- pr_info ("Total guest execution time: %ld.%.9lds\n" ,
227+ pr_info ("Total guest execution time:\t %ld.%.9lds\n" ,
195228 ts_diff .tv_sec , ts_diff .tv_nsec );
196- pr_info ("Overall demand paging rate: %f pgs/sec\n" ,
197- memstress_args .vcpu_args [0 ].pages * nr_vcpus /
198- ((double )ts_diff .tv_sec + (double )ts_diff .tv_nsec / NSEC_PER_SEC ));
229+
230+ vcpu_paging_rate = memstress_args .vcpu_args [0 ].pages /
231+ ((double )ts_diff .tv_sec + (double )ts_diff .tv_nsec / NSEC_PER_SEC );
232+ pr_info ("Per-vcpu demand paging rate:\t%f pgs/sec/vcpu\n" ,
233+ vcpu_paging_rate );
234+ pr_info ("Overall demand paging rate:\t%f pgs/sec\n" ,
235+ vcpu_paging_rate * nr_vcpus );
199236
200237 memstress_destroy_vm (vm );
201238
@@ -207,15 +244,20 @@ static void run_test(enum vm_guest_mode mode, void *arg)
207244static void help (char * name )
208245{
209246 puts ("" );
210- printf ("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
211- " [-b memory] [-s type] [-v vcpus] [-c cpu_list] [-o]\n" , name );
247+ printf ("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-a]\n"
248+ " [-d uffd_delay_usec] [-r readers_per_uffd] [-b memory]\n"
249+ " [-s type] [-v vcpus] [-c cpu_list] [-o]\n" , name );
212250 guest_modes_help ();
213251 printf (" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
214252 " UFFD registration mode: 'MISSING' or 'MINOR'.\n" );
215253 kvm_print_vcpu_pinning_help ();
254+ printf (" -a: Use a single userfaultfd for all of guest memory, instead of\n"
255+ " creating one for each region paged by a unique vCPU\n"
256+ " Set implicitly with -o, and no effect without -u.\n" );
216257 printf (" -d: add a delay in usec to the User Fault\n"
217258 " FD handler to simulate demand paging\n"
218259 " overheads. Ignored without -u.\n" );
260+ printf (" -r: Set the number of reader threads per uffd.\n" );
219261 printf (" -b: specify the size of the memory region which should be\n"
220262 " demand paged by each vCPU. e.g. 10M or 3G.\n"
221263 " Default: 1G\n" );
@@ -234,12 +276,14 @@ int main(int argc, char *argv[])
234276 struct test_params p = {
235277 .src_type = DEFAULT_VM_MEM_SRC ,
236278 .partition_vcpu_memory_access = true,
279+ .readers_per_uffd = 1 ,
280+ .single_uffd = false,
237281 };
238282 int opt ;
239283
240284 guest_modes_append_default ();
241285
242- while ((opt = getopt (argc , argv , "hm :u:d:b:s:v:c:o " )) != -1 ) {
286+ while ((opt = getopt (argc , argv , "ahom :u:d:b:s:v:c:r: " )) != -1 ) {
243287 switch (opt ) {
244288 case 'm' :
245289 guest_modes_cmdline (optarg );
@@ -251,6 +295,9 @@ int main(int argc, char *argv[])
251295 p .uffd_mode = UFFDIO_REGISTER_MODE_MINOR ;
252296 TEST_ASSERT (p .uffd_mode , "UFFD mode must be 'MISSING' or 'MINOR'." );
253297 break ;
298+ case 'a' :
299+ p .single_uffd = true;
300+ break ;
254301 case 'd' :
255302 p .uffd_delay = strtoul (optarg , NULL , 0 );
256303 TEST_ASSERT (p .uffd_delay >= 0 , "A negative UFFD delay is not supported." );
@@ -271,6 +318,13 @@ int main(int argc, char *argv[])
271318 break ;
272319 case 'o' :
273320 p .partition_vcpu_memory_access = false;
321+ p .single_uffd = true;
322+ break ;
323+ case 'r' :
324+ p .readers_per_uffd = atoi (optarg );
325+ TEST_ASSERT (p .readers_per_uffd >= 1 ,
326+ "Invalid number of readers per uffd %d: must be >=1" ,
327+ p .readers_per_uffd );
274328 break ;
275329 case 'h' :
276330 default :
0 commit comments