@@ -77,8 +77,20 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
7777 copy .mode = 0 ;
7878
7979 r = ioctl (uffd , UFFDIO_COPY , & copy );
80- if (r == -1 ) {
81- pr_info ("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n" ,
80+ /*
81+ * When multiple vCPU threads fault on a single page and there are
82+ * multiple readers for the UFFD, at least one of the UFFDIO_COPYs
83+ * will fail with EEXIST: handle that case without signaling an
84+ * error.
85+ *
86+ * Note that this also suppresses any EEXISTs occurring from,
87+ * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never
88+ * happens here, but a realistic VMM might potentially maintain
89+ * some external state to correctly surface EEXISTs to userspace
90+ * (or prevent duplicate COPY/CONTINUEs in the first place).
91+ */
92+ if (r == -1 && errno != EEXIST ) {
93+ pr_info ("Failed UFFDIO_COPY in 0x%lx from thread %d, errno = %d\n" ,
8294 addr , tid , errno );
8395 return r ;
8496 }
@@ -89,8 +101,20 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
89101 cont .range .len = demand_paging_size ;
90102
91103 r = ioctl (uffd , UFFDIO_CONTINUE , & cont );
92- if (r == -1 ) {
93- pr_info ("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n" ,
104+ /*
105+ * When multiple vCPU threads fault on a single page and there are
106+ * multiple readers for the UFFD, at least one of the UFFDIO_CONTINUEs
107+ * will fail with EEXIST: handle that case without signaling an
108+ * error.
109+ *
110+ * Note that this also suppresses any EEXISTs occurring from,
111+ * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never
112+ * happens here, but a realistic VMM might potentially maintain
113+ * some external state to correctly surface EEXISTs to userspace
114+ * (or prevent duplicate COPY/CONTINUEs in the first place).
115+ */
116+ if (r == -1 && errno != EEXIST ) {
117+ pr_info ("Failed UFFDIO_CONTINUE in 0x%lx, thread %d, errno = %d\n" ,
94118 addr , tid , errno );
95119 return r ;
96120 }
@@ -110,7 +134,9 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
110134
111135struct test_params {
112136 int uffd_mode ;
137+ bool single_uffd ;
113138 useconds_t uffd_delay ;
139+ int readers_per_uffd ;
114140 enum vm_mem_backing_src_type src_type ;
115141 bool partition_vcpu_memory_access ;
116142};
@@ -131,11 +157,12 @@ static void run_test(enum vm_guest_mode mode, void *arg)
131157 struct memstress_vcpu_args * vcpu_args ;
132158 struct test_params * p = arg ;
133159 struct uffd_desc * * uffd_descs = NULL ;
160+ uint64_t uffd_region_size ;
134161 struct timespec start ;
135162 struct timespec ts_diff ;
136163 double vcpu_paging_rate ;
137164 struct kvm_vm * vm ;
138- int i ;
165+ int i , num_uffds = 0 ;
139166
140167 vm = memstress_create_vm (mode , nr_vcpus , guest_percpu_mem_size , 1 ,
141168 p -> src_type , p -> partition_vcpu_memory_access );
@@ -148,17 +175,22 @@ static void run_test(enum vm_guest_mode mode, void *arg)
148175 memset (guest_data_prototype , 0xAB , demand_paging_size );
149176
150177 if (p -> uffd_mode == UFFDIO_REGISTER_MODE_MINOR ) {
151- for (i = 0 ; i < nr_vcpus ; i ++ ) {
178+ num_uffds = p -> single_uffd ? 1 : nr_vcpus ;
179+ for (i = 0 ; i < num_uffds ; i ++ ) {
152180 vcpu_args = & memstress_args .vcpu_args [i ];
153181 prefault_mem (addr_gpa2alias (vm , vcpu_args -> gpa ),
154182 vcpu_args -> pages * memstress_args .guest_page_size );
155183 }
156184 }
157185
158186 if (p -> uffd_mode ) {
159- uffd_descs = malloc (nr_vcpus * sizeof (struct uffd_desc * ));
187+ num_uffds = p -> single_uffd ? 1 : nr_vcpus ;
188+ uffd_region_size = nr_vcpus * guest_percpu_mem_size / num_uffds ;
189+
190+ uffd_descs = malloc (num_uffds * sizeof (struct uffd_desc * ));
160191 TEST_ASSERT (uffd_descs , "Memory allocation failed" );
161- for (i = 0 ; i < nr_vcpus ; i ++ ) {
192+ for (i = 0 ; i < num_uffds ; i ++ ) {
193+ struct memstress_vcpu_args * vcpu_args ;
162194 void * vcpu_hva ;
163195
164196 vcpu_args = & memstress_args .vcpu_args [i ];
@@ -171,7 +203,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
171203 */
172204 uffd_descs [i ] = uffd_setup_demand_paging (
173205 p -> uffd_mode , p -> uffd_delay , vcpu_hva ,
174- vcpu_args -> pages * memstress_args .guest_page_size ,
206+ uffd_region_size ,
207+ p -> readers_per_uffd ,
175208 & handle_uffd_page_request );
176209 }
177210 }
@@ -188,7 +221,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
188221
189222 if (p -> uffd_mode ) {
190223 /* Tell the user fault fd handler threads to quit */
191- for (i = 0 ; i < nr_vcpus ; i ++ )
224+ for (i = 0 ; i < num_uffds ; i ++ )
192225 uffd_stop_demand_paging (uffd_descs [i ]);
193226 }
194227
@@ -212,15 +245,20 @@ static void run_test(enum vm_guest_mode mode, void *arg)
212245static void help (char * name )
213246{
214247 puts ("" );
215- printf ("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
216- " [-b memory] [-s type] [-v vcpus] [-c cpu_list] [-o]\n" , name );
248+ printf ("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-a]\n"
249+ " [-d uffd_delay_usec] [-r readers_per_uffd] [-b memory]\n"
250+ " [-s type] [-v vcpus] [-c cpu_list] [-o]\n" , name );
217251 guest_modes_help ();
218252 printf (" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
219253 " UFFD registration mode: 'MISSING' or 'MINOR'.\n" );
220254 kvm_print_vcpu_pinning_help ();
255+ printf (" -a: Use a single userfaultfd for all of guest memory, instead of\n"
256+ " creating one for each region paged by a unique vCPU\n"
257+ " Set implicitly with -o, and no effect without -u.\n" );
221258 printf (" -d: add a delay in usec to the User Fault\n"
222259 " FD handler to simulate demand paging\n"
223260 " overheads. Ignored without -u.\n" );
261+ printf (" -r: Set the number of reader threads per uffd.\n" );
224262 printf (" -b: specify the size of the memory region which should be\n"
225263 " demand paged by each vCPU. e.g. 10M or 3G.\n"
226264 " Default: 1G\n" );
@@ -239,12 +277,14 @@ int main(int argc, char *argv[])
239277 struct test_params p = {
240278 .src_type = DEFAULT_VM_MEM_SRC ,
241279 .partition_vcpu_memory_access = true,
280+ .readers_per_uffd = 1 ,
281+ .single_uffd = false,
242282 };
243283 int opt ;
244284
245285 guest_modes_append_default ();
246286
247- while ((opt = getopt (argc , argv , "hm :u:d:b:s:v:c:o " )) != -1 ) {
287+ while ((opt = getopt (argc , argv , "ahom :u:d:b:s:v:c:r: " )) != -1 ) {
248288 switch (opt ) {
249289 case 'm' :
250290 guest_modes_cmdline (optarg );
@@ -256,6 +296,9 @@ int main(int argc, char *argv[])
256296 p .uffd_mode = UFFDIO_REGISTER_MODE_MINOR ;
257297 TEST_ASSERT (p .uffd_mode , "UFFD mode must be 'MISSING' or 'MINOR'." );
258298 break ;
299+ case 'a' :
300+ p .single_uffd = true;
301+ break ;
259302 case 'd' :
260303 p .uffd_delay = strtoul (optarg , NULL , 0 );
261304 TEST_ASSERT (p .uffd_delay >= 0 , "A negative UFFD delay is not supported." );
@@ -276,6 +319,13 @@ int main(int argc, char *argv[])
276319 break ;
277320 case 'o' :
278321 p .partition_vcpu_memory_access = false;
322+ p .single_uffd = true;
323+ break ;
324+ case 'r' :
325+ p .readers_per_uffd = atoi (optarg );
326+ TEST_ASSERT (p .readers_per_uffd >= 1 ,
327+ "Invalid number of readers per uffd %d: must be >=1" ,
328+ p .readers_per_uffd );
279329 break ;
280330 case 'h' :
281331 default :
0 commit comments