#include "processor.h"
#include "ucall_common.h"

+static bool mprotect_ro_done;
+
static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
{
	uint64_t gpa;
@@ -32,6 +34,42 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
		*((volatile uint64_t *)gpa);
	GUEST_SYNC(2);

+	/*
+	 * Write to the region while mprotect(PROT_READ) is underway.  Keep
+	 * looping until the memory is guaranteed to be read-only, otherwise
+	 * vCPUs may complete their writes and advance to the next stage
+	 * prematurely.
+	 *
+	 * For architectures that support skipping the faulting instruction,
+	 * generate the store via inline assembly to ensure the exact length
+	 * of the instruction is known and stable (vcpu_arch_put_guest() on
+	 * fixed-length architectures should work, but the cost of paranoia
+	 * is low in this case).  For x86, hand-code the exact opcode so that
+	 * there is no room for variability in the generated instruction.
+	 */
+	do {
+		for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
+#ifdef __x86_64__
+			asm volatile(".byte 0x48,0x89,0x00" :: "a"(gpa) : "memory"); /* mov %rax, (%rax) */
+#elif defined(__aarch64__)
+			asm volatile("str %0, [%0]" :: "r"(gpa) : "memory");
+#else
+			vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
+#endif
+	} while (!READ_ONCE(mprotect_ro_done));
+
+	/*
+	 * Only architectures that write the entire range can explicitly sync,
+	 * as other architectures will be stuck on the write fault.
+	 */
+#if defined(__x86_64__) || defined(__aarch64__)
+	GUEST_SYNC(3);
+#endif
+
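+	/*
+	 * Rewrite the entire region; a faulting store is simply re-executed
+	 * each time the host re-runs the vCPU, so the loop (and the final
+	 * sync) completes once the boss restores PROT_WRITE.
+	 */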
+	for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
+		vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
+	GUEST_SYNC(4);
+
	GUEST_ASSERT(0);
}

@@ -79,6 +117,7 @@ static void *vcpu_worker(void *data)
	struct vcpu_info *info = data;
	struct kvm_vcpu *vcpu = info->vcpu;
	struct kvm_vm *vm = vcpu->vm;
+	int r;

	vcpu_args_set(vcpu, 3, info->start_gpa, info->end_gpa, vm->page_size);

@@ -101,6 +140,57 @@ static void *vcpu_worker(void *data)

	/* Stage 2, read all of guest memory, which is now read-only. */
	run_vcpu(vcpu, 2);
+
+	/*
+	 * Stage 3, write guest memory and verify that KVM returns -EFAULT once
+	 * the mprotect(PROT_READ) lands.  Only architectures that support
+	 * validating *all* of guest memory sync for this stage, as vCPUs will
+	 * be stuck on the faulting instruction for other architectures.  Go to
+	 * stage 3 without a rendezvous so the writes race with the boss's
+	 * mprotect(PROT_READ).
+	 */
+	do {
+		r = _vcpu_run(vcpu);
+	} while (!r);
+	TEST_ASSERT(r == -1 && errno == EFAULT,
+		    "Expected EFAULT on write to RO memory, got r = %d, errno = %d", r, errno);
+
+#if defined(__x86_64__) || defined(__aarch64__)
+	/*
+	 * Verify *all* writes from the guest hit EFAULT due to the VMA now
+	 * being read-only.  x86 and arm64 only at this time as skipping the
+	 * instruction that hits the EFAULT requires advancing the program
+	 * counter, which is arch specific and relies on inline assembly.
+	 */
+#ifdef __x86_64__
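+	/*
+	 * Use sync_regs so that RIP can be read and adjusted directly via
+	 * vcpu->run->s.regs, without a KVM_GET_REGS/KVM_SET_REGS ioctl pair
+	 * on every -EFAULT exit.
+	 */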
+	vcpu->run->kvm_valid_regs = KVM_SYNC_X86_REGS;
+#endif
+	for (;;) {
+		r = _vcpu_run(vcpu);
+		if (!r)
+			break;
+		TEST_ASSERT_EQ(errno, EFAULT);
+#if defined(__x86_64__)
+		WRITE_ONCE(vcpu->run->kvm_dirty_regs, KVM_SYNC_X86_REGS);
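+		/* The hand-coded "mov %rax, (%rax)" in guest_code() is 3 bytes. */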
+		vcpu->run->s.regs.regs.rip += 3;
+#elif defined(__aarch64__)
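+		/* All AArch64 instructions are 4 bytes; step the PC past the store. */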
+		vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc),
+			     vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)) + 4);
+#endif
+
+	}
+	assert_sync_stage(vcpu, 3);
+#endif /* __x86_64__ || __aarch64__ */
+	rendezvous_with_boss();
+
+	/*
+	 * Stage 4.  Run to completion, waiting for mprotect(PROT_WRITE) to
+	 * make the memory writable again.
+	 */
+	do {
+		r = _vcpu_run(vcpu);
+	} while (r && errno == EFAULT);
+	TEST_ASSERT_EQ(r, 0);
+	assert_sync_stage(vcpu, 4);
	rendezvous_with_boss();

	return NULL;
@@ -183,7 +273,7 @@ int main(int argc, char *argv[])
	const uint64_t start_gpa = SZ_4G;
	const int first_slot = 1;

-	struct timespec time_start, time_run1, time_reset, time_run2, time_ro;
+	struct timespec time_start, time_run1, time_reset, time_run2, time_ro, time_rw;
	uint64_t max_gpa, gpa, slot_size, max_mem, i;
	int max_slots, slot, opt, fd;
	bool hugepages = false;
@@ -288,19 +378,27 @@ int main(int argc, char *argv[])
	rendezvous_with_vcpus(&time_run2, "run 2");

	mprotect(mem, slot_size, PROT_READ);
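+	/*
+	 * The region is read-only once mprotect() returns; the short delay
+	 * before setting mprotect_ro_done presumably gives the vCPUs a window
+	 * to take write faults on the read-only memory before the guest's
+	 * write loop is allowed to exit.
+	 */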
+	usleep(10);
+	mprotect_ro_done = true;
+	sync_global_to_guest(vm, mprotect_ro_done);
+
	rendezvous_with_vcpus(&time_ro, "mprotect RO");
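+	/*
+	 * All vCPUs have synced (or are stuck on a write fault); restore write
+	 * access so that stage 4 can run to completion.
+	 */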
+	mprotect(mem, slot_size, PROT_READ | PROT_WRITE);
+	rendezvous_with_vcpus(&time_rw, "mprotect RW");

+	time_rw = timespec_sub(time_rw, time_ro);
	time_ro = timespec_sub(time_ro, time_run2);
	time_run2 = timespec_sub(time_run2, time_reset);
	time_reset = timespec_sub(time_reset, time_run1);
	time_run1 = timespec_sub(time_run1, time_start);

	pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds, "
-		"ro = %ld.%.9lds\n",
+		"ro = %ld.%.9lds, rw = %ld.%.9lds\n",
		time_run1.tv_sec, time_run1.tv_nsec,
		time_reset.tv_sec, time_reset.tv_nsec,
		time_run2.tv_sec, time_run2.tv_nsec,
-		time_ro.tv_sec, time_ro.tv_nsec);
+		time_ro.tv_sec, time_ro.tv_nsec,
+		time_rw.tv_sec, time_rw.tv_nsec);

	/*
	 * Delete even numbered slots (arbitrary) and unmap the first half of