@@ -16,16 +16,19 @@ limitations under the License.
1616
1717use std:: os:: raw:: c_void;
1818
19+ use hyperlight_common:: mem:: PAGE_SIZE_USIZE ;
1920use hyperlight_common:: outb:: VmAction ;
2021#[ cfg( feature = "trace_guest" ) ]
2122use tracing:: Span ;
2223#[ cfg( feature = "trace_guest" ) ]
2324use tracing_opentelemetry:: OpenTelemetrySpanExt ;
24- use windows:: Win32 :: Foundation :: { CloseHandle , FreeLibrary , HANDLE } ;
25+ use windows:: Win32 :: Foundation :: { CloseHandle , FreeLibrary , HANDLE , INVALID_HANDLE_VALUE } ;
2526use windows:: Win32 :: System :: Hypervisor :: * ;
2627use windows:: Win32 :: System :: LibraryLoader :: * ;
27- use windows:: Win32 :: System :: Memory :: { MEMORY_MAPPED_VIEW_ADDRESS , UnmapViewOfFile } ;
28- use windows:: core:: s;
28+ use windows:: Win32 :: System :: Memory :: {
29+ CreateFileMappingA , MEMORY_MAPPED_VIEW_ADDRESS , PAGE_READWRITE , UnmapViewOfFile ,
30+ } ;
31+ use windows:: core:: { PCSTR , s} ;
2932use windows_result:: HRESULT ;
3033
3134#[ cfg( gdb) ]
@@ -44,7 +47,10 @@ use crate::hypervisor::virtual_machine::{
4447 VirtualMachine , VmExit ,
4548} ;
4649use crate :: hypervisor:: wrappers:: HandleWrapper ;
47- use crate :: mem:: memory_region:: { MemoryRegion , MemoryRegionFlags , MemoryRegionType } ;
50+ use crate :: mem:: layout:: SandboxMemoryLayout ;
51+ use crate :: mem:: memory_region:: {
52+ MemoryRegion , MemoryRegionFlags , MemoryRegionType , SurrogateMapping ,
53+ } ;
4854#[ cfg( feature = "trace_guest" ) ]
4955use crate :: sandbox:: trace:: TraceContext as SandboxTraceContext ;
5056
@@ -86,6 +92,134 @@ fn release_file_mapping(view_base: *mut c_void, mapping_handle: HandleWrapper) {
8692 }
8793}
8894
95+ /// GPA for the NPT flush dummy page. Placed just past the maximum
96+ /// snapshot region so it can never collide with guest memory.
97+ const NPT_FLUSH_GPA : u64 =
98+ ( SandboxMemoryLayout :: BASE_ADDRESS + SandboxMemoryLayout :: MAX_MEMORY_SIZE ) as u64 ;
99+
100+ /// A single dummy page mapped at a fixed GPA whose permissions are
101+ /// toggled on each [`reset_partition`](VirtualMachine::reset_partition)
102+ /// call. On AMD SVM, `WHvResetPartition` does not flush the NPT
103+ /// (GPA→HPA) TLB. Changing a GPA mapping's permissions via
104+ /// `WHvMapGpaRange2` is the only WHP API path that triggers
105+ /// `ValFlushNestedTb` (an NPT TbGeneration bump), which causes
106+ /// `SVM_TLB_FLUSH_CURRENT_ASID` on the next VMRUN.
107+ #[ derive( Debug ) ]
108+ struct NptFlushPage {
109+ /// Backing file-mapping handle.
110+ handle : HANDLE ,
111+ /// Address of the page in the surrogate process.
112+ surrogate_addr : * mut c_void ,
113+ /// Cached function pointer for `WHvMapGpaRange2`.
114+ map_gpa_range2 : WHvMapGpaRange2Func ,
115+ /// Toggled on each flush to alternate the GPA permission between
116+ /// READ and READ|WRITE.
117+ toggle : bool ,
118+ }
119+
120+ impl NptFlushPage {
121+ const GPA_FLAGS_READ : WHV_MAP_GPA_RANGE_FLAGS = WHvMapGpaRangeFlagRead ;
122+ const GPA_FLAGS_READWRITE : WHV_MAP_GPA_RANGE_FLAGS =
123+ WHV_MAP_GPA_RANGE_FLAGS ( WHvMapGpaRangeFlagRead . 0 | WHvMapGpaRangeFlagWrite . 0 ) ;
124+
125+ /// Allocate a dummy page, map it into the surrogate process, and
126+ /// create the initial GPA mapping at [`NPT_FLUSH_GPA`].
127+ fn new (
128+ partition : WHV_PARTITION_HANDLE ,
129+ surrogate_process : & mut SurrogateProcess ,
130+ ) -> Result < Self , CreateVmError > {
131+ let handle = unsafe {
132+ CreateFileMappingA (
133+ INVALID_HANDLE_VALUE ,
134+ None ,
135+ PAGE_READWRITE ,
136+ 0 ,
137+ PAGE_SIZE_USIZE as u32 ,
138+ PCSTR :: null ( ) ,
139+ )
140+ . map_err ( |e| CreateVmError :: InitializeVm ( e. into ( ) ) ) ?
141+ } ;
142+
143+ let surrogate_addr = surrogate_process
144+ . map (
145+ handle. into ( ) ,
146+ 0 , // sentinel key; MapViewOfFile never returns 0
147+ PAGE_SIZE_USIZE ,
148+ & SurrogateMapping :: SandboxMemory ,
149+ )
150+ . map_err ( |e| CreateVmError :: SurrogateProcess ( e. to_string ( ) ) ) ?;
151+
152+ let map_gpa_range2 = unsafe {
153+ try_load_whv_map_gpa_range2 ( ) . map_err ( |e| CreateVmError :: InitializeVm ( e. into ( ) ) ) ?
154+ } ;
155+
156+ let res = unsafe {
157+ map_gpa_range2 (
158+ partition,
159+ surrogate_process. process_handle . into ( ) ,
160+ surrogate_addr,
161+ NPT_FLUSH_GPA ,
162+ PAGE_SIZE_USIZE as u64 ,
163+ Self :: GPA_FLAGS_READ ,
164+ )
165+ } ;
166+ if res. is_err ( ) {
167+ return Err ( CreateVmError :: InitializeVm (
168+ windows_result:: Error :: from_hresult ( res) . into ( ) ,
169+ ) ) ;
170+ }
171+
172+ Ok ( Self {
173+ handle,
174+ surrogate_addr,
175+ map_gpa_range2,
176+ toggle : false ,
177+ } )
178+ }
179+
180+ /// Toggle the dummy page's GPA permission to force an NPT TLB
181+ /// flush. VID skips the hypercall when permissions are unchanged,
182+ /// so we alternate between READ and READ|WRITE.
183+ fn flush (
184+ & mut self ,
185+ partition : WHV_PARTITION_HANDLE ,
186+ surrogate_process : & SurrogateProcess ,
187+ ) -> std:: result:: Result < ( ) , RegisterError > {
188+ self . toggle = !self . toggle ;
189+ let flags = if self . toggle {
190+ Self :: GPA_FLAGS_READWRITE
191+ } else {
192+ Self :: GPA_FLAGS_READ
193+ } ;
194+ let res = unsafe {
195+ ( self . map_gpa_range2 ) (
196+ partition,
197+ surrogate_process. process_handle . into ( ) ,
198+ self . surrogate_addr ,
199+ NPT_FLUSH_GPA ,
200+ PAGE_SIZE_USIZE as u64 ,
201+ flags,
202+ )
203+ } ;
204+ if res. is_err ( ) {
205+ return Err ( RegisterError :: ResetPartition (
206+ windows_result:: Error :: from_hresult ( res) . into ( ) ,
207+ ) ) ;
208+ }
209+ Ok ( ( ) )
210+ }
211+ }
212+
213+ impl Drop for NptFlushPage {
214+ fn drop ( & mut self ) {
215+ // The surrogate mapping is freed when SurrogateProcess drops;
216+ // we only need to close the file-mapping handle.
217+ if let Err ( e) = unsafe { CloseHandle ( self . handle ) } {
218+ tracing:: error!( "Failed to close NPT flush page handle: {:?}" , e) ;
219+ }
220+ }
221+ }
222+
89223/// A Windows Hypervisor Platform implementation of a single-vcpu VM
90224#[ derive( Debug ) ]
91225pub ( crate ) struct WhpVm {
@@ -98,15 +232,18 @@ pub(crate) struct WhpVm {
98232 /// Handle to the background timer (if started).
99233 #[ cfg( feature = "hw-interrupts" ) ]
100234 timer : Option < TimerThread > ,
235+ /// Dummy page whose GPA permissions are toggled to force NPT TLB
236+ /// flushes after `WHvResetPartition` on AMD SVM hosts.
237+ npt_flush : NptFlushPage ,
101238}
102239
103240// Safety: `WhpVm` is !Send because it holds `SurrogateProcess` which contains a raw pointer
104241// `allocated_address` (*mut c_void). This pointer represents a memory mapped view address
105242// in the surrogate process. It is never dereferenced, only used for address arithmetic and
106243// resource management (unmapping). This is a system resource that is not bound to the creating
107244// thread and can be safely transferred between threads.
108- // `file_mappings` contains raw pointers that are also kernel resource handles,
109- // safe to use from any thread.
245+ // `file_mappings` and `NptFlushPage` contain raw pointers that are also kernel
246+ // resource handles, safe to use from any thread.
110247unsafe impl Send for WhpVm { }
111248
112249impl WhpVm {
@@ -144,16 +281,19 @@ impl WhpVm {
144281
145282 let mgr = get_surrogate_process_manager ( )
146283 . map_err ( |e| CreateVmError :: SurrogateProcess ( e. to_string ( ) ) ) ?;
147- let surrogate_process = mgr
284+ let mut surrogate_process = mgr
148285 . get_surrogate_process ( )
149286 . map_err ( |e| CreateVmError :: SurrogateProcess ( e. to_string ( ) ) ) ?;
150287
288+ let npt_flush = NptFlushPage :: new ( partition, & mut surrogate_process) ?;
289+
151290 Ok ( WhpVm {
152291 partition,
153292 surrogate_process,
154293 file_mappings : Vec :: new ( ) ,
155294 #[ cfg( feature = "hw-interrupts" ) ]
156295 timer : None ,
296+ npt_flush,
157297 } )
158298 }
159299
@@ -729,35 +869,25 @@ impl VirtualMachine for WhpVm {
729869 WHvResetPartition ( self . partition )
730870 . map_err ( |e| RegisterError :: ResetPartition ( e. into ( ) ) ) ?;
731871
732- // WHvResetPartition resets the VMCB but may not update the
733- // hypervisor's internal register-change tracking used for
734- // TLB flush decisions on AMD SVM. Explicitly re-announce
735- // the post-reset CR0 (PG=0) via the WHP API so that the
736- // subsequent restore_sregs() write of CR0 with PG=1 is
737- // detected as a paging-mode change, triggering
738- // SVM_TLB_FLUSH_CURRENT_ASID on the next VMRUN.
739- let post_reset_sregs = self . sregs ( ) ?;
740- self . set_registers ( & [ (
741- WHvX64RegisterCr0 ,
742- Align16 ( WHV_REGISTER_VALUE {
743- Reg64 : post_reset_sregs. cr0 ,
744- } ) ,
745- ) ] )
746- . map_err ( |e| RegisterError :: ResetPartition ( e. into ( ) ) ) ?;
872+ // WHvResetPartition does not flush the NPT (GPA→HPA) TLB
873+ // on AMD SVM. Toggle the dummy page's GPA permission to
874+ // trigger a TbGeneration bump, forcing the next VMRUN to
875+ // issue SVM_TLB_FLUSH_CURRENT_ASID.
876+ self . npt_flush
877+ . flush ( self . partition , & self . surrogate_process ) ?;
747878
748879 // WHvResetPartition resets LAPIC to power-on defaults.
749880 // Re-initialize it when LAPIC emulation is active.
750881 #[ cfg( feature = "hw-interrupts" ) ]
751882 Self :: init_lapic_bulk ( self . partition )
752883 . map_err ( |e| RegisterError :: ResetPartition ( e. into ( ) ) ) ?;
753884
754- // With hw-interrupts, set_sregs filters out APIC_BASE to
755- // avoid disabling the LAPIC (the snapshot sregs default it
756- // to 0). On AMD/SVM this means SVM_CLEAN_FIELD_AVIC is
757- // never dirtied after the VMCB rebuild, causing the CPU to
758- // use stale cached AVIC state on VMRUN which corrupts guest
759- // memory reads. Re-writing APIC_BASE to its post-reset
760- // value forces the dirty bit via ValSetEmulatedApicBase.
885+ // set_sregs filters out APIC_BASE (the snapshot sregs
886+ // default it to 0, which would disable the LAPIC). This
887+ // means APIC_BASE is never written after the VMCB rebuild,
888+ // so the AVIC clean bit stays set and the CPU may use stale
889+ // cached AVIC state. Re-writing the post-reset value forces
890+ // the hypervisor to dirty SVM_CLEAN_FIELD_AVIC.
761891 #[ cfg( feature = "hw-interrupts" ) ]
762892 {
763893 let apic_base = self . sregs ( ) ?. apic_base ;
0 commit comments