Skip to content

Commit 4e923d4

Browse files
committed
Revert "Be more careful about poisoning"
This reverts commit cd50b1b. Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com>
1 parent cd50b1b commit 4e923d4

1 file changed

Lines changed: 13 additions & 48 deletions

File tree

src/hyperlight_host/src/sandbox/initialized_multi_use.rs

Lines changed: 13 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -287,64 +287,35 @@ impl MultiUseSandbox {
287287
return Err(SnapshotSandboxMismatch);
288288
}
289289

290-
// A prior failed restore may have swapped mem_mgr.shared_mem
291-
// without updating the hypervisor's GPA mapping. Track this
292-
// so we can force a remap below if needed.
293-
let was_poisoned = self.poisoned;
294-
295-
// Assume poisoned: any failure below leaves the sandbox
296-
// inconsistent. Only cleared at the end on success.
297-
self.poisoned = true;
298-
299-
// Failure here may partially reset vCPU state. The sandbox is
300-
// poisoned, and the next restore() retry will call
301-
// reset_vm_state() again which is idempotent.
302-
self.vm
303-
.reset_vm_state()
304-
.map_err(HyperlightVmError::Restore)?;
290+
// Reset VM/vCPU state before memory restore.
291+
self.vm.reset_vm_state().map_err(|e| {
292+
self.poisoned = true;
293+
HyperlightVmError::Restore(e)
294+
})?;
305295

306-
// Failure here may leave mem_mgr with partially swapped
307-
// shared_mem/scratch_mem. The sandbox is poisoned, and if
308-
// shared_mem was swapped without updating the hypervisor, the
309-
// was_poisoned path below handles it on the retry.
310296
let (gsnapshot, gscratch) = self.mem_mgr.restore_snapshot(&snapshot)?;
311297
if let Some(gsnapshot) = gsnapshot {
312-
// Failure here leaves the hypervisor pointing at the old
313-
// (possibly freed) snapshot. The sandbox is poisoned, and
314-
// the next restore() will remap via the was_poisoned path.
315-
self.vm
316-
.update_snapshot_mapping(gsnapshot)
317-
.map_err(|e| HyperlightError::HyperlightVmError(e.into()))?;
318-
} else if was_poisoned {
319-
// restore_snapshot() skipped the swap because shared_mem
320-
// already matched, but the hypervisor may still point at
321-
// the old backing from before the failed restore. Force
322-
// a remap. Failure leaves us still poisoned for the same
323-
// reason as above.
324-
let snap_mem = snapshot.memory().to_mgr_snapshot_mem()?;
325-
let (_, gsnapshot) = snap_mem.build();
326298
self.vm
327299
.update_snapshot_mapping(gsnapshot)
328300
.map_err(|e| HyperlightError::HyperlightVmError(e.into()))?;
329301
}
330302
if let Some(gscratch) = gscratch {
331-
// Failure here leaves the hypervisor pointing at old
332-
// scratch. The sandbox is poisoned; scratch size is fixed
333-
// per sandbox so retry will zero the same allocation and
334-
// remap here again.
335303
self.vm
336304
.update_scratch_mapping(gscratch)
337305
.map_err(|e| HyperlightError::HyperlightVmError(e.into()))?;
338306
}
339307

340-
// Failure here means sregs (CR3, etc.) are not set. The
341-
// sandbox is poisoned; retry will call restore_sregs() again.
342308
let sregs = snapshot.sregs().ok_or_else(|| {
343309
HyperlightError::Error("snapshot from running sandbox should have sregs".to_string())
344310
})?;
311+
// TODO (ludfjig): Go through the rest of possible errors in this `MultiUseSandbox::restore` function
312+
// and determine if they should also poison the sandbox.
345313
self.vm
346314
.restore_sregs(snapshot.root_pt_gpa(), sregs)
347-
.map_err(HyperlightVmError::Restore)?;
315+
.map_err(|e| {
316+
self.poisoned = true;
317+
HyperlightVmError::Restore(e)
318+
})?;
348319

349320
self.vm.set_stack_top(snapshot.stack_top_gva());
350321
self.vm.set_entrypoint(snapshot.entrypoint());
@@ -356,19 +327,12 @@ impl MultiUseSandbox {
356327
let regions_to_map = snapshot_regions.difference(&current_regions);
357328

358329
for region in regions_to_unmap {
359-
// Failure here leaves extra regions mapped. The sandbox is
360-
// poisoned; retry will attempt the unmap again since the
361-
// region will still be in the current set.
362330
self.vm
363331
.unmap_region(region)
364332
.map_err(HyperlightVmError::UnmapRegion)?;
365333
}
366334

367335
for region in regions_to_map {
368-
// Failure here leaves regions missing. The sandbox is
369-
// poisoned; retry will attempt the map again since the
370-
// region will still be in the snapshot set.
371-
//
372336
// Safety: The region has been mapped before, and at that point the caller promised that the memory region is valid
373337
// in their call to `MultiUseSandbox::map_region`
374338
unsafe { self.vm.map_region(region) }.map_err(HyperlightVmError::MapRegion)?;
@@ -377,8 +341,9 @@ impl MultiUseSandbox {
377341
// The restored snapshot is now our most current snapshot
378342
self.snapshot = Some(snapshot.clone());
379343

380-
// Clear poison state: restore completed successfully.
344+
// Clear poison state when successfully restoring from snapshot.
381345
//
346+
// # Safety:
382347
// This is safe because:
383348
// 1. Snapshots can only be taken from non-poisoned sandboxes (verified at snapshot creation)
384349
// 2. Restoration completely replaces all memory state, eliminating:

0 commit comments

Comments
 (0)