Skip to content

Commit c61769a

Browse files
committed
amd64: slightly improve TLB flushing
This is still somewhat broken (see #721), since it relies on the flush_tlb function ending up in the big area around the code that is still identity mapped. So, this code should probably move somewhere else. However, this fixes a very significant bug in the prior code, where the flush_tlb() function called on guest function dispatch could return to the wrong address, due to the correct return address having been pushed onto the wrong page due to a stale TLB entry. Signed-off-by: Lucy Menon <168595099+syntactically@users.noreply.github.com>
1 parent f83ea1c commit c61769a

9 files changed

Lines changed: 117 additions & 91 deletions

File tree

src/hyperlight_guest/src/exit.rs

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,27 +18,6 @@ use core::arch::asm;
1818
use core::ffi::{CStr, c_char};
1919

2020
use hyperlight_common::outb::OutBAction;
21-
use tracing::instrument;
22-
23-
/// Halt the execution of the guest and returns control to the host.
24-
/// Halt is generally called for a successful completion of the guest's work,
25-
/// this means we can instrument it as a trace point because the trace state
26-
/// shall not be locked at this point (we are not in an exception context).
27-
#[inline(never)]
28-
#[instrument(skip_all, level = "Trace")]
29-
pub fn halt() {
30-
#[cfg(all(feature = "trace_guest", target_arch = "x86_64"))]
31-
{
32-
// Send data before halting
33-
// If there is no data, this doesn't do anything
34-
// The reason we do this here instead of in `hlt` asm function
35-
// is to avoid allocating before halting, which leaks memory
36-
// because the guest is not expected to resume execution after halting.
37-
hyperlight_guest_tracing::flush();
38-
}
39-
40-
unsafe { asm!("hlt", options(nostack)) };
41-
}
4221

4322
/// Exits the VM with an Abort OUT action and code 0.
4423
#[unsafe(no_mangle)]
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/*
2+
Copyright 2025 The Hyperlight Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
//! This module contains the architecture-specific dispatch function
18+
//! entry sequence
19+
20+
// The extern "C" here is a lie until #![feature(abi_custom)] is stabilised
21+
unsafe extern "C" {
22+
/// There are two reasons that we need an architecture-specific
23+
/// assembly stub currently: (1) to ensure that the fact that the
24+
/// dispatch function never returns but can be executed again is
25+
/// OK (i.e. we must ensure that there is no stack frame
26+
/// teardown); (2) at least presently, to ensure that the TLB is
27+
/// flushed in certain cases.
28+
///
29+
/// # TLB flushing
30+
///
31+
/// The hyperlight host likes to use one partition and reset it in
32+
/// various ways; if that has happened, there might stale TLB
33+
/// entries hanging around from the former user of the
34+
/// partition. Flushing the TLB here is not quite the right thing
35+
/// to do, since incorrectly cached entries could make even this
36+
/// code not exist, but regrettably there is not a simple way for
37+
/// the host to trigger flushing when it ought to happen, so for
38+
/// now this works in practice, since the text segment is always
39+
/// part of the big identity-mapped region at the base of the
40+
/// guest. The stack, however, is not part of the snapshot region
41+
/// which is (in practice) idmapped for the relevant area, so this
42+
/// cannot touch the stack until the flush is done.
43+
///
44+
/// Currently this just always flips CR4.PGE back and forth to
45+
/// trigger a tlb flush. We should use a faster approach where
46+
/// available
47+
///
48+
/// # ABI
49+
///
50+
/// The ZF should be set if a TLB flush is required on this call
51+
/// (e.g. the first call after a snapshot restore)
52+
///
53+
/// The stack pointer should, unusually for amd64, by 16-byte
54+
/// aligned at the beginning of the function---no return address
55+
/// should be pushed.
56+
pub(crate) unsafe fn dispatch_function(must_flush_tlb: u64);
57+
}
58+
core::arch::global_asm!("
59+
.global dispatch_function
60+
dispatch_function:
61+
jnz flush_done
62+
mov rdi, cr4
63+
xor rdi, 0x80
64+
mov cr4, rdi
65+
xor rdi, 0x80
66+
mov cr4, rdi
67+
flush_done:
68+
call {internal_dispatch_function}\n
69+
hlt\n
70+
", internal_dispatch_function = sym crate::guest_function::call::internal_dispatch_function);

src/hyperlight_guest_bin/src/arch/amd64/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ limitations under the License.
1515
*/
1616

1717
pub(crate) mod context;
18+
pub(crate) mod dispatch;
1819
pub mod exception;
1920
mod init;
2021
mod layout;

src/hyperlight_guest_bin/src/guest_function/call.rs

Lines changed: 15 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ use hyperlight_common::flatbuffer_wrappers::function_call::{FunctionCall, Functi
2222
use hyperlight_common::flatbuffer_wrappers::function_types::{FunctionCallResult, ParameterType};
2323
use hyperlight_common::flatbuffer_wrappers::guest_error::{ErrorCode, GuestError};
2424
use hyperlight_guest::error::{HyperlightGuestError, Result};
25-
use hyperlight_guest::exit::halt;
2625
use tracing::{Span, instrument};
2726

2827
use crate::{GUEST_HANDLE, REGISTERED_GUEST_FUNCTIONS};
@@ -79,14 +78,17 @@ pub(crate) fn call_guest_function(function_call: FunctionCall) -> Result<Vec<u8>
7978
}
8079
}
8180

82-
// This function is marked as no_mangle/inline to prevent the compiler from inlining it , if its inlined the epilogue will not be called
83-
// and we will leak memory as the epilogue will not be called as halt() is not going to return.
84-
//
85-
// This function may panic, as we have no other ways of dealing with errors at this level
86-
#[unsafe(no_mangle)]
87-
#[inline(never)]
88-
#[instrument(skip_all, parent = Span::current(), level= "Trace")]
89-
fn internal_dispatch_function() {
81+
pub(crate) fn internal_dispatch_function() {
82+
// Read the current TSC to report it to the host with the spans/events
83+
// This helps calculating the timestamps relative to the guest call
84+
#[cfg(feature = "trace_guest")]
85+
{
86+
let guest_start_tsc = hyperlight_guest_tracing::invariant_tsc::read_tsc();
87+
// Reset the trace state for the new guest function call with the new start TSC
88+
// This clears any existing spans/events from previous calls ensuring a clean state
89+
hyperlight_guest_tracing::new_call(guest_start_tsc);
90+
}
91+
9092
let handle = unsafe { GUEST_HANDLE };
9193

9294
#[cfg(debug_assertions)]
@@ -114,35 +116,9 @@ fn internal_dispatch_function() {
114116
.expect("Failed to serialize function call result");
115117
}
116118
}
117-
}
118-
119-
// This is implemented as a separate function to make sure that epilogue in the internal_dispatch_function is called before the halt()
120-
// which if it were included in the internal_dispatch_function cause the epilogue to not be called because the halt() would not return
121-
// when running in the hypervisor.
122-
#[instrument(skip_all, parent = Span::current(), level= "Trace")]
123-
pub(crate) extern "C" fn dispatch_function() {
124-
// The hyperlight host likes to use one partition and reset it in
125-
// various ways; if that has happened, there might stale TLB
126-
// entries hanging around from the former user of the
127-
// partition. Flushing the TLB here is not quite the right thing
128-
// to do, since incorrectly cached entries could make even this
129-
// code not exist, but regrettably there is not a simple way for
130-
// the host to trigger flushing when it ought to happen, so for
131-
// now this works in practice, since the text segment is always
132-
// part of the big identity-mapped region at the base of the
133-
// guest.
134-
crate::paging::flush_tlb();
135-
136-
// Read the current TSC to report it to the host with the spans/events
137-
// This helps calculating the timestamps relative to the guest call
138-
#[cfg(feature = "trace_guest")]
139-
{
140-
let guest_start_tsc = hyperlight_guest_tracing::invariant_tsc::read_tsc();
141-
// Reset the trace state for the new guest function call with the new start TSC
142-
// This clears any existing spans/events from previous calls ensuring a clean state
143-
hyperlight_guest_tracing::new_call(guest_start_tsc);
144-
}
145119

146-
internal_dispatch_function();
147-
halt();
120+
// Ensure that any tracing output during the call is flushed to
121+
// the host, if necessary.
122+
#[cfg(all(feature = "trace_guest", target_arch = "x86_64"))]
123+
hyperlight_guest_tracing::flush();
148124
}

src/hyperlight_guest_bin/src/lib.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ extern crate alloc;
2020

2121
use core::fmt::Write;
2222

23+
use arch::dispatch::dispatch_function;
2324
use buddy_system_allocator::LockedHeap;
24-
use guest_function::call::dispatch_function;
2525
use guest_function::register::GuestFunctionRegister;
2626
use guest_logger::init_logger;
2727
use hyperlight_common::flatbuffer_wrappers::guest_error::ErrorCode;
@@ -235,6 +235,11 @@ pub(crate) extern "C" fn generic_init(
235235
hyperlight_main();
236236
}
237237

238+
// Ensure that any tracing output from the initialisation phase is
239+
// flushed to the host, if necessary.
240+
#[cfg(all(feature = "trace_guest", target_arch = "x86_64"))]
241+
hyperlight_guest_tracing::flush();
242+
238243
dispatch_function as usize as u64
239244
}
240245

src/hyperlight_guest_bin/src/paging.rs

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -144,22 +144,3 @@ pub fn virt_to_phys(gva: vmem::VirtAddr) -> impl Iterator<Item = vmem::Mapping>
144144
pub fn phys_to_virt(gpa: vmem::PhysAddr) -> Option<*mut u8> {
145145
GuestMappingOperations::new().try_phys_to_virt(gpa)
146146
}
147-
148-
pub fn flush_tlb() {
149-
// Currently this just always flips CR4.PGE back and forth to
150-
// trigger a tlb flush. We should use a faster approach where
151-
// available
152-
let mut orig_cr4: u64;
153-
unsafe {
154-
asm!("mov {}, cr4", out(reg) orig_cr4);
155-
}
156-
let tmp_cr4: u64 = orig_cr4 ^ (1 << 7); // CR4.PGE
157-
unsafe {
158-
asm!(
159-
"mov cr4, {}",
160-
"mov cr4, {}",
161-
in(reg) tmp_cr4,
162-
in(reg) orig_cr4
163-
);
164-
}
165-
}

src/hyperlight_host/src/hypervisor/hyperlight_vm.rs

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ pub(crate) struct HyperlightVm {
104104

105105
mmap_regions: Vec<(u32, MemoryRegion)>, // Later mapped regions (slot number, region)
106106

107+
pending_tlb_flush: bool,
108+
107109
#[cfg(gdb)]
108110
gdb_conn: Option<DebugCommChannel<DebugResponse, DebugMsg>>,
109111
#[cfg(gdb)]
@@ -421,6 +423,8 @@ impl HyperlightVm {
421423

422424
mmap_regions: Vec::new(),
423425

426+
pending_tlb_flush: false,
427+
424428
#[cfg(gdb)]
425429
gdb_conn,
426430
#[cfg(gdb)]
@@ -662,17 +666,25 @@ impl HyperlightVm {
662666
let NextAction::Call(dispatch_func_addr) = self.entrypoint else {
663667
return Err(DispatchGuestCallError::Uninitialized);
664668
};
669+
let mut rflags = 1 << 1; // RFLAGS.1 is RES1
670+
if self.pending_tlb_flush {
671+
rflags |= 1 << 6; // set ZF if we need a tlb flush done before anything else executes
672+
self.pending_tlb_flush = false;
673+
}
665674
// set RIP and RSP, reset others
666675
let regs = CommonRegisters {
667676
rip: dispatch_func_addr,
668677
// We usually keep the top of the stack 16-byte
669-
// aligned. However, the ABI requirement is that the stack
670-
// be aligned _before a call instruction_, which means
671-
// that the stack needs to actually be ≡ 8 mod 16 at the
672-
// first instruction (since, on x64, a call instruction
673-
// automatically pushes a return address).
674-
rsp: self.rsp_gva - 8,
675-
rflags: 1 << 1,
678+
// aligned. Since the usual ABI requirement is that the
679+
// stack be aligned _before a call instruction_, one might
680+
// expect that the stack pointer here needs to actually be
681+
// ≡ 8 mod 16 at the first instruction (since, on x64, a
682+
// call instruction automatically pushes a return
683+
// address). However, the x64 entry stub in
684+
// hyperlight_guest::arch::dispatch handles this itself,
685+
// so we do use the aligned address here.
686+
rsp: self.rsp_gva,
687+
rflags,
676688
..Default::default()
677689
};
678690
self.vm
@@ -945,6 +957,7 @@ impl HyperlightVm {
945957
// to point to the new (relocated) page tables
946958
let mut sregs = *sregs;
947959
sregs.cr3 = cr3;
960+
self.pending_tlb_flush = true;
948961
self.vm.set_sregs(&sregs)?;
949962
}
950963
#[cfg(not(feature = "init-paging"))]

src/hyperlight_host/tests/integration_test.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -708,13 +708,13 @@ fn log_message() {
708708
// - internal_dispatch_function does a log::trace! in debug mode
709709
// - logs from trace level tracing spans created as logs because of the tracing `log` feature
710710
// - 4 from evolve call (hyperlight_main + halt)
711-
// - 16 from guest call (internal_dispatch_function + others)
711+
// - 8 from guest call (internal_dispatch_function + others)
712712
// and are multiplied because we make 6 calls to `log_test_messages`
713713
// NOTE: These numbers need to be updated if log messages or spans are added/removed
714714
let num_fixed_trace_log = if cfg!(debug_assertions) {
715-
(1 + 20) * 6
715+
(1 + 12) * 6
716716
} else {
717-
20 * 6
717+
12 * 6
718718
};
719719

720720
let tests = vec![

src/hyperlight_host/tests/sandbox_host_tests.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ use hyperlight_testing::simple_guest_as_string;
2626

2727
pub mod common; // pub to disable dead_code warning
2828
use crate::common::{
29-
with_all_sandboxes, with_all_sandboxes_cfg, with_all_sandboxes_with_writer, with_all_uninit_sandboxes,
29+
with_all_sandboxes, with_all_sandboxes_cfg, with_all_sandboxes_with_writer,
30+
with_all_uninit_sandboxes,
3031
};
3132

3233
#[test]

0 commit comments

Comments
 (0)