Skip to content

Commit 44b770f

Browse files
committed
[trace] Walk page table entries to get the tracing data
- With the new Copy-on-Write changes, guest virtual addresses and guest physical addresses are no longer identity-mapped, so the host needs a way to translate between them when a new trace batch arrives from the guest.

Signed-off-by: Doru Blânzeanu <dblnz@pm.me>
1 parent 4e0ea22 commit 44b770f

7 files changed

Lines changed: 181 additions & 68 deletions

File tree

src/hyperlight_common/src/flatbuffer_wrappers/guest_trace_data.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,15 @@ use crate::flatbuffers::hyperlight::generated::{
4040
OpenSpanTypeArgs as FbOpenSpanTypeArgs,
4141
};
4242

43+
/// TODO: Make this constant configurable at runtime by the guest
44+
/// Maybe use a weak symbol that the guest can override at link time?
45+
///
46+
/// Pre-calculated capacity for the encoder buffer
47+
/// This is to avoid reallocations in the guest
48+
/// If the next event would exceed this size, the encoder will flush the current buffer to the host
49+
/// before encoding the new event.
50+
pub const MAX_TRACE_DATA_SIZE: usize = 4096;
51+
4352
/// Key-Value pair structure used in tracing spans/events
4453
#[derive(Debug, Clone, PartialEq, Eq)]
4554
pub struct EventKeyValue {

src/hyperlight_common/src/vmem.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,8 +222,8 @@ pub struct Mapping {
222222
/// are being remapped, TLB invalidation may need to be performed
223223
/// afterwards.
224224
pub use arch::map;
225-
/// This function is not presently used for anything, but is useful
226-
/// for debugging
225+
/// This function is presently used for reading the tracing data; it is
226+
/// also useful for debugging
227227
///
228228
/// # Safety
229229
/// This function traverses page table data structures, and should not

src/hyperlight_guest_tracing/src/state.rs

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use alloc::vec::Vec;
2020
use core::sync::atomic::{AtomicU64, Ordering};
2121

2222
use hyperlight_common::flatbuffer_wrappers::guest_trace_data::{
23-
EventsBatchEncoder, EventsEncoder, GuestEvent,
23+
EventsBatchEncoder, EventsEncoder, GuestEvent, MAX_TRACE_DATA_SIZE,
2424
};
2525
use hyperlight_common::outb::OutBAction;
2626
use tracing_core::Event;
@@ -43,12 +43,6 @@ pub(crate) struct GuestState {
4343
stack: Vec<u64>,
4444
}
4545

46-
/// TODO: Change these constant to be configurable at runtime by the guest
47-
/// Maybe use a weak symbol that the guest can override at link time?
48-
///
49-
/// Pre-calculated capacity for the encoder buffer
50-
/// This is to avoid reallocations in the guest
51-
const ENCODER_CAPACITY: usize = 4096;
5246
/// Start with a stack capacity for active spans
5347
const ACTIVE_SPANS_CAPACITY: usize = 64;
5448

@@ -69,7 +63,7 @@ fn send_to_host(data: &[u8]) {
6963

7064
impl GuestState {
7165
pub(crate) fn new(guest_start_tsc: u64) -> Self {
72-
let mut encoder = EventsBatchEncoder::new(ENCODER_CAPACITY, send_to_host);
66+
let mut encoder = EventsBatchEncoder::new(MAX_TRACE_DATA_SIZE, send_to_host);
7367
encoder.encode(&GuestEvent::GuestStart {
7468
tsc: guest_start_tsc,
7569
});

src/hyperlight_host/src/hypervisor/hyperlight_vm.rs

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,8 @@ pub enum RunVmError {
191191
DebugHandler(#[from] HandleDebugError),
192192
#[error("Execution was cancelled by the host")]
193193
ExecutionCancelledByHost,
194+
#[error("Failed to access page: {0}")]
195+
PageTableAccess(AccessPageTableError),
194196
#[cfg(feature = "trace_guest")]
195197
#[error("Failed to get registers: {0}")]
196198
GetRegs(RegisterError),
@@ -754,10 +756,18 @@ impl HyperlightVm {
754756
tc.end_host_trace();
755757
// Handle the guest trace data if any
756758
let regs = self.vm.regs().map_err(RunVmError::GetRegs)?;
757-
if let Err(e) = tc.handle_trace(&regs, mem_mgr) {
758-
// If no trace data is available, we just log a message and continue
759-
// Is this the right thing to do?
760-
log::debug!("Error handling guest trace: {:?}", e);
759+
760+
// Only parse the trace if the guest has reported trace data
761+
if tc.has_trace_data(&regs) {
762+
let root_pt = self.get_root_pt().map_err(RunVmError::PageTableAccess)?;
763+
764+
// If something goes wrong with parsing the trace data, we log the error and
765+
// continue execution instead of returning an error since this is not critical
766+
// to correct execution of the guest
767+
tc.handle_trace(&regs, mem_mgr, root_pt)
768+
.unwrap_or_else(|e| {
769+
tracing::error!("Cannot handle trace data: {}", e);
770+
});
761771
}
762772
}
763773
result

src/hyperlight_host/src/mem/mgr.rs

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,117 @@ impl SandboxMemoryManager<HostSharedMemory> {
411411

412412
Ok(())
413413
}
414+
415+
/// Read `len` bytes of guest memory starting at a Guest Virtual Address (GVA).
///
/// The guest page tables rooted at `root_pt` are walked to translate the
/// GVA range to guest physical addresses (GPA), and every page is then read
/// from whichever backing memory `access_gpa` resolves it to (`shared_mem`
/// or `scratch_mem`).
///
/// This is necessary because with Copy-on-Write (CoW) the guest's
/// virtual pages are backed by physical pages in the scratch
/// region rather than being identity-mapped.
///
/// # Arguments
/// * `gva` - The Guest Virtual Address to read from
/// * `len` - The number of bytes to read
/// * `root_pt` - The root page table physical address (CR3)
///
/// # Errors
/// Returns an error when:
/// * the page table walk yields no mapping for the requested range,
/// * a returned mapping does not cover the current read position,
/// * a GPA cannot be resolved to host memory or resolves out of bounds,
/// * the mappings do not cover the full requested length.
#[cfg(feature = "trace_guest")]
pub(crate) fn read_guest_memory_by_gva(
    &mut self,
    gva: u64,
    len: usize,
    root_pt: u64,
) -> Result<Vec<u8>> {
    use hyperlight_common::vmem::PAGE_SIZE;

    use crate::sandbox::snapshot::{SharedMemoryPageTableBuffer, access_gpa};

    let scratch_size = self.scratch_mem.mem_size();

    self.shared_mem.with_exclusivity(|snap| {
        self.scratch_mem.with_exclusivity(|scratch| {
            let pt_buf = SharedMemoryPageTableBuffer::new(snap, scratch, scratch_size, root_pt);

            // Walk page tables to get all mappings that cover the GVA range.
            // NOTE(review): `virt_to_phys` is unsafe because it traverses page
            // table data structures; confirm its safety contract holds for
            // `pt_buf` at this call site.
            let mappings: Vec<_> = unsafe {
                hyperlight_common::vmem::virt_to_phys(&pt_buf, gva, len as u64)
            }
            .collect();

            if mappings.is_empty() {
                return Err(new_error!(
                    "No page table mappings found for GVA {:#x} (len {})",
                    gva,
                    len,
                ));
            }

            // Resulting vector of bytes to return
            let mut result = Vec::with_capacity(len);
            let mut current_gva = gva;

            for mapping in &mappings {
                // Stop once the request is satisfied so that a trailing
                // mapping (e.g. for the page right after the range) is
                // neither resolved nor reported as an error.
                if result.len() == len {
                    break;
                }

                // The page table walker should only return valid mappings
                // that cover our current read position.
                if mapping.virt_base > current_gva {
                    return Err(new_error!(
                        "Page table walker returned mapping with virt_base {:#x} > current read position {:#x}",
                        mapping.virt_base,
                        current_gva,
                    ));
                }

                // Offset within this page where copying starts. The mappings
                // come from walking guest page tables, so reject an offset
                // outside the page instead of underflowing the subtraction
                // from PAGE_SIZE below.
                let page_offset = usize::try_from(current_gva - mapping.virt_base)
                    .ok()
                    .filter(|off| *off < PAGE_SIZE)
                    .ok_or_else(|| {
                        new_error!(
                            "Page table walker returned mapping with virt_base {:#x} that does not cover current read position {:#x}",
                            mapping.virt_base,
                            current_gva
                        )
                    })?;

                let bytes_remaining = len - result.len();
                let available_in_page = PAGE_SIZE - page_offset;
                let bytes_to_copy = bytes_remaining.min(available_in_page);

                // Translate the GPA to host memory
                let gpa = mapping.phys_base + page_offset as u64;
                let (mem, offset) = access_gpa(snap, scratch, scratch_size, gpa)
                    .ok_or_else(|| {
                        new_error!(
                            "Failed to resolve GPA {:#x} to host memory (GVA {:#x})",
                            gpa,
                            current_gva
                        )
                    })?;

                // Compute the end of the range with overflow checking so a
                // bogus offset yields an error rather than a panic.
                let slice = offset
                    .checked_add(bytes_to_copy)
                    .and_then(|end| mem.as_slice().get(offset..end))
                    .ok_or_else(|| {
                        new_error!(
                            "GPA {:#x} resolved to out-of-bounds host offset {} (need {} bytes)",
                            gpa,
                            offset,
                            bytes_to_copy
                        )
                    })?;

                result.extend_from_slice(slice);
                current_gva += bytes_to_copy as u64;
            }

            if result.len() != len {
                tracing::error!(
                    "Page table walker returned mappings that don't cover the full requested length: got {}, expected {}",
                    result.len(),
                    len,
                );
                return Err(new_error!(
                    "Could not read full GVA range: got {} of {} bytes {:?}",
                    result.len(),
                    len,
                    mappings
                ));
            }

            Ok(result)
        })
    })??
}
414525
}
415526

416527
#[cfg(test)]

src/hyperlight_host/src/sandbox/snapshot.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ fn hash(memory: &[u8], regions: &[MemoryRegion]) -> Result<[u8; 32]> {
174174
Ok(hasher.finalize().into())
175175
}
176176

177-
fn access_gpa<'a>(
177+
pub(crate) fn access_gpa<'a>(
178178
snap: &'a ExclusiveSharedMemory,
179179
scratch: &'a ExclusiveSharedMemory,
180180
scratch_size: usize,
@@ -197,7 +197,7 @@ pub(crate) struct SharedMemoryPageTableBuffer<'a> {
197197
root: u64,
198198
}
199199
impl<'a> SharedMemoryPageTableBuffer<'a> {
200-
fn new(
200+
pub(crate) fn new(
201201
snap: &'a ExclusiveSharedMemory,
202202
scratch: &'a ExclusiveSharedMemory,
203203
scratch_size: usize,

src/hyperlight_host/src/sandbox/trace/context.rs

Lines changed: 41 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ use std::collections::HashMap;
1818
use std::time::{Duration, Instant, SystemTime};
1919

2020
use hyperlight_common::flatbuffer_wrappers::guest_trace_data::{
21-
EventKeyValue, EventsBatchDecoder, EventsDecoder, GuestEvent,
21+
EventKeyValue, EventsBatchDecoder, EventsDecoder, GuestEvent, MAX_TRACE_DATA_SIZE,
2222
};
2323
use hyperlight_common::outb::OutBAction;
2424
use opentelemetry::global::BoxedSpan;
@@ -28,73 +28,56 @@ use tracing::span::{EnteredSpan, Span};
2828
use tracing_opentelemetry::OpenTelemetrySpanExt;
2929

3030
use crate::hypervisor::regs::CommonRegisters;
31-
use crate::mem::layout::SandboxMemoryLayout;
3231
use crate::mem::mgr::SandboxMemoryManager;
33-
use crate::mem::shared_mem::{HostSharedMemory, SharedMemory};
34-
use crate::{HyperlightError, Result, new_error};
32+
use crate::mem::shared_mem::HostSharedMemory;
33+
use crate::{Result, new_error};
3534

3635
/// Type that helps get the data from the guest provided the registers and memory access
3736
struct EventsBatch {
3837
events: Vec<GuestEvent>,
3938
}
4039

41-
impl
42-
TryFrom<(
43-
&CommonRegisters,
44-
&mut SandboxMemoryManager<HostSharedMemory>,
45-
)> for EventsBatch
46-
{
47-
type Error = HyperlightError;
48-
fn try_from(
49-
(regs, mem_mgr): (
50-
&CommonRegisters,
51-
&mut SandboxMemoryManager<HostSharedMemory>,
52-
),
40+
impl EventsBatch {
41+
/// Extract a batch of guest trace events from guest memory.
42+
///
43+
/// The guest passes the trace data pointer as a Guest Virtual Address (GVA)
44+
/// in register r9. With Copy-on-Write enabled, this GVA may not be
45+
/// identity-mapped to its physical address, so we walk the guest page
46+
/// tables to translate GVA → GPA before reading the data.
47+
///
48+
/// # Arguments
49+
/// * `regs` - The guest registers (r8 = magic, r9 = GVA pointer, r10 = length)
50+
/// * `mem_mgr` - The sandbox memory manager with access to shared and scratch memory
51+
/// * `root_pt` - The root page table physical address (CR3) for GVA translation
52+
fn from_regs(
53+
regs: &CommonRegisters,
54+
mem_mgr: &mut SandboxMemoryManager<HostSharedMemory>,
55+
root_pt: u64,
5356
) -> Result<Self> {
5457
let magic_no = regs.r8;
55-
let trace_data_ptr = regs.r9 as usize;
58+
let trace_data_gva = regs.r9;
5659
let trace_data_len = regs.r10 as usize;
5760

61+
// Validate the magic number to ensure the guest is providing trace data
5862
if magic_no != OutBAction::TraceBatch as u64 {
5963
return Err(new_error!("A TraceBatch is not present"));
6064
}
6165

62-
// Extract the GuestTraceData from guest memory
63-
// This involves:
64-
// 1. Using a mutable reference to the memory manager to get exclusive access to the shared memory.
65-
// This is necessary to ensure that no other part of the code is accessing the memory
66-
// while we are reading from it.
67-
// 2. Getting immutable access to the slice of memory that contains the GuestTraceData
68-
// 3. Parsing the slice into a GuestTraceData structure
69-
//
70-
// Error handling is done at each step to ensure that any issues are properly reported.
71-
// This includes logging errors for easier debugging.
72-
//
73-
// The reason for using `with_exclusivity` is to ensure that we have exclusive access
74-
// and avoid allocating new memory, which needs to be correctly aligned for the
75-
// flatbuffer parsing.
76-
let events = mem_mgr.shared_mem.with_exclusivity(|mem| {
77-
let buf_slice = mem
78-
.as_slice()
79-
// Adjust the pointer to be relative to the base address of the sandbox memory
80-
.get(
81-
trace_data_ptr - SandboxMemoryLayout::BASE_ADDRESS
82-
..trace_data_ptr - SandboxMemoryLayout::BASE_ADDRESS + trace_data_len,
83-
)
84-
// Convert the slice to a Result to handle the case where the slice is out of
85-
// bounds and return a proper error message and log the error.
86-
.ok_or_else(|| {
87-
tracing::error!("Failed to get guest trace batch slice from guest memory");
88-
new_error!("Failed to get guest trace batch slice from guest memory")
89-
})?;
66+
// Validate the length to prevent reading excessive memory
67+
if trace_data_len == 0 || trace_data_len > MAX_TRACE_DATA_SIZE {
68+
return Err(new_error!("Invalid TraceBatch length: {}", trace_data_len));
69+
}
9070

91-
let events = EventsBatchDecoder {}.decode(buf_slice).map_err(|e| {
92-
tracing::error!("Failed to deserialize guest trace events: {:?}", e);
93-
new_error!("Failed to deserialize guest trace events: {:?}", e)
94-
})?;
71+
// Read the trace data from guest memory by walking the page tables
72+
// to translate the GVA to physical addresses. This is necessary
73+
// because with CoW, guest virtual pages are backed by physical
74+
// pages in the scratch region rather than being identity-mapped.
75+
let buf = mem_mgr.read_guest_memory_by_gva(trace_data_gva, trace_data_len, root_pt)?;
9576

96-
Ok::<Vec<GuestEvent>, HyperlightError>(events)
97-
})??;
77+
let events = EventsBatchDecoder {}.decode(&buf).map_err(|e| {
78+
tracing::error!("Failed to deserialize guest trace events: {:?}", e);
79+
new_error!("Failed to deserialize guest trace events: {:?}", e)
80+
})?;
9881

9982
Ok(EventsBatch { events })
10083
}
@@ -215,13 +198,19 @@ impl TraceContext {
215198
+ Duration::from_micros(rel_start_us as u64))
216199
}
217200

201+
/// Check if the registers indicate that there is trace data to be handled.
202+
pub fn has_trace_data(&self, regs: &CommonRegisters) -> bool {
203+
regs.r8 == OutBAction::TraceBatch as u64
204+
}
205+
218206
pub fn handle_trace(
219207
&mut self,
220208
regs: &CommonRegisters,
221209
mem_mgr: &mut SandboxMemoryManager<HostSharedMemory>,
210+
root_pt: u64,
222211
) -> Result<()> {
223212
// Get the guest sent info
224-
let trace_batch = EventsBatch::try_from((regs, mem_mgr))?;
213+
let trace_batch = EventsBatch::from_regs(regs, mem_mgr, root_pt)?;
225214

226215
self.handle_trace_impl(trace_batch.events)
227216
}

0 commit comments

Comments
 (0)