Skip to content

Commit 0c36e87

Browse files
committed
fix(virtq): adjust sizes for benchmarks
Signed-off-by: Tomasz Andrzejak <andreiltd@gmail.com>
1 parent 15a5aba commit 0c36e87

5 files changed

Lines changed: 105 additions & 72 deletions

File tree

src/hyperlight_common/src/virtq/mod.rs

Lines changed: 45 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -896,14 +896,14 @@ mod tests {
896896
}
897897

898898
#[test]
899-
fn test_reclaim_then_poll_preserves_order() {
899+
fn test_reclaim_discards_readonly_completions() {
900900
let ring = make_ring(8);
901901
let (mut producer, mut consumer, _) = make_test_producer(&ring);
902902

903903
// Submit 3 entries: RO, RW, RO
904-
let tok_ro1 = send_readonly(&mut producer, b"log1");
904+
let _tok_ro1 = send_readonly(&mut producer, b"log1");
905905
let tok_rw = send_readwrite(&mut producer, b"call", 64);
906-
let tok_ro2 = send_readonly(&mut producer, b"log2");
906+
let _tok_ro2 = send_readonly(&mut producer, b"log2");
907907

908908
// Consumer processes all 3
909909
let (_, c1) = consumer.poll(1024).unwrap().unwrap();
@@ -919,24 +919,16 @@ mod tests {
919919
let (_, c3) = consumer.poll(1024).unwrap().unwrap();
920920
consumer.complete(c3).unwrap(); // ack RO
921921

922-
// Reclaim all 3
922+
// Reclaim all 3 - RO completions are discarded, only RW is buffered
923923
let count = producer.reclaim().unwrap();
924924
assert_eq!(count, 3);
925925

926-
// poll() returns them in order
927-
let cqe1 = producer.poll().unwrap().unwrap();
928-
assert_eq!(cqe1.token, tok_ro1);
929-
assert!(cqe1.data.is_empty());
930-
931-
let cqe2 = producer.poll().unwrap().unwrap();
932-
assert_eq!(cqe2.token, tok_rw);
933-
assert_eq!(&cqe2.data[..], b"result");
934-
935-
let cqe3 = producer.poll().unwrap().unwrap();
936-
assert_eq!(cqe3.token, tok_ro2);
937-
assert!(cqe3.data.is_empty());
926+
// poll() returns only the RW completion
927+
let cqe = producer.poll().unwrap().unwrap();
928+
assert_eq!(cqe.token, tok_rw);
929+
assert_eq!(&cqe.data[..], b"result");
938930

939-
// No more
931+
// No more - RO completions were discarded
940932
assert!(producer.poll().unwrap().is_none());
941933
}
942934

@@ -973,11 +965,7 @@ mod tests {
973965
assert_eq!(&cqe2.data[..], b"reply");
974966
}
975967

976-
/// Regression test: reclaim + submit must not cause token collisions.
977-
///
978-
/// Before the monotonic generation counter, Token wrapped the descriptor
979-
/// ID which gets recycled. This caused stale pending completions to
980-
/// match newly submitted entries with the same recycled descriptor ID.
968+
/// reclaim + submit must not cause token collisions.
981969
#[test]
982970
fn test_reclaim_submit_no_token_collision() {
983971
let ring = make_ring(8);
@@ -989,7 +977,6 @@ mod tests {
989977
let (_, c) = consumer.poll(1024).unwrap().unwrap();
990978
consumer.complete(c).unwrap();
991979

992-
// Reclaim pushes the completion to pending (token = tok_old)
993980
let count = producer.reclaim().unwrap();
994981
assert_eq!(count, 1);
995982

@@ -1010,15 +997,42 @@ mod tests {
1010997
wc.write_all(b"result").unwrap();
1011998
consumer.complete(wc.into()).unwrap();
1012999

1013-
// Poll should return the stale ReadOnly completion first (wrong token)
1014-
let cqe1 = producer.poll().unwrap().unwrap();
1015-
assert_eq!(cqe1.token, tok_old);
1016-
assert!(cqe1.data.is_empty());
1000+
// Poll returns only the RW completion (RO was discarded by reclaim)
1001+
let cqe = producer.poll().unwrap().unwrap();
1002+
assert_eq!(cqe.token, tok_new);
1003+
assert_eq!(&cqe.data[..], b"result");
10171004

1018-
// Then the new ReadWrite completion (matching token)
1019-
let cqe2 = producer.poll().unwrap().unwrap();
1020-
assert_eq!(cqe2.token, tok_new);
1021-
assert_eq!(&cqe2.data[..], b"result");
1005+
// No stale RO completion in the queue
1006+
assert!(producer.poll().unwrap().is_none());
1007+
}
1008+
1009+
/// Verify that repeated oneshot submit/reclaim cycles do not accumulate pending completions.
1010+
#[test]
1011+
fn test_reclaim_readonly_does_not_leak_pending() {
1012+
let ring = make_ring(4);
1013+
let (mut producer, mut consumer, _) = make_test_producer(&ring);
1014+
1015+
for _ in 0..10 {
1016+
// Fill the ring
1017+
for _ in 0..4 {
1018+
send_readonly(&mut producer, b"msg");
1019+
}
1020+
1021+
// Consumer acks all
1022+
while let Some((_, completion)) = consumer.poll(1024).unwrap() {
1023+
consumer.complete(completion).unwrap();
1024+
}
1025+
1026+
// Reclaim frees ring slots; empty completions are discarded
1027+
let count = producer.reclaim().unwrap();
1028+
assert_eq!(count, 4);
1029+
1030+
// No completions should be buffered in pending
1031+
assert!(
1032+
producer.poll().unwrap().is_none(),
1033+
"pending should be empty after reclaiming RO entries"
1034+
);
1035+
}
10221036
}
10231037
}
10241038
#[cfg(all(test, loom))]

src/hyperlight_common/src/virtq/producer.rs

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ pub struct RecvCompletion {
3434
pub token: Token,
3535
/// Completion data from the device.
3636
pub data: Bytes,
37+
/// Whether this entry is oneshot, i.e. it has no writable completion buffer.
38+
/// Oneshot entries are fire-and-forget: the producer does not
39+
/// expect any response data from the device.
40+
pub oneshot: bool,
3741
}
3842

3943
/// Allocation tracking for an in-flight descriptor chain.
@@ -146,15 +150,16 @@ where
146150
/// * `pool` - Buffer allocator for entry/completion data
147151
pub fn new(layout: Layout, mem: M, notifier: N, pool: P) -> Self {
148152
let inner = RingProducer::new(layout, mem);
149-
let inflight = vec![None; inner.len()];
153+
let ring_len = inner.len();
154+
let inflight = vec![None; ring_len];
150155

151156
Self {
152157
inner,
153158
pool,
154159
notifier,
155160
inflight,
156161
next_token: 0,
157-
pending: VecDeque::new(),
162+
pending: VecDeque::with_capacity(ring_len),
158163
}
159164
}
160165

@@ -192,6 +197,9 @@ where
192197
/// buffer allocations immediately, and buffers completion data for
193198
/// later retrieval via [`poll`](Self::poll).
194199
///
200+
/// Completions with empty data from read-only/oneshot entries are
201+
/// discarded immediately.
202+
///
195203
/// Use this to free resources under backpressure without losing
196204
/// completion data. Returns the number of entries reclaimed.
197205
pub fn reclaim(&mut self) -> Result<usize, VirtqError>
@@ -201,7 +209,11 @@ where
201209
{
202210
let mut count = 0;
203211
while let Some(cqe) = self.poll_ring()? {
204-
self.pending.push_back(cqe);
212+
if !cqe.oneshot {
213+
debug_assert!(self.pending.len() < self.inflight.len());
214+
debug_assert!(!cqe.data.is_empty());
215+
self.pending.push_back(cqe);
216+
}
205217
count += 1;
206218
}
207219
Ok(count)
@@ -242,6 +254,7 @@ where
242254
}
243255

244256
// Read completion data
257+
let has_completion = inf.completion().is_some();
245258
let data = match inf.completion() {
246259
Some(buf) => {
247260
if written > buf.len {
@@ -259,7 +272,11 @@ where
259272
None => Bytes::new(),
260273
};
261274

262-
Ok(Some(RecvCompletion { token, data }))
275+
Ok(Some(RecvCompletion {
276+
token,
277+
data,
278+
oneshot: !has_completion,
279+
}))
263280
}
264281

265282
/// Drain all available completions, calling the provided closure for each.

src/hyperlight_common/src/virtq/ring.rs

Lines changed: 24 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -350,11 +350,14 @@ impl RingCursor {
350350
}
351351
}
352352

353-
/// Advance by n positions
353+
/// Advance by n positions using modular arithmetic.
354354
#[inline]
355355
pub(crate) fn advance_by(&mut self, n: u16) {
356-
for _ in 0..n {
357-
self.advance();
356+
let new = self.head + n;
357+
let wraps = new / self.size;
358+
self.head = new % self.size;
359+
if wraps % 2 != 0 {
360+
self.wrap = !self.wrap;
358361
}
359362
}
360363

@@ -371,6 +374,7 @@ impl RingCursor {
371374
}
372375

373376
/// Reset cursor to initial state.
377+
#[inline]
374378
pub fn reset(&mut self) {
375379
self.head = 0;
376380
self.wrap = true;
@@ -962,7 +966,7 @@ impl<M: MemOps> RingConsumer<M> {
962966
return Err(RingError::WouldBlock);
963967
}
964968

965-
// Build chain (head + tails).
969+
// Build chain (head + tails), tracking readable/writable split inline.
966970
let mut elements = SmallVec::<[BufferElement; 16]>::new();
967971
let mut pos = self.avail_cursor;
968972
let mut chain_len: u16 = 1;
@@ -972,7 +976,10 @@ impl<M: MemOps> RingConsumer<M> {
972976

973977
let max_steps = self.desc_table.len();
974978

975-
elements.push(BufferElement::from(&head_desc));
979+
let head_elem = BufferElement::from(&head_desc);
980+
let mut seen_writable = head_elem.writable;
981+
let mut writables: usize = if seen_writable { 1 } else { 0 };
982+
elements.push(head_elem);
976983
pos.advance();
977984

978985
while has_next && steps < max_steps {
@@ -982,8 +989,17 @@ impl<M: MemOps> RingConsumer<M> {
982989
.ok_or(RingError::InvalidState)?;
983990

984991
// tail reads do not need ordering because the head has already been validated
985-
let desc = self.mem.read_val(addr).map_err(|_| RingError::MemError)?;
986-
elements.push(BufferElement::from(&desc));
992+
let desc: Descriptor = self.mem.read_val(addr).map_err(|_| RingError::MemError)?;
993+
let elem = BufferElement::from(&desc);
994+
995+
if elem.writable {
996+
seen_writable = true;
997+
writables += 1;
998+
} else if seen_writable {
999+
return Err(RingError::BadChain);
1000+
}
1001+
1002+
elements.push(elem);
9871003

9881004
chain_len += 1;
9891005
steps += 1;
@@ -997,8 +1013,7 @@ impl<M: MemOps> RingConsumer<M> {
9971013
return Err(RingError::BadChain);
9981014
}
9991015

1000-
// Verify that readable/writable split is correct
1001-
let readables = chain_readable_count(&elements)?;
1016+
let readables = elements.len() - writables;
10021017

10031018
// Since driver wrote the same id everywhere, head_desc.id is valid.
10041019
let id = head_desc.id;
@@ -1261,24 +1276,6 @@ pub fn ring_need_event(event_idx: u16, new: u16, old: u16) -> bool {
12611276
new.wrapping_sub(event_idx).wrapping_sub(1) < new.wrapping_sub(old)
12621277
}
12631278

1264-
#[inline]
1265-
/// Check that a buffer chain is well-formed: all readable buffers first,
1266-
/// then writable and return the count of readable buffers.
1267-
fn chain_readable_count(elems: &[BufferElement]) -> Result<usize, RingError> {
1268-
let mut seen_writable = false;
1269-
let mut writables = 0;
1270-
1271-
for e in elems {
1272-
if e.writable {
1273-
seen_writable = true;
1274-
writables += 1;
1275-
} else if seen_writable {
1276-
return Err(RingError::BadChain);
1277-
}
1278-
}
1279-
1280-
Ok(elems.len() - writables)
1281-
}
12821279

12831280
impl From<&Descriptor> for BufferElement {
12841281
fn from(desc: &Descriptor) -> Self {

src/hyperlight_host/benches/benchmarks.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,13 @@ impl SandboxSize {
6262
Self::Medium => {
6363
let mut cfg = SandboxConfiguration::default();
6464
cfg.set_heap_size(MEDIUM_HEAP_SIZE);
65-
cfg.set_scratch_size(0x50000);
65+
cfg.set_scratch_size(0x80000);
6666
Some(cfg)
6767
}
6868
Self::Large => {
6969
let mut cfg = SandboxConfiguration::default();
7070
cfg.set_heap_size(LARGE_HEAP_SIZE);
71-
cfg.set_scratch_size(0x100000);
71+
cfg.set_scratch_size(0x200000);
7272
Some(cfg)
7373
}
7474
}
@@ -384,10 +384,15 @@ fn guest_call_benchmark_large_param(c: &mut Criterion) {
384384
let large_vec = vec![0u8; SIZE];
385385
let large_string = String::from_utf8(large_vec.clone()).unwrap();
386386

387+
let h2g_pool_pages = (2 * SIZE + (1024 * 1024)) / 4096;
388+
let heap_size = SIZE as u64 * 15;
389+
387390
let mut config = SandboxConfiguration::default();
388-
config.set_h2g_pool_pages((2 * SIZE + (1024 * 1024)) / 4096); // pool pages for the large input
389-
config.set_heap_size(SIZE as u64 * 15);
390-
config.set_scratch_size(6 * SIZE + 4 * (1024 * 1024)); // Big enough for any data copies, etc.
391+
config.set_h2g_pool_pages(h2g_pool_pages);
392+
config.set_h2g_queue_depth(h2g_pool_pages.next_power_of_two());
393+
config.set_heap_size(heap_size);
394+
// Scratch backs all guest physical pages (heap, page tables, pools).
395+
config.set_scratch_size(heap_size as usize + 4 * 1024 * 1024);
391396

392397
let sandbox = UninitializedSandbox::new(
393398
GuestBinary::FilePath(simple_guest_as_string().unwrap()),

src/hyperlight_host/src/sandbox/initialized_multi_use.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,21 +1090,21 @@ mod tests {
10901090
assert_eq!(res, 0);
10911091
}
10921092

1093-
// Tests to ensure that many (1000) function calls can be made in a call context with a small stack (24K) and heap(20K).
1093+
// Tests to ensure that many (1000) function calls can be made in a call context with a small stack (24K) and heap (32K).
10941094
// This test effectively ensures that the stack is being properly reset after each call and we are not leaking memory in the Guest.
10951095
#[test]
10961096
fn test_with_small_stack_and_heap() {
10971097
let mut cfg = SandboxConfiguration::default();
1098-
cfg.set_heap_size(20 * 1024);
1098+
cfg.set_heap_size(32 * 1024);
10991099
// min_scratch_size already includes 1 page (4k on most
11001100
// platforms) of guest stack, so add 20k more to get 24k
11011101
// total, and then add some more for the eagerly-copied page
1102-
// tables on amd64
1102+
// tables on amd64 and virtq pool pages.
11031103
let min_scratch = hyperlight_common::layout::min_scratch_size(
11041104
cfg.get_g2h_queue_depth(),
11051105
cfg.get_h2g_queue_depth(),
11061106
);
1107-
cfg.set_scratch_size(min_scratch + 0x10000 + 0x10000);
1107+
cfg.set_scratch_size(min_scratch + 0x10000 + 0x18000);
11081108

11091109
let mut sbox1: MultiUseSandbox = {
11101110
let path = simple_guest_as_string().unwrap();
@@ -1718,7 +1718,7 @@ mod tests {
17181718

17191719
for (name, heap_size) in test_cases {
17201720
let mut cfg = SandboxConfiguration::default();
1721-
cfg.set_heap_size(heap_size);
1721+
cfg.set_heap_size(128 * 1024);
17221722
cfg.set_scratch_size(0x100000);
17231723

17241724
let path = simple_guest_as_string().unwrap();

0 commit comments

Comments
 (0)