Skip to content

Commit afd3f35

Browse files
kimjsdt and clockwork-labs-bot
authored
core: Bounded channel for durability worker (#4652)
Use a bounded channel for submitting transactions to the durability layer, so as to apply backpressure to the db when transaction volume is too high. --------- Co-authored-by: Jeffrey Dallatezza <jeffreydallatezza@gmail.com> Co-authored-by: clockwork-labs-bot <clockwork-labs-bot@users.noreply.github.com>
1 parent b98c68c commit afd3f35

5 files changed

Lines changed: 74 additions & 23 deletions

File tree

crates/core/src/db/durability.rs

Lines changed: 42 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use tokio::{
1515
runtime,
1616
sync::{
1717
futures::OwnedNotified,
18-
mpsc::{channel, unbounded_channel, Receiver, Sender, UnboundedReceiver, UnboundedSender},
18+
mpsc::{self, channel, Receiver, Sender},
1919
oneshot, Notify,
2020
},
2121
time::timeout,
@@ -69,7 +69,7 @@ type ShutdownReply = oneshot::Sender<OwnedNotified>;
6969
/// [RelationalDB]: crate::db::relational_db::RelationalDB
7070
pub struct DurabilityWorker {
7171
database: Identity,
72-
request_tx: UnboundedSender<DurabilityRequest>,
72+
request_tx: Sender<DurabilityRequest>,
7373
shutdown: Sender<ShutdownReply>,
7474
durability: Arc<Durability>,
7575
runtime: runtime::Handle,
@@ -86,7 +86,7 @@ impl DurabilityWorker {
8686
next_tx_offset: TxOffset,
8787
reorder_window_size: NonZeroUsize,
8888
) -> Self {
89-
let (request_tx, request_rx) = unbounded_channel();
89+
let (request_tx, request_rx) = channel(4 * 4096);
9090
let (shutdown_tx, shutdown_rx) = channel(1);
9191

9292
let actor = DurabilityWorkerActor {
@@ -123,8 +123,8 @@ impl DurabilityWorker {
123123
/// this method is responsible only for reading its decision out of the `tx_data`
124124
/// and calling `durability.append_tx`.
125125
///
126-
/// This method does not block,
127-
/// and sends the work to an actor that collects data and calls `durability.append_tx`.
126+
/// This method sends the work to an actor that collects data and calls `durability.append_tx`.
127+
/// It blocks if the queue is at capacity.
128128
///
129129
/// # Panics
130130
///
@@ -135,12 +135,40 @@ impl DurabilityWorker {
135135
/// - [Self::shutdown] was called
136136
///
137137
pub fn request_durability(&self, reducer_context: Option<ReducerContext>, tx_data: &Arc<TxData>) {
138-
self.request_tx
139-
.send(DurabilityRequest {
140-
reducer_context,
141-
tx_data: tx_data.clone(),
142-
})
143-
.unwrap_or_else(|_| panic!("durability actor vanished database={}", self.database));
138+
// We first try to send it without blocking.
139+
match self.request_tx.try_reserve() {
140+
Ok(permit) => {
141+
permit.send(DurabilityRequest {
142+
reducer_context,
143+
tx_data: tx_data.clone(),
144+
});
145+
}
146+
Err(mpsc::error::TrySendError::Closed(_)) => {
147+
panic!("durability actor vanished database={}", self.database);
148+
}
149+
Err(mpsc::error::TrySendError::Full(_)) => {
150+
// If the channel was full, we use the blocking version.
151+
let start = std::time::Instant::now();
152+
let send = || {
153+
self.request_tx.blocking_send(DurabilityRequest {
154+
reducer_context,
155+
tx_data: tx_data.clone(),
156+
})
157+
};
158+
if tokio::runtime::Handle::try_current().is_ok() {
159+
tokio::task::block_in_place(send)
160+
} else {
161+
send()
162+
}
163+
.unwrap_or_else(|_| panic!("durability actor vanished database={}", self.database));
164+
// We could cache this metric, but if we are already in the blocking code path,
165+
// the extra time of looking up the metric is probably negligible.
166+
WORKER_METRICS
167+
.durability_blocking_send_duration
168+
.with_label_values(&self.database)
169+
.observe(start.elapsed().as_secs_f64());
170+
}
171+
}
144172
}
145173

146174
/// Get the [`DurableOffset`] of this database.
@@ -281,8 +309,8 @@ impl<T> ReorderWindow<T> {
281309
}
282310
}
283311

284-
struct DurabilityWorkerActor {
285-
request_rx: UnboundedReceiver<DurabilityRequest>,
312+
pub struct DurabilityWorkerActor {
313+
request_rx: mpsc::Receiver<DurabilityRequest>,
286314
shutdown: Receiver<ShutdownReply>,
287315
durability: Arc<Durability>,
288316
reorder_window: ReorderWindow<DurabilityRequest>,
@@ -483,7 +511,7 @@ mod tests {
483511
}
484512
}
485513

486-
#[tokio::test]
514+
#[tokio::test(flavor = "multi_thread")]
487515
async fn shutdown_waits_until_durable() {
488516
let durability = Arc::new(CountingDurability::default());
489517
let worker = DurabilityWorker::new(

crates/core/src/subscription/module_subscription_actor.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4272,7 +4272,7 @@ mod tests {
42724272
Ok(())
42734273
}
42744274

4275-
#[tokio::test]
4275+
#[tokio::test(flavor = "multi_thread")]
42764276
async fn test_confirmed_reads() -> anyhow::Result<()> {
42774277
let (db, durability) = relational_db_with_manual_durability(tokio::runtime::Handle::current())?;
42784278

crates/core/src/worker_metrics/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,12 @@ metrics_group!(
481481
#[labels(db: Identity, scan_type: str, table: str, unindexed_columns: str)]
482482
pub subscription_queries_total: IntCounterVec,
483483

484+
#[name = spacetime_durability_blocking_send_duration_sec]
485+
#[help = "Latency of blocking sends in request_durability (seconds); _count gives the number of times the channel was full"]
486+
#[labels(database_identity: Identity)]
487+
#[buckets(0.001, 0.01, 0.1, 1.0, 10.0)]
488+
pub durability_blocking_send_duration: HistogramVec,
489+
484490
#[name = spacetime_durability_worker_reorder_window_length]
485491
#[help = "The number of transactions currently being held in the reorder window"]
486492
#[labels(db: Identity)]

crates/durability/src/imp/local.rs

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ pub struct Options {
3838
/// transactions that are currently in the queue, but shrink the buffer to
3939
/// `batch_capacity` if it had to make additional space during a burst.
4040
///
41+
/// The internal queue of [Local] is bounded to `4 * batch_capacity`.
42+
///
4143
/// Default: 4096
4244
pub batch_capacity: NonZeroUsize,
4345
/// [`Commitlog`] configuration.
@@ -87,8 +89,8 @@ pub struct Local<T> {
8789
/// Backlog of transactions to be written to disk by the background
8890
/// [`PersisterTask`].
8991
///
90-
/// Note that this is unbounded!
91-
queue: mpsc::UnboundedSender<Transaction<Txdata<T>>>,
92+
/// The queue is bounded to `4 * Options::batch_capacity`.
93+
queue: mpsc::Sender<Transaction<Txdata<T>>>,
9294
/// How many transactions are sitting in the `queue`.
9395
///
9496
/// This is mainly for observability purposes, and can thus be updated with
@@ -126,7 +128,7 @@ impl<T: Encode + Send + Sync + 'static> Local<T> {
126128
opts.commitlog,
127129
on_new_segment,
128130
)?);
129-
let (queue, txdata_rx) = mpsc::unbounded_channel();
131+
let (queue, txdata_rx) = mpsc::channel(4 * opts.batch_capacity.get());
130132
let queue_depth = Arc::new(AtomicU64::new(0));
131133
let (durable_tx, durable_rx) = watch::channel(clog.max_committed_offset());
132134
let (shutdown_tx, shutdown_rx) = mpsc::channel(1);
@@ -207,7 +209,7 @@ impl<T: Encode + Send + Sync + 'static> Actor<T> {
207209
#[instrument(name = "durability::local::actor", skip_all)]
208210
async fn run(
209211
self,
210-
mut transactions_rx: mpsc::UnboundedReceiver<Transaction<Txdata<T>>>,
212+
mut transactions_rx: mpsc::Receiver<Transaction<Txdata<T>>>,
211213
mut shutdown_rx: mpsc::Receiver<oneshot::Sender<OwnedNotified>>,
212214
) {
213215
info!("starting durability actor");
@@ -328,7 +330,22 @@ impl<T: Send + Sync + 'static> Durability for Local<T> {
328330
type TxData = Txdata<T>;
329331

330332
fn append_tx(&self, tx: Transaction<Self::TxData>) {
331-
self.queue.send(tx).expect("durability actor crashed");
333+
match self.queue.try_reserve() {
334+
Ok(permit) => permit.send(tx),
335+
Err(mpsc::error::TrySendError::Closed(_)) => {
336+
panic!("durability actor crashed");
337+
}
338+
Err(mpsc::error::TrySendError::Full(_)) => {
339+
let send = || self.queue.blocking_send(tx);
340+
if tokio::runtime::Handle::try_current().is_ok() {
341+
tokio::task::block_in_place(send)
342+
} else {
343+
send()
344+
}
345+
.expect("durability actor crashed");
346+
}
347+
}
348+
332349
self.queue_depth.fetch_add(1, Relaxed);
333350
}
334351

crates/durability/tests/io/fallocate.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ use tokio::{sync::watch, time::sleep};
4343

4444
const MB: u64 = 1024 * 1024;
4545

46-
#[tokio::test]
46+
#[tokio::test(flavor = "multi_thread")]
4747
async fn local_durability_cannot_be_created_if_not_enough_space() -> anyhow::Result<()> {
4848
enable_logging();
4949

@@ -72,7 +72,7 @@ async fn local_durability_cannot_be_created_if_not_enough_space() -> anyhow::Res
7272
// NOTE: This test is set up to proceed more or less sequentially.
7373
// In reality, `append_tx` will fail at some point in the future.
7474
// I.e. transactions can be lost when the host runs out of disk space.
75-
#[tokio::test]
75+
#[tokio::test(flavor = "multi_thread")]
7676
#[should_panic = "durability actor crashed"]
7777
async fn local_durability_crashes_on_new_segment_if_not_enough_space() {
7878
enable_logging();
@@ -120,7 +120,7 @@ async fn local_durability_crashes_on_new_segment_if_not_enough_space() {
120120
/// without `fallocate`.
121121
///
122122
/// Resuming a segment when there is insufficient space should fail.
123-
#[tokio::test]
123+
#[tokio::test(flavor = "multi_thread")]
124124
async fn local_durability_crashes_on_resume_with_insuffient_space() -> anyhow::Result<()> {
125125
enable_logging();
126126

0 commit comments

Comments
 (0)