Skip to content

Commit e991421

Browse files
Wait for database to load before returning schema (#4551)
## Summary

When hitting `/v1/schema` while a database is still loading (replaying the log, running init reducers, etc.), the endpoint returned a 500 error because the module host was not yet available.

## Changes

- Add `Host::wait_for_module(timeout)` in `crates/client-api/src/lib.rs` -- polls `get_module_host` with exponential backoff (100ms, 200ms, 400ms, 800ms, 1s, 1s, ...) up to the given timeout
- Update the `/v1/schema` route to use `wait_for_module(10s)` instead of the immediate `module()` call

If the database finishes loading within 10 seconds, the schema is returned normally. If it does not load in time, the existing 500 error is returned (same behavior as before, just delayed).

No other routes are changed -- this is scoped to the schema endpoint per the issue description. Other routes (SQL, call, etc.) could adopt the same pattern if needed.

Fixes clockworklabs/SpacetimeDBPrivate#2748

Co-authored-by: clockwork-labs-bot <clockwork-labs-bot@users.noreply.github.com>
1 parent cacfd73 commit e991421

2 files changed

Lines changed: 31 additions & 1 deletion

File tree

crates/client-api/src/lib.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,29 @@ impl Host {
107107
self.host_controller.get_module_host(self.replica_id).await
108108
}
109109

110+
/// Wait for the module host to become available, retrying with backoff.
111+
///
112+
/// This is useful for routes like `/schema` that may be called while the
113+
/// database is still loading. Instead of returning an immediate 500, we
114+
/// poll for up to `timeout` before giving up.
115+
pub async fn wait_for_module(&self, timeout: std::time::Duration) -> Result<ModuleHost, NoSuchModule> {
116+
let deadline = tokio::time::Instant::now() + timeout;
117+
let mut interval = tokio::time::Duration::from_millis(100);
118+
loop {
119+
match self.host_controller.get_module_host(self.replica_id).await {
120+
Ok(module) => return Ok(module),
121+
Err(NoSuchModule) => {
122+
if tokio::time::Instant::now() >= deadline {
123+
return Err(NoSuchModule);
124+
}
125+
tokio::time::sleep(interval).await;
126+
// Exponential backoff: 100ms, 200ms, 400ms, 800ms, 1s, 1s, ...
127+
interval = (interval * 2).min(tokio::time::Duration::from_secs(1));
128+
}
129+
}
130+
}
131+
}
132+
110133
/// Obtain a `watch::Receiver<ModuleHost>` for this host's replica.
///
/// Delegates to `host_controller.watch_module_host` with `self.replica_id`.
/// NOTE(review): presumably the receiver is notified whenever the module
/// host for the replica is replaced -- confirm against `HostController`.
///
/// # Errors
///
/// Returns [`NoSuchModule`] when the host controller cannot provide a
/// watcher for the replica.
pub async fn module_watcher(&self) -> Result<watch::Receiver<ModuleHost>, NoSuchModule> {
111134
self.host_controller.watch_module_host(self.replica_id).await
112135
}

crates/client-api/src/routes/database.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,14 @@ pub async fn schema<S>(
341341
where
342342
S: ControlStateDelegate + NodeDelegate,
343343
{
344-
let (module, _) = find_module_and_database(&worker_ctx, name_or_identity).await?;
344+
let (leader, _) = find_leader_and_database(&worker_ctx, name_or_identity).await?;
345+
// Wait for the module to finish loading rather than returning an immediate
346+
// 500 error. The database may still be initializing (replaying the log,
347+
// running init reducers, etc.).
348+
let module = leader
349+
.wait_for_module(std::time::Duration::from_secs(10))
350+
.await
351+
.map_err(log_and_500)?;
345352

346353
let module_def = &module.info.module_def;
347354
let response_json = match version {

0 commit comments

Comments
 (0)