Skip to content

Commit 24a6658

Browse files
authored
core: Attempt to repair databases with wrong host type (#4619)
Prior to #4549, the host type in `st_module` was always set to wasm. We now correctly use the host type stored in the database, but for databases written before that change the stored value may say wasm even though the module is in fact a JS module. So if launching it as a wasm module fails, try JS instead. If this succeeds, the module is definitely a JS module, so attempt to repair `st_module` in this case.

# Expected complexity level and risk

2

# Testing

- [x] Added smoketest
1 parent 18d4fbc commit 24a6658

6 files changed

Lines changed: 127 additions & 21 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/core/src/db/relational_db.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
use crate::db::durability::DurabilityWorker;
22
use crate::db::MetricsRecorderQueue;
33
use crate::error::{DBError, RestoreSnapshotError};
4-
use crate::messages::control_db::HostType;
54
use crate::subscription::ExecutionCounters;
65
use crate::util::asyncify;
76
use crate::worker_metrics::WORKER_METRICS;
@@ -464,7 +463,7 @@ impl RelationalDB {
464463
/// The caller must ensure that:
465464
///
466465
/// - `program.hash` is the [`Hash`] over `program.bytes`.
467-
/// - `program.bytes` is a valid module acc. to `host_type`.
466+
/// - `program.bytes` is a valid module acc. to `program.host_type`.
468467
/// - the schema updates contained in the module have been applied within
469468
/// the transactional context `tx`.
470469
/// - the `__init__` reducer contained in the module has been executed
@@ -1861,6 +1860,7 @@ fn default_row_count_fn(db: Identity) -> RowCountFn {
18611860
pub mod tests_utils {
18621861
use crate::db::snapshot;
18631862
use crate::db::snapshot::SnapshotWorker;
1863+
use crate::messages::control_db::HostType;
18641864

18651865
use super::*;
18661866
use core::ops::Deref;

crates/core/src/host/host_controller.rs

Lines changed: 86 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ use spacetimedb_data_structures::error_stream::ErrorStream;
3030
use spacetimedb_data_structures::map::{IntMap, IntSet};
3131
use spacetimedb_datastore::db_metrics::data_size::DATA_SIZE_METRICS;
3232
use spacetimedb_datastore::db_metrics::DB_METRICS;
33+
use spacetimedb_datastore::execution_context::Workload;
34+
use spacetimedb_datastore::system_tables::ModuleKind;
3335
use spacetimedb_datastore::traits::Program;
3436
use spacetimedb_durability::{self as durability};
3537
use spacetimedb_lib::{AlgebraicValue, Identity, Timestamp};
@@ -898,7 +900,6 @@ impl Host {
898900
bsatn_rlb_pool,
899901
..
900902
} = host_controller;
901-
let on_panic = host_controller.unregister_fn(replica_id);
902903
let replica_dir = data_dir.replica(replica_id);
903904
let (tx_metrics_queue, tx_metrics_recorder_task) = spawn_tx_metrics_recorder();
904905

@@ -947,7 +948,7 @@ impl Host {
947948
(db, clients)
948949
}
949950
};
950-
let (program, program_needs_init) = match db.program()? {
951+
let (mut program, program_needs_init) = match db.program()? {
951952
// Launch module with program from existing database.
952953
Some(program) => {
953954
info!(
@@ -974,23 +975,89 @@ impl Host {
974975
}
975976
};
976977

977-
let (program, launched) = ModuleLauncher {
978-
database,
979-
replica_id,
980-
program,
981-
on_panic,
982-
relational_db: Arc::new(db),
983-
energy_monitor: energy_monitor.clone(),
984-
module_logs: match config.storage {
985-
db::Storage::Memory => None,
986-
db::Storage::Disk => Some(replica_dir.module_logs()),
987-
},
988-
runtimes: runtimes.clone(),
989-
core: host_controller.db_cores.take(),
990-
bsatn_rlb_pool: bsatn_rlb_pool.clone(),
991-
}
992-
.launch_module()
993-
.await?;
978+
let relational_db = Arc::new(db);
979+
let (program, launched) = match HostType::from(program.kind) {
980+
HostType::Js => {
981+
ModuleLauncher {
982+
database,
983+
replica_id,
984+
program,
985+
on_panic: host_controller.unregister_fn(replica_id),
986+
relational_db,
987+
energy_monitor: energy_monitor.clone(),
988+
module_logs: match config.storage {
989+
db::Storage::Memory => None,
990+
db::Storage::Disk => Some(replica_dir.module_logs()),
991+
},
992+
runtimes: runtimes.clone(),
993+
core: host_controller.db_cores.take(),
994+
bsatn_rlb_pool: bsatn_rlb_pool.clone(),
995+
}
996+
.launch_module()
997+
.await?
998+
}
999+
HostType::Wasm => {
1000+
// Prior to https://github.com/clockworklabs/SpacetimeDB/pull/4549
1001+
// the host type in `st_module` was always set to wasm.
1002+
// We now correctly use the host type from the database, but the
1003+
// module may in fact be a JS module.
1004+
// So if launching it as a wasm module fails, try JS instead.
1005+
// If this succeeds, the module is definitely a JS module, so
1006+
// attempt to repair `st_module` in this case.
1007+
//
1008+
// TODO: This code should eventually be removed once all
1009+
// databases have been repaired.
1010+
let launch_wasm_result = ModuleLauncher {
1011+
database: database.clone(),
1012+
replica_id,
1013+
program: program.clone(),
1014+
on_panic: host_controller.unregister_fn(replica_id),
1015+
relational_db: relational_db.clone(),
1016+
energy_monitor: energy_monitor.clone(),
1017+
module_logs: match config.storage {
1018+
db::Storage::Memory => None,
1019+
db::Storage::Disk => Some(replica_dir.clone().module_logs()),
1020+
},
1021+
runtimes: runtimes.clone(),
1022+
core: host_controller.db_cores.take(),
1023+
bsatn_rlb_pool: bsatn_rlb_pool.clone(),
1024+
}
1025+
.launch_module()
1026+
.await;
1027+
match launch_wasm_result {
1028+
Ok(program_and_module_host) => program_and_module_host,
1029+
Err(e) => {
1030+
warn!("failed to launch wasm module, trying js: {e:#}");
1031+
1032+
program.kind = ModuleKind::JS;
1033+
let res = ModuleLauncher {
1034+
database,
1035+
replica_id,
1036+
program: program.clone(),
1037+
on_panic: host_controller.unregister_fn(replica_id),
1038+
relational_db: relational_db.clone(),
1039+
energy_monitor: energy_monitor.clone(),
1040+
module_logs: match config.storage {
1041+
db::Storage::Memory => None,
1042+
db::Storage::Disk => Some(replica_dir.module_logs()),
1043+
},
1044+
runtimes: runtimes.clone(),
1045+
core: host_controller.db_cores.take(),
1046+
bsatn_rlb_pool: bsatn_rlb_pool.clone(),
1047+
}
1048+
.launch_module()
1049+
.await;
1050+
1051+
if res.is_ok() {
1052+
let _ = relational_db
1053+
.with_auto_commit(Workload::Internal, |tx| relational_db.update_program(tx, program));
1054+
}
1055+
1056+
res?
1057+
}
1058+
}
1059+
}
1060+
};
9941061

9951062
if program_needs_init {
9961063
let call_result = launched.module_host.init_database(program).await?;

crates/datastore/src/traits.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,7 @@ pub struct Metadata {
500500
}
501501

502502
/// Program associated with a database.
503+
#[derive(Clone)]
503504
pub struct Program {
504505
/// Hash over the program's bytes.
505506
pub hash: Hash,

crates/smoketests/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ anyhow.workspace = true
1515
which = "8.0.0"
1616

1717
[dev-dependencies]
18+
spacetimedb-core.workspace = true
1819
cargo_metadata.workspace = true
1920
assert_cmd = "2"
2021
predicates = "3"

crates/smoketests/tests/smoketests/change_host_type.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use spacetimedb::messages::control_db::HostType;
12
use spacetimedb_smoketests::{require_local_server, require_pnpm, Smoketest};
23

34
const TS_MODULE_BASIC: &str = r#"import { schema, t, table } from "spacetimedb/server";
@@ -83,3 +84,38 @@ fn assert_has_rows(test: &Smoketest, names: &[&str], context: &str) {
8384
"{context}: expected all of {names:?} to be in result: {output}"
8485
)
8586
}
87+
88+
/// Tests that a legacy database that has a wrong host type in `st_module` is
89+
/// auto-repaired upon startup.
90+
///
91+
/// NOTE: The repair mechanism shall be removed eventually, and so shall this
92+
/// test (which will fail when the mechanism is sunset).
93+
///
94+
/// This test restarts the server.
95+
#[test]
96+
fn test_repair_host_type() {
97+
require_pnpm!();
98+
require_local_server!();
99+
100+
let mut test = Smoketest::builder().autopublish(false).build();
101+
102+
// Publish a TypeScript module; right after publishing, `st_module` is
// expected to record the JS host type.
test.publish_typescript_module_source("modules-basic-ts", "basic-ts", TS_MODULE_BASIC)
103+
.unwrap();
104+
assert_host_type(&test, HostType::Js);
105+
// Simulate a legacy database: set the program kind to the wrong value
// (`0`, which `assert_host_type` treats as wasm).
106+
test.sql_confirmed("update st_module set program_kind=0").unwrap();
107+
assert_host_type(&test, HostType::Wasm);
108+
109+
// After restarting, the database both comes up and has the right host type.
110+
test.restart_server();
111+
assert_host_type(&test, HostType::Js);
112+
}
113+
114+
/// Asserts that the `program_kind` column of `st_module` matches `host_type`.
///
/// The query result is expected to contain exactly one data row: `0` for
/// [`HostType::Wasm`] and `1` for [`HostType::Js`].
///
/// # Panics
///
/// Panics if the SQL query fails or if the returned rows differ from the
/// expected value.
fn assert_host_type(test: &Smoketest, host_type: HostType) {
115+
let output = test.sql_confirmed("select program_kind from st_module").unwrap();
116+
// Skip the first two lines of the output -- presumably the column header
// and its separator line (TODO confirm against the output format of
// `sql_confirmed`) -- and trim the padding around each remaining row.
let rows = output.lines().skip(2).map(|s| s.trim()).collect::<Vec<_>>();
117+
match host_type {
118+
HostType::Wasm => assert_eq!(&rows, &["0"]),
119+
HostType::Js => assert_eq!(&rows, &["1"]),
120+
}
121+
}

0 commit comments

Comments
 (0)