diff --git a/Cargo.lock b/Cargo.lock index ed7b7486..196e3f6a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -750,7 +750,7 @@ dependencies = [ [[package]] name = "edgezero-adapter-fastly" version = "0.1.0" -source = "git+https://github.com/stackpop/edgezero?rev=170b74b#170b74bd2c9933b7d561f7ccdb67c53b239e9527" +source = "git+https://github.com/stackpop/edgezero?rev=38198f9839b70aef03ab971ae5876982773fc2a1#38198f9839b70aef03ab971ae5876982773fc2a1" dependencies = [ "anyhow", "async-stream", @@ -771,7 +771,7 @@ dependencies = [ [[package]] name = "edgezero-core" version = "0.1.0" -source = "git+https://github.com/stackpop/edgezero?rev=170b74b#170b74bd2c9933b7d561f7ccdb67c53b239e9527" +source = "git+https://github.com/stackpop/edgezero?rev=38198f9839b70aef03ab971ae5876982773fc2a1#38198f9839b70aef03ab971ae5876982773fc2a1" dependencies = [ "anyhow", "async-compression", @@ -799,7 +799,7 @@ dependencies = [ [[package]] name = "edgezero-macros" version = "0.1.0" -source = "git+https://github.com/stackpop/edgezero?rev=170b74b#170b74bd2c9933b7d561f7ccdb67c53b239e9527" +source = "git+https://github.com/stackpop/edgezero?rev=38198f9839b70aef03ab971ae5876982773fc2a1#38198f9839b70aef03ab971ae5876982773fc2a1" dependencies = [ "log", "proc-macro2", @@ -1576,9 +1576,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.29" +version = "0.4.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" [[package]] name = "log-fastly" @@ -2264,9 +2264,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.149" +version = "1.0.150" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" dependencies = [ "itoa", "memchr", diff --git 
a/Cargo.toml b/Cargo.toml index 9f2f4c67..05a0eaf7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,10 +56,10 @@ config = "0.15.19" cookie = "0.18.1" derive_more = { version = "2.0", features = ["display", "error"] } ed25519-dalek = { version = "2.2", features = ["rand_core"] } -edgezero-adapter-axum = { git = "https://github.com/stackpop/edgezero", rev = "170b74b", default-features = false } -edgezero-adapter-cloudflare = { git = "https://github.com/stackpop/edgezero", rev = "170b74b", default-features = false } -edgezero-adapter-fastly = { git = "https://github.com/stackpop/edgezero", rev = "170b74b", default-features = false } -edgezero-core = { git = "https://github.com/stackpop/edgezero", rev = "170b74b", default-features = false } +edgezero-adapter-axum = { git = "https://github.com/stackpop/edgezero", rev = "38198f9839b70aef03ab971ae5876982773fc2a1", default-features = false } +edgezero-adapter-cloudflare = { git = "https://github.com/stackpop/edgezero", rev = "38198f9839b70aef03ab971ae5876982773fc2a1", default-features = false } +edgezero-adapter-fastly = { git = "https://github.com/stackpop/edgezero", rev = "38198f9839b70aef03ab971ae5876982773fc2a1", default-features = false } +edgezero-core = { git = "https://github.com/stackpop/edgezero", rev = "38198f9839b70aef03ab971ae5876982773fc2a1", default-features = false } error-stack = "0.6" fastly = "0.11.12" fern = "0.7.1" @@ -83,7 +83,7 @@ sha2 = "0.10.9" subtle = "2.6" temp-env = "0.3.6" tokio = { version = "1.49", features = ["sync", "macros", "io-util", "rt", "time"] } -toml = "1.0" +toml = "1.1" trusted-server-core = { path = "crates/trusted-server-core" } url = "2.5.8" urlencoding = "2.1" diff --git a/crates/integration-tests/Cargo.lock b/crates/integration-tests/Cargo.lock index 32d8f1db..766c472f 100644 --- a/crates/integration-tests/Cargo.lock +++ b/crates/integration-tests/Cargo.lock @@ -996,7 +996,7 @@ dependencies = [ [[package]] name = "edgezero-core" version = "0.1.0" -source = 
"git+https://github.com/stackpop/edgezero?rev=170b74b#170b74bd2c9933b7d561f7ccdb67c53b239e9527" +source = "git+https://github.com/stackpop/edgezero?rev=38198f9839b70aef03ab971ae5876982773fc2a1#38198f9839b70aef03ab971ae5876982773fc2a1" dependencies = [ "anyhow", "async-compression", @@ -1024,7 +1024,7 @@ dependencies = [ [[package]] name = "edgezero-macros" version = "0.1.0" -source = "git+https://github.com/stackpop/edgezero?rev=170b74b#170b74bd2c9933b7d561f7ccdb67c53b239e9527" +source = "git+https://github.com/stackpop/edgezero?rev=38198f9839b70aef03ab971ae5876982773fc2a1#38198f9839b70aef03ab971ae5876982773fc2a1" dependencies = [ "log", "proc-macro2", @@ -2142,9 +2142,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.29" +version = "0.4.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" [[package]] name = "lol_html" @@ -3410,9 +3410,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.149" +version = "1.0.150" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" dependencies = [ "itoa", "memchr", diff --git a/crates/trusted-server-adapter-fastly/src/app.rs b/crates/trusted-server-adapter-fastly/src/app.rs new file mode 100644 index 00000000..f2ac0c8c --- /dev/null +++ b/crates/trusted-server-adapter-fastly/src/app.rs @@ -0,0 +1,1078 @@ +//! Full `EdgeZero` application wiring for Trusted Server. +//! +//! Registers all routes from the legacy [`crate::route_request`] into a +//! [`RouterService`]. On successful startup, attaches [`FinalizeResponseMiddleware`] +//! (outermost) and [`AuthMiddleware`] (inner). When startup fails, +//! [`startup_error_router`] returns a bare router without middleware. 
+//! Builds the [`AppState`] once per Wasm instance. +//! +//! `EdgeZero`'s current Fastly request context exposes client IP but not TLS +//! protocol or cipher metadata. `edgezero_main` injects a trusted `fastly-ssl` +//! header after stripping client-spoofable headers, so [`detect_request_scheme`] +//! in `http_util` can still derive the correct scheme for HTTPS traffic. +//! +//! # Route inventory +//! +//! | Method | Path pattern | Handler | +//! |--------|-------------|---------| +//! | GET | `/.well-known/trusted-server.json` | [`handle_trusted_server_discovery`] | +//! | POST | `/verify-signature` | [`handle_verify_signature`] | +//! | POST | `/_ts/admin/keys/rotate` | [`handle_rotate_key`] | +//! | POST | `/_ts/admin/keys/deactivate` | [`handle_deactivate_key`] | +//! | POST | `/admin/keys/rotate` (legacy alias) | [`handle_rotate_key`] | +//! | POST | `/admin/keys/deactivate` (legacy alias) | [`handle_deactivate_key`] | +//! | POST | `/_ts/api/v1/batch-sync` | [`handle_batch_sync`] | +//! | GET | `/_ts/api/v1/identify` | [`handle_identify`] | +//! | OPTIONS | `/_ts/api/v1/identify` | [`cors_preflight_identify`] | +//! | POST | `/auction` | [`handle_auction`] | +//! | GET | `/first-party/proxy` | [`handle_first_party_proxy`] | +//! | GET | `/first-party/click` | [`handle_first_party_click`] | +//! | GET | `/first-party/sign` | [`handle_first_party_proxy_sign`] | +//! | POST | `/first-party/sign` | [`handle_first_party_proxy_sign`] | +//! | POST | `/first-party/proxy-rebuild` | [`handle_first_party_proxy_rebuild`] | +//! | GET | `/` and `/{*rest}` | tsjs (if `/static/tsjs=` prefix), integration proxy, or publisher fallback | +//! | POST, HEAD, OPTIONS, PUT, PATCH, DELETE | `/` and `/{*rest}` | integration proxy or publisher fallback | +//! | POST, HEAD, OPTIONS, PUT, PATCH, DELETE | named paths above | publisher fallback (legacy parity for non-primary methods) | +//! +//! > **Note:** Methods not in the list above (e.g. 
`TRACE`, `CONNECT`, WebDAV verbs) return a +//! > router-level 405. Legacy routing proxied *every* method through to the publisher origin. +//! > This is a known intentional restriction of the EdgeZero router; the entry-point +//! > `apply_finalize_headers` call in `main.rs` still adds TS headers to those 405 responses. +//! +//! # EC identity lifecycle +//! +//! The `EdgeZero` path mirrors the EC identity lifecycle of the legacy +//! `route_request` (tracked in issue #495): +//! +//! - [`build_ec_request_state`] runs before every dispatched route (except +//! batch-sync, which uses Bearer auth) and reproduces the legacy +//! pre-routing prelude: device signals, bot gate, `ts-eids`/`sharedid` +//! cookie capture, geo lookup, [`EcContext`] creation, and KV-graph gating. +//! - `handle_auction` and integration proxy dispatch receive the same +//! [`EcContext`], [`KvIdentityGraph`], and [`PartnerRegistry`] inputs as +//! legacy; the publisher fallback generates EC IDs for browser navigations. +//! - Handlers attach an [`EcFinalizeState`] to the response via extensions; +//! `edgezero_main` pops it and runs `ec_finalize_response` plus the +//! pull-sync hook on the converted fastly response before sending. +//! +//! ## Intentional deviations from legacy +//! +//! - **401 auth challenges**: [`AuthMiddleware`] short-circuits before the +//! handler runs, so no EC state is built and `ec_finalize_response` does not +//! run on these responses. Legacy ran EC finalization on its own auth +//! challenges. Like the 401 geo-skip, this is privacy-conservative: no EC +//! cookies are issued to unauthenticated callers. +//! - **Streaming publisher responses** are buffered (bounded by +//! `publisher.max_buffered_body_bytes`) instead of streamed to the client. +//! - **Router-level 405s** (unregistered verbs) skip EC finalization along +//! with the middleware chain; the entry point still adds TS headers. +//! +//! # Startup error handling +//! +//! 
When [`build_state`] fails, [`startup_error_router`] returns a minimal router +//! that responds to all routes with the startup error. This router does **not** +//! attach middleware. Startup-error responses may still receive entry-point +//! finalization (geo and TS headers) when settings can be reloaded via +//! [`trusted_server_core::settings_data::get_settings`]; if settings loading itself +//! fails, they are returned without geo or TS headers. + +use std::sync::Arc; + +use edgezero_adapter_fastly::FastlyRequestContext; +use edgezero_core::app::Hooks; +use edgezero_core::context::RequestContext; +use edgezero_core::error::EdgeError; +use edgezero_core::http::{header, HandlerFuture, HeaderValue, Method, Request, Response}; +use edgezero_core::router::RouterService; +use error_stack::Report; +use trusted_server_core::auction::endpoints::handle_auction; +use trusted_server_core::auction::{build_orchestrator, AuctionOrchestrator}; +use trusted_server_core::compat; +use trusted_server_core::constants::{COOKIE_SHAREDID, COOKIE_TS_EIDS}; +use trusted_server_core::ec::batch_sync::handle_batch_sync; +use trusted_server_core::ec::consent::ec_consent_withdrawn; +use trusted_server_core::ec::identify::{cors_preflight_identify, handle_identify}; +use trusted_server_core::ec::kv::KvIdentityGraph; +use trusted_server_core::ec::rate_limiter::{FastlyRateLimiter, RATE_COUNTER_NAME}; +use trusted_server_core::ec::registry::PartnerRegistry; +use trusted_server_core::ec::EcContext; +use trusted_server_core::error::{IntoHttpResponse as _, TrustedServerError}; +use trusted_server_core::http_util::is_navigation_request; +use trusted_server_core::integrations::{IntegrationRegistry, ProxyDispatchInput}; +use trusted_server_core::platform::{ClientInfo, PlatformKvStore, RuntimeServices}; +use trusted_server_core::proxy::{ + handle_first_party_click, handle_first_party_proxy, handle_first_party_proxy_rebuild, + handle_first_party_proxy_sign, +}; +use 
trusted_server_core::publisher::{handle_publisher_request, handle_tsjs_dynamic};
+use trusted_server_core::request_signing::{
+    handle_deactivate_key, handle_rotate_key, handle_trusted_server_discovery,
+    handle_verify_signature,
+};
+use trusted_server_core::settings::Settings;
+use trusted_server_core::settings_data::get_settings;
+
+use crate::middleware::{AuthMiddleware, FinalizeResponseMiddleware};
+use crate::platform::{
+    FastlyPlatformBackend, FastlyPlatformConfigStore, FastlyPlatformGeo, FastlyPlatformHttpClient,
+    FastlyPlatformSecretStore, UnavailableKvStore,
+};
+
+// ---------------------------------------------------------------------------
+// AppState
+// ---------------------------------------------------------------------------
+
+/// Application state built once per Wasm instance and shared for its lifetime.
+///
+/// In Fastly Compute each request spawns a new Wasm instance, so this struct is
+/// effectively per-request. It holds pre-parsed settings and all service handles.
+pub(crate) struct AppState {
+    /// Parsed settings shared with middleware and handlers.
+    pub(crate) settings: Arc<Settings>,
+    /// Auction orchestrator produced by `build_orchestrator`.
+    pub(crate) orchestrator: Arc<AuctionOrchestrator>,
+    /// Integration registry used for proxy routing and the publisher fallback.
+    pub(crate) registry: Arc<IntegrationRegistry>,
+    /// KV store handle cloned into every per-request `RuntimeServices`.
+    pub(crate) kv_store: Arc<dyn PlatformKvStore>,
+}
+
+/// Build the application state, loading settings and constructing all per-application components.
+///
+/// # Errors
+///
+/// Returns an error when settings, the auction orchestrator, or the integration
+/// registry fail to initialise.
+pub(crate) fn build_state() -> Result<Arc<AppState>, Report<TrustedServerError>> {
+    build_state_from_settings(get_settings()?)
+}
+
+/// Build the application state from already-loaded [`Settings`].
+///
+/// Constructs the auction orchestrator and the integration registry from
+/// `settings`, and installs [`UnavailableKvStore`] as the KV store handle.
+///
+/// # Errors
+///
+/// Returns an error when `build_orchestrator` or `IntegrationRegistry::new`
+/// fails.
+pub(crate) fn build_state_from_settings(
+    settings: Settings,
+) -> Result<Arc<AppState>, Report<TrustedServerError>> {
+    let orchestrator = build_orchestrator(&settings)?;
+    let registry = IntegrationRegistry::new(&settings)?;
+    let kv_store = Arc::new(UnavailableKvStore) as Arc<dyn PlatformKvStore>;
+    Ok(Arc::new(AppState {
+        settings: Arc::new(settings),
+        orchestrator: Arc::new(orchestrator),
+        registry: Arc::new(registry),
+        kv_store,
+    }))
+}
+
+// ---------------------------------------------------------------------------
+// Per-request RuntimeServices
+// ---------------------------------------------------------------------------
+
+/// Construct per-request [`RuntimeServices`] from the `EdgeZero` request context.
+///
+/// Extracts the client IP address from the [`FastlyRequestContext`] extension
+/// inserted by `edgezero_adapter_fastly::dispatch`. TLS metadata is not
+/// available through the `EdgeZero` context; scheme detection relies on the
+/// trusted `fastly-ssl` header injected by `edgezero_main` after sanitization.
+fn build_per_request_services(state: &AppState, ctx: &RequestContext) -> RuntimeServices {
+    let client_ip = FastlyRequestContext::get(ctx.request()).and_then(|c| c.client_ip);
+
+    RuntimeServices::builder()
+        .config_store(Arc::new(FastlyPlatformConfigStore))
+        .secret_store(Arc::new(FastlyPlatformSecretStore))
+        .kv_store(Arc::clone(&state.kv_store))
+        .backend(Arc::new(FastlyPlatformBackend))
+        .http_client(Arc::new(FastlyPlatformHttpClient))
+        .geo(Arc::new(FastlyPlatformGeo))
+        .client_info(ClientInfo {
+            client_ip,
+            // TLS protocol and cipher are never populated on this path.
+            tls_protocol: None,
+            tls_cipher: None,
+        })
+        .build()
+}
+
+/// Returns the seven HTTP methods registered for the publisher fallback routes.
+fn publisher_fallback_methods() -> [Method; 7] {
+    [
+        Method::GET,
+        Method::POST,
+        Method::HEAD,
+        Method::OPTIONS,
+        Method::PUT,
+        Method::PATCH,
+        Method::DELETE,
+    ]
+}
+
+/// Returns `true` for `GET` requests whose path starts with `/static/tsjs=`,
+/// which the fallback dispatcher serves via `handle_tsjs_dynamic`.
+fn uses_dynamic_tsjs_fallback(method: &Method, path: &str) -> bool {
+    *method == Method::GET && path.starts_with("/static/tsjs=")
+}
+
+// ---------------------------------------------------------------------------
+// EC request state
+// ---------------------------------------------------------------------------
+
+/// EC state threaded from route handlers to the `main.rs` entry point via
+/// response extensions.
+///
+/// `edgezero_main` pops this from the response after dispatch and runs
+/// [`trusted_server_core::ec::finalize::ec_finalize_response`] plus the
+/// pull-sync hook on the converted fastly response — the same EC response
+/// lifecycle the legacy path drives through `RouteResult`.
+#[derive(Clone)]
+pub(crate) struct EcFinalizeState {
+    pub(crate) ec_context: EcContext,
+    pub(crate) finalize_kv_graph: Option<KvIdentityGraph>,
+    // NOTE(review): cookie value type assumed to be `String`; confirm against
+    // the return type of `crate::extract_cookie_value`.
+    pub(crate) eids_cookie: Option<String>,
+    pub(crate) sharedid_cookie: Option<String>,
+    pub(crate) is_real_browser: bool,
+}
+
+/// Per-request EC identity state built before dispatch, mirroring the
+/// pre-routing prelude of the legacy `route_request` (device signals, bot
+/// gate, cookie capture, consent/geo-aware [`EcContext`], and KV graph
+/// gating).
+struct EcRequestState {
+    ec_context: EcContext,
+    kv_graph: Option<KvIdentityGraph>,
+    finalize_kv_graph: Option<KvIdentityGraph>,
+    // NOTE(review): cookie value type assumed to be `String`; confirm against
+    // the return type of `crate::extract_cookie_value`.
+    eids_cookie: Option<String>,
+    sharedid_cookie: Option<String>,
+    is_real_browser: bool,
+    /// Error from [`EcContext`] creation. When set, handlers return this as
+    /// the response without running the route handler (legacy parity: the
+    /// legacy path short-circuits with an error response and a default
+    /// context).
+    setup_error: Option<Report<TrustedServerError>>,
+}
+
+impl EcRequestState {
+    /// Converts the request-side state into the [`EcFinalizeState`] attached
+    /// to the response, dropping `kv_graph` and `setup_error`.
+    fn into_finalize_state(self) -> EcFinalizeState {
+        EcFinalizeState {
+            ec_context: self.ec_context,
+            finalize_kv_graph: self.finalize_kv_graph,
+            eids_cookie: self.eids_cookie,
+            sharedid_cookie: self.sharedid_cookie,
+            is_real_browser: self.is_real_browser,
+        }
+    }
+}
+
+/// Builds the per-request EC state from a headers-only fastly request copy,
+/// mirroring the legacy `route_request` prelude step by step.
+fn build_ec_request_state(
+    settings: &Settings,
+    services: &RuntimeServices,
+    req: &Request,
+) -> EcRequestState {
+    let fastly_ref = compat::to_fastly_request_ref(req);
+
+    let device_signals = crate::derive_device_signals(&fastly_ref);
+    let is_real_browser = device_signals.looks_like_browser();
+    if !is_real_browser {
+        log::info!(
+            "Bot gate: blocking EC operations (ja4={:?}, platform={:?}, is_mobile={})",
+            device_signals.ja4_class,
+            device_signals.platform_class,
+            device_signals.is_mobile,
+        );
+    }
+
+    let eids_cookie = crate::extract_cookie_value(&fastly_ref, COOKIE_TS_EIDS);
+    let sharedid_cookie = crate::extract_cookie_value(&fastly_ref, COOKIE_SHAREDID);
+
+    // A failed geo lookup is logged and treated as "no geo info" rather than
+    // failing the request.
+    let geo_info = services
+        .geo()
+        .lookup(services.client_info().client_ip)
+        .unwrap_or_else(|e| {
+            log::warn!("geo lookup failed during EC setup: {e}");
+            None
+        });
+
+    let (ec_context, setup_error) =
+        match EcContext::read_from_request_with_geo(settings, &fastly_ref, geo_info.as_ref()) {
+            Ok(mut context) => {
+                context.set_device_signals(device_signals);
+                (context, None)
+            }
+            Err(report) => (EcContext::default(), Some(report)),
+        };
+
+    // Bot gate: suppress KV-backed EC writes for unrecognized clients, except
+    // consent withdrawals. Revocations keep the write path so tombstones stay
+    // authoritative even for privacy-extension-heavy clients.
+    let kv_graph = crate::maybe_identity_graph(settings);
+    let finalize_kv_graph = if setup_error.is_none()
+        && (is_real_browser || ec_consent_withdrawn(ec_context.consent()))
+    {
+        kv_graph.clone()
+    } else {
+        None
+    };
+    let kv_graph = if is_real_browser { kv_graph } else { None };
+
+    EcRequestState {
+        ec_context,
+        kv_graph,
+        finalize_kv_graph,
+        eids_cookie,
+        sharedid_cookie,
+        is_real_browser,
+        setup_error,
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Dispatch
+// ---------------------------------------------------------------------------
+
+/// Runs a named route: builds per-request services and EC state, invokes the
+/// handler, converts handler errors into HTTP responses via [`http_error`],
+/// and attaches the [`EcFinalizeState`] to the response extensions.
+///
+/// Always returns `Ok`; failures are rendered as error responses.
+async fn execute_named(
+    state: Arc<AppState>,
+    ctx: RequestContext,
+    handler: NamedRouteHandler,
+) -> Result<Response, EdgeError> {
+    let services = build_per_request_services(&state, &ctx);
+    let req = ctx.into_request();
+
+    // S2S batch sync uses Bearer auth (not EC cookies), so it skips EC
+    // context creation entirely — mirroring the dedicated early arm in the
+    // legacy route_request.
+    if matches!(handler, NamedRouteHandler::BatchSync) {
+        return Ok(run_batch_sync(&state, req));
+    }
+
+    let mut ec = build_ec_request_state(&state.settings, &services, &req);
+    let mut response = match ec.setup_error.take() {
+        Some(report) => http_error(&report),
+        None => run_named_route(&state, &services, req, handler, &mut ec)
+            .await
+            .unwrap_or_else(|e| http_error(&e)),
+    };
+    response.extensions_mut().insert(ec.into_finalize_state());
+    Ok(response)
+}
+
+/// Dispatches `req` to the core handler selected by `handler`.
+///
+/// # Errors
+///
+/// Propagates the error report returned by the selected handler or by
+/// identity-graph / partner-registry construction.
+///
+/// # Panics
+///
+/// Panics if called with [`NamedRouteHandler::BatchSync`]; `execute_named`
+/// routes that variant to [`run_batch_sync`] before reaching this function.
+async fn run_named_route(
+    state: &AppState,
+    services: &RuntimeServices,
+    req: Request,
+    handler: NamedRouteHandler,
+    ec: &mut EcRequestState,
+) -> Result<Response, Report<TrustedServerError>> {
+    match handler {
+        NamedRouteHandler::TrustedServerDiscovery => {
+            handle_trusted_server_discovery(&state.settings, services, req)
+        }
+        NamedRouteHandler::VerifySignature => {
+            handle_verify_signature(&state.settings, services, req)
+        }
+        NamedRouteHandler::RotateKey => handle_rotate_key(&state.settings, services, req),
+        NamedRouteHandler::DeactivateKey => handle_deactivate_key(&state.settings, services, req),
+        NamedRouteHandler::BatchSync => {
+            // Dispatched by execute_named before EC state is built.
+            unreachable!("batch-sync should be handled by run_batch_sync")
+        }
+        NamedRouteHandler::Identify => {
+            let fastly_ref = compat::to_fastly_request_ref(&req);
+            if req.method() == Method::OPTIONS {
+                cors_preflight_identify(&state.settings, &fastly_ref)
+                    .map(compat::from_fastly_response)
+            } else {
+                let kv = crate::require_identity_graph(&state.settings)?;
+                let partner_registry = PartnerRegistry::from_config(&state.settings.ec.partners)?;
+                handle_identify(
+                    &state.settings,
+                    &kv,
+                    &partner_registry,
+                    &fastly_ref,
+                    &ec.ec_context,
+                )
+                .map(compat::from_fastly_response)
+            }
+        }
+        NamedRouteHandler::Auction => {
+            let partner_registry = PartnerRegistry::from_config(&state.settings.ec.partners)?;
+            // An empty partner registry is passed to the auction as `None`.
+            let registry_ref = if partner_registry.is_empty() {
+                None
+            } else {
+                Some(&partner_registry)
+            };
+            handle_auction(
+                &state.settings,
+                &state.orchestrator,
+                ec.kv_graph.as_ref(),
+                registry_ref,
+                &ec.ec_context,
+                services,
+                req,
+            )
+            .await
+        }
+        NamedRouteHandler::FirstPartyProxy => {
+            handle_first_party_proxy(&state.settings, services, req).await
+        }
+        NamedRouteHandler::FirstPartyClick => {
+            handle_first_party_click(&state.settings, services, req).await
+        }
+        NamedRouteHandler::FirstPartySign => {
+            handle_first_party_proxy_sign(&state.settings, services, req).await
+        }
+        NamedRouteHandler::FirstPartyProxyRebuild => {
+            handle_first_party_proxy_rebuild(&state.settings, services, req).await
+        }
+    }
+}
+
+/// Handles `POST /_ts/api/v1/batch-sync`, mirroring the legacy arm: identity
+/// graph + partner registry + rate limiter, with a default EC context for
+/// response finalization.
+fn run_batch_sync(state: &AppState, req: Request) -> Response {
+    // Device signals and cookies come from a headers-only fastly copy taken
+    // before the conversion below consumes the request body.
+    let fastly_ref = compat::to_fastly_request_ref(&req);
+    let device_signals = crate::derive_device_signals(&fastly_ref);
+    let is_real_browser = device_signals.looks_like_browser();
+    let eids_cookie = crate::extract_cookie_value(&fastly_ref, COOKIE_TS_EIDS);
+    let sharedid_cookie = crate::extract_cookie_value(&fastly_ref, COOKIE_SHAREDID);
+
+    let result = crate::require_identity_graph(&state.settings).and_then(|kv| {
+        let partner_registry = PartnerRegistry::from_config(&state.settings.ec.partners)?;
+        let limiter = FastlyRateLimiter::new(RATE_COUNTER_NAME);
+        let fastly_req = compat::to_fastly_request(req);
+        handle_batch_sync(&kv, &partner_registry, &limiter, fastly_req)
+            .map(compat::from_fastly_response)
+    });
+
+    let mut response = result.unwrap_or_else(|e| http_error(&e));
+    // Legacy parity: batch-sync responses still pass through
+    // ec_finalize_response with a default EC context and no finalize KV graph.
+    response.extensions_mut().insert(EcFinalizeState {
+        ec_context: EcContext::default(),
+        finalize_kv_graph: None,
+        eids_cookie,
+        sharedid_cookie,
+        is_real_browser,
+    });
+    response
+}
+
+/// Router entry point for the catch-all routes: builds per-request services
+/// and delegates to [`dispatch_fallback`]. Always returns `Ok`.
+async fn execute_fallback(
+    state: Arc<AppState>,
+    ctx: RequestContext,
+) -> Result<Response, EdgeError> {
+    let services = build_per_request_services(&state, &ctx);
+    let req = ctx.into_request();
+    Ok(dispatch_fallback(&state, &services, req).await)
+}
+
+/// Serves a fallback request from, in order: the dynamic tsjs handler, a
+/// matching integration proxy route, or the publisher origin.
+///
+/// Errors are rendered through [`http_error`]; every returned response
+/// carries an [`EcFinalizeState`] in its extensions.
+async fn dispatch_fallback(state: &AppState, services: &RuntimeServices, req: Request) -> Response {
+    let path = req.uri().path().to_string();
+    let method = req.method().clone();
+
+    let mut ec = build_ec_request_state(&state.settings, services, &req);
+    if let Some(report) = ec.setup_error.take() {
+        let mut response = http_error(&report);
+        response.extensions_mut().insert(ec.into_finalize_state());
+        return response;
+    }
+
+    let result = if uses_dynamic_tsjs_fallback(&method, &path) {
+        handle_tsjs_dynamic(&req, &state.registry)
+    } else if state.registry.has_route(&method, &path) {
+        // Integration-proxy responses are not bounded by publisher.max_buffered_body_bytes.
+        // Only the handle_publisher_request branch below routes through
+        // resolve_publisher_response_buffered. Integration responses are small in practice
+        // and the EdgeZero flag is off by default; extend the cap here if that changes.
+        state
+            .registry
+            .handle_proxy(ProxyDispatchInput {
+                method: &method,
+                path: &path,
+                settings: &state.settings,
+                kv: ec.kv_graph.as_ref(),
+                ec_context: &mut ec.ec_context,
+                services,
+                req,
+            })
+            .await
+            .unwrap_or_else(|| {
+                Err(Report::new(TrustedServerError::BadRequest {
+                    message: format!("Unknown integration route: {path}"),
+                }))
+            })
+    } else {
+        // Generate an EC ID if needed — mirrors the legacy catch-all arm.
+        // Only for document navigations by recognised browsers; subresource
+        // requests may lack consent signals such as Sec-GPC.
+        if ec.is_real_browser && is_navigation_request(&req) {
+            if let Err(err) = ec
+                .ec_context
+                .generate_if_needed(&state.settings, ec.kv_graph.as_ref())
+            {
+                log::warn!("EC generation failed for publisher proxy: {err:?}");
+            }
+        }
+
+        handle_publisher_request(&state.settings, &state.registry, services, req)
+            .await
+            .and_then(|pub_response| {
+                crate::resolve_publisher_response_buffered(
+                    pub_response,
+                    &state.settings,
+                    &state.registry,
+                )
+            })
+    };
+
+    let mut response = result.unwrap_or_else(|e| http_error(&e));
+    response.extensions_mut().insert(ec.into_finalize_state());
+    response
+}
+
+// ---------------------------------------------------------------------------
+// Error helper
+// ---------------------------------------------------------------------------
+
+/// Convert a [`Report`] into an HTTP [`Response`],
+/// mirroring [`crate::http_error_response`] exactly.
+///
+/// The near-identical function in `main.rs` is intentional: the legacy path
+/// uses fastly HTTP types while this path uses `edgezero_core` types. The
+/// duplication will be removed when `legacy_main` is deleted in PR 15.
+pub(crate) fn http_error(report: &Report<TrustedServerError>) -> Response {
+    let root_error = report.current_context();
+    log::error!("Error occurred: {:?}", report);
+
+    // Only the user-facing message is sent to the client; the full report
+    // goes to the log above.
+    let body = edgezero_core::body::Body::from(format!("{}\n", root_error.user_message()));
+    let mut response = Response::new(body);
+    *response.status_mut() = root_error.status_code();
+    response.headers_mut().insert(
+        header::CONTENT_TYPE,
+        HeaderValue::from_static("text/plain; charset=utf-8"),
+    );
+    response
+}
+
+// ---------------------------------------------------------------------------
+// Startup error fallback
+// ---------------------------------------------------------------------------
+
+/// Returns a [`RouterService`] that responds to every registered route with the startup error.
+///
+/// Called when [`build_state`] fails so that request handling degrades to a
+/// structured HTTP error response rather than an unrecoverable panic.
+fn startup_error_router(e: &Report<TrustedServerError>) -> RouterService {
+    let message = Arc::new(format!("{}\n", e.current_context().user_message()));
+    let status = e.current_context().status_code();
+
+    // Handler factory: each registered route gets its own closure sharing the
+    // same message; the body text is cloned per request.
+    let make = move |msg: Arc<String>| {
+        move |_ctx: RequestContext| {
+            let body = edgezero_core::body::Body::from((*msg).clone());
+            let mut resp = Response::new(body);
+            *resp.status_mut() = status;
+            resp.headers_mut().insert(
+                header::CONTENT_TYPE,
+                HeaderValue::from_static("text/plain; charset=utf-8"),
+            );
+            async move { Ok::<Response, EdgeError>(resp) }
+        }
+    };
+
+    let mut router = RouterService::builder();
+    for method in publisher_fallback_methods() {
+        router = router.route("/", method.clone(), make(Arc::clone(&message)));
+        router = router.route("/{*rest}", method, make(Arc::clone(&message)));
+    }
+    router.build()
+}
+
+// ---------------------------------------------------------------------------
+// Route registration
+// ---------------------------------------------------------------------------
+
+/// Identifies which core handler a named route dispatches to.
+#[derive(Clone, Copy)]
+enum NamedRouteHandler {
+    TrustedServerDiscovery,
+    VerifySignature,
+    RotateKey,
+    DeactivateKey,
+    BatchSync,
+    Identify,
+    Auction,
+    FirstPartyProxy,
+    FirstPartyClick,
+    FirstPartySign,
+    FirstPartyProxyRebuild,
+}
+
+/// One row of the named-route table: an exact path, the methods served by
+/// `handler`, and the handler itself.
+struct NamedRoute {
+    path: &'static str,
+    primary_methods: &'static [Method],
+    handler: NamedRouteHandler,
+}
+
+/// Table of all named routes registered by the application router.
+const NAMED_ROUTES: &[NamedRoute] = &[
+    NamedRoute {
+        path: "/.well-known/trusted-server.json",
+        primary_methods: &[Method::GET],
+        handler: NamedRouteHandler::TrustedServerDiscovery,
+    },
+    NamedRoute {
+        path: "/verify-signature",
+        primary_methods: &[Method::POST],
+        handler: NamedRouteHandler::VerifySignature,
+    },
+    NamedRoute {
+        path: "/_ts/admin/keys/rotate",
+        primary_methods: &[Method::POST],
+        handler: NamedRouteHandler::RotateKey,
+    },
+    NamedRoute {
+        path: "/_ts/admin/keys/deactivate",
+        primary_methods: &[Method::POST],
+        handler: NamedRouteHandler::DeactivateKey,
+    },
+    // Legacy aliases without the `/_ts` prefix, kept for parity with
+    // route_request in main.rs. Auth coverage comes from settings.handlers
+    // (enforced by AuthMiddleware), same as on the legacy path.
+    NamedRoute {
+        path: "/admin/keys/rotate",
+        primary_methods: &[Method::POST],
+        handler: NamedRouteHandler::RotateKey,
+    },
+    NamedRoute {
+        path: "/admin/keys/deactivate",
+        primary_methods: &[Method::POST],
+        handler: NamedRouteHandler::DeactivateKey,
+    },
+    NamedRoute {
+        path: "/_ts/api/v1/batch-sync",
+        primary_methods: &[Method::POST],
+        handler: NamedRouteHandler::BatchSync,
+    },
+    NamedRoute {
+        path: "/_ts/api/v1/identify",
+        primary_methods: &[Method::GET, Method::OPTIONS],
+        handler: NamedRouteHandler::Identify,
+    },
+    NamedRoute {
+        path: "/auction",
+        primary_methods: &[Method::POST],
+        handler: NamedRouteHandler::Auction,
+    },
+    NamedRoute {
+        path: "/first-party/proxy",
+        primary_methods: &[Method::GET],
+        handler: NamedRouteHandler::FirstPartyProxy,
+    },
+    NamedRoute {
+        path: "/first-party/click",
+        primary_methods: &[Method::GET],
+        handler: NamedRouteHandler::FirstPartyClick,
+    },
+    NamedRoute {
+        path: "/first-party/sign",
+        primary_methods: &[Method::GET, Method::POST],
+        handler: NamedRouteHandler::FirstPartySign,
+    },
+    NamedRoute {
+        path: "/first-party/proxy-rebuild",
+        primary_methods: &[Method::POST],
+        handler: NamedRouteHandler::FirstPartyProxyRebuild,
+    },
+];
+
+/// Builds the router closure for a named route; each call clones the shared
+/// state and boxes an `execute_named` future for `handler`.
+fn named_route_handler(
+    state: Arc<AppState>,
+    handler: NamedRouteHandler,
+) -> impl Fn(RequestContext) -> HandlerFuture + Clone + Send + Sync + 'static {
+    move |ctx: RequestContext| {
+        let state = Arc::clone(&state);
+        Box::pin(execute_named(state, ctx, handler))
+    }
+}
+
+/// Builds the router closure for the fallback routes; each call clones the
+/// shared state and boxes an `execute_fallback` future.
+fn fallback_route_handler(
+    state: Arc<AppState>,
+) -> impl Fn(RequestContext) -> HandlerFuture + Clone + Send + Sync + 'static {
+    move |ctx: RequestContext| {
+        let state = Arc::clone(&state);
+        Box::pin(execute_fallback(state, ctx))
+    }
+}
+
+// ---------------------------------------------------------------------------
+// TrustedServerApp
+// ---------------------------------------------------------------------------
+
+/// `EdgeZero` [`Hooks`] implementation for the Trusted Server application.
+pub struct TrustedServerApp;
+
+impl TrustedServerApp {
+    /// Builds the full router for an already-constructed [`AppState`]:
+    /// middleware first, then every named route, then the catch-all
+    /// publisher fallback routes.
+    fn routes_for_state(state: &Arc<AppState>) -> RouterService {
+        let mut router = RouterService::builder()
+            .middleware(FinalizeResponseMiddleware::new(
+                Arc::clone(&state.settings),
+                Arc::new(FastlyPlatformGeo),
+            ))
+            .middleware(AuthMiddleware::new(Arc::clone(&state.settings)));
+
+        let fallback_handler = fallback_route_handler(Arc::clone(state));
+
+        // matchit prefers exact path+method over a wildcard catch-all. Each
+        // named route is registered from this single table, then every
+        // non-primary publisher fallback method is registered from the same
+        // row. Adding a named route now requires editing only this table.
+        for route in NAMED_ROUTES {
+            for method in route.primary_methods {
+                router = router.route(
+                    route.path,
+                    method.clone(),
+                    named_route_handler(Arc::clone(state), route.handler),
+                );
+            }
+
+            for method in publisher_fallback_methods() {
+                if !route.primary_methods.contains(&method) {
+                    router = router.route(route.path, method, fallback_handler.clone());
+                }
+            }
+        }
+
+        // matchit's `/{*rest}` does not match the bare root `/` — register
+        // explicit root routes so `/` reaches the publisher fallback too.
+        for method in publisher_fallback_methods() {
+            router = router.route("/", method.clone(), fallback_handler.clone());
+            router = router.route("/{*rest}", method, fallback_handler.clone());
+        }
+
+        router.build()
+    }
+}
+
+impl Hooks for TrustedServerApp {
+    /// Application name reported to `EdgeZero`.
+    fn name() -> &'static str {
+        "TrustedServer"
+    }
+
+    /// Builds the application router, falling back to
+    /// [`startup_error_router`] (after logging) when [`build_state`] fails.
+    fn routes() -> RouterService {
+        let state = match build_state() {
+            Ok(s) => s,
+            Err(ref e) => {
+                log::error!("failed to build application state: {:?}", e);
+                return startup_error_router(e);
+            }
+        };
+
+        Self::routes_for_state(&state)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{build_state_from_settings, startup_error_router, TrustedServerApp};
+
+    use edgezero_core::body::Body;
+    use edgezero_core::http::{header, request_builder, Method, StatusCode};
+    use edgezero_core::router::RouterService;
+    use error_stack::Report;
+    use futures::executor::block_on;
+    use trusted_server_core::constants::HEADER_X_GEO_INFO_AVAILABLE;
+    use trusted_server_core::error::TrustedServerError;
+    use trusted_server_core::settings::Settings;
+
+    fn empty_request(method: Method, path: &str) -> edgezero_core::http::Request {
+        // EC request-state construction converts requests to fastly requests,
+        // which require absolute URLs — mirror the absolute URIs that the
+        // fastly adapter provides in production.
+ let uri = format!("https://test-publisher.com{path}"); + request_builder() + .method(method) + .uri(uri) + .body(Body::empty()) + .expect("should build request") + } + + fn test_router() -> RouterService { + let settings = Settings::from_toml( + r#" + [[handlers]] + path = "^/_ts/admin" + username = "admin" + password = "admin-pass" + + [[handlers]] + path = "^/admin" + username = "admin" + password = "admin-pass" + + [publisher] + domain = "test-publisher.com" + cookie_domain = ".test-publisher.com" + origin_url = "https://origin.test-publisher.com" + proxy_secret = "unit-test-proxy-secret" + + [ec] + passphrase = "test-secret-key-32-bytes-minimum" + + [request_signing] + enabled = false + config_store_id = "test-config-store-id" + secret_store_id = "test-secret-store-id" + + [integrations.prebid] + enabled = true + server_url = "https://test-prebid.com/openrtb2/auction" + + [auction] + enabled = true + providers = ["prebid"] + timeout_ms = 2000 + "#, + ) + .expect("should parse test settings"); + let state = build_state_from_settings(settings).expect("should build test state"); + TrustedServerApp::routes_for_state(&state) + } + + #[test] + fn startup_error_router_handles_head_and_options() { + let report = Report::new(TrustedServerError::BadRequest { + message: "startup failed".to_string(), + }); + let router = startup_error_router(&report); + + let head_response = block_on(router.oneshot(empty_request(Method::HEAD, "/"))); + let options_response = block_on(router.oneshot(empty_request(Method::OPTIONS, "/any"))); + + assert_eq!( + head_response.status(), + StatusCode::BAD_REQUEST, + "HEAD should use the degraded startup-error response" + ); + assert_eq!( + options_response.status(), + StatusCode::BAD_REQUEST, + "OPTIONS should use the degraded startup-error response" + ); + assert_eq!( + head_response + .headers() + .get(header::CONTENT_TYPE) + .and_then(|value| value.to_str().ok()), + Some("text/plain; charset=utf-8"), + "startup errors should stay plain-text 
for HEAD requests" + ); + assert_eq!( + options_response + .headers() + .get(header::CONTENT_TYPE) + .and_then(|value| value.to_str().ok()), + Some("text/plain; charset=utf-8"), + "startup errors should stay plain-text for OPTIONS requests" + ); + } + + #[test] + fn dynamic_tsjs_fallback_is_get_only() { + assert!( + super::uses_dynamic_tsjs_fallback(&Method::GET, "/static/tsjs=tsjs-unified.js"), + "GET should use the dynamic tsjs shortcut" + ); + assert!( + !super::uses_dynamic_tsjs_fallback(&Method::HEAD, "/static/tsjs=tsjs-unified.js"), + "HEAD should fall through to the publisher/integration fallback" + ); + assert!( + !super::uses_dynamic_tsjs_fallback(&Method::OPTIONS, "/static/tsjs=tsjs-unified.js"), + "OPTIONS should fall through to the publisher/integration fallback" + ); + } + + // --------------------------------------------------------------------------- + // Full EdgeZero dispatch-path tests + // --------------------------------------------------------------------------- + + #[test] + fn dispatch_auth_rejected_401_carries_finalize_headers() { + // Verifies FinalizeResponseMiddleware is outermost: an auth-rejected 401 + // must still carry standard TS headers before reaching the client. + // + // The test settings protects `^/_ts/admin` with basic-auth. + // Sending the request without an Authorization header causes AuthMiddleware + // to short-circuit with a 401, which then bubbles through + // FinalizeResponseMiddleware for header injection. + // + // This is safe to run without Viceroy: enforce_basic_auth is pure Rust + // (reads settings + request headers only) and FastlyPlatformGeo.lookup(None) + // short-circuits without calling any Fastly ABI. 
+ let router = test_router(); + let req = empty_request(Method::POST, "/_ts/admin/keys/rotate"); + + let response = block_on(router.oneshot(req)); + + assert_eq!( + response.status(), + StatusCode::UNAUTHORIZED, + "request without credentials should be rejected" + ); + assert_eq!( + response + .headers() + .get(HEADER_X_GEO_INFO_AVAILABLE) + .and_then(|v| v.to_str().ok()), + Some("false"), + "FinalizeResponseMiddleware must run even for auth-rejected responses" + ); + } + + #[test] + fn dispatch_admin_alias_routes_are_registered_and_auth_gated() { + // Parity guard for the legacy non-`/_ts` admin aliases: both alias + // paths must be registered (no router-level 405) and protected by the + // `^/admin` handler in the test settings, mirroring how legacy + // route_request applies enforce_basic_auth before its route match. + let router = test_router(); + + for path in ["/admin/keys/rotate", "/admin/keys/deactivate"] { + let req = empty_request(Method::POST, path); + + let response = block_on(router.oneshot(req)); + + assert_eq!( + response.status(), + StatusCode::UNAUTHORIZED, + "POST {path} without credentials should be rejected by AuthMiddleware" + ); + } + } + + #[test] + fn dispatch_identify_options_routes_to_cors_preflight() { + // Parity guard: OPTIONS /_ts/api/v1/identify must reach + // cors_preflight_identify (200 for a request without an Origin + // header), not the publisher fallback, which would fail with a + // gateway error without a live backend. + let router = test_router(); + let response = + block_on(router.oneshot(empty_request(Method::OPTIONS, "/_ts/api/v1/identify"))); + + assert_eq!( + response.status(), + StatusCode::OK, + "OPTIONS identify should be answered by the CORS preflight handler" + ); + } + + #[test] + fn dispatch_identify_get_routes_to_identity_handler() { + // Parity guard: GET /_ts/api/v1/identify must reach the identify + // handler chain. 
The test settings configure no ec.ec_store, so + // require_identity_graph fails with a KvStore error (503) — proving + // the request was NOT proxied to the publisher origin. + let router = test_router(); + let response = block_on(router.oneshot(empty_request(Method::GET, "/_ts/api/v1/identify"))); + + assert_eq!( + response.status(), + StatusCode::SERVICE_UNAVAILABLE, + "GET identify without ec_store should fail with the KvStore error, not a publisher proxy error" + ); + } + + #[test] + fn dispatch_batch_sync_routes_to_batch_sync_handler() { + // Parity guard: POST /_ts/api/v1/batch-sync must reach the batch-sync + // handler chain instead of forwarding the request (body and + // Authorization header included) to the publisher origin. With no + // ec.ec_store configured, require_identity_graph fails with a KvStore + // error (503). + let router = test_router(); + let response = + block_on(router.oneshot(empty_request(Method::POST, "/_ts/api/v1/batch-sync"))); + + assert_eq!( + response.status(), + StatusCode::SERVICE_UNAVAILABLE, + "POST batch-sync without ec_store should fail with the KvStore error, not reach the publisher" + ); + } + + #[test] + fn dispatch_fallback_attaches_ec_finalize_state() { + // The publisher fallback must thread EC finalize state to the entry + // point via response extensions — even on error responses — so that + // edgezero_main can run ec_finalize_response and pull sync. + let router = test_router(); + let response = block_on(router.oneshot(empty_request(Method::GET, "/some-page"))); + + assert!( + response.extensions().get::().is_some(), + "publisher fallback responses should carry EcFinalizeState for entry-point EC finalization" + ); + } + + #[test] + fn dispatch_named_route_attaches_ec_finalize_state() { + // Named routes must also thread EC finalize state, mirroring how the + // legacy path finalizes every response with the pre-routing EcContext. 
+ let router = test_router(); + let response = block_on(router.oneshot(empty_request( + Method::GET, + "/.well-known/trusted-server.json", + ))); + + assert!( + response + .extensions() + .get::() + .is_some(), + "named-route responses should carry EcFinalizeState for entry-point EC finalization" + ); + } + + #[test] + fn dispatch_head_on_named_get_route_falls_through_to_publisher_fallback() { + // Regression guard: HEAD /first-party/proxy must reach the publisher + // fallback, not return a router-level 405. Legacy route_request proxies + // every (method, path) combination not matched by a specific arm through + // to the publisher origin. + // + // Without a live backend the publisher proxy errors (502/503), but the + // important invariant is that the status is NOT 405. + let router = test_router(); + let req = empty_request(Method::HEAD, "/first-party/proxy"); + + let response = block_on(router.oneshot(req)); + + assert_ne!( + response.status(), + StatusCode::METHOD_NOT_ALLOWED, + "HEAD on a named GET path should reach the publisher fallback, not return 405" + ); + } + + #[test] + fn dispatch_unregistered_method_returns_405_at_router_level() { + // Documents the known router-level behavior for verbs outside the + // publisher_fallback_methods() list (e.g. TRACE, CONNECT): the RouterService + // returns 405 before the middleware chain runs, so FinalizeResponseMiddleware + // does not inject TS headers at this layer. + // + // The full-system guarantee (TS headers on ALL responses including these 405s) + // is maintained by the entry-point apply_finalize_headers call in main.rs. 
+ let router = test_router(); + let req = empty_request( + Method::from_bytes(b"TRACE").expect("should parse TRACE"), + "/", + ); + + let response = block_on(router.oneshot(req)); + + assert_eq!( + response.status(), + StatusCode::METHOD_NOT_ALLOWED, + "unregistered method should return 405 from the router layer" + ); + assert!( + response + .headers() + .get(HEADER_X_GEO_INFO_AVAILABLE) + .is_none(), + "router-level 405 bypasses FinalizeResponseMiddleware; main.rs entry-point covers this" + ); + } +} diff --git a/crates/trusted-server-adapter-fastly/src/main.rs b/crates/trusted-server-adapter-fastly/src/main.rs index b1e59e35..dcda6d4d 100644 --- a/crates/trusted-server-adapter-fastly/src/main.rs +++ b/crates/trusted-server-adapter-fastly/src/main.rs @@ -1,19 +1,21 @@ +use std::sync::Arc; + +use edgezero_adapter_fastly::{into_core_request, FastlyConfigStore}; +use edgezero_core::app::Hooks as _; use edgezero_core::body::Body as EdgeBody; +use edgezero_core::config_store::ConfigStoreHandle; use edgezero_core::http::{ - header, HeaderName, HeaderValue, Method, Request as HttpRequest, Response as HttpResponse, + header, HeaderValue, Method, Request as HttpRequest, Response as HttpResponse, }; use error_stack::Report; use fastly::http::Method as FastlyMethod; use fastly::{Request as FastlyRequest, Response as FastlyResponse}; use trusted_server_core::auction::endpoints::handle_auction; -use trusted_server_core::auction::{build_orchestrator, AuctionOrchestrator}; +use trusted_server_core::auction::AuctionOrchestrator; use trusted_server_core::auth::enforce_basic_auth; use trusted_server_core::compat; -use trusted_server_core::constants::{ - COOKIE_SHAREDID, COOKIE_TS_EIDS, ENV_FASTLY_IS_STAGING, ENV_FASTLY_SERVICE_VERSION, - HEADER_X_GEO_INFO_AVAILABLE, HEADER_X_TS_ENV, HEADER_X_TS_VERSION, -}; +use trusted_server_core::constants::{COOKIE_SHAREDID, COOKIE_TS_EIDS}; use trusted_server_core::ec::batch_sync::handle_batch_sync; use 
trusted_server_core::ec::consent::ec_consent_withdrawn; use trusted_server_core::ec::device::DeviceSignals; @@ -30,6 +32,7 @@ use trusted_server_core::error::{IntoHttpResponse, TrustedServerError}; use trusted_server_core::geo::GeoInfo; use trusted_server_core::http_util::is_navigation_request; use trusted_server_core::integrations::{IntegrationRegistry, ProxyDispatchInput}; +use trusted_server_core::platform::PlatformGeo as _; use trusted_server_core::platform::RuntimeServices; use trusted_server_core::proxy::{ handle_asset_proxy_request, handle_first_party_click, handle_first_party_proxy, @@ -37,7 +40,7 @@ use trusted_server_core::proxy::{ AssetProxyCachePolicy, }; use trusted_server_core::publisher::{ - handle_publisher_request, handle_tsjs_dynamic, stream_publisher_body, + handle_publisher_request, handle_tsjs_dynamic, stream_publisher_body, BoundedWriter, OwnedProcessResponseParams, PublisherResponse, }; use trusted_server_core::request_signing::{ @@ -47,21 +50,27 @@ use trusted_server_core::request_signing::{ use trusted_server_core::settings::Settings; use trusted_server_core::settings_data::get_settings; +mod app; mod error; mod logging; mod management_api; +mod middleware; mod platform; #[cfg(test)] mod route_tests; +use crate::app::{build_state, TrustedServerApp}; use crate::error::to_error_response; -use crate::logging::init_logger; -use crate::platform::{build_runtime_services, UnavailableKvStore}; +use crate::middleware::{apply_finalize_headers, resolve_geo_for_response, HEADER_X_TS_FINALIZED}; +use crate::platform::{build_runtime_services, FastlyPlatformGeo}; + +const TRUSTED_SERVER_CONFIG_STORE: &str = "trusted_server_config"; +const EDGEZERO_ENABLED_KEY: &str = "edgezero_enabled"; /// Result of routing a request, distinguishing buffered from streaming publisher responses. /// /// The streaming arm keeps the publisher body out of WASM heap until it is written directly -/// to the client via [`fastly::Response::stream_to_client`]. 
All other routes are buffered. +/// to the client via [`fastly::Response::stream_to_client`]. All other legacy routes are buffered. /// /// [`AuthChallenge`](HandlerOutcome::AuthChallenge) marks responses produced by this server's /// own `enforce_basic_auth` so the geo-lookup gate can distinguish them from origin-forwarded @@ -91,6 +100,51 @@ impl HandlerOutcome { } } +/// Returns `true` if the raw config-store value represents an enabled flag. +/// +/// Accepted values (after whitespace trimming): `"1"` or `"true"` in any ASCII case. +/// All other values, including the empty string, are treated as disabled. +fn parse_edgezero_flag(value: &str) -> bool { + let v = value.trim(); + v.eq_ignore_ascii_case("true") || v == "1" +} + +/// Opens the shared Fastly Config Store used by both the `EdgeZero` flag read and +/// `EdgeZero` dispatch metadata. +/// +/// # Errors +/// +/// Returns [`fastly::Error`] if the config store cannot be opened. +fn open_trusted_server_config_store() -> Result { + let store = FastlyConfigStore::try_open(TRUSTED_SERVER_CONFIG_STORE) + .map_err(|e| fastly::Error::msg(format!("failed to open config store: {e}")))?; + Ok(ConfigStoreHandle::new(Arc::new(store))) +} + +/// Reads the `edgezero_enabled` key from the prepared Fastly Config Store +/// handle. +/// +/// Returns `Err` on any key-read failure, so callers should use the legacy path +/// as the safe default. +/// +/// # Errors +/// +/// - [`fastly::Error`] if the key cannot be read. 
+fn is_edgezero_enabled(config_store: &ConfigStoreHandle) -> Result { + let value = config_store + .get(EDGEZERO_ENABLED_KEY) + .map_err(|e| fastly::Error::msg(format!("failed to read edgezero_enabled: {e}")))?; + Ok(value.as_deref().is_some_and(parse_edgezero_flag)) +} + +fn health_response(req: &FastlyRequest) -> Option { + if req.get_method() == FastlyMethod::GET && req.get_path() == "/health" { + return Some(FastlyResponse::from_status(200).with_body_text_plain("ok")); + } + + None +} + /// Combined result from `route_request`, bundling the handler outcome with the /// EC context and cookies needed for post-send finalization and pull sync. struct RouteResult { @@ -107,64 +161,226 @@ struct RouteResult { /// Entry point for the Fastly Compute program. /// /// Uses an undecorated `main()` with `FastlyRequest::from_client()` instead of -/// `#[fastly::main]` so we can call `send_to_client()` explicitly when needed. +/// `#[fastly::main]` so the legacy streaming publisher path can call +/// [`fastly::Response::stream_to_client`] explicitly. fn main() { - init_logger(); - - let mut req = FastlyRequest::from_client(); + let req = FastlyRequest::from_client(); - // Keep the health probe independent from settings loading and routing so - // readiness checks still get a cheap liveness response during startup. - if req.get_method() == FastlyMethod::GET && req.get_path() == "/health" { - FastlyResponse::from_status(200) - .with_body_text_plain("ok") - .send_to_client(); + // Health probe bypasses logging, settings, and app construction as a cheap liveness signal. 
+ if let Some(response) = health_response(&req) { + response.send_to_client(); return; } - let settings = match get_settings() { - Ok(s) => s, + logging::init_logger(); + + let edgezero_config_store = match open_trusted_server_config_store() { + Ok(config_store) => config_store, Err(e) => { - log::error!("Failed to load settings: {:?}", e); - to_error_response(&e).send_to_client(); + log::warn!("failed to open EdgeZero config store, falling back to legacy path: {e}"); + legacy_main(req); return; } }; - // lgtm[rust/cleartext-logging] - // `Settings` uses `Redacted` for secrets, so this debug dump is redacted. - log::debug!("Settings {settings:?}"); - // Short-circuit the ja4 debug probe before finalize_response so that - // Cache-Control: no-store, private cannot be replaced by operator [response_headers]. + if is_edgezero_enabled(&edgezero_config_store).unwrap_or_else(|e| { + log::warn!("failed to read edgezero_enabled flag, falling back to legacy path: {e}"); + false + }) { + log::debug!("routing request through EdgeZero path"); + edgezero_main(req, edgezero_config_store); + } else { + log::debug!("routing request through legacy path"); + legacy_main(req); + } +} + +/// Handles a request through the `EdgeZero` router path. +fn edgezero_main(mut req: FastlyRequest, config_store: ConfigStoreHandle) { + // Short-circuit the JA4 debug probe before app construction, mirroring + // legacy_main. Must run here because TLS/JA4 accessors are only available + // on FastlyRequest before conversion to edgezero types. 
if req.get_method() == FastlyMethod::GET && req.get_path() == "/_ts/debug/ja4" { - if settings.debug.ja4_endpoint_enabled { - build_ja4_debug_response(&req).send_to_client(); - } else { - FastlyResponse::from_status(fastly::http::StatusCode::NOT_FOUND).send_to_client(); + match get_settings() { + Ok(settings) if settings.debug.ja4_endpoint_enabled => { + build_ja4_debug_response(&req).send_to_client(); + } + Ok(_) => { + FastlyResponse::from_status(fastly::http::StatusCode::NOT_FOUND).send_to_client(); + } + Err(e) => { + log::warn!("EdgeZero JA4 endpoint: failed to load settings: {e:?}"); + FastlyResponse::from_status(fastly::http::StatusCode::INTERNAL_SERVER_ERROR) + .with_body_text_plain("Internal Server Error") + .send_to_client(); + } } return; } - // Build the auction orchestrator once at startup - let orchestrator = match build_orchestrator(&settings) { - Ok(orchestrator) => orchestrator, - Err(e) => { - log::error!("Failed to build auction orchestrator: {:?}", e); - to_error_response(&e).send_to_client(); - return; + let app = TrustedServerApp::build_app(); + + // Strip client-spoofable forwarded headers before handing off to the + // EdgeZero dispatcher, mirroring the sanitization done in legacy_main. + compat::sanitize_fastly_forwarded_headers(&mut req); + + // Re-inject a trusted TLS scheme signal after sanitization has stripped any + // client-sent fastly-ssl header. Setting it from Fastly's native TLS + // metadata here is authoritative. detect_request_scheme in http_util + // checks this header so scheme-sensitive logic (publisher URL rewriting, + // etc.) produces https URLs on HTTPS traffic, matching legacy path parity. + if req.get_tls_protocol().is_some() || req.get_tls_cipher_openssl_name().is_some() { + req.set_header("fastly-ssl", "1"); + } + + // Capture client IP before the request is consumed by dispatch. 
+ let client_ip = req.get_client_ip_addr(); + + // Dispatch directly through the EdgeZero router without an intermediate + // fastly::Response conversion. The standard dispatch helpers + // (dispatch_with_config_handle, etc.) convert through fastly::Response using + // set_header, which drops duplicate header values — silently losing multiple + // Set-Cookie headers from publisher/origin responses. + // + // Bypassing to app.router().oneshot() preserves every header value in the + // http::HeaderMap and skips the logger-reinit that prevents using run_app_*. + let mut response = { + match into_core_request(req) { + Ok(mut core_req) => { + core_req.extensions_mut().insert(config_store); + futures::executor::block_on(app.router().oneshot(core_req)) + } + Err(e) => { + log::error!("EdgeZero request conversion failed: {e}"); + FastlyResponse::from_status(fastly::http::StatusCode::INTERNAL_SERVER_ERROR) + .with_body_text_plain("Internal Server Error") + .send_to_client(); + return; + } } }; - let integration_registry = match IntegrationRegistry::new(&settings) { - Ok(r) => r, + // Pop the EC finalize state that route handlers thread out via response + // extensions. Must happen before the fastly conversion, which drops + // extensions. + let ec_state = response + .extensions_mut() + .remove::(); + + if !take_finalize_sentinel(&mut response) { + // Apply finalize headers at the entry point so that router-level + // 405/404 responses for unregistered HTTP methods (e.g. TRACE, WebDAV + // verbs) carry TS/geo headers. Middleware-finalized responses are + // skipped here to avoid a second settings read and geo lookup on the + // normal registered-route path. 
+ match get_settings() { + Ok(settings) => { + let geo_info = resolve_geo_for_response(&response, client_ip, |client_ip| { + FastlyPlatformGeo.lookup(client_ip).unwrap_or_else(|e| { + log::warn!("entry-point geo lookup failed: {e}"); + None + }) + }); + apply_finalize_headers(&settings, geo_info.as_ref(), &mut response); + } + Err(e) => { + log::warn!("entry-point finalize skipped: failed to reload settings: {e:?}"); + } + } + } + + let mut fastly_resp = compat::to_fastly_response(response); + + // EC response lifecycle, mirroring legacy_main: finalize EC cookies and + // request headers on the converted fastly response, send it, then run + // pull sync for recognized browsers. When settings or the partner + // registry cannot be loaded the response is sent without EC finalization + // rather than dropped. + if let Some(ec_state) = ec_state { + match get_settings() { + Ok(settings) => match PartnerRegistry::from_config(&settings.ec.partners) { + Ok(partner_registry) => { + ec_finalize_response( + &settings, + &ec_state.ec_context, + ec_state.finalize_kv_graph.as_ref(), + &partner_registry, + ec_state.eids_cookie.as_deref(), + ec_state.sharedid_cookie.as_deref(), + &mut fastly_resp, + ); + fastly_resp.send_to_client(); + + if ec_state.is_real_browser { + if let Some(context) = build_pull_sync_context(&ec_state.ec_context) { + run_pull_sync_after_send(&settings, &partner_registry, &context); + } + } + return; + } + Err(e) => { + log::error!( + "EdgeZero EC finalize skipped: failed to build partner registry: {e:?}" + ); + } + }, + Err(e) => { + log::warn!("EdgeZero EC finalize skipped: failed to reload settings: {e:?}"); + } + } + } + + fastly_resp.send_to_client(); +} + +fn take_finalize_sentinel(response: &mut HttpResponse) -> bool { + response + .headers_mut() + .remove(HEADER_X_TS_FINALIZED) + .is_some() +} + +/// Handles a request using the original Fastly-native entry point. 
+/// +/// Preserves identical semantics to the pre-PR14 `main()`, with one +/// relocation: `GET /health` is short-circuited in [`main`] before the flag +/// dispatch, so it never reaches this function. The pre-PR14 entry point +/// answered `/health` with the same `200 ok` before settings loading and +/// routing; the only difference is that the probe now also skips logger +/// initialization. Called whenever +/// the `EdgeZero` flag is disabled or cannot be read/parsed as enabled — that +/// includes config-store open failures, key-read errors, missing keys, and +/// any value other than the accepted `"true"` / `"1"` forms. +/// +/// The thin fastly<->http conversion layer (via `compat::from_fastly_request` / +/// `compat::to_fastly_response`) lives here in the adapter crate. `compat.rs` +/// will be deleted in PR 15 once this legacy path is retired. +// TODO: delete after Phase 5 EdgeZero cutover - see issue #495 +fn legacy_main(mut req: FastlyRequest) { + let state = match build_state() { + Ok(state) => state, Err(e) => { - log::error!("Failed to create integration registry: {:?}", e); + log::error!("Failed to build application state: {:?}", e); to_error_response(&e).send_to_client(); return; } }; + // lgtm[rust/cleartext-logging] + // `Settings` uses `Redacted` for secrets, so this debug dump is redacted. + log::debug!("Settings {:?}", state.settings); + + // Short-circuit the ja4 debug probe before finalize_response so that + // Cache-Control: no-store, private cannot be replaced by operator [response_headers]. 
+ if req.get_method() == FastlyMethod::GET && req.get_path() == "/_ts/debug/ja4" { + if state.settings.debug.ja4_endpoint_enabled { + build_ja4_debug_response(&req).send_to_client(); + } else { + FastlyResponse::from_status(fastly::http::StatusCode::NOT_FOUND).send_to_client(); + } + return; + } - let partner_registry = match PartnerRegistry::from_config(&settings.ec.partners) { + let partner_registry = match PartnerRegistry::from_config(&state.settings.ec.partners) { Ok(registry) => registry, Err(e) => { log::error!("Failed to build partner registry: {:?}", e); @@ -173,22 +389,17 @@ fn main() { } }; - // Start with an unavailable primary KV slot. EC-backed routes lazily - // replace it with the configured EC identity store at dispatch time so - // unrelated routes stay available when EC KV is unavailable. - let kv_store = std::sync::Arc::new(UnavailableKvStore) - as std::sync::Arc; // Strip client-spoofable forwarded headers at the edge before building // any request-derived context or converting to the core HTTP types. 
compat::sanitize_fastly_forwarded_headers(&mut req); - let runtime_services = build_runtime_services(&req, kv_store); + let runtime_services = build_runtime_services(&req, std::sync::Arc::clone(&state.kv_store)); let http_req = compat::from_fastly_request(req); let route_result = futures::executor::block_on(route_request( - &settings, - &orchestrator, - &integration_registry, + &state.settings, + &state.orchestrator, + &state.registry, &partner_registry, &runtime_services, http_req, @@ -232,12 +443,12 @@ fn main() { match outcome { HandlerOutcome::Buffered(mut response) | HandlerOutcome::AuthChallenge(mut response) => { - finalize_response(&settings, geo_info.as_ref(), &mut response); + finalize_response(&state.settings, geo_info.as_ref(), &mut response); asset_cache_policy.apply_after_route_finalization(&mut response); let mut fastly_resp = compat::to_fastly_response(response); if should_finalize_ec { ec_finalize_response( - &settings, + &state.settings, &ec_context, finalize_kv_graph.as_ref(), &partner_registry, @@ -250,7 +461,7 @@ fn main() { if is_real_browser { if let Some(context) = build_pull_sync_context(&ec_context) { - run_pull_sync_after_send(&settings, &partner_registry, &context); + run_pull_sync_after_send(&state.settings, &partner_registry, &context); } } } @@ -259,12 +470,12 @@ fn main() { body, params, } => { - finalize_response(&settings, geo_info.as_ref(), &mut response); + finalize_response(&state.settings, geo_info.as_ref(), &mut response); asset_cache_policy.apply_after_route_finalization(&mut response); let mut fastly_resp = compat::to_fastly_response_skeleton(response); if should_finalize_ec { ec_finalize_response( - &settings, + &state.settings, &ec_context, finalize_kv_graph.as_ref(), &partner_registry, @@ -279,8 +490,8 @@ fn main() { body, &mut streaming_body, ¶ms, - &settings, - &integration_registry, + &state.settings, + &state.registry, ) { Ok(()) => { if let Err(e) = streaming_body.finish() { @@ -299,12 +510,12 @@ fn main() { if 
is_real_browser && stream_succeeded { if let Some(context) = build_pull_sync_context(&ec_context) { - run_pull_sync_after_send(&settings, &partner_registry, &context); + run_pull_sync_after_send(&state.settings, &partner_registry, &context); } } } HandlerOutcome::AssetStreaming { mut response, body } => { - finalize_response(&settings, geo_info.as_ref(), &mut response); + finalize_response(&state.settings, geo_info.as_ref(), &mut response); asset_cache_policy.apply_after_route_finalization(&mut response); let fastly_resp = compat::to_fastly_response_skeleton(response); let mut streaming_body = fastly_resp.stream_to_client(); @@ -324,7 +535,7 @@ const FALLBACK_UNAVAILABLE: &str = "unavailable"; const FALLBACK_NOT_SENT: &str = "not sent"; const FALLBACK_NONE: &str = "none"; -// TODO: remove after JA4 evaluation completes — see #645 +// TODO: remove after JA4 evaluation completes - see #645 fn build_ja4_debug_response(req: &FastlyRequest) -> FastlyResponse { let ja4 = req.get_tls_ja4().unwrap_or(FALLBACK_UNAVAILABLE); let h2 = req @@ -498,7 +709,7 @@ async fn route_request( Err(e) => return Err(e), } - // Get path and method for routing + // Get path and method for routing. let path = req.uri().path().to_string(); let method = req.method().clone(); @@ -550,7 +761,7 @@ async fn route_request( (outcome, false) } - // Unified auction endpoint (returns creative HTML inline) + // Unified auction endpoint. 
(Method::POST, "/auction") => { let registry_ref = if partner_registry.is_empty() { None @@ -720,7 +931,7 @@ async fn route_request( }) } -fn maybe_identity_graph(settings: &Settings) -> Option { +pub(crate) fn maybe_identity_graph(settings: &Settings) -> Option { settings.ec.ec_store.as_ref().map(KvIdentityGraph::new) } @@ -741,6 +952,35 @@ fn run_pull_sync_after_send( dispatch_pull_sync(settings, &kv, partner_registry, &limiter, context); } +pub(crate) fn resolve_publisher_response_buffered( + publisher_response: PublisherResponse, + settings: &Settings, + integration_registry: &IntegrationRegistry, +) -> Result> { + match publisher_response { + PublisherResponse::Buffered(response) => Ok(response), + PublisherResponse::Stream { + mut response, + body, + params, + } => { + let mut output = BoundedWriter::new(settings.publisher.max_buffered_body_bytes); + stream_publisher_body(body, &mut output, ¶ms, settings, integration_registry)?; + let bytes = output.into_inner(); + response.headers_mut().insert( + header::CONTENT_LENGTH, + HeaderValue::from(bytes.len() as u64), + ); + *response.body_mut() = EdgeBody::from(bytes); + Ok(response) + } + PublisherResponse::PassThrough { mut response, body } => { + *response.body_mut() = body; + Ok(response) + } + } +} + /// Applies all standard response headers: geo, version, staging, and configured headers. /// /// Called from every response path (including auth early-returns) so that all @@ -750,35 +990,7 @@ fn run_pull_sync_after_send( /// version/staging, then operator-configured `settings.response_headers`. /// This means operators can intentionally override any managed header. 
fn finalize_response(settings: &Settings, geo_info: Option<&GeoInfo>, response: &mut HttpResponse) { - if let Some(geo) = geo_info { - geo.set_response_headers(response); - } else { - response.headers_mut().insert( - HEADER_X_GEO_INFO_AVAILABLE, - HeaderValue::from_static("false"), - ); - } - - if let Ok(v) = ::std::env::var(ENV_FASTLY_SERVICE_VERSION) { - if let Ok(value) = HeaderValue::from_str(&v) { - response.headers_mut().insert(HEADER_X_TS_VERSION, value); - } else { - log::warn!("Skipping invalid FASTLY_SERVICE_VERSION response header value"); - } - } - if ::std::env::var(ENV_FASTLY_IS_STAGING).as_deref() == Ok("1") { - response - .headers_mut() - .insert(HEADER_X_TS_ENV, HeaderValue::from_static("staging")); - } - - for (key, value) in &settings.response_headers { - let header_name = HeaderName::from_bytes(key.as_bytes()) - .expect("settings.response_headers validated at load time"); - let header_value = - HeaderValue::from_str(value).expect("settings.response_headers validated at load time"); - response.headers_mut().insert(header_name, header_value); - } + apply_finalize_headers(settings, geo_info, response); } fn http_error_response(report: &Report) -> HttpResponse { @@ -797,7 +1009,7 @@ fn http_error_response(report: &Report) -> HttpResponse { /// Constructs a `KvIdentityGraph` from settings, or returns an error if the /// `ec_store` config is not set. -fn require_identity_graph( +pub(crate) fn require_identity_graph( settings: &Settings, ) -> Result> { let store_name = settings.ec.ec_store.as_deref().ok_or_else(|| { @@ -810,7 +1022,7 @@ fn require_identity_graph( } /// Extracts a named cookie value from the request's `Cookie` header. 
-fn extract_cookie_value(req: &FastlyRequest, name: &str) -> Option { +pub(crate) fn extract_cookie_value(req: &FastlyRequest, name: &str) -> Option { let cookie_header = req.get_header_str("cookie")?; for pair in cookie_header.split(';') { let pair = pair.trim(); @@ -827,7 +1039,7 @@ fn extract_cookie_value(req: &FastlyRequest, name: &str) -> Option { /// /// All extraction is pure in-memory — no KV I/O. The Fastly SDK provides /// `get_tls_ja4()` and `get_client_h2_fingerprint()` on client requests. -fn derive_device_signals(req: &FastlyRequest) -> DeviceSignals { +pub(crate) fn derive_device_signals(req: &FastlyRequest) -> DeviceSignals { let ua = req.get_header_str("user-agent").unwrap_or(""); let ja4 = req.get_tls_ja4(); let h2_fp = req.get_client_h2_fingerprint(); @@ -838,8 +1050,131 @@ fn derive_device_signals(req: &FastlyRequest) -> DeviceSignals { #[cfg(test)] mod tests { use super::*; + use edgezero_core::http::response_builder; use fastly::mime; + fn test_settings() -> Settings { + Settings::from_toml( + r#" + [[handlers]] + path = "^/_ts/admin" + username = "admin" + password = "admin-pass" + + [publisher] + domain = "test-publisher.com" + cookie_domain = ".test-publisher.com" + origin_url = "https://origin.test-publisher.com" + proxy_secret = "unit-test-proxy-secret" + + [ec] + passphrase = "test-secret-key-32-bytes-minimum" + + [request_signing] + enabled = false + config_store_id = "test-config-store-id" + secret_store_id = "test-secret-store-id" + "#, + ) + .expect("should parse test settings") + } + + #[test] + fn parses_true_flag_values() { + assert!(parse_edgezero_flag("true"), "should parse 'true'"); + assert!(parse_edgezero_flag("1"), "should parse '1'"); + assert!(parse_edgezero_flag(" true "), "should trim whitespace"); + assert!( + parse_edgezero_flag(" 1 "), + "should trim whitespace around '1'" + ); + assert!(parse_edgezero_flag("TRUE"), "should parse uppercase 'TRUE'"); + assert!( + parse_edgezero_flag("True"), + "should parse mixed-case 
'True'" + ); + } + + #[test] + fn rejects_non_true_flag_values() { + assert!(!parse_edgezero_flag("false"), "should not parse 'false'"); + assert!(!parse_edgezero_flag(""), "should not parse empty string"); + assert!( + !parse_edgezero_flag(" "), + "should not parse whitespace-only" + ); + assert!(!parse_edgezero_flag("yes"), "should not parse 'yes'"); + } + + #[test] + fn health_response_short_circuits_get_health() { + let req = FastlyRequest::get("https://example.com/health"); + + let mut response = health_response(&req).expect("should build health response"); + + assert_eq!( + response.get_status(), + fastly::http::StatusCode::OK, + "should return 200 OK" + ); + assert_eq!( + response.take_body_str(), + "ok", + "should return the health body" + ); + } + + #[test] + fn health_response_ignores_non_health_paths() { + let req = FastlyRequest::get("https://example.com/auction"); + + assert!( + health_response(&req).is_none(), + "should only short-circuit /health" + ); + } + + #[test] + fn take_finalize_sentinel_strips_sentinel() { + let mut response = HttpResponse::new(EdgeBody::empty()); + response + .headers_mut() + .insert("x-ts-finalized", HeaderValue::from_static("1")); + + assert!( + take_finalize_sentinel(&mut response), + "should detect middleware-finalized responses" + ); + assert!( + response.headers().get("x-ts-finalized").is_none(), + "sentinel should not be sent to clients" + ); + } + + #[test] + #[allow(clippy::panic)] + fn entry_point_finalize_skips_geo_lookup_for_401() { + let settings = test_settings(); + let mut response = response_builder() + .status(edgezero_core::http::StatusCode::UNAUTHORIZED) + .body(EdgeBody::empty()) + .expect("should build response"); + + let geo_info = resolve_geo_for_response(&response, None, |_| { + panic!("should skip entry-point geo lookup for 401 responses"); + }); + apply_finalize_headers(&settings, geo_info.as_ref(), &mut response); + + assert_eq!( + response + .headers() + 
.get(trusted_server_core::constants::HEADER_X_GEO_INFO_AVAILABLE) + .and_then(|v| v.to_str().ok()), + Some("false"), + "401 responses should still carry geo-unavailable headers" + ); + } + #[test] fn ja4_debug_response_uses_plain_text_and_fallback_values() { let req = FastlyRequest::get("https://example.com/_ts/debug/ja4"); diff --git a/crates/trusted-server-adapter-fastly/src/middleware.rs b/crates/trusted-server-adapter-fastly/src/middleware.rs new file mode 100644 index 00000000..34d4b349 --- /dev/null +++ b/crates/trusted-server-adapter-fastly/src/middleware.rs @@ -0,0 +1,503 @@ +//! Middleware implementations for the dual-path entry point. +//! +//! Provides two middleware types that mirror the finalization and auth logic +//! from the legacy [`crate::finalize_response`] and [`crate::route_request`]: +//! +//! - [`FinalizeResponseMiddleware`] — geo lookup and standard TS header injection +//! - [`AuthMiddleware`] — basic-auth enforcement via [`enforce_basic_auth`] +//! +//! Registration order in [`crate::app`]: `FinalizeResponseMiddleware` outermost, +//! then `AuthMiddleware`. This ensures auth-rejected responses also receive the +//! standard TS headers before being returned to the client. 
+ +use std::sync::Arc; + +use async_trait::async_trait; +use edgezero_adapter_fastly::FastlyRequestContext; +use edgezero_core::context::RequestContext; +use edgezero_core::error::EdgeError; +use edgezero_core::http::{HeaderName, HeaderValue, Response, StatusCode}; +use edgezero_core::middleware::{Middleware, Next}; +use edgezero_core::response::IntoResponse; +use std::net::IpAddr; +use trusted_server_core::auth::enforce_basic_auth; +use trusted_server_core::constants::{ + ENV_FASTLY_IS_STAGING, ENV_FASTLY_SERVICE_VERSION, HEADER_X_GEO_INFO_AVAILABLE, + HEADER_X_TS_ENV, HEADER_X_TS_VERSION, +}; +use trusted_server_core::geo::GeoInfo; +use trusted_server_core::platform::PlatformGeo; +use trusted_server_core::settings::Settings; + +pub(crate) const HEADER_X_TS_FINALIZED: &str = "x-ts-finalized"; + +// --------------------------------------------------------------------------- +// FinalizeResponseMiddleware +// --------------------------------------------------------------------------- + +/// Outermost middleware: performs geo lookup and injects all standard TS response headers. +/// +/// Registered first in the middleware chain so that it wraps all inner middleware +/// (including [`AuthMiddleware`]) and the handler. This guarantees every registered-route +/// response — including auth-rejected ones — carries a consistent set of headers. +/// +/// Router-level 405/404 responses for unregistered HTTP methods (e.g. TRACE) bypass the +/// middleware chain. Those are covered by a second call to [`apply_finalize_headers`] at +/// the `main.rs` entry point. Middleware-finalized responses carry +/// [`HEADER_X_TS_FINALIZED`] so the entry point can skip duplicate finalization. +/// +/// # Header precedence +/// +/// Headers are written in this order (last write wins): +/// 1. Geo headers (or `X-Geo-Info-Available: false` when geo is unavailable) +/// 2. `X-TS-Version` from `FASTLY_SERVICE_VERSION` env var +/// 3. `X-TS-ENV: staging` when `FASTLY_IS_STAGING == "1"` +/// 4. 
Operator-configured `settings.response_headers` (can override any managed header) +pub struct FinalizeResponseMiddleware { + settings: Arc, + geo: Arc, +} + +impl FinalizeResponseMiddleware { + /// Creates a new [`FinalizeResponseMiddleware`] with the given settings and geo lookup service. + pub fn new(settings: Arc, geo: Arc) -> Self { + Self { settings, geo } + } +} + +#[async_trait(?Send)] +impl Middleware for FinalizeResponseMiddleware { + async fn handle(&self, ctx: RequestContext, next: Next<'_>) -> Result { + let client_ip = FastlyRequestContext::get(ctx.request()).and_then(|c| c.client_ip); + + let mut response = match next.run(ctx).await { + Ok(r) => r, + Err(e) => { + log::error!("request handler failed: {e:?}"); + e.into_response() + } + }; + + let geo_info = resolve_geo_for_response(&response, client_ip, |ip| { + self.geo.lookup(ip).unwrap_or_else(|e| { + log::warn!("geo lookup failed: {e}"); + None + }) + }); + + apply_finalize_headers(&self.settings, geo_info.as_ref(), &mut response); + response + .headers_mut() + .insert(HEADER_X_TS_FINALIZED, HeaderValue::from_static("1")); + + Ok(response) + } +} + +// --------------------------------------------------------------------------- +// AuthMiddleware +// --------------------------------------------------------------------------- + +/// Inner middleware: enforces basic-auth before the handler runs. +/// +/// - `Ok(Some(response))` from [`enforce_basic_auth`] → auth failed; return the +/// challenge response (bubbles through [`FinalizeResponseMiddleware`] for header injection). +/// - `Ok(None)` → no auth required or credentials accepted; continue the chain. +/// - `Err(report)` → internal error; log and convert to an HTTP response via +/// [`crate::app::http_error`] using the error's documented status code. 
+/// +/// # Errors +/// +/// When [`enforce_basic_auth`] returns an error report, converts it to an HTTP +/// response via [`crate::app::http_error`] (preserving the error's status code) +/// so that [`FinalizeResponseMiddleware`] can still inject standard TS headers +/// before the response reaches the client. +pub struct AuthMiddleware { + settings: Arc, +} + +impl AuthMiddleware { + /// Creates a new [`AuthMiddleware`] with the given settings. + pub fn new(settings: Arc) -> Self { + Self { settings } + } +} + +#[async_trait(?Send)] +impl Middleware for AuthMiddleware { + async fn handle(&self, ctx: RequestContext, next: Next<'_>) -> Result { + match enforce_basic_auth(&self.settings, ctx.request()) { + Ok(Some(response)) => return Ok(response), + Ok(None) => {} + Err(report) => { + log::error!("auth check failed: {:?}", report); + return Ok(crate::app::http_error(&report)); + } + } + + next.run(ctx).await + } +} + +// --------------------------------------------------------------------------- +// Shared geo resolution helper +// --------------------------------------------------------------------------- + +/// Resolves geo for a response, skipping the lookup for 401 responses. +/// +/// Returns `None` for authentication rejections (401) without calling `lookup_geo` +/// to avoid unnecessary work and exposing geo data to unauthenticated callers. +/// All other responses call `lookup_geo` and return its result. +/// +/// Used by both [`FinalizeResponseMiddleware`] and the entry-point finalization +/// in `main.rs` so the 401-skip rule is defined in one place. +/// +/// # Parity note +/// +/// The legacy path skips geo only for its own `HandlerOutcome::AuthChallenge` +/// responses; origin-forwarded 401s still receive geo headers there. The `EdgeZero` +/// path skips geo for **all** 401s by status. 
This is intentionally more +/// conservative: geo data is not sent to any unauthenticated caller regardless of +/// whether the 401 originated from this server or the upstream origin. +pub(crate) fn resolve_geo_for_response( + response: &Response, + client_ip: Option, + lookup_geo: F, +) -> Option +where + F: FnOnce(Option) -> Option, +{ + if response.status() == StatusCode::UNAUTHORIZED { + None + } else { + lookup_geo(client_ip) + } +} + +// --------------------------------------------------------------------------- +// apply_finalize_headers — extracted for unit testing +// --------------------------------------------------------------------------- + +/// Applies all standard Trusted Server response headers to the given response. +/// +/// Mirrors [`crate::finalize_response`] exactly, operating on [`Response`] from +/// `edgezero_core::http` instead of `HttpResponse`. +/// +/// Header write order (last write wins): +/// 1. Geo headers (`x-geo-*`) — or `X-Geo-Info-Available: false` when absent +/// 2. `X-TS-Version` from `FASTLY_SERVICE_VERSION` env var +/// 3. `X-TS-ENV: staging` when `FASTLY_IS_STAGING == "1"` +/// 4. 
`settings.response_headers` — operator-configured overrides applied last +pub(crate) fn apply_finalize_headers( + settings: &Settings, + geo_info: Option<&GeoInfo>, + response: &mut Response, +) { + if let Some(geo) = geo_info { + geo.set_response_headers(response); + } else { + response.headers_mut().insert( + HEADER_X_GEO_INFO_AVAILABLE, + HeaderValue::from_static("false"), + ); + } + + if let Ok(v) = std::env::var(ENV_FASTLY_SERVICE_VERSION) { + if let Ok(value) = HeaderValue::from_str(&v) { + response.headers_mut().insert(HEADER_X_TS_VERSION, value); + } else { + log::warn!("Skipping invalid FASTLY_SERVICE_VERSION response header value"); + } + } + + if std::env::var(ENV_FASTLY_IS_STAGING).as_deref() == Ok("1") { + response + .headers_mut() + .insert(HEADER_X_TS_ENV, HeaderValue::from_static("staging")); + } + + for (key, value) in &settings.response_headers { + let header_name = HeaderName::from_bytes(key.as_bytes()) + .expect("should be a valid header name: response_headers validated in prepare_runtime"); + let header_value = HeaderValue::from_str(value).expect( + "should be a valid header value: response_headers validated in prepare_runtime", + ); + response.headers_mut().insert(header_name, header_value); + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + use std::collections::HashMap; + use std::net::IpAddr; + use std::sync::Arc; + + use edgezero_core::body::Body; + use edgezero_core::context::RequestContext; + use edgezero_core::error::EdgeError; + use edgezero_core::http::{request_builder, response_builder, Method, StatusCode}; + use edgezero_core::middleware::Next; + use edgezero_core::params::PathParams; + use error_stack::Report; + use futures::executor::block_on; + use trusted_server_core::platform::{PlatformError, PlatformGeo}; + + fn empty_response() -> Response { + 
response_builder() + .body(Body::empty()) + .expect("should build empty test response") + } + + fn empty_ctx() -> RequestContext { + let req = request_builder() + .method(Method::GET) + .uri("/test") + .body(Body::empty()) + .expect("should build test request"); + RequestContext::new(req, PathParams::new(HashMap::new())) + } + + struct FixedGeo(Option); + + impl PlatformGeo for FixedGeo { + fn lookup(&self, _: Option) -> Result, Report> { + Ok(self.0.clone()) + } + } + + fn test_settings() -> Settings { + Settings::from_toml( + r#" + [[handlers]] + path = "^/_ts/admin" + username = "admin" + password = "admin-pass" + + [publisher] + domain = "test-publisher.com" + cookie_domain = ".test-publisher.com" + origin_url = "https://origin.test-publisher.com" + proxy_secret = "unit-test-proxy-secret" + + [ec] + passphrase = "test-secret-key-32-bytes-minimum" + + [request_signing] + enabled = false + config_store_id = "test-config-store-id" + secret_store_id = "test-secret-store-id" + "#, + ) + .expect("should parse test settings") + } + + fn settings_with_response_headers(headers: Vec<(&str, &str)>) -> Settings { + let mut s = test_settings(); + s.response_headers = headers + .into_iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(); + s + } + + #[test] + fn operator_response_headers_override_earlier_headers() { + let settings = + settings_with_response_headers(vec![("X-Geo-Info-Available", "operator-override")]); + let mut response = empty_response(); + + // No geo_info → would set "false"; operator header should win instead. 
+ apply_finalize_headers(&settings, None, &mut response); + + assert_eq!( + response + .headers() + .get("x-geo-info-available") + .and_then(|v| v.to_str().ok()), + Some("operator-override"), + "should override the managed geo header with the operator-configured value" + ); + } + + #[test] + fn sets_geo_unavailable_header_when_no_geo_info() { + let settings = settings_with_response_headers(vec![]); + let mut response = empty_response(); + + apply_finalize_headers(&settings, None, &mut response); + + assert_eq!( + response + .headers() + .get("x-geo-info-available") + .and_then(|v| v.to_str().ok()), + Some("false"), + "should set X-Geo-Info-Available: false when no geo info is available" + ); + } + + // --------------------------------------------------------------------------- + // FinalizeResponseMiddleware::handle tests + // --------------------------------------------------------------------------- + + #[test] + fn finalize_handle_injects_geo_unavailable_on_ok_response() { + let settings = settings_with_response_headers(vec![]); + let middleware = + FinalizeResponseMiddleware::new(Arc::new(settings), Arc::new(FixedGeo(None))); + let handler = + Arc::new( + |_ctx: RequestContext| async move { Ok::(empty_response()) }, + ); + + let response = block_on(middleware.handle(empty_ctx(), Next::new(&[], &*handler))) + .expect("should succeed"); + + assert_eq!( + response + .headers() + .get("x-geo-info-available") + .and_then(|v| v.to_str().ok()), + Some("false"), + "should set X-Geo-Info-Available: false when geo returns None" + ); + } + + #[test] + fn finalize_handle_marks_response_as_finalized() { + let settings = settings_with_response_headers(vec![]); + let middleware = + FinalizeResponseMiddleware::new(Arc::new(settings), Arc::new(FixedGeo(None))); + let handler = + Arc::new( + |_ctx: RequestContext| async move { Ok::(empty_response()) }, + ); + + let response = block_on(middleware.handle(empty_ctx(), Next::new(&[], &*handler))) + .expect("should succeed"); + + 
assert_eq!( + response + .headers() + .get("x-ts-finalized") + .and_then(|v| v.to_str().ok()), + Some("1"), + "middleware-finalized responses should carry the entry-point sentinel" + ); + } + + #[test] + fn finalize_handle_absorbs_handler_error_and_injects_headers() { + let settings = settings_with_response_headers(vec![]); + let middleware = + FinalizeResponseMiddleware::new(Arc::new(settings), Arc::new(FixedGeo(None))); + let handler = Arc::new(|_ctx: RequestContext| async move { + Err::(EdgeError::service_unavailable("test error")) + }); + + let response = block_on(middleware.handle(empty_ctx(), Next::new(&[], &*handler))) + .expect("should absorb handler error into a response"); + + assert!( + response.status().is_server_error(), + "should produce a server-error status for absorbed handler error" + ); + assert!( + response.headers().get("x-geo-info-available").is_some(), + "absorbed error response should still carry geo header" + ); + } + + #[test] + #[allow(clippy::panic)] + fn finalize_handle_skips_geo_lookup_for_401() { + struct PanicGeo; + impl PlatformGeo for PanicGeo { + fn lookup(&self, _: Option) -> Result, Report> { + panic!("should not call geo for 401 responses") + } + } + + let settings = settings_with_response_headers(vec![]); + let middleware = FinalizeResponseMiddleware::new(Arc::new(settings), Arc::new(PanicGeo)); + let handler = Arc::new(|_ctx: RequestContext| async move { + let mut resp = empty_response(); + *resp.status_mut() = StatusCode::UNAUTHORIZED; + Ok::(resp) + }); + + let response = block_on(middleware.handle(empty_ctx(), Next::new(&[], &*handler))) + .expect("should succeed without calling geo"); + + assert_eq!( + response.status(), + StatusCode::UNAUTHORIZED, + "should preserve 401 status" + ); + assert_eq!( + response + .headers() + .get("x-geo-info-available") + .and_then(|v| v.to_str().ok()), + Some("false"), + "should set geo-unavailable header without calling geo for 401" + ); + } + + // 
--------------------------------------------------------------------------- + // AuthMiddleware::handle tests + // --------------------------------------------------------------------------- + + #[test] + fn finalize_handle_preserves_duplicate_set_cookie_headers() { + // Regression guard: FinalizeResponseMiddleware must not drop duplicate + // Set-Cookie headers. The old dispatch_with_config_handle path silently + // collapsed them because fastly::Response uses set_header (last-wins). + // This test verifies the EdgeZero middleware chain is header-transparent. + let settings = settings_with_response_headers(vec![]); + let middleware = + FinalizeResponseMiddleware::new(Arc::new(settings), Arc::new(FixedGeo(None))); + let handler = Arc::new(|_ctx: RequestContext| async move { + let resp = response_builder() + .header("set-cookie", "session=abc; Path=/; HttpOnly") + .header("set-cookie", "tracker=xyz; Path=/; SameSite=Lax") + .body(Body::empty()) + .expect("should build response with two Set-Cookie headers"); + Ok::(resp) + }); + + let response = block_on(middleware.handle(empty_ctx(), Next::new(&[], &*handler))) + .expect("should succeed"); + + let cookie_count = response.headers().get_all("set-cookie").iter().count(); + assert_eq!( + cookie_count, 2, + "FinalizeResponseMiddleware must not drop duplicate Set-Cookie headers" + ); + } + + #[test] + fn auth_handle_passes_through_when_auth_not_configured() { + let settings = test_settings(); + let middleware = AuthMiddleware::new(Arc::new(settings)); + let handler = + Arc::new( + |_ctx: RequestContext| async move { Ok::(empty_response()) }, + ); + + let response = block_on(middleware.handle(empty_ctx(), Next::new(&[], &*handler))) + .expect("should pass through when auth is not configured"); + + assert_eq!( + response.status(), + StatusCode::OK, + "should reach the handler when auth is not required" + ); + } +} diff --git a/crates/trusted-server-core/src/ec/mod.rs b/crates/trusted-server-core/src/ec/mod.rs index 
c71b2e61..bc28206b 100644 --- a/crates/trusted-server-core/src/ec/mod.rs +++ b/crates/trusted-server-core/src/ec/mod.rs @@ -135,7 +135,7 @@ pub fn get_ec_id(req: &fastly::Request) -> Result, Report, diff --git a/crates/trusted-server-core/src/html_processor.rs b/crates/trusted-server-core/src/html_processor.rs index eafe1e1e..427074ac 100644 --- a/crates/trusted-server-core/src/html_processor.rs +++ b/crates/trusted-server-core/src/html_processor.rs @@ -33,6 +33,10 @@ struct HtmlWithPostProcessing { /// Buffer that accumulates all intermediate output when post-processors /// need the full document. Left empty on the streaming-only path. accumulated_output: Vec, + /// Upper bound on `accumulated_output` (and the post-processed result) to + /// prevent the buffered post-processing path from growing the Wasm heap + /// without limit on highly-compressible documents. + max_buffered_body_bytes: usize, origin_host: String, request_host: String, request_scheme: String, @@ -48,7 +52,15 @@ impl StreamProcessor for HtmlWithPostProcessing { return Ok(output); } - // Post-processors need the full document. Accumulate until the last chunk. + // Post-processors need the full document. Accumulate until the last chunk, + // but enforce the buffering cap before growing the heap so a highly + // compressible document cannot OOM the accumulator. Matches the + // `BoundedWriter` error path (mapped to a 5xx proxy error downstream). + if self.accumulated_output.len() + output.len() > self.max_buffered_body_bytes { + return Err(io::Error::other( + "publisher body exceeded maximum buffered size", + )); + } self.accumulated_output.extend_from_slice(&output); if !is_last { return Ok(Vec::new()); @@ -97,6 +109,15 @@ impl StreamProcessor for HtmlWithPostProcessing { log::debug!("HTML post-processing complete: output_len={}", html.len()); } + // Post-processors may append content (e.g. 
injected scripts); enforce the + // same cap on the final document so growth during post-processing cannot + // push the buffer past the limit either. + if html.len() > self.max_buffered_body_bytes { + return Err(io::Error::other( + "publisher body exceeded maximum buffered size", + )); + } + Ok(html.into_bytes()) } @@ -114,13 +135,18 @@ pub struct HtmlProcessorConfig { pub request_host: String, pub request_scheme: String, pub integrations: IntegrationRegistry, + /// Maximum bytes the post-processing accumulator may buffer before the + /// processor aborts. Mirrors `publisher.max_buffered_body_bytes` so the + /// full-document buffering done for post-processors is bounded by the same + /// cap as the final [`crate::publisher::BoundedWriter`] sink. + pub max_buffered_body_bytes: usize, } impl HtmlProcessorConfig { /// Create from settings and request parameters #[must_use] pub fn from_settings( - _settings: &Settings, + settings: &Settings, integrations: &IntegrationRegistry, origin_host: &str, request_host: &str, @@ -131,6 +157,7 @@ impl HtmlProcessorConfig { request_host: request_host.to_string(), request_scheme: request_scheme.to_string(), integrations: integrations.clone(), + max_buffered_body_bytes: settings.publisher.max_buffered_body_bytes, } } } @@ -494,6 +521,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso inner, post_processors, accumulated_output: Vec::new(), + max_buffered_body_bytes: config.max_buffered_body_bytes, origin_host: config.origin_host, request_host: config.request_host, request_scheme: config.request_scheme, @@ -520,6 +548,7 @@ mod tests { request_host: "test.example.com".to_string(), request_scheme: "https".to_string(), integrations: IntegrationRegistry::default(), + max_buffered_body_bytes: 16 * 1024 * 1024, } } @@ -1041,6 +1070,7 @@ mod tests { inner: HtmlRewriterAdapter::new(Settings::default()), post_processors: Vec::new(), accumulated_output: Vec::new(), + max_buffered_body_bytes: 16 * 1024 * 1024, 
origin_host: String::new(), request_host: String::new(), request_scheme: String::new(), @@ -1081,6 +1111,7 @@ mod tests { inner: HtmlRewriterAdapter::new(Settings::default()), post_processors: vec![Arc::new(NoopPostProcessor)], accumulated_output: Vec::new(), + max_buffered_body_bytes: 16 * 1024 * 1024, origin_host: String::new(), request_host: String::new(), request_scheme: String::new(), @@ -1116,6 +1147,53 @@ mod tests { ); } + #[test] + fn post_processing_accumulator_rejects_growth_past_cap() { + use crate::streaming_processor::{HtmlRewriterAdapter, StreamProcessor}; + use lol_html::Settings; + + struct NoopPostProcessor; + impl IntegrationHtmlPostProcessor for NoopPostProcessor { + fn integration_id(&self) -> &'static str { + "test-noop" + } + fn post_process(&self, _html: &mut String, _ctx: &IntegrationHtmlContext<'_>) -> bool { + false + } + } + + // Tiny cap so a single non-final chunk overflows the accumulator. + let mut processor = HtmlWithPostProcessing { + inner: HtmlRewriterAdapter::new(Settings::default()), + post_processors: vec![Arc::new(NoopPostProcessor)], + accumulated_output: Vec::new(), + max_buffered_body_bytes: 16, + origin_host: String::new(), + request_host: String::new(), + request_scheme: String::new(), + document_state: IntegrationDocumentState::default(), + }; + + // A complete element well past the cap. The error must fire on this + // non-final chunk — proving the accumulator itself is bounded, not just + // the final write after the whole document was already buffered. + let oversized = format!("

{}

", "a".repeat(100)); + let err = processor + .process_chunk(oversized.as_bytes(), false) + .expect_err("accumulator growth past the cap must error mid-stream"); + assert!( + err.to_string().contains("exceeded maximum buffered size"), + "should report the buffering cap violation, got: {err}" + ); + + // The accumulator must never retain more than the configured cap. + assert!( + processor.accumulated_output.len() <= 16, + "accumulator must not grow past the cap, held {} bytes", + processor.accumulated_output.len() + ); + } + #[test] fn active_post_processor_receives_full_document_and_mutates_output() { use crate::streaming_processor::{HtmlRewriterAdapter, StreamProcessor}; @@ -1139,6 +1217,7 @@ mod tests { inner: HtmlRewriterAdapter::new(Settings::default()), post_processors: vec![Arc::new(AppendCommentProcessor)], accumulated_output: Vec::new(), + max_buffered_body_bytes: 16 * 1024 * 1024, origin_host: String::new(), request_host: String::new(), request_scheme: String::new(), diff --git a/crates/trusted-server-core/src/http_util.rs b/crates/trusted-server-core/src/http_util.rs index be855241..a426fb3e 100644 --- a/crates/trusted-server-core/src/http_util.rs +++ b/crates/trusted-server-core/src/http_util.rs @@ -216,7 +216,7 @@ fn normalize_scheme(value: &str) -> Option { /// 1. Fastly SDK TLS detection methods (most reliable) /// 2. Forwarded header (RFC 7239) /// 3. X-Forwarded-Proto header -/// 4. Fastly-SSL header (least reliable, can be spoofed) +/// 4. Fastly-SSL header (trusted on `EdgeZero` path; can be spoofed on legacy path) /// 5. Default to HTTP fn detect_request_scheme( req: &Request, @@ -257,7 +257,9 @@ fn detect_request_scheme( } } - // 4. Check Fastly-SSL header (can be spoofed by clients, use as last resort) + // 4. Check Fastly-SSL header. On the `EdgeZero` path this is injected from + // authoritative Fastly TLS metadata after spoofable headers are stripped, + // so it is reliable. On direct or legacy paths it can be spoofed by clients. 
if let Some(ssl) = req.headers().get("fastly-ssl") { if let Ok(ssl_str) = ssl.to_str() { if ssl_str == "1" || ssl_str.to_lowercase() == "true" { diff --git a/crates/trusted-server-core/src/proxy.rs b/crates/trusted-server-core/src/proxy.rs index 49d8ddd3..958fd80e 100644 --- a/crates/trusted-server-core/src/proxy.rs +++ b/crates/trusted-server-core/src/proxy.rs @@ -295,9 +295,15 @@ pub struct ProxyRequestConfig<'a> { pub stream_passthrough: bool, /// Domains allowed for the initial request and any redirects. /// - /// When empty every host is permitted (open mode). Integration proxies - /// should leave this empty; first-party handlers should pass - /// `&settings.proxy.allowed_domains` to enforce the publisher allowlist. + /// **Open mode** (`&[]`): every host is permitted. Integration proxies pass `&[]` + /// because their target URLs originate from operator-controlled configuration + /// (e.g. `trusted-server.toml` integration settings) and are therefore trusted at + /// operator setup time rather than at request time. + /// + /// **Restricted mode** (non-empty slice): only hosts matching a listed pattern are + /// permitted. Currently only [`handle_first_party_proxy`] passes + /// `&settings.proxy.allowed_domains` because it follows redirect chains that may + /// originate from untrusted creative-supplied URLs. pub allowed_domains: &'a [String], } @@ -640,10 +646,11 @@ struct ProxyRequestHeaders<'a> { additional_headers: &'a [(header::HeaderName, HeaderValue)], copy_request_headers: bool, services: &'a RuntimeServices, - /// Domains permitted for the initial request and any redirects. - /// - /// Empty slice means open mode (all hosts allowed). Populated by first-party - /// handlers; integration proxies leave it empty. 
+} + +struct ProxyRedirectPolicy<'a> { + follow_redirects: bool, + stream_passthrough: bool, allowed_domains: &'a [String], } @@ -688,15 +695,17 @@ pub async fn proxy_request( settings, &req, target_url_parsed, - follow_redirects, body.as_deref(), ProxyRequestHeaders { additional_headers: &headers, copy_request_headers, services, + }, + ProxyRedirectPolicy { + follow_redirects, + stream_passthrough, allowed_domains, }, - stream_passthrough, ) .await } @@ -1187,10 +1196,9 @@ async fn proxy_with_redirects( settings: &Settings, req: &Request, target_url_parsed: url::Url, - follow_redirects: bool, body: Option<&[u8]>, request_headers: ProxyRequestHeaders<'_>, - stream_passthrough: bool, + redirect_policy: ProxyRedirectPolicy<'_>, ) -> Result, Report> { const MAX_REDIRECTS: usize = 4; @@ -1218,7 +1226,7 @@ async fn proxy_with_redirects( })); } - if !redirect_is_permitted(request_headers.allowed_domains, host) { + if !redirect_is_permitted(redirect_policy.allowed_domains, host) { log::warn!( "request to `{}` blocked: host not in proxy allowed_domains", host @@ -1290,8 +1298,14 @@ async fn proxy_with_redirects( let beresp = platform_resp.response; - if !follow_redirects { - return finalize_response(settings, req, &current_url, beresp, stream_passthrough); + if !redirect_policy.follow_redirects { + return finalize_response( + settings, + req, + &current_url, + beresp, + redirect_policy.stream_passthrough, + ); } let status = beresp.status(); @@ -1305,7 +1319,13 @@ async fn proxy_with_redirects( ); if !is_redirect { - return finalize_response(settings, req, &current_url, beresp, stream_passthrough); + return finalize_response( + settings, + req, + &current_url, + beresp, + redirect_policy.stream_passthrough, + ); } let Some(location) = beresp @@ -1314,7 +1334,13 @@ async fn proxy_with_redirects( .and_then(|h| h.to_str().ok()) .filter(|value| !value.is_empty()) else { - return finalize_response(settings, req, &current_url, beresp, stream_passthrough); + return finalize_response( + settings, + req, +
&current_url, + beresp, + redirect_policy.stream_passthrough, + ); }; if redirect_attempt == MAX_REDIRECTS { @@ -1335,7 +1361,13 @@ async fn proxy_with_redirects( let next_scheme = next_url.scheme().to_ascii_lowercase(); if next_scheme != "http" && next_scheme != "https" { - return finalize_response(settings, req, &current_url, beresp, stream_passthrough); + return finalize_response( + settings, + req, + &current_url, + beresp, + redirect_policy.stream_passthrough, + ); } let next_host = match next_url.host_str() { @@ -1346,7 +1378,7 @@ async fn proxy_with_redirects( })); } }; - if !redirect_is_permitted(request_headers.allowed_domains, next_host) { + if !redirect_is_permitted(redirect_policy.allowed_domains, next_host) { log::warn!( "redirect to `{}` blocked: host not in proxy allowed_domains", next_host @@ -1909,7 +1941,7 @@ fn reconstruct_and_validate_signed_target( #[cfg(test)] mod tests { - use std::collections::HashMap; + use std::collections::{HashMap, VecDeque}; use std::io; use std::sync::{Arc, Mutex}; @@ -2003,6 +2035,79 @@ mod tests { .expect("response body should be valid UTF-8") } + struct QueuedHttpResponse { + status: u16, + headers: Vec<(header::HeaderName, HeaderValue)>, + body: Vec, + } + + #[derive(Default)] + struct HeaderAwareStubHttpClient { + responses: Mutex>, + } + + impl HeaderAwareStubHttpClient { + fn new() -> Self { + Self::default() + } + + fn push_response( + &self, + status: u16, + headers: Vec<(header::HeaderName, HeaderValue)>, + body: Vec, + ) { + self.responses + .lock() + .expect("should lock queued responses") + .push_back(QueuedHttpResponse { + status, + headers, + body, + }); + } + } + + #[async_trait::async_trait(?Send)] + impl PlatformHttpClient for HeaderAwareStubHttpClient { + async fn send( + &self, + _request: PlatformHttpRequest, + ) -> Result> { + let queued = self + .responses + .lock() + .expect("should lock queued responses") + .pop_front() + .ok_or_else(|| Report::new(PlatformError::HttpClient))?; + + let mut builder = +
edgezero_core::http::response_builder().status(queued.status); + for (name, value) in queued.headers { + builder = builder.header(name, value); + } + + let response = builder + .body(EdgeBody::from(queued.body)) + .expect("should build stub HTTP response"); + + Ok(PlatformResponse::new(response)) + } + + async fn send_async( + &self, + _request: PlatformHttpRequest, + ) -> Result> { + Err(Report::new(PlatformError::Unsupported)) + } + + async fn select( + &self, + _pending_requests: Vec, + ) -> Result> { + Err(Report::new(PlatformError::Unsupported)) + } + } + fn build_http_response(status: StatusCode, body: EdgeBody) -> Response { let mut response = Response::new(body); *response.status_mut() = status; @@ -2830,6 +2935,83 @@ mod tests { ); } + #[tokio::test] + async fn proxy_request_allows_open_mode_when_settings_allowlist_is_non_empty() { + let mut settings = create_test_settings(); + settings.proxy.allowed_domains = vec!["allowed.example".to_string()]; + + let stub = Arc::new(HeaderAwareStubHttpClient::new()); + stub.push_response(200, Vec::new(), b"ok".to_vec()); + let services = build_services_with_http_client( + Arc::clone(&stub) as Arc + ); + let req = build_http_request(Method::GET, "https://edge.example/"); + + let response = proxy_request( + &settings, + req, + ProxyRequestConfig { + target_url: "https://blocked.example/resource.js", + follow_redirects: false, + forward_ec_id: false, + body: None, + headers: Vec::new(), + copy_request_headers: false, + stream_passthrough: false, + allowed_domains: &[], + }, + &services, + ) + .await + .expect("open mode should ignore settings.proxy.allowed_domains"); + + assert_eq!(response.status(), StatusCode::OK); + assert_eq!(response_body_string(response), "ok"); + } + + #[tokio::test] + async fn proxy_request_uses_config_allowlist_for_redirect_hops() { + let mut settings = create_test_settings(); + settings.proxy.allowed_domains = vec!["origin.example".to_string()]; + + let stub = 
Arc::new(HeaderAwareStubHttpClient::new()); + stub.push_response( + 302, + vec![( + header::LOCATION, + HeaderValue::from_static("https://redirected.example/final.js"), + )], + Vec::new(), + ); + stub.push_response(200, Vec::new(), b"redirected".to_vec()); + + let services = build_services_with_http_client( + Arc::clone(&stub) as Arc + ); + let req = build_http_request(Method::GET, "https://edge.example/"); + + let response = proxy_request( + &settings, + req, + ProxyRequestConfig { + target_url: "https://origin.example/start.js", + follow_redirects: true, + forward_ec_id: false, + body: None, + headers: Vec::new(), + copy_request_headers: false, + stream_passthrough: false, + allowed_domains: &[], + }, + &services, + ) + .await + .expect("open mode should allow redirect hops outside settings allowlist"); + + assert_eq!(response.status(), StatusCode::OK); + assert_eq!(response_body_string(response), "redirected"); + } + #[tokio::test] async fn proxy_request_forwards_curated_headers_when_copy_request_headers_is_true() { use crate::platform::test_support::StubHttpClient; @@ -4189,12 +4371,9 @@ mod tests { // --- initial target allowlist enforcement (integration-level) --- // - // NOTE: A test for Nth-hop redirect blocking (i.e. exercising the - // `redirect_is_permitted` check that fires *after* receiving a 302 - // response) requires a Viceroy backend fixture that returns a redirect. - // That infrastructure is not available here. The unit tests above for - // `redirect_is_permitted` and `ip_literal_blocked_by_domain_allowlist` - // cover the blocking logic used at every hop. + // The unit tests above cover the host-matching logic itself. The tests + // below verify that proxy_request threads config.allowed_domains through + // the initial target check and redirect hops. 
#[tokio::test] async fn proxy_initial_target_blocked_by_allowlist() { diff --git a/crates/trusted-server-core/src/publisher.rs b/crates/trusted-server-core/src/publisher.rs index db8a1778..15089eb5 100644 --- a/crates/trusted-server-core/src/publisher.rs +++ b/crates/trusted-server-core/src/publisher.rs @@ -443,6 +443,51 @@ pub fn stream_publisher_body( process_response_streaming(body, output, &borrowed) } +/// A [`Write`] sink that buffers into a `Vec` but fails once the configured +/// byte limit would be exceeded. +/// +/// Used to bound in-WASM-heap buffering of decoded/re-written publisher bodies. +/// A highly-compressible origin response can sit under the platform raw-body cap +/// yet expand past a safe heap size after decode and post-processing; this writer +/// turns that into a recoverable error instead of an out-of-memory abort. +pub struct BoundedWriter { + inner: Vec, + limit: usize, +} + +impl BoundedWriter { + /// Creates a writer that accepts at most `limit` bytes before erroring. + #[must_use] + pub fn new(limit: usize) -> Self { + Self { + inner: Vec::new(), + limit, + } + } + + /// Consumes the writer and returns the buffered bytes. + #[must_use] + pub fn into_inner(self) -> Vec { + self.inner + } +} + +impl Write for BoundedWriter { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + if self.inner.len() + buf.len() > self.limit { + return Err(std::io::Error::other( + "publisher body exceeded maximum buffered size", + )); + } + self.inner.extend_from_slice(buf); + Ok(buf.len()) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + /// Proxies requests to the publisher's origin server. /// /// Returns a [`PublisherResponse`] indicating how the response should be sent: @@ -521,6 +566,13 @@ pub async fn handle_publisher_request( log::debug!("Proxying request to configured publisher backend"); // Only advertise encodings the rewrite pipeline can decode and re-encode. 
restrict_accept_encoding(&mut req); + // Strip the internal `fastly-ssl` scheme signal before forwarding to the + // origin. On the EdgeZero path the entry point re-injects this header from + // trusted Fastly TLS metadata so in-process scheme detection + // (`RequestInfo::from_request`, computed above) works; the legacy path never + // sets it. Either way it is an internal edge signal that must not leak to + // publisher backends, matching legacy outbound-header behavior. + req.headers_mut().remove("fastly-ssl"); *req.uri_mut() = target_uri; req.headers_mut().insert( header::HOST, @@ -640,8 +692,9 @@ pub async fn handle_publisher_request( content_type: &content_type, integration_registry, }; - let mut output = Vec::new(); + let mut output = BoundedWriter::new(settings.publisher.max_buffered_body_bytes); process_response_streaming(body, &mut output, &params)?; + let output = output.into_inner(); response.headers_mut().insert( header::CONTENT_LENGTH, @@ -1315,6 +1368,43 @@ mod tests { ); } + #[tokio::test] + async fn publisher_request_strips_fastly_ssl_before_forwarding() { + // The EdgeZero entry point re-injects `fastly-ssl` from trusted TLS + // metadata so in-process scheme detection works. It must not leak to the + // origin: the legacy path never forwarded it.
+ let settings = create_test_settings(); + let registry = + IntegrationRegistry::new(&settings).expect("should create integration registry"); + let stub = Arc::new(StubHttpClient::new()); + stub.push_response(200, b"origin response".to_vec()); + let services = build_services_with_http_client( + Arc::clone(&stub) as Arc + ); + let req = HttpRequest::builder() + .method(Method::GET) + .uri("https://publisher.example/page") + .header(header::HOST, "publisher.example") + .header("fastly-ssl", "1") + .body(EdgeBody::empty()) + .expect("should build request"); + + handle_publisher_request(&settings, &registry, &services, req) + .await + .expect("should proxy publisher request"); + + let recorded = stub.recorded_request_headers(); + let outbound = recorded + .first() + .expect("should record one outbound request"); + assert!( + !outbound + .iter() + .any(|(name, _)| name.eq_ignore_ascii_case("fastly-ssl")), + "internal fastly-ssl signal must not be forwarded to the origin, got: {outbound:?}" + ); + } + #[test] fn stream_publisher_body_preserves_gzip_round_trip() { use flate2::write::GzEncoder; @@ -1545,6 +1635,61 @@ mod tests { ); } + /// `BufferedProcessed` must enforce `publisher.max_buffered_body_bytes` so a + /// post-processed HTML body whose decoded output exceeds the cap fails instead + /// of allocating past the limit. Regression for the `EdgeZero` buffering gap + /// where only the streaming-conversion path applied the cap. + #[tokio::test] + async fn buffered_processed_enforces_max_buffered_body_bytes() { + let mut settings = create_test_settings(); + // Register an HTML post-processor so the response routes to BufferedProcessed. + settings + .integrations + .insert_config( + "nextjs", + &serde_json::json!({ + "enabled": true, + "rewrite_attributes": ["href", "link", "url"], + }), + ) + .expect("should update nextjs config"); + // Tiny cap so a modest HTML document exceeds it after processing.
+ settings.publisher.max_buffered_body_bytes = 64; + + let registry = + IntegrationRegistry::new(&settings).expect("should create integration registry"); + assert!( + registry.has_html_post_processors(), + "nextjs integration must register an HTML post-processor" + ); + + // Identity-encoded HTML well above the 64-byte cap once buffered. + let filler = "

padding

".repeat(64); + let html = format!("{filler}"); + let stub = Arc::new(StubHttpClient::new()); + stub.push_response_with_headers( + 200, + html.into_bytes(), + vec![("content-type", "text/html; charset=utf-8")], + ); + let services = build_services_with_http_client( + Arc::clone(&stub) as Arc + ); + let req = HttpRequest::builder() + .method(Method::GET) + .uri("https://publisher.example/page") + .header(header::HOST, "publisher.example") + .body(EdgeBody::empty()) + .expect("should build request"); + + let result = handle_publisher_request(&settings, ®istry, &services, req).await; + + assert!( + result.is_err(), + "buffered-processed body exceeding max_buffered_body_bytes must error, not allocate past the cap" + ); + } + /// Document-state survives from the streaming pass into the post-processor. /// `NextJsRscPlaceholderRewriter` writes into `IntegrationDocumentState` /// during streaming; `NextJsHtmlPostProcessor` reads it and substitutes. diff --git a/crates/trusted-server-core/src/settings.rs b/crates/trusted-server-core/src/settings.rs index c6984a34..eb2b23d6 100644 --- a/crates/trusted-server-core/src/settings.rs +++ b/crates/trusted-server-core/src/settings.rs @@ -39,6 +39,26 @@ pub struct Publisher { /// Keep this secret stable to allow existing links to decode. #[validate(custom(function = validate_redacted_not_empty))] pub proxy_secret: Redacted, + /// Maximum number of bytes buffered when the `EdgeZero` publisher fallback + /// processes an origin response. This caps the *decoded, post-rewrite* + /// output buffer. Defaults to 16 MiB — a conservative cap that prevents + /// Wasm-heap OOM at flag-flip. 
+ /// + /// On Fastly the *effective* ceiling for a publisher page is lower: the + /// platform HTTP client rejects any origin response whose raw (still + /// compressed) body exceeds 10 MiB before this buffer is ever filled, so + /// raising this value only helps highly compressible pages whose decoded + /// size exceeds the 16 MiB default while their compressed origin body stays + /// under 10 MiB. Raising it above ~10 MiB does not lift the platform cap for + /// uncompressed pages. That platform limit is removed once true streaming + /// lands (tracked for PR 15, issue #495), after which this setting becomes + /// the sole ceiling. + #[serde(default = "default_max_buffered_body_bytes")] + pub max_buffered_body_bytes: usize, +} + +fn default_max_buffered_body_bytes() -> usize { + 16 * 1024 * 1024 } impl Publisher { @@ -78,6 +98,7 @@ impl Publisher { /// origin_url: "https://origin.example.com:8080".to_string(), /// origin_host_header_override: None, /// proxy_secret: Redacted::new("proxy-secret".to_string()), + /// max_buffered_body_bytes: 16 * 1024 * 1024, /// }; /// assert_eq!(publisher.origin_host(), "origin.example.com:8080"); /// ``` @@ -3194,6 +3215,7 @@ origin_host_header_overide = "www.example.com""#, origin_url: "https://origin.example.com:8080".to_string(), origin_host_header_override: None, proxy_secret: Redacted::new("test-secret".to_string()), + max_buffered_body_bytes: 16 * 1024 * 1024, }; assert_eq!(publisher.origin_host(), "origin.example.com:8080"); @@ -3204,6 +3226,7 @@ origin_host_header_overide = "www.example.com""#, origin_url: "https://origin.example.com".to_string(), origin_host_header_override: None, proxy_secret: Redacted::new("test-secret".to_string()), + max_buffered_body_bytes: 16 * 1024 * 1024, }; assert_eq!(publisher.origin_host(), "origin.example.com"); @@ -3214,6 +3237,7 @@ origin_host_header_overide = "www.example.com""#, origin_url: "http://localhost:9090".to_string(), origin_host_header_override: None, proxy_secret: 
Redacted::new("test-secret".to_string()), + max_buffered_body_bytes: 16 * 1024 * 1024, }; assert_eq!(publisher.origin_host(), "localhost:9090"); @@ -3224,6 +3248,7 @@ origin_host_header_overide = "www.example.com""#, origin_url: "localhost:9090".to_string(), origin_host_header_override: None, proxy_secret: Redacted::new("test-secret".to_string()), + max_buffered_body_bytes: 16 * 1024 * 1024, }; assert_eq!(publisher.origin_host(), "localhost:9090"); @@ -3234,6 +3259,7 @@ origin_host_header_overide = "www.example.com""#, origin_url: "http://192.168.1.1:8080".to_string(), origin_host_header_override: None, proxy_secret: Redacted::new("test-secret".to_string()), + max_buffered_body_bytes: 16 * 1024 * 1024, }; assert_eq!(publisher.origin_host(), "192.168.1.1:8080"); @@ -3244,6 +3270,7 @@ origin_host_header_overide = "www.example.com""#, origin_url: "http://[::1]:8080".to_string(), origin_host_header_override: None, proxy_secret: Redacted::new("test-secret".to_string()), + max_buffered_body_bytes: 16 * 1024 * 1024, }; assert_eq!(publisher.origin_host(), "[::1]:8080"); } @@ -3256,6 +3283,7 @@ origin_host_header_overide = "www.example.com""#, origin_url: "https://origin.example.com:8443".to_string(), origin_host_header_override: None, proxy_secret: Redacted::new("test-secret".to_string()), + max_buffered_body_bytes: 16 * 1024 * 1024, }; assert_eq!(publisher.origin_host_header(), "origin.example.com:8443"); @@ -3269,6 +3297,7 @@ origin_host_header_overide = "www.example.com""#, origin_url: "https://origin.example.com".to_string(), origin_host_header_override: Some("www.example.com".to_string()), proxy_secret: Redacted::new("test-secret".to_string()), + max_buffered_body_bytes: 16 * 1024 * 1024, }; assert_eq!(publisher.origin_host_header(), "www.example.com"); diff --git a/crates/trusted-server-core/src/settings_data.rs b/crates/trusted-server-core/src/settings_data.rs index 5207a7e6..ed290f98 100644 --- a/crates/trusted-server-core/src/settings_data.rs +++ 
b/crates/trusted-server-core/src/settings_data.rs @@ -1,4 +1,6 @@ use core::str; +use std::sync::OnceLock; + use error_stack::{Report, ResultExt}; use validator::Validate; @@ -8,18 +10,37 @@ use crate::settings::Settings; pub use crate::auction_config_types::AuctionConfig; const SETTINGS_DATA: &[u8] = include_bytes!("../../../target/trusted-server-out.toml"); +static SETTINGS: OnceLock = OnceLock::new(); -/// Creates a new [`Settings`] instance from the embedded configuration file. +/// Returns the embedded [`Settings`], loading and validating them once per Wasm instance +/// and cloning the cached value on subsequent calls. /// -/// Loads the pre-built TOML that was generated by `build.rs` (base config -/// merged with any `TRUSTED_SERVER__` environment variable overrides at -/// build time). Environment variables are **not** read at runtime. +/// The first successful call parses the pre-built TOML generated by `build.rs` (base config +/// merged with any `TRUSTED_SERVER__` environment variable overrides at build time), +/// validates the result, and stores it in a [`OnceLock`]. Later calls return a clone of the +/// cached settings without re-running validation or emitting warning logs. +/// Environment variables are **not** read at runtime. 
/// /// # Errors /// /// - [`TrustedServerError::InvalidUtf8`] if the embedded TOML file contains invalid UTF-8 /// - [`TrustedServerError::Configuration`] if the configuration is invalid or missing required fields pub fn get_settings() -> Result> { + if let Some(settings) = SETTINGS.get() { + return Ok(settings.clone()); + } + + let settings = load_settings()?; + if SETTINGS.set(settings.clone()).is_err() { + if let Some(settings) = SETTINGS.get() { + return Ok(settings.clone()); + } + } + + Ok(settings) +} + +fn load_settings() -> Result> { let toml_bytes = SETTINGS_DATA; let toml_str = str::from_utf8(toml_bytes).change_context(TrustedServerError::InvalidUtf8 { message: "embedded trusted-server.toml file".to_string(), diff --git a/fastly.toml b/fastly.toml index 2ea512a6..85630152 100644 --- a/fastly.toml +++ b/fastly.toml @@ -55,6 +55,16 @@ build = """ env = "FASTLY_KEY" [local_server.config_stores] + [local_server.config_stores.trusted_server_config] + format = "inline-toml" + [local_server.config_stores.trusted_server_config.contents] + # "true" / "1" (case-insensitive) enable the EdgeZero path. Missing, + # unreadable, or any other value falls back to the legacy entry point. + # Keep "false" until EdgeZero parity is verified end to end (issue #495). + # EC API routes and the EC identity lifecycle are ported; the remaining + # intentional deviations are documented in adapter-fastly/src/app.rs. + edgezero_enabled = "false" + [local_server.config_stores.jwks_store] format = "inline-toml" [local_server.config_stores.jwks_store.contents] diff --git a/trusted-server.toml b/trusted-server.toml index 8ff95361..3c91bda9 100644 --- a/trusted-server.toml +++ b/trusted-server.toml @@ -15,6 +15,10 @@ origin_url = "https://origin.test-publisher.com" # Optional: override outbound Host header while connecting to origin_url. 
# origin_host_header_override = "www.example.com" proxy_secret = "change-me-proxy-secret" +# Maximum decoded, post-rewrite bytes buffered when processing a publisher origin response on the EdgeZero path. +# Defaults to 16 MiB when omitted; responses exceeding the cap return 502 (proxy error). +# Raise it for pages whose decoded size exceeds the default (on Fastly the 10 MiB raw origin-body limit still applies): +# max_buffered_body_bytes = 16777216 # 16 MiB [ec] passphrase = "local-dev-passphrase-32-bytes-min"