From a26de06fa03da5cfa2c259a0b209273b297e2ef5 Mon Sep 17 00:00:00 2001 From: Christian Date: Fri, 12 Jun 2026 12:44:12 -0500 Subject: [PATCH 1/7] Add DataDome server-side protection --- .../trusted-server-adapter-fastly/src/main.rs | 58 +- .../src/platform.rs | 4 + .../src/route_tests.rs | 4 + crates/trusted-server-core/Cargo.toml | 2 +- crates/trusted-server-core/src/http_util.rs | 12 +- .../src/integrations/datadome.rs | 199 ++++- .../src/integrations/datadome/protection.rs | 607 ++++++++++++++ .../src/integrations/mod.rs | 14 +- .../src/integrations/registry.rs | 253 ++++++ .../src/platform/test_support.rs | 16 +- .../trusted-server-core/src/platform/types.rs | 10 +- docs/guide/integrations/datadome.md | 225 ++++-- ...-datadome-server-side-protection-design.md | 747 ++++++++++++++++++ trusted-server.toml | 15 + 14 files changed, 2076 insertions(+), 90 deletions(-) create mode 100644 crates/trusted-server-core/src/integrations/datadome/protection.rs create mode 100644 docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md diff --git a/crates/trusted-server-adapter-fastly/src/main.rs b/crates/trusted-server-adapter-fastly/src/main.rs index b1e59e358..7b422d82d 100644 --- a/crates/trusted-server-adapter-fastly/src/main.rs +++ b/crates/trusted-server-adapter-fastly/src/main.rs @@ -29,7 +29,10 @@ use trusted_server_core::ec::EcContext; use trusted_server_core::error::{IntoHttpResponse, TrustedServerError}; use trusted_server_core::geo::GeoInfo; use trusted_server_core::http_util::is_navigation_request; -use trusted_server_core::integrations::{IntegrationRegistry, ProxyDispatchInput}; +use trusted_server_core::integrations::{ + IntegrationRegistry, ProxyDispatchInput, RequestFilterEffects, RequestFilterRegistryInput, + RequestFilterRegistryOutcome, +}; use trusted_server_core::platform::RuntimeServices; use trusted_server_core::proxy::{ handle_asset_proxy_request, handle_first_party_click, handle_first_party_proxy, @@ -102,6 +105,7 @@ struct 
RouteResult { is_real_browser: bool, should_finalize_ec: bool, asset_cache_policy: AssetProxyCachePolicy, + request_filter_effects: RequestFilterEffects, } /// Entry point for the Fastly Compute program. @@ -202,6 +206,7 @@ fn main() { is_real_browser: false, should_finalize_ec: true, asset_cache_policy: AssetProxyCachePolicy::OriginControlled, + request_filter_effects: RequestFilterEffects::default(), }); let RouteResult { @@ -213,6 +218,7 @@ fn main() { is_real_browser, should_finalize_ec, asset_cache_policy, + request_filter_effects, } = route_result; // Skip geo lookup for our own auth challenges: avoids exposing geo headers to @@ -233,6 +239,7 @@ fn main() { match outcome { HandlerOutcome::Buffered(mut response) | HandlerOutcome::AuthChallenge(mut response) => { finalize_response(&settings, geo_info.as_ref(), &mut response); + request_filter_effects.apply_to_response(&mut response); asset_cache_policy.apply_after_route_finalization(&mut response); let mut fastly_resp = compat::to_fastly_response(response); if should_finalize_ec { @@ -260,6 +267,7 @@ fn main() { params, } => { finalize_response(&settings, geo_info.as_ref(), &mut response); + request_filter_effects.apply_to_response(&mut response); asset_cache_policy.apply_after_route_finalization(&mut response); let mut fastly_resp = compat::to_fastly_response_skeleton(response); if should_finalize_ec { @@ -305,6 +313,7 @@ fn main() { } HandlerOutcome::AssetStreaming { mut response, body } => { finalize_response(&settings, geo_info.as_ref(), &mut response); + request_filter_effects.apply_to_response(&mut response); asset_cache_policy.apply_after_route_finalization(&mut response); let fastly_resp = compat::to_fastly_response_skeleton(response); let mut streaming_body = fastly_resp.stream_to_client(); @@ -368,7 +377,7 @@ async fn route_request( integration_registry: &IntegrationRegistry, partner_registry: &PartnerRegistry, runtime_services: &RuntimeServices, - req: HttpRequest, + mut req: HttpRequest, ) -> 
Result> { // Build a Fastly request reference for APIs that require fastly types // (EcContext, device signals, cookie extraction). This is headers/method/URI @@ -416,6 +425,7 @@ async fn route_request( is_real_browser, should_finalize_ec: true, asset_cache_policy: AssetProxyCachePolicy::OriginControlled, + request_filter_effects: RequestFilterEffects::default(), }); } Ok(None) => {} @@ -439,6 +449,7 @@ async fn route_request( is_real_browser, should_finalize_ec: true, asset_cache_policy: AssetProxyCachePolicy::OriginControlled, + request_filter_effects: RequestFilterEffects::default(), }); } @@ -456,6 +467,7 @@ async fn route_request( is_real_browser, should_finalize_ec: true, asset_cache_policy: AssetProxyCachePolicy::OriginControlled, + request_filter_effects: RequestFilterEffects::default(), }); } }; @@ -492,12 +504,51 @@ async fn route_request( is_real_browser, should_finalize_ec: true, asset_cache_policy: AssetProxyCachePolicy::OriginControlled, + request_filter_effects: RequestFilterEffects::default(), }); } Ok(None) => {} Err(e) => return Err(e), } + let request_filter_effects = match integration_registry + .filter_request(RequestFilterRegistryInput { + settings, + services: runtime_services, + req: &mut req, + }) + .await + { + Ok(RequestFilterRegistryOutcome::Continue(effects)) => effects, + Ok(RequestFilterRegistryOutcome::Respond { response, effects }) => { + return Ok(RouteResult { + outcome: HandlerOutcome::Buffered(response), + ec_context, + finalize_kv_graph, + eids_cookie, + sharedid_cookie, + is_real_browser, + should_finalize_ec: true, + asset_cache_policy: AssetProxyCachePolicy::OriginControlled, + request_filter_effects: effects, + }); + } + Err(e) => { + log::error!("Failed to run integration request filters: {:?}", e); + return Ok(RouteResult { + outcome: HandlerOutcome::Buffered(http_error_response(&e)), + ec_context, + finalize_kv_graph, + eids_cookie, + sharedid_cookie, + is_real_browser, + should_finalize_ec: true, + asset_cache_policy: 
AssetProxyCachePolicy::OriginControlled, + request_filter_effects: RequestFilterEffects::default(), + }); + } + }; + // Get path and method for routing let path = req.uri().path().to_string(); let method = req.method().clone(); @@ -635,6 +686,7 @@ async fn route_request( is_real_browser, should_finalize_ec, asset_cache_policy, + request_filter_effects, }); } Ok(response) @@ -686,6 +738,7 @@ async fn route_request( is_real_browser, should_finalize_ec, asset_cache_policy, + request_filter_effects, }); } Ok(PublisherResponse::PassThrough { mut response, body }) => { @@ -717,6 +770,7 @@ async fn route_request( is_real_browser, should_finalize_ec, asset_cache_policy, + request_filter_effects, }) } diff --git a/crates/trusted-server-adapter-fastly/src/platform.rs b/crates/trusted-server-adapter-fastly/src/platform.rs index 8147504b1..7e96a5455 100644 --- a/crates/trusted-server-adapter-fastly/src/platform.rs +++ b/crates/trusted-server-adapter-fastly/src/platform.rs @@ -571,6 +571,10 @@ pub fn build_runtime_services( client_ip: req.get_client_ip_addr(), tls_protocol: req.get_tls_protocol().map(str::to_string), tls_cipher: req.get_tls_cipher_openssl_name().map(str::to_string), + tls_ja4: req.get_tls_ja4().map(str::to_string), + h2_fingerprint: req.get_client_h2_fingerprint().map(str::to_string), + server_hostname: std::env::var("FASTLY_HOSTNAME").ok(), + server_region: std::env::var("FASTLY_REGION").ok(), }) .build() } diff --git a/crates/trusted-server-adapter-fastly/src/route_tests.rs b/crates/trusted-server-adapter-fastly/src/route_tests.rs index 56b202cba..9ea4831a0 100644 --- a/crates/trusted-server-adapter-fastly/src/route_tests.rs +++ b/crates/trusted-server-adapter-fastly/src/route_tests.rs @@ -507,6 +507,10 @@ fn test_runtime_services_with_secret_http_client_and_geo( client_ip: req.get_client_ip_addr(), tls_protocol: req.get_tls_protocol().map(str::to_string), tls_cipher: req.get_tls_cipher_openssl_name().map(str::to_string), + tls_ja4: 
req.get_tls_ja4().map(str::to_string), + h2_fingerprint: req.get_client_h2_fingerprint().map(str::to_string), + server_hostname: None, + server_region: None, }) .build() } diff --git a/crates/trusted-server-core/Cargo.toml b/crates/trusted-server-core/Cargo.toml index c80b0e6ae..95ef3a035 100644 --- a/crates/trusted-server-core/Cargo.toml +++ b/crates/trusted-server-core/Cargo.toml @@ -45,6 +45,7 @@ toml = { workspace = true } trusted-server-js = { path = "../js" } trusted-server-openrtb = { path = "../openrtb" } url = { workspace = true } +urlencoding = { workspace = true } uuid = { workspace = true } validator = { workspace = true } ed25519-dalek = { workspace = true } @@ -70,7 +71,6 @@ default = [] criterion = { workspace = true } edgezero-core = { workspace = true, features = ["test-utils"] } temp-env = { workspace = true } -urlencoding = { workspace = true } [[bench]] name = "consent_decode" diff --git a/crates/trusted-server-core/src/http_util.rs b/crates/trusted-server-core/src/http_util.rs index be855241c..a7bcd88cb 100644 --- a/crates/trusted-server-core/src/http_util.rs +++ b/crates/trusted-server-core/src/http_util.rs @@ -489,11 +489,7 @@ mod tests { } fn default_client_info() -> ClientInfo { - ClientInfo { - client_ip: None, - tls_protocol: None, - tls_cipher: None, - } + ClientInfo::default() } #[test] @@ -842,9 +838,8 @@ mod tests { fn request_info_https_from_client_info_tls_protocol() { let req = build_request(Method::GET, "https://test.example.com/page"); let client_info = ClientInfo { - client_ip: None, tls_protocol: Some("TLSv1.3".to_string()), - tls_cipher: None, + ..ClientInfo::default() }; let info = RequestInfo::from_request(&req, &client_info); @@ -859,9 +854,8 @@ mod tests { fn request_info_https_from_client_info_tls_cipher() { let req = build_request(Method::GET, "https://test.example.com/page"); let client_info = ClientInfo { - client_ip: None, - tls_protocol: None, tls_cipher: Some("TLS_AES_128_GCM_SHA256".to_string()), + 
..ClientInfo::default() }; let info = RequestInfo::from_request(&req, &client_info); diff --git a/crates/trusted-server-core/src/integrations/datadome.rs b/crates/trusted-server-core/src/integrations/datadome.rs index 0dfb9950b..40a7691ad 100644 --- a/crates/trusted-server-core/src/integrations/datadome.rs +++ b/crates/trusted-server-core/src/integrations/datadome.rs @@ -64,18 +64,23 @@ use http::header; use http::{Method, StatusCode}; use regex::Regex; use serde::Deserialize; +use serde_json::Value as JsonValue; use validator::Validate; use crate::error::TrustedServerError; use crate::integrations::{ collect_body_bounded, collect_response_bounded, ensure_integration_backend, AttributeRewriteAction, IntegrationAttributeContext, IntegrationAttributeRewriter, - IntegrationEndpoint, IntegrationProxy, IntegrationRegistration, INTEGRATION_MAX_BODY_BYTES, - UPSTREAM_SDK_MAX_RESPONSE_BYTES, + IntegrationEndpoint, IntegrationHeadInjector, IntegrationHtmlContext, IntegrationProxy, + IntegrationRegistration, IntegrationRequestFilter, RequestFilterDecision, RequestFilterInput, + INTEGRATION_MAX_BODY_BYTES, UPSTREAM_SDK_MAX_RESPONSE_BYTES, }; use crate::platform::{PlatformHttpRequest, RuntimeServices}; +use crate::redacted::Redacted; use crate::settings::{IntegrationConfig, Settings}; +mod protection; + const DATADOME_INTEGRATION_ID: &str = "datadome"; /// Regex pattern for matching and rewriting `DataDome` URLs in script content. @@ -127,6 +132,52 @@ pub struct DataDomeConfig { /// Whether to rewrite `DataDome` script URLs in HTML to first-party paths #[serde(default = "default_rewrite_sdk")] pub rewrite_sdk: bool, + + /// Whether to call DataDome Protection API before route matching. + #[serde(default)] + pub enable_protection: bool, + + /// DataDome server-side key used for Protection API validation. + #[serde(default)] + pub server_side_key: Redacted, + + /// Base URL for the DataDome Protection API. 
+ #[serde(default = "default_protection_api_origin")] + #[validate(url)] + pub protection_api_origin: String, + + /// First-byte timeout for Protection API calls, in milliseconds. + #[serde(default = "default_timeout_ms")] + #[validate(range(min = 1, max = 10000))] + pub timeout_ms: u32, + + /// Regex for URLs to exclude from Protection API validation. + #[serde(default = "default_url_pattern_exclusion")] + pub url_pattern_exclusion: String, + + /// Regex for URLs to include in Protection API validation. + #[serde(default)] + pub url_pattern_inclusion: String, + + /// Reserved flag for future GraphQL payload extraction. + #[serde(default)] + pub enable_graphql_support: bool, + + /// DataDome client-side key used for auto-injecting the browser tag. + #[serde(default)] + pub client_side_key: String, + + /// Whether to auto-inject the DataDome browser tag when a client-side key exists. + #[serde(default = "default_inject_client_side_tag")] + pub inject_client_side_tag: bool, + + /// URL used for the injected DataDome browser tag. + #[serde(default = "default_client_side_tag_url")] + pub client_side_tag_url: String, + + /// Options assigned to `window.ddoptions` before loading the browser tag. 
+ #[serde(default = "default_client_side_configuration")] + pub client_side_configuration: JsonValue, } fn default_enabled() -> bool { @@ -149,6 +200,30 @@ fn default_rewrite_sdk() -> bool { true } +fn default_protection_api_origin() -> String { + "https://api-fastly.datadome.co".to_string() +} + +fn default_timeout_ms() -> u32 { + 1500 +} + +fn default_url_pattern_exclusion() -> String { + r"\.(avi|flv|mka|mkv|mov|mp4|mpeg|mpg|mp3|flac|ogg|ogm|opus|wav|webm|webp|bmp|gif|ico|jpeg|jpg|png|svg|svgz|swf|eot|otf|ttf|woff|woff2|css|less|js|map)$".to_string() +} + +fn default_inject_client_side_tag() -> bool { + true +} + +fn default_client_side_tag_url() -> String { + "/integrations/datadome/tags.js".to_string() +} + +fn default_client_side_configuration() -> JsonValue { + serde_json::json!({ "ajaxListenerPath": true }) +} + impl Default for DataDomeConfig { fn default() -> Self { Self { @@ -157,6 +232,17 @@ impl Default for DataDomeConfig { api_origin: default_api_origin(), cache_ttl_seconds: default_cache_ttl(), rewrite_sdk: default_rewrite_sdk(), + enable_protection: false, + server_side_key: Redacted::default(), + protection_api_origin: default_protection_api_origin(), + timeout_ms: default_timeout_ms(), + url_pattern_exclusion: default_url_pattern_exclusion(), + url_pattern_inclusion: String::new(), + enable_graphql_support: false, + client_side_key: String::new(), + inject_client_side_tag: default_inject_client_side_tag(), + client_side_tag_url: default_client_side_tag_url(), + client_side_configuration: default_client_side_configuration(), } } } @@ -170,11 +256,50 @@ impl IntegrationConfig for DataDomeConfig { /// `DataDome` integration implementation. 
pub struct DataDomeIntegration { config: DataDomeConfig, + protection_exclusion: Option, + protection_inclusion: Option, } impl DataDomeIntegration { + #[cfg(test)] fn new(config: DataDomeConfig) -> Arc { - Arc::new(Self { config }) + Self::try_new(config).expect("should create DataDome integration") + } + + fn try_new(config: DataDomeConfig) -> Result, Report> { + if config.enable_protection && config.server_side_key.expose().trim().is_empty() { + return Err(Report::new(Self::error( + "server_side_key is required when enable_protection is true", + ))); + } + + if config.enable_graphql_support { + log::warn!("[datadome] enable_graphql_support is reserved and ignored in v1"); + } + + let protection_exclusion = + Self::compile_optional_regex(&config.url_pattern_exclusion, "url_pattern_exclusion")?; + let protection_inclusion = + Self::compile_optional_regex(&config.url_pattern_inclusion, "url_pattern_inclusion")?; + + Ok(Arc::new(Self { + config, + protection_exclusion, + protection_inclusion, + })) + } + + fn compile_optional_regex( + pattern: &str, + name: &str, + ) -> Result, Report> { + if pattern.trim().is_empty() { + return Ok(None); + } + + Regex::new(&format!("(?i:{pattern})")) + .map(Some) + .map_err(|err| Report::new(Self::error(format!("Invalid {name}: {err}")))) } fn error(message: impl Into) -> TrustedServerError { @@ -473,6 +598,55 @@ impl IntegrationProxy for DataDomeIntegration { } } +#[async_trait(?Send)] +impl IntegrationRequestFilter for DataDomeIntegration { + fn integration_id(&self) -> &'static str { + DATADOME_INTEGRATION_ID + } + + async fn filter_request( + &self, + input: RequestFilterInput<'_>, + ) -> Result> { + Ok(self.filter_protection_request(input).await) + } +} + +impl IntegrationHeadInjector for DataDomeIntegration { + fn integration_id(&self) -> &'static str { + DATADOME_INTEGRATION_ID + } + + fn head_inserts(&self, _ctx: &IntegrationHtmlContext<'_>) -> Vec { + if !self.config.inject_client_side_tag || 
self.config.client_side_key.trim().is_empty() { + return Vec::new(); + } + + let key = serde_json::to_string(&self.config.client_side_key) + .unwrap_or_else(|err| { + log::warn!("[datadome] Failed to serialize client-side key: {err}"); + "\"\"".to_string() + }) + .replace("window.ddjskey={key};window.ddoptions={options};" + )] + } +} + impl IntegrationAttributeRewriter for DataDomeIntegration { fn integration_id(&self) -> &'static str { DATADOME_INTEGRATION_ID @@ -527,7 +701,7 @@ fn build( config.rewrite_sdk ); - Ok(Some(DataDomeIntegration::new(config))) + Ok(Some(DataDomeIntegration::try_new(config)?)) } /// Register the `DataDome` integration with Trusted Server. @@ -543,12 +717,16 @@ pub fn register( return Ok(None); }; - Ok(Some( - IntegrationRegistration::builder(DATADOME_INTEGRATION_ID) - .with_proxy(integration.clone()) - .with_attribute_rewriter(integration) - .build(), - )) + let mut builder = IntegrationRegistration::builder(DATADOME_INTEGRATION_ID) + .with_proxy(integration.clone()) + .with_attribute_rewriter(integration.clone()) + .with_head_injector(integration.clone()); + + if integration.config.enable_protection { + builder = builder.with_request_filter(integration); + } + + Ok(Some(builder.build())) } #[cfg(test)] @@ -566,6 +744,7 @@ mod tests { api_origin: "https://api-js.datadome.co".to_string(), cache_ttl_seconds: 3600, rewrite_sdk: true, + ..DataDomeConfig::default() } } diff --git a/crates/trusted-server-core/src/integrations/datadome/protection.rs b/crates/trusted-server-core/src/integrations/datadome/protection.rs new file mode 100644 index 000000000..914fa2d92 --- /dev/null +++ b/crates/trusted-server-core/src/integrations/datadome/protection.rs @@ -0,0 +1,607 @@ +use std::time::Duration; + +use edgezero_core::body::Body as EdgeBody; +use edgezero_core::http::{request_builder, HeaderMap, HeaderName}; +use error_stack::{Report, ResultExt}; +use http::{header, Method, Request, Response, StatusCode}; +use url::Url; + +use 
crate::error::TrustedServerError; +use crate::integrations::{ + HeaderMutation, RequestFilterDecision, RequestFilterEffects, RequestFilterInput, +}; +use crate::platform::{PlatformBackendSpec, PlatformHttpRequest, RuntimeServices}; + +use super::DataDomeIntegration; + +const VALIDATE_REQUEST_PATH: &str = "/validate-request"; +const REQUEST_MODULE_NAME: &str = "Trusted-Server-Rust"; +const MODULE_VERSION: &str = env!("CARGO_PKG_VERSION"); +const HEADER_DATADOME_RESPONSE: &str = "x-datadomeresponse"; +const HEADER_DATADOME_REQUEST_HEADERS: &str = "x-datadome-request-headers"; +const HEADER_DATADOME_HEADERS: &str = "x-datadome-headers"; +const HEADER_DATADOME_CLIENT_ID: &str = "x-datadome-clientid"; +const HEADER_DATADOME_X_SET_COOKIE: &str = "x-datadome-x-set-cookie"; +const DATADOME_COOKIE_NAME: &str = "datadome"; + +impl DataDomeIntegration { + pub(super) async fn filter_protection_request( + &self, + input: RequestFilterInput<'_>, + ) -> RequestFilterDecision { + if !self.config.enable_protection || !self.is_request_protected(input.request) { + return RequestFilterDecision::Continue(RequestFilterEffects::default()); + } + + match self.filter_protection_request_inner(input).await { + Ok(decision) => decision, + Err(err) => { + log::warn!("[datadome] Protection API failed open: {err:?}"); + RequestFilterDecision::Continue(RequestFilterEffects::default()) + } + } + } + + async fn filter_protection_request_inner( + &self, + input: RequestFilterInput<'_>, + ) -> Result> { + let api_url = self.protection_validate_url(); + let backend_name = self.ensure_protection_backend(input.services, &api_url)?; + let payload = self.build_protection_payload(&input); + let encoded_body = form_encode(&payload.fields); + + let mut builder = request_builder() + .method(Method::POST.as_str()) + .uri(api_url.as_str()) + .header( + header::CONTENT_TYPE.as_str(), + "application/x-www-form-urlencoded", + ) + .header( + header::CONTENT_LENGTH.as_str(), + encoded_body.len().to_string(), + ); + 
+ if payload.uses_header_client_id { + builder = builder.header(HEADER_DATADOME_X_SET_COOKIE, "true"); + } + + let request = builder + .body(EdgeBody::from(encoded_body)) + .change_context(Self::error( + "Failed to build DataDome Protection API request", + ))?; + + let platform_response = input + .services + .http_client() + .send(PlatformHttpRequest::new(request, backend_name)) + .await + .change_context(Self::error("Failed to call DataDome Protection API"))?; + + Ok(self.classify_protection_response(platform_response.response)) + } + + fn is_request_protected(&self, req: &Request) -> bool { + if req.method() == Method::OPTIONS { + return false; + } + + let path = req.uri().path(); + if is_internal_path(path) { + return false; + } + + let target = format!("{}{}", request_host(req), path); + + if let Some(inclusion) = &self.protection_inclusion { + if !inclusion.is_match(&target) { + return false; + } + } + + if let Some(exclusion) = &self.protection_exclusion { + if exclusion.is_match(&target) { + return false; + } + } + + true + } + + fn protection_validate_url(&self) -> String { + format!( + "{}{}", + self.config.protection_api_origin.trim_end_matches('/'), + VALIDATE_REQUEST_PATH + ) + } + + fn ensure_protection_backend( + &self, + services: &RuntimeServices, + api_url: &str, + ) -> Result> { + let parsed = Url::parse(api_url) + .change_context(Self::error("Invalid DataDome Protection API URL"))?; + let host = parsed + .host_str() + .ok_or_else(|| Report::new(Self::error("Missing DataDome Protection API host")))?; + let spec = PlatformBackendSpec { + scheme: parsed.scheme().to_string(), + host: host.to_string(), + port: parsed.port(), + certificate_check: true, + first_byte_timeout: Duration::from_millis(u64::from(self.config.timeout_ms)), + }; + + services.backend().ensure(&spec).change_context(Self::error( + "Failed to register DataDome Protection API backend", + )) + } + + fn build_protection_payload(&self, input: &RequestFilterInput<'_>) -> 
ProtectionPayload { + let req = input.request; + let client_info = input.services.client_info(); + let mut fields = Vec::new(); + let header_client_id = header_value(req, HEADER_DATADOME_CLIENT_ID); + let cookie_header = header_value(req, header::COOKIE.as_str()); + let cookie_client_id = parse_cookie_value(&cookie_header, DATADOME_COOKIE_NAME); + let client_id = if header_client_id.is_empty() { + cookie_client_id.unwrap_or_default() + } else { + header_client_id.clone() + }; + + push_field(&mut fields, "Key", self.config.server_side_key.expose()); + push_field( + &mut fields, + "IP", + client_info + .client_ip + .map(|ip| ip.to_string()) + .unwrap_or_default(), + ); + push_header_field(&mut fields, req, "Accept", header::ACCEPT.as_str()); + push_header_field(&mut fields, req, "AcceptCharset", "accept-charset"); + push_header_field( + &mut fields, + req, + "AcceptEncoding", + header::ACCEPT_ENCODING.as_str(), + ); + push_header_field( + &mut fields, + req, + "AcceptLanguage", + header::ACCEPT_LANGUAGE.as_str(), + ); + push_field( + &mut fields, + "AuthorizationLen", + header_value(req, header::AUTHORIZATION.as_str()) + .len() + .to_string(), + ); + push_header_field( + &mut fields, + req, + "CacheControl", + header::CACHE_CONTROL.as_str(), + ); + push_field(&mut fields, "ClientID", client_id); + push_header_field(&mut fields, req, "Connection", header::CONNECTION.as_str()); + push_header_field( + &mut fields, + req, + "ContentType", + header::CONTENT_TYPE.as_str(), + ); + push_field(&mut fields, "CookiesLen", cookie_header.len().to_string()); + push_header_field(&mut fields, req, "From", "from"); + push_field(&mut fields, "HeadersList", headers_list(req)); + push_field(&mut fields, "Host", request_host(req)); + push_field(&mut fields, "Method", req.method().as_str()); + push_field(&mut fields, "ModuleVersion", MODULE_VERSION); + push_header_field(&mut fields, req, "Origin", header::ORIGIN.as_str()); + push_field(&mut fields, "Port", "0"); + push_header_field( + 
&mut fields, + req, + "PostParamLen", + header::CONTENT_LENGTH.as_str(), + ); + push_header_field(&mut fields, req, "Pragma", header::PRAGMA.as_str()); + push_field( + &mut fields, + "Protocol", + req.uri().scheme_str().unwrap_or_default(), + ); + push_header_field(&mut fields, req, "Referer", header::REFERER.as_str()); + push_field(&mut fields, "Request", request_path_and_query(req)); + push_field(&mut fields, "RequestModuleName", REQUEST_MODULE_NAME); + push_header_field( + &mut fields, + req, + "SecCHDeviceMemory", + "sec-ch-device-memory", + ); + push_header_field(&mut fields, req, "SecCHUA", "sec-ch-ua"); + push_header_field(&mut fields, req, "SecCHUAArch", "sec-ch-ua-arch"); + push_header_field( + &mut fields, + req, + "SecCHUAFullVersionList", + "sec-ch-ua-full-version-list", + ); + push_header_field(&mut fields, req, "SecCHUAMobile", "sec-ch-ua-mobile"); + push_header_field(&mut fields, req, "SecCHUAModel", "sec-ch-ua-model"); + push_header_field(&mut fields, req, "SecCHUAPlatform", "sec-ch-ua-platform"); + push_header_field(&mut fields, req, "SecFetchDest", "sec-fetch-dest"); + push_header_field(&mut fields, req, "SecFetchMode", "sec-fetch-mode"); + push_header_field(&mut fields, req, "SecFetchSite", "sec-fetch-site"); + push_header_field( + &mut fields, + req, + "SecFetchStorageAccess", + "sec-fetch-storage-access", + ); + push_header_field(&mut fields, req, "SecFetchUser", "sec-fetch-user"); + push_field(&mut fields, "ServerHostname", request_host(req)); + push_field( + &mut fields, + "ServerName", + client_info.server_hostname.as_deref().unwrap_or_default(), + ); + push_field( + &mut fields, + "ServerRegion", + client_info.server_region.as_deref().unwrap_or_default(), + ); + push_field( + &mut fields, + "TimeRequest", + chrono::Utc::now().timestamp_micros().to_string(), + ); + push_header_field(&mut fields, req, "TrueClientIP", "true-client-ip"); + push_header_field(&mut fields, req, "UserAgent", header::USER_AGENT.as_str()); + push_header_field(&mut 
fields, req, "Via", header::VIA.as_str()); + push_header_field(&mut fields, req, "XForwardedForIP", "x-forwarded-for"); + push_header_field(&mut fields, req, "X-Real-IP", "x-real-ip"); + push_header_field(&mut fields, req, "X-Requested-With", "x-requested-with"); + push_field( + &mut fields, + "TlsProtocol", + client_info.tls_protocol.as_deref().unwrap_or_default(), + ); + push_field( + &mut fields, + "TlsCipher", + client_info.tls_cipher.as_deref().unwrap_or_default(), + ); + push_field( + &mut fields, + "JA4", + client_info.tls_ja4.as_deref().unwrap_or_default(), + ); + push_field( + &mut fields, + "H2Fingerprint", + client_info.h2_fingerprint.as_deref().unwrap_or_default(), + ); + + ProtectionPayload { + fields, + uses_header_client_id: !header_client_id.is_empty(), + } + } + + fn classify_protection_response( + &self, + response: edgezero_core::http::Response, + ) -> RequestFilterDecision { + let (parts, body) = response.into_parts(); + let status = parts.status; + let Some(datadome_status) = datadome_response_status(&parts.headers) else { + log::warn!("[datadome] Protection API response missing X-DataDomeResponse"); + return RequestFilterDecision::Continue(RequestFilterEffects::default()); + }; + + if datadome_status != status.as_u16() { + log::warn!( + "[datadome] Protection API status/header mismatch: status={} header={}", + status.as_u16(), + datadome_status + ); + return RequestFilterDecision::Continue(RequestFilterEffects::default()); + } + + let effects = RequestFilterEffects { + request_headers: extract_header_mutations( + &parts.headers, + HEADER_DATADOME_REQUEST_HEADERS, + ), + response_headers: extract_header_mutations(&parts.headers, HEADER_DATADOME_HEADERS), + }; + + if status == StatusCode::OK { + return RequestFilterDecision::Continue(effects); + } + + if matches!(status.as_u16(), 301 | 302 | 401 | 403 | 429) { + if body.is_stream() { + log::warn!("[datadome] Protection API challenge body was streaming; failing open"); + return 
RequestFilterDecision::Continue(RequestFilterEffects::default()); + } + let body_bytes = body.into_bytes(); + let challenge = Response::builder() + .status(status) + .body(EdgeBody::from(body_bytes.as_ref().to_vec())) + .expect("should build DataDome challenge response"); + return RequestFilterDecision::Respond { + response: challenge, + effects, + }; + } + + log::warn!( + "[datadome] Protection API returned fail-open status {}", + status.as_u16() + ); + RequestFilterDecision::Continue(RequestFilterEffects::default()) + } +} + +struct ProtectionPayload { + fields: Vec<(String, String)>, + uses_header_client_id: bool, +} + +fn is_internal_path(path: &str) -> bool { + path.starts_with("/static/tsjs=") + || path.starts_with("/integrations/") + || path.starts_with("/first-party/") + || path == "/.well-known/trusted-server.json" + || path == "/verify-signature" + || path.starts_with("/admin/") + || path.starts_with("/_ts/admin/") + || path == "/_ts/api/v1/identify" + || path == "/_ts/api/v1/batch-sync" +} + +fn request_host(req: &Request) -> String { + req.headers() + .get(header::HOST) + .and_then(|value| value.to_str().ok()) + .or_else(|| req.uri().host()) + .unwrap_or_default() + .to_string() +} + +fn request_path_and_query(req: &Request) -> String { + req.uri() + .path_and_query() + .map(|path_and_query| path_and_query.as_str().to_string()) + .unwrap_or_else(|| req.uri().path().to_string()) +} + +fn header_value(req: &Request, name: &str) -> String { + req.headers() + .get(name) + .and_then(|value| value.to_str().ok()) + .unwrap_or_default() + .to_string() +} + +fn headers_list(req: &Request) -> String { + req.headers() + .keys() + .map(|name| name.as_str()) + .collect::>() + .join(",") +} + +fn push_header_field( + fields: &mut Vec<(String, String)>, + req: &Request, + field_name: &'static str, + header_name: &str, +) { + push_field(fields, field_name, header_value(req, header_name)); +} + +fn push_field(fields: &mut Vec<(String, String)>, key: &'static str, value: 
impl ToString) { + let value = value.to_string(); + if value.is_empty() { + return; + } + + fields.push((key.to_string(), truncate_field(key, &value))); +} + +fn form_encode(fields: &[(String, String)]) -> String { + fields + .iter() + .map(|(key, value)| { + format!( + "{}={}", + urlencoding::encode(key), + urlencoding::encode(value) + ) + }) + .collect::>() + .join("&") +} + +fn datadome_response_status(headers: &HeaderMap) -> Option { + headers + .get(HEADER_DATADOME_RESPONSE) + .and_then(|value| value.to_str().ok()) + .and_then(|value| value.parse::().ok()) +} + +fn extract_header_mutations(headers: &HeaderMap, pointer_header: &str) -> Vec { + let mut mutations = Vec::new(); + + for pointer_value in headers.get_all(pointer_header) { + let Ok(pointer_value) = pointer_value.to_str() else { + continue; + }; + + for header_name in pointer_value.split_whitespace() { + if header_name.eq_ignore_ascii_case(HEADER_DATADOME_HEADERS) + || header_name.eq_ignore_ascii_case(HEADER_DATADOME_REQUEST_HEADERS) + || header_name.eq_ignore_ascii_case(HEADER_DATADOME_RESPONSE) + { + continue; + } + + let Ok(parsed_name) = HeaderName::from_bytes(header_name.as_bytes()) else { + log::warn!("[datadome] Ignoring invalid pointer header name: {header_name}"); + continue; + }; + + for value in headers.get_all(&parsed_name) { + let Ok(value) = value.to_str() else { + continue; + }; + if parsed_name + .as_str() + .eq_ignore_ascii_case(header::SET_COOKIE.as_str()) + { + mutations.push(HeaderMutation::append(parsed_name.as_str(), value)); + } else { + mutations.push(HeaderMutation::set(parsed_name.as_str(), value)); + } + } + } + } + + mutations +} + +fn parse_cookie_value(cookie_header: &str, name: &str) -> Option { + for pair in cookie_header.split(';') { + let trimmed = pair.trim(); + let Some((cookie_name, cookie_value)) = trimmed.split_once('=') else { + continue; + }; + if cookie_name == name { + let unquoted = cookie_value.trim_matches('"'); + return Some( + 
urlencoding::decode(unquoted) + .map(|decoded| decoded.into_owned()) + .unwrap_or_else(|_| unquoted.to_string()), + ); + } + } + + None +} + +fn truncate_field(key: &str, value: &str) -> String { + let limit = field_limit(key); + if limit == 0 { + return value.to_string(); + } + + truncate_utf8(value, limit) +} + +fn field_limit(key: &str) -> i32 { + match key.to_ascii_lowercase().as_str() { + "jsonrpcversion" + | "secchdevicememory" + | "secchuamobile" + | "secfetchstorageaccess" + | "secfetchuser" => 8, + "mcpparamsclientinfoversion" | "mcpprotocolversion" | "secchuaarch" => 16, + "secchuaplatform" | "secfetchdest" | "secfetchmode" => 32, + "contenttype" + | "jsonrpcrequestid" + | "mcpmethod" + | "mcpparamsclientinfoname" + | "mcpparamstoolname" + | "mcpsessionid" + | "secfetchsite" + | "tlscipher" => 64, + "acceptcharset" + | "acceptencoding" + | "cachecontrol" + | "connection" + | "from" + | "graphqloperationname" + | "pragma" + | "secchua" + | "secchuamodel" + | "trueclientip" + | "userid" + | "x-real-ip" + | "x-requested-with" + | "productid" => 128, + "acceptlanguage" | "secchuafullversionlist" | "via" => 256, + "accept" | "clientid" | "headerslist" | "host" | "origin" | "serverhostname" + | "servername" | "signature" | "signatureagent" => 512, + "xforwardedforip" => -512, + "useragent" => 768, + "cookieslist" | "referer" => 1024, + "request" | "signatureinput" => 2048, + _ => 0, + } +} + +fn truncate_utf8(value: &str, limit: i32) -> String { + let max = limit.unsigned_abs() as usize; + if value.len() <= max { + return value.to_string(); + } + + if limit > 0 { + let mut end = 0; + for (idx, ch) in value.char_indices() { + let next = idx + ch.len_utf8(); + if next > max { + break; + } + end = next; + } + value[..end].to_string() + } else { + let mut start = value.len(); + let mut used = 0; + for (idx, ch) in value.char_indices().rev() { + let next = used + ch.len_utf8(); + if next > max { + break; + } + used = next; + start = idx; + } + 
value[start..].to_string() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_cookie_value_decodes_datadome_cookie() { + let value = parse_cookie_value("a=1; datadome=abc%20123; b=2", "datadome") + .expect("should parse datadome cookie"); + assert_eq!(value, "abc 123"); + } + + #[test] + fn truncate_utf8_preserves_char_boundaries() { + assert_eq!(truncate_utf8("ééé", 4), "éé"); + assert_eq!(truncate_utf8("ééé", -4), "éé"); + } + + #[test] + fn form_encode_url_encodes_values() { + let encoded = form_encode(&[("Key".to_string(), "a b+c".to_string())]); + assert_eq!(encoded, "Key=a%20b%2Bc"); + } +} diff --git a/crates/trusted-server-core/src/integrations/mod.rs b/crates/trusted-server-core/src/integrations/mod.rs index 1f9c79e72..297cf9d05 100644 --- a/crates/trusted-server-core/src/integrations/mod.rs +++ b/crates/trusted-server-core/src/integrations/mod.rs @@ -26,12 +26,14 @@ pub mod sourcepoint; pub mod testlight; pub use registry::{ - AttributeRewriteAction, AttributeRewriteOutcome, IntegrationAttributeContext, - IntegrationAttributeRewriter, IntegrationDocumentState, IntegrationEndpoint, - IntegrationHeadInjector, IntegrationHtmlContext, IntegrationHtmlPostProcessor, - IntegrationMetadata, IntegrationProxy, IntegrationRegistration, IntegrationRegistrationBuilder, - IntegrationRegistry, IntegrationScriptContext, IntegrationScriptRewriter, ProxyDispatchInput, - ScriptRewriteAction, + AttributeRewriteAction, AttributeRewriteOutcome, HeaderMutation, HeaderMutationMode, + IntegrationAttributeContext, IntegrationAttributeRewriter, IntegrationDocumentState, + IntegrationEndpoint, IntegrationHeadInjector, IntegrationHtmlContext, + IntegrationHtmlPostProcessor, IntegrationMetadata, IntegrationProxy, IntegrationRegistration, + IntegrationRegistrationBuilder, IntegrationRegistry, IntegrationRequestFilter, + IntegrationScriptContext, IntegrationScriptRewriter, ProxyDispatchInput, RequestFilterDecision, + RequestFilterEffects, RequestFilterInput, 
RequestFilterRegistryInput, + RequestFilterRegistryOutcome, ScriptRewriteAction, }; /// Registers or retrieves a platform backend for the given URL. diff --git a/crates/trusted-server-core/src/integrations/registry.rs b/crates/trusted-server-core/src/integrations/registry.rs index 78ff7fa21..6b997afed 100644 --- a/crates/trusted-server-core/src/integrations/registry.rs +++ b/crates/trusted-server-core/src/integrations/registry.rs @@ -323,6 +323,185 @@ pub trait IntegrationProxy: Send + Sync { } } +/// Input passed to integration request filters. +pub struct RequestFilterInput<'a> { + pub settings: &'a Settings, + pub services: &'a RuntimeServices, + pub request: &'a Request, +} + +/// How a header mutation should be applied. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum HeaderMutationMode { + Set, + Append, +} + +/// Header mutation requested by an integration filter. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct HeaderMutation { + pub name: String, + pub value: String, + pub mode: HeaderMutationMode, +} + +impl HeaderMutation { + #[must_use] + pub fn set(name: impl Into, value: impl Into) -> Self { + Self { + name: name.into(), + value: value.into(), + mode: HeaderMutationMode::Set, + } + } + + #[must_use] + pub fn append(name: impl Into, value: impl Into) -> Self { + Self { + name: name.into(), + value: value.into(), + mode: HeaderMutationMode::Append, + } + } +} + +/// Request and response effects returned by request filters. 
+#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct RequestFilterEffects { + pub request_headers: Vec, + pub response_headers: Vec, +} + +impl RequestFilterEffects { + fn extend(&mut self, next: Self) { + self.request_headers.extend(next.request_headers); + self.response_headers.extend(next.response_headers); + } + + fn apply_to_request(&self, req: &mut Request) { + for mutation in &self.request_headers { + apply_header_mutation_to_request(req, mutation); + } + } + + pub fn apply_to_response(&self, response: &mut Response) { + for mutation in &self.response_headers { + apply_header_mutation_to_response(response, mutation); + } + } +} + +/// Decision returned by an integration request filter. +pub enum RequestFilterDecision { + Continue(RequestFilterEffects), + Respond { + response: Response, + effects: RequestFilterEffects, + }, +} + +/// Input passed to [`IntegrationRegistry::filter_request`]. +pub struct RequestFilterRegistryInput<'a> { + pub settings: &'a Settings, + pub services: &'a RuntimeServices, + pub req: &'a mut Request, +} + +/// Outcome returned by [`IntegrationRegistry::filter_request`]. +pub enum RequestFilterRegistryOutcome { + Continue(RequestFilterEffects), + Respond { + response: Response, + effects: RequestFilterEffects, + }, +} + +/// Trait for integration-provided pre-routing request filters. +#[async_trait(?Send)] +pub trait IntegrationRequestFilter: Send + Sync { + /// Identifier for logging/diagnostics. + fn integration_id(&self) -> &'static str; + + /// Filter an incoming request before normal route matching. 
+ async fn filter_request( + &self, + input: RequestFilterInput<'_>, + ) -> Result>; +} + +fn is_forbidden_filter_header(name: &str) -> bool { + let lower = name.to_ascii_lowercase(); + matches!( + lower.as_str(), + "connection" + | "keep-alive" + | "proxy-authenticate" + | "proxy-authorization" + | "te" + | "trailer" + | "transfer-encoding" + | "upgrade" + | "content-length" + | "host" + ) || lower.starts_with("x-ts-") +} + +fn apply_header_mutation_to_request(req: &mut Request, mutation: &HeaderMutation) { + if is_forbidden_filter_header(&mutation.name) { + log::warn!( + "Skipping forbidden request-filter header: {}", + mutation.name + ); + return; + } + + let Ok(name) = http::HeaderName::from_bytes(mutation.name.as_bytes()) else { + log::warn!("Skipping invalid request-filter header: {}", mutation.name); + return; + }; + let Ok(value) = http::HeaderValue::from_str(&mutation.value) else { + log::warn!("Skipping invalid request-filter header value: {}", mutation.name); + return; + }; + + match mutation.mode { + HeaderMutationMode::Set => { + req.headers_mut().insert(name, value); + } + HeaderMutationMode::Append => { + req.headers_mut().append(name, value); + } + } +} + +fn apply_header_mutation_to_response(response: &mut Response, mutation: &HeaderMutation) { + if is_forbidden_filter_header(&mutation.name) { + log::warn!( + "Skipping forbidden response-filter header: {}", + mutation.name + ); + return; + } + + let Ok(name) = http::HeaderName::from_bytes(mutation.name.as_bytes()) else { + log::warn!("Skipping invalid response-filter header: {}", mutation.name); + return; + }; + let Ok(value) = http::HeaderValue::from_str(&mutation.value) else { + log::warn!("Skipping invalid response-filter header value: {}", mutation.name); + return; + }; + + match mutation.mode { + HeaderMutationMode::Set => { + response.headers_mut().insert(name, value); + } + HeaderMutationMode::Append => { + response.headers_mut().append(name, value); + } + } +} + /// Trait for 
integration-provided HTML attribute rewrite hooks. pub trait IntegrationAttributeRewriter: Send + Sync { /// Identifier for logging/diagnostics. @@ -397,6 +576,7 @@ pub struct IntegrationRegistration { pub script_rewriters: Vec>, pub html_post_processors: Vec>, pub head_injectors: Vec>, + pub request_filters: Vec>, } impl IntegrationRegistration { @@ -421,6 +601,7 @@ impl IntegrationRegistrationBuilder { script_rewriters: Vec::new(), html_post_processors: Vec::new(), head_injectors: Vec::new(), + request_filters: Vec::new(), }, } } @@ -461,6 +642,12 @@ impl IntegrationRegistrationBuilder { self } + #[must_use] + pub fn with_request_filter(mut self, filter: Arc) -> Self { + self.registration.request_filters.push(filter); + self + } + /// Mark this integration's JS module for deferred loading via /// ` + +``` -| Option | Type | Default | Description | -| ------------------- | ------- | ---------------------------- | --------------------------------------------------------- | -| `enabled` | boolean | `false` | Enable the DataDome integration | -| `sdk_origin` | string | `https://js.datadome.co` | DataDome SDK origin URL (for tags.js) | -| `api_origin` | string | `https://api-js.datadome.co` | DataDome signal collection API origin URL (for /js/\*) | -| `cache_ttl_seconds` | integer | `3600` | Cache TTL for tags.js (1 hour default) | -| `rewrite_sdk` | boolean | `true` | Rewrite DataDome script URLs in HTML to first-party paths | +If your site already manages the DataDome tag, disable auto-injection: -## Usage +```toml +[integrations.datadome] +inject_client_side_tag = false +``` -### Publisher Page Setup +### Manual setup -Update your page to load DataDome through Trusted Server: +You can also load DataDome manually through the first-party path: ```html ``` -If `rewrite_sdk` is enabled, Trusted Server will automatically rewrite any existing DataDome script tags in your HTML: +If `rewrite_sdk` is enabled, Trusted Server rewrites existing DataDome script tags in HTML: 
```html @@ -65,34 +128,94 @@ If `rewrite_sdk` is enabled, Trusted Server will automatically rewrite any exist ``` +## Server-side Protection API + +When `enable_protection = true`, Trusted Server calls DataDome before normal route matching. DataDome can return: + +- **Allow**: continue routing and optionally enrich the upstream request. +- **Challenge**: return the DataDome response directly without contacting the publisher origin. +- **Fail-open condition**: continue routing without DataDome effects when the Protection API times out, returns malformed instructions, or returns an unexpected status. + +`server_side_key` is required when server-side protection is enabled. + +### Protected traffic + +A request is protected when all of the following are true: + +1. The DataDome integration is enabled. +2. `enable_protection = true`. +3. The method is not `OPTIONS`. +4. The path is not one of Trusted Server's internal routes. +5. The `host + pathname` matches `url_pattern_inclusion`, when configured. +6. The `host + pathname` does not match `url_pattern_exclusion`, when configured. + +Static assets are excluded by default using a case-insensitive file-extension regex. Trusted Server internal routes such as `/static/tsjs=`, `/integrations/`, `/first-party/`, admin routes, discovery routes, and signature-verification routes are also excluded by default. + +Auction traffic at `/auction` is protected by default. + +### Header handling + +DataDome can return pointer headers that identify which headers Trusted Server should copy: + +| Pointer header | Applied to | +| ---------------------------- | ------------------------------------------ | +| `X-DataDome-request-headers` | Request forwarded to Trusted Server/origin | +| `X-DataDome-headers` | Final browser response | + +Trusted Server copies only the named headers. Pointer headers themselves are not forwarded. `Set-Cookie` is appended, while other copied headers are set/replaced. 
Unsafe hop-by-hop, framing, host, and internal `x-ts-*` headers are rejected. + +DataDome downstream response headers are applied after EC response finalization and generic Trusted Server response headers so DataDome challenge/cache/cookie headers win. + +### GraphQL limitation + +`enable_graphql_support` is reserved for future request-body inspection. Trusted Server v1 does not parse GraphQL bodies for DataDome payload enrichment. + ## Endpoints -The integration exposes the following routes: +The first-party layer exposes these routes: | Method | Path | Description | | ---------- | -------------------------------- | --------------------- | | `GET` | `/integrations/datadome/tags.js` | DataDome SDK script | | `GET/POST` | `/integrations/datadome/js/*` | Signal collection API | -## How It Works +## How it works ```mermaid sequenceDiagram participant Browser participant TS as Trusted Server + participant DD as DataDome Protection API participant SDK as js.datadome.co participant API as api-js.datadome.co + participant Origin as Publisher origin + + Browser->>TS: GET /page + TS->>DD: POST /validate-request + alt DataDome allows + DD-->>TS: 200 + header instructions + TS->>Origin: Forward enriched request + Origin-->>TS: Page response + TS-->>Browser: Final response + DataDome headers + else DataDome challenges + DD-->>TS: Challenge response + TS-->>Browser: Challenge response + DataDome headers + else DataDome unavailable + TS->>Origin: Fail open and continue + Origin-->>TS: Page response + TS-->>Browser: Final response + end Browser->>TS: GET /integrations/datadome/tags.js TS->>SDK: GET /tags.js SDK-->>TS: JavaScript SDK Note over TS: Rewrite internal URLs - TS-->>Browser: Modified SDK (first-party URLs) + TS-->>Browser: Modified SDK Browser->>TS: POST /integrations/datadome/js/ TS->>API: POST /js/ @@ -100,14 +223,7 @@ sequenceDiagram TS-->>Browser: Response ``` -### Request Flow - -1. **SDK Loading**: Browser requests `/integrations/datadome/tags.js` -2. 
**Proxy & Rewrite**: Trusted Server fetches from `js.datadome.co`, rewrites internal URLs to first-party paths -3. **Signal Collection**: SDK sends signals to `/integrations/datadome/js/` -4. **Transparent Proxy**: Trusted Server forwards to `api-js.datadome.co`, returns response - -## Environment Variables +## Environment variables Override configuration via environment variables: @@ -117,11 +233,14 @@ TRUSTED_SERVER__INTEGRATIONS__DATADOME__SDK_ORIGIN=https://js.datadome.co TRUSTED_SERVER__INTEGRATIONS__DATADOME__API_ORIGIN=https://api-js.datadome.co TRUSTED_SERVER__INTEGRATIONS__DATADOME__CACHE_TTL_SECONDS=3600 TRUSTED_SERVER__INTEGRATIONS__DATADOME__REWRITE_SDK=true +TRUSTED_SERVER__INTEGRATIONS__DATADOME__ENABLE_PROTECTION=true +TRUSTED_SERVER__INTEGRATIONS__DATADOME__SERVER_SIDE_KEY=your-server-side-key +TRUSTED_SERVER__INTEGRATIONS__DATADOME__CLIENT_SIDE_KEY=your-client-side-key ``` -## Client-Side Script Guard +## Client-side script guard -For single-page applications (SPAs) and frameworks like Next.js that dynamically insert script tags, the integration includes a client-side guard. When the `datadome` module is included in your tsjs bundle, it automatically intercepts dynamically inserted DataDome scripts and rewrites them to use first-party paths. +For single-page applications and frameworks like Next.js that dynamically insert script tags, the integration includes a client-side guard. When the `datadome` module is included in your TSJS bundle, it intercepts dynamically inserted DataDome scripts and rewrites them to use first-party paths. The guard handles: @@ -129,18 +248,11 @@ The guard handles: - `` elements - `` elements -This ensures DataDome scripts are always loaded through first-party context, even when inserted dynamically by client-side JavaScript. - -## Notes - -- **No Captcha Support**: This integration currently focuses on signal collection. CAPTCHA functionality may require additional configuration. 
-- **Cache Headers**: The SDK response includes caching headers based on `cache_ttl_seconds`. -- **Origin Headers**: Trusted Server forwards appropriate headers to DataDome for proper request context. -- **URL Rewriting**: Both `js.datadome.co` and `api-js.datadome.co` URLs in the SDK are rewritten to first-party paths. +This keeps DataDome scripts routed through first-party context, even when inserted dynamically by client-side JavaScript. ## Troubleshooting -### Script Not Loading +### Script not loading Check that the integration is enabled: @@ -149,19 +261,34 @@ Check that the integration is enabled: enabled = true ``` -### Signals Not Sending +If you rely on auto-injection, verify `client_side_key` is non-empty and `inject_client_side_tag = true`. + +### Signals not sending Verify that signal collection routes are working: ```bash -curl -X POST https://your-domain.com/integrations/datadome/js/check +curl -X POST https://www.example.com/integrations/datadome/js/check +``` + +### Server-side protection not running + +Check that both fields are configured: + +```toml +[integrations.datadome] +enabled = true +enable_protection = true +server_side_key = "YOUR_DATADOME_SERVER_SIDE_KEY" ``` -### HTML Rewriting Not Working +Also verify the request is not excluded by the default internal/static route exclusions or your custom inclusion/exclusion regexes. + +### HTML rewriting not working Ensure `rewrite_sdk = true` and that your pages are being proxied through Trusted Server's HTML processing pipeline. 
-## See Also +## See also - [DataDome First-Party Integration Docs](https://docs.datadome.co/docs/integrations#first-party-javascript-tag) - [Integrations Overview](/guide/integrations-overview) diff --git a/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md b/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md new file mode 100644 index 000000000..c5462db2b --- /dev/null +++ b/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md @@ -0,0 +1,747 @@ +# DataDome Server-Side Protection API Integration + +**Issue:** #317 +**Date:** 2026-06-11 +**Status:** In Progress + +## Problem + +Trusted Server already has a DataDome first-party proxy integration for the +client-side JavaScript tag and signal collection API. That layer improves +client-side signal delivery by routing DataDome browser traffic through the +publisher domain, but it does not perform server-side request validation before +requests reach Trusted Server routes or the publisher origin. + +DataDome's Fastly Compute module adds that missing layer by calling the +DataDome Protection API before forwarding traffic. The Protection API returns a +request decision and header-mutation instructions. Trusted Server needs an +implementation of that behavior in Rust that is not tied to DataDome's Fastly +JavaScript SDK. + +## Goals + +- Add a pre-routing integration hook that can block/challenge requests before + origin routing. +- Implement DataDome Protection API validation with fail-open behavior. +- Support DataDome pointer headers: + - upstream request enrichment for allowed requests + - downstream response headers/cookies for allowed and challenged requests +- Protect publisher-origin traffic and auction traffic by default. +- Exclude static assets and Trusted Server internal routes by default. +- Keep the Protection API client logic platform-neutral where possible by using + `RuntimeServices`, `PlatformBackend`, and `PlatformHttpClient`. 
+- Auto-inject the DataDome client-side tag when a client-side key is configured. +- Preserve the existing DataDome first-party proxy and URL-rewrite behavior. + +## Non-Goals + +- No GraphQL body parsing in the initial implementation. The config can reserve + a flag for it, but request-body inspection is deferred. +- No hard dependency on DataDome's JavaScript Fastly Compute package. +- No new edge-provider-specific behavior in `trusted-server-core` beyond the + existing `fastly::Request` integration surfaces. +- No replay-protection or MCP-specific fields in v1. +- No automatic de-duplication when a publisher already manually loads the + DataDome tag. The explicit `inject_client_side_tag = false` escape hatch is + sufficient for v1. +- No Fastly Secret Store lookup for `server_side_key` in v1. The first + implementation uses redacted `trusted-server.toml` / environment + configuration. + +## Decisions from Design Discussion + +1. **Protection scope:** protect publisher-origin and auction traffic by + default. Default-exclude Trusted Server internal routes and static assets. +2. **Endpoint default:** default to DataDome's Fastly-specific Protection API + endpoint from the official Fastly Compute docs, while allowing override. +3. **Header precedence:** apply DataDome downstream headers last so DataDome + cookies/cache/challenge headers are not overwritten by generic finalization. +4. **GraphQL support:** defer. +5. **Client-side tag:** auto-inject when a client-side key is configured. +6. **Methods:** protect every non-`OPTIONS` method, including `HEAD`, when the + URL is otherwise in scope. +7. **Secret handling:** use redacted `trusted-server.toml` / environment + configuration for `server_side_key` in v1. +8. **Timeout:** use `1500ms` as the default Protection API timeout for v1. +9. **Duplicate tag handling:** do not attempt automatic duplicate-tag + detection in v1; operators can disable injection with + `inject_client_side_tag = false`. 
+ +## Current State + +Implementation branch status as of 2026-06-12: + +- Added the generic integration request-filter model in + `crates/trusted-server-core/src/integrations/registry.rs`. +- Wired the Fastly adapter to run request filters after basic auth and before + route matching in `crates/trusted-server-adapter-fastly/src/main.rs`. +- Added DataDome server-side configuration fields and validation in + `crates/trusted-server-core/src/integrations/datadome.rs`. +- Added the DataDome Protection API helper module at + `crates/trusted-server-core/src/integrations/datadome/protection.rs`. +- Added client-side tag auto-injection through `IntegrationHeadInjector`. +- Extended `ClientInfo` and Fastly runtime services with JA4, H2 fingerprint, + edge hostname, and edge region fields. +- Updated `trusted-server.toml` with the new DataDome configuration fields. +- Updated `docs/guide/integrations/datadome.md` with the first-party, + server-side protection, fail-open, header-enrichment, auto-injection, and + GraphQL-v1 limitation behavior. + +Known remaining work before the PR is ready: + +- Fix formatting and clippy blockers introduced by the implementation. +- Add the spec-driven registry, DataDome config, protection matching, payload, + response classification, and route tests listed in this document. +- Run the full CI gate after fixes: + - `cargo fmt --all -- --check` + - `cargo clippy --workspace --all-targets --all-features -- -D warnings` + - `cargo test --workspace` + - JS/doc checks as applicable + +Verification snapshot: + +- `cargo test --workspace` passed on 2026-06-12 for the current branch state. +- `cargo fmt --all -- --check` failed due to formatting drift. +- `cargo clippy --package trusted-server-core --all-targets --all-features -- -D warnings` + failed due to clippy issues in the new DataDome/request-filter code. 
+ +Baseline DataDome integration before this work: + +- File: `crates/trusted-server-core/src/integrations/datadome.rs` +- Provides: + - `/integrations/datadome/tags.js` SDK proxy + - `/integrations/datadome/js/*` signal collection proxy + - HTML attribute rewriting for DataDome script URLs +- Registered: + - `IntegrationProxy` + - `IntegrationAttributeRewriter` + +Baseline integration registry before this work supported proxies, +attribute/script rewriters, HTML post-processors, and head injectors. It did not +have a pre-routing request-filter hook. + +Baseline Fastly routing flow before this work in +`crates/trusted-server-adapter-fastly/src/main.rs`: + +```text +sanitize forwarded headers +→ extract request context +→ batch-sync special case +→ build EC context +→ enforce basic auth +→ route matching +→ publisher origin fallback +→ EC/final response headers +``` + +The new request filter should run after successful basic auth and before route +matching. + +## Proposed Architecture + +### 1. Request Filter Hook + +Add a new integration hook in +`crates/trusted-server-core/src/integrations/registry.rs`. + +The hook must be richer than `Option<Response>` because DataDome can allow a +request while still requiring request and response header mutations.
+ +Suggested public model: + +```rust +#[async_trait(?Send)] +pub trait IntegrationRequestFilter: Send + Sync { + fn integration_id(&self) -> &'static str; + + async fn filter_request( + &self, + input: RequestFilterInput<'_>, + ) -> Result<RequestFilterDecision, Report<TrustedServerError>>; +} + +pub struct RequestFilterInput<'a> { + pub settings: &'a Settings, + pub services: &'a RuntimeServices, + pub request: &'a Request, +} + +pub enum RequestFilterDecision { + Continue(RequestFilterEffects), + Respond { + response: Response, + effects: RequestFilterEffects, + }, +} + +#[derive(Default)] +pub struct RequestFilterEffects { + pub request_headers: Vec<HeaderMutation>, + pub response_headers: Vec<HeaderMutation>, +} + +pub struct HeaderMutation { + pub name: String, + pub value: String, + pub mode: HeaderMutationMode, +} + +pub enum HeaderMutationMode { + Set, + Append, +} +``` + +Important behavior: + +- Filters run in registration order. +- On `Continue`, request header mutations are applied immediately before the + next filter and before route matching. +- Response header mutations are accumulated and applied to the final response. +- On `Respond`, routing short-circuits with that response while preserving any + downstream response header effects that must be applied after finalization. +- DataDome transport/API failures should not bubble out as registry errors; + DataDome should convert them to `Continue(Default::default())` to preserve + fail-open behavior. + +### 2.
Registry Integration + +Extend these types: + +- `IntegrationRegistration` +- `IntegrationRegistrationBuilder` +- `IntegrationRegistryInner` +- `IntegrationRegistry` +- `IntegrationMetadata` + +Add builder method: + +```rust +.with_request_filter(integration.clone()) +``` + +Add registry runner, for example: + +```rust +pub async fn filter_request( + &self, + input: RequestFilterRegistryInput<'_>, +) -> Result<RequestFilterRegistryOutcome, Report<TrustedServerError>> +``` + +The registry outcome should contain either an immediate response plus response +header mutations, or a continue decision with accumulated response header +mutations. + +### 3. Fastly Route Hook + +In `route_request()`, run filters after normal basic auth succeeds and before +`path` / `method` are captured for route matching. + +```text +basic auth ok +→ integration_registry.filter_request(...) + → Respond { response, effects }: finalize response, apply DataDome headers last, return + → Continue(effects): request is enriched; route normally; remember response effects +→ route matching +→ EC finalize +→ generic finalize_response +→ apply request-filter response headers last +``` + +Streaming publisher responses need the same treatment before headers are +committed via `stream_to_client()`. + +### 4. Header Mutation Semantics + +DataDome pointer headers are internal instructions and must not be forwarded. +Only headers named by the pointers should be copied. + +| Pointer header | Destination | +| ---------------------------- | -------------------------------------------------- | +| `X-DataDome-request-headers` | Request forwarded to Trusted Server route / origin | +| `X-DataDome-headers` | Response returned to browser | + +Rules: + +- `Set-Cookie` mutations use append mode. +- Other headers use set/replace mode. +- Pointer headers themselves are never forwarded.
+- Header mutations must reject hop-by-hop, request-target, body framing, and + Trusted Server internal headers such as `Connection`, `Transfer-Encoding`, + `Content-Length`, `Host`, and `x-ts-*`. +- DataDome downstream headers are applied after `ec_finalize_response()` and + `finalize_response()`. + +## DataDome Protection Design + +### Configuration + +Extend `[integrations.datadome]` with server-side protection and client-side +injection fields. + +```toml +[integrations.datadome] +enabled = false + +# Existing first-party proxy layer +sdk_origin = "https://js.datadome.co" +api_origin = "https://api-js.datadome.co" +cache_ttl_seconds = 3600 +rewrite_sdk = true + +# New server-side protection layer +enable_protection = false +server_side_key = "" +protection_api_origin = "https://api-fastly.datadome.co" +timeout_ms = 1500 +url_pattern_exclusion = "\\.(avi|flv|mka|mkv|mov|mp4|mpeg|mpg|mp3|flac|ogg|ogm|opus|wav|webm|webp|bmp|gif|ico|jpeg|jpg|png|svg|svgz|swf|eot|otf|ttf|woff|woff2|css|less|js|map)$" +url_pattern_inclusion = "" +enable_graphql_support = false + +# New client-side tag injection layer +client_side_key = "" +inject_client_side_tag = true +client_side_tag_url = "/integrations/datadome/tags.js" +client_side_configuration = { ajaxListenerPath = true } +``` + +Notes: + +- `server_side_key` should use `Redacted` in Rust config and be loaded + from redacted TOML/env configuration in v1. +- `server_side_key` is required only when `enable_protection = true`. +- `client_side_key` is optional. Auto-injection emits a tag only when + `inject_client_side_tag = true` and `client_side_key` is non-empty; an empty + key is a valid no-op. +- `protection_api_origin` remains configurable for regional/static endpoint + selection. +- `url_pattern_exclusion` and `url_pattern_inclusion` match `host + pathname`, + not query string, mirroring the official Fastly module behavior. 
+- Static-asset exclusion should be case-insensitive so uppercase file + extensions such as `.PNG` are skipped. +- `enable_graphql_support` is reserved but should remain unsupported or ignored + with a warning until the deferred body-handling work is implemented. + +### Protection Scope + +A request is protected when: + +1. DataDome integration is enabled. +2. `enable_protection = true`. +3. The method is not `OPTIONS`; all other methods, including `HEAD`, are + eligible for protection. +4. The URL does not match the default internal/static exclusions. +5. If `url_pattern_inclusion` is configured, `host + pathname` matches it. +6. If `url_pattern_exclusion` is configured, `host + pathname` does not match it. + +Default internal exclusions should include: + +- `/static/tsjs=` +- `/integrations/` +- `/first-party/` +- `/.well-known/trusted-server.json` +- `/verify-signature` +- `/admin/` +- `/_ts/admin/` +- `/_ts/api/v1/identify` +- `/_ts/api/v1/batch-sync` +- CORS preflight `OPTIONS` requests + +Auction traffic at `/auction` is intentionally protected by default. + +### Protection API Request + +Add a DataDome protection helper module, either as a nested module in +`datadome.rs` or as: + +`crates/trusted-server-core/src/integrations/datadome/protection.rs` + +Responsibilities: + +1. Decide whether a request should be protected. +2. Build the form-encoded Protection API payload. +3. Send `POST /validate-request` through platform services. +4. Classify the API response. +5. Extract pointer-header mutations. +6. Return a request-filter decision. + +Use platform abstractions for the outbound call: + +- Parse `protection_api_origin` with `url`. +- Build a `PlatformBackendSpec` with `first_byte_timeout = timeout_ms`. +- Resolve/register backend with `RuntimeServices::backend().ensure(...)`. +- Send an `edgezero_core::http::Request` through + `RuntimeServices::http_client().send(...)`. 
+ +Request headers: + +```text +Content-Type: application/x-www-form-urlencoded +Content-Length: +X-DataDome-X-Set-Cookie: true # only when X-DataDome-ClientID is used +``` + +Payload fields should include the core fields from DataDome's official module: + +- `Key` +- `IP` +- `Method` +- `Protocol` +- `Host` +- `ServerHostname` +- `Request` as path plus query +- `RequestModuleName` +- `ModuleVersion` +- `TimeRequest` +- `ClientID` +- `CookiesLen` +- `HeadersList` +- common request headers: + - `Accept` + - `Accept-Charset` + - `Accept-Encoding` + - `Accept-Language` + - `AuthorizationLen` + - `Cache-Control` + - `Connection` + - `Content-Type` + - `From` + - `Origin` + - `PostParamLen` + - `Pragma` + - `Referer` + - `User-Agent` + - `Via` + - `X-Forwarded-For` + - `X-Real-IP` + - `X-Requested-With` + - Sec-CH and Sec-Fetch headers supported by the official module +- TLS/client metadata when available from `RuntimeServices::client_info()` + +`ClientID` source priority: + +1. `X-DataDome-ClientID` request header +2. `datadome` cookie + +When `X-DataDome-ClientID` is used, send +`X-DataDome-X-Set-Cookie: true` to the Protection API. + +Encoding and size rules: + +- URL-encode all values. +- Omit empty fields. +- Apply per-field truncation before encoding. +- Keep the global payload under DataDome's documented limit. + +### Client Metadata + +Current `RuntimeServices::client_info()` exposes: + +- client IP +- TLS protocol +- TLS cipher + +For better DataDome signal quality, extend `ClientInfo` with optional fields +that adapters can populate when available: + +```rust +pub struct ClientInfo { + pub client_ip: Option, + pub tls_protocol: Option, + pub tls_cipher: Option, + pub tls_ja4: Option, + pub h2_fingerprint: Option, + pub server_hostname: Option, + pub server_region: Option, +} +``` + +Fastly can populate `tls_ja4` and `h2_fingerprint` from the request APIs already +used by the JA4/debug device-signal code. Other adapters may leave these fields +empty. 
+ +### Protection API Response + +Before acting on a response, validate that the HTTP status code matches the +`X-DataDomeResponse` header. + +| Status | Meaning | Behavior | +| ------ | --------- | ---------------------------------------------- | +| `200` | Allow | Continue routing with request/response effects | +| `301` | Challenge | Return DataDome response directly | +| `302` | Challenge | Return DataDome response directly | +| `401` | Challenge | Return DataDome response directly | +| `403` | Challenge | Return DataDome response directly | +| `429` | Challenge | Return DataDome response directly | +| other | Fail-open | Continue without effects | + +If status/header mismatch, missing `X-DataDomeResponse`, timeout, network error, +backend error, malformed headers, or any unexpected Protection API behavior: +fail open and continue without effects. + +### Challenge Responses + +For challenge statuses: + +1. Build a response using DataDome's API response status and body. +2. Copy only headers listed in `X-DataDome-headers`. +3. Append `Set-Cookie` values. +4. Do not contact the publisher origin. +5. Still run Trusted Server response finalization, then apply DataDome headers + last. + +### Allowed Requests + +For allow status `200`: + +1. Copy headers listed in `X-DataDome-request-headers` into the request before + Trusted Server route matching. +2. Accumulate headers listed in `X-DataDome-headers` for the final browser + response. +3. Continue normal route matching. +4. Apply accumulated DataDome downstream headers after EC and generic response + finalization. + +## Client-Side Auto-Injection + +Implement `IntegrationHeadInjector` for DataDome when `client_side_key` is +configured and `inject_client_side_tag = true`. 
+ +Injected snippet should run before the TSJS bundle and configure DataDome's +client-side tag: + +```html + + +``` + +Rust implementation requirements: + +- Serialize `client_side_key`, `client_side_configuration`, and + `client_side_tag_url` with `serde_json`. +- Escape ` Date: Fri, 12 Jun 2026 14:28:32 -0500 Subject: [PATCH 2/7] Load DataDome key from secret store --- .../trusted-server-adapter-fastly/src/main.rs | 2 +- .../src/integrations/datadome.rs | 78 ++++++++-- .../src/integrations/datadome/protection.rs | 137 ++++++++++++++++-- .../src/integrations/registry.rs | 8 +- docs/guide/integrations/datadome.md | 48 +++--- ...-datadome-server-side-protection-design.md | 33 +++-- trusted-server.toml | 3 +- 7 files changed, 248 insertions(+), 61 deletions(-) diff --git a/crates/trusted-server-adapter-fastly/src/main.rs b/crates/trusted-server-adapter-fastly/src/main.rs index 7b422d82d..3991849db 100644 --- a/crates/trusted-server-adapter-fastly/src/main.rs +++ b/crates/trusted-server-adapter-fastly/src/main.rs @@ -522,7 +522,7 @@ async fn route_request( Ok(RequestFilterRegistryOutcome::Continue(effects)) => effects, Ok(RequestFilterRegistryOutcome::Respond { response, effects }) => { return Ok(RouteResult { - outcome: HandlerOutcome::Buffered(response), + outcome: HandlerOutcome::Buffered(*response), ec_context, finalize_kv_graph, eids_cookie, diff --git a/crates/trusted-server-core/src/integrations/datadome.rs b/crates/trusted-server-core/src/integrations/datadome.rs index 40a7691ad..cb5508af2 100644 --- a/crates/trusted-server-core/src/integrations/datadome.rs +++ b/crates/trusted-server-core/src/integrations/datadome.rs @@ -76,7 +76,6 @@ use crate::integrations::{ INTEGRATION_MAX_BODY_BYTES, UPSTREAM_SDK_MAX_RESPONSE_BYTES, }; use crate::platform::{PlatformHttpRequest, RuntimeServices}; -use crate::redacted::Redacted; use crate::settings::{IntegrationConfig, Settings}; mod protection; @@ -107,6 +106,7 @@ static DATADOME_URL_PATTERN: LazyLock = LazyLock::new(|| 
{ /// Configuration for `DataDome` integration. #[derive(Debug, Clone, Deserialize, Validate)] +#[serde(deny_unknown_fields)] pub struct DataDomeConfig { /// Enable/disable the integration #[serde(default = "default_enabled")] @@ -133,15 +133,19 @@ pub struct DataDomeConfig { #[serde(default = "default_rewrite_sdk")] pub rewrite_sdk: bool, - /// Whether to call DataDome Protection API before route matching. + /// Whether to call `DataDome` Protection API before route matching. #[serde(default)] pub enable_protection: bool, - /// DataDome server-side key used for Protection API validation. - #[serde(default)] - pub server_side_key: Redacted, + /// Runtime secret store containing the `DataDome` server-side key. + #[serde(default = "default_server_side_key_secret_store")] + pub server_side_key_secret_store: String, + + /// Secret name containing the `DataDome` server-side key. + #[serde(default = "default_server_side_key_secret_name")] + pub server_side_key_secret_name: String, - /// Base URL for the DataDome Protection API. + /// Base URL for the `DataDome` Protection API. #[serde(default = "default_protection_api_origin")] #[validate(url)] pub protection_api_origin: String, @@ -163,15 +167,15 @@ pub struct DataDomeConfig { #[serde(default)] pub enable_graphql_support: bool, - /// DataDome client-side key used for auto-injecting the browser tag. + /// `DataDome` client-side key used for auto-injecting the browser tag. #[serde(default)] pub client_side_key: String, - /// Whether to auto-inject the DataDome browser tag when a client-side key exists. + /// Whether to auto-inject the `DataDome` browser tag when a client-side key exists. #[serde(default = "default_inject_client_side_tag")] pub inject_client_side_tag: bool, - /// URL used for the injected DataDome browser tag. + /// URL used for the injected `DataDome` browser tag. 
#[serde(default = "default_client_side_tag_url")] pub client_side_tag_url: String, @@ -204,6 +208,14 @@ fn default_protection_api_origin() -> String { "https://api-fastly.datadome.co".to_string() } +fn default_server_side_key_secret_store() -> String { + "datadome".to_string() +} + +fn default_server_side_key_secret_name() -> String { + "server_side_key".to_string() +} + fn default_timeout_ms() -> u32 { 1500 } @@ -233,7 +245,8 @@ impl Default for DataDomeConfig { cache_ttl_seconds: default_cache_ttl(), rewrite_sdk: default_rewrite_sdk(), enable_protection: false, - server_side_key: Redacted::default(), + server_side_key_secret_store: default_server_side_key_secret_store(), + server_side_key_secret_name: default_server_side_key_secret_name(), protection_api_origin: default_protection_api_origin(), timeout_ms: default_timeout_ms(), url_pattern_exclusion: default_url_pattern_exclusion(), @@ -266,10 +279,17 @@ impl DataDomeIntegration { Self::try_new(config).expect("should create DataDome integration") } - fn try_new(config: DataDomeConfig) -> Result, Report> { - if config.enable_protection && config.server_side_key.expose().trim().is_empty() { + fn try_new(mut config: DataDomeConfig) -> Result, Report> { + config.server_side_key_secret_store = + config.server_side_key_secret_store.trim().to_string(); + config.server_side_key_secret_name = config.server_side_key_secret_name.trim().to_string(); + + if config.enable_protection + && (config.server_side_key_secret_store.is_empty() + || config.server_side_key_secret_name.is_empty()) + { return Err(Report::new(Self::error( - "server_side_key is required when enable_protection is true", + "server_side_key_secret_store and server_side_key_secret_name are required when enable_protection is true", ))); } @@ -919,6 +939,38 @@ mod tests { ); } + #[test] + fn protection_enabled_requires_server_side_key_secret_store() { + let mut config = test_config(); + config.enable_protection = true; + config.server_side_key_secret_store = " 
".to_string(); + + let err = match DataDomeIntegration::try_new(config) { + Ok(_) => panic!("should reject empty store"), + Err(err) => err, + }; + assert!( + format!("{err:?}").contains("server_side_key_secret_store"), + "should mention secret store config" + ); + } + + #[test] + fn protection_enabled_requires_server_side_key_secret_name() { + let mut config = test_config(); + config.enable_protection = true; + config.server_side_key_secret_name = " ".to_string(); + + let err = match DataDomeIntegration::try_new(config) { + Ok(_) => panic!("should reject empty name"), + Err(err) => err, + }; + assert!( + format!("{err:?}").contains("server_side_key_secret_name"), + "should mention secret name config" + ); + } + #[test] fn extract_host() { assert_eq!( diff --git a/crates/trusted-server-core/src/integrations/datadome/protection.rs b/crates/trusted-server-core/src/integrations/datadome/protection.rs index 914fa2d92..dc64ba6a3 100644 --- a/crates/trusted-server-core/src/integrations/datadome/protection.rs +++ b/crates/trusted-server-core/src/integrations/datadome/protection.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; +use std::sync::{Arc, LazyLock, Mutex}; use std::time::Duration; use edgezero_core::body::Body as EdgeBody; @@ -10,7 +12,8 @@ use crate::error::TrustedServerError; use crate::integrations::{ HeaderMutation, RequestFilterDecision, RequestFilterEffects, RequestFilterInput, }; -use crate::platform::{PlatformBackendSpec, PlatformHttpRequest, RuntimeServices}; +use crate::platform::{PlatformBackendSpec, PlatformHttpRequest, RuntimeServices, StoreName}; +use crate::redacted::Redacted; use super::DataDomeIntegration; @@ -24,6 +27,16 @@ const HEADER_DATADOME_CLIENT_ID: &str = "x-datadome-clientid"; const HEADER_DATADOME_X_SET_COOKIE: &str = "x-datadome-x-set-cookie"; const DATADOME_COOKIE_NAME: &str = "datadome"; +#[derive(Debug, Clone, Eq, Hash, PartialEq)] +struct DataDomeServerSideKeyCacheKey { + secret_store: String, + secret_name: String, +} + +static 
DATADOME_SERVER_SIDE_KEY_CACHE: LazyLock< + Mutex>>>, +> = LazyLock::new(|| Mutex::new(HashMap::new())); + impl DataDomeIntegration { pub(super) async fn filter_protection_request( &self, @@ -48,7 +61,8 @@ impl DataDomeIntegration { ) -> Result> { let api_url = self.protection_validate_url(); let backend_name = self.ensure_protection_backend(input.services, &api_url)?; - let payload = self.build_protection_payload(&input); + let server_side_key = self.load_server_side_key(input.services)?; + let payload = self.build_protection_payload(&input, server_side_key.as_ref()); let encoded_body = form_encode(&payload.fields); let mut builder = request_builder() @@ -141,7 +155,46 @@ impl DataDomeIntegration { )) } - fn build_protection_payload(&self, input: &RequestFilterInput<'_>) -> ProtectionPayload { + fn load_server_side_key( + &self, + services: &RuntimeServices, + ) -> Result>, Report> { + let cache_key = server_side_key_cache_key(self); + if let Some(key) = DATADOME_SERVER_SIDE_KEY_CACHE + .lock() + .expect("should lock DataDome server-side key cache") + .get(&cache_key) + .cloned() + { + return Ok(key); + } + + let store_name = StoreName::from(self.config.server_side_key_secret_store.as_str()); + let key = services + .secret_store() + .get_string(&store_name, &self.config.server_side_key_secret_name) + .change_context(Self::error( + "Failed to read DataDome server-side key from secret store", + ))?; + let key = key.trim().to_string(); + if key.is_empty() { + return Err(Report::new(Self::error( + "DataDome server-side key secret must not be empty", + ))); + } + + let key = Arc::new(Redacted::new(key)); + let mut cache = DATADOME_SERVER_SIDE_KEY_CACHE + .lock() + .expect("should lock DataDome server-side key cache"); + Ok(Arc::clone(cache.entry(cache_key).or_insert(key))) + } + + fn build_protection_payload( + &self, + input: &RequestFilterInput<'_>, + server_side_key: &Redacted, + ) -> ProtectionPayload { let req = input.request; let client_info = 
input.services.client_info(); let mut fields = Vec::new(); @@ -154,7 +207,7 @@ impl DataDomeIntegration { header_client_id.clone() }; - push_field(&mut fields, "Key", self.config.server_side_key.expose()); + push_field(&mut fields, "Key", server_side_key.expose()); push_field( &mut fields, "IP", @@ -340,7 +393,7 @@ impl DataDomeIntegration { .body(EdgeBody::from(body_bytes.as_ref().to_vec())) .expect("should build DataDome challenge response"); return RequestFilterDecision::Respond { - response: challenge, + response: Box::new(challenge), effects, }; } @@ -358,6 +411,21 @@ struct ProtectionPayload { uses_header_client_id: bool, } +fn server_side_key_cache_key(integration: &DataDomeIntegration) -> DataDomeServerSideKeyCacheKey { + DataDomeServerSideKeyCacheKey { + secret_store: integration.config.server_side_key_secret_store.clone(), + secret_name: integration.config.server_side_key_secret_name.clone(), + } +} + +#[cfg(test)] +fn clear_datadome_server_side_key_cache_for_tests() { + DATADOME_SERVER_SIDE_KEY_CACHE + .lock() + .expect("should lock DataDome server-side key cache") + .clear(); +} + fn is_internal_path(path: &str) -> bool { path.starts_with("/static/tsjs=") || path.starts_with("/integrations/") @@ -411,13 +479,13 @@ fn push_header_field( push_field(fields, field_name, header_value(req, header_name)); } -fn push_field(fields: &mut Vec<(String, String)>, key: &'static str, value: impl ToString) { - let value = value.to_string(); +fn push_field(fields: &mut Vec<(String, String)>, key: &'static str, value: impl AsRef) { + let value = value.as_ref(); if value.is_empty() { return; } - fields.push((key.to_string(), truncate_field(key, &value))); + fields.push((key.to_string(), truncate_field(key, value))); } fn form_encode(fields: &[(String, String)]) -> String { @@ -491,7 +559,7 @@ fn parse_cookie_value(cookie_header: &str, name: &str) -> Option { let unquoted = cookie_value.trim_matches('"'); return Some( urlencoding::decode(unquoted) - .map(|decoded| 
decoded.into_owned()) + .map(std::borrow::Cow::into_owned) .unwrap_or_else(|_| unquoted.to_string()), ); } @@ -584,8 +652,59 @@ fn truncate_utf8(value: &str, limit: i32) -> String { #[cfg(test)] mod tests { + use std::collections::HashMap; + + use crate::integrations::datadome::DataDomeConfig; + use crate::platform::test_support::{ + build_services_with_config_and_secret, HashMapSecretStore, NoopConfigStore, NoopSecretStore, + }; + use super::*; + fn protection_integration() -> Arc { + let config = DataDomeConfig { + enabled: true, + enable_protection: true, + ..DataDomeConfig::default() + }; + DataDomeIntegration::try_new(config).expect("should create integration") + } + + #[test] + fn load_server_side_key_reads_secret_store() { + clear_datadome_server_side_key_cache_for_tests(); + let mut secrets = HashMap::new(); + secrets.insert("server_side_key".to_string(), b"secret-from-store".to_vec()); + let services = build_services_with_config_and_secret( + NoopConfigStore, + HashMapSecretStore::new(secrets), + ); + let integration = protection_integration(); + + let key = integration + .load_server_side_key(&services) + .expect("should load server-side key"); + + assert_eq!(key.expose(), "secret-from-store"); + } + + #[test] + fn load_server_side_key_errors_when_secret_missing() { + clear_datadome_server_side_key_cache_for_tests(); + let services = build_services_with_config_and_secret(NoopConfigStore, NoopSecretStore); + let config = DataDomeConfig { + enabled: true, + enable_protection: true, + server_side_key_secret_name: "missing_server_side_key".to_string(), + ..DataDomeConfig::default() + }; + let integration = DataDomeIntegration::try_new(config).expect("should create integration"); + + let result = integration.load_server_side_key(&services); + + assert!(result.is_err(), "should error when secret is missing"); + } + #[test] fn parse_cookie_value_decodes_datadome_cookie() { let value = parse_cookie_value("a=1; datadome=abc%20123; b=2", "datadome") diff --git 
a/crates/trusted-server-core/src/integrations/registry.rs b/crates/trusted-server-core/src/integrations/registry.rs index 6b997afed..ce61c0daf 100644 --- a/crates/trusted-server-core/src/integrations/registry.rs +++ b/crates/trusted-server-core/src/integrations/registry.rs @@ -395,7 +395,7 @@ impl RequestFilterEffects { pub enum RequestFilterDecision { Continue(RequestFilterEffects), Respond { - response: Response, + response: Box>, effects: RequestFilterEffects, }, } @@ -411,7 +411,7 @@ pub struct RequestFilterRegistryInput<'a> { pub enum RequestFilterRegistryOutcome { Continue(RequestFilterEffects), Respond { - response: Response, + response: Box>, effects: RequestFilterEffects, }, } @@ -865,6 +865,10 @@ impl IntegrationRegistry { /// Request header mutations are applied immediately so later filters and /// route handlers observe enriched headers. Response mutations are returned /// to the adapter so it can apply them after normal response finalization. + /// + /// # Errors + /// + /// Returns an error when an integration request filter returns an error. 
pub async fn filter_request( &self, input: RequestFilterRegistryInput<'_>, diff --git a/docs/guide/integrations/datadome.md b/docs/guide/integrations/datadome.md index 2dbfa15e4..3115069ed 100644 --- a/docs/guide/integrations/datadome.md +++ b/docs/guide/integrations/datadome.md @@ -43,7 +43,8 @@ rewrite_sdk = true # Server-side Protection API layer enable_protection = false -server_side_key = "" +server_side_key_secret_store = "datadome" +server_side_key_secret_name = "server_side_key" protection_api_origin = "https://api-fastly.datadome.co" timeout_ms = 1500 url_pattern_exclusion = "\\.(avi|flv|mka|mkv|mov|mp4|mpeg|mpg|mp3|flac|ogg|ogm|opus|wav|webm|webp|bmp|gif|ico|jpeg|jpg|png|svg|svgz|swf|eot|otf|ttf|woff|woff2|css|less|js|map)$" @@ -59,24 +60,25 @@ client_side_configuration = { ajaxListenerPath = true } ### Configuration options -| Option | Type | Default | Description | -| --------------------------- | ------- | -------------------------------- | --------------------------------------------------------------------------------------- | -| `enabled` | boolean | `false` | Enable the DataDome integration | -| `sdk_origin` | string | `https://js.datadome.co` | DataDome SDK origin URL for `tags.js` | -| `api_origin` | string | `https://api-js.datadome.co` | DataDome signal collection API origin URL for `/js/*` | -| `cache_ttl_seconds` | integer | `3600` | Cache TTL for `tags.js` | -| `rewrite_sdk` | boolean | `true` | Rewrite DataDome script URLs in HTML to first-party paths | -| `enable_protection` | boolean | `false` | Call the Protection API before route matching | -| `server_side_key` | string | `""` | DataDome server-side key; required when `enable_protection = true` | -| `protection_api_origin` | string | `https://api-fastly.datadome.co` | Protection API origin | -| `timeout_ms` | integer | `1500` | Dynamic backend first-byte timeout for Protection API calls | -| `url_pattern_exclusion` | string | Static asset extension regex | Case-insensitive regex matched 
against `host + pathname` to skip protection | -| `url_pattern_inclusion` | string | `""` | Optional case-insensitive regex matched against `host + pathname` to include protection | -| `enable_graphql_support` | boolean | `false` | Reserved for future GraphQL body inspection; ignored in v1 | -| `client_side_key` | string | `""` | DataDome client-side JavaScript key used for tag injection | -| `inject_client_side_tag` | boolean | `true` | Auto-inject the browser tag when `client_side_key` is non-empty | -| `client_side_tag_url` | string | `/integrations/datadome/tags.js` | Script URL used by auto-injection | -| `client_side_configuration` | object | `{ ajaxListenerPath = true }` | Options assigned to `window.ddoptions` | +| Option | Type | Default | Description | +| ------------------------------ | ------- | -------------------------------- | --------------------------------------------------------------------------------------- | +| `enabled` | boolean | `false` | Enable the DataDome integration | +| `sdk_origin` | string | `https://js.datadome.co` | DataDome SDK origin URL for `tags.js` | +| `api_origin` | string | `https://api-js.datadome.co` | DataDome signal collection API origin URL for `/js/*` | +| `cache_ttl_seconds` | integer | `3600` | Cache TTL for `tags.js` | +| `rewrite_sdk` | boolean | `true` | Rewrite DataDome script URLs in HTML to first-party paths | +| `enable_protection` | boolean | `false` | Call the Protection API before route matching | +| `server_side_key_secret_store` | string | `datadome` | Runtime secret store containing the DataDome server-side key | +| `server_side_key_secret_name` | string | `server_side_key` | Secret name containing the DataDome server-side key | +| `protection_api_origin` | string | `https://api-fastly.datadome.co` | Protection API origin | +| `timeout_ms` | integer | `1500` | Dynamic backend first-byte timeout for Protection API calls | +| `url_pattern_exclusion` | string | Static asset extension regex | 
Case-insensitive regex matched against `host + pathname` to skip protection | +| `url_pattern_inclusion` | string | `""` | Optional case-insensitive regex matched against `host + pathname` to include protection | +| `enable_graphql_support` | boolean | `false` | Reserved for future GraphQL body inspection; ignored in v1 | +| `client_side_key` | string | `""` | DataDome client-side JavaScript key used for tag injection | +| `inject_client_side_tag` | boolean | `true` | Auto-inject the browser tag when `client_side_key` is non-empty | +| `client_side_tag_url` | string | `/integrations/datadome/tags.js` | Script URL used by auto-injection | +| `client_side_configuration` | object | `{ ajaxListenerPath = true }` | Options assigned to `window.ddoptions` | ## Client-side setup @@ -141,7 +143,7 @@ When `enable_protection = true`, Trusted Server calls DataDome before normal rou - **Challenge**: return the DataDome response directly without contacting the publisher origin. - **Fail-open condition**: continue routing without DataDome effects when the Protection API times out, returns malformed instructions, or returns an unexpected status. -`server_side_key` is required when server-side protection is enabled. +The configured `server_side_key_secret_store` and `server_side_key_secret_name` must resolve to a non-empty secret when server-side protection is enabled. If the secret cannot be read, DataDome protection fails open for that request. 
### Protected traffic @@ -234,7 +236,8 @@ TRUSTED_SERVER__INTEGRATIONS__DATADOME__API_ORIGIN=https://api-js.datadome.co TRUSTED_SERVER__INTEGRATIONS__DATADOME__CACHE_TTL_SECONDS=3600 TRUSTED_SERVER__INTEGRATIONS__DATADOME__REWRITE_SDK=true TRUSTED_SERVER__INTEGRATIONS__DATADOME__ENABLE_PROTECTION=true -TRUSTED_SERVER__INTEGRATIONS__DATADOME__SERVER_SIDE_KEY=your-server-side-key +TRUSTED_SERVER__INTEGRATIONS__DATADOME__SERVER_SIDE_KEY_SECRET_STORE=datadome +TRUSTED_SERVER__INTEGRATIONS__DATADOME__SERVER_SIDE_KEY_SECRET_NAME=server_side_key TRUSTED_SERVER__INTEGRATIONS__DATADOME__CLIENT_SIDE_KEY=your-client-side-key ``` @@ -279,7 +282,8 @@ Check that both fields are configured: [integrations.datadome] enabled = true enable_protection = true -server_side_key = "YOUR_DATADOME_SERVER_SIDE_KEY" +server_side_key_secret_store = "datadome" +server_side_key_secret_name = "server_side_key" ``` Also verify the request is not excluded by the default internal/static route exclusions or your custom inclusion/exclusion regexes. diff --git a/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md b/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md index c5462db2b..2c9636572 100644 --- a/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md +++ b/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md @@ -44,9 +44,9 @@ JavaScript SDK. - No automatic de-duplication when a publisher already manually loads the DataDome tag. The explicit `inject_client_side_tag = false` escape hatch is sufficient for v1. -- No Fastly Secret Store lookup for `server_side_key` in v1. The first - implementation uses redacted `trusted-server.toml` / environment - configuration. +- No literal DataDome server-side secret value in `trusted-server.toml`. + Operators configure the runtime secret store and secret name, and the key is + read from Secret Store at request time with process-local caching. 
## Decisions from Design Discussion @@ -60,8 +60,9 @@ JavaScript SDK. 5. **Client-side tag:** auto-inject when a client-side key is configured. 6. **Methods:** protect every non-`OPTIONS` method, including `HEAD`, when the URL is otherwise in scope. -7. **Secret handling:** use redacted `trusted-server.toml` / environment - configuration for `server_side_key` in v1. +7. **Secret handling:** read the DataDome server-side key from runtime Secret + Store using configured store/name fields. Do not store the literal key in + `trusted-server.toml`. 8. **Timeout:** use `1500ms` as the default Protection API timeout for v1. 9. **Duplicate tag handling:** do not attempt automatic duplicate-tag detection in v1; operators can disable injection with @@ -292,7 +293,8 @@ rewrite_sdk = true # New server-side protection layer enable_protection = false -server_side_key = "" +server_side_key_secret_store = "datadome" +server_side_key_secret_name = "server_side_key" protection_api_origin = "https://api-fastly.datadome.co" timeout_ms = 1500 url_pattern_exclusion = "\\.(avi|flv|mka|mkv|mov|mp4|mpeg|mpg|mp3|flac|ogg|ogm|opus|wav|webm|webp|bmp|gif|ico|jpeg|jpg|png|svg|svgz|swf|eot|otf|ttf|woff|woff2|css|less|js|map)$" @@ -308,9 +310,13 @@ client_side_configuration = { ajaxListenerPath = true } Notes: -- `server_side_key` should use `Redacted` in Rust config and be loaded - from redacted TOML/env configuration in v1. -- `server_side_key` is required only when `enable_protection = true`. +- The literal server-side key is not stored in Rust config. Rust config stores + only `server_side_key_secret_store` and `server_side_key_secret_name`. +- `server_side_key_secret_store` and `server_side_key_secret_name` are required + only when `enable_protection = true`. +- The DataDome server-side key is read from Secret Store through + `RuntimeServices::secret_store()` and cached per process by configured + store/name. - `client_side_key` is optional. 
Auto-injection emits a tag only when `inject_client_side_tag = true` and `client_side_key` is non-empty; an empty key is a valid no-op. @@ -628,8 +634,9 @@ Update after implementation to describe: ### DataDome Config Tests - existing first-party proxy config still parses -- protection disabled does not require `server_side_key` -- protection enabled requires `server_side_key` +- protection disabled does not require server-side key secret store/name fields +- protection enabled requires non-empty server-side key secret store/name fields +- protection fails open when the configured server-side key secret cannot be read - invalid regex fails startup - injection disabled allows empty `client_side_key` - injection enabled with empty `client_side_key` emits no head insert and does @@ -714,8 +721,8 @@ passes. 1. DataDome protection applies to all non-`OPTIONS` HTTP methods, including `HEAD`, when the URL is otherwise in scope. -2. `server_side_key` uses redacted `trusted-server.toml` / environment - configuration in v1. Fastly Secret Store lookup is deferred. +2. The DataDome server-side key is loaded from runtime Secret Store in v1. The + config contains only the secret store and secret name. 3. The default Protection API timeout is `1500ms` for v1. 4. Auto-injection does not attempt duplicate-tag detection in v1. The explicit `inject_client_side_tag = false` escape hatch is sufficient. 
diff --git a/trusted-server.toml b/trusted-server.toml index 66f54e7d0..a873be644 100644 --- a/trusted-server.toml +++ b/trusted-server.toml @@ -148,7 +148,8 @@ rewrite_sdk = true # Server-side Protection API validation (fails open on timeout/error) enable_protection = false -server_side_key = "" +server_side_key_secret_store = "datadome" +server_side_key_secret_name = "server_side_key" protection_api_origin = "https://api-fastly.datadome.co" timeout_ms = 1500 url_pattern_exclusion = "\\.(avi|flv|mka|mkv|mov|mp4|mpeg|mpg|mp3|flac|ogg|ogm|opus|wav|webm|webp|bmp|gif|ico|jpeg|jpg|png|svg|svgz|swf|eot|otf|ttf|woff|woff2|css|less|js|map)$" From 931837e8a290282c03c186ab028e8871f99c0571 Mon Sep 17 00:00:00 2001 From: Christian Date: Fri, 12 Jun 2026 14:48:24 -0500 Subject: [PATCH 3/7] update defaults in trusted-server.toml --- trusted-server.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/trusted-server.toml b/trusted-server.toml index a873be644..f692654b6 100644 --- a/trusted-server.toml +++ b/trusted-server.toml @@ -148,8 +148,8 @@ rewrite_sdk = true # Server-side Protection API validation (fails open on timeout/error) enable_protection = false -server_side_key_secret_store = "datadome" -server_side_key_secret_name = "server_side_key" +server_side_key_secret_store = "ts_secrets" +server_side_key_secret_name = "datadome_server_side_key" protection_api_origin = "https://api-fastly.datadome.co" timeout_ms = 1500 url_pattern_exclusion = "\\.(avi|flv|mka|mkv|mov|mp4|mpeg|mpg|mp3|flac|ogg|ogm|opus|wav|webm|webp|bmp|gif|ico|jpeg|jpg|png|svg|svgz|swf|eot|otf|ttf|woff|woff2|css|less|js|map)$" From c3ea80f214c29b45139e2a58443f3040972b3168 Mon Sep 17 00:00:00 2001 From: Christian Date: Fri, 12 Jun 2026 14:53:40 -0500 Subject: [PATCH 4/7] Harden DataDome protection coverage --- .../src/route_tests.rs | 267 +++++++++++++++++- .../src/integrations/datadome.rs | 162 ++++++++++- .../src/integrations/datadome/protection.rs | 25 ++ 
.../src/integrations/registry.rs | 81 ++++++ ...-datadome-server-side-protection-design.md | 50 ++-- 5 files changed, 551 insertions(+), 34 deletions(-) diff --git a/crates/trusted-server-adapter-fastly/src/route_tests.rs b/crates/trusted-server-adapter-fastly/src/route_tests.rs index 9ea4831a0..5b4ee79e1 100644 --- a/crates/trusted-server-adapter-fastly/src/route_tests.rs +++ b/crates/trusted-server-adapter-fastly/src/route_tests.rs @@ -141,6 +141,7 @@ struct RecordingHttpClient { calls: Mutex>, response_status: StatusCode, response_headers: Vec<(String, String)>, + response_body: Vec, } struct StreamingRecordingHttpClient { @@ -153,6 +154,7 @@ impl RecordingHttpClient { calls: Mutex::new(Vec::new()), response_status, response_headers: Vec::new(), + response_body: Vec::new(), } } @@ -166,6 +168,11 @@ impl RecordingHttpClient { .collect(); self } + + fn with_response_body(mut self, body: impl Into>) -> Self { + self.response_body = body.into(); + self + } } impl StreamingRecordingHttpClient { @@ -216,7 +223,7 @@ impl PlatformHttpClient for RecordingHttpClient { builder = builder.header(name, value); } let edge_response = builder - .body(EdgeBody::from(Vec::new())) + .body(EdgeBody::from(self.response_body.clone())) .map_err(|_| Report::new(PlatformError::HttpClient))?; Ok(PlatformResponse::new(edge_response)) @@ -445,6 +452,41 @@ fn create_auction_test_settings(providers: &str) -> Settings { Settings::from_toml(&config).expect("should parse adapter auction route test settings") } +fn datadome_protection_toml() -> &'static str { + r#" + [integrations.datadome] + enabled = true + enable_protection = true + server_side_key_secret_store = "datadome" + server_side_key_secret_name = "server_side_key" + "# +} + +fn create_datadome_auction_test_settings(providers: &str) -> Settings { + let base = base_route_settings_toml(); + let datadome = datadome_protection_toml(); + let config = format!( + r#"{base} + +{datadome} + + [auction] + enabled = true + providers = {providers} 
+ timeout_ms = 2000 + "#, + ); + + Settings::from_toml(&config).expect("should parse DataDome route test settings") +} + +fn datadome_secret_store() -> Arc { + Arc::new(HashMapSecretStore::new(HashMap::from([( + "server_side_key".to_string(), + b"datadome-server-side-key".to_vec(), + )]))) +} + fn build_route_stack(settings: &Settings) -> (AuctionOrchestrator, IntegrationRegistry) { let orchestrator = build_orchestrator(settings).expect("should build auction orchestrator"); let integration_registry = @@ -641,6 +683,229 @@ fn valid_banner_ad_unit_body() -> Vec { .expect("should serialize valid auction route test body") } +#[test] +fn datadome_challenge_short_circuits_before_publisher_origin() { + let settings = create_datadome_auction_test_settings("[]"); + let (orchestrator, integration_registry) = build_route_stack(&settings); + let req = Request::get("https://test.com/protected-page"); + let http_client = Arc::new( + RecordingHttpClient::new(StatusCode::FORBIDDEN) + .with_response_headers(vec![ + ("x-datadomeresponse", "403"), + ("x-datadome-headers", "Set-Cookie X-DD-B"), + ("set-cookie", "datadome=challenge; Path=/; HttpOnly"), + ("x-dd-b", "1"), + ]) + .with_response_body(b"blocked by datadome".to_vec()), + ); + let services = test_runtime_services_with_secret_and_http_client( + &req, + Arc::new(FixedBackend), + datadome_secret_store(), + Arc::clone(&http_client) as Arc, + ); + + let mut response = route_buffered_response( + &settings, + &orchestrator, + &integration_registry, + &services, + req, + "should route DataDome challenge response", + ); + + assert_eq!( + response.get_status(), + StatusCode::FORBIDDEN, + "should return the DataDome challenge status instead of contacting publisher origin" + ); + assert_eq!( + response.get_header_str("x-dd-b"), + Some("1"), + "should apply DataDome downstream challenge headers" + ); + assert_eq!( + response.get_header_str(header::SET_COOKIE), + Some("datadome=challenge; Path=/; HttpOnly"), + "should append the DataDome 
challenge cookie" + ); + assert_eq!( + response.take_body_str(), + "blocked by datadome", + "should return the DataDome challenge body" + ); + + let calls = http_client + .calls + .lock() + .expect("should lock recorded calls"); + assert_eq!(calls.len(), 1, "should call only the Protection API"); + assert_eq!(calls[0].method, Method::POST, "should POST to DataDome"); + assert_eq!( + calls[0].uri, "https://api-fastly.datadome.co/validate-request", + "should call the default DataDome Protection API endpoint" + ); +} + +#[test] +fn datadome_allow_applies_downstream_headers_and_protects_auction() { + let settings = create_datadome_auction_test_settings("[]"); + let (orchestrator, integration_registry) = build_route_stack(&settings); + let req = Request::post("https://test.com/auction") + .with_header(header::CONTENT_TYPE, "application/json") + .with_body(valid_banner_ad_unit_body()); + let http_client = Arc::new( + RecordingHttpClient::new(StatusCode::OK).with_response_headers(vec![ + ("x-datadomeresponse", "200"), + ("x-datadome-headers", "Set-Cookie X-DD-B"), + ("set-cookie", "datadome=allow; Path=/; HttpOnly"), + ("x-dd-b", "allowed"), + ]), + ); + let services = test_runtime_services_with_secret_and_http_client( + &req, + Arc::new(FixedBackend), + datadome_secret_store(), + Arc::clone(&http_client) as Arc, + ); + + let response = route_buffered_response( + &settings, + &orchestrator, + &integration_registry, + &services, + req, + "should route DataDome-allowed auction request", + ); + + assert_eq!( + response.get_status(), + StatusCode::BAD_GATEWAY, + "empty-provider auction should still run after DataDome allows the request" + ); + assert_eq!( + response.get_header_str("x-dd-b"), + Some("allowed"), + "should apply DataDome downstream headers after route finalization" + ); + assert_eq!( + response.get_header_str(header::SET_COOKIE), + Some("datadome=allow; Path=/; HttpOnly"), + "should preserve DataDome downstream Set-Cookie on allowed requests" + ); + + let calls 
= http_client + .calls + .lock() + .expect("should lock recorded calls"); + assert_eq!( + calls.len(), + 1, + "should protect /auction through DataDome by default" + ); + assert_eq!(calls[0].method, Method::POST, "should POST to DataDome"); +} + +#[test] +fn datadome_api_error_fails_open_before_routing() { + let settings = create_datadome_auction_test_settings("[]"); + let (orchestrator, integration_registry) = build_route_stack(&settings); + let req = Request::post("https://test.com/auction") + .with_header(header::CONTENT_TYPE, "application/json") + .with_body(b"{not-json".to_vec()); + let services = test_runtime_services_with_secret_and_http_client( + &req, + Arc::new(FixedBackend), + datadome_secret_store(), + Arc::new(NoopHttpClient) as Arc, + ); + + let response = route_buffered_response( + &settings, + &orchestrator, + &integration_registry, + &services, + req, + "should fail open when DataDome API call fails", + ); + + assert_eq!( + response.get_status(), + StatusCode::BAD_REQUEST, + "malformed auction JSON should be handled by the route after DataDome fails open" + ); + assert_eq!( + response.get_header_str("x-dd-b"), + None, + "should not apply DataDome headers when the Protection API call fails" + ); +} + +#[test] +fn datadome_skips_internal_and_static_asset_routes_by_default() { + let mut settings = create_datadome_auction_test_settings("[]"); + settings.publisher.origin_url = "https://".to_string(); + let (orchestrator, integration_registry) = build_route_stack(&settings); + let http_client = Arc::new( + RecordingHttpClient::new(StatusCode::OK).with_response_headers(vec![ + ("x-datadomeresponse", "200"), + ("x-datadome-headers", "X-DD-B"), + ("x-dd-b", "should-not-apply"), + ]), + ); + + let discovery_req = Request::get("https://test.com/.well-known/trusted-server.json"); + let discovery_services = test_runtime_services_with_secret_and_http_client( + &discovery_req, + Arc::new(FixedBackend), + datadome_secret_store(), + Arc::clone(&http_client) as Arc, 
+ ); + let discovery_response = route_buffered_response( + &settings, + &orchestrator, + &integration_registry, + &discovery_services, + discovery_req, + "should route internal discovery request without DataDome", + ); + assert_eq!( + discovery_response.get_status(), + StatusCode::OK, + "discovery endpoint should stay internal" + ); + + let image_req = Request::get("https://test.com/logo.png"); + let image_services = test_runtime_services_with_secret_and_http_client( + &image_req, + Arc::new(FixedBackend), + datadome_secret_store(), + Arc::clone(&http_client) as Arc, + ); + let image_response = route_buffered_response( + &settings, + &orchestrator, + &integration_registry, + &image_services, + image_req, + "should route static asset request without DataDome", + ); + assert_eq!( + image_response.get_status(), + StatusCode::BAD_GATEWAY, + "static asset should skip DataDome then fail at the intentionally invalid publisher origin" + ); + + let calls = http_client + .calls + .lock() + .expect("should lock recorded calls"); + assert!( + calls.is_empty(), + "should not call DataDome for internal routes or default-excluded static assets" + ); +} + #[test] fn routes_use_request_local_consent() { let settings = create_test_settings(); diff --git a/crates/trusted-server-core/src/integrations/datadome.rs b/crates/trusted-server-core/src/integrations/datadome.rs index cb5508af2..3134d580b 100644 --- a/crates/trusted-server-core/src/integrations/datadome.rs +++ b/crates/trusted-server-core/src/integrations/datadome.rs @@ -65,6 +65,7 @@ use http::{Method, StatusCode}; use regex::Regex; use serde::Deserialize; use serde_json::Value as JsonValue; +use url::Url; use validator::Validate; use crate::error::TrustedServerError; @@ -283,14 +284,17 @@ impl DataDomeIntegration { config.server_side_key_secret_store = config.server_side_key_secret_store.trim().to_string(); config.server_side_key_secret_name = config.server_side_key_secret_name.trim().to_string(); - - if 
config.enable_protection - && (config.server_side_key_secret_store.is_empty() - || config.server_side_key_secret_name.is_empty()) - { - return Err(Report::new(Self::error( - "server_side_key_secret_store and server_side_key_secret_name are required when enable_protection is true", - ))); + config.protection_api_origin = config.protection_api_origin.trim().to_string(); + + if config.enable_protection { + if config.server_side_key_secret_store.is_empty() + || config.server_side_key_secret_name.is_empty() + { + return Err(Report::new(Self::error( + "server_side_key_secret_store and server_side_key_secret_name are required when enable_protection is true", + ))); + } + Self::validate_protection_api_origin(&config.protection_api_origin)?; } if config.enable_graphql_support { @@ -309,6 +313,38 @@ impl DataDomeIntegration { })) } + fn validate_protection_api_origin(origin: &str) -> Result<(), Report> { + let parsed = Url::parse(origin).map_err(|err| { + Report::new(Self::error(format!("Invalid protection_api_origin: {err}"))) + })?; + + if !parsed.scheme().eq_ignore_ascii_case("https") { + return Err(Report::new(Self::error( + "protection_api_origin must use https when enable_protection is true", + ))); + } + if parsed.host_str().is_none() { + return Err(Report::new(Self::error( + "protection_api_origin must include a host", + ))); + } + if !parsed.username().is_empty() || parsed.password().is_some() { + return Err(Report::new(Self::error( + "protection_api_origin must not include credentials", + ))); + } + if !matches!(parsed.path(), "" | "/") + || parsed.query().is_some() + || parsed.fragment().is_some() + { + return Err(Report::new(Self::error( + "protection_api_origin must be an origin URL without path, query, or fragment", + ))); + } + + Ok(()) + } + fn compile_optional_regex( pattern: &str, name: &str, @@ -939,6 +975,17 @@ mod tests { ); } + fn html_context_for_tests( + document_state: &crate::integrations::IntegrationDocumentState, + ) -> IntegrationHtmlContext<'_> 
{ + IntegrationHtmlContext { + request_host: "publisher.example.com", + request_scheme: "https", + origin_host: "origin.example.com", + document_state, + } + } + #[test] fn protection_enabled_requires_server_side_key_secret_store() { let mut config = test_config(); @@ -971,6 +1018,105 @@ mod tests { ); } + #[test] + fn protection_enabled_requires_https_protection_api_origin() { + let mut config = test_config(); + config.enable_protection = true; + config.protection_api_origin = "http://api-fastly.datadome.co".to_string(); + + let err = match DataDomeIntegration::try_new(config) { + Ok(_) => panic!("should reject plaintext Protection API origin"), + Err(err) => err, + }; + + assert!( + format!("{err:?}").contains("must use https"), + "should require HTTPS for the server-side key transport" + ); + } + + #[test] + fn protection_enabled_requires_origin_only_protection_api_origin() { + for origin in [ + "https://api-fastly.datadome.co/custom", + "https://api-fastly.datadome.co?region=test", + "https://api-fastly.datadome.co#fragment", + "https://user:pass@api-fastly.datadome.co", + ] { + let mut config = test_config(); + config.enable_protection = true; + config.protection_api_origin = origin.to_string(); + + let err = match DataDomeIntegration::try_new(config) { + Ok(_) => panic!("should reject non-origin Protection API URL: {origin}"), + Err(err) => err, + }; + + assert!( + format!("{err:?}").contains("protection_api_origin"), + "should explain rejected Protection API origin {origin}: {err:?}" + ); + } + } + + #[test] + fn protection_enabled_accepts_https_protection_api_origin_with_trailing_slash() { + let mut config = test_config(); + config.enable_protection = true; + config.protection_api_origin = "https://api-fastly.datadome.co/".to_string(); + + DataDomeIntegration::try_new(config) + .expect("should accept HTTPS origin URL with optional trailing slash"); + } + + #[test] + fn head_injector_emits_client_side_tag_when_key_configured() { + let mut config = 
test_config(); + config.client_side_key = "test-client-key".to_string(); + config.client_side_configuration = serde_json::json!({ "ajaxListenerPath": true }); + let integration = DataDomeIntegration::new(config); + let document_state = crate::integrations::IntegrationDocumentState::default(); + let ctx = html_context_for_tests(&document_state); + + let inserts = integration.head_inserts(&ctx); + + assert_eq!(inserts.len(), 1, "should emit one combined DataDome insert"); + assert!( + inserts[0].contains("window.ddjskey=\"test-client-key\""), + "should serialize the configured client-side key" + ); + assert!( + inserts[0].contains("window.ddoptions={\"ajaxListenerPath\":true}"), + "should serialize DataDome client-side options" + ); + assert!( + inserts[0].contains(""), + "should load the configured DataDome tag URL" + ); + } + + #[test] + fn head_injector_omits_client_side_tag_when_disabled_or_blank() { + let mut blank_key = test_config(); + blank_key.client_side_key = " ".to_string(); + let integration = DataDomeIntegration::new(blank_key); + let document_state = crate::integrations::IntegrationDocumentState::default(); + let ctx = html_context_for_tests(&document_state); + assert!( + integration.head_inserts(&ctx).is_empty(), + "should not inject a tag without a client-side key" + ); + + let mut disabled = test_config(); + disabled.client_side_key = "test-client-key".to_string(); + disabled.inject_client_side_tag = false; + let integration = DataDomeIntegration::new(disabled); + assert!( + integration.head_inserts(&ctx).is_empty(), + "should not inject a tag when injection is disabled" + ); + } + #[test] fn extract_host() { assert_eq!( diff --git a/crates/trusted-server-core/src/integrations/datadome/protection.rs b/crates/trusted-server-core/src/integrations/datadome/protection.rs index dc64ba6a3..0db790b10 100644 --- a/crates/trusted-server-core/src/integrations/datadome/protection.rs +++ b/crates/trusted-server-core/src/integrations/datadome/protection.rs @@ 
-705,6 +705,31 @@ mod tests { assert!(result.is_err(), "should error when secret is missing"); } + #[test] + fn extract_header_mutations_appends_set_cookie_and_sets_other_headers() { + let mut headers = HeaderMap::new(); + headers.insert( + HEADER_DATADOME_HEADERS, + edgezero_core::http::HeaderValue::from_static("Set-Cookie X-DD-B"), + ); + headers.append( + header::SET_COOKIE.as_str(), + edgezero_core::http::HeaderValue::from_static("datadome=abc; Path=/"), + ); + headers.insert("x-dd-b", edgezero_core::http::HeaderValue::from_static("1")); + + let mutations = extract_header_mutations(&headers, HEADER_DATADOME_HEADERS); + + assert_eq!( + mutations, + vec![ + HeaderMutation::append("set-cookie", "datadome=abc; Path=/"), + HeaderMutation::set("x-dd-b", "1"), + ], + "should append Set-Cookie while replacing non-cookie headers" + ); + } + #[test] fn parse_cookie_value_decodes_datadome_cookie() { let value = parse_cookie_value("a=1; datadome=abc%20123; b=2", "datadome") diff --git a/crates/trusted-server-core/src/integrations/registry.rs b/crates/trusted-server-core/src/integrations/registry.rs index ce61c0daf..bebf8b4a7 100644 --- a/crates/trusted-server-core/src/integrations/registry.rs +++ b/crates/trusted-server-core/src/integrations/registry.rs @@ -1168,6 +1168,29 @@ impl IntegrationRegistry { } } + #[cfg(test)] + #[must_use] + pub fn from_request_filters(request_filters: Vec>) -> Self { + Self { + inner: Arc::new(IntegrationRegistryInner { + get_router: Router::new(), + post_router: Router::new(), + put_router: Router::new(), + delete_router: Router::new(), + patch_router: Router::new(), + head_router: Router::new(), + options_router: Router::new(), + routes: Vec::new(), + html_rewriters: Vec::new(), + script_rewriters: Vec::new(), + html_post_processors: Vec::new(), + head_injectors: Vec::new(), + request_filters, + deferred_js_ids: Vec::new(), + }), + } + } + #[cfg(test)] #[must_use] /// Test helper to create a registry from routes. 
@@ -1262,6 +1285,25 @@ mod tests { } } + struct EnrichingRequestFilter; + + #[async_trait(?Send)] + impl IntegrationRequestFilter for EnrichingRequestFilter { + fn integration_id(&self) -> &'static str { + "enriching" + } + + async fn filter_request( + &self, + _input: RequestFilterInput<'_>, + ) -> Result> { + Ok(RequestFilterDecision::Continue(RequestFilterEffects { + request_headers: vec![HeaderMutation::set("x-datadome-isbot", "1")], + response_headers: vec![HeaderMutation::set("x-dd-b", "allowed")], + })) + } + } + struct NoopHtmlPostProcessor; impl IntegrationHtmlPostProcessor for NoopHtmlPostProcessor { @@ -1358,6 +1400,45 @@ mod tests { ); } + #[test] + fn filter_request_applies_request_headers_and_returns_response_headers() { + let registry = + IntegrationRegistry::from_request_filters(vec![Arc::new(EnrichingRequestFilter)]); + let settings = crate::test_support::tests::create_test_settings(); + let services = crate::platform::test_support::noop_services(); + let mut req = Request::builder() + .method(Method::GET) + .uri("https://example.com/page") + .body(EdgeBody::empty()) + .expect("should build request"); + + let outcome = + futures::executor::block_on(registry.filter_request(RequestFilterRegistryInput { + settings: &settings, + services: &services, + req: &mut req, + })) + .expect("should run request filter"); + + assert_eq!( + req.headers() + .get("x-datadome-isbot") + .and_then(|value| value.to_str().ok()), + Some("1"), + "should apply DataDome-style request enrichment before routing" + ); + match outcome { + RequestFilterRegistryOutcome::Continue(effects) => { + assert_eq!( + effects.response_headers, + vec![HeaderMutation::set("x-dd-b", "allowed")], + "should return downstream response header effects for finalization" + ); + } + RequestFilterRegistryOutcome::Respond { .. 
} => panic!("should continue routing"), + } + } + #[test] fn test_exact_route_matching() { let routes = vec![( diff --git a/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md b/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md index 2c9636572..1c06d9fbd 100644 --- a/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md +++ b/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md @@ -690,32 +690,32 @@ Checkboxes should be marked complete only when the behavior is implemented, covered by targeted tests where practical, and the relevant verification command passes. -- [ ] Trusted Server can validate configured traffic through DataDome before - route matching. Current code path is implemented; targeted route coverage - is still needed. -- [ ] DataDome API timeouts/errors fail open. Current code path is implemented; - targeted platform-client failure coverage is still needed. -- [ ] DataDome challenge responses return without contacting the origin. Current - code path is implemented; route coverage is still needed. -- [ ] Allowed requests receive DataDome request-enrichment headers. Current code - path is implemented; registry/route coverage is still needed. -- [ ] Final responses receive DataDome downstream headers/cookies. Current code - path is implemented for buffered and streaming responses; route coverage - is still needed. -- [ ] `Set-Cookie` is appended, not coalesced or overwritten. Current code path - is implemented; pointer-header tests are still needed. -- [ ] Static assets and internal Trusted Server routes are excluded by default. - Current code path is implemented; protection-matching tests are still - needed. -- [ ] `/auction` is protected by default. Current code path is implemented; - protection-matching tests are still needed. -- [ ] Client-side DataDome tag is auto-injected when configured. 
Current code - path is implemented; config/head-injector tests are still needed. +- [x] Trusted Server can validate configured traffic through DataDome before + route matching. Covered by adapter route tests for challenged and allowed + DataDome-protected requests. +- [x] DataDome API timeouts/errors fail open. Covered by an adapter route test + that lets malformed auction JSON reach the route after a platform-client + failure. +- [x] DataDome challenge responses return without contacting the origin. Covered + by an adapter route test that returns the DataDome challenge response even + with no publisher-origin fallback. +- [x] Allowed requests receive DataDome request-enrichment headers. Covered by a + registry test that applies DataDome-style request mutations before routing. +- [x] Final responses receive DataDome downstream headers/cookies. Covered by + adapter route tests for allowed and challenged responses. +- [x] `Set-Cookie` is appended, not coalesced or overwritten. Covered by pointer + header route tests for DataDome downstream cookies. +- [x] Static assets and internal Trusted Server routes are excluded by default. + Covered by adapter route tests for discovery and default static-extension + exclusions. +- [x] `/auction` is protected by default. Covered by the DataDome-allowed auction + route test. +- [x] Client-side DataDome tag is auto-injected when configured. Covered by + DataDome head-injector tests. - [x] GraphQL body parsing is not implemented in v1 and is clearly documented. -- [ ] Existing DataDome first-party proxy behavior remains unchanged. Existing - DataDome proxy/rewrite tests pass; full workspace verification still - needs to pass after formatting and clippy fixes. -- [ ] `cargo fmt --all -- --check`, `cargo clippy --workspace --all-targets --all-features -- -D warnings`, and `cargo test --workspace` pass after implementation. Current snapshot: tests pass, fmt and clippy fail. 
+- [x] Existing DataDome first-party proxy behavior remains unchanged. Existing + DataDome proxy/rewrite tests pass as part of full workspace verification. +- [x] `cargo fmt --all -- --check`, `cargo clippy --workspace --all-targets --all-features -- -D warnings`, and `cargo test --workspace` pass after implementation. Verified on 2026-06-12. ## Resolved Questions From c18ba1b64c8721389490322f91d6777e3b336951 Mon Sep 17 00:00:00 2001 From: Christian Date: Mon, 15 Jun 2026 12:50:31 -0500 Subject: [PATCH 5/7] Add configurable DataDome protection exclusions --- .../trusted-server-adapter-fastly/src/main.rs | 1 + .../src/route_tests.rs | 2 + .../src/integrations/datadome.rs | 101 +- .../src/integrations/datadome/protection.rs | 27 +- .../integrations/datadome/protection_scope.rs | 881 ++++++++++++++++++ .../src/integrations/registry.rs | 16 +- crates/trusted-server-core/src/settings.rs | 223 ++++- docs/guide/integrations/datadome.md | 101 +- ...-datadome-server-side-protection-design.md | 106 ++- trusted-server.toml | 12 +- 10 files changed, 1366 insertions(+), 104 deletions(-) create mode 100644 crates/trusted-server-core/src/integrations/datadome/protection_scope.rs diff --git a/crates/trusted-server-adapter-fastly/src/main.rs b/crates/trusted-server-adapter-fastly/src/main.rs index 3991849db..147b152ff 100644 --- a/crates/trusted-server-adapter-fastly/src/main.rs +++ b/crates/trusted-server-adapter-fastly/src/main.rs @@ -516,6 +516,7 @@ async fn route_request( settings, services: runtime_services, req: &mut req, + geo_info: geo_info.as_ref(), }) .await { diff --git a/crates/trusted-server-adapter-fastly/src/route_tests.rs b/crates/trusted-server-adapter-fastly/src/route_tests.rs index 5b4ee79e1..f35440cf5 100644 --- a/crates/trusted-server-adapter-fastly/src/route_tests.rs +++ b/crates/trusted-server-adapter-fastly/src/route_tests.rs @@ -575,6 +575,7 @@ fn route_result_to_fastly_response( sharedid_cookie, should_finalize_ec, asset_cache_policy, + 
request_filter_effects, .. } = route_result; @@ -596,6 +597,7 @@ fn route_result_to_fastly_response( .unwrap_or(None) }; super::finalize_response(settings, geo_info.as_ref(), &mut response); + request_filter_effects.apply_to_response(&mut response); asset_cache_policy.apply_after_route_finalization(&mut response); let mut fastly_response = compat::to_fastly_response(response); diff --git a/crates/trusted-server-core/src/integrations/datadome.rs b/crates/trusted-server-core/src/integrations/datadome.rs index 3134d580b..5c5491b79 100644 --- a/crates/trusted-server-core/src/integrations/datadome.rs +++ b/crates/trusted-server-core/src/integrations/datadome.rs @@ -80,8 +80,15 @@ use crate::platform::{PlatformHttpRequest, RuntimeServices}; use crate::settings::{IntegrationConfig, Settings}; mod protection; +mod protection_scope; -const DATADOME_INTEGRATION_ID: &str = "datadome"; +pub use protection_scope::{ + ProtectionExclusionRuleConfig, ProtectionIpCidrSourceConfig, ProtectionMatcherConfig, +}; + +use protection_scope::ProtectionScope; + +pub(super) const DATADOME_INTEGRATION_ID: &str = "datadome"; /// Regex pattern for matching and rewriting `DataDome` URLs in script content. /// @@ -156,13 +163,36 @@ pub struct DataDomeConfig { #[validate(range(min = 1, max = 10000))] pub timeout_ms: u32, - /// Regex for URLs to exclude from Protection API validation. - #[serde(default = "default_url_pattern_exclusion")] - pub url_pattern_exclusion: String, - - /// Regex for URLs to include in Protection API validation. - #[serde(default)] - pub url_pattern_inclusion: String, + /// HTTP methods excluded from Protection API validation. + #[serde( + default = "default_protection_excluded_methods", + deserialize_with = "crate::settings::vec_from_seq_or_map" + )] + pub protection_excluded_methods: Vec, + + /// Client autonomous system numbers excluded from Protection API validation. 
+ #[serde(default, deserialize_with = "crate::settings::vec_from_seq_or_map")] + pub protection_excluded_asns: Vec, + + /// Client IP CIDR ranges excluded from Protection API validation. + #[serde(default, deserialize_with = "crate::settings::vec_from_seq_or_map")] + pub protection_excluded_ip_cidrs: Vec, + + /// Config Store-backed client IP CIDR ranges excluded from Protection API validation. + #[serde(default, deserialize_with = "crate::settings::vec_from_seq_or_map")] + pub protection_excluded_ip_cidr_sources: Vec, + + /// Cache TTL for Config Store-backed IP CIDR lists, in seconds. + #[serde(default = "default_protection_ip_list_cache_ttl_seconds")] + #[validate(range(min = 1, max = 86400))] + pub protection_ip_list_cache_ttl_seconds: u64, + + /// Structured exclusion rules for Protection API validation. + #[serde( + default = "default_protection_exclusion_rules", + deserialize_with = "crate::settings::vec_from_seq_or_map" + )] + pub protection_exclusion_rules: Vec, /// Reserved flag for future GraphQL payload extraction. 
#[serde(default)] @@ -221,8 +251,27 @@ fn default_timeout_ms() -> u32 { 1500 } -fn default_url_pattern_exclusion() -> String { - r"\.(avi|flv|mka|mkv|mov|mp4|mpeg|mpg|mp3|flac|ogg|ogm|opus|wav|webm|webp|bmp|gif|ico|jpeg|jpg|png|svg|svgz|swf|eot|otf|ttf|woff|woff2|css|less|js|map)$".to_string() +fn default_static_asset_exclusion_pattern() -> String { + r"(?i)\.(avi|flv|mka|mkv|mov|mp4|mpeg|mpg|mp3|flac|ogg|ogm|opus|wav|webm|webp|bmp|gif|ico|jpeg|jpg|png|svg|svgz|swf|eot|otf|ttf|woff|woff2|css|less|js|map)$".to_string() +} + +fn default_protection_excluded_methods() -> Vec { + vec!["OPTIONS".to_string()] +} + +fn default_protection_ip_list_cache_ttl_seconds() -> u64 { + 300 +} + +fn default_protection_exclusion_rules() -> Vec { + vec![ProtectionExclusionRuleConfig { + id: "default-static-assets".to_string(), + enabled: true, + methods: Vec::new(), + matcher: ProtectionMatcherConfig::PathRegex { + patterns: vec![default_static_asset_exclusion_pattern()], + }, + }] } fn default_inject_client_side_tag() -> bool { @@ -250,8 +299,12 @@ impl Default for DataDomeConfig { server_side_key_secret_name: default_server_side_key_secret_name(), protection_api_origin: default_protection_api_origin(), timeout_ms: default_timeout_ms(), - url_pattern_exclusion: default_url_pattern_exclusion(), - url_pattern_inclusion: String::new(), + protection_excluded_methods: default_protection_excluded_methods(), + protection_excluded_asns: Vec::new(), + protection_excluded_ip_cidrs: Vec::new(), + protection_excluded_ip_cidr_sources: Vec::new(), + protection_ip_list_cache_ttl_seconds: default_protection_ip_list_cache_ttl_seconds(), + protection_exclusion_rules: default_protection_exclusion_rules(), enable_graphql_support: false, client_side_key: String::new(), inject_client_side_tag: default_inject_client_side_tag(), @@ -270,8 +323,7 @@ impl IntegrationConfig for DataDomeConfig { /// `DataDome` integration implementation. 
pub struct DataDomeIntegration { config: DataDomeConfig, - protection_exclusion: Option, - protection_inclusion: Option, + protection_scope: ProtectionScope, } impl DataDomeIntegration { @@ -301,15 +353,11 @@ impl DataDomeIntegration { log::warn!("[datadome] enable_graphql_support is reserved and ignored in v1"); } - let protection_exclusion = - Self::compile_optional_regex(&config.url_pattern_exclusion, "url_pattern_exclusion")?; - let protection_inclusion = - Self::compile_optional_regex(&config.url_pattern_inclusion, "url_pattern_inclusion")?; + let protection_scope = ProtectionScope::compile(&config)?; Ok(Arc::new(Self { config, - protection_exclusion, - protection_inclusion, + protection_scope, })) } @@ -345,19 +393,6 @@ impl DataDomeIntegration { Ok(()) } - fn compile_optional_regex( - pattern: &str, - name: &str, - ) -> Result, Report> { - if pattern.trim().is_empty() { - return Ok(None); - } - - Regex::new(&format!("(?i:{pattern})")) - .map(Some) - .map_err(|err| Report::new(Self::error(format!("Invalid {name}: {err}")))) - } - fn error(message: impl Into) -> TrustedServerError { TrustedServerError::Integration { integration: DATADOME_INTEGRATION_ID.to_string(), diff --git a/crates/trusted-server-core/src/integrations/datadome/protection.rs b/crates/trusted-server-core/src/integrations/datadome/protection.rs index 0db790b10..7f4ab6dfe 100644 --- a/crates/trusted-server-core/src/integrations/datadome/protection.rs +++ b/crates/trusted-server-core/src/integrations/datadome/protection.rs @@ -15,6 +15,7 @@ use crate::integrations::{ use crate::platform::{PlatformBackendSpec, PlatformHttpRequest, RuntimeServices, StoreName}; use crate::redacted::Redacted; +use super::protection_scope::{ProtectionRequestFacts, ProtectionScopeDecision}; use super::DataDomeIntegration; const VALIDATE_REQUEST_PATH: &str = "/validate-request"; @@ -42,7 +43,7 @@ impl DataDomeIntegration { &self, input: RequestFilterInput<'_>, ) -> RequestFilterDecision { - if 
!self.config.enable_protection || !self.is_request_protected(input.request) { + if !self.config.enable_protection || !self.is_request_protected(&input) { return RequestFilterDecision::Continue(RequestFilterEffects::default()); } @@ -97,7 +98,8 @@ impl DataDomeIntegration { Ok(self.classify_protection_response(platform_response.response)) } - fn is_request_protected(&self, req: &Request) -> bool { + fn is_request_protected(&self, input: &RequestFilterInput<'_>) -> bool { + let req = input.request; if req.method() == Method::OPTIONS { return false; } @@ -107,16 +109,17 @@ impl DataDomeIntegration { return false; } - let target = format!("{}{}", request_host(req), path); - - if let Some(inclusion) = &self.protection_inclusion { - if !inclusion.is_match(&target) { - return false; - } - } - - if let Some(exclusion) = &self.protection_exclusion { - if exclusion.is_match(&target) { + let facts = ProtectionRequestFacts { + method: req.method().as_str(), + path, + query: req.uri().query(), + client_ip: input.services.client_info().client_ip, + asn: input.geo_info.and_then(|geo| geo.asn), + }; + match self.protection_scope.evaluate(&facts, input.services) { + ProtectionScopeDecision::Protect => {} + ProtectionScopeDecision::Skip { rule_id, reason } => { + log::debug!("[datadome] Skipping Protection API for rule {rule_id} ({reason})"); return false; } } diff --git a/crates/trusted-server-core/src/integrations/datadome/protection_scope.rs b/crates/trusted-server-core/src/integrations/datadome/protection_scope.rs new file mode 100644 index 000000000..a9f689c1f --- /dev/null +++ b/crates/trusted-server-core/src/integrations/datadome/protection_scope.rs @@ -0,0 +1,881 @@ +use std::collections::{HashMap, HashSet}; +use std::net::IpAddr; +use std::str::FromStr; +use std::sync::{LazyLock, Mutex}; +use std::time::{Duration, Instant}; + +use error_stack::Report; +use regex::Regex; +use serde::Deserialize; + +use crate::error::TrustedServerError; +use crate::platform::{RuntimeServices, 
StoreName}; + +use super::{DataDomeConfig, DATADOME_INTEGRATION_ID}; + +/// Configured source for dynamic IP CIDR bypass lists. +#[derive(Debug, Clone, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct ProtectionIpCidrSourceConfig { + /// Config Store containing a comma, whitespace, or JSON-array encoded CIDR list. + #[serde(default = "default_ip_cidr_source_store")] + pub config_store: String, + /// Config Store key containing the CIDR list. + pub key: String, +} + +/// Configured request-scope exclusion rule for `DataDome` protection. +#[derive(Debug, Clone, Deserialize)] +// Do not add `deny_unknown_fields` here: serde rejects valid flattened +// internally tagged matcher fields when both are combined. The matcher enum +// still denies unknown fields for rule payload validation. +pub struct ProtectionExclusionRuleConfig { + /// Operator-friendly identifier included in logs. + #[serde(alias = "name")] + pub id: String, + /// Enables the rule. Defaults to true when a rule is present. + #[serde(default = "default_enabled_rule")] + pub enabled: bool, + /// Optional methods this rule applies to. Empty means every method. + #[serde(default, deserialize_with = "crate::settings::vec_from_seq_or_map")] + pub methods: Vec, + /// Matcher-specific rule configuration. + #[serde(flatten)] + pub matcher: ProtectionMatcherConfig, +} + +/// Matchers supported by `DataDome` protection-scope exclusion rules. +#[derive(Debug, Clone, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case", deny_unknown_fields)] +pub enum ProtectionMatcherConfig { + /// Match exact request paths. + PathExact { + #[serde(deserialize_with = "crate::settings::vec_from_seq_or_map")] + paths: Vec, + }, + /// Match request path prefixes. + PathPrefix { + #[serde(deserialize_with = "crate::settings::vec_from_seq_or_map")] + prefixes: Vec, + }, + /// Match request paths with one or more regexes. 
+ PathRegex { + #[serde(deserialize_with = "crate::settings::vec_from_seq_or_map")] + patterns: Vec, + }, + /// Match when any query parameter has a non-empty value. + QueryParamNonEmpty { + #[serde(deserialize_with = "crate::settings::vec_from_seq_or_map")] + names: Vec, + }, + /// Match client autonomous system numbers. + Asn { + #[serde(deserialize_with = "crate::settings::vec_from_seq_or_map")] + values: Vec, + }, + /// Match client IP CIDRs configured inline. + IpCidr { + #[serde(deserialize_with = "crate::settings::vec_from_seq_or_map")] + cidrs: Vec, + }, + /// Match client IP CIDRs loaded from Config Store. + IpCidrSource { config_store: String, key: String }, +} + +/// Facts used by `DataDome` protection-scope matchers. +pub(super) struct ProtectionRequestFacts<'a> { + pub(super) method: &'a str, + pub(super) path: &'a str, + pub(super) query: Option<&'a str>, + pub(super) client_ip: Option, + pub(super) asn: Option, +} + +/// Result of evaluating whether `DataDome` protection should run. +pub(super) enum ProtectionScopeDecision { + Protect, + Skip { + rule_id: String, + reason: &'static str, + }, +} + +/// Compiled `DataDome` protection-scope rules. 
+pub(super) struct ProtectionScope { + excluded_methods: MethodSet, + excluded_asns: HashSet, + excluded_ip_cidrs: Vec, + excluded_ip_cidr_sources: Vec, + exclusion_rules: Vec, + ip_list_cache_ttl: Duration, +} + +#[derive(Debug, Clone)] +struct MethodSet { + methods: HashSet, +} + +#[derive(Debug, Clone)] +struct ProtectionIpCidrSource { + config_store: String, + key: String, +} + +#[derive(Debug, Clone, Eq, Hash, PartialEq)] +struct ProtectionIpCidrSourceCacheKey { + config_store: String, + key: String, +} + +#[derive(Debug, Clone)] +struct CachedIpCidrSource { + cidrs: Vec, + expires_at: Instant, +} + +#[derive(Debug, Clone)] +struct ProtectionExclusionRule { + id: String, + methods: Option, + matcher: ProtectionMatcher, +} + +#[derive(Debug, Clone)] +enum ProtectionMatcher { + PathExact(HashSet), + PathPrefix(Vec), + PathRegex(Vec), + QueryParamNonEmpty(HashSet), + Asn(HashSet), + IpCidr(Vec), + IpCidrSource(ProtectionIpCidrSource), +} + +#[derive(Debug, Clone)] +enum IpCidr { + V4 { network: u32, prefix: u8 }, + V6 { network: u128, prefix: u8 }, +} + +static IP_CIDR_SOURCE_CACHE: LazyLock< + Mutex>, +> = LazyLock::new(|| Mutex::new(HashMap::new())); + +fn default_ip_cidr_source_store() -> String { + "datadome-ip-bypass".to_string() +} + +fn default_enabled_rule() -> bool { + true +} + +fn datadome_error(message: impl Into) -> TrustedServerError { + TrustedServerError::Integration { + integration: DATADOME_INTEGRATION_ID.to_string(), + message: message.into(), + } +} + +impl ProtectionScope { + pub(super) fn compile(config: &DataDomeConfig) -> Result> { + let excluded_methods = MethodSet::new(&config.protection_excluded_methods)?; + let excluded_ip_cidrs = compile_ip_cidrs( + &config.protection_excluded_ip_cidrs, + "protection_excluded_ip_cidrs", + )?; + let excluded_ip_cidr_sources = config + .protection_excluded_ip_cidr_sources + .iter() + .map(ProtectionIpCidrSource::from_config) + .collect::, _>>()?; + let exclusion_rules = config + 
.protection_exclusion_rules + .iter() + .filter(|rule| rule.enabled) + .map(ProtectionExclusionRule::compile) + .collect::, _>>()?; + + Ok(Self { + excluded_methods, + excluded_asns: config.protection_excluded_asns.iter().copied().collect(), + excluded_ip_cidrs, + excluded_ip_cidr_sources, + exclusion_rules, + ip_list_cache_ttl: Duration::from_secs(config.protection_ip_list_cache_ttl_seconds), + }) + } + + pub(super) fn evaluate( + &self, + facts: &ProtectionRequestFacts<'_>, + services: &RuntimeServices, + ) -> ProtectionScopeDecision { + if self.excluded_methods.matches(facts.method) { + return ProtectionScopeDecision::Skip { + rule_id: "excluded-methods".to_string(), + reason: "method", + }; + } + + if let Some(client_ip) = facts.client_ip { + if cidrs_match(&self.excluded_ip_cidrs, client_ip) { + return ProtectionScopeDecision::Skip { + rule_id: "excluded-ip-cidrs".to_string(), + reason: "client_ip", + }; + } + + for source in &self.excluded_ip_cidr_sources { + if source.matches(client_ip, services, self.ip_list_cache_ttl) { + return ProtectionScopeDecision::Skip { + rule_id: source.rule_id(), + reason: "client_ip_source", + }; + } + } + } + + if facts + .asn + .is_some_and(|asn| self.excluded_asns.contains(&asn)) + { + return ProtectionScopeDecision::Skip { + rule_id: "excluded-asns".to_string(), + reason: "asn", + }; + } + + for rule in &self.exclusion_rules { + if rule.matches(facts, services, self.ip_list_cache_ttl) { + return ProtectionScopeDecision::Skip { + rule_id: rule.id.clone(), + reason: rule.matcher.reason(), + }; + } + } + + ProtectionScopeDecision::Protect + } +} + +impl MethodSet { + fn new(methods: &[String]) -> Result> { + let mut normalized = HashSet::new(); + for method in methods { + let method = normalize_method(method)?; + normalized.insert(method); + } + Ok(Self { + methods: normalized, + }) + } + + fn optional(methods: &[String]) -> Result, Report> { + if methods.is_empty() { + return Ok(None); + } + Self::new(methods).map(Some) + } + + 
fn matches(&self, method: &str) -> bool { + self.methods.contains(&method.to_ascii_uppercase()) + } +} + +impl ProtectionIpCidrSource { + fn from_config( + config: &ProtectionIpCidrSourceConfig, + ) -> Result> { + let config_store = config.config_store.trim().to_string(); + let key = config.key.trim().to_string(); + if config_store.is_empty() || key.is_empty() { + return Err(Report::new(datadome_error( + "protection_excluded_ip_cidr_sources config_store and key must not be empty", + ))); + } + Ok(Self { config_store, key }) + } + + fn from_matcher_fields( + config_store: &str, + key: &str, + ) -> Result> { + let config = ProtectionIpCidrSourceConfig { + config_store: config_store.to_string(), + key: key.to_string(), + }; + Self::from_config(&config) + } + + fn rule_id(&self) -> String { + format!("ip-cidr-source:{}:{}", self.config_store, self.key) + } + + fn matches(&self, client_ip: IpAddr, services: &RuntimeServices, cache_ttl: Duration) -> bool { + match self.load_cidrs(services, cache_ttl) { + Ok(cidrs) => cidrs_match(&cidrs, client_ip), + Err(err) => { + log::warn!( + "[datadome] Failed to load IP CIDR bypass source {}:{}: {err:?}", + self.config_store, + self.key + ); + false + } + } + } + + fn load_cidrs( + &self, + services: &RuntimeServices, + cache_ttl: Duration, + ) -> Result, Report> { + let cache_key = ProtectionIpCidrSourceCacheKey { + config_store: self.config_store.clone(), + key: self.key.clone(), + }; + if let Some(cached) = IP_CIDR_SOURCE_CACHE + .lock() + .expect("should lock DataDome IP CIDR source cache") + .get(&cache_key) + .filter(|cached| cached.expires_at > Instant::now()) + .cloned() + { + return Ok(cached.cidrs); + } + + let store_name = StoreName::from(self.config_store.as_str()); + let raw = services + .config_store() + .get(&store_name, &self.key) + .map_err(|err| { + err.change_context(datadome_error( + "Failed to read DataDome IP CIDR bypass list from Config Store", + )) + })?; + let cidr_strings = 
parse_cidr_list_value(&raw).map_err(|message| { + Report::new(datadome_error(format!( + "Invalid DataDome IP CIDR bypass list {}:{}: {message}", + self.config_store, self.key + ))) + })?; + let cidrs = compile_ip_cidrs(&cidr_strings, "Config Store IP CIDR bypass list")?; + + IP_CIDR_SOURCE_CACHE + .lock() + .expect("should lock DataDome IP CIDR source cache") + .insert( + cache_key, + CachedIpCidrSource { + cidrs: cidrs.clone(), + expires_at: Instant::now() + cache_ttl, + }, + ); + + Ok(cidrs) + } +} + +impl ProtectionExclusionRule { + fn compile(config: &ProtectionExclusionRuleConfig) -> Result> { + let id = config.id.trim().to_string(); + if id.is_empty() { + return Err(Report::new(datadome_error( + "protection_exclusion_rules id must not be empty", + ))); + } + + Ok(Self { + id, + methods: MethodSet::optional(&config.methods)?, + matcher: ProtectionMatcher::compile(&config.matcher)?, + }) + } + + fn matches( + &self, + facts: &ProtectionRequestFacts<'_>, + services: &RuntimeServices, + cache_ttl: Duration, + ) -> bool { + if let Some(methods) = &self.methods { + if !methods.matches(facts.method) { + return false; + } + } + + self.matcher.matches(facts, services, cache_ttl) + } +} + +impl ProtectionMatcher { + fn compile(config: &ProtectionMatcherConfig) -> Result> { + match config { + ProtectionMatcherConfig::PathExact { paths } => { + ensure_non_empty(paths, "path_exact paths")?; + Ok(Self::PathExact( + paths.iter().map(|path| path.trim().to_string()).collect(), + )) + } + ProtectionMatcherConfig::PathPrefix { prefixes } => { + ensure_non_empty(prefixes, "path_prefix prefixes")?; + Ok(Self::PathPrefix( + prefixes + .iter() + .map(|prefix| prefix.trim().to_string()) + .collect(), + )) + } + ProtectionMatcherConfig::PathRegex { patterns } => { + ensure_non_empty(patterns, "path_regex patterns")?; + let regexes = patterns + .iter() + .map(|pattern| { + Regex::new(pattern).map_err(|err| { + Report::new(datadome_error(format!( + "Invalid protection_exclusion_rules 
path_regex pattern: {err}" + ))) + }) + }) + .collect::, _>>()?; + Ok(Self::PathRegex(regexes)) + } + ProtectionMatcherConfig::QueryParamNonEmpty { names } => { + ensure_non_empty(names, "query_param_non_empty names")?; + Ok(Self::QueryParamNonEmpty( + names.iter().map(|name| name.trim().to_string()).collect(), + )) + } + ProtectionMatcherConfig::Asn { values } => { + if values.is_empty() { + return Err(Report::new(datadome_error("asn values must not be empty"))); + } + Ok(Self::Asn(values.iter().copied().collect())) + } + ProtectionMatcherConfig::IpCidr { cidrs } => { + ensure_non_empty(cidrs, "ip_cidr cidrs")?; + Ok(Self::IpCidr(compile_ip_cidrs(cidrs, "ip_cidr cidrs")?)) + } + ProtectionMatcherConfig::IpCidrSource { config_store, key } => Ok(Self::IpCidrSource( + ProtectionIpCidrSource::from_matcher_fields(config_store, key)?, + )), + } + } + + fn matches( + &self, + facts: &ProtectionRequestFacts<'_>, + services: &RuntimeServices, + cache_ttl: Duration, + ) -> bool { + match self { + ProtectionMatcher::PathExact(paths) => paths.contains(facts.path), + ProtectionMatcher::PathPrefix(prefixes) => { + prefixes.iter().any(|prefix| facts.path.starts_with(prefix)) + } + ProtectionMatcher::PathRegex(regexes) => { + regexes.iter().any(|regex| regex.is_match(facts.path)) + } + ProtectionMatcher::QueryParamNonEmpty(names) => { + query_param_non_empty(facts.query, names) + } + ProtectionMatcher::Asn(values) => facts.asn.is_some_and(|asn| values.contains(&asn)), + ProtectionMatcher::IpCidr(cidrs) => facts + .client_ip + .is_some_and(|client_ip| cidrs_match(cidrs, client_ip)), + ProtectionMatcher::IpCidrSource(source) => facts + .client_ip + .is_some_and(|client_ip| source.matches(client_ip, services, cache_ttl)), + } + } + + fn reason(&self) -> &'static str { + match self { + ProtectionMatcher::PathExact(_) => "path_exact", + ProtectionMatcher::PathPrefix(_) => "path_prefix", + ProtectionMatcher::PathRegex(_) => "path_regex", + ProtectionMatcher::QueryParamNonEmpty(_) => 
"query_param_non_empty", + ProtectionMatcher::Asn(_) => "asn", + ProtectionMatcher::IpCidr(_) => "ip_cidr", + ProtectionMatcher::IpCidrSource(_) => "ip_cidr_source", + } + } +} + +impl FromStr for IpCidr { + type Err = String; + + fn from_str(raw: &str) -> Result { + let raw = raw.trim(); + if raw.is_empty() { + return Err("CIDR must not be empty".to_string()); + } + + let (addr, prefix) = match raw.split_once('/') { + Some((addr, prefix)) => (addr, Some(prefix)), + None => (raw, None), + }; + let ip = addr + .parse::() + .map_err(|err| format!("invalid IP address `{addr}`: {err}"))?; + + match ip { + IpAddr::V4(addr) => { + let prefix = parse_prefix(prefix, 32)?; + Ok(Self::V4 { + network: u32::from(addr) & v4_mask(prefix), + prefix, + }) + } + IpAddr::V6(addr) => { + let prefix = parse_prefix(prefix, 128)?; + Ok(Self::V6 { + network: u128::from(addr) & v6_mask(prefix), + prefix, + }) + } + } + } +} + +impl IpCidr { + fn contains(&self, ip: IpAddr) -> bool { + match (self, ip) { + (IpCidr::V4 { network, prefix }, IpAddr::V4(addr)) => { + (u32::from(addr) & v4_mask(*prefix)) == *network + } + (IpCidr::V6 { network, prefix }, IpAddr::V6(addr)) => { + (u128::from(addr) & v6_mask(*prefix)) == *network + } + _ => false, + } + } +} + +fn normalize_method(method: &str) -> Result> { + let method = method.trim(); + if method.is_empty() { + return Err(Report::new(datadome_error( + "DataDome protection excluded methods must not contain empty values", + ))); + } + Ok(method.to_ascii_uppercase()) +} + +fn ensure_non_empty(values: &[String], name: &str) -> Result<(), Report> { + if values.iter().any(|value| value.trim().is_empty()) || values.is_empty() { + return Err(Report::new(datadome_error(format!( + "DataDome protection {name} must not contain empty values" + )))); + } + Ok(()) +} + +fn compile_ip_cidrs( + raw_cidrs: &[String], + name: &str, +) -> Result, Report> { + raw_cidrs + .iter() + .map(|raw| { + raw.parse::().map_err(|err| { + Report::new(datadome_error(format!( + 
"Invalid DataDome protection {name} entry `{raw}`: {err}" + ))) + }) + }) + .collect() +} + +fn cidrs_match(cidrs: &[IpCidr], ip: IpAddr) -> bool { + cidrs.iter().any(|cidr| cidr.contains(ip)) +} + +fn parse_prefix(prefix: Option<&str>, max: u8) -> Result { + let Some(prefix) = prefix else { + return Ok(max); + }; + let prefix = prefix + .parse::() + .map_err(|err| format!("invalid prefix `{prefix}`: {err}"))?; + if prefix > max { + return Err(format!("prefix `{prefix}` exceeds maximum {max}")); + } + Ok(prefix) +} + +fn v4_mask(prefix: u8) -> u32 { + if prefix == 0 { + 0 + } else { + u32::MAX << (32 - prefix) + } +} + +fn v6_mask(prefix: u8) -> u128 { + if prefix == 0 { + 0 + } else { + u128::MAX << (128 - prefix) + } +} + +fn parse_cidr_list_value(value: &str) -> Result, String> { + if value.trim().starts_with('[') { + return serde_json::from_str::>(value) + .map_err(|err| format!("CIDR JSON array is invalid: {err}")); + } + + Ok(value + .split(|ch: char| ch == ',' || ch.is_whitespace()) + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(str::to_string) + .collect()) +} + +fn query_param_non_empty(query: Option<&str>, names: &HashSet) -> bool { + let Some(query) = query else { + return false; + }; + + url::form_urlencoded::parse(query.as_bytes()) + .any(|(key, value)| names.contains(key.as_ref()) && !value.is_empty()) +} + +#[cfg(test)] +pub(super) fn clear_ip_cidr_source_cache_for_tests() { + IP_CIDR_SOURCE_CACHE + .lock() + .expect("should lock DataDome IP CIDR source cache") + .clear(); +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + use std::net::{Ipv4Addr, Ipv6Addr}; + + use crate::platform::test_support::{ + build_services_with_config_and_secret, HashMapConfigStore, NoopSecretStore, + }; + + use super::*; + + fn facts<'a>( + method: &'a str, + path: &'a str, + query: Option<&'a str>, + client_ip: Option, + asn: Option, + ) -> ProtectionRequestFacts<'a> { + ProtectionRequestFacts { + method, + path, + query, + client_ip, + asn, 
+ } + } + + fn config_with_protection() -> DataDomeConfig { + DataDomeConfig { + enabled: true, + enable_protection: true, + ..DataDomeConfig::default() + } + } + + #[test] + fn exclusion_rule_deserializes_documented_shape() { + let rule: ProtectionExclusionRuleConfig = serde_json::from_value(serde_json::json!({ + "id": "legacy-static-get-head", + "methods": ["GET", "HEAD"], + "type": "path_regex", + "patterns": ["(?i)\\.(css|js)$"] + })) + .expect("should deserialize documented rule shape"); + + assert_eq!(rule.id, "legacy-static-get-head"); + assert_eq!(rule.methods, vec!["GET".to_string(), "HEAD".to_string()]); + assert!(matches!( + rule.matcher, + ProtectionMatcherConfig::PathRegex { patterns } if patterns == vec!["(?i)\\.(css|js)$".to_string()] + )); + } + + #[test] + fn cidr_matches_ipv4_and_ipv6_ranges() { + let cidr = "192.0.2.0/24".parse::().expect("should parse CIDR"); + assert!(cidr.contains(IpAddr::V4(Ipv4Addr::new(192, 0, 2, 10)))); + assert!(!cidr.contains(IpAddr::V4(Ipv4Addr::new(192, 0, 3, 10)))); + + let cidr = "2001:db8::/32" + .parse::() + .expect("should parse CIDR"); + assert!(cidr.contains(IpAddr::V6( + "2001:db8::1" + .parse::() + .expect("should parse IPv6") + ))); + assert!(!cidr.contains(IpAddr::V6( + "2001:db9::1" + .parse::() + .expect("should parse IPv6") + ))); + } + + #[test] + fn scope_skips_configured_methods() { + let mut config = config_with_protection(); + config.protection_excluded_methods = vec!["OPTIONS".to_string(), "FASTLYPURGE".to_string()]; + let scope = ProtectionScope::compile(&config).expect("should compile scope"); + let services = crate::platform::test_support::noop_services(); + + let decision = scope.evaluate(&facts("FASTLYPURGE", "/page", None, None, None), &services); + + assert!(matches!( + decision, + ProtectionScopeDecision::Skip { + reason: "method", + .. 
+ } + )); + } + + #[test] + fn scope_skips_configured_asns() { + let mut config = config_with_protection(); + config.protection_excluded_asns = vec![19750, 209366]; + let scope = ProtectionScope::compile(&config).expect("should compile scope"); + let services = crate::platform::test_support::noop_services(); + + let decision = scope.evaluate(&facts("GET", "/page", None, None, Some(19750)), &services); + + assert!(matches!( + decision, + ProtectionScopeDecision::Skip { reason: "asn", .. } + )); + } + + #[test] + fn scope_skips_inline_ip_cidr_matches() { + let mut config = config_with_protection(); + config.protection_excluded_ip_cidrs = vec!["198.51.100.0/24".to_string()]; + let scope = ProtectionScope::compile(&config).expect("should compile scope"); + let services = crate::platform::test_support::noop_services(); + + let decision = scope.evaluate( + &facts( + "GET", + "/page", + None, + Some(IpAddr::V4(Ipv4Addr::new(198, 51, 100, 42))), + None, + ), + &services, + ); + + assert!(matches!( + decision, + ProtectionScopeDecision::Skip { + reason: "client_ip", + .. + } + )); + } + + #[test] + fn scope_skips_config_store_ip_cidr_source_matches() { + clear_ip_cidr_source_cache_for_tests(); + let mut config = config_with_protection(); + config.protection_excluded_ip_cidr_sources = vec![ProtectionIpCidrSourceConfig { + config_store: "datadome-ip-bypass".to_string(), + key: "googlebot_ips".to_string(), + }]; + let scope = ProtectionScope::compile(&config).expect("should compile scope"); + let mut data = HashMap::new(); + data.insert("googlebot_ips".to_string(), "203.0.113.0/24".to_string()); + let services = + build_services_with_config_and_secret(HashMapConfigStore::new(data), NoopSecretStore); + + let decision = scope.evaluate( + &facts( + "GET", + "/page", + None, + Some(IpAddr::V4(Ipv4Addr::new(203, 0, 113, 10))), + None, + ), + &services, + ); + + assert!(matches!( + decision, + ProtectionScopeDecision::Skip { + reason: "client_ip_source", + .. 
+ } + )); + } + + #[test] + fn rule_path_regex_is_method_scoped() { + let mut config = config_with_protection(); + config.protection_exclusion_rules = vec![ProtectionExclusionRuleConfig { + id: "static-get-head".to_string(), + enabled: true, + methods: vec!["GET".to_string(), "HEAD".to_string()], + matcher: ProtectionMatcherConfig::PathRegex { + patterns: vec![r"(?i)\.(css|js|json)$".to_string()], + }, + }]; + let scope = ProtectionScope::compile(&config).expect("should compile scope"); + let services = crate::platform::test_support::noop_services(); + + assert!(matches!( + scope.evaluate(&facts("GET", "/app.JSON", None, None, None), &services), + ProtectionScopeDecision::Skip { + reason: "path_regex", + .. + } + )); + assert!(matches!( + scope.evaluate(&facts("POST", "/app.JSON", None, None, None), &services), + ProtectionScopeDecision::Protect + )); + } + + #[test] + fn rule_query_param_non_empty_matches_rsc() { + let mut config = config_with_protection(); + config.protection_exclusion_rules = vec![ProtectionExclusionRuleConfig { + id: "rsc".to_string(), + enabled: true, + methods: vec!["GET".to_string(), "HEAD".to_string()], + matcher: ProtectionMatcherConfig::QueryParamNonEmpty { + names: vec!["_rsc".to_string()], + }, + }]; + let scope = ProtectionScope::compile(&config).expect("should compile scope"); + let services = crate::platform::test_support::noop_services(); + + assert!(matches!( + scope.evaluate( + &facts("GET", "/page", Some("_rsc=abc&x=1"), None, None), + &services + ), + ProtectionScopeDecision::Skip { + reason: "query_param_non_empty", + .. 
+ } + )); + assert!(matches!( + scope.evaluate( + &facts("GET", "/page", Some("_rsc=&x=1"), None, None), + &services + ), + ProtectionScopeDecision::Protect + )); + } +} diff --git a/crates/trusted-server-core/src/integrations/registry.rs b/crates/trusted-server-core/src/integrations/registry.rs index bebf8b4a7..d0e02f91f 100644 --- a/crates/trusted-server-core/src/integrations/registry.rs +++ b/crates/trusted-server-core/src/integrations/registry.rs @@ -12,6 +12,7 @@ use crate::constants::HEADER_X_TS_EC; use crate::ec::kv::KvIdentityGraph; use crate::ec::EcContext; use crate::error::TrustedServerError; +use crate::geo::GeoInfo; use crate::http_util::is_navigation_request; use crate::platform::RuntimeServices; use crate::settings::Settings; @@ -328,6 +329,7 @@ pub struct RequestFilterInput<'a> { pub settings: &'a Settings, pub services: &'a RuntimeServices, pub request: &'a Request, + pub geo_info: Option<&'a GeoInfo>, } /// How a header mutation should be applied. @@ -405,6 +407,7 @@ pub struct RequestFilterRegistryInput<'a> { pub settings: &'a Settings, pub services: &'a RuntimeServices, pub req: &'a mut Request, + pub geo_info: Option<&'a GeoInfo>, } /// Outcome returned by [`IntegrationRegistry::filter_request`]. 
@@ -460,7 +463,10 @@ fn apply_header_mutation_to_request(req: &mut Request, mutation: &Head return; }; let Ok(value) = http::HeaderValue::from_str(&mutation.value) else { - log::warn!("Skipping invalid request-filter header value: {}", mutation.name); + log::warn!( + "Skipping invalid request-filter header value: {}", + mutation.name + ); return; }; @@ -488,7 +494,10 @@ fn apply_header_mutation_to_response(response: &mut Response, mutation return; }; let Ok(value) = http::HeaderValue::from_str(&mutation.value) else { - log::warn!("Skipping invalid response-filter header value: {}", mutation.name); + log::warn!( + "Skipping invalid response-filter header value: {}", + mutation.name + ); return; }; @@ -877,6 +886,7 @@ impl IntegrationRegistry { settings, services, req, + geo_info, } = input; let mut accumulated = RequestFilterEffects::default(); @@ -886,6 +896,7 @@ impl IntegrationRegistry { settings, services, request: req, + geo_info, }) .await?; @@ -1417,6 +1428,7 @@ mod tests { settings: &settings, services: &services, req: &mut req, + geo_info: None, })) .expect("should run request filter"); diff --git a/crates/trusted-server-core/src/settings.rs b/crates/trusted-server-core/src/settings.rs index c6984a342..cf3a8f299 100644 --- a/crates/trusted-server-core/src/settings.rs +++ b/crates/trusted-server-core/src/settings.rs @@ -2270,8 +2270,12 @@ mod tests { use crate::auction::build_orchestrator; use crate::integrations::{ - gpt::GptConfig, nextjs::NextJsIntegrationConfig, prebid::PrebidIntegrationConfig, - testlight::TestlightConfig, IntegrationRegistry, + datadome::{DataDomeConfig, ProtectionMatcherConfig}, + gpt::GptConfig, + nextjs::NextJsIntegrationConfig, + prebid::PrebidIntegrationConfig, + testlight::TestlightConfig, + IntegrationRegistry, }; use crate::redacted::Redacted; use crate::test_support::tests::{crate_test_settings_str, create_test_settings}; @@ -2827,6 +2831,221 @@ origin_host_header_overide = "www.example.com""#, ); } + #[test] + fn 
test_datadome_protection_scope_overrides_with_json_env() { + let toml_str = crate_test_settings_str(); + let separator = ENVIRONMENT_VARIABLE_SEPARATOR; + let origin_key = format!( + "{}{}PUBLISHER{}ORIGIN_URL", + ENVIRONMENT_VARIABLE_PREFIX, separator, separator + ); + let enabled_key = format!( + "{}{}INTEGRATIONS{}DATADOME{}ENABLED", + ENVIRONMENT_VARIABLE_PREFIX, separator, separator, separator + ); + let enable_protection_key = format!( + "{}{}INTEGRATIONS{}DATADOME{}ENABLE_PROTECTION", + ENVIRONMENT_VARIABLE_PREFIX, separator, separator, separator + ); + let excluded_methods_key = format!( + "{}{}INTEGRATIONS{}DATADOME{}PROTECTION_EXCLUDED_METHODS", + ENVIRONMENT_VARIABLE_PREFIX, separator, separator, separator + ); + let cidr_sources_key = format!( + "{}{}INTEGRATIONS{}DATADOME{}PROTECTION_EXCLUDED_IP_CIDR_SOURCES", + ENVIRONMENT_VARIABLE_PREFIX, separator, separator, separator + ); + let rules_key = format!( + "{}{}INTEGRATIONS{}DATADOME{}PROTECTION_EXCLUSION_RULES", + ENVIRONMENT_VARIABLE_PREFIX, separator, separator, separator + ); + + temp_env::with_vars( + [ + (origin_key, Some("https://origin.test-publisher.com")), + (enabled_key, Some("true")), + (enable_protection_key, Some("true")), + (excluded_methods_key, Some(r#"["OPTIONS","TRACE"]"#)), + ( + cidr_sources_key, + Some(r#"[{"config_store":"datadome-ip-bypass","key":"googlebot_ips"}]"#), + ), + ( + rules_key, + Some( + r#"[{"id":"legacy-static-get-head","methods":["GET","HEAD"],"type":"path_regex","patterns":["(?i)\\.(css|js)$"]},{"id":"next-rsc","type":"query_param_non_empty","names":["_rsc"]}]"#, + ), + ), + ], + || { + let settings = Settings::from_toml_and_env(&toml_str) + .expect("Settings should parse DataDome JSON env overrides"); + let cfg = settings + .integration_config::("datadome") + .expect("DataDome config query should succeed") + .expect("DataDome config should exist with env override"); + + assert!(cfg.enabled, "should parse enabled override as bool"); + assert!( + 
cfg.enable_protection, + "should parse enable_protection override as bool" + ); + assert_eq!( + cfg.protection_excluded_methods, + vec!["OPTIONS".to_string(), "TRACE".to_string()], + "should parse method list from JSON env override" + ); + assert_eq!( + cfg.protection_excluded_ip_cidr_sources[0].config_store, "datadome-ip-bypass", + "should parse CIDR source config_store from JSON env override" + ); + assert_eq!( + cfg.protection_excluded_ip_cidr_sources[0].key, "googlebot_ips", + "should parse CIDR source key from JSON env override" + ); + assert_eq!( + cfg.protection_exclusion_rules.len(), + 2, + "should parse all structured rules from JSON env override" + ); + assert!(matches!( + &cfg.protection_exclusion_rules[0].matcher, + ProtectionMatcherConfig::PathRegex { patterns } + if patterns == &vec!["(?i)\\.(css|js)$".to_string()] + )); + assert!(matches!( + &cfg.protection_exclusion_rules[1].matcher, + ProtectionMatcherConfig::QueryParamNonEmpty { names } + if names == &vec!["_rsc".to_string()] + )); + }, + ); + } + + #[test] + fn test_datadome_protection_scope_overrides_with_indexed_env() { + let toml_str = crate_test_settings_str(); + let separator = ENVIRONMENT_VARIABLE_SEPARATOR; + let datadome_prefix = format!( + "{}{}INTEGRATIONS{}DATADOME{}", + ENVIRONMENT_VARIABLE_PREFIX, separator, separator, separator + ); + let origin_key = format!( + "{}{}PUBLISHER{}ORIGIN_URL", + ENVIRONMENT_VARIABLE_PREFIX, separator, separator + ); + + temp_env::with_vars( + [ + (origin_key, Some("https://origin.test-publisher.com")), + (format!("{datadome_prefix}ENABLED"), Some("true")), + ( + format!("{datadome_prefix}ENABLE_PROTECTION"), + Some("true"), + ), + ( + format!("{datadome_prefix}PROTECTION_EXCLUDED_METHODS{separator}0"), + Some("OPTIONS"), + ), + ( + format!("{datadome_prefix}PROTECTION_EXCLUDED_METHODS{separator}1"), + Some("TRACE"), + ), + ( + format!("{datadome_prefix}PROTECTION_EXCLUDED_ASNS{separator}0"), + Some("19750"), + ), + ( + 
format!("{datadome_prefix}PROTECTION_EXCLUDED_IP_CIDRS{separator}0"), + Some("198.51.100.0/24"), + ), + ( + format!( + "{datadome_prefix}PROTECTION_EXCLUDED_IP_CIDR_SOURCES{separator}0{separator}CONFIG_STORE" + ), + Some("datadome-ip-bypass"), + ), + ( + format!( + "{datadome_prefix}PROTECTION_EXCLUDED_IP_CIDR_SOURCES{separator}0{separator}KEY" + ), + Some("googlebot_ips"), + ), + ( + format!("{datadome_prefix}PROTECTION_EXCLUSION_RULES{separator}0{separator}ID"), + Some("legacy-static-get-head"), + ), + ( + format!("{datadome_prefix}PROTECTION_EXCLUSION_RULES{separator}0{separator}METHODS{separator}0"), + Some("GET"), + ), + ( + format!("{datadome_prefix}PROTECTION_EXCLUSION_RULES{separator}0{separator}METHODS{separator}1"), + Some("HEAD"), + ), + ( + format!("{datadome_prefix}PROTECTION_EXCLUSION_RULES{separator}0{separator}TYPE"), + Some("path_regex"), + ), + ( + format!("{datadome_prefix}PROTECTION_EXCLUSION_RULES{separator}0{separator}PATTERNS{separator}0"), + Some(r"(?i)\.(css|js)$"), + ), + ( + format!("{datadome_prefix}PROTECTION_EXCLUSION_RULES{separator}1{separator}ID"), + Some("next-rsc"), + ), + ( + format!("{datadome_prefix}PROTECTION_EXCLUSION_RULES{separator}1{separator}TYPE"), + Some("query_param_non_empty"), + ), + ( + format!("{datadome_prefix}PROTECTION_EXCLUSION_RULES{separator}1{separator}NAMES{separator}0"), + Some("_rsc"), + ), + ], + || { + let settings = Settings::from_toml_and_env(&toml_str) + .expect("Settings should parse DataDome indexed env overrides"); + let cfg = settings + .integration_config::("datadome") + .expect("DataDome config query should succeed") + .expect("DataDome config should exist with indexed env override"); + + assert_eq!( + cfg.protection_excluded_methods, + vec!["OPTIONS".to_string(), "TRACE".to_string()], + "should parse indexed method list" + ); + assert_eq!( + cfg.protection_excluded_asns, + vec![19750], + "should parse indexed ASN list" + ); + assert_eq!( + cfg.protection_excluded_ip_cidrs, + 
vec!["198.51.100.0/24".to_string()], + "should parse indexed IP CIDR list" + ); + assert_eq!( + cfg.protection_excluded_ip_cidr_sources[0].key, + "googlebot_ips", + "should parse indexed CIDR source list" + ); + assert!(matches!( + &cfg.protection_exclusion_rules[0].matcher, + ProtectionMatcherConfig::PathRegex { patterns } + if patterns == &vec!["(?i)\\.(css|js)$".to_string()] + )); + assert!(matches!( + &cfg.protection_exclusion_rules[1].matcher, + ProtectionMatcherConfig::QueryParamNonEmpty { names } + if names == &vec!["_rsc".to_string()] + )); + }, + ); + } + #[test] fn test_handlers_override_with_env() { let toml_str = crate_test_settings_str(); diff --git a/docs/guide/integrations/datadome.md b/docs/guide/integrations/datadome.md index 3115069ed..1b87a69c5 100644 --- a/docs/guide/integrations/datadome.md +++ b/docs/guide/integrations/datadome.md @@ -47,8 +47,11 @@ server_side_key_secret_store = "datadome" server_side_key_secret_name = "server_side_key" protection_api_origin = "https://api-fastly.datadome.co" timeout_ms = 1500 -url_pattern_exclusion = "\\.(avi|flv|mka|mkv|mov|mp4|mpeg|mpg|mp3|flac|ogg|ogm|opus|wav|webm|webp|bmp|gif|ico|jpeg|jpg|png|svg|svgz|swf|eot|otf|ttf|woff|woff2|css|less|js|map)$" -url_pattern_inclusion = "" +protection_excluded_methods = ["OPTIONS"] +protection_excluded_asns = [] +protection_excluded_ip_cidrs = [] +protection_excluded_ip_cidr_sources = [] +protection_ip_list_cache_ttl_seconds = 300 enable_graphql_support = false # Client-side tag auto-injection @@ -56,29 +59,38 @@ client_side_key = "" inject_client_side_tag = true client_side_tag_url = "/integrations/datadome/tags.js" client_side_configuration = { ajaxListenerPath = true } + +[[integrations.datadome.protection_exclusion_rules]] +id = "default-static-assets" +type = "path_regex" +patterns = ["(?i)\\.(avi|flv|mka|mkv|mov|mp4|mpeg|mpg|mp3|flac|ogg|ogm|opus|wav|webm|webp|bmp|gif|ico|jpeg|jpg|png|svg|svgz|swf|eot|otf|ttf|woff|woff2|css|less|js|map)$"] ``` ### Configuration 
options -| Option | Type | Default | Description | -| ------------------------------ | ------- | -------------------------------- | --------------------------------------------------------------------------------------- | -| `enabled` | boolean | `false` | Enable the DataDome integration | -| `sdk_origin` | string | `https://js.datadome.co` | DataDome SDK origin URL for `tags.js` | -| `api_origin` | string | `https://api-js.datadome.co` | DataDome signal collection API origin URL for `/js/*` | -| `cache_ttl_seconds` | integer | `3600` | Cache TTL for `tags.js` | -| `rewrite_sdk` | boolean | `true` | Rewrite DataDome script URLs in HTML to first-party paths | -| `enable_protection` | boolean | `false` | Call the Protection API before route matching | -| `server_side_key_secret_store` | string | `datadome` | Runtime secret store containing the DataDome server-side key | -| `server_side_key_secret_name` | string | `server_side_key` | Secret name containing the DataDome server-side key | -| `protection_api_origin` | string | `https://api-fastly.datadome.co` | Protection API origin | -| `timeout_ms` | integer | `1500` | Dynamic backend first-byte timeout for Protection API calls | -| `url_pattern_exclusion` | string | Static asset extension regex | Case-insensitive regex matched against `host + pathname` to skip protection | -| `url_pattern_inclusion` | string | `""` | Optional case-insensitive regex matched against `host + pathname` to include protection | -| `enable_graphql_support` | boolean | `false` | Reserved for future GraphQL body inspection; ignored in v1 | -| `client_side_key` | string | `""` | DataDome client-side JavaScript key used for tag injection | -| `inject_client_side_tag` | boolean | `true` | Auto-inject the browser tag when `client_side_key` is non-empty | -| `client_side_tag_url` | string | `/integrations/datadome/tags.js` | Script URL used by auto-injection | -| `client_side_configuration` | object | `{ ajaxListenerPath = true }` | Options 
assigned to `window.ddoptions` | +| Option | Type | Default | Description | +| -------------------------------------- | ------- | -------------------------------- | ----------------------------------------------------------------------- | +| `enabled` | boolean | `false` | Enable the DataDome integration | +| `sdk_origin` | string | `https://js.datadome.co` | DataDome SDK origin URL for `tags.js` | +| `api_origin` | string | `https://api-js.datadome.co` | DataDome signal collection API origin URL for `/js/*` | +| `cache_ttl_seconds` | integer | `3600` | Cache TTL for `tags.js` | +| `rewrite_sdk` | boolean | `true` | Rewrite DataDome script URLs in HTML to first-party paths | +| `enable_protection` | boolean | `false` | Call the Protection API before route matching | +| `server_side_key_secret_store` | string | `datadome` | Runtime secret store containing the DataDome server-side key | +| `server_side_key_secret_name` | string | `server_side_key` | Secret name containing the DataDome server-side key | +| `protection_api_origin` | string | `https://api-fastly.datadome.co` | Protection API origin | +| `timeout_ms` | integer | `1500` | Dynamic backend first-byte timeout for Protection API calls | +| `protection_excluded_methods` | array | `["OPTIONS"]` | HTTP methods skipped before the Protection API call | +| `protection_excluded_asns` | array | `[]` | Client autonomous system numbers skipped before the Protection API call | +| `protection_excluded_ip_cidrs` | array | `[]` | Inline client IP CIDR ranges skipped before the Protection API call | +| `protection_excluded_ip_cidr_sources` | array | `[]` | Config Store sources containing dynamic client IP CIDR bypass lists | +| `protection_ip_list_cache_ttl_seconds` | integer | `300` | Process-local cache TTL for Config Store-backed IP CIDR bypass lists | +| `protection_exclusion_rules` | array | Static asset path regex | Structured method/path/query/IP/ASN exclusion rules | +| `enable_graphql_support` | boolean | `false` | 
Reserved for future GraphQL body inspection; ignored in v1 | +| `client_side_key` | string | `""` | DataDome client-side JavaScript key used for tag injection | +| `inject_client_side_tag` | boolean | `true` | Auto-inject the browser tag when `client_side_key` is non-empty | +| `client_side_tag_url` | string | `/integrations/datadome/tags.js` | Script URL used by auto-injection | +| `client_side_configuration` | object | `{ ajaxListenerPath = true }` | Options assigned to `window.ddoptions` | ## Client-side setup @@ -151,15 +163,56 @@ A request is protected when all of the following are true: 1. The DataDome integration is enabled. 2. `enable_protection = true`. -3. The method is not `OPTIONS`. +3. The method is not listed in `protection_excluded_methods`. 4. The path is not one of Trusted Server's internal routes. -5. The `host + pathname` matches `url_pattern_inclusion`, when configured. -6. The `host + pathname` does not match `url_pattern_exclusion`, when configured. +5. The client IP does not match `protection_excluded_ip_cidrs` or any Config Store-backed CIDR source. +6. The client ASN is not listed in `protection_excluded_asns`. +7. No `protection_exclusion_rules` match. Static assets are excluded by default using a case-insensitive file-extension regex. Trusted Server internal routes such as `/static/tsjs=`, `/integrations/`, `/first-party/`, admin routes, discovery routes, and signature-verification routes are also excluded by default. Auction traffic at `/auction` is protected by default. +### Structured exclusion rules + +Use structured rules for all DataDome protection exclusions. Each rule has an `id`, optional `methods`, and a typed matcher. The default configuration includes a `path_regex` rule for common static assets. 
+ +```toml +[[integrations.datadome.protection_exclusion_rules]] +id = "legacy-static-get-head" +methods = ["GET", "HEAD"] +type = "path_regex" +patterns = [ + "(?i)\\.(css|css\\.map|js|js\\.map|json|png|jpg|webp|woff2)$", + "^/\\.image/", + "^/robots\\.txt$", +] + +[[integrations.datadome.protection_exclusion_rules]] +id = "next-rsc" +methods = ["GET", "HEAD"] +type = "query_param_non_empty" +names = ["_rsc"] +``` + +Supported rule types are: + +- `path_exact` +- `path_prefix` +- `path_regex` +- `query_param_non_empty` +- `asn` +- `ip_cidr` +- `ip_cidr_source` + +Config Store-backed CIDR sources accept newline-, comma-, whitespace-, or JSON-array encoded CIDR lists. They are useful for large or frequently updated vendor crawler lists. + +```toml +[[integrations.datadome.protection_excluded_ip_cidr_sources]] +config_store = "datadome-ip-bypass" +key = "googlebot_ips" +``` + ### Header handling DataDome can return pointer headers that identify which headers Trusted Server should copy: diff --git a/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md b/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md index 1c06d9fbd..bff3abe3c 100644 --- a/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md +++ b/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md @@ -70,7 +70,7 @@ JavaScript SDK. ## Current State -Implementation branch status as of 2026-06-12: +Implementation branch status as of 2026-06-15: - Added the generic integration request-filter model in `crates/trusted-server-core/src/integrations/registry.rs`. @@ -83,28 +83,27 @@ Implementation branch status as of 2026-06-12: - Added client-side tag auto-injection through `IntegrationHeadInjector`. - Extended `ClientInfo` and Fastly runtime services with JA4, H2 fingerprint, edge hostname, and edge region fields. 
+- Added configurable protection-scope exclusions for methods, ASNs, inline IP + CIDRs, Config Store-backed IP CIDR lists, and typed method-scoped rules for + path/query/IP/ASN matching. - Updated `trusted-server.toml` with the new DataDome configuration fields. - Updated `docs/guide/integrations/datadome.md` with the first-party, - server-side protection, fail-open, header-enrichment, auto-injection, and - GraphQL-v1 limitation behavior. + server-side protection, fail-open, header-enrichment, auto-injection, + configurable exclusion, Secret Store, and GraphQL-v1 limitation behavior. Known remaining work before the PR is ready: -- Fix formatting and clippy blockers introduced by the implementation. -- Add the spec-driven registry, DataDome config, protection matching, payload, - response classification, and route tests listed in this document. -- Run the full CI gate after fixes: - - `cargo fmt --all -- --check` - - `cargo clippy --workspace --all-targets --all-features -- -D warnings` - - `cargo test --workspace` - - JS/doc checks as applicable +- Run JS checks if JS build output is touched. +- Perform staging validation against a DataDome test policy/rule. Verification snapshot: -- `cargo test --workspace` passed on 2026-06-12 for the current branch state. -- `cargo fmt --all -- --check` failed due to formatting drift. -- `cargo clippy --package trusted-server-core --all-targets --all-features -- -D warnings` - failed due to clippy issues in the new DataDome/request-filter code. +- `cargo fmt --all -- --check` passed on 2026-06-15. +- `cargo clippy --workspace --all-targets --all-features -- -D warnings` passed + on 2026-06-15. +- `cargo test --workspace -- --nocapture` passed on 2026-06-15. +- `cd docs && npx prettier --check guide/integrations/datadome.md superpowers/specs/2026-06-11-datadome-server-side-protection-design.md` + passed on 2026-06-15. 
Baseline DataDome integration before this work: @@ -297,8 +296,11 @@ server_side_key_secret_store = "datadome" server_side_key_secret_name = "server_side_key" protection_api_origin = "https://api-fastly.datadome.co" timeout_ms = 1500 -url_pattern_exclusion = "\\.(avi|flv|mka|mkv|mov|mp4|mpeg|mpg|mp3|flac|ogg|ogm|opus|wav|webm|webp|bmp|gif|ico|jpeg|jpg|png|svg|svgz|swf|eot|otf|ttf|woff|woff2|css|less|js|map)$" -url_pattern_inclusion = "" +protection_excluded_methods = ["OPTIONS"] +protection_excluded_asns = [] +protection_excluded_ip_cidrs = [] +protection_excluded_ip_cidr_sources = [] +protection_ip_list_cache_ttl_seconds = 300 enable_graphql_support = false # New client-side tag injection layer @@ -306,6 +308,11 @@ client_side_key = "" inject_client_side_tag = true client_side_tag_url = "/integrations/datadome/tags.js" client_side_configuration = { ajaxListenerPath = true } + +[[integrations.datadome.protection_exclusion_rules]] +id = "default-static-assets" +type = "path_regex" +patterns = ["(?i)\\.(avi|flv|mka|mkv|mov|mp4|mpeg|mpg|mp3|flac|ogg|ogm|opus|wav|webm|webp|bmp|gif|ico|jpeg|jpg|png|svg|svgz|swf|eot|otf|ttf|woff|woff2|css|less|js|map)$"] ``` Notes: @@ -322,10 +329,14 @@ Notes: key is a valid no-op. - `protection_api_origin` remains configurable for regional/static endpoint selection. -- `url_pattern_exclusion` and `url_pattern_inclusion` match `host + pathname`, - not query string, mirroring the official Fastly module behavior. -- Static-asset exclusion should be case-insensitive so uppercase file - extensions such as `.PNG` are skipped. +- Static-asset exclusion is represented as a default typed `path_regex` rule and + should remain case-insensitive so uppercase file extensions such as `.PNG` are + skipped. 
+- `protection_excluded_methods`, `protection_excluded_asns`, inline + `protection_excluded_ip_cidrs`, Config Store-backed + `protection_excluded_ip_cidr_sources`, and typed + `protection_exclusion_rules` provide migration parity for legacy VCL bypass + policies without hardcoding publisher-specific rules in Rust. - `enable_graphql_support` is reserved but should remain unsupported or ignored with a warning until the deferred body-handling work is implemented. @@ -335,11 +346,13 @@ A request is protected when: 1. DataDome integration is enabled. 2. `enable_protection = true`. -3. The method is not `OPTIONS`; all other methods, including `HEAD`, are - eligible for protection. -4. The URL does not match the default internal/static exclusions. -5. If `url_pattern_inclusion` is configured, `host + pathname` matches it. -6. If `url_pattern_exclusion` is configured, `host + pathname` does not match it. +3. The method is not listed in `protection_excluded_methods`; by default this + skips `OPTIONS`. +4. The URL does not match the default Trusted Server internal exclusions. +5. The client IP does not match inline or Config Store-backed excluded CIDR + lists. +6. The client ASN is not listed in `protection_excluded_asns`. +7. No typed `protection_exclusion_rules` match. Default internal exclusions should include: @@ -356,6 +369,40 @@ Default internal exclusions should include: Auction traffic at `/auction` is intentionally protected by default. +Typed exclusion rules use a small rule-engine pattern so new matcher types can +be added without growing `is_request_protected()` into a large conditional. 
A +rule has an operator-provided `id`, optional `methods`, and one matcher selected +by `type`: + +```toml +[[integrations.datadome.protection_exclusion_rules]] +id = "legacy-static-get-head" +methods = ["GET", "HEAD"] +type = "path_regex" +patterns = ["(?i)\\.(css|css\\.map|js|js\\.map|json|png|jpg|webp|woff2)$"] + +[[integrations.datadome.protection_exclusion_rules]] +id = "next-rsc" +methods = ["GET", "HEAD"] +type = "query_param_non_empty" +names = ["_rsc"] +``` + +Supported v1 rule types: + +- `path_exact` +- `path_prefix` +- `path_regex` +- `query_param_non_empty` +- `asn` +- `ip_cidr` +- `ip_cidr_source` + +Config Store-backed CIDR lists are non-secret operational data and may be +encoded as JSON arrays, comma-separated strings, or newline/whitespace-separated +strings. Load failures log a warning and do not match the bypass list, so a bad +list does not accidentally disable DataDome for all traffic. + ### Protection API Request Add a DataDome protection helper module, either as a nested module in @@ -715,12 +762,13 @@ passes. - [x] GraphQL body parsing is not implemented in v1 and is clearly documented. - [x] Existing DataDome first-party proxy behavior remains unchanged. Existing DataDome proxy/rewrite tests pass as part of full workspace verification. -- [x] `cargo fmt --all -- --check`, `cargo clippy --workspace --all-targets --all-features -- -D warnings`, and `cargo test --workspace` pass after implementation. Verified on 2026-06-12. +- [x] `cargo fmt --all -- --check`, `cargo clippy --workspace --all-targets --all-features -- -D warnings`, and `cargo test --workspace` pass after implementation. Verified on 2026-06-15. ## Resolved Questions -1. DataDome protection applies to all non-`OPTIONS` HTTP methods, including - `HEAD`, when the URL is otherwise in scope. +1. DataDome protection excludes methods listed in + `protection_excluded_methods`, which defaults to `OPTIONS`. 
All other + methods, including `HEAD`, are eligible when the URL is otherwise in scope. 2. The DataDome server-side key is loaded from runtime Secret Store in v1. The config contains only the secret store and secret name. 3. The default Protection API timeout is `1500ms` for v1. diff --git a/trusted-server.toml b/trusted-server.toml index f692654b6..e0301efa2 100644 --- a/trusted-server.toml +++ b/trusted-server.toml @@ -152,8 +152,11 @@ server_side_key_secret_store = "ts_secrets" server_side_key_secret_name = "datadome_server_side_key" protection_api_origin = "https://api-fastly.datadome.co" timeout_ms = 1500 -url_pattern_exclusion = "\\.(avi|flv|mka|mkv|mov|mp4|mpeg|mpg|mp3|flac|ogg|ogm|opus|wav|webm|webp|bmp|gif|ico|jpeg|jpg|png|svg|svgz|swf|eot|otf|ttf|woff|woff2|css|less|js|map)$" -url_pattern_inclusion = "" +protection_excluded_methods = ["OPTIONS"] +protection_excluded_asns = [] +protection_excluded_ip_cidrs = [] +protection_excluded_ip_cidr_sources = [] +protection_ip_list_cache_ttl_seconds = 300 enable_graphql_support = false # Client-side tag auto-injection (emits only when client_side_key is non-empty) @@ -162,6 +165,11 @@ inject_client_side_tag = true client_side_tag_url = "/integrations/datadome/tags.js" client_side_configuration = { ajaxListenerPath = true } +[[integrations.datadome.protection_exclusion_rules]] +id = "default-static-assets" +type = "path_regex" +patterns = ["(?i)\\.(avi|flv|mka|mkv|mov|mp4|mpeg|mpg|mp3|flac|ogg|ogm|opus|wav|webm|webp|bmp|gif|ico|jpeg|jpg|png|svg|svgz|swf|eot|otf|ttf|woff|woff2|css|less|js|map)$"] + [integrations.gpt] enabled = false script_url = "https://securepubads.g.doubleclick.net/tag/js/gpt.js" From a0fa8269ad7840b550ac53a8b22fac001b278727 Mon Sep 17 00:00:00 2001 From: Christian Date: Mon, 15 Jun 2026 15:44:47 -0500 Subject: [PATCH 6/7] Fix DataDome backend integration review issues --- crates/integration-tests/Cargo.lock | 1 + .../trusted-server-adapter-fastly/src/main.rs | 8 +-- .../src/route_tests.rs | 66 
+++++++++++++++++-- .../src/integrations/datadome.rs | 15 ++++- .../src/integrations/datadome/protection.rs | 59 ++++++++++++++--- .../src/integrations/registry.rs | 44 +++++++++++++ docs/guide/integrations/datadome.md | 16 ++--- ...-datadome-server-side-protection-design.md | 4 +- 8 files changed, 184 insertions(+), 29 deletions(-) diff --git a/crates/integration-tests/Cargo.lock b/crates/integration-tests/Cargo.lock index 32d8f1dbb..891817e48 100644 --- a/crates/integration-tests/Cargo.lock +++ b/crates/integration-tests/Cargo.lock @@ -4151,6 +4151,7 @@ dependencies = [ "trusted-server-js", "trusted-server-openrtb", "url", + "urlencoding", "uuid", "validator", ] diff --git a/crates/trusted-server-adapter-fastly/src/main.rs b/crates/trusted-server-adapter-fastly/src/main.rs index 147b152ff..9cd591cbe 100644 --- a/crates/trusted-server-adapter-fastly/src/main.rs +++ b/crates/trusted-server-adapter-fastly/src/main.rs @@ -239,7 +239,6 @@ fn main() { match outcome { HandlerOutcome::Buffered(mut response) | HandlerOutcome::AuthChallenge(mut response) => { finalize_response(&settings, geo_info.as_ref(), &mut response); - request_filter_effects.apply_to_response(&mut response); asset_cache_policy.apply_after_route_finalization(&mut response); let mut fastly_resp = compat::to_fastly_response(response); if should_finalize_ec { @@ -253,6 +252,7 @@ fn main() { &mut fastly_resp, ); } + request_filter_effects.apply_to_fastly_response(&mut fastly_resp); fastly_resp.send_to_client(); if is_real_browser { @@ -267,7 +267,6 @@ fn main() { params, } => { finalize_response(&settings, geo_info.as_ref(), &mut response); - request_filter_effects.apply_to_response(&mut response); asset_cache_policy.apply_after_route_finalization(&mut response); let mut fastly_resp = compat::to_fastly_response_skeleton(response); if should_finalize_ec { @@ -281,6 +280,7 @@ fn main() { &mut fastly_resp, ); } + request_filter_effects.apply_to_fastly_response(&mut fastly_resp); let mut streaming_body = 
fastly_resp.stream_to_client(); let mut stream_succeeded = false; match stream_publisher_body( @@ -313,9 +313,9 @@ fn main() { } HandlerOutcome::AssetStreaming { mut response, body } => { finalize_response(&settings, geo_info.as_ref(), &mut response); - request_filter_effects.apply_to_response(&mut response); asset_cache_policy.apply_after_route_finalization(&mut response); - let fastly_resp = compat::to_fastly_response_skeleton(response); + let mut fastly_resp = compat::to_fastly_response_skeleton(response); + request_filter_effects.apply_to_fastly_response(&mut fastly_resp); let mut streaming_body = fastly_resp.stream_to_client(); if let Err(e) = futures::executor::block_on(stream_asset_body(body, &mut streaming_body)) diff --git a/crates/trusted-server-adapter-fastly/src/route_tests.rs b/crates/trusted-server-adapter-fastly/src/route_tests.rs index f35440cf5..1abe16b63 100644 --- a/crates/trusted-server-adapter-fastly/src/route_tests.rs +++ b/crates/trusted-server-adapter-fastly/src/route_tests.rs @@ -457,8 +457,8 @@ fn datadome_protection_toml() -> &'static str { [integrations.datadome] enabled = true enable_protection = true - server_side_key_secret_store = "datadome" - server_side_key_secret_name = "server_side_key" + server_side_key_secret_store = "ts_secrets" + server_side_key_secret_name = "datadome_server_side_key" "# } @@ -482,7 +482,7 @@ fn create_datadome_auction_test_settings(providers: &str) -> Settings { fn datadome_secret_store() -> Arc { Arc::new(HashMapSecretStore::new(HashMap::from([( - "server_side_key".to_string(), + "datadome_server_side_key".to_string(), b"datadome-server-side-key".to_vec(), )]))) } @@ -597,7 +597,6 @@ fn route_result_to_fastly_response( .unwrap_or(None) }; super::finalize_response(settings, geo_info.as_ref(), &mut response); - request_filter_effects.apply_to_response(&mut response); asset_cache_policy.apply_after_route_finalization(&mut response); let mut fastly_response = compat::to_fastly_response(response); @@ -612,6 
+611,7 @@ fn route_result_to_fastly_response( &mut fastly_response, ); } + request_filter_effects.apply_to_fastly_response(&mut fastly_response); fastly_response } @@ -908,6 +908,64 @@ fn datadome_skips_internal_and_static_asset_routes_by_default() { ); } +#[test] +fn datadome_skips_registered_integration_routes_with_custom_prefix() { + let base = base_route_settings_toml(); + let datadome = datadome_protection_toml(); + let config = format!( + r#"{base} + +{datadome} + + [integrations.didomi] + enabled = true + proxy_path = "my-consent" + sdk_origin = "https://sdk.privacy-center.org" + api_origin = "https://api.privacy-center.org" + + [auction] + enabled = true + providers = [] + timeout_ms = 2000 + "#, + ); + let settings = Settings::from_toml(&config) + .expect("should parse DataDome and custom Didomi route test settings"); + let (orchestrator, integration_registry) = build_route_stack(&settings); + let req = Request::get("https://test.com/my-consent/notice"); + let http_client = Arc::new(RecordingHttpClient::new(StatusCode::OK)); + let services = test_runtime_services_with_secret_and_http_client( + &req, + Arc::new(FixedBackend), + datadome_secret_store(), + Arc::clone(&http_client) as Arc, + ); + + let response = route_buffered_response( + &settings, + &orchestrator, + &integration_registry, + &services, + req, + "should route custom Didomi proxy request without DataDome", + ); + + assert_eq!( + response.get_status(), + StatusCode::OK, + "custom integration proxy route should still be handled" + ); + let calls = http_client + .calls + .lock() + .expect("should lock recorded calls"); + assert_eq!(calls.len(), 1, "should call only the Didomi upstream"); + assert_eq!( + calls[0].uri, "https://sdk.privacy-center.org/notice", + "should not call the DataDome Protection API for registered integration routes" + ); +} + #[test] fn routes_use_request_local_consent() { let settings = create_test_settings(); diff --git 
a/crates/trusted-server-core/src/integrations/datadome.rs b/crates/trusted-server-core/src/integrations/datadome.rs index 5c5491b79..a9e689a55 100644 --- a/crates/trusted-server-core/src/integrations/datadome.rs +++ b/crates/trusted-server-core/src/integrations/datadome.rs @@ -240,11 +240,11 @@ fn default_protection_api_origin() -> String { } fn default_server_side_key_secret_store() -> String { - "datadome".to_string() + "ts_secrets".to_string() } fn default_server_side_key_secret_name() -> String { - "server_side_key".to_string() + "datadome_server_side_key".to_string() } fn default_timeout_ms() -> u32 { @@ -1021,6 +1021,17 @@ mod tests { } } + #[test] + fn protection_secret_defaults_match_sample_config() { + let config = DataDomeConfig::default(); + + assert_eq!(config.server_side_key_secret_store, "ts_secrets"); + assert_eq!( + config.server_side_key_secret_name, + "datadome_server_side_key" + ); + } + #[test] fn protection_enabled_requires_server_side_key_secret_store() { let mut config = test_config(); diff --git a/crates/trusted-server-core/src/integrations/datadome/protection.rs b/crates/trusted-server-core/src/integrations/datadome/protection.rs index 7f4ab6dfe..7fa408760 100644 --- a/crates/trusted-server-core/src/integrations/datadome/protection.rs +++ b/crates/trusted-server-core/src/integrations/datadome/protection.rs @@ -95,7 +95,7 @@ impl DataDomeIntegration { .await .change_context(Self::error("Failed to call DataDome Protection API"))?; - Ok(self.classify_protection_response(platform_response.response)) + Ok(self.classify_protection_response(platform_response.response, input.request.method())) } fn is_request_protected(&self, input: &RequestFilterInput<'_>) -> bool { @@ -104,6 +104,10 @@ impl DataDomeIntegration { return false; } + if input.is_integration_route { + return false; + } + let path = req.uri().path(); if is_internal_path(path) { return false; @@ -356,6 +360,7 @@ impl DataDomeIntegration { fn classify_protection_response( &self, 
response: edgezero_core::http::Response, + request_method: &Method, ) -> RequestFilterDecision { let (parts, body) = response.into_parts(); let status = parts.status; @@ -386,14 +391,21 @@ impl DataDomeIntegration { } if matches!(status.as_u16(), 301 | 302 | 401 | 403 | 429) { - if body.is_stream() { - log::warn!("[datadome] Protection API challenge body was streaming; failing open"); - return RequestFilterDecision::Continue(RequestFilterEffects::default()); - } - let body_bytes = body.into_bytes(); + let response_body = if request_method == Method::HEAD { + EdgeBody::empty() + } else { + if body.is_stream() { + log::warn!( + "[datadome] Protection API challenge body was streaming; failing open" + ); + return RequestFilterDecision::Continue(RequestFilterEffects::default()); + } + let body_bytes = body.into_bytes(); + EdgeBody::from(body_bytes.as_ref().to_vec()) + }; let challenge = Response::builder() .status(status) - .body(EdgeBody::from(body_bytes.as_ref().to_vec())) + .body(response_body) .expect("should build DataDome challenge response"); return RequestFilterDecision::Respond { response: Box::new(challenge), @@ -468,7 +480,7 @@ fn header_value(req: &Request, name: &str) -> String { fn headers_list(req: &Request) -> String { req.headers() .keys() - .map(|name| name.as_str()) + .map(HeaderName::as_str) .collect::>() .join(",") } @@ -677,7 +689,10 @@ mod tests { fn load_server_side_key_reads_secret_store() { clear_datadome_server_side_key_cache_for_tests(); let mut secrets = HashMap::new(); - secrets.insert("server_side_key".to_string(), b"secret-from-store".to_vec()); + secrets.insert( + "datadome_server_side_key".to_string(), + b"secret-from-store".to_vec(), + ); let services = build_services_with_config_and_secret( NoopConfigStore, HashMapSecretStore::new(secrets), @@ -746,6 +761,32 @@ mod tests { assert_eq!(truncate_utf8("ééé", -4), "éé"); } + #[test] + fn classify_head_challenge_omits_response_body() { + let integration = protection_integration(); + let 
response = edgezero_core::http::response_builder() + .status(StatusCode::FORBIDDEN) + .header(HEADER_DATADOME_RESPONSE, "403") + .body(EdgeBody::from("blocked")) + .expect("should build DataDome response"); + + let decision = integration.classify_protection_response(response, &Method::HEAD); + + let RequestFilterDecision::Respond { response, .. } = decision else { + panic!("should return a challenge response for DataDome 403"); + }; + assert_eq!( + response.status(), + StatusCode::FORBIDDEN, + "should preserve challenge status" + ); + assert_eq!( + response.into_body().into_bytes().as_ref(), + b"", + "HEAD challenges should not include a response body" + ); + } + #[test] fn form_encode_url_encodes_values() { let encoded = form_encode(&[("Key".to_string(), "a b+c".to_string())]); diff --git a/crates/trusted-server-core/src/integrations/registry.rs b/crates/trusted-server-core/src/integrations/registry.rs index d0e02f91f..99df1c73b 100644 --- a/crates/trusted-server-core/src/integrations/registry.rs +++ b/crates/trusted-server-core/src/integrations/registry.rs @@ -330,6 +330,8 @@ pub struct RequestFilterInput<'a> { pub services: &'a RuntimeServices, pub request: &'a Request, pub geo_info: Option<&'a GeoInfo>, + /// Whether the request matches a registered integration proxy route. + pub is_integration_route: bool, } /// How a header mutation should be applied. @@ -391,6 +393,12 @@ impl RequestFilterEffects { apply_header_mutation_to_response(response, mutation); } } + + pub fn apply_to_fastly_response(&self, response: &mut fastly::Response) { + for mutation in &self.response_headers { + apply_header_mutation_to_fastly_response(response, mutation); + } + } } /// Decision returned by an integration request filter. 
@@ -511,6 +519,40 @@ fn apply_header_mutation_to_response(response: &mut Response, mutation } } +fn apply_header_mutation_to_fastly_response( + response: &mut fastly::Response, + mutation: &HeaderMutation, +) { + if is_forbidden_filter_header(&mutation.name) { + log::warn!( + "Skipping forbidden response-filter header: {}", + mutation.name + ); + return; + } + + let Ok(name) = fastly::http::HeaderName::from_bytes(mutation.name.as_bytes()) else { + log::warn!("Skipping invalid response-filter header: {}", mutation.name); + return; + }; + let Ok(value) = fastly::http::HeaderValue::from_str(&mutation.value) else { + log::warn!( + "Skipping invalid response-filter header value: {}", + mutation.name + ); + return; + }; + + match mutation.mode { + HeaderMutationMode::Set => { + response.set_header(name, value); + } + HeaderMutationMode::Append => { + response.append_header(name, value); + } + } +} + /// Trait for integration-provided HTML attribute rewrite hooks. pub trait IntegrationAttributeRewriter: Send + Sync { /// Identifier for logging/diagnostics. 
@@ -889,6 +931,7 @@ impl IntegrationRegistry { geo_info, } = input; let mut accumulated = RequestFilterEffects::default(); + let is_integration_route = self.has_route(req.method(), req.uri().path()); for filter in &self.inner.request_filters { let decision = filter @@ -897,6 +940,7 @@ impl IntegrationRegistry { services, request: req, geo_info, + is_integration_route, }) .await?; diff --git a/docs/guide/integrations/datadome.md b/docs/guide/integrations/datadome.md index 1b87a69c5..6801c263a 100644 --- a/docs/guide/integrations/datadome.md +++ b/docs/guide/integrations/datadome.md @@ -43,8 +43,8 @@ rewrite_sdk = true # Server-side Protection API layer enable_protection = false -server_side_key_secret_store = "datadome" -server_side_key_secret_name = "server_side_key" +server_side_key_secret_store = "ts_secrets" +server_side_key_secret_name = "datadome_server_side_key" protection_api_origin = "https://api-fastly.datadome.co" timeout_ms = 1500 protection_excluded_methods = ["OPTIONS"] @@ -76,8 +76,8 @@ patterns = ["(?i)\\.(avi|flv|mka|mkv|mov|mp4|mpeg|mpg|mp3|flac|ogg|ogm|opus|wav| | `cache_ttl_seconds` | integer | `3600` | Cache TTL for `tags.js` | | `rewrite_sdk` | boolean | `true` | Rewrite DataDome script URLs in HTML to first-party paths | | `enable_protection` | boolean | `false` | Call the Protection API before route matching | -| `server_side_key_secret_store` | string | `datadome` | Runtime secret store containing the DataDome server-side key | -| `server_side_key_secret_name` | string | `server_side_key` | Secret name containing the DataDome server-side key | +| `server_side_key_secret_store` | string | `ts_secrets` | Runtime secret store containing the DataDome server-side key | +| `server_side_key_secret_name` | string | `datadome_server_side_key` | Secret name containing the DataDome server-side key | | `protection_api_origin` | string | `https://api-fastly.datadome.co` | Protection API origin | | `timeout_ms` | integer | `1500` | Dynamic backend 
first-byte timeout for Protection API calls | | `protection_excluded_methods` | array | `["OPTIONS"]` | HTTP methods skipped before the Protection API call | @@ -289,8 +289,8 @@ TRUSTED_SERVER__INTEGRATIONS__DATADOME__API_ORIGIN=https://api-js.datadome.co TRUSTED_SERVER__INTEGRATIONS__DATADOME__CACHE_TTL_SECONDS=3600 TRUSTED_SERVER__INTEGRATIONS__DATADOME__REWRITE_SDK=true TRUSTED_SERVER__INTEGRATIONS__DATADOME__ENABLE_PROTECTION=true -TRUSTED_SERVER__INTEGRATIONS__DATADOME__SERVER_SIDE_KEY_SECRET_STORE=datadome -TRUSTED_SERVER__INTEGRATIONS__DATADOME__SERVER_SIDE_KEY_SECRET_NAME=server_side_key +TRUSTED_SERVER__INTEGRATIONS__DATADOME__SERVER_SIDE_KEY_SECRET_STORE=ts_secrets +TRUSTED_SERVER__INTEGRATIONS__DATADOME__SERVER_SIDE_KEY_SECRET_NAME=datadome_server_side_key TRUSTED_SERVER__INTEGRATIONS__DATADOME__CLIENT_SIDE_KEY=your-client-side-key ``` @@ -335,8 +335,8 @@ Check that both fields are configured: [integrations.datadome] enabled = true enable_protection = true -server_side_key_secret_store = "datadome" -server_side_key_secret_name = "server_side_key" +server_side_key_secret_store = "ts_secrets" +server_side_key_secret_name = "datadome_server_side_key" ``` Also verify the request is not excluded by the default internal/static route exclusions or your custom inclusion/exclusion regexes. 
diff --git a/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md b/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md index bff3abe3c..16cee0202 100644 --- a/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md +++ b/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md @@ -292,8 +292,8 @@ rewrite_sdk = true # New server-side protection layer enable_protection = false -server_side_key_secret_store = "datadome" -server_side_key_secret_name = "server_side_key" +server_side_key_secret_store = "ts_secrets" +server_side_key_secret_name = "datadome_server_side_key" protection_api_origin = "https://api-fastly.datadome.co" timeout_ms = 1500 protection_excluded_methods = ["OPTIONS"] From 561a14f8afa5643acea820592f6c299073527631 Mon Sep 17 00:00:00 2001 From: Christian Date: Tue, 16 Jun 2026 12:27:14 -0500 Subject: [PATCH 7/7] Address DataDome protection review comments --- .../src/integrations/datadome.rs | 117 ++++++++++++++++- .../src/integrations/datadome/protection.rs | 124 +++++++++++------- docs/guide/integrations/datadome.md | 2 +- ...-datadome-server-side-protection-design.md | 8 +- 4 files changed, 190 insertions(+), 61 deletions(-) diff --git a/crates/trusted-server-core/src/integrations/datadome.rs b/crates/trusted-server-core/src/integrations/datadome.rs index a9e689a55..d958fda7e 100644 --- a/crates/trusted-server-core/src/integrations/datadome.rs +++ b/crates/trusted-server-core/src/integrations/datadome.rs @@ -286,6 +286,25 @@ fn default_client_side_configuration() -> JsonValue { serde_json::json!({ "ajaxListenerPath": true }) } +fn is_unsafe_client_side_tag_path_char(ch: char) -> bool { + ch.is_ascii_control() || ch.is_ascii_whitespace() || matches!(ch, '"' | '\'' | '<' | '>' | '`') +} + +fn escape_html_attribute(value: &str) -> String { + let mut escaped = String::with_capacity(value.len()); + for ch in value.chars() { + match ch { + '&' => escaped.push_str("&"), + 
'"' => escaped.push_str("""), + '\'' => escaped.push_str("'"), + '<' => escaped.push_str("<"), + '>' => escaped.push_str(">"), + _ => escaped.push(ch), + } + } + escaped +} + impl Default for DataDomeConfig { fn default() -> Self { Self { @@ -337,6 +356,7 @@ impl DataDomeIntegration { config.server_side_key_secret_store.trim().to_string(); config.server_side_key_secret_name = config.server_side_key_secret_name.trim().to_string(); config.protection_api_origin = config.protection_api_origin.trim().to_string(); + config.client_side_tag_url = config.client_side_tag_url.trim().to_string(); if config.enable_protection { if config.server_side_key_secret_store.is_empty() @@ -349,6 +369,10 @@ impl DataDomeIntegration { Self::validate_protection_api_origin(&config.protection_api_origin)?; } + if config.inject_client_side_tag { + Self::validate_client_side_tag_url(&config.client_side_tag_url)?; + } + if config.enable_graphql_support { log::warn!("[datadome] enable_graphql_support is reserved and ignored in v1"); } @@ -393,6 +417,39 @@ impl DataDomeIntegration { Ok(()) } + fn validate_client_side_tag_url(tag_url: &str) -> Result<(), Report> { + if tag_url.starts_with('/') && !tag_url.starts_with("//") { + if tag_url.chars().any(is_unsafe_client_side_tag_path_char) { + return Err(Report::new(Self::error( + "client_side_tag_url root-relative paths must not include unsafe characters", + ))); + } + return Ok(()); + } + + let parsed = Url::parse(tag_url).map_err(|err| { + Report::new(Self::error(format!("Invalid client_side_tag_url: {err}"))) + })?; + + if !parsed.scheme().eq_ignore_ascii_case("https") { + return Err(Report::new(Self::error( + "client_side_tag_url must be root-relative or use https", + ))); + } + if parsed.host_str().is_none() { + return Err(Report::new(Self::error( + "client_side_tag_url must include a host when absolute", + ))); + } + if !parsed.username().is_empty() || parsed.password().is_some() { + return Err(Report::new(Self::error( + "client_side_tag_url 
must not include credentials", + ))); + } + + Ok(()) + } + fn error(message: impl Into) -> TrustedServerError { TrustedServerError::Integration { integration: DATADOME_INTEGRATION_ID.to_string(), @@ -719,12 +776,7 @@ impl IntegrationHeadInjector for DataDomeIntegration { "\"\"".to_string() }) .replace("window.ddjskey={key};window.ddoptions={options};" + "" )] } } @@ -1115,6 +1167,57 @@ mod tests { .expect("should accept HTTPS origin URL with optional trailing slash"); } + #[test] + fn client_side_tag_url_requires_root_relative_or_https() { + for tag_url in [ + "", + "tags.js", + "//example.com/tags.js", + "http://example.com/tags.js", + "/tags.js\" data-bad=\"1", + ] { + let mut config = test_config(); + config.client_side_tag_url = tag_url.to_string(); + + let err = match DataDomeIntegration::try_new(config) { + Ok(_) => panic!("should reject unsafe client-side tag URL: {tag_url}"), + Err(err) => err, + }; + + assert!( + format!("{err:?}").contains("client_side_tag_url"), + "should explain rejected client-side tag URL {tag_url}: {err:?}" + ); + } + } + + #[test] + fn client_side_tag_url_accepts_https_absolute_url() { + let mut config = test_config(); + config.client_side_tag_url = "https://example.com/tags.js?version=1".to_string(); + + DataDomeIntegration::try_new(config).expect("should accept HTTPS client-side tag URL"); + } + + #[test] + fn head_injector_escapes_client_side_tag_url_attribute() { + let mut config = test_config(); + config.client_side_key = "test-client-key".to_string(); + config.client_side_tag_url = "/integrations/datadome/tags.js?one=1&two=2".to_string(); + let integration = DataDomeIntegration::new(config); + let document_state = crate::integrations::IntegrationDocumentState::default(); + let ctx = html_context_for_tests(&document_state); + + let inserts = integration.head_inserts(&ctx); + + assert!( + inserts[0].contains( + "" + ), + "should HTML-escape the DataDome tag URL attribute" + ); + } + #[test] fn 
head_injector_emits_client_side_tag_when_key_configured() { let mut config = test_config(); diff --git a/crates/trusted-server-core/src/integrations/datadome/protection.rs b/crates/trusted-server-core/src/integrations/datadome/protection.rs index 7fa408760..8512bcc91 100644 --- a/crates/trusted-server-core/src/integrations/datadome/protection.rs +++ b/crates/trusted-server-core/src/integrations/datadome/protection.rs @@ -1,5 +1,3 @@ -use std::collections::HashMap; -use std::sync::{Arc, LazyLock, Mutex}; use std::time::Duration; use edgezero_core::body::Body as EdgeBody; @@ -28,16 +26,11 @@ const HEADER_DATADOME_CLIENT_ID: &str = "x-datadome-clientid"; const HEADER_DATADOME_X_SET_COOKIE: &str = "x-datadome-x-set-cookie"; const DATADOME_COOKIE_NAME: &str = "datadome"; -#[derive(Debug, Clone, Eq, Hash, PartialEq)] -struct DataDomeServerSideKeyCacheKey { - secret_store: String, - secret_name: String, +enum ProtectionRequestError { + Setup(Report), + Runtime(Report), } -static DATADOME_SERVER_SIDE_KEY_CACHE: LazyLock< - Mutex>>>, -> = LazyLock::new(|| Mutex::new(HashMap::new())); - impl DataDomeIntegration { pub(super) async fn filter_protection_request( &self, @@ -49,7 +42,11 @@ impl DataDomeIntegration { match self.filter_protection_request_inner(input).await { Ok(decision) => decision, - Err(err) => { + Err(ProtectionRequestError::Setup(err)) => { + log::error!("[datadome] Protection setup failed open: {err:?}"); + RequestFilterDecision::Continue(RequestFilterEffects::default()) + } + Err(ProtectionRequestError::Runtime(err)) => { log::warn!("[datadome] Protection API failed open: {err:?}"); RequestFilterDecision::Continue(RequestFilterEffects::default()) } @@ -59,11 +56,15 @@ impl DataDomeIntegration { async fn filter_protection_request_inner( &self, input: RequestFilterInput<'_>, - ) -> Result> { + ) -> Result { let api_url = self.protection_validate_url(); - let backend_name = self.ensure_protection_backend(input.services, &api_url)?; - let server_side_key = 
self.load_server_side_key(input.services)?; - let payload = self.build_protection_payload(&input, server_side_key.as_ref()); + let backend_name = self + .ensure_protection_backend(input.services, &api_url) + .map_err(ProtectionRequestError::Setup)?; + let server_side_key = self + .load_server_side_key(input.services) + .map_err(ProtectionRequestError::Setup)?; + let payload = self.build_protection_payload(&input, &server_side_key); let encoded_body = form_encode(&payload.fields); let mut builder = request_builder() @@ -86,14 +87,16 @@ impl DataDomeIntegration { .body(EdgeBody::from(encoded_body)) .change_context(Self::error( "Failed to build DataDome Protection API request", - ))?; + )) + .map_err(ProtectionRequestError::Runtime)?; let platform_response = input .services .http_client() .send(PlatformHttpRequest::new(request, backend_name)) .await - .change_context(Self::error("Failed to call DataDome Protection API"))?; + .change_context(Self::error("Failed to call DataDome Protection API")) + .map_err(ProtectionRequestError::Runtime)?; Ok(self.classify_protection_response(platform_response.response, input.request.method())) } @@ -165,17 +168,7 @@ impl DataDomeIntegration { fn load_server_side_key( &self, services: &RuntimeServices, - ) -> Result>, Report> { - let cache_key = server_side_key_cache_key(self); - if let Some(key) = DATADOME_SERVER_SIDE_KEY_CACHE - .lock() - .expect("should lock DataDome server-side key cache") - .get(&cache_key) - .cloned() - { - return Ok(key); - } - + ) -> Result, Report> { let store_name = StoreName::from(self.config.server_side_key_secret_store.as_str()); let key = services .secret_store() @@ -190,11 +183,7 @@ impl DataDomeIntegration { ))); } - let key = Arc::new(Redacted::new(key)); - let mut cache = DATADOME_SERVER_SIDE_KEY_CACHE - .lock() - .expect("should lock DataDome server-side key cache"); - Ok(Arc::clone(cache.entry(cache_key).or_insert(key))) + Ok(Redacted::new(key)) } fn build_protection_payload( @@ -426,21 +415,6 @@ 
struct ProtectionPayload { uses_header_client_id: bool, } -fn server_side_key_cache_key(integration: &DataDomeIntegration) -> DataDomeServerSideKeyCacheKey { - DataDomeServerSideKeyCacheKey { - secret_store: integration.config.server_side_key_secret_store.clone(), - secret_name: integration.config.server_side_key_secret_name.clone(), - } -} - -#[cfg(test)] -fn clear_datadome_server_side_key_cache_for_tests() { - DATADOME_SERVER_SIDE_KEY_CACHE - .lock() - .expect("should lock DataDome server-side key cache") - .clear(); -} - fn is_internal_path(path: &str) -> bool { path.starts_with("/static/tsjs=") || path.starts_with("/integrations/") @@ -668,6 +642,7 @@ fn truncate_utf8(value: &str, limit: i32) -> String { #[cfg(test)] mod tests { use std::collections::HashMap; + use std::sync::Arc; use crate::integrations::datadome::DataDomeConfig; use crate::platform::test_support::{ @@ -687,7 +662,6 @@ mod tests { #[test] fn load_server_side_key_reads_secret_store() { - clear_datadome_server_side_key_cache_for_tests(); let mut secrets = HashMap::new(); secrets.insert( "datadome_server_side_key".to_string(), @@ -708,7 +682,6 @@ mod tests { #[test] fn load_server_side_key_errors_when_secret_missing() { - clear_datadome_server_side_key_cache_for_tests(); let services = build_services_with_config_and_secret(NoopConfigStore, NoopSecretStore); let config = DataDomeConfig { enabled: true, @@ -787,6 +760,57 @@ mod tests { ); } + #[test] + fn classify_redirect_challenge_preserves_location_as_response_effect() { + let integration = protection_integration(); + let response = edgezero_core::http::response_builder() + .status(StatusCode::FOUND) + .header(HEADER_DATADOME_RESPONSE, "302") + .header(HEADER_DATADOME_HEADERS, "Location") + .header(header::LOCATION, "/challenge") + .body(EdgeBody::empty()) + .expect("should build DataDome redirect response"); + + let decision = integration.classify_protection_response(response, &Method::GET); + + let RequestFilterDecision::Respond { response, 
effects } = decision else { + panic!("should return a redirect challenge response"); + }; + assert_eq!( + response.status(), + StatusCode::FOUND, + "should preserve redirect status" + ); + assert_eq!( + effects.response_headers, + vec![HeaderMutation::set("location", "/challenge")], + "should carry Location through response effects" + ); + } + + #[test] + fn classify_ok_response_preserves_request_header_effects() { + let integration = protection_integration(); + let response = edgezero_core::http::response_builder() + .status(StatusCode::OK) + .header(HEADER_DATADOME_RESPONSE, "200") + .header(HEADER_DATADOME_REQUEST_HEADERS, "X-DataDome-ClientID") + .header(HEADER_DATADOME_CLIENT_ID, "client-123") + .body(EdgeBody::empty()) + .expect("should build DataDome allow response"); + + let decision = integration.classify_protection_response(response, &Method::GET); + + let RequestFilterDecision::Continue(effects) = decision else { + panic!("should continue with request header effects"); + }; + assert_eq!( + effects.request_headers, + vec![HeaderMutation::set(HEADER_DATADOME_CLIENT_ID, "client-123")], + "should carry requested upstream headers through effects" + ); + } + #[test] fn form_encode_url_encodes_values() { let encoded = form_encode(&[("Key".to_string(), "a b+c".to_string())]); diff --git a/docs/guide/integrations/datadome.md b/docs/guide/integrations/datadome.md index 6801c263a..9c342679b 100644 --- a/docs/guide/integrations/datadome.md +++ b/docs/guide/integrations/datadome.md @@ -89,7 +89,7 @@ patterns = ["(?i)\\.(avi|flv|mka|mkv|mov|mp4|mpeg|mpg|mp3|flac|ogg|ogm|opus|wav| | `enable_graphql_support` | boolean | `false` | Reserved for future GraphQL body inspection; ignored in v1 | | `client_side_key` | string | `""` | DataDome client-side JavaScript key used for tag injection | | `inject_client_side_tag` | boolean | `true` | Auto-inject the browser tag when `client_side_key` is non-empty | -| `client_side_tag_url` | string | `/integrations/datadome/tags.js` | 
Script URL used by auto-injection | +| `client_side_tag_url` | string | `/integrations/datadome/tags.js` | Root-relative or HTTPS script URL used by auto-injection | | `client_side_configuration` | object | `{ ajaxListenerPath = true }` | Options assigned to `window.ddoptions` | ## Client-side setup diff --git a/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md b/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md index 16cee0202..f8d582a4f 100644 --- a/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md +++ b/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md @@ -574,9 +574,11 @@ client-side tag: Rust implementation requirements: -- Serialize `client_side_key`, `client_side_configuration`, and - `client_side_tag_url` with `serde_json`. -- Escape `