Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 39 additions & 5 deletions assets/apps_script/Code.gs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,32 @@ const AUTH_KEY = "CHANGE_ME_TO_A_STRONG_SECRET";
// (Inspired by #365 Section 3, mhrv-rs v1.8.0+.)
const DIAGNOSTIC_MODE = false;

// ── Response header noise filtering ────────────────────────────────────────
// CDN stacks (Cloudflare, AWS, Fastly, Google) attach metadata headers to
// every response that are useless through a MITM relay: report-to, nel,
// alt-svc, server-timing, etc. These add 400-700 bytes of JSON per response
// for no benefit — the relay ignores them and the browser never reads them.
//
// STRIP_NOISE_RESPONSE_HEADERS controls whether _respHeaders() filters them
// before returning. Hardcoded true here for GAS-side payload reduction.
// The primary user toggle is `strip_noise_response_headers` in config.toml
// on the Rust client side, which drops them even if Code.gs sends them.
//
// Set to false only if you need to see raw origin headers in GAS logs.
// ---------------------------------------------------------------------------
const STRIP_NOISE_RESPONSE_HEADERS = true;

// Lookup table of response header names that _respHeaders() removes when
// STRIP_NOISE_RESPONSE_HEADERS is true. Keys MUST be lowercase:
// _respHeaders() lowercases each header name before looking it up here.
// The value (1) is only a presence marker.
// The Rust client keeps the same names in NOISE_RESPONSE_HEADERS
// (src/domain_fronter.rs) — update both lists together.
const STRIP_RESPONSE_HEADERS = {
"report-to": 1, "reporting-endpoints": 1,
"nel": 1,
"alt-svc": 1,
"server-timing": 1,
"origin-trial": 1,
"cf-ray": 1, "cf-cache-status": 1,
"x-amzn-requestid": 1, "x-amzn-trace-id": 1,
"x-request-id": 1, "x-correlation-id": 1,
};

// ── Optional Spreadsheet Cache ──────────────────────────────
// Set to a valid Spreadsheet ID to enable response caching.
// Leave as-is to disable caching entirely (zero overhead).
Expand Down Expand Up @@ -329,12 +355,20 @@ function _buildOpts(req) {
}

// Return the headers of a fetch response as a plain object, optionally
// without CDN noise headers.
//
// Header source, in order of preference:
//   1. resp.getAllHeaders() when the method exists and does not throw.
//   2. resp.getHeaders() as the fallback.
//   3. {} when both fail, so callers always get an object.
//
// When STRIP_NOISE_RESPONSE_HEADERS is true, every header whose lowercased
// name is an own key of STRIP_RESPONSE_HEADERS is dropped; the original
// header-name casing of the remaining entries is preserved.
function _respHeaders(resp) {
  var raw = null;
  try {
    if (typeof resp.getAllHeaders === "function") {
      raw = resp.getAllHeaders();
    }
  } catch (err) {
    // getAllHeaders() failed: fall through to getHeaders() below instead of
    // giving up and returning no headers at all.
  }
  if (!raw) {
    try {
      raw = resp.getHeaders() || {};
    } catch (err) {
      raw = {};
    }
  }
  if (!STRIP_NOISE_RESPONSE_HEADERS) return raw;

  var out = {};
  for (var k in raw) {
    // Own-property check: a plain truthy lookup would also match inherited
    // Object.prototype members such as "constructor" or "tostring".
    var isNoise = Object.prototype.hasOwnProperty.call(
      STRIP_RESPONSE_HEADERS, k.toLowerCase());
    if (!isNoise) out[k] = raw[k];
  }
  return out;
}

function _json(obj) {
Expand Down
26 changes: 26 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,13 @@ pub struct Config {
/// Hard cap on total coalesce wait (ms). 0 = use compiled default (1000ms).
#[serde(default)]
pub coalesce_max_ms: u16,
/// Adaptive coalescing preset. One of "auto" (default), "fast", or "slow".
/// "auto" measures batch RTT and switches automatically.
/// "fast" uses 50ms/300ms windows — best for broadband/fiber.
/// "slow" uses 150ms/600ms windows — best for slow links (Iran cable, mobile).
/// Leave unset or set to "auto" for automatic detection.
#[serde(default)]
pub network_preset: Option<String>,
/// Optional explicit SNI rotation pool for outbound TLS to `google_ip`.
/// Empty / missing = auto-expand from `front_domain` (current default of
/// {www, mail, drive, docs, calendar}.google.com). Set to an explicit list
Expand Down Expand Up @@ -426,6 +433,16 @@ pub struct Config {
/// Default 500.
#[serde(default = "default_quota_safety_buffer")]
pub quota_safety_buffer: u64,

/// Strip CDN noise headers from relay responses before forwarding to
/// the browser. Headers such as `report-to`, `nel`, `alt-svc`, and
/// `server-timing` are attached by modern CDNs (Cloudflare, AWS,
/// Fastly) and add 400–700 bytes per response for no benefit through
/// a MITM relay — the proxy ignores them and the browser never reads
/// them. Default `true`. Set to `false` only to inspect raw origin
/// headers for debugging.
#[serde(default = "default_strip_noise_response_headers")]
pub strip_noise_response_headers: bool,
}

/// Configuration for the optional second-hop exit node.
Expand Down Expand Up @@ -556,6 +573,7 @@ fn default_auto_blacklist_window_secs() -> u64 { 30 }
/// Default for `auto_blacklist_cooldown_secs`: 120.
/// NOTE(review): the `#[serde(default = ...)]` attribute that binds this
/// function to the field is outside this view — confirm.
fn default_auto_blacklist_cooldown_secs() -> u64 { 120 }
/// Default for `quota_daily_limit`: 20_000. Also called directly when a
/// `TomlConfig` is converted into a `Config`.
fn default_quota_daily_limit() -> u64 { 20_000 }
/// Default for `quota_safety_buffer`: 500.
fn default_quota_safety_buffer() -> u64 { 500 }
/// Default for `strip_noise_response_headers`: `true`, i.e. CDN noise
/// headers are stripped unless the user explicitly sets the option to `false`.
fn default_strip_noise_response_headers() -> bool { true }

/// Default for `request_timeout_secs`: 30s, matching the historical
/// hard-coded `BATCH_TIMEOUT` and Apps Script's typical response cliff.
Expand Down Expand Up @@ -785,6 +803,8 @@ pub struct TomlRelay {
#[serde(default)]
pub coalesce_max_ms: u16,
#[serde(default)]
pub network_preset: Option<String>,
#[serde(default)]
pub youtube_via_relay: bool,
#[serde(default)]
pub normalize_x_graphql: bool,
Expand All @@ -802,6 +822,8 @@ pub struct TomlRelay {
pub request_timeout_secs: u64,
#[serde(default = "default_stream_timeout_secs")]
pub stream_timeout_secs: u64,
#[serde(default = "default_strip_noise_response_headers")]
pub strip_noise_response_headers: bool,
}

/// [network] section of config.toml.
Expand Down Expand Up @@ -935,6 +957,7 @@ impl From<TomlConfig> for Config {
parallel_relay: t.relay.parallel_relay,
coalesce_step_ms: t.relay.coalesce_step_ms,
coalesce_max_ms: t.relay.coalesce_max_ms,
network_preset: t.relay.network_preset,
sni_hosts: t.network.sni_hosts,
fetch_ips_from_api: t.scan.fetch_ips_from_api,
max_ips_to_scan: t.scan.max_ips_to_scan,
Expand All @@ -959,6 +982,7 @@ impl From<TomlConfig> for Config {
exit_node: t.exit_node,
quota_daily_limit: default_quota_daily_limit(),
quota_safety_buffer: default_quota_safety_buffer(),
strip_noise_response_headers: t.relay.strip_noise_response_headers,
}
}
}
Expand All @@ -977,6 +1001,7 @@ impl From<&Config> for TomlConfig {
enable_batching: c.enable_batching,
coalesce_step_ms: c.coalesce_step_ms,
coalesce_max_ms: c.coalesce_max_ms,
network_preset: c.network_preset.clone(),
youtube_via_relay: c.youtube_via_relay,
normalize_x_graphql: c.normalize_x_graphql,
disable_padding: c.disable_padding,
Expand All @@ -986,6 +1011,7 @@ impl From<&Config> for TomlConfig {
auto_blacklist_cooldown_secs: c.auto_blacklist_cooldown_secs,
request_timeout_secs: c.request_timeout_secs,
stream_timeout_secs: c.stream_timeout_secs,
strip_noise_response_headers: c.strip_noise_response_headers,
},
network: TomlNetwork {
google_ip: c.google_ip.clone(),
Expand Down
70 changes: 53 additions & 17 deletions src/domain_fronter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ impl FronterError {
}

type PooledStream = TlsStream<TcpStream>;
const POOL_TTL_SECS: u64 = 60;
const POOL_TTL_SECS: u64 = 30;
const POOL_MIN: usize = 8;
const POOL_REFILL_INTERVAL_SECS: u64 = 5;
const POOL_REFILL_INTERVAL_SECS: u64 = 2;
const POOL_MAX: usize = 80;
const REQUEST_TIMEOUT_SECS: u64 = 25;
const RANGE_PARALLEL_CHUNK_BYTES: u64 = 256 * 1024;
Expand All @@ -118,7 +118,7 @@ const H2_CONN_TTL_SECS: u64 = 540;
/// `h2_round_trip`. This way a slow but legitimate Apps Script call
/// isn't cut off at an arbitrary fixed cap, and Full-mode batches can
/// honor the user's `request_timeout_secs` setting.
const H2_READY_TIMEOUT_SECS: u64 = 5;
const H2_READY_TIMEOUT_SECS: u64 = 3;
/// Default response-phase deadline used by `relay_uncoalesced` callers
/// (the Apps-Script direct path). Sized to be just under the outer
/// `REQUEST_TIMEOUT_SECS` (25 s) so an h2 timeout still leaves a few
Expand Down Expand Up @@ -147,7 +147,7 @@ const H1_OPEN_TIMEOUT_SECS: u64 = 8;
/// containers go cold after ~5min idle and cost 1-3s on the first
/// request to wake back up — most painful on YouTube / streaming where
/// the first chunk after a quiet pause stalls the player.
const H1_KEEPALIVE_INTERVAL_SECS: u64 = 240;
const H1_KEEPALIVE_INTERVAL_SECS: u64 = 60;
/// Largest response body Apps Script's `UrlFetchApp` will deliver before
/// the script gets killed mid-execution. The hard wire ceiling is ~50 MiB;
/// after base64 / envelope overhead and edge variance, the practical raw
Expand Down Expand Up @@ -413,6 +413,10 @@ pub struct DomainFronter {
/// payloads. Mirrors `Config::disable_padding` (#391). Default false
/// (padding active = stronger DPI defense at +25% bandwidth cost).
disable_padding: bool,
/// Strip CDN noise headers (report-to, nel, alt-svc, etc.) from the
/// relay response before forwarding to the browser. Default true.
/// Mirrors `Config::strip_noise_response_headers`.
strip_noise_response_headers: bool,
zstd_enabled: Arc<AtomicBool>,
/// Per-instance auto-blacklist tuning. Mirrors `Config::auto_blacklist_*`
/// (#391, #444). Cached here so the hot path in `record_timeout_strike`
Expand Down Expand Up @@ -648,6 +652,7 @@ impl DomainFronter {
today_bytes: AtomicU64::new(0),
today_key: std::sync::Mutex::new(current_pt_day_key()),
disable_padding: config.disable_padding,
strip_noise_response_headers: config.strip_noise_response_headers,
zstd_enabled: Arc::new(AtomicBool::new(false)),
auto_blacklist_strikes: config.auto_blacklist_strikes.max(1),
auto_blacklist_window: Duration::from_secs(
Expand Down Expand Up @@ -1405,10 +1410,13 @@ impl DomainFronter {
// `release_capacity` on every chunk for typical Apps Script
// payloads (usually < 1 MB; range chunks are 256 KB). We still
// release capacity in the body-read loop for safety on larger
// bodies.
// bodies. 16/32 MB windows eliminate stalls for range-parallel
// streaming (256 KB chunks × many streams) without adding memory
// overhead on idle connections (the window is just a counter until
// data flows).
let (send, conn) = h2::client::Builder::new()
.initial_window_size(4 * 1024 * 1024)
.initial_connection_window_size(8 * 1024 * 1024)
.initial_window_size(16 * 1024 * 1024)
.initial_connection_window_size(32 * 1024 * 1024)
.handshake(tls)
.await
.map_err(|e| OpenH2Error::Handshake(e.to_string()))?;
Expand Down Expand Up @@ -1626,9 +1634,15 @@ impl DomainFronter {
// through Apps Script (where a 256 KB range chunk can take 30-90s
// of wall-clock time) are not killed by the tighter `batch_timeout`.
// Release flow-control credit per chunk so large responses don't
// stall after the initial 4 MB window.
// stall after the initial window.
// Pre-size from content-length to avoid O(log n) realloc cycles
// on large GAS responses (up to 40 MB).
let stream_timeout = self.stream_timeout();
let mut buf: Vec<u8> = Vec::new();
let body_hint: usize = headers.iter()
.find(|(k, _)| k.eq_ignore_ascii_case("content-length"))
.and_then(|(_, v)| v.parse().ok())
.unwrap_or(0);
let mut buf: Vec<u8> = Vec::with_capacity(body_hint.min(APPS_SCRIPT_BODY_MAX_BYTES as usize));
loop {
match tokio::time::timeout(stream_timeout, body.data()).await {
Ok(None) => break,
Expand Down Expand Up @@ -2842,7 +2856,7 @@ impl DomainFronter {
status, body_txt
)));
}
return parse_relay_json(&resp_body).map_err(|e| {
return parse_relay_json(&resp_body, self.strip_noise_response_headers).map_err(|e| {
if let FronterError::Relay(ref msg) = e {
if looks_like_quota_error(msg) {
self.blacklist_script(&script_id, msg);
Expand Down Expand Up @@ -2951,7 +2965,7 @@ impl DomainFronter {
status, body_txt
)));
}
match parse_relay_json(&resp_body) {
match parse_relay_json(&resp_body, self.strip_noise_response_headers) {
Ok(bytes) => Ok::<_, FronterError>((bytes, true)),
Err(e) => {
if let FronterError::Relay(ref msg) = e {
Expand Down Expand Up @@ -4992,8 +5006,27 @@ fn is_h2_fronting_refusal_status(status: u16) -> bool {
status == 421
}

/// CDN metadata headers that carry no value through a MITM relay.
///
/// `parse_relay_json` skips every header found in this list when its
/// `strip_noise` argument is `true`; callers pass
/// `strip_noise_response_headers`, which defaults to `true`. These headers
/// add 400-700 bytes of JSON per CDN-backed response for zero benefit.
///
/// Entries are compared by exact string match, so keep them lowercase
/// (assumes the lookup key is the lowercased header name — confirm in
/// `parse_relay_json`). The same names are listed in `STRIP_RESPONSE_HEADERS`
/// in `assets/apps_script/Code.gs`; update both lists together.
///
/// NOTE(review): `x-request-id` / `x-correlation-id` may be read by page
/// scripts for support/debug purposes — confirm that dropping them is
/// acceptable for all relayed sites.
static NOISE_RESPONSE_HEADERS: &[&str] = &[
"report-to",
"reporting-endpoints",
"nel",
"alt-svc",
"server-timing",
"origin-trial",
"cf-ray",
"cf-cache-status",
"x-amzn-requestid",
"x-amzn-trace-id",
"x-request-id",
"x-correlation-id",
];

/// Parse the JSON envelope from Apps Script and build a raw HTTP response.
fn parse_relay_json(body: &[u8]) -> Result<Vec<u8>, FronterError> {
fn parse_relay_json(body: &[u8], strip_noise: bool) -> Result<Vec<u8>, FronterError> {
let text = std::str::from_utf8(body)
.map_err(|_| FronterError::BadResponse("non-utf8 json".into()))?
.trim();
Expand Down Expand Up @@ -5075,6 +5108,9 @@ fn parse_relay_json(body: &[u8]) -> Result<Vec<u8>, FronterError> {
if SKIP.contains(&lk.as_str()) {
continue;
}
if strip_noise && NOISE_RESPONSE_HEADERS.contains(&lk.as_str()) {
continue;
}
match v {
Value::Array(arr) => {
for item in arr {
Expand Down Expand Up @@ -5896,7 +5932,7 @@ mod tests {
#[test]
fn parse_relay_basic_json() {
let body = r#"{"s":200,"h":{"Content-Type":"text/plain"},"b":"SGVsbG8="}"#;
let raw = parse_relay_json(body.as_bytes()).unwrap();
let raw = parse_relay_json(body.as_bytes(), true).unwrap();
let s = String::from_utf8_lossy(&raw);
assert!(s.starts_with("HTTP/1.1 200 OK\r\n"));
assert!(s.contains("Content-Type: text/plain\r\n"));
Expand Down Expand Up @@ -6795,14 +6831,14 @@ hello";
// An envelope that carries only an `e` (error) field must be reported as
// `FronterError::Relay`. Uses the two-argument `parse_relay_json` signature
// (`strip_noise = true`); the stale one-argument call was removed.
#[test]
fn parse_relay_error_field() {
    let body = r#"{"e":"unauthorized"}"#;
    let err = parse_relay_json(body.as_bytes(), true).unwrap_err();
    assert!(matches!(err, FronterError::Relay(_)));
}

// A `b` field that is not valid base64 must be rejected as
// `FronterError::BadResponse`. Uses the two-argument `parse_relay_json`
// signature (`strip_noise = true`); the stale one-argument call was removed.
#[test]
fn parse_relay_rejects_invalid_body_base64() {
    let body = r#"{"s":200,"b":"***not-base64***"}"#;
    let err = parse_relay_json(body.as_bytes(), true).unwrap_err();
    assert!(matches!(err, FronterError::BadResponse(_)));
}

Expand Down Expand Up @@ -6861,7 +6897,7 @@ hello";
#[test]
fn parse_relay_array_set_cookie() {
let body = r#"{"s":200,"h":{"Set-Cookie":["a=1","b=2"]},"b":""}"#;
let raw = parse_relay_json(body.as_bytes()).unwrap();
let raw = parse_relay_json(body.as_bytes(), true).unwrap();
let s = String::from_utf8_lossy(&raw);
assert!(s.contains("Set-Cookie: a=1\r\n"));
assert!(s.contains("Set-Cookie: b=2\r\n"));
Expand Down Expand Up @@ -6929,7 +6965,7 @@ hello";
// to fail with `key must be a string at line 2`.
let inner_json = r#"{"s":200,"h":{},"b":""}"#;
let wrapped = build_goog_script_init_wrapper(inner_json);
let raw = parse_relay_json(wrapped.as_bytes()).unwrap();
let raw = parse_relay_json(wrapped.as_bytes(), true).unwrap();
let s = String::from_utf8_lossy(&raw);
assert!(s.starts_with("HTTP/1.1 200 "), "got: {}", s);
}
Expand Down
Loading
Loading