From a4268caf73cb0deaa79d9f2a3792a3188f2908e2 Mon Sep 17 00:00:00 2001 From: lexlian <9305752+lexlian@users.noreply.github.com> Date: Tue, 23 Jun 2026 00:16:40 +0800 Subject: [PATCH] fix(core): distinguish WebFetch URL errors from network errors WebFetch's URL validation collapsed every failure (malformed URL, wrong scheme, transport failure, timeout, response too large, MIME rejection, HTML conversion crash) into the same "Unable to fetch" message. The model could not tell whether it sent a bad URL or hit an unreachable host. This commit introduces two-layer URL validation, matching Claude Code's WebFetch shape: 1. Schema-level parseability check via Schema.makeFilter rejects inputs that `new URL()` cannot parse. These surface through the standard tool-input channel as "Invalid tool input: Invalid URL\n at [\"url\"]". 2. Tool-layer scheme check (kept from the original assertHttpUrl) rejects non-http/https schemes with a typed InvalidUrlError. These surface as "Invalid URL : URL must use http:// or https://". Network, timeout, body-size, MIME, and HTML-conversion failures keep the generic "Unable to fetch " message so existing tests and downstream consumers do not break. Replaces Effect.try({ catch: (error) => error }) at the validation site with Effect.gen + Effect.fail(InvalidUrlError) so the tagged error reaches the boundary mapper without erasure. Removes the now-unused assertHttpUrl helper. Adds two new tests (parse error, ftp scheme) and updates the existing file:///etc/passwd test to expect the new typed message. Closes #33073 --- packages/core/src/tool/webfetch.ts | 43 ++++++++++++++++++------ packages/core/test/tool-webfetch.test.ts | 30 ++++++++++++++++- 2 files changed, 62 insertions(+), 11 deletions(-) diff --git a/packages/core/src/tool/webfetch.ts b/packages/core/src/tool/webfetch.ts index 2ce0868d991e..2d8ada47e74e 100644 --- a/packages/core/src/tool/webfetch.ts +++ b/packages/core/src/tool/webfetch.ts @@ -1,7 +1,7 @@ export * as WebFetchTool from "./webfetch" import { ToolFailure } from "@opencode-ai/llm" -import { Duration, Effect, Layer, Schema } from "effect" +import { Data, Duration, Effect, Layer, Schema } from "effect" import { HttpClient, HttpClientRequest, HttpClientResponse } from "effect/unstable/http" import { Parser } from "htmlparser2" import TurndownService from "turndown" @@ -10,6 +10,11 @@ import { collectBoundedResponseBody } from "./http-body" import { Tool } from "./tool" import { Tools } from "./tools" +export class InvalidUrlError extends Data.TaggedError("WebFetch.InvalidUrl")<{ + readonly url: string + readonly reason: string +}> {} + export const name = "webfetch" export const MAX_RESPONSE_BYTES = 5 * 1024 * 1024 export const DEFAULT_TIMEOUT_SECONDS = 30 @@ -22,7 +27,16 @@ Use a more targeted tool when one is available. This tool is read-only. Large te const Timeout = Schema.Number.check(Schema.isGreaterThan(0), Schema.isLessThanOrEqualTo(MAX_TIMEOUT_SECONDS)) export const Input = Schema.Struct({ - url: Schema.String.annotate({ description: "The HTTP or HTTPS URL to fetch content from" }), + url: Schema.String.check( + Schema.makeFilter((s) => { + try { + new URL(s) + return true + } catch { + return "Invalid URL" + } + }), + ).annotate({ description: "The HTTP or HTTPS URL to fetch content from" }), format: Schema.Literals(["text", "markdown", "html"]) .annotate({ description: "The format to return the content in. Defaults to markdown." }) .pipe(Schema.withDecodingDefault(Effect.succeed("markdown" as const))), @@ -79,10 +93,6 @@ const isCloudflareChallenge = (error: unknown) => { const request = (url: string, format: Format, userAgent = browserUserAgent) => HttpClientRequest.get(url).pipe(HttpClientRequest.setHeaders(headers(format, userAgent))) -const assertHttpUrl = (url: URL) => { - if (url.protocol !== "http:" && url.protocol !== "https:") throw new Error("URL must use http:// or https://") -} - const execute = (http: HttpClient.HttpClient, url: string, format: Format, userAgent = browserUserAgent) => http.execute(request(url, format, userAgent)).pipe(Effect.flatMap(HttpClientResponse.filterStatusOk)) @@ -127,9 +137,16 @@ export const layer = Layer.effectDiscard( toModelOutput: ({ output }) => [{ type: "text", text: output.output }], execute: (input, context) => Effect.gen(function* () { - yield* Effect.try({ - try: () => assertHttpUrl(new URL(input.url)), - catch: (error) => error, + yield* Effect.gen(function* () { + const url = new URL(input.url) + if (url.protocol !== "http:" && url.protocol !== "https:") { + return yield* Effect.fail( + new InvalidUrlError({ + url: input.url, + reason: "URL must use http:// or https://", + }), + ) + } }) yield* permission.assert({ @@ -170,7 +187,13 @@ export const layer = Layer.effectDiscard( format: input.format, output, } - }).pipe(Effect.mapError(() => new ToolFailure({ message: `Unable to fetch ${input.url}` }))), + }).pipe( + Effect.mapError((error) => + error instanceof InvalidUrlError + ? new ToolFailure({ message: `Invalid URL ${error.url}: ${error.reason}` }) + : new ToolFailure({ message: `Unable to fetch ${input.url}` }), + ), + ), }), }) .pipe(Effect.orDie) diff --git a/packages/core/test/tool-webfetch.test.ts b/packages/core/test/tool-webfetch.test.ts index 5a856ffaf428..fd094c8c0d86 100644 --- a/packages/core/test/tool-webfetch.test.ts +++ b/packages/core/test/tool-webfetch.test.ts @@ -147,7 +147,35 @@ describe("WebFetchTool registration", () => { expect(yield* executeTool(registry, call({ url: "file:///etc/passwd", format: "text" }))).toEqual({ type: "error", - value: "Unable to fetch file:///etc/passwd", + value: "Invalid URL file:///etc/passwd: URL must use http:// or https://", + }) + expect(assertions).toEqual([]) + expect(requests).toEqual([]) + }), + ) + + it.effect("rejects malformed URLs at schema decode without permission or transport", () => + Effect.gen(function* () { + reset() + const registry = yield* ToolRegistry.Service + + expect(yield* executeTool(registry, call({ url: "not-a-url", format: "text" }))).toEqual({ + type: "error", + value: 'Invalid tool input: Invalid URL\n at ["url"]', + }) + expect(assertions).toEqual([]) + expect(requests).toEqual([]) + }), + ) + + it.effect("rejects non-HTTP schemes with a typed error before permission or transport", () => + Effect.gen(function* () { + reset() + const registry = yield* ToolRegistry.Service + + expect(yield* executeTool(registry, call({ url: "ftp://example.com", format: "text" }))).toEqual({ + type: "error", + value: "Invalid URL ftp://example.com: URL must use http:// or https://", }) expect(assertions).toEqual([]) expect(requests).toEqual([])