Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 33 additions & 10 deletions packages/core/src/tool/webfetch.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
export * as WebFetchTool from "./webfetch"

import { ToolFailure } from "@opencode-ai/llm"
import { Duration, Effect, Layer, Schema } from "effect"
import { Data, Duration, Effect, Layer, Schema } from "effect"
import { HttpClient, HttpClientRequest, HttpClientResponse } from "effect/unstable/http"
import { Parser } from "htmlparser2"
import TurndownService from "turndown"
Expand All @@ -10,6 +10,11 @@ import { collectBoundedResponseBody } from "./http-body"
import { Tool } from "./tool"
import { Tools } from "./tools"

/**
 * Tagged error (tag `"WebFetch.InvalidUrl"`) describing a URL that was rejected,
 * together with a human-readable reason for the rejection.
 */
export class InvalidUrlError extends Data.TaggedError("WebFetch.InvalidUrl")<{
// The URL string that was rejected.
readonly url: string
// Explanation of why the URL was rejected.
readonly reason: string
}> {}

// Identifier string for this tool.
export const name = "webfetch"
// 5 MiB, expressed in bytes. NOTE(review): presumably the cap on the fetched response body — confirm at the usage site.
export const MAX_RESPONSE_BYTES = 5 * 1024 * 1024
// Default timeout value, in seconds per the constant's name. NOTE(review): usage is not visible here — confirm.
export const DEFAULT_TIMEOUT_SECONDS = 30
Expand All @@ -22,7 +27,16 @@ Use a more targeted tool when one is available. This tool is read-only. Large te
/**
 * Schema for a timeout value: a number strictly greater than 0 and no larger
 * than `MAX_TIMEOUT_SECONDS`.
 */
const Timeout = Schema.Number.check(
  Schema.isGreaterThan(0),
  Schema.isLessThanOrEqualTo(MAX_TIMEOUT_SECONDS),
)

export const Input = Schema.Struct({
url: Schema.String.annotate({ description: "The HTTP or HTTPS URL to fetch content from" }),
url: Schema.String.check(
Schema.makeFilter((s) => {
try {
new URL(s)
return true
} catch {
return "Invalid URL"
}
}),
).annotate({ description: "The HTTP or HTTPS URL to fetch content from" }),
format: Schema.Literals(["text", "markdown", "html"])
.annotate({ description: "The format to return the content in. Defaults to markdown." })
.pipe(Schema.withDecodingDefault(Effect.succeed("markdown" as const))),
Expand Down Expand Up @@ -79,10 +93,6 @@ const isCloudflareChallenge = (error: unknown) => {
/**
 * Builds a GET request for `url` with the headers produced by
 * `headers(format, userAgent)` applied to it.
 *
 * @param url - Target URL passed to `HttpClientRequest.get`.
 * @param format - Requested output format, forwarded to `headers`.
 * @param userAgent - User agent forwarded to `headers`; defaults to `browserUserAgent`.
 * @returns The GET request with the headers set.
 */
const request = (url: string, format: Format, userAgent = browserUserAgent) => {
  const base = HttpClientRequest.get(url)
  const applyHeaders = HttpClientRequest.setHeaders(headers(format, userAgent))
  return applyHeaders(base)
}

/**
 * Guards that a parsed URL uses the `http:` or `https:` protocol.
 *
 * @param url - An already-parsed URL.
 * @throws Error with message "URL must use http:// or https://" for any other protocol.
 */
const assertHttpUrl = (url: URL) => {
  const allowedProtocols = ["http:", "https:"]
  if (allowedProtocols.includes(url.protocol)) return
  throw new Error("URL must use http:// or https://")
}

/**
 * Sends the request built by `request(url, format, userAgent)` through `http`
 * and passes the response through `HttpClientResponse.filterStatusOk`.
 *
 * @param http - HTTP client used to execute the request.
 * @param url - Target URL.
 * @param format - Requested output format, forwarded to `request`.
 * @param userAgent - User agent forwarded to `request`; defaults to `browserUserAgent`.
 * @returns An effect yielding the response once it has passed the status filter.
 */
const execute = (http: HttpClient.HttpClient, url: string, format: Format, userAgent = browserUserAgent) => {
  const response = http.execute(request(url, format, userAgent))
  return Effect.flatMap(response, HttpClientResponse.filterStatusOk)
}

Expand Down Expand Up @@ -127,9 +137,16 @@ export const layer = Layer.effectDiscard(
toModelOutput: ({ output }) => [{ type: "text", text: output.output }],
execute: (input, context) =>
Effect.gen(function* () {
yield* Effect.try({
try: () => assertHttpUrl(new URL(input.url)),
catch: (error) => error,
yield* Effect.gen(function* () {
const url = new URL(input.url)
if (url.protocol !== "http:" && url.protocol !== "https:") {
return yield* Effect.fail(
new InvalidUrlError({
url: input.url,
reason: "URL must use http:// or https://",
}),
)
}
})

yield* permission.assert({
Expand Down Expand Up @@ -170,7 +187,13 @@ export const layer = Layer.effectDiscard(
format: input.format,
output,
}
}).pipe(Effect.mapError(() => new ToolFailure({ message: `Unable to fetch ${input.url}` }))),
}).pipe(
Effect.mapError((error) =>
error instanceof InvalidUrlError
? new ToolFailure({ message: `Invalid URL ${error.url}: ${error.reason}` })
: new ToolFailure({ message: `Unable to fetch ${input.url}` }),
),
),
}),
})
.pipe(Effect.orDie)
Expand Down
30 changes: 29 additions & 1 deletion packages/core/test/tool-webfetch.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,35 @@ describe("WebFetchTool registration", () => {

expect(yield* executeTool(registry, call({ url: "file:///etc/passwd", format: "text" }))).toEqual({
type: "error",
value: "Unable to fetch file:///etc/passwd",
value: "Invalid URL file:///etc/passwd: URL must use http:// or https://",
})
expect(assertions).toEqual([])
expect(requests).toEqual([])
}),
)

// A string that is not parseable as a URL is reported as an invalid-input error
// at the "url" path, and neither a permission assertion nor a request is recorded.
it.effect("rejects malformed URLs at schema decode without permission or transport", () =>
  Effect.gen(function* () {
    reset()
    const registry = yield* ToolRegistry.Service
    const malformedCall = call({ url: "not-a-url", format: "text" })
    const result = yield* executeTool(registry, malformedCall)

    expect(result).toEqual({
      type: "error",
      value: 'Invalid tool input: Invalid URL\n at ["url"]',
    })
    expect(assertions).toEqual([])
    expect(requests).toEqual([])
  }),
)

it.effect("rejects non-HTTP schemes with a typed error before permission or transport", () =>
Effect.gen(function* () {
reset()
const registry = yield* ToolRegistry.Service

expect(yield* executeTool(registry, call({ url: "ftp://example.com", format: "text" }))).toEqual({
type: "error",
value: "Invalid URL ftp://example.com: URL must use http:// or https://",
})
expect(assertions).toEqual([])
expect(requests).toEqual([])
Expand Down
Loading