|
| 1 | +import { z } from "zod/v4"; |
| 2 | +import { MODEL_NAMES } from "./models.js"; |
| 3 | +import * as url from "./url.js"; |
| 4 | + |
/** Service identifiers accepted by `apiServiceEnumSchema`, in declaration order. */
const API_SERVICE_NAMES = ["scrape", "extract", "schema", "search", "monitor", "crawl"] as const;

/** Enum schema that accepts exactly one of the identifiers in `API_SERVICE_NAMES`. */
export const apiServiceEnumSchema = z.enum(API_SERVICE_NAMES);
/** Values accepted by `apiStatusEnumSchema`. */
const API_STATUS_VALUES = ["completed", "failed"] as const;

/** Values accepted by `apiHtmlModeSchema`. */
const API_HTML_MODE_VALUES = ["normal", "reader", "prune"] as const;

/** Enum schema for a status: "completed" or "failed". */
export const apiStatusEnumSchema = z.enum(API_STATUS_VALUES);

/** Enum schema for an HTML mode: "normal", "reader", or "prune". */
export const apiHtmlModeSchema = z.enum(API_HTML_MODE_VALUES);
/**
 * MIME types accepted by `apiFetchContentTypeSchema`.
 * The order is preserved from the original declaration; comments only group
 * the entries by family.
 */
const API_FETCH_CONTENT_TYPES = [
  // Web pages and PDF
  "text/html",
  "application/pdf",
  // Office Open XML: wordprocessing, presentation, spreadsheet
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
  "application/vnd.openxmlformats-officedocument.presentationml.presentation",
  "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
  // Images
  "image/jpeg",
  "image/png",
  "image/webp",
  "image/gif",
  "image/avif",
  "image/tiff",
  "image/heic",
  "image/bmp",
  // Other document and text formats
  "application/epub+zip",
  "application/rtf",
  "application/vnd.oasis.opendocument.text",
  "text/csv",
  "text/plain",
  "application/x-latex",
] as const;

/** Enum schema that accepts exactly one of the MIME types in `API_FETCH_CONTENT_TYPES`. */
export const apiFetchContentTypeSchema = z.enum(API_FETCH_CONTENT_TYPES);
/** Inclusive length bounds applied to a user prompt string. */
const USER_PROMPT_MIN_LENGTH = 1;
const USER_PROMPT_MAX_LENGTH = 10_000;

/** A non-empty prompt string of at most 10,000 characters (measured by string length). */
export const apiUserPromptSchema = z
  .string()
  .min(USER_PROMPT_MIN_LENGTH)
  .max(USER_PROMPT_MAX_LENGTH);
| 37 | + |
/**
 * Returns true when `candidate` parses as an http(s) URL whose hostname is not
 * flagged by `url.isInternal`. Any exception raised while parsing or while
 * checking the hostname is treated as a rejection.
 */
function isAllowedPublicUrl(candidate: string): boolean {
  try {
    const parsed = new URL(candidate);
    const usesHttp = parsed.protocol === "http:" || parsed.protocol === "https:";
    if (!usesHttp) {
      return false;
    }
    return !url.isInternal(parsed.hostname);
  } catch {
    return false;
  }
}

/**
 * URL schema: must pass `z.url()` and then the public-http(s) check above.
 * Note that non-http(s) schemes are rejected with the same message as internal hosts.
 */
export const apiUrlSchema = z
  .url()
  .check(z.refine(isAllowedPublicUrl, "Private or internal URLs are not allowed"));
| 49 | + |
/** Default page number and page size, plus the largest page size accepted. */
const PAGINATION_DEFAULT_PAGE = 1;
const PAGINATION_DEFAULT_LIMIT = 20;
const PAGINATION_MAX_LIMIT = 100;

/**
 * Pagination parameters. Both values are coerced to numbers before validation.
 * - `page`: positive integer, defaults to 1.
 * - `limit`: positive integer no greater than 100, defaults to 20.
 */
export const apiPaginationSchema = z.object({
  page: z.coerce
    .number()
    .int()
    .positive()
    .default(PAGINATION_DEFAULT_PAGE),
  limit: z.coerce
    .number()
    .int()
    .positive()
    .max(PAGINATION_MAX_LIMIT)
    .default(PAGINATION_DEFAULT_LIMIT),
});
| 54 | + |
/** Case-insensitive 8-4-4-4-12 hexadecimal pattern; version and variant bits are not checked. */
const UUID_PARAM_PATTERN = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;

/** Parameter object whose `id` must match `UUID_PARAM_PATTERN`. */
export const apiUuidParamSchema = z.object({
  id: z.string().regex(UUID_PARAM_PATTERN),
});
| 58 | + |
/**
 * Default values for the fetch configuration fields that have defaults.
 * `apiFetchConfigSchema` reads each of these for the matching field's `.default(...)`.
 */
export const FETCH_CONFIG_DEFAULTS = {
  // NOTE(review): presumably milliseconds — confirm against the consumer.
  timeout: 15_000,
  render: false,
  // NOTE(review): presumably milliseconds — confirm against the consumer.
  wait: 0,
  stealth: false,
  scrolls: 0,
} as const;
| 66 | + |
/** Builds a fresh string-to-string record schema (used for `headers` and `cookies`). */
const fetchStringMapSchema = () => z.record(z.string(), z.string());

/**
 * Object form of the `mock` option. Every field has a default, so `{}` is valid.
 * No cross-field check is made between the min/max pairs.
 */
const fetchMockOptionsSchema = z.object({
  minKb: z.number().int().min(1).max(1000).default(1),
  maxKb: z.number().int().min(1).max(1000).default(5),
  minSleep: z.number().int().min(0).max(30_000).default(5),
  maxSleep: z.number().int().min(0).max(30_000).default(15),
  writeToBucket: z.boolean().default(false),
});

/**
 * Fetch configuration.
 * - `timeout`: integer in [1000, 30000]; default from `FETCH_CONFIG_DEFAULTS`.
 * - `render`, `stealth`: booleans; defaults from `FETCH_CONFIG_DEFAULTS`.
 * - `wait`: integer in [0, 30000]; default from `FETCH_CONFIG_DEFAULTS`.
 * - `headers`, `cookies`: optional string-to-string maps.
 * - `country`: optional 2-character string, lower-cased after validation.
 * - `scrolls`: integer in [0, 100]; default from `FETCH_CONFIG_DEFAULTS`.
 * - `mock`: boolean or a mock options object; defaults to `false`.
 */
export const apiFetchConfigSchema = z.object({
  timeout: z
    .number()
    .int()
    .min(1000)
    .max(30_000)
    .default(FETCH_CONFIG_DEFAULTS.timeout),
  render: z.boolean().default(FETCH_CONFIG_DEFAULTS.render),
  wait: z
    .number()
    .int()
    .min(0)
    .max(30_000)
    .default(FETCH_CONFIG_DEFAULTS.wait),
  headers: fetchStringMapSchema().optional(),
  cookies: fetchStringMapSchema().optional(),
  country: z
    .string()
    .length(2)
    .transform((code) => code.toLowerCase())
    .optional(),
  stealth: z.boolean().default(FETCH_CONFIG_DEFAULTS.stealth),
  scrolls: z
    .number()
    .int()
    .min(0)
    .max(100)
    .default(FETCH_CONFIG_DEFAULTS.scrolls),
  mock: z.union([z.boolean(), fetchMockOptionsSchema]).default(false),
});
| 93 | + |
/** Chunk size: either an integer of at least 2048 or the literal string "dynamic". */
const chunkerSizeSchema = z.union([z.number().int().min(2048), z.literal("dynamic")]);

/** Chunk overlap: an integer in [0, 512]. */
const chunkerOverlapSchema = z.number().int().min(0).max(512);

/** Chunker settings; both fields are optional and have no defaults. */
export const apiChunkerSchema = z.object({
  size: chunkerSizeSchema.optional(),
  overlap: chunkerOverlapSchema.optional(),
});
| 98 | + |
/** Upper bound and default for `maxTokens`. */
const LLM_MAX_TOKENS_LIMIT = 16_384;
const LLM_MAX_TOKENS_DEFAULT = 4096;

/**
 * LLM configuration.
 * - `model`: optional, one of `MODEL_NAMES`.
 * - `temperature`: number in [0, 1], defaults to 0.
 * - `maxTokens`: integer in [1, 16384], defaults to 4096.
 * - `chunker`: optional chunker settings.
 */
export const apiLlmConfigSchema = z.object({
  model: z.enum(MODEL_NAMES).optional(),
  temperature: z.number().min(0).max(1).default(0),
  maxTokens: z
    .number()
    .int()
    .min(1)
    .max(LLM_MAX_TOKENS_LIMIT)
    .default(LLM_MAX_TOKENS_DEFAULT),
  chunker: apiChunkerSchema.optional(),
});
| 105 | + |
/**
 * History listing filter: the shared pagination parameters plus an optional
 * `service` restriction.
 *
 * `page` and `limit` are inherited from `apiPaginationSchema` rather than being
 * re-declared, so the two schemas cannot drift apart. The accepted values are
 * unchanged: both are coerced integers, `page` >= 1 (default 1) and
 * `limit` in [1, 100] (default 20).
 */
export const apiHistoryFilterSchema = apiPaginationSchema.extend({
  service: apiServiceEnumSchema.optional(),
});
| 111 | + |
/** Values accepted by `apiScrapeFormatSchema`. */
const API_SCRAPE_FORMATS = ["markdown", "html", "screenshot", "branding"] as const;

/** Enum schema for a scrape output format. */
export const apiScrapeFormatSchema = z.enum(API_SCRAPE_FORMATS);
| 113 | + |
/** `mode` field for markdown output: an HTML mode that defaults to "normal". */
const markdownModeField = apiHtmlModeSchema.default("normal");

/** Markdown output options. */
export const apiMarkdownConfigSchema = z.object({ mode: markdownModeField });
| 117 | + |
/** `mode` field for HTML output: an HTML mode that defaults to "normal". */
const htmlModeField = apiHtmlModeSchema.default("normal");

/** HTML output options. */
export const apiHtmlConfigSchema = z.object({ mode: htmlModeField });
| 121 | + |
/** Integer constrained to [lowest, highest] that falls back to `fallback` when omitted. */
const screenshotBoundedInt = (lowest: number, highest: number, fallback: number) =>
  z.number().int().min(lowest).max(highest).default(fallback);

/**
 * Screenshot options.
 * - `fullPage`: boolean, defaults to false.
 * - `width`: integer in [320, 3840], defaults to 1440.
 * - `height`: integer in [200, 2160], defaults to 900.
 * - `quality`: integer in [1, 100], defaults to 80.
 */
export const apiScreenshotConfigSchema = z.object({
  fullPage: z.boolean().default(false),
  width: screenshotBoundedInt(320, 3840, 1440),
  height: screenshotBoundedInt(200, 2160, 900),
  quality: screenshotBoundedInt(1, 100, 80),
});
| 128 | + |
/**
 * Fields shared by every scrape request variant. This is a plain shape object
 * (not a schema) so it can be spread into each variant's `z.object({...})`.
 */
const scrapeBase = {
  // Required target URL.
  url: apiUrlSchema,
  // Optional content type.
  contentType: apiFetchContentTypeSchema.optional(),
  // Optional fetch configuration.
  fetchConfig: apiFetchConfigSchema.optional(),
};
| 134 | + |
/** Scrape variant for `format: "markdown"`; `markdown` options default to `{ mode: "normal" }`. */
const scrapeMarkdownVariant = z.object({
  ...scrapeBase,
  format: z.literal("markdown"),
  markdown: apiMarkdownConfigSchema.default({ mode: "normal" }),
});

/** Scrape variant for `format: "html"`; `html` options default to `{ mode: "normal" }`. */
const scrapeHtmlVariant = z.object({
  ...scrapeBase,
  format: z.literal("html"),
  html: apiHtmlConfigSchema.default({ mode: "normal" }),
});

/** Scrape variant for `format: "screenshot"`; `screenshot` options default to the values below. */
const scrapeScreenshotVariant = z.object({
  ...scrapeBase,
  format: z.literal("screenshot"),
  screenshot: apiScreenshotConfigSchema.default({
    fullPage: false,
    width: 1440,
    height: 900,
    quality: 80,
  }),
});

/** Scrape variant for `format: "branding"`; it adds no format-specific options. */
const scrapeBrandingVariant = z.object({
  ...scrapeBase,
  format: z.literal("branding"),
});

/** Union of the scrape variants, selected by the `format` field. */
const apiScrapeDiscriminatedSchema = z.discriminatedUnion("format", [
  scrapeMarkdownVariant,
  scrapeHtmlVariant,
  scrapeScreenshotVariant,
  scrapeBrandingVariant,
]);
| 161 | + |
/**
 * Scrape request schema.
 *
 * [NOTE] The preprocess step injects `format: "markdown"` when the caller did
 * not supply a format, so a bare `{ url }` validates as a markdown scrape.
 *
 * - Only non-null, non-array objects are touched. Arrays are passed through
 *   unchanged so the union reports them as a non-object, instead of being
 *   spread into an index-keyed object.
 * - A `format` that is present but `undefined` (common when a caller forwards
 *   an unset option) is treated the same as an omitted one.
 * - Any other input, including an explicit `format: null`, is left as-is for
 *   the discriminated union to accept or reject.
 */
export const apiScrapeRequestSchema = z.preprocess((val) => {
  if (typeof val !== "object" || val === null || Array.isArray(val)) return val;
  if ((val as { format?: unknown }).format !== undefined) return val;
  return { ...val, format: "markdown" };
}, apiScrapeDiscriminatedSchema);
| 167 | + |
/** Field definitions for an extract request, before the cross-field check. */
const extractRequestShape = {
  url: apiUrlSchema.optional(),
  html: z.string().optional(),
  markdown: z.string().optional(),
  mode: apiHtmlModeSchema.default("normal"),
  prompt: apiUserPromptSchema,
  schema: z.record(z.string(), z.unknown()).optional(),
  contentType: apiFetchContentTypeSchema.optional(),
  fetchConfig: apiFetchConfigSchema.optional(),
  llmConfig: apiLlmConfigSchema.optional(),
};

/** True when at least one of `url`, `html`, `markdown` is a non-empty string. */
const hasExtractSource = (d: { url?: string; html?: string; markdown?: string }) =>
  Boolean(d.url || d.html || d.markdown);

/**
 * Extract request: `prompt` is required, and at least one content source
 * (`url`, `html`, or `markdown`) must be provided and non-empty.
 */
export const apiExtractRequestBaseSchema = z.object(extractRequestShape).refine(hasExtractSource, {
  message: "Either url, html, or markdown is required",
});
| 183 | + |
/** Optional string-keyed record of arbitrary values, used for `existingSchema`. */
const existingSchemaField = z.record(z.string(), z.unknown()).optional();

/** Optional model name, restricted to `MODEL_NAMES`. */
const generateSchemaModelField = z.enum(MODEL_NAMES).optional();

/** Schema-generation request: a required prompt plus an optional existing schema and model. */
export const apiGenerateSchemaRequestSchema = z.object({
  prompt: apiUserPromptSchema,
  existingSchema: existingSchemaField,
  model: generateSchemaModelField,
});
| 189 | + |
/** Values accepted for the search `timeRange` field. */
const SEARCH_TIME_RANGES = [
  "past_hour",
  "past_24_hours",
  "past_week",
  "past_month",
  "past_year",
] as const;

/** Field definitions for a search request, before the cross-field check. */
const searchRequestShape = {
  query: z.string().min(1).max(500),
  numResults: z.number().int().min(1).max(20).default(3),
  format: z.enum(["html", "markdown"]).default("markdown"),
  mode: apiHtmlModeSchema.default("prune"),
  fetchConfig: apiFetchConfigSchema.optional(),
  prompt: apiUserPromptSchema.optional(),
  schema: z.record(z.string(), z.unknown()).optional(),
  llmConfig: apiLlmConfigSchema.optional(),
  locationGeoCode: z.string().max(10).optional(),
  timeRange: z.enum(SEARCH_TIME_RANGES).optional(),
};

/** A `schema` may only be supplied together with a `prompt`. */
const schemaHasPrompt = (d: { schema?: unknown; prompt?: string }) =>
  d.schema ? Boolean(d.prompt) : true;

/**
 * Search request: `query` is required (1 to 500 characters); the remaining
 * fields are optional or defaulted. Supplying `schema` without `prompt` is rejected.
 */
export const apiSearchRequestSchema = z.object(searchRequestShape).refine(schemaHasPrompt, {
  message: "schema requires prompt",
});
| 208 | + |
/** Maximum lengths for the monitor `name` and `interval` strings on creation. */
const MONITOR_CREATE_NAME_MAX = 200;
const MONITOR_CREATE_INTERVAL_MAX = 100;

/**
 * Monitor creation request.
 * Required: `url`, `prompt`, and `interval` (a string of 1 to 100 characters;
 * its format is not validated here).
 * Optional: `name` (at most 200 characters), `schema`, `webhookUrl`,
 * `fetchConfig`, `llmConfig`.
 */
export const apiMonitorCreateSchema = z.object({
  url: apiUrlSchema,
  name: z.string().max(MONITOR_CREATE_NAME_MAX).optional(),
  prompt: apiUserPromptSchema,
  schema: z.record(z.string(), z.unknown()).optional(),
  webhookUrl: apiUrlSchema.optional(),
  interval: z.string().min(1).max(MONITOR_CREATE_INTERVAL_MAX),
  fetchConfig: apiFetchConfigSchema.optional(),
  llmConfig: apiLlmConfigSchema.optional(),
});
| 219 | + |
/**
 * Monitor update request. Every field is optional, so any subset (including
 * an empty object) is accepted.
 *
 * `webhookUrl` additionally accepts `null`.
 * NOTE(review): `null` presumably clears the webhook — confirm against the handler.
 *
 * Each field is declared `.optional()` explicitly. The trailing `.partial()`
 * was removed because it only wrapped already-optional fields in a second
 * optional layer, without changing which inputs are accepted.
 */
export const apiMonitorUpdateSchema = z.object({
  name: z.string().max(200).optional(),
  prompt: apiUserPromptSchema.optional(),
  schema: z.record(z.string(), z.unknown()).optional(),
  webhookUrl: apiUrlSchema.nullable().optional(),
  interval: z.string().min(1).max(100).optional(),
  fetchConfig: apiFetchConfigSchema.optional(),
  llmConfig: apiLlmConfigSchema.optional(),
});
| 231 | + |
/** Values accepted by `apiCrawlStatusSchema`. */
const API_CRAWL_STATUSES = ["running", "completed", "failed", "cancelled", "paused"] as const;

/** Enum schema for the status of a crawl. */
export const apiCrawlStatusSchema = z.enum(API_CRAWL_STATUSES);
| 239 | + |
/** Values accepted by `apiCrawlPageStatusSchema`. */
const API_CRAWL_PAGE_STATUSES = ["completed", "failed", "skipped"] as const;

/** Enum schema for the status of a single crawled page. */
export const apiCrawlPageStatusSchema = z.enum(API_CRAWL_PAGE_STATUSES);
| 241 | + |
/**
 * Number-coerced integer constrained to [lowest, highest] that falls back to
 * `fallback` when the field is omitted.
 */
const crawlCoercedInt = (lowest: number, highest: number, fallback: number) =>
  z.coerce.number().int().min(lowest).max(highest).default(fallback);

/**
 * Crawl request.
 * - `url`: required start URL.
 * - `maxDepth`: coerced integer in [0, 10], defaults to 2.
 * - `maxPages`: coerced integer in [1, 500], defaults to 50.
 * - `maxLinksPerPage`: coerced integer in [1, 50], defaults to 10.
 * - `allowExternal`: boolean, defaults to false.
 * - `includePatterns`, `excludePatterns`: optional string arrays (pattern syntax
 *   is not validated here).
 * - `contentTypes`: optional array of accepted content types.
 * - `fetchConfig`: optional fetch configuration.
 */
export const apiCrawlRequestSchema = z.object({
  url: apiUrlSchema,
  maxDepth: crawlCoercedInt(0, 10, 2),
  maxPages: crawlCoercedInt(1, 500, 50),
  maxLinksPerPage: crawlCoercedInt(1, 50, 10),
  allowExternal: z.boolean().default(false),
  includePatterns: z.array(z.string()).optional(),
  excludePatterns: z.array(z.string()).optional(),
  contentTypes: z.array(apiFetchContentTypeSchema).optional(),
  fetchConfig: apiFetchConfigSchema.optional(),
});
0 commit comments