Skip to content

Commit 05bc8ed

Browse files
committed
refactor: copy shared api contract schemas
1 parent f7574ec commit 05bc8ed

5 files changed

Lines changed: 354 additions & 179 deletions

File tree

src/index.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,23 @@ export { scrapegraphai } from "./client.js";
22
export type { ApiResult } from "./http.js";
33
export type {
44
ApiCrawlOptions,
5+
ApiCrawlRequest,
56
ApiExtractOptions,
7+
ApiExtractRequestBase,
68
ApiFetchConfig,
79
ApiGenerateSchemaOptions,
10+
ApiGenerateSchemaRequest,
811
ApiHistoryFilterInput,
12+
ApiHistoryService,
13+
ApiHtmlMode,
14+
ApiLlmConfig,
15+
ApiModelName,
916
ApiMonitorCreateInput,
17+
ApiMonitorUpdateInput,
1018
ApiScrapeOptions,
19+
ApiScrapeRequest,
1120
ApiSearchOptions,
21+
ApiSearchRequest,
1222
ClientConfig,
1323
RequestOptions,
1424
} from "./types/index.js";

src/models.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/**
 * Model identifiers the API contract accepts wherever a model name is taken.
 *
 * Declared as a readonly tuple so the literal union below can be derived from
 * it; the element order is part of the value and is kept unchanged.
 */
export const MODEL_NAMES = [
  // gpt-4o-mini identifiers
  "gpt-4o-mini",
  "gpt-4o-mini-2024-07-18",
  // llama identifiers
  "llama-3.3-70b-versatile",
  "llama-3.1-8b-instant",
  // mixtral / mistral identifiers
  "mixtral-8x7b-32768",
  "mistral-small-2501",
  // gpt-oss identifiers (bare and "openai/"-prefixed spellings are both listed)
  "gpt-oss-120b",
  "openai/gpt-oss-120b",
  // claude identifier
  "claude-haiku-4-5-20251001",
] as const;

/** Union of every string literal listed in `MODEL_NAMES`. */
export type ApiModelName = (typeof MODEL_NAMES)[number];

src/schemas.ts

Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
import { z } from "zod/v4";
2+
import { MODEL_NAMES } from "./models.js";
3+
import * as url from "./url.js";
4+
5+
// Literal tuples are hoisted so each enum schema reads as a one-liner.
const API_SERVICE_NAMES = ["scrape", "extract", "schema", "search", "monitor", "crawl"] as const;
const API_STATUS_NAMES = ["completed", "failed"] as const;
const API_HTML_MODES = ["normal", "reader", "prune"] as const;

/** Enum of service names; used by the history filter's optional `service` field. */
export const apiServiceEnumSchema = z.enum(API_SERVICE_NAMES);

/** Enum of terminal statuses: `"completed"` or `"failed"`. */
export const apiStatusEnumSchema = z.enum(API_STATUS_NAMES);

/** Enum of HTML processing modes: `"normal"`, `"reader"` or `"prune"`. */
export const apiHtmlModeSchema = z.enum(API_HTML_MODES);
15+
/**
 * MIME types accepted by the `contentType` / `contentTypes` request fields.
 * Order is preserved because it defines the enum's option order.
 */
const API_FETCH_CONTENT_TYPES = [
  // HTML and PDF
  "text/html",
  "application/pdf",
  // Office Open XML: word processing, presentation, spreadsheet
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
  "application/vnd.openxmlformats-officedocument.presentationml.presentation",
  "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
  // Images
  "image/jpeg",
  "image/png",
  "image/webp",
  "image/gif",
  "image/avif",
  "image/tiff",
  "image/heic",
  "image/bmp",
  // Other document and text formats
  "application/epub+zip",
  "application/rtf",
  "application/vnd.oasis.opendocument.text",
  "text/csv",
  "text/plain",
  "application/x-latex",
] as const;

/** Enum schema over the MIME types listed above. */
export const apiFetchContentTypeSchema = z.enum(API_FETCH_CONTENT_TYPES);
36+
/** Upper bound on prompt length enforced by `apiUserPromptSchema`. */
const API_USER_PROMPT_MAX_LENGTH = 10_000;

/** User prompt: a string of length 1 to 10,000 inclusive. */
export const apiUserPromptSchema = z.string().min(1).max(API_USER_PROMPT_MAX_LENGTH);
37+
38+
/**
 * Returns true only for http(s) URLs whose hostname `url.isInternal` does not
 * flag. Anything that throws while parsing or checking counts as not allowed.
 */
const isPublicHttpUrl = (candidate: string): boolean => {
  try {
    const parsed = new URL(candidate);
    const usesHttp = parsed.protocol === "http:" || parsed.protocol === "https:";
    // Short-circuit keeps the hostname check from running for other protocols.
    return usesHttp && !url.isInternal(parsed.hostname);
  } catch {
    return false;
  }
};

/**
 * URL schema: must be a well-formed URL, use http or https, and not point at
 * a host that `url.isInternal` reports as internal.
 */
export const apiUrlSchema = z
  .url()
  .check(z.refine(isPublicHttpUrl, "Private or internal URLs are not allowed"));
49+
50+
// Both fields coerce their input to a number first, so numeric strings are accepted.
const paginationPageField = z.coerce.number().int().positive().default(1);
const paginationLimitField = z.coerce.number().int().positive().max(100).default(20);

/**
 * Pagination parameters.
 * - `page`: positive integer, defaults to 1.
 * - `limit`: positive integer no greater than 100, defaults to 20.
 */
export const apiPaginationSchema = z.object({
  page: paginationPageField,
  limit: paginationLimitField,
});
54+
55+
/** Case-insensitive 8-4-4-4-12 hexadecimal pattern; version bits are not checked. */
const UUID_SHAPE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;

/** Parameter object carrying an `id` that must match the UUID-shaped pattern. */
export const apiUuidParamSchema = z.object({
  id: z.string().regex(UUID_SHAPE),
});
58+
59+
/**
 * Default values applied by `apiFetchConfigSchema` when the matching field is
 * omitted. Frozen at the type level with `as const`.
 *
 * NOTE(review): `timeout` and `wait` are presumably milliseconds — confirm
 * against the API documentation.
 */
export const FETCH_CONFIG_DEFAULTS = {
  timeout: 15000,
  render: false,
  wait: 0,
  stealth: false,
  scrolls: 0,
} as const;
66+
67+
/**
 * Object form of the `mock` option: integer bounds plus a bucket-write flag,
 * each with its own default.
 * NOTE(review): units of the Kb/Sleep fields are not stated here — confirm.
 */
const fetchMockOptionsSchema = z.object({
  minKb: z.number().int().min(1).max(1000).default(1),
  maxKb: z.number().int().min(1).max(1000).default(5),
  minSleep: z.number().int().min(0).max(30000).default(5),
  maxSleep: z.number().int().min(0).max(30000).default(15),
  writeToBucket: z.boolean().default(false),
});

/** Exactly two characters, lower-cased after validation. */
const fetchCountrySchema = z
  .string()
  .length(2)
  .transform((code) => code.toLowerCase());

/**
 * Fetch configuration shared by the request schemas.
 *
 * Fields with an entry in `FETCH_CONFIG_DEFAULTS` fall back to that value when
 * omitted. `headers`, `cookies` and `country` stay undefined when omitted, and
 * `mock` defaults to `false`.
 */
export const apiFetchConfigSchema = z.object({
  timeout: z.number().int().min(1000).max(30000).default(FETCH_CONFIG_DEFAULTS.timeout),
  render: z.boolean().default(FETCH_CONFIG_DEFAULTS.render),
  wait: z.number().int().min(0).max(30000).default(FETCH_CONFIG_DEFAULTS.wait),
  headers: z.record(z.string(), z.string()).optional(),
  cookies: z.record(z.string(), z.string()).optional(),
  country: fetchCountrySchema.optional(),
  stealth: z.boolean().default(FETCH_CONFIG_DEFAULTS.stealth),
  scrolls: z.number().int().min(0).max(100).default(FETCH_CONFIG_DEFAULTS.scrolls),
  // Either a plain on/off switch or the detailed options object.
  mock: z.union([z.boolean(), fetchMockOptionsSchema]).default(false),
});
93+
94+
/** Chunk size: an integer of at least 2048, or the literal `"dynamic"`. */
const chunkSizeSchema = z.union([z.number().int().min(2048), z.literal("dynamic")]);

/** Chunk overlap: an integer from 0 to 512 inclusive. */
const chunkOverlapSchema = z.number().int().min(0).max(512);

/** Chunker settings; both fields are optional. */
export const apiChunkerSchema = z.object({
  size: chunkSizeSchema.optional(),
  overlap: chunkOverlapSchema.optional(),
});
98+
99+
const llmTemperatureField = z.number().min(0).max(1).default(0);
const llmMaxTokensField = z.number().int().min(1).max(16384).default(4096);

/**
 * LLM settings.
 * - `model`: optional, one of `MODEL_NAMES`.
 * - `temperature`: number in [0, 1], defaults to 0.
 * - `maxTokens`: integer in [1, 16384], defaults to 4096.
 * - `chunker`: optional chunker settings.
 */
export const apiLlmConfigSchema = z.object({
  model: z.enum(MODEL_NAMES).optional(),
  temperature: llmTemperatureField,
  maxTokens: llmMaxTokensField,
  chunker: apiChunkerSchema.optional(),
});
105+
106+
/**
 * History listing filter: the standard pagination fields plus an optional
 * `service` restriction.
 *
 * Built on `apiPaginationSchema` rather than re-declaring `page` and `limit`,
 * so the bounds and defaults (page >= 1 default 1, limit 1..100 default 20)
 * are defined in one place and cannot drift apart. The accepted values are
 * the same as before; parsed output keeps the order page, limit, service.
 */
export const apiHistoryFilterSchema = apiPaginationSchema.extend({
  service: apiServiceEnumSchema.optional(),
});
111+
112+
/** Output formats a scrape request can ask for. */
const API_SCRAPE_FORMATS = ["markdown", "html", "screenshot", "branding"] as const;

/** Enum schema over the scrape output formats. */
export const apiScrapeFormatSchema = z.enum(API_SCRAPE_FORMATS);
113+
114+
/** Builds a fresh `{ mode }` object schema whose mode defaults to `"normal"`. */
const htmlModeConfig = () =>
  z.object({
    mode: apiHtmlModeSchema.default("normal"),
  });

/** Options for markdown output: a single `mode` field, default `"normal"`. */
export const apiMarkdownConfigSchema = htmlModeConfig();

/** Options for HTML output: a single `mode` field, default `"normal"`. */
export const apiHtmlConfigSchema = htmlModeConfig();
121+
122+
/** Integer constrained to [min, max] that falls back to `fallback` when omitted. */
const screenshotIntField = (min: number, max: number, fallback: number) =>
  z.number().int().min(min).max(max).default(fallback);

/**
 * Screenshot options.
 * - `fullPage`: boolean, defaults to false.
 * - `width`: integer 320..3840, defaults to 1440.
 * - `height`: integer 200..2160, defaults to 900.
 * - `quality`: integer 1..100, defaults to 80.
 */
export const apiScreenshotConfigSchema = z.object({
  fullPage: z.boolean().default(false),
  width: screenshotIntField(320, 3840, 1440),
  height: screenshotIntField(200, 2160, 900),
  quality: screenshotIntField(1, 100, 80),
});
128+
129+
/** Fields every scrape variant shares; spread into each variant's shape. */
const scrapeBase = {
  url: apiUrlSchema,
  contentType: apiFetchContentTypeSchema.optional(),
  fetchConfig: apiFetchConfigSchema.optional(),
};

/** `format: "markdown"` — markdown options default to `{ mode: "normal" }`. */
const scrapeMarkdownVariant = z.object({
  ...scrapeBase,
  format: z.literal("markdown"),
  markdown: apiMarkdownConfigSchema.default({ mode: "normal" }),
});

/** `format: "html"` — HTML options default to `{ mode: "normal" }`. */
const scrapeHtmlVariant = z.object({
  ...scrapeBase,
  format: z.literal("html"),
  html: apiHtmlConfigSchema.default({ mode: "normal" }),
});

/** `format: "screenshot"` — screenshot options default to the full object below. */
const scrapeScreenshotVariant = z.object({
  ...scrapeBase,
  format: z.literal("screenshot"),
  screenshot: apiScreenshotConfigSchema.default({
    fullPage: false,
    width: 1440,
    height: 900,
    quality: 80,
  }),
});

/** `format: "branding"` — no format-specific options. */
const scrapeBrandingVariant = z.object({
  ...scrapeBase,
  format: z.literal("branding"),
});

/** Scrape request variants, selected by the literal value of `format`. */
const apiScrapeDiscriminatedSchema = z.discriminatedUnion("format", [
  scrapeMarkdownVariant,
  scrapeHtmlVariant,
  scrapeScreenshotVariant,
  scrapeBrandingVariant,
]);
161+
162+
/**
 * Scrape request schema in which `format` defaults to `"markdown"`.
 *
 * The discriminated union needs a `format` tag to choose a variant, so a
 * preprocess step fills it in before validation; a bare `{ url }` therefore
 * parses as a markdown request.
 *
 * - A `format` that is present but `undefined` is treated like a missing one,
 *   so callers forwarding optional options do not hit a discriminator error.
 * - Arrays, `null` and non-objects are passed through untouched so the union
 *   reports the real type error instead of a misleading missing-`url` one.
 */
export const apiScrapeRequestSchema = z.preprocess((val) => {
  if (typeof val !== "object" || val === null || Array.isArray(val)) return val;
  // Narrow just enough to read the tag; the union validates everything else.
  if ((val as { format?: unknown }).format !== undefined) return val;
  return { ...val, format: "markdown" };
}, apiScrapeDiscriminatedSchema);
167+
168+
/** Field set for an extract request, before the cross-field check is applied. */
const extractRequestFields = z.object({
  url: apiUrlSchema.optional(),
  html: z.string().optional(),
  markdown: z.string().optional(),
  mode: apiHtmlModeSchema.default("normal"),
  prompt: apiUserPromptSchema,
  schema: z.record(z.string(), z.unknown()).optional(),
  contentType: apiFetchContentTypeSchema.optional(),
  fetchConfig: apiFetchConfigSchema.optional(),
  llmConfig: apiLlmConfigSchema.optional(),
});

/**
 * Extract request: `prompt` is required, and at least one of `url`, `html` or
 * `markdown` must be supplied with a non-empty value.
 */
export const apiExtractRequestBaseSchema = extractRequestFields.refine(
  // Truthiness check on purpose: an empty string does not count as a source.
  (input) => Boolean(input.url || input.html || input.markdown),
  {
    message: "Either url, html, or markdown is required",
  },
);
183+
184+
/** String-keyed object with arbitrary values, used for `existingSchema`. */
const generateSchemaExistingField = z.record(z.string(), z.unknown());

/**
 * Schema-generation request: a required `prompt`, plus an optional
 * `existingSchema` object and an optional `model` from `MODEL_NAMES`.
 */
export const apiGenerateSchemaRequestSchema = z.object({
  prompt: apiUserPromptSchema,
  existingSchema: generateSchemaExistingField.optional(),
  model: z.enum(MODEL_NAMES).optional(),
});
189+
190+
/** Time windows accepted by the optional `timeRange` search field. */
const SEARCH_TIME_RANGES = [
  "past_hour",
  "past_24_hours",
  "past_week",
  "past_month",
  "past_year",
] as const;

/** Field set for a search request, before the cross-field check is applied. */
const searchRequestFields = z.object({
  query: z.string().min(1).max(500),
  numResults: z.number().int().min(1).max(20).default(3),
  format: z.enum(["html", "markdown"]).default("markdown"),
  mode: apiHtmlModeSchema.default("prune"),
  fetchConfig: apiFetchConfigSchema.optional(),
  prompt: apiUserPromptSchema.optional(),
  schema: z.record(z.string(), z.unknown()).optional(),
  llmConfig: apiLlmConfigSchema.optional(),
  locationGeoCode: z.string().max(10).optional(),
  timeRange: z.enum(SEARCH_TIME_RANGES).optional(),
});

/**
 * Search request: `query` is required (1..500 characters); `numResults`,
 * `format` and `mode` have defaults. Supplying `schema` without `prompt`
 * is rejected.
 */
export const apiSearchRequestSchema = searchRequestFields.refine(
  (input) => {
    // No schema means there is nothing to pair with a prompt.
    if (!input.schema) return true;
    return Boolean(input.prompt);
  },
  {
    message: "schema requires prompt",
  },
);
208+
209+
// NOTE(review): the expected `interval` format is not visible here; only its length (1..100) is checked.
const monitorCreateIntervalField = z.string().min(1).max(100);
const monitorCreateNameField = z.string().max(200);

/**
 * Monitor creation payload: `url`, `prompt` and `interval` are required;
 * `name`, `schema`, `webhookUrl`, `fetchConfig` and `llmConfig` are optional.
 * `webhookUrl` is validated with the same URL rules as `url`.
 */
export const apiMonitorCreateSchema = z.object({
  url: apiUrlSchema,
  name: monitorCreateNameField.optional(),
  prompt: apiUserPromptSchema,
  schema: z.record(z.string(), z.unknown()).optional(),
  webhookUrl: apiUrlSchema.optional(),
  interval: monitorCreateIntervalField,
  fetchConfig: apiFetchConfigSchema.optional(),
  llmConfig: apiLlmConfigSchema.optional(),
});
219+
220+
/**
 * Monitor update payload: every field may be omitted.
 *
 * Each field is already declared `.optional()`, so the former trailing
 * `.partial()` was a no-op and has been dropped; accepted input and the
 * inferred type are unchanged.
 *
 * `webhookUrl` additionally accepts `null`.
 * NOTE(review): presumably `null` clears the stored webhook — confirm.
 */
export const apiMonitorUpdateSchema = z.object({
  name: z.string().max(200).optional(),
  prompt: apiUserPromptSchema.optional(),
  schema: z.record(z.string(), z.unknown()).optional(),
  webhookUrl: apiUrlSchema.nullable().optional(),
  interval: z.string().min(1).max(100).optional(),
  fetchConfig: apiFetchConfigSchema.optional(),
  llmConfig: apiLlmConfigSchema.optional(),
});
231+
232+
const API_CRAWL_STATUSES = ["running", "completed", "failed", "cancelled", "paused"] as const;
const API_CRAWL_PAGE_STATUSES = ["completed", "failed", "skipped"] as const;

/** Enum of statuses for a crawl as a whole. */
export const apiCrawlStatusSchema = z.enum(API_CRAWL_STATUSES);

/** Enum of statuses for a single crawled page. */
export const apiCrawlPageStatusSchema = z.enum(API_CRAWL_PAGE_STATUSES);
241+
242+
/** Coerces input to a number, then requires an integer in [min, max]; `fallback` is used when omitted. */
const crawlCoercedInt = (min: number, max: number, fallback: number) =>
  z.coerce.number().int().min(min).max(max).default(fallback);

/**
 * Crawl request.
 * - `url`: required start URL.
 * - `maxDepth`: integer 0..10, defaults to 2.
 * - `maxPages`: integer 1..500, defaults to 50.
 * - `maxLinksPerPage`: integer 1..50, defaults to 10.
 * - `allowExternal`: boolean, defaults to false (not coerced, unlike the numeric fields).
 * - `includePatterns` / `excludePatterns`: optional string arrays.
 * - `contentTypes`: optional array of accepted MIME types.
 * - `fetchConfig`: optional fetch configuration.
 */
export const apiCrawlRequestSchema = z.object({
  url: apiUrlSchema,
  maxDepth: crawlCoercedInt(0, 10, 2),
  maxPages: crawlCoercedInt(1, 500, 50),
  maxLinksPerPage: crawlCoercedInt(1, 50, 10),
  allowExternal: z.boolean().default(false),
  includePatterns: z.array(z.string()).optional(),
  excludePatterns: z.array(z.string()).optional(),
  contentTypes: z.array(apiFetchContentTypeSchema).optional(),
  fetchConfig: apiFetchConfigSchema.optional(),
});

0 commit comments

Comments
 (0)