From 55a7f42dc6d68c00f20eb90adc02b643df118f0c Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 14 Apr 2026 14:12:51 +0200 Subject: [PATCH 01/28] feat(v2): add zod schemas and types for v2 API - Add all API request/response schemas matching v2 API exactly - Remove llmConfig from schemas (not exposed in SDK) - Add comprehensive types for all endpoints Co-Authored-By: Claude Opus 4.5 --- .claude/rules/coding-style.md | 714 ++++++++++++++++++++++++++++++++++ .claude/rules/git-style.md | 29 ++ src/schemas.ts | 286 ++++++++++++++ src/types/index.ts | 601 ++++++++++++++++++---------- 4 files changed, 1418 insertions(+), 212 deletions(-) create mode 100644 .claude/rules/coding-style.md create mode 100644 .claude/rules/git-style.md create mode 100644 src/schemas.ts diff --git a/.claude/rules/coding-style.md b/.claude/rules/coding-style.md new file mode 100644 index 0000000..364aa43 --- /dev/null +++ b/.claude/rules/coding-style.md @@ -0,0 +1,714 @@ +# Coding Style Guide + +Universal TypeScript patterns. Framework-specific rules live in `api.md` and `web.md`. + +--- + +## 1. File Organization + +Every piece of code has exactly one home. No exceptions. Framework-specific layouts in `api.md` and `web.md`. + +### Placement Rules (Non-Negotiable) + +| What | Where | Rule | +|---|---|---| +| Hand-written types, interfaces, type aliases | `types/index.ts` | Types NEVER live in `lib/`, `app/`, hooks, components (error classes in `lib/errors.ts` are code, not types) | +| ORM-inferred types (`$inferSelect`, `$inferInsert`) | Co-located with schema in `db/schema.ts` | The ONE exception — generated from schema, not hand-written | +| Constants (maps, sets, numbers, config) | `lib/constants.ts` (frontend) or on the module namespace (backend, e.g. `redis.KEYS`). 
Cross-cutting backend constants with no domain home → `lib/constants.ts` | No functions in constants files | +| Utility functions | `lib/utils.ts` | Pure helpers only — no DB, no API calls, no side effects | +| Zod schemas | `lib/schemas.ts` (frontend) or `routes/*/schemas.ts` (backend) | Never inline schemas in route handlers or components | +| DB queries | `db/*.ts` | Drizzle typed builders, no raw SQL for WHERE/ORDER | + +**The moment a second file needs a type, constant, or schema — move it to its canonical home.** + +**All hand-written types go in `types/index.ts`.** Three exceptions: + +1. **ORM-inferred types** (`$inferSelect`, `$inferInsert`) stay in `db/schema.ts` +2. **Schema-inferred types** (`z.infer`) can stay co-located when used by a single file. When a second file needs it, move to `types/index.ts` +3. **Component-local props interfaces** can stay in a component file if used only within that component + +**Scaling**: Start with `types/index.ts`. Past ~50 types, split by domain (`types/api.ts`, `types/dashboard.ts`) with barrel re-exports. + +### Monorepo (shared package) + +When both apps need the same types, schemas, or DB definitions, extract into a shared package with subpath imports (`@myapp/shared/db`, `@myapp/shared/types`, etc.). Never barrel-import from the package root. + +--- + +## 2. Module Export & Import Patterns + +### Multi-Function Modules + +Two patterns — both result in `module.verb()` at the call site: + +**Pattern A: Default namespace object** (domain modules with shared constants) + +```ts +// lib/redis.ts +function get(key: string) { ... } +function set(key: string, value: unknown, ttl?: number) { ... 
} +const KEYS = { html: (hash: string) => `cache:html:${hash}` } + +export default { get, set, KEYS } + +// consumer +import redis from "@/lib/redis" +redis.get(redis.KEYS.html(hash)) +``` + +**Pattern B: Named exports with star import** (when you need to export types alongside functions) + +```ts +// lib/email.ts +export function sendVerification(to: string, url: string) { ... } +export function sendPasswordReset(to: string, url: string) { ... } + +// consumer +import * as email from "@/lib/email" +email.sendVerification(to, url) +``` + +Pick one per module, don't mix default + named exports. **Namespace is mandatory for domain modules** — bare function names are ambiguous: + +```ts +// BANNED +import { sendVerification } from "@/lib/email" +sendVerification(to, url) + +// REQUIRED +import * as email from "@/lib/email" +email.sendVerification(to, url) +``` + +**When named imports are OK** (no namespace needed): + +- Error classes: `import { HttpError } from "@/lib/errors"` +- Singletons/instances: `import { stripe } from "@/lib/stripe"` +- Grab-bag utility modules (`utils.ts`): functions are already unambiguous — `slugify()`, `cn()`, `formatDate()` don't need a `utils.` prefix +- Server actions consumed by Next.js `action={fn}` prop (needs bare reference) +- Co-located same-directory imports (`import { createTaskSchema } from "./schemas"`) — no namespace stuttering +- Re-exports from barrel files (`types/index.ts`) + +### Single-Function Modules + +Default export. Import alias matches the module domain or the function name. Don't repeat file context — `send` in `email.ts`, not `sendEmail`. + +```ts +// lib/email.ts +export default async function send(to: string, subject: string) { ... } + +// consumer +import send from "@/lib/email" +send(to, "Welcome") +``` + +### Nested Namespace Objects + +When a module has multiple related sub-domains, group them as nested objects. The call site reads like `module.subdomain.verb()`. 
+ +```ts +// lib/payment.ts +const customer = { + async create(email: string): Promise { ... }, + async get(id: string): Promise { ... }, +} + +const charge = { + async create(input: ChargeInput): Promise { ... }, + async refund(chargeId: string): Promise { ... }, +} + +const webhook = { + verify(payload: string, signature: string): boolean { ... }, + async process(event: WebhookEvent): Promise { ... }, +} + +export default { customer, charge, webhook } + +// consumer +import payment from "@/lib/payment" +payment.customer.create(email) +payment.charge.refund(chargeId) +payment.webhook.verify(payload, sig) +``` + +**When to nest**: the module covers a single domain but has distinct sub-concerns (customers, charges, webhooks). Without nesting you'd get flat functions like `createCustomer`, `getCustomer`, `createCharge`, `refundCharge`, `verifyWebhook` — zero structure, zero discoverability. + +```ts +// BANNED — flat loose functions with prefixes to disambiguate +export function createCustomer(email: string) { ... } +export function getCustomer(id: string) { ... } +export function createCharge(input: ChargeInput) { ... } +export function refundCharge(chargeId: string) { ... } +export function verifyWebhook(payload: string, sig: string) { ... } + +// REQUIRED — nested namespaces +export default { customer, charge, webhook } +``` + +**Rule of thumb**: if you're prefixing function names to disambiguate (`create*`, `get*`, `verify*`) — you need sub-objects instead. 
+ +### Decision Matrix + +| Module exports | Export style | Import style | Call site | +|---|---|---|---| +| Multiple functions | `export default { fn1, fn2 }` or named exports | `import mod from` or `import * as mod from` | `mod.fn1()` | +| Single function (lib) | `export default function name()` | `import name from` | `name()` | +| React component | `export function Component()` | `import { Component } from` | `` | +| Single instance | `export const thing = ...` | `import { thing } from` | `thing.method()` | +| Types only | `export type / export interface` | `import type { T } from` | — | +| Error classes | `export class FooError` | `import { FooError } from` | `instanceof FooError` | +| Library integration | Semantic export (`*` or full object) | `import * as name from` | `name.method()` | + +--- + +## 3. Naming + +### API Fields (camelCase — Non-Negotiable) + +All JSON over the wire — request schemas, response bodies, SSE event payloads, webhook payloads — uses **camelCase**. Matches Drizzle ORM convention so `c.json(row)` works with no mapping. No snake_case anywhere in the API contract. + +### Files + +| Type | Convention | Examples | +|---|---|---| +| Modules | `kebab-case.ts` | `rate-limit.ts`, `auth-client.ts` | +| Components | `kebab-case.tsx` | `nav-bar.tsx`, `pricing-card.tsx` | +| Hooks | `use-*.ts` | `use-oauth.ts`, `use-debounce.ts` | +| Types | `index.ts` in `types/` | One file, all types | +| Tests | `*.test.ts` | `credits.test.ts` | + +### Functions + +**Never repeat the module name in the function name.** `module.verb()`. + +```ts +// BANNED // REQUIRED +tokens.countTokens() tokens.count() +email.sendVerificationEmail() email.sendVerification() +cache.getCacheEntry() cache.get() +``` + +### Types + +Every type name must make sense in isolation. 
+ +| Layer | Pattern | Examples | +|---|---|---| +| DB rows (read) | `*Select` | `UserSelect`, `OrderSelect` | +| DB rows (write) | `*Insert` | `UserInsert`, `OrderInsert` | +| API requests | `Api*Request` | `ApiCreateOrderRequest` | +| API responses | `Api*Response` | `ApiOrderResponse` | +| Discriminated entries | `Api*Entry` | `ApiHistoryEntry` | +| Paginated wrappers | `ApiPageResponse` | `ApiPageResponse` aliased as `ApiHistoryPage` | +| UI/domain types | Domain prefix | `DashboardProps`, `StripeInvoice` | +| Config objects | `*Config` / `*Options` | `FetchConfig`, `RetryOptions` | +| Generic utilities | No prefix | `ActionResponse`, `ApiPageResponse` | + +**DB types come from ORM schema inference** — never hand-roll interfaces. Use `Pick<>` / `Omit<>` to derive subsets. + +```ts +// BANNED +interface User { id: string; email: string; name: string } + +// REQUIRED +import type { UserSelect } from "@sgai/shared/db" +type UserSummary = Pick +``` + +--- + +## 4. Code Patterns + +### Early Returns + +Flip the condition, return early, keep the happy path flat. + +```ts +// BANNED // REQUIRED +if (user) { if (!user) return null + if (user.isActive) { if (!user.isActive) return null + if (user.hasPermission) { if (!user.hasPermission) return null + return doThing(user) return doThing(user) + } + } +} +return null +``` + +### Resolve Pattern (Kill Duplicate Paths) + +### Helper Bloat (Banned) + +Do not stack tiny helpers that only rename, normalize, or forward data once. + +```ts +// BANNED +function normalizeMonitorDiffs(diffs?: Partial): ApiMonitorDiffs { + return { + markdown: diffs?.markdown ?? [], + json: diffs?.json ?? [], + } +} + +function countMonitorDiffs(diffs?: Partial): number { + const normalized = normalizeMonitorDiffs(diffs) + return normalized.markdown.length + normalized.json.length +} + +// REQUIRED +function countMonitorDiffs(diffs?: Partial): number { + return (diffs?.markdown?.length ?? 0) + (diffs?.json?.length ?? 
0) +} +``` + +Rules: + +- If a helper is called once, inline it unless it removes real complexity +- If a helper only adds defaults, rename indirection, or one property shuffle, inline it +- Do not create `normalize*`, `build*`, `create*`, `to*` wrappers unless they hide real branching or repeated logic +- A helper must pay rent: repeated use, meaningful branching, or domain logic worth naming + +When branching logic feeds into the same response, extract a `resolve` function returning a unified shape. + +```ts +async function resolve(url: string) { + const cached = await cache.get(url) + if (cached) return { content: cached, provider: "cache", cached: true } + const result = await fetcher.fetch(url) + await cache.set(url, result.content) + return { content: result.content, provider: result.provider, cached: false } +} +``` + +### No Wrapper Abstractions + +Keep modules generic. Call sites are explicit about keys. + +```ts +// BANNED: redis.getHtml(hash) +// REQUIRED: redis.get(redis.KEYS.html(hash)) +``` + +--- + +## 5. Functions + +Small functions, small names, one thing. But don't abstract two obvious lines. + +Refactor into a function when: +1. **Readability** — the function name explains a non-obvious implementation +2. **Redundancy** — the same logic appears in 3+ places + +Three similar lines in one file is better than a premature abstraction. + +**No over-engineering**: No factory-of-factories, no abstractions used < 3 times, no config objects for things that could be arguments, no feature flags for hypothetical futures. + +**Component-local state/event logic stays inline**: If a reducer/helper exists only to support one component or one `useEvent(...)` handler, keep it inside the handler or component body. Do not extract tiny `isXEvent`, `applyXEvent`, `upsertX`, or `countX` helpers unless the exact logic is reused in 3+ places or the extracted name removes real complexity. 
+ +**Use clear verbs for mutations**: If a function changes state, name it like an action: `complete`, `pause`, `resume`, `flush`, `setStatus`. Do not hide writes behind vague names like `done`, `handle`, `process`, `finalize`, or enterprise sludge like `finishPendingJob` when a plain verb says the same thing. + +**Reads and writes must be obvious from the name**: Read-only functions use `get*`/`list*`/`find*`. Mutating functions use a verb. Never make a name sound like a read when it writes, and never split one simple state transition across multiple vaguely named helpers. + +**Do not re-declare existing shared shapes**: If an event, API payload, or domain object already has a shared type, import it and narrow it with `Extract<>`, indexed access, or helpers from the shared type. Do not hand-write local duplicates of existing contracts. + +Do: +```ts +let event: Extract +``` + +Don't: +```ts +let event: + | { type: "crawl.page.failed"; crawlId: string; page: ApiCrawlPage; error: string } + | { type: "crawl.page.skipped"; crawlId: string; page: ApiCrawlPage; reason: string } +``` + +--- + +## 6. Comments + +Code says "what" — comments say "why". Plain `//` with tag and `@Claude` annotation. 
+ +**Tag format**: `// [TAG] @Claude ` + +| Tag | When to use | +|---|---| +| `[NOTE]` | Non-obvious logic — race conditions, ordering dependencies, cache invalidation | +| `[TODO]` | Known improvement or missing piece | +| `[BUG]` | Known bug or workaround for upstream issue | +| `[REFACTOR]` | Tech debt — works but should be restructured | + +```ts +// [NOTE] @Claude invalidate cache before DB write — stale reads on concurrent requests otherwise +await redis.del(redis.KEYS.task(taskId)) +await db.update(tasks).set({ status: "completed" }).where(eq(tasks.id, taskId)) + +// [BUG] @Claude Readability returns empty string for SPAs — fall back to raw HTML +if (!extracted.length) return raw +``` + +**`@Claude` is mandatory** — team standard for AI context attribution and auditability. + +### Strictly Forbidden + +- Comments restating what the code does +- Comments without a tag (`[NOTE]`, `[TODO]`, `[BUG]`, `[REFACTOR]`) +- Tagged comments without `@Claude` +- JSDoc on internal functions (types ARE the docs) +- Commented-out code (git has history) +- `@param` / `@returns` except on shared package public API + +--- + +## 7. Error Handling + +### Backend + +Define a base error class, extend per domain. Routes throw, middleware catches — no try/catch in route handlers. See `api.md` for error classes and framework wiring. + +### Frontend + +Server actions catch errors internally and return `{ data: null, error: "message" }`. Opposite of backend where you throw and let middleware catch. + +### ActionResponse (frontend only) + +Define once in `types/index.ts`. Used by **server actions and frontend code only** — API endpoints use HTTP status codes. + +```ts +export type ActionResponse = + | { data: T; error: null } + | { data: null; error: string } +``` + +- **Server Actions** (`"use server"`): Always return `ActionResponse`. +- **API Endpoints**: HTTP status codes + JSON body. Frontend callers wrap fetch results in `ActionResponse`. 
+- **Pure backend projects**: You don't need `ActionResponse` — it's a frontend contract. + +--- + +## 8. Types & Schemas + +### Discriminated Unions + +Use discriminated unions for polymorphic data. Pick a discriminator field and be consistent. + +```ts +interface OrderBase { id: string; createdAt: string; status: OrderStatus } + +interface PhysicalOrder extends OrderBase { + type: "physical" + shippingAddress: string +} + +interface DigitalOrder extends OrderBase { + type: "digital" + downloadUrl: string +} + +type Order = PhysicalOrder | DigitalOrder +``` + +### Event Types + +Discriminate by `type` field with **dotted namespace**: `{domain}.{resource}.{verb}`. + +**Standard verbs** (use these, nothing else): + +| Verb | Meaning | +|---|---| +| `started` | Operation began | +| `completed` | Operation finished successfully | +| `failed` | Operation errored | +| `detected` | Something was observed (e.g. change detected) | +| `paused` | Resource was paused/suspended | + +**Type naming**: `Api{Domain}Event` — no `Streaming` suffix. The transport (SSE, Redis pub/sub, webhook) is irrelevant to the type name. + +```ts +type ApiOrderEvent = + | { type: "order.payment.started" } + | { type: "order.payment.completed"; transactionId: string } + | { type: "order.result"; data: OrderResponse } + | { type: "order.failed"; error: string; code: string } +``` + +**Webhook payloads** use the same `type` strings as events but wrap richer data in a `data` field. Defined as a separate discriminated union (`ApiWebhookPayload`). + +```ts +type ApiWebhookPayload = + | { type: "order.change.detected"; data: { ... } } + | { type: "order.test"; data: { ... } } +``` + +### Provider Interfaces (Swappable Implementations) + +When multiple things do the same job, define an interface contract. Each implementation satisfies the interface — swap them without touching consumers. 
+ +```ts +// types/index.ts +interface StorageProvider { + name: string + async upload(key: string, data: Buffer): Promise + async download(key: string): Promise + async delete(key: string): Promise +} + +// lib/storage/s3.ts +const s3: StorageProvider = { + name: "s3", + async upload(key, data) { ... }, + async download(key) { ... }, + async delete(key) { ... }, +} + +// lib/storage/local.ts +const local: StorageProvider = { + name: "local", + async upload(key, data) { ... }, + async download(key) { ... }, + async delete(key) { ... }, +} + +// consumer — doesn't care which provider +async function saveReport(storage: StorageProvider, report: Buffer) { + const url = await storage.upload("reports/latest.pdf", report) + ... +} +``` + +**When to use**: 2+ implementations with the same shape. Classic examples: +- Storage backends (S3 vs local filesystem vs GCS) +- Notification channels (email vs Slack vs Discord) +- Cache layers (Redis vs in-memory vs SQLite) +- Queue drivers (SQS vs RabbitMQ vs in-process) + +**When NOT to use**: one implementation with a hypothetical future second. YAGNI — inline it. Extract the interface when the second implementation actually exists. + +**Type the constant, not just the function signatures.** Annotating `const x: MyInterface = { ... }` catches mismatches at definition, not at the call site 3 files away. + +```ts +// BANNED — no contract, errors surface at call site +const s3 = { + name: "s3", + async upload(key: string, data: Buffer) { ... }, +} + +// REQUIRED — interface enforced at definition +const s3: StorageProvider = { + name: "s3", + async upload(key, data) { ... }, +} +``` + +### Zod Schemas + +Compose from small reusable sub-schemas. Infer types alongside. 
+ +```ts +const paginationSchema = z.object({ + page: z.coerce.number().int().positive().default(1), + limit: z.coerce.number().int().positive().max(100).default(20), +}) + +const orderFilterSchema = paginationSchema.extend({ + status: z.enum(["pending", "completed", "failed"]).optional(), +}) + +type OrderFilter = z.infer +``` + +Schema-inferred types (`z.infer<...>`) can stay co-located when used by a single file. When a second file needs it, move to `types/index.ts`. + +--- + +## 9. Database (Drizzle ORM) + +### Typed Query Builders + +Always use Drizzle's typed methods. Raw `sql` only for Postgres functions Drizzle doesn't wrap. + +```ts +// BANNED +db.select().from(schema.orders).where(sql`${schema.orders.userId} = ${userId}`) + +// REQUIRED +import { and, desc, eq, gte } from "drizzle-orm" +db.select().from(schema.orders) + .where(and(eq(schema.orders.userId, userId), gte(schema.orders.createdAt, since))) + .orderBy(desc(schema.orders.createdAt)) +``` + +**Raw `sql` OK for**: `date_trunc`, `COALESCE`, `CASE WHEN`, `NULLIF`, window functions, custom aggregates. Always check Drizzle docs first. + +### Schema as Source of Truth + +```ts +export const orders = pgTable("orders", { + id: uuid("id").primaryKey().defaultRandom(), + userId: text("user_id").notNull().references(() => user.id), + status: text("status", { enum: ["pending", "completed", "failed"] }).notNull(), + total: integer("total").notNull(), + createdAt: timestamp("created_at").defaultNow().notNull(), +}) + +export type OrderSelect = typeof orders.$inferSelect +export type OrderInsert = typeof orders.$inferInsert +``` + +### Table Imports (Namespace Required) + +Always import table definitions via `import * as schema from "@sgai/shared/db"`. Access tables as `schema.tableName`. This avoids name clashes with domain modules (e.g. `cron` module vs `cron` table) and makes it instantly clear what's a table reference vs a function call. 
+ +```ts +// BANNED — bare table imports clash with domain modules +import { cron, subscriptions, apiKeys } from "@sgai/shared/db" + +// REQUIRED — schema namespace +import * as schema from "@sgai/shared/db" + +schema.cron.userId +schema.subscriptions.remainingCredits +schema.apiKeys.apiKey +``` + +Types and factory functions stay as named imports — only table constants use the namespace: + +```ts +import * as schema from "@sgai/shared/db" +import type { ApiKeySelect, CronSelect } from "@sgai/shared/db" +import { createDb, type Database } from "@sgai/shared/db" +``` + +### Migrations + +Always generate + migrate. Never `db:push` in production. + +--- + +## 10. Logging (Pino) + +Structured logging with pino. One `lib/logger.ts`, child loggers per module. + +### Event Naming + +The pino message string (second argument) is a namespaced event name using **dot-separated** segments: `{domain}.{action}.{status}` — same convention as streaming/event types (Section 8). One delimiter everywhere, no cognitive overhead. Data goes in the first argument object. + +```ts +// BANNED — free-form prose messages +log.info({ count: 5 }, "finished processing batch") +log.error({ error: err.message }, "failed to save record") + +// REQUIRED — namespaced event as message, data in object +log.info({ count: 5 }, "job.batch.completed") +log.error({ error: err.message }, "job.record.save.failed") + +// No data? Message-only is fine +log.debug("job.parse.fallback") +``` + +### Naming Convention + +`{domain}.{resource}.{action}` — always lowercase, dot-separated. 
+ +| Pattern | Examples | +|---|---| +| `{domain}.started` | `job.started`, `sync.started` | +| `{domain}.completed` | `job.completed`, `job.batch.completed` | +| `{domain}.failed` | `job.fetch.failed`, `job.record.save.failed` | +| `{domain}.{resource}.{action}` | `cron.schedule.created`, `queue.task.enqueued` | +| `{domain}.{action}.{status}` | `email.send.started`, `email.send.failed`, `cache.lookup.miss` | + +### Rules + +- **Event name is the pino message** (second arg) — `log.info({ data }, "domain.action.status")` +- **Child loggers** per module: `logger.child({ module: "email", recipient })` +- **No `console.log`** — use pino everywhere +- **Log level**: `debug` for internal flow details, `info` for operations completing, `warn` for recoverable issues, `error` for failures +- **pino-pretty** in dev, structured JSON in production + +--- + +## 11. Environment Variables + +Validate at startup with Zod. Crash on bad config — fail loud, fail early. + +```ts +const envSchema = z.object({ + DATABASE_URL: z.string().url(), + STRIPE_SECRET_KEY: z.string().startsWith("sk_"), + NODE_ENV: z.enum(["development", "production", "test"]).default("development"), +}) + +export const env = envSchema.parse(process.env) +``` + +No `process.env.THING` scattered across files. One `env.ts`, one import. **Monorepos**: each app has its own `env.ts` — shared package does NOT validate env vars. + +--- + +## 12. TypeScript Rules + +- `strict: true` always +- Never `any` — use `unknown` + narrowing, generics, or proper types +- `as const` for literal objects/arrays (not for objects with function values) +- `satisfies` for type-checked literals that keep narrow type. Combine both when you need narrowing + type checking: `const X = { ... } as const satisfies Record` +- Path aliases: `@/*` maps to source root +- `interface` for object shapes, `type` for unions/intersections + +--- + +## 13. Linting, Formatting & Validation Flow + +Biome replaces ESLint + Prettier. 
Single `biome.json` at project root: + +- **Tabs** for indentation, **100 char line width** +- **Recommended rules** enabled +- **Import organization** enabled +- **Ignores**: `node_modules`, `dist`, `.next`, `drizzle`, `components/ui` + +### Before Every Commit (MANDATORY) + +```bash +bun run format # Auto-fix formatting + imports +bun run lint # Check for remaining errors +bunx tsc --noEmit # TypeScript type checking +bun test # Tests (if applicable) +``` + +No exceptions. Web also runs `bun run build` for production validation. + +--- + +## 14. Testing Strategy + +Tests live alongside source: `email.ts` → `email.test.ts`. Use Bun test (API) or Vitest (Next.js). + +**Test**: Pure functions, API calls (mocked), business logic, edge cases. +**Don't test**: UI rendering, DB queries directly, third-party library behavior. + +Mock at the boundary (API calls, external services). Use factories for complex test objects: + +```ts +function makeUser(overrides?: Partial): UserSelect { + return { id: "test-id", email: "test@example.com", name: "Test User", ...overrides } +} +``` + +--- + +## 15. 
What NOT to Do + +- **No `any`** — `unknown`, generics, or proper types +- **No raw `sql`** for WHERE/ORDER — use ORM typed builders +- **No hand-written types outside `types/index.ts`** — exceptions: ORM-inferred, schema-inferred (single consumer), component-local props +- **No functions in constants files** +- **No manual DB type definitions** — derive from ORM schema +- **No JSDoc on internal functions** — types are the docs +- **No commented-out code** — git has history +- **No module name in function name** — `tokens.count()` not `tokens.countTokens()` +- **No wrapper abstractions** — generic modules, explicit call sites +- **No premature abstractions** — not used 3+ times → inline it +- **No extracted one-off UI event reducers/helpers** — keep tiny component-only event/state updates inline +- **No scattered `process.env`** — one `env.ts` +- **No `db:push` in production** — generate + migrate diff --git a/.claude/rules/git-style.md b/.claude/rules/git-style.md new file mode 100644 index 0000000..0c7cef5 --- /dev/null +++ b/.claude/rules/git-style.md @@ -0,0 +1,29 @@ +# Git Commit Style + +Conventional Commits. Format: `(): ` + +## Types + +| Type | When | +|------|------| +| `feat` | New feature | +| `fix` | Bug fix | +| `refactor` | Code change (not fix, not feature) | +| `chore` | Maintenance (deps, config, build) | +| `docs` | Documentation only | +| `style` | Formatting, whitespace | +| `perf` | Performance improvement | +| `test` | Adding/fixing tests | +| `content` | Content changes (blog, copy) | + +## Scope + +Optional. Area affected: `auth`, `payments`, `ui`, `api`, `referral`, `seo`, `web`, `shared`. + +## Rules + +1. Lowercase everything +2. No period at the end +3. Imperative mood ("add" not "added") +4. First line under 72 chars +5. 
Scope optional but helps changelogs diff --git a/src/schemas.ts b/src/schemas.ts new file mode 100644 index 0000000..264a9aa --- /dev/null +++ b/src/schemas.ts @@ -0,0 +1,286 @@ +import { z } from "zod"; + +export const apiServiceEnumSchema = z.enum([ + "scrape", + "extract", + "schema", + "search", + "monitor", + "crawl", +]); + +export const apiStatusEnumSchema = z.enum(["completed", "failed"]); + +export const apiHtmlModeSchema = z.enum(["normal", "reader", "prune"]); + +export const apiFetchContentTypeSchema = z.enum([ + "text/html", + "application/pdf", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "image/jpeg", + "image/png", + "image/webp", + "image/gif", + "image/avif", + "image/tiff", + "image/heic", + "image/bmp", + "application/epub+zip", + "application/rtf", + "application/vnd.oasis.opendocument.text", + "text/csv", + "text/plain", + "application/x-latex", +]); + +export const apiUserPromptSchema = z.string().min(1).max(10_000); + +export const apiUrlSchema = z.string().url(); + +export const apiPaginationSchema = z.object({ + page: z.coerce.number().int().positive().default(1), + limit: z.coerce.number().int().positive().max(100).default(20), +}); + +export const apiUuidParamSchema = z.object({ + id: z.string().regex(/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i), +}); + +export const apiFetchModeSchema = z.enum(["auto", "fast", "js"]); + +export const FETCH_CONFIG_DEFAULTS = { + mode: "auto", + stealth: false, + timeout: 30000, + wait: 0, + scrolls: 0, +} as const; + +export const apiFetchConfigSchema = z.object({ + mode: apiFetchModeSchema.default(FETCH_CONFIG_DEFAULTS.mode), + stealth: z.boolean().default(FETCH_CONFIG_DEFAULTS.stealth), + timeout: z.number().int().min(1000).max(60000).default(FETCH_CONFIG_DEFAULTS.timeout), + wait: 
z.number().int().min(0).max(30000).default(FETCH_CONFIG_DEFAULTS.wait), + headers: z.record(z.string(), z.string()).optional(), + cookies: z.record(z.string(), z.string()).optional(), + country: z + .string() + .length(2) + .transform((v) => v.toLowerCase()) + .optional(), + scrolls: z.number().int().min(0).max(100).default(FETCH_CONFIG_DEFAULTS.scrolls), + mock: z + .union([ + z.boolean(), + z.object({ + minKb: z.number().int().min(1).max(1000).default(1), + maxKb: z.number().int().min(1).max(1000).default(5), + minSleep: z.number().int().min(0).max(30000).default(5), + maxSleep: z.number().int().min(0).max(30000).default(15), + writeToBucket: z.boolean().default(false), + }), + ]) + .default(false), +}); + +export const apiHistoryFilterSchema = z.object({ + page: z.coerce.number().int().positive().default(1), + limit: z.coerce.number().int().min(1).max(100).default(20), + service: apiServiceEnumSchema.optional(), +}); + +export const apiScrapeContentFormatSchema = z.enum([ + "markdown", + "html", + "links", + "images", + "summary", + "json", + "branding", +]); + +export const apiScrapeCaptureFormatSchema = z.enum(["screenshot"]); + +export const apiScrapeFormatSchema = z.enum([ + ...apiScrapeContentFormatSchema.options, + ...apiScrapeCaptureFormatSchema.options, +]); + +export const apiMarkdownConfigSchema = z.object({ + mode: apiHtmlModeSchema.default("normal"), +}); + +export const apiHtmlConfigSchema = z.object({ + mode: apiHtmlModeSchema.default("normal"), +}); + +export const apiScreenshotConfigSchema = z.object({ + fullPage: z.boolean().default(false), + width: z.number().int().min(320).max(3840).default(1440), + height: z.number().int().min(200).max(2160).default(900), + quality: z.number().int().min(1).max(100).default(80), +}); + +export const apiScrapeJsonConfigSchema = z.object({ + prompt: apiUserPromptSchema, + schema: z.record(z.string(), z.unknown()).optional(), + mode: apiHtmlModeSchema.default("normal"), +}); + +export const 
apiScrapeSummaryConfigSchema = z.object({}); + +export const apiScrapeMarkdownFormatSchema = apiMarkdownConfigSchema.extend({ + type: z.literal("markdown"), +}); + +export const apiScrapeHtmlFormatSchema = apiHtmlConfigSchema.extend({ + type: z.literal("html"), +}); + +export const apiScrapeScreenshotFormatSchema = apiScreenshotConfigSchema.extend({ + type: z.literal("screenshot"), +}); + +export const apiScrapeJsonFormatSchema = apiScrapeJsonConfigSchema.extend({ + type: z.literal("json"), +}); + +export const apiScrapeLinksFormatSchema = z.object({ + type: z.literal("links"), +}); + +export const apiScrapeImagesFormatSchema = z.object({ + type: z.literal("images"), +}); + +export const apiScrapeSummaryFormatSchema = apiScrapeSummaryConfigSchema.extend({ + type: z.literal("summary"), +}); + +export const apiScrapeBrandingFormatSchema = z.object({ + type: z.literal("branding"), +}); + +export const apiScrapeFormatEntrySchema = z.discriminatedUnion("type", [ + apiScrapeMarkdownFormatSchema, + apiScrapeHtmlFormatSchema, + apiScrapeScreenshotFormatSchema, + apiScrapeJsonFormatSchema, + apiScrapeLinksFormatSchema, + apiScrapeImagesFormatSchema, + apiScrapeSummaryFormatSchema, + apiScrapeBrandingFormatSchema, +]); + +export const apiScrapeRequestSchema = z.object({ + url: apiUrlSchema, + contentType: apiFetchContentTypeSchema.optional(), + fetchConfig: apiFetchConfigSchema.optional(), + formats: z + .array(apiScrapeFormatEntrySchema) + .min(1) + .refine((formats) => new Set(formats.map((format) => format.type)).size === formats.length, { + message: "duplicate format types not allowed", + }) + .default([{ type: "markdown", mode: "normal" }]), +}); + +export const apiExtractRequestBaseSchema = z + .object({ + url: apiUrlSchema.optional(), + html: z.string().optional(), + markdown: z.string().optional(), + mode: apiHtmlModeSchema.default("normal"), + prompt: apiUserPromptSchema, + schema: z.record(z.string(), z.unknown()).optional(), + contentType: 
apiFetchContentTypeSchema.optional(), + fetchConfig: apiFetchConfigSchema.optional(), + }) + .refine((d) => d.url || d.html || d.markdown, { + message: "Either url, html, or markdown is required", + }); + +export const apiGenerateSchemaRequestSchema = z.object({ + prompt: apiUserPromptSchema, + existingSchema: z.record(z.string(), z.unknown()).optional(), +}); + +export const apiSearchRequestSchema = z + .object({ + query: z.string().min(1).max(500), + numResults: z.number().int().min(1).max(20).default(3), + format: z.enum(["html", "markdown"]).default("markdown"), + mode: apiHtmlModeSchema.default("prune"), + fetchConfig: apiFetchConfigSchema.optional(), + prompt: apiUserPromptSchema.optional(), + schema: z.record(z.string(), z.unknown()).optional(), + locationGeoCode: z.string().max(10).optional(), + timeRange: z + .enum(["past_hour", "past_24_hours", "past_week", "past_month", "past_year"]) + .optional(), + }) + .refine((d) => !d.schema || d.prompt, { + message: "schema requires prompt", + }); + +export const apiMonitorCreateSchema = z.object({ + url: apiUrlSchema, + name: z.string().max(200).optional(), + formats: z + .array(apiScrapeFormatEntrySchema) + .min(1) + .refine((formats) => new Set(formats.map((f) => f.type)).size === formats.length, { + message: "duplicate format types not allowed", + }) + .default([{ type: "markdown", mode: "normal" }]), + webhookUrl: apiUrlSchema.optional(), + interval: z.string().min(1).max(100), + fetchConfig: apiFetchConfigSchema.optional(), +}); + +export const apiMonitorUpdateSchema = z + .object({ + name: z.string().max(200).optional(), + formats: z + .array(apiScrapeFormatEntrySchema) + .min(1) + .refine((formats) => new Set(formats.map((f) => f.type)).size === formats.length, { + message: "duplicate format types not allowed", + }) + .optional(), + webhookUrl: apiUrlSchema.nullable().optional(), + interval: z.string().min(1).max(100).optional(), + fetchConfig: apiFetchConfigSchema.optional(), + }) + .partial(); + +export 
const apiCrawlStatusSchema = z.enum([ + "running", + "completed", + "failed", + "cancelled", + "paused", +]); + +export const apiCrawlPageStatusSchema = z.enum(["completed", "failed", "skipped"]); + +export const apiCrawlRequestSchema = z.object({ + url: apiUrlSchema, + formats: z + .array(apiScrapeFormatEntrySchema) + .min(1) + .refine((formats) => new Set(formats.map((f) => f.type)).size === formats.length, { + message: "duplicate format types not allowed", + }) + .default([{ type: "markdown", mode: "normal" }]), + maxDepth: z.coerce.number().int().min(0).default(2), + maxPages: z.coerce.number().int().min(1).max(1000).default(50), + maxLinksPerPage: z.coerce.number().int().min(1).default(10), + allowExternal: z.boolean().default(false), + includePatterns: z.array(z.string()).optional(), + excludePatterns: z.array(z.string()).optional(), + contentTypes: z.array(apiFetchContentTypeSchema).optional(), + fetchConfig: apiFetchConfigSchema.optional(), +}); diff --git a/src/types/index.ts b/src/types/index.ts index e6f1360..0817c5c 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -1,228 +1,405 @@ -export type SmartScraperParams = { - website_url?: string; - website_html?: string; - website_markdown?: string; - user_prompt: string; - output_schema?: Record; - number_of_scrolls?: number; - total_pages?: number; - stealth?: boolean; - cookies?: Record; - headers?: Record; - plain_text?: boolean; - webhook_url?: string; - mock?: boolean; - steps?: string[]; - wait_ms?: number; - country_code?: string; -}; - -export type SearchScraperParams = { - user_prompt: string; - num_results?: number; - extraction_mode?: boolean; - output_schema?: Record; - stealth?: boolean; - headers?: Record; - webhook_url?: string; - mock?: boolean; - time_range?: "past_hour" | "past_24_hours" | "past_week" | "past_month" | "past_year"; - location_geo_code?: string; -}; - -export type MarkdownifyParams = { - website_url: string; - stealth?: boolean; - headers?: Record; - webhook_url?: 
string; - mock?: boolean; - wait_ms?: number; - country_code?: string; -}; - -type CrawlBase = { - url: string; - max_pages?: number; - depth?: number; - rules?: Record; - sitemap?: boolean; - stealth?: boolean; - webhook_url?: string; - cache_website?: boolean; - breadth?: number; - same_domain_only?: boolean; - batch_size?: number; - wait_ms?: number; - headers?: Record; - number_of_scrolls?: number; - website_html?: string; -}; - -type CrawlExtraction = CrawlBase & { - extraction_mode?: true; - prompt: string; - schema?: Record; -}; - -type CrawlMarkdown = CrawlBase & { - extraction_mode: false; - prompt?: never; - schema?: never; -}; - -export type CrawlParams = CrawlExtraction | CrawlMarkdown; - -export type GenerateSchemaParams = { - user_prompt: string; - existing_schema?: Record; -}; - -export type SitemapParams = { - website_url: string; - headers?: Record; - mock?: boolean; - stealth?: boolean; -}; - -export type ScrapeParams = { - website_url: string; - stealth?: boolean; - branding?: boolean; - country_code?: string; - wait_ms?: number; -}; - -export type AgenticScraperParams = { +import type { z } from "zod"; +import type { + apiCrawlRequestSchema, + apiExtractRequestBaseSchema, + apiFetchConfigSchema, + apiFetchContentTypeSchema, + apiGenerateSchemaRequestSchema, + apiHistoryFilterSchema, + apiHtmlModeSchema, + apiMonitorCreateSchema, + apiMonitorUpdateSchema, + apiScrapeFormatEntrySchema, + apiScrapeRequestSchema, + apiSearchRequestSchema, +} from "../schemas.js"; + +export type ApiFetchConfig = z.infer; +export type ApiFetchContentType = z.infer; +export type ApiHtmlMode = z.infer; +export type ApiScrapeFormatEntry = z.infer; + +export type ApiScrapeRequest = z.infer; +export type ApiExtractRequest = z.infer; +export type ApiGenerateSchemaRequest = z.infer; +export type ApiSearchRequest = z.infer; +export type ApiCrawlRequest = z.infer; +export type ApiMonitorCreateInput = z.infer; +export type ApiMonitorUpdateInput = z.infer; +export type 
ApiHistoryFilter = z.infer; + +export type ApiScrapeFormat = + | "markdown" + | "html" + | "links" + | "images" + | "summary" + | "json" + | "branding" + | "screenshot"; + +export interface ApiTokenUsage { + promptTokens: number; + completionTokens: number; +} + +export interface ApiChunkerMetadata { + chunks: { size: number }[]; +} + +export interface ApiFetchWarning { + reason: "too_short" | "empty" | "bot_blocked" | "spa_shell" | "soft_404"; + provider?: string; +} + +export interface ScrapeMetadata { + provider?: string; + contentType: string; + elapsedMs?: number; + warnings?: ApiFetchWarning[]; + ocr?: { + model: string; + pagesProcessed: number; + pages: ContentPageMetadata[]; + }; +} + +export interface ContentPageMetadata { + index: number; + images: Array<{ + id: string; + topLeftX: number; + topLeftY: number; + bottomRightX: number; + bottomRightY: number; + }>; + tables: Array<{ id: string; content: string; format: string }>; + hyperlinks: string[]; + dimensions: { dpi: number; height: number; width: number }; +} + +export interface ApiBrandingColors { + primary: string; + accent: string; + background: string; + textPrimary: string; + link: string; +} + +export interface ApiBrandingFontEntry { + family: string; + fallback: string; +} + +export interface ApiBrandingTypography { + primary: ApiBrandingFontEntry; + heading: ApiBrandingFontEntry; + mono: ApiBrandingFontEntry; + sizes: { h1: string; h2: string; body: string }; +} + +export interface ApiBrandingImages { + logo: string; + favicon: string; + ogImage: string; +} + +export interface ApiBrandingPersonality { + tone: string; + energy: "high" | "medium" | "low"; + targetAudience: string; +} + +export interface ApiBranding { + colorScheme: "light" | "dark"; + colors: ApiBrandingColors; + typography: ApiBrandingTypography; + images: ApiBrandingImages; + spacing: { baseUnit: number; borderRadius: string }; + frameworkHints: string[]; + personality: ApiBrandingPersonality; + confidence: number; +} + 
+export interface ApiBrandingMetadata { + title: string; + description: string; + favicon: string; + language: string; + themeColor: string; + ogTitle: string; + ogDescription: string; + ogImage: string; + ogUrl: string; +} + +export interface ApiScrapeScreenshotData { url: string; - steps: string[]; - user_prompt?: string; - output_schema?: Record; - ai_extraction?: boolean; - use_session?: boolean; -}; - -export const HISTORY_SERVICES = [ - "markdownify", - "smartscraper", - "searchscraper", - "scrape", - "crawl", - "agentic-scraper", - "sitemap", -] as const; - -export type HistoryParams = { - service: (typeof HISTORY_SERVICES)[number]; - page?: number; - page_size?: number; -}; - -export type ApiResult = { - status: "success" | "error"; - data: T | null; - error?: string; - elapsedMs: number; -}; + width: number; + height: number; +} -export type SmartScraperResponse = { - request_id: string; - status: string; - website_url: string; - user_prompt: string; - result: Record | null; - error?: string; -}; +export interface ApiScrapeFormatError { + code: string; + error: string; +} -export type SearchScraperResponse = { - request_id: string; - status: string; - user_prompt: string; - num_results?: number; - result: Record | null; - markdown_content?: string | null; - reference_urls: string[]; - error?: string | null; -}; - -export type MarkdownifyResponse = { - request_id: string; - status: string; - website_url: string; - result: string | null; - error?: string; -}; +export interface ApiScrapeFormatResponseMap { + markdown: string[]; + html: string[]; + links: string[]; + images: string[]; + summary: string; + json: Record; + branding: ApiBranding; + screenshot: ApiScrapeScreenshotData; +} -export type CrawlPage = { +export type ApiImageContentType = Extract; + +export interface ApiScrapeFormatMetadataMap { + markdown: Record; + html: Record; + links: { count: number }; + images: { count: number }; + summary: { chunker?: ApiChunkerMetadata }; + json: { chunker: 
ApiChunkerMetadata; raw?: string | null }; + branding: { branding: ApiBrandingMetadata }; + screenshot: { contentType: ApiImageContentType; provider?: string }; +} + +export type ApiScrapeResultMap = Partial<{ + [K in ApiScrapeFormat]: { + data: ApiScrapeFormatResponseMap[K]; + metadata?: ApiScrapeFormatMetadataMap[K]; + }; +}>; + +export interface ApiScrapeResponse { + results: ApiScrapeResultMap; + metadata: ScrapeMetadata; + errors?: Partial<{ [K in ApiScrapeFormat]: ApiScrapeFormatError }>; +} + +export interface ApiExtractResponse { + raw: string | null; + json: Record | null; + usage: ApiTokenUsage; + metadata: { + chunker: ApiChunkerMetadata; + fetch?: { provider?: string }; + }; +} + +export interface ApiGenerateSchemaResponse { + refinedPrompt: string; + schema: Record; + usage: ApiTokenUsage; +} + +export interface ApiSearchResult { url: string; - markdown: string; -}; + title: string; + content: string; + provider?: string; +} -export type CrawlResponse = { - task_id: string; - status: string; - result?: Record | null; - llm_result?: Record | null; - crawled_urls?: string[]; - pages?: CrawlPage[]; - credits_used?: number; - pages_processed?: number; - elapsed_time?: number; - error?: string; -}; +export interface ApiSearchMetadata { + search: { provider?: string }; + pages: { requested: number; scraped: number }; + chunker?: ApiChunkerMetadata; +} -export type ScrapeResponse = { - scrape_request_id: string; - status: string; - html: string; - branding?: Record | null; - metadata?: Record | null; - error?: string; -}; +export interface ApiSearchResponse { + results: ApiSearchResult[]; + json?: Record | null; + raw?: string | null; + usage?: ApiTokenUsage; + metadata: ApiSearchMetadata; +} -export type AgenticScraperResponse = { - request_id: string; - status: string; - result: Record | null; +export type ApiCrawlStatus = "running" | "completed" | "failed" | "cancelled" | "paused"; +export type ApiCrawlPageStatus = "completed" | "failed" | "skipped"; + 
+export interface ApiCrawlPage { + url: string; + status: ApiCrawlPageStatus; + depth: number; + parentUrl: string | null; + links: string[]; + scrapeRefId: string; + title: string; + contentType: string; + screenshotUrl?: string; + reason?: string; error?: string; -}; +} -export type GenerateSchemaResponse = { - request_id: string; - status: string; - user_prompt: string; - refined_prompt?: string | null; - generated_schema?: Record | null; - error?: string | null; - created_at?: string | null; - updated_at?: string | null; -}; - -export type SitemapResponse = { - request_id: string; - urls: string[]; - status?: string; - website_url?: string; +export interface ApiCrawlResult { + status: ApiCrawlStatus; + reason?: string; + total: number; + finished: number; + pages: ApiCrawlPage[]; +} + +export interface ApiCrawlResponse extends ApiCrawlResult { + id: string; +} + +export interface TextChange { + type: "added" | "removed"; + line: number; + content: string; +} + +export interface JsonChange { + path: string; + old: unknown; + new: unknown; +} + +export interface SetChange { + added: string[]; + removed: string[]; +} + +export interface ImageChange { + size: number; + changed: number; + mask?: string; +} + +export interface ApiMonitorDiffs { + markdown?: TextChange[]; + html?: TextChange[]; + json?: JsonChange[]; + screenshot?: ImageChange; + links?: SetChange; + images?: SetChange; + summary?: TextChange[]; + branding?: JsonChange[]; +} + +export type ApiMonitorRefs = Partial>; + +export interface ApiWebhookStatus { + sentAt: string; + statusCode: number | null; error?: string; -}; +} -export type CreditsResponse = { - remaining_credits: number; - total_credits_used: number; -}; +export interface ApiMonitorResult { + changed: boolean; + diffs: ApiMonitorDiffs; + refs: ApiMonitorRefs; + webhookStatus?: ApiWebhookStatus; +} -export type HealthResponse = { - status: string; -}; +export interface ApiMonitorResponse { + cronId: string; + scheduleId: string; + 
interval: string; + status: "active" | "paused"; + config: ApiMonitorCreateInput; + createdAt: string; + updatedAt: string; +} -export type HistoryResponse = { - requests: HistoryEntry[]; - total_count: number; - page: number; - page_size: number; -}; +export type ApiHistoryService = "scrape" | "extract" | "schema" | "search" | "monitor" | "crawl"; +export type ApiHistoryStatus = "completed" | "failed" | "running" | "paused"; + +interface ApiHistoryBase { + id: string; + status: ApiHistoryStatus; + error: unknown; + elapsedMs: number; + createdAt: string; + requestParentId: string | null; +} -export type HistoryEntry = { - request_id: string; +export interface ApiScrapeHistoryEntry extends ApiHistoryBase { + service: "scrape"; + params: ApiScrapeRequest; + result: ApiScrapeResponse; +} + +export interface ApiExtractHistoryEntry extends ApiHistoryBase { + service: "extract"; + params: ApiExtractRequest; + result: ApiExtractResponse; +} + +export interface ApiSchemaHistoryEntry extends ApiHistoryBase { + service: "schema"; + params: ApiGenerateSchemaRequest; + result: ApiGenerateSchemaResponse; +} + +export interface ApiSearchHistoryEntry extends ApiHistoryBase { + service: "search"; + params: ApiSearchRequest; + result: ApiSearchResponse; +} + +export interface ApiMonitorHistoryEntry extends ApiHistoryBase { + service: "monitor"; + params: { cronId: string; url: string }; + result: ApiMonitorResult; +} + +export interface ApiCrawlHistoryEntry extends ApiHistoryBase { + service: "crawl"; + params: { url: string; maxPages: number }; + result: ApiCrawlResult; +} + +export type ApiHistoryEntry = + | ApiScrapeHistoryEntry + | ApiExtractHistoryEntry + | ApiSchemaHistoryEntry + | ApiSearchHistoryEntry + | ApiMonitorHistoryEntry + | ApiCrawlHistoryEntry; + +export interface ApiPageResponse { + data: T[]; + pagination: { + page: number; + limit: number; + total: number; + }; +} + +export type ApiHistoryPage = ApiPageResponse; + +export interface ApiJobsStatus { + used: 
number; + limit: number; +} + +export interface ApiCreditsResponse { + remaining: number; + used: number; + plan: string; + jobs: { + crawl: ApiJobsStatus; + monitor: ApiJobsStatus; + }; +} + +export interface ApiHealthResponse { status: string; - [key: string]: unknown; -}; + uptime: number; + services?: { + redis: "ok" | "down"; + db: "ok" | "down"; + }; +} + +export interface ApiResult { + status: "success" | "error"; + data: T | null; + error?: string; + elapsedMs: number; +} From 97a89584a1b38c90389897658ac791256aed6e7b Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 14 Apr 2026 14:14:24 +0200 Subject: [PATCH 02/28] feat(v2): implement SDK with crawl and monitor namespaces - Add scrape, extract, search, generateSchema endpoints - Add crawl namespace: start, get, stop, resume, delete - Add monitor namespace: create, list, get, update, delete, pause, resume - Add getCredits, checkHealth, getHistory, getHistoryEntry - Export schemas for client-side validation - Add zod dependency Co-Authored-By: Claude Opus 4.5 --- package.json | 3 + src/index.ts | 84 +++++++---- src/scrapegraphai.ts | 327 ++++++++++++++++++++++++------------------- 3 files changed, 237 insertions(+), 177 deletions(-) diff --git a/package.json b/package.json index 0dc8b8c..801a314 100644 --- a/package.json +++ b/package.json @@ -39,5 +39,8 @@ "@types/node": "^22.13.1", "tsup": "^8.3.6", "typescript": "^5.8.2" + }, + "dependencies": { + "zod": "^4.3.6" } } diff --git a/src/index.ts b/src/index.ts index cb73196..5c915e6 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,41 +1,65 @@ export { - smartScraper, - searchScraper, - markdownify, scrape, - crawl, - agenticScraper, + extract, + search, generateSchema, - sitemap, getCredits, checkHealth, - history, + getHistory, + getHistoryEntry, + crawl, + monitor, } from "./scrapegraphai.js"; export type { - AgenticScraperParams, - AgenticScraperResponse, + ApiFetchConfig, + ApiFetchContentType, + ApiHtmlMode, + ApiScrapeFormatEntry, + 
ApiScrapeRequest, + ApiScrapeResponse, + ApiScrapeFormat, + ApiScrapeResultMap, + ApiExtractRequest, + ApiExtractResponse, + ApiGenerateSchemaRequest, + ApiGenerateSchemaResponse, + ApiSearchRequest, + ApiSearchResponse, + ApiSearchResult, + ApiCrawlRequest, + ApiCrawlResponse, + ApiCrawlResult, + ApiCrawlPage, + ApiCrawlStatus, + ApiCrawlPageStatus, + ApiMonitorCreateInput, + ApiMonitorUpdateInput, + ApiMonitorResponse, + ApiMonitorResult, + ApiMonitorDiffs, + ApiHistoryFilter, + ApiHistoryEntry, + ApiHistoryPage, + ApiHistoryService, + ApiHistoryStatus, + ApiCreditsResponse, + ApiHealthResponse, ApiResult, - CrawlParams, - CrawlPage, - CrawlResponse, - CreditsResponse, - GenerateSchemaParams, - GenerateSchemaResponse, - HealthResponse, - HistoryEntry, - HistoryParams, - HistoryResponse, - MarkdownifyParams, - MarkdownifyResponse, - ScrapeParams, - ScrapeResponse, - SearchScraperParams, - SearchScraperResponse, - SitemapParams, - SitemapResponse, - SmartScraperParams, - SmartScraperResponse, + ApiTokenUsage, + ApiChunkerMetadata, + ApiBranding, } from "./types/index.js"; -export { HISTORY_SERVICES } from "./types/index.js"; +export { + apiScrapeRequestSchema, + apiExtractRequestBaseSchema, + apiGenerateSchemaRequestSchema, + apiSearchRequestSchema, + apiCrawlRequestSchema, + apiMonitorCreateSchema, + apiMonitorUpdateSchema, + apiHistoryFilterSchema, + apiFetchConfigSchema, + apiScrapeFormatEntrySchema, +} from "./schemas.js"; diff --git a/src/scrapegraphai.ts b/src/scrapegraphai.ts index b91fa3b..cb96ba1 100644 --- a/src/scrapegraphai.ts +++ b/src/scrapegraphai.ts @@ -1,33 +1,30 @@ import { env } from "./env.js"; import type { - AgenticScraperParams, - AgenticScraperResponse, + ApiCrawlRequest, + ApiCrawlResponse, + ApiCreditsResponse, + ApiExtractRequest, + ApiExtractResponse, + ApiGenerateSchemaRequest, + ApiGenerateSchemaResponse, + ApiHealthResponse, + ApiHistoryEntry, + ApiHistoryFilter, + ApiHistoryPage, + ApiMonitorCreateInput, + ApiMonitorResponse, + 
ApiMonitorUpdateInput, ApiResult, - CrawlParams, - CrawlResponse, - CreditsResponse, - GenerateSchemaParams, - GenerateSchemaResponse, - HealthResponse, - HistoryParams, - HistoryResponse, - MarkdownifyParams, - MarkdownifyResponse, - ScrapeParams, - ScrapeResponse, - SearchScraperParams, - SearchScraperResponse, - SitemapParams, - SitemapResponse, - SmartScraperParams, - SmartScraperResponse, + ApiScrapeRequest, + ApiScrapeResponse, + ApiSearchRequest, + ApiSearchResponse, } from "./types/index.js"; -const BASE_URL = process.env.SGAI_API_URL || "https://api.scrapegraphai.com/v1"; +const BASE_URL = process.env.SGAI_API_URL || "https://api.scrapegraphai.com/v2"; const HEALTH_URL = process.env.SGAI_API_URL ? `${process.env.SGAI_API_URL.replace(/\/v\d+$/, "")}` : "https://api.scrapegraphai.com"; -const POLL_INTERVAL_MS = 3000; function debug(label: string, data?: unknown) { if (!env.debug) return; @@ -68,7 +65,7 @@ function mapHttpError(status: number): string { type RequestResult = { data: T; elapsedMs: number }; async function request( - method: "GET" | "POST", + method: "GET" | "POST" | "PATCH" | "DELETE", path: string, apiKey: string, body?: object, @@ -107,75 +104,55 @@ async function request( return { data, elapsedMs }; } -type PollResponse = { - status: string; - error?: string; - [key: string]: unknown; -}; - -function isDone(status: string) { - return status === "completed" || status === "done" || status === "success"; -} - -async function pollUntilDone( - path: string, - id: string, +export async function scrape( apiKey: string, - onPoll?: (status: string) => void, -): Promise> { - const deadline = Date.now() + env.timeoutS * 1000; - let totalMs = 0; - - while (Date.now() < deadline) { - const { data, elapsedMs } = await request("GET", `${path}/${id}`, apiKey); - totalMs += elapsedMs; - onPoll?.(data.status); - - if (isDone(data.status)) return { data, elapsedMs: totalMs }; - if (data.status === "failed") throw new Error(data.error ?? 
"Job failed"); - - await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS)); + params: ApiScrapeRequest, +): Promise> { + try { + const { data, elapsedMs } = await request("POST", "/scrape", apiKey, params); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); } - - throw new Error("Polling timed out"); } -function unwrapResult(data: PollResponse): PollResponse { - if (data.result && typeof data.result === "object" && !Array.isArray(data.result)) { - const inner = data.result as Record; - if (inner.status || inner.pages || inner.crawled_urls) { - return { ...inner, status: String(inner.status ?? data.status) } as PollResponse; - } +export async function extract( + apiKey: string, + params: ApiExtractRequest, +): Promise> { + try { + const { data, elapsedMs } = await request( + "POST", + "/extract", + apiKey, + params, + ); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); } - return data; } -async function submitAndPoll( - path: string, +export async function search( apiKey: string, - body: object, - idField: string, - onPoll?: (status: string) => void, -): Promise> { - const { data: res, elapsedMs } = await request("POST", path, apiKey, body); - if (isDone(res.status)) return { data: unwrapResult(res) as unknown as T, elapsedMs }; - const id = res[idField]; - if (typeof id !== "string") throw new Error(`Missing ${idField} in response`); - const poll = await pollUntilDone(path, id, apiKey, onPoll); - return { - data: unwrapResult(poll.data) as unknown as T, - elapsedMs: elapsedMs + poll.elapsedMs, - }; + params: ApiSearchRequest, +): Promise> { + try { + const { data, elapsedMs } = await request("POST", "/search", apiKey, params); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); + } } -export async function smartScraper( +export async function generateSchema( apiKey: string, - params: SmartScraperParams, -): Promise> { + params: ApiGenerateSchemaRequest, +): Promise> { try { - const { data, elapsedMs } = await 
request( + const { data, elapsedMs } = await request( "POST", - "/smartscraper", + "/schema", apiKey, params, ); @@ -185,33 +162,23 @@ export async function smartScraper( } } -export async function searchScraper( - apiKey: string, - params: SearchScraperParams, -): Promise> { +export async function getCredits(apiKey: string): Promise> { try { - const { data, elapsedMs } = await request( - "POST", - "/searchscraper", - apiKey, - params, - ); + const { data, elapsedMs } = await request("GET", "/credits", apiKey); return ok(data, elapsedMs); } catch (err) { return fail(err); } } -export async function markdownify( - apiKey: string, - params: MarkdownifyParams, -): Promise> { +export async function checkHealth(apiKey: string): Promise> { try { - const { data, elapsedMs } = await request( - "POST", - "/markdownify", + const { data, elapsedMs } = await request( + "GET", + "/healthz", apiKey, - params, + undefined, + HEALTH_URL, ); return ok(data, elapsedMs); } catch (err) { @@ -219,47 +186,72 @@ export async function markdownify( } } -export async function scrape( +export async function getHistory( apiKey: string, - params: ScrapeParams, -): Promise> { + params?: ApiHistoryFilter, +): Promise> { try { - const { data, elapsedMs } = await request("POST", "/scrape", apiKey, params); + const qs = new URLSearchParams(); + if (params?.page) qs.set("page", String(params.page)); + if (params?.limit) qs.set("limit", String(params.limit)); + if (params?.service) qs.set("service", params.service); + const query = qs.toString(); + const path = query ? 
`/history?${query}` : "/history"; + const { data, elapsedMs } = await request("GET", path, apiKey); return ok(data, elapsedMs); } catch (err) { return fail(err); } } -export async function crawl( +export async function getHistoryEntry( apiKey: string, - params: CrawlParams, - onPoll?: (status: string) => void, -): Promise> { + id: string, +): Promise> { try { - const { data, elapsedMs } = await submitAndPoll( - "/crawl", - apiKey, - params, - "task_id", - onPoll, - ); + const { data, elapsedMs } = await request("GET", `/history/${id}`, apiKey); return ok(data, elapsedMs); } catch (err) { return fail(err); } } -export async function agenticScraper( +async function crawlStart( apiKey: string, - params: AgenticScraperParams, -): Promise> { + params: ApiCrawlRequest, +): Promise> { try { - const { data, elapsedMs } = await request( + const { data, elapsedMs } = await request("POST", "/crawl", apiKey, params); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); + } +} + +async function crawlGet(apiKey: string, id: string): Promise> { + try { + const { data, elapsedMs } = await request("GET", `/crawl/${id}`, apiKey); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); + } +} + +async function crawlStop(apiKey: string, id: string): Promise> { + try { + const { data, elapsedMs } = await request<{ ok: boolean }>("POST", `/crawl/${id}/stop`, apiKey); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); + } +} + +async function crawlResume(apiKey: string, id: string): Promise> { + try { + const { data, elapsedMs } = await request<{ ok: boolean }>( "POST", - "/agentic-scrapper", + `/crawl/${id}/resume`, apiKey, - params, ); return ok(data, elapsedMs); } catch (err) { @@ -267,14 +259,31 @@ export async function agenticScraper( } } -export async function generateSchema( +async function crawlDelete(apiKey: string, id: string): Promise> { + try { + const { data, elapsedMs } = await request<{ ok: boolean }>("DELETE", `/crawl/${id}`, 
apiKey); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); + } +} + +export const crawl = { + start: crawlStart, + get: crawlGet, + stop: crawlStop, + resume: crawlResume, + delete: crawlDelete, +}; + +async function monitorCreate( apiKey: string, - params: GenerateSchemaParams, -): Promise> { + params: ApiMonitorCreateInput, +): Promise> { try { - const { data, elapsedMs } = await request( + const { data, elapsedMs } = await request( "POST", - "/generate_schema", + "/monitor", apiKey, params, ); @@ -284,35 +293,57 @@ export async function generateSchema( } } -export async function sitemap( +async function monitorList(apiKey: string): Promise> { + try { + const { data, elapsedMs } = await request("GET", "/monitor", apiKey); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); + } +} + +async function monitorGet(apiKey: string, id: string): Promise> { + try { + const { data, elapsedMs } = await request("GET", `/monitor/${id}`, apiKey); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); + } +} + +async function monitorUpdate( apiKey: string, - params: SitemapParams, -): Promise> { + id: string, + params: ApiMonitorUpdateInput, +): Promise> { try { - const { data, elapsedMs } = await request("POST", "/sitemap", apiKey, params); + const { data, elapsedMs } = await request( + "PATCH", + `/monitor/${id}`, + apiKey, + params, + ); return ok(data, elapsedMs); } catch (err) { return fail(err); } } -export async function getCredits(apiKey: string): Promise> { +async function monitorDelete(apiKey: string, id: string): Promise> { try { - const { data, elapsedMs } = await request("GET", "/credits", apiKey); + const { data, elapsedMs } = await request<{ ok: boolean }>("DELETE", `/monitor/${id}`, apiKey); return ok(data, elapsedMs); } catch (err) { return fail(err); } } -export async function checkHealth(apiKey: string): Promise> { +async function monitorPause(apiKey: string, id: string): Promise> { try { - const { data, elapsedMs 
} = await request( - "GET", - "/healthz", + const { data, elapsedMs } = await request( + "POST", + `/monitor/${id}/pause`, apiKey, - undefined, - HEALTH_URL, ); return ok(data, elapsedMs); } catch (err) { @@ -320,19 +351,11 @@ export async function checkHealth(apiKey: string): Promise> { +async function monitorResume(apiKey: string, id: string): Promise> { try { - const page = params.page ?? 1; - const pageSize = params.page_size ?? 10; - const qs = new URLSearchParams(); - qs.set("page", String(page)); - qs.set("page_size", String(pageSize)); - const { data, elapsedMs } = await request( - "GET", - `/history/${params.service}?${qs}`, + const { data, elapsedMs } = await request( + "POST", + `/monitor/${id}/resume`, apiKey, ); return ok(data, elapsedMs); @@ -340,3 +363,13 @@ export async function history( return fail(err); } } + +export const monitor = { + create: monitorCreate, + list: monitorList, + get: monitorGet, + update: monitorUpdate, + delete: monitorDelete, + pause: monitorPause, + resume: monitorResume, +}; From 5847b9586609d32001c1a8d7e3852cd68bf1522f Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 14 Apr 2026 14:22:55 +0200 Subject: [PATCH 03/28] refactor: inline namespace functions directly in objects Co-Authored-By: Claude Opus 4.5 --- src/scrapegraphai.ts | 299 +++++++++++++++++++++---------------------- 1 file changed, 147 insertions(+), 152 deletions(-) diff --git a/src/scrapegraphai.ts b/src/scrapegraphai.ts index cb96ba1..11692ae 100644 --- a/src/scrapegraphai.ts +++ b/src/scrapegraphai.ts @@ -216,160 +216,155 @@ export async function getHistoryEntry( } } -async function crawlStart( - apiKey: string, - params: ApiCrawlRequest, -): Promise> { - try { - const { data, elapsedMs } = await request("POST", "/crawl", apiKey, params); - return ok(data, elapsedMs); - } catch (err) { - return fail(err); - } -} - -async function crawlGet(apiKey: string, id: string): Promise> { - try { - const { data, elapsedMs } = await request("GET", 
`/crawl/${id}`, apiKey); - return ok(data, elapsedMs); - } catch (err) { - return fail(err); - } -} - -async function crawlStop(apiKey: string, id: string): Promise> { - try { - const { data, elapsedMs } = await request<{ ok: boolean }>("POST", `/crawl/${id}/stop`, apiKey); - return ok(data, elapsedMs); - } catch (err) { - return fail(err); - } -} - -async function crawlResume(apiKey: string, id: string): Promise> { - try { - const { data, elapsedMs } = await request<{ ok: boolean }>( - "POST", - `/crawl/${id}/resume`, - apiKey, - ); - return ok(data, elapsedMs); - } catch (err) { - return fail(err); - } -} - -async function crawlDelete(apiKey: string, id: string): Promise> { - try { - const { data, elapsedMs } = await request<{ ok: boolean }>("DELETE", `/crawl/${id}`, apiKey); - return ok(data, elapsedMs); - } catch (err) { - return fail(err); - } -} - export const crawl = { - start: crawlStart, - get: crawlGet, - stop: crawlStop, - resume: crawlResume, - delete: crawlDelete, + async start(apiKey: string, params: ApiCrawlRequest): Promise> { + try { + const { data, elapsedMs } = await request("POST", "/crawl", apiKey, params); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); + } + }, + + async get(apiKey: string, id: string): Promise> { + try { + const { data, elapsedMs } = await request("GET", `/crawl/${id}`, apiKey); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); + } + }, + + async stop(apiKey: string, id: string): Promise> { + try { + const { data, elapsedMs } = await request<{ ok: boolean }>( + "POST", + `/crawl/${id}/stop`, + apiKey, + ); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); + } + }, + + async resume(apiKey: string, id: string): Promise> { + try { + const { data, elapsedMs } = await request<{ ok: boolean }>( + "POST", + `/crawl/${id}/resume`, + apiKey, + ); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); + } + }, + + async delete(apiKey: string, id: string): Promise> { 
+ try { + const { data, elapsedMs } = await request<{ ok: boolean }>("DELETE", `/crawl/${id}`, apiKey); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); + } + }, }; -async function monitorCreate( - apiKey: string, - params: ApiMonitorCreateInput, -): Promise> { - try { - const { data, elapsedMs } = await request( - "POST", - "/monitor", - apiKey, - params, - ); - return ok(data, elapsedMs); - } catch (err) { - return fail(err); - } -} - -async function monitorList(apiKey: string): Promise> { - try { - const { data, elapsedMs } = await request("GET", "/monitor", apiKey); - return ok(data, elapsedMs); - } catch (err) { - return fail(err); - } -} - -async function monitorGet(apiKey: string, id: string): Promise> { - try { - const { data, elapsedMs } = await request("GET", `/monitor/${id}`, apiKey); - return ok(data, elapsedMs); - } catch (err) { - return fail(err); - } -} - -async function monitorUpdate( - apiKey: string, - id: string, - params: ApiMonitorUpdateInput, -): Promise> { - try { - const { data, elapsedMs } = await request( - "PATCH", - `/monitor/${id}`, - apiKey, - params, - ); - return ok(data, elapsedMs); - } catch (err) { - return fail(err); - } -} - -async function monitorDelete(apiKey: string, id: string): Promise> { - try { - const { data, elapsedMs } = await request<{ ok: boolean }>("DELETE", `/monitor/${id}`, apiKey); - return ok(data, elapsedMs); - } catch (err) { - return fail(err); - } -} - -async function monitorPause(apiKey: string, id: string): Promise> { - try { - const { data, elapsedMs } = await request( - "POST", - `/monitor/${id}/pause`, - apiKey, - ); - return ok(data, elapsedMs); - } catch (err) { - return fail(err); - } -} - -async function monitorResume(apiKey: string, id: string): Promise> { - try { - const { data, elapsedMs } = await request( - "POST", - `/monitor/${id}/resume`, - apiKey, - ); - return ok(data, elapsedMs); - } catch (err) { - return fail(err); - } -} - export const monitor = { - create: 
monitorCreate, - list: monitorList, - get: monitorGet, - update: monitorUpdate, - delete: monitorDelete, - pause: monitorPause, - resume: monitorResume, + async create( + apiKey: string, + params: ApiMonitorCreateInput, + ): Promise> { + try { + const { data, elapsedMs } = await request( + "POST", + "/monitor", + apiKey, + params, + ); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); + } + }, + + async list(apiKey: string): Promise> { + try { + const { data, elapsedMs } = await request("GET", "/monitor", apiKey); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); + } + }, + + async get(apiKey: string, id: string): Promise> { + try { + const { data, elapsedMs } = await request( + "GET", + `/monitor/${id}`, + apiKey, + ); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); + } + }, + + async update( + apiKey: string, + id: string, + params: ApiMonitorUpdateInput, + ): Promise> { + try { + const { data, elapsedMs } = await request( + "PATCH", + `/monitor/${id}`, + apiKey, + params, + ); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); + } + }, + + async delete(apiKey: string, id: string): Promise> { + try { + const { data, elapsedMs } = await request<{ ok: boolean }>( + "DELETE", + `/monitor/${id}`, + apiKey, + ); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); + } + }, + + async pause(apiKey: string, id: string): Promise> { + try { + const { data, elapsedMs } = await request( + "POST", + `/monitor/${id}/pause`, + apiKey, + ); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); + } + }, + + async resume(apiKey: string, id: string): Promise> { + try { + const { data, elapsedMs } = await request( + "POST", + `/monitor/${id}/resume`, + apiKey, + ); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); + } + }, }; From a725c1cea19c1f67eaa989a53b60b74e9f21dfbe Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 14 Apr 2026 14:24:56 +0200 
Subject: [PATCH 04/28] fix: use deleted status and move types to src/types.ts - Replace cancelled with deleted in ApiCrawlStatus - Add deleted to ApiHistoryStatus - Move types from src/types/index.ts to src/types.ts Co-Authored-By: Claude Opus 4.5 --- src/index.ts | 2 +- src/schemas.ts | 8 +------- src/scrapegraphai.ts | 2 +- src/{types/index.ts => types.ts} | 6 +++--- 4 files changed, 6 insertions(+), 12 deletions(-) rename src/{types/index.ts => types.ts} (99%) diff --git a/src/index.ts b/src/index.ts index 5c915e6..de300ea 100644 --- a/src/index.ts +++ b/src/index.ts @@ -49,7 +49,7 @@ export type { ApiTokenUsage, ApiChunkerMetadata, ApiBranding, -} from "./types/index.js"; +} from "./types.js"; export { apiScrapeRequestSchema, diff --git a/src/schemas.ts b/src/schemas.ts index 264a9aa..617d1d6 100644 --- a/src/schemas.ts +++ b/src/schemas.ts @@ -256,13 +256,7 @@ export const apiMonitorUpdateSchema = z }) .partial(); -export const apiCrawlStatusSchema = z.enum([ - "running", - "completed", - "failed", - "cancelled", - "paused", -]); +export const apiCrawlStatusSchema = z.enum(["running", "completed", "failed", "paused", "deleted"]); export const apiCrawlPageStatusSchema = z.enum(["completed", "failed", "skipped"]); diff --git a/src/scrapegraphai.ts b/src/scrapegraphai.ts index 11692ae..99dac94 100644 --- a/src/scrapegraphai.ts +++ b/src/scrapegraphai.ts @@ -19,7 +19,7 @@ import type { ApiScrapeResponse, ApiSearchRequest, ApiSearchResponse, -} from "./types/index.js"; +} from "./types.js"; const BASE_URL = process.env.SGAI_API_URL || "https://api.scrapegraphai.com/v2"; const HEALTH_URL = process.env.SGAI_API_URL diff --git a/src/types/index.ts b/src/types.ts similarity index 99% rename from src/types/index.ts rename to src/types.ts index 0817c5c..57f412b 100644 --- a/src/types/index.ts +++ b/src/types.ts @@ -12,7 +12,7 @@ import type { apiScrapeFormatEntrySchema, apiScrapeRequestSchema, apiSearchRequestSchema, -} from "../schemas.js"; +} from "./schemas.js"; 
export type ApiFetchConfig = z.infer; export type ApiFetchContentType = z.infer; @@ -218,7 +218,7 @@ export interface ApiSearchResponse { metadata: ApiSearchMetadata; } -export type ApiCrawlStatus = "running" | "completed" | "failed" | "cancelled" | "paused"; +export type ApiCrawlStatus = "running" | "completed" | "failed" | "paused" | "deleted"; export type ApiCrawlPageStatus = "completed" | "failed" | "skipped"; export interface ApiCrawlPage { @@ -307,7 +307,7 @@ export interface ApiMonitorResponse { } export type ApiHistoryService = "scrape" | "extract" | "schema" | "search" | "monitor" | "crawl"; -export type ApiHistoryStatus = "completed" | "failed" | "running" | "paused"; +export type ApiHistoryStatus = "completed" | "failed" | "running" | "paused" | "deleted"; interface ApiHistoryBase { id: string; From 11b811cff9876cdb891d8433715c13f6e4ffa557 Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 14 Apr 2026 14:28:14 +0200 Subject: [PATCH 05/28] feat(v2): rewrite tests for v2 API - Update unit tests for new SDK structure - Add integration tests for live API - Fix schemas to use deleted instead of cancelled - Move types.ts out of folder Co-Authored-By: Claude Opus 4.5 --- tests/integration.test.ts | 73 +++++++ tests/scrapegraphai.test.ts | 397 +++++++++++++++++++++++------------- 2 files changed, 324 insertions(+), 146 deletions(-) create mode 100644 tests/integration.test.ts diff --git a/tests/integration.test.ts b/tests/integration.test.ts new file mode 100644 index 0000000..3ee4091 --- /dev/null +++ b/tests/integration.test.ts @@ -0,0 +1,73 @@ +import { describe, expect, test } from "bun:test"; +import { crawl, extract, getCredits, getHistory, monitor, scrape, search } from "../src/index.js"; + +const API_KEY = process.env.SGAI_API_KEY || "sgai-669918e5-55be-4752-a684-f6da788d1384"; + +describe("integration", () => { + test("getCredits", async () => { + const res = await getCredits(API_KEY); + console.log("getCredits:", res); + 
expect(res.status).toBe("success"); + expect(res.data).toHaveProperty("remaining"); + expect(res.data).toHaveProperty("plan"); + }); + + test("scrape markdown", async () => { + const res = await scrape(API_KEY, { + url: "https://example.com", + formats: [{ type: "markdown" }], + }); + console.log("scrape:", res.status, res.error); + expect(res.status).toBe("success"); + expect(res.data?.results.markdown).toBeDefined(); + }); + + test("extract", async () => { + const res = await extract(API_KEY, { + url: "https://example.com", + prompt: "What is this page about?", + }); + console.log("extract:", res.status, res.error); + expect(res.status).toBe("success"); + }); + + test("search", async () => { + const res = await search(API_KEY, { + query: "anthropic claude", + numResults: 2, + }); + console.log("search:", res.status, res.error); + expect(res.status).toBe("success"); + expect(res.data?.results.length).toBeGreaterThan(0); + }); + + test("getHistory", async () => { + const res = await getHistory(API_KEY, { limit: 5 }); + console.log("getHistory:", res.status, res.data?.pagination); + expect(res.status).toBe("success"); + }); + + test("crawl.start and crawl.get", async () => { + const startRes = await crawl.start(API_KEY, { + url: "https://example.com", + maxPages: 2, + }); + console.log("crawl.start:", startRes.status, startRes.data?.id, startRes.error); + + if ( + startRes.status === "error" && + (startRes.error?.includes("Max") || startRes.error?.includes("Rate")) + ) { + console.log("Skipping - rate limited"); + return; + } + + expect(startRes.status).toBe("success"); + + if (startRes.data?.id) { + const getRes = await crawl.get(API_KEY, startRes.data.id); + console.log("crawl.get:", getRes.status, getRes.data?.status); + expect(getRes.status).toBe("success"); + } + }); +}); diff --git a/tests/scrapegraphai.test.ts b/tests/scrapegraphai.test.ts index 4186453..6a7591b 100644 --- a/tests/scrapegraphai.test.ts +++ b/tests/scrapegraphai.test.ts @@ -1,13 +1,11 @@ 
-import { afterEach, describe, expect, mock, spyOn, test } from "bun:test"; +import { afterEach, describe, expect, spyOn, test } from "bun:test"; +import * as sdk from "../src/scrapegraphai.js"; -mock.module("../src/env.js", () => ({ - env: { debug: false, timeoutS: 120 }, -})); - -import * as scrapegraphai from "../src/scrapegraphai.js"; - -const API_KEY = "test-sgai-key-abc123"; -const BASE = "https://api.scrapegraphai.com/v1"; +const API_KEY = "test-sgai-key"; +const BASE = process.env.SGAI_API_URL || "https://api.scrapegraphai.com/v2"; +const HEALTH_BASE = process.env.SGAI_API_URL + ? process.env.SGAI_API_URL.replace(/\/v\d+$/, "") + : "https://api.scrapegraphai.com"; function json(body: unknown, status = 200): Response { return new Response(JSON.stringify(body), { @@ -22,50 +20,46 @@ afterEach(() => { fetchSpy?.mockRestore(); }); -function expectPost(callIndex: number, path: string, body?: object) { +function expectRequest( + callIndex: number, + method: string, + path: string, + body?: object, + base = BASE, +) { const [url, init] = fetchSpy.mock.calls[callIndex] as [string, RequestInit]; - expect(url).toBe(`${BASE}${path}`); - expect(init.method).toBe("POST"); + expect(url).toBe(`${base}${path}`); + expect(init.method).toBe(method); expect((init.headers as Record)["SGAI-APIKEY"]).toBe(API_KEY); - expect((init.headers as Record)["Content-Type"]).toBe("application/json"); - if (body) expect(JSON.parse(init.body as string)).toEqual(body); + if (body) { + expect((init.headers as Record)["Content-Type"]).toBe("application/json"); + expect(JSON.parse(init.body as string)).toEqual(body); + } } -function expectGet(callIndex: number, path: string) { - const [url, init] = fetchSpy.mock.calls[callIndex] as [string, RequestInit]; - expect(url).toBe(`${BASE}${path}`); - expect(init.method).toBe("GET"); - expect((init.headers as Record)["SGAI-APIKEY"]).toBe(API_KEY); -} - -describe("smartScraper", () => { - const params = { user_prompt: "Extract prices", website_url: 
"https://example.com" }; +describe("scrape", () => { + const params = { url: "https://example.com" }; test("success", async () => { const body = { - request_id: "abc-123", - status: "completed", - website_url: "https://example.com", - user_prompt: "Extract prices", - result: { prices: [10, 20] }, - error: "", + results: { markdown: { data: ["# Hello"] } }, + metadata: { contentType: "text/html" }, }; fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); - const res = await scrapegraphai.smartScraper(API_KEY, params); + const res = await sdk.scrape(API_KEY, params); expect(res.status).toBe("success"); expect(res.data).toEqual(body); expect(res.elapsedMs).toBeGreaterThanOrEqual(0); - expect(fetchSpy).toHaveBeenCalledTimes(1); - expectPost(0, "/smartscraper", params); + expectRequest(0, "POST", "/scrape", params); }); test("HTTP 401", async () => { fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce( json({ detail: "Invalid key" }, 401), ); - const res = await scrapegraphai.smartScraper(API_KEY, params); + const res = await sdk.scrape(API_KEY, params); expect(res.status).toBe("error"); expect(res.error).toContain("Invalid or missing API key"); @@ -73,7 +67,7 @@ describe("smartScraper", () => { test("HTTP 402", async () => { fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json({}, 402)); - const res = await scrapegraphai.smartScraper(API_KEY, params); + const res = await sdk.scrape(API_KEY, params); expect(res.status).toBe("error"); expect(res.error).toContain("Insufficient credits"); @@ -81,7 +75,7 @@ describe("smartScraper", () => { test("HTTP 422", async () => { fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json({}, 422)); - const res = await scrapegraphai.smartScraper(API_KEY, params); + const res = await sdk.scrape(API_KEY, params); expect(res.status).toBe("error"); expect(res.error).toContain("Invalid parameters"); @@ -89,7 +83,7 @@ describe("smartScraper", () => { test("HTTP 429", async () => { fetchSpy = 
spyOn(globalThis, "fetch").mockResolvedValueOnce(json({}, 429)); - const res = await scrapegraphai.smartScraper(API_KEY, params); + const res = await sdk.scrape(API_KEY, params); expect(res.status).toBe("error"); expect(res.error).toContain("Rate limited"); @@ -97,27 +91,17 @@ describe("smartScraper", () => { test("HTTP 500", async () => { fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json({}, 500)); - const res = await scrapegraphai.smartScraper(API_KEY, params); + const res = await sdk.scrape(API_KEY, params); expect(res.status).toBe("error"); expect(res.error).toContain("Server error"); }); - test("HTTP error with detail", async () => { - fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce( - json({ detail: "quota exceeded" }, 402), - ); - const res = await scrapegraphai.smartScraper(API_KEY, params); - - expect(res.status).toBe("error"); - expect(res.error).toContain("quota exceeded"); - }); - test("timeout", async () => { fetchSpy = spyOn(globalThis, "fetch").mockRejectedValueOnce( new DOMException("The operation was aborted", "TimeoutError"), ); - const res = await scrapegraphai.smartScraper(API_KEY, params); + const res = await sdk.scrape(API_KEY, params); expect(res.status).toBe("error"); expect(res.error).toBe("Request timed out"); @@ -125,211 +109,332 @@ describe("smartScraper", () => { test("network error", async () => { fetchSpy = spyOn(globalThis, "fetch").mockRejectedValueOnce(new Error("fetch failed")); - const res = await scrapegraphai.smartScraper(API_KEY, params); + const res = await sdk.scrape(API_KEY, params); expect(res.status).toBe("error"); expect(res.error).toBe("fetch failed"); }); }); -describe("searchScraper", () => { - const params = { user_prompt: "Best pizza in NYC" }; +describe("extract", () => { + const params = { url: "https://example.com", prompt: "Extract prices" }; test("success", async () => { const body = { - request_id: "abc-123", - status: "completed", - user_prompt: "Best pizza in NYC", - num_results: 
3, - result: { answer: "Joe's Pizza" }, - markdown_content: null, - reference_urls: ["https://example.com"], - error: null, + raw: null, + json: { prices: [10, 20] }, + usage: { promptTokens: 100, completionTokens: 50 }, + metadata: { chunker: { chunks: [{ size: 1000 }] } }, }; fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); - const res = await scrapegraphai.searchScraper(API_KEY, params); + const res = await sdk.extract(API_KEY, params); expect(res.status).toBe("success"); expect(res.data).toEqual(body); - expectPost(0, "/searchscraper", params); + expectRequest(0, "POST", "/extract", params); }); }); -describe("markdownify", () => { - const params = { website_url: "https://example.com" }; +describe("search", () => { + const params = { query: "best pizza NYC" }; test("success", async () => { const body = { - request_id: "abc-123", - status: "completed", - website_url: "https://example.com", - result: "# Hello", - error: "", + results: [{ url: "https://example.com", title: "Pizza", content: "Great pizza" }], + metadata: { search: {}, pages: { requested: 3, scraped: 3 } }, }; fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); - const res = await scrapegraphai.markdownify(API_KEY, params); + const res = await sdk.search(API_KEY, params); expect(res.status).toBe("success"); expect(res.data).toEqual(body); - expectPost(0, "/markdownify", params); + expectRequest(0, "POST", "/search", params); }); }); -describe("scrape", () => { - const params = { website_url: "https://example.com" }; +describe("generateSchema", () => { + const params = { prompt: "Schema for product listing" }; test("success", async () => { const body = { - scrape_request_id: "abc-123", - status: "completed", - html: "...", - branding: null, - metadata: null, - error: "", + refinedPrompt: "Extract product details", + schema: { type: "object", properties: {} }, + usage: { promptTokens: 50, completionTokens: 100 }, }; fetchSpy = spyOn(globalThis, 
"fetch").mockResolvedValueOnce(json(body)); - const res = await scrapegraphai.scrape(API_KEY, params); + const res = await sdk.generateSchema(API_KEY, params); expect(res.status).toBe("success"); expect(res.data).toEqual(body); - expectPost(0, "/scrape", params); + expectRequest(0, "POST", "/schema", params); }); }); -describe("crawl", () => { - const params = { url: "https://example.com", prompt: "Extract main content" }; - - test("immediate completion", async () => { - const body = { status: "done", pages: [{ url: "https://example.com", content: "data" }] }; +describe("getCredits", () => { + test("success", async () => { + const body = { + remaining: 1000, + used: 500, + plan: "pro", + jobs: { crawl: { used: 1, limit: 5 }, monitor: { used: 2, limit: 10 } }, + }; fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); - const res = await scrapegraphai.crawl(API_KEY, params); + const res = await sdk.getCredits(API_KEY); expect(res.status).toBe("success"); - expect(res.data as any).toEqual(body); - expectPost(0, "/crawl"); + expect(res.data).toEqual(body); + expectRequest(0, "GET", "/credits"); }); +}); - test("polls with task_id", async () => { - fetchSpy = spyOn(globalThis, "fetch") - .mockResolvedValueOnce(json({ status: "pending", task_id: "crawl-99" })) - .mockResolvedValueOnce(json({ status: "done", task_id: "crawl-99", pages: [] })); +describe("checkHealth", () => { + test("success", async () => { + const body = { status: "ok", uptime: 12345 }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); - const res = await scrapegraphai.crawl(API_KEY, params); + const res = await sdk.checkHealth(API_KEY); expect(res.status).toBe("success"); - expect(fetchSpy).toHaveBeenCalledTimes(2); - expectGet(1, "/crawl/crawl-99"); + expect(res.data).toEqual(body); + expectRequest(0, "GET", "/healthz", undefined, HEALTH_BASE); }); +}); - test("calls onPoll callback", async () => { - const statuses: string[] = []; - fetchSpy = 
spyOn(globalThis, "fetch") - .mockResolvedValueOnce(json({ status: "pending", task_id: "crawl-99" })) - .mockResolvedValueOnce(json({ status: "done", task_id: "crawl-99", pages: [] })); +describe("getHistory", () => { + test("success without params", async () => { + const body = { + data: [], + pagination: { page: 1, limit: 20, total: 0 }, + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); - await scrapegraphai.crawl(API_KEY, params, (s) => statuses.push(s)); + const res = await sdk.getHistory(API_KEY); - expect(statuses).toEqual(["done"]); + expect(res.status).toBe("success"); + expect(res.data).toEqual(body); + expectRequest(0, "GET", "/history"); }); - test("poll failure", async () => { - fetchSpy = spyOn(globalThis, "fetch") - .mockResolvedValueOnce(json({ status: "pending", task_id: "crawl-99" })) - .mockResolvedValueOnce(json({ status: "failed", error: "Crawl exploded" })); + test("success with params", async () => { + const body = { + data: [], + pagination: { page: 2, limit: 10, total: 50 }, + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); - const res = await scrapegraphai.crawl(API_KEY, params); + const res = await sdk.getHistory(API_KEY, { page: 2, limit: 10, service: "scrape" }); - expect(res.status).toBe("error"); - expect(res.error).toBe("Crawl exploded"); + expect(res.status).toBe("success"); + const [url] = fetchSpy.mock.calls[0] as [string, RequestInit]; + expect(url).toContain("page=2"); + expect(url).toContain("limit=10"); + expect(url).toContain("service=scrape"); }); }); -describe("agenticScraper", () => { - const params = { url: "https://example.com", steps: ["Click login"] }; - +describe("getHistoryEntry", () => { test("success", async () => { const body = { - request_id: "abc-123", + id: "abc-123", + service: "scrape", status: "completed", - result: { screenshot: "base64..." 
}, - error: "", + params: { url: "https://example.com" }, + result: {}, }; fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); - const res = await scrapegraphai.agenticScraper(API_KEY, params); + const res = await sdk.getHistoryEntry(API_KEY, "abc-123"); expect(res.status).toBe("success"); expect(res.data).toEqual(body); - expectPost(0, "/agentic-scrapper", params); + expectRequest(0, "GET", "/history/abc-123"); }); }); -describe("generateSchema", () => { - const params = { user_prompt: "Schema for product" }; +describe("crawl", () => { + const params = { url: "https://example.com" }; - test("success", async () => { + test("start success", async () => { + const body = { + id: "crawl-123", + status: "running", + total: 50, + finished: 0, + pages: [], + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const res = await sdk.crawl.start(API_KEY, params); + + expect(res.status).toBe("success"); + expect(res.data).toEqual(body); + expectRequest(0, "POST", "/crawl", params); + }); + + test("get success", async () => { const body = { - request_id: "abc-123", + id: "crawl-123", status: "completed", - user_prompt: "Schema for product", - generated_schema: { type: "object" }, + total: 10, + finished: 10, + pages: [{ url: "https://example.com", status: "completed" }], }; fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); - const res = await scrapegraphai.generateSchema(API_KEY, params); + const res = await sdk.crawl.get(API_KEY, "crawl-123"); expect(res.status).toBe("success"); expect(res.data).toEqual(body); - expectPost(0, "/generate_schema", params); + expectRequest(0, "GET", "/crawl/crawl-123"); + }); + + test("stop success", async () => { + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json({ ok: true })); + + const res = await sdk.crawl.stop(API_KEY, "crawl-123"); + + expect(res.status).toBe("success"); + expect(res.data).toEqual({ ok: true }); + expectRequest(0, "POST", 
"/crawl/crawl-123/stop"); + }); + + test("resume success", async () => { + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json({ ok: true })); + + const res = await sdk.crawl.resume(API_KEY, "crawl-123"); + + expect(res.status).toBe("success"); + expect(res.data).toEqual({ ok: true }); + expectRequest(0, "POST", "/crawl/crawl-123/resume"); + }); + + test("delete success", async () => { + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json({ ok: true })); + + const res = await sdk.crawl.delete(API_KEY, "crawl-123"); + + expect(res.status).toBe("success"); + expect(res.data).toEqual({ ok: true }); + expectRequest(0, "DELETE", "/crawl/crawl-123"); }); }); -describe("sitemap", () => { - const params = { website_url: "https://example.com" }; +describe("monitor", () => { + const createParams = { url: "https://example.com", interval: "0 * * * *" }; - test("success", async () => { + test("create success", async () => { const body = { - request_id: "abc-123", - urls: ["https://example.com/a", "https://example.com/b"], + cronId: "mon-123", + scheduleId: "sched-456", + interval: "0 * * * *", + status: "active", + config: createParams, + createdAt: "2024-01-01T00:00:00Z", + updatedAt: "2024-01-01T00:00:00Z", }; fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); - const res = await scrapegraphai.sitemap(API_KEY, params); + const res = await sdk.monitor.create(API_KEY, createParams); expect(res.status).toBe("success"); expect(res.data).toEqual(body); - expectPost(0, "/sitemap", params); + expectRequest(0, "POST", "/monitor", createParams); }); -}); -describe("getCredits", () => { - test("success", async () => { - const body = { remaining_credits: 420, total_credits_used: 69 }; + test("list success", async () => { + const body = [ + { + cronId: "mon-123", + scheduleId: "sched-456", + interval: "0 * * * *", + status: "active", + }, + ]; fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); - const res = await 
scrapegraphai.getCredits(API_KEY); + const res = await sdk.monitor.list(API_KEY); expect(res.status).toBe("success"); expect(res.data).toEqual(body); - expectGet(0, "/credits"); + expectRequest(0, "GET", "/monitor"); }); -}); -describe("checkHealth", () => { - test("success", async () => { - const body = { status: "healthy" }; + test("get success", async () => { + const body = { + cronId: "mon-123", + scheduleId: "sched-456", + interval: "0 * * * *", + status: "active", + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const res = await sdk.monitor.get(API_KEY, "mon-123"); + + expect(res.status).toBe("success"); + expect(res.data).toEqual(body); + expectRequest(0, "GET", "/monitor/mon-123"); + }); + + test("update success", async () => { + const updateParams = { interval: "0 0 * * *" }; + const body = { + cronId: "mon-123", + scheduleId: "sched-456", + interval: "0 0 * * *", + status: "active", + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const res = await sdk.monitor.update(API_KEY, "mon-123", updateParams); + + expect(res.status).toBe("success"); + expect(res.data).toEqual(body); + expectRequest(0, "PATCH", "/monitor/mon-123", updateParams); + }); + + test("delete success", async () => { + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json({ ok: true })); + + const res = await sdk.monitor.delete(API_KEY, "mon-123"); + + expect(res.status).toBe("success"); + expect(res.data).toEqual({ ok: true }); + expectRequest(0, "DELETE", "/monitor/mon-123"); + }); + + test("pause success", async () => { + const body = { + cronId: "mon-123", + scheduleId: "sched-456", + interval: "0 * * * *", + status: "paused", + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const res = await sdk.monitor.pause(API_KEY, "mon-123"); + + expect(res.status).toBe("success"); + expect(res.data).toEqual(body); + expectRequest(0, "POST", "/monitor/mon-123/pause"); + }); + + 
test("resume success", async () => { + const body = { + cronId: "mon-123", + scheduleId: "sched-456", + interval: "0 * * * *", + status: "active", + }; fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); - const res = await scrapegraphai.checkHealth(API_KEY); + const res = await sdk.monitor.resume(API_KEY, "mon-123"); expect(res.status).toBe("success"); expect(res.data).toEqual(body); - const [url, init] = fetchSpy.mock.calls[0] as [string, RequestInit]; - expect(url).toBe("https://api.scrapegraphai.com/healthz"); - expect(init.method).toBe("GET"); + expectRequest(0, "POST", "/monitor/mon-123/resume"); }); }); From 8f44af9d09c0f7cbca22785c3c52df4c7d7a30bc Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 14 Apr 2026 14:33:52 +0200 Subject: [PATCH 06/28] test: add comprehensive tests for formats, fetchConfig, and document types - Add unit tests for all scrape formats (markdown, html, json, screenshot, summary, branding, links, images) - Add tests for fetchConfig options (mode, stealth, timeout, headers, cookies, country, scrolls) - Add tests for PDF/DOCX/image document scraping with OCR - Add extract tests for URL, HTML, and markdown inputs with schema - Add search tests with filters (location, timeRange, numResults) - Add crawl/monitor tests with full config options - Fix types to use z.input for request types (allows omitting fields with defaults) - Remove obsolete v1 integration_test.ts Co-Authored-By: Claude Opus 4.5 --- integration_test.ts | 130 ------- src/types.ts | 22 +- tests/integration.test.ts | 34 +- tests/scrapegraphai.test.ts | 695 ++++++++++++++++++++++++++++++++++++ 4 files changed, 739 insertions(+), 142 deletions(-) delete mode 100644 integration_test.ts diff --git a/integration_test.ts b/integration_test.ts deleted file mode 100644 index 10482eb..0000000 --- a/integration_test.ts +++ /dev/null @@ -1,130 +0,0 @@ -import { - type CreditsResponse, - type HealthResponse, - type MarkdownifyResponse, - type 
ScrapeResponse, - type SearchScraperResponse, - type SitemapResponse, - type SmartScraperResponse, - checkHealth, - getCredits, - markdownify, - scrape, - searchScraper, - sitemap, - smartScraper, -} from "./src/index.js"; - -const maybeKey = process.env.SGAI_API_KEY; -if (!maybeKey) { - console.error("Set SGAI_API_KEY env var"); - process.exit(1); -} -const apiKey: string = maybeKey; - -function assert(condition: boolean, msg: string) { - if (!condition) { - console.error(`FAIL: ${msg}`); - process.exit(1); - } -} - -function logResult(name: string, data: unknown) { - console.log(`\n=== ${name} ===`); - console.log(JSON.stringify(data, null, 2)); -} - -async function testHealth() { - const res = await checkHealth(apiKey); - logResult("checkHealth", res); - assert(res.status === "success", "health status should be success"); - const d = res.data as HealthResponse; - assert(typeof d.status === "string", "health.status should be string"); -} - -async function testCredits() { - const res = await getCredits(apiKey); - logResult("getCredits", res); - assert(res.status === "success", "credits status should be success"); - const d = res.data as CreditsResponse; - assert(typeof d.remaining_credits === "number", "remaining_credits should be number"); - assert(typeof d.total_credits_used === "number", "total_credits_used should be number"); -} - -async function testSmartScraper() { - const res = await smartScraper(apiKey, { - user_prompt: "Extract the page title and description", - website_url: "https://example.com", - }); - logResult("smartScraper", res); - assert(res.status === "success", "smartScraper status should be success"); - const d = res.data as SmartScraperResponse; - assert(typeof d.request_id === "string", "request_id should be string"); - assert(typeof d.status === "string", "status should be string"); - assert(typeof d.website_url === "string", "website_url should be string"); - assert(typeof d.user_prompt === "string", "user_prompt should be string"); - 
assert(d.result !== undefined, "result should exist"); -} - -async function testSearchScraper() { - const res = await searchScraper(apiKey, { - user_prompt: "What is the capital of France?", - }); - logResult("searchScraper", res); - assert(res.status === "success", "searchScraper status should be success"); - const d = res.data as SearchScraperResponse; - assert(typeof d.request_id === "string", "request_id should be string"); - assert(typeof d.user_prompt === "string", "user_prompt should be string"); - assert(Array.isArray(d.reference_urls), "reference_urls should be array"); - assert( - d.result !== undefined || d.markdown_content !== undefined, - "result or markdown_content should exist", - ); -} - -async function testMarkdownify() { - const res = await markdownify(apiKey, { - website_url: "https://example.com", - }); - logResult("markdownify", res); - assert(res.status === "success", "markdownify status should be success"); - const d = res.data as MarkdownifyResponse; - assert(typeof d.request_id === "string", "request_id should be string"); - assert(typeof d.website_url === "string", "website_url should be string"); - assert(typeof d.result === "string" || d.result === null, "result should be string or null"); -} - -async function testScrape() { - const res = await scrape(apiKey, { - website_url: "https://example.com", - }); - logResult("scrape", res); - assert(res.status === "success", "scrape status should be success"); - const d = res.data as ScrapeResponse; - assert(typeof d.scrape_request_id === "string", "scrape_request_id should be string"); - assert(typeof d.html === "string", "html should be string"); - assert(typeof d.status === "string", "status should be string"); -} - -async function testSitemap() { - const res = await sitemap(apiKey, { - website_url: "https://scrapegraphai.com", - }); - logResult("sitemap", res); - assert(res.status === "success", "sitemap status should be success"); - const d = res.data as SitemapResponse; - assert(typeof 
d.request_id === "string", "request_id should be string"); - assert(Array.isArray(d.urls), "urls should be array"); -} - -console.log("Running API battle tests...\n"); - -await testHealth(); -await testCredits(); -await testSmartScraper(); -await testSearchScraper(); -await testMarkdownify(); -await testScrape(); -await testSitemap(); - -console.log("\nAll tests passed."); diff --git a/src/types.ts b/src/types.ts index 57f412b..84d5d87 100644 --- a/src/types.ts +++ b/src/types.ts @@ -14,19 +14,19 @@ import type { apiSearchRequestSchema, } from "./schemas.js"; -export type ApiFetchConfig = z.infer; +export type ApiFetchConfig = z.input; export type ApiFetchContentType = z.infer; export type ApiHtmlMode = z.infer; -export type ApiScrapeFormatEntry = z.infer; - -export type ApiScrapeRequest = z.infer; -export type ApiExtractRequest = z.infer; -export type ApiGenerateSchemaRequest = z.infer; -export type ApiSearchRequest = z.infer; -export type ApiCrawlRequest = z.infer; -export type ApiMonitorCreateInput = z.infer; -export type ApiMonitorUpdateInput = z.infer; -export type ApiHistoryFilter = z.infer; +export type ApiScrapeFormatEntry = z.input; + +export type ApiScrapeRequest = z.input; +export type ApiExtractRequest = z.input; +export type ApiGenerateSchemaRequest = z.input; +export type ApiSearchRequest = z.input; +export type ApiCrawlRequest = z.input; +export type ApiMonitorCreateInput = z.input; +export type ApiMonitorUpdateInput = z.input; +export type ApiHistoryFilter = z.input; export type ApiScrapeFormat = | "markdown" diff --git a/tests/integration.test.ts b/tests/integration.test.ts index 3ee4091..9ecef2e 100644 --- a/tests/integration.test.ts +++ b/tests/integration.test.ts @@ -1,5 +1,5 @@ import { describe, expect, test } from "bun:test"; -import { crawl, extract, getCredits, getHistory, monitor, scrape, search } from "../src/index.js"; +import { crawl, extract, getCredits, getHistory, scrape, search } from "../src/index.js"; const API_KEY = 
process.env.SGAI_API_KEY || "sgai-669918e5-55be-4752-a684-f6da788d1384"; @@ -22,6 +22,38 @@ describe("integration", () => { expect(res.data?.results.markdown).toBeDefined(); }); + test("scrape with multiple formats", async () => { + const res = await scrape(API_KEY, { + url: "https://example.com", + formats: [{ type: "markdown", mode: "reader" }, { type: "links" }, { type: "images" }], + }); + console.log("scrape multi:", res.status, res.error); + expect(res.status).toBe("success"); + expect(res.data?.results.markdown).toBeDefined(); + expect(res.data?.results.links).toBeDefined(); + }); + + test("scrape PDF document", async () => { + const res = await scrape(API_KEY, { + url: "https://pdfobject.com/pdf/sample.pdf", + contentType: "application/pdf", + formats: [{ type: "markdown" }], + }); + console.log("scrape PDF:", res.status, res.error); + expect(res.status).toBe("success"); + expect(res.data?.metadata.contentType).toBe("application/pdf"); + }); + + test("scrape with fetchConfig", async () => { + const res = await scrape(API_KEY, { + url: "https://example.com", + fetchConfig: { mode: "fast", timeout: 15000 }, + formats: [{ type: "markdown" }], + }); + console.log("scrape fetchConfig:", res.status, res.error); + expect(res.status).toBe("success"); + }); + test("extract", async () => { const res = await extract(API_KEY, { url: "https://example.com", diff --git a/tests/scrapegraphai.test.ts b/tests/scrapegraphai.test.ts index 6a7591b..b51569b 100644 --- a/tests/scrapegraphai.test.ts +++ b/tests/scrapegraphai.test.ts @@ -55,6 +55,339 @@ describe("scrape", () => { expectRequest(0, "POST", "/scrape", params); }); + test("with fetchConfig - js mode and stealth", async () => { + const body = { + results: { markdown: { data: ["# Hello"] } }, + metadata: { contentType: "text/html", provider: "playwright" }, + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const paramsWithConfig = { + url: "https://example.com", + fetchConfig: { + mode: 
"js" as const, + stealth: true, + timeout: 45000, + wait: 2000, + scrolls: 3, + }, + formats: [{ type: "markdown" as const }], + }; + + const res = await sdk.scrape(API_KEY, paramsWithConfig); + + expect(res.status).toBe("success"); + expectRequest(0, "POST", "/scrape", paramsWithConfig); + }); + + test("with fetchConfig - headers and cookies", async () => { + const body = { + results: { html: { data: [""] } }, + metadata: { contentType: "text/html" }, + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const paramsWithConfig = { + url: "https://example.com", + fetchConfig: { + mode: "fast" as const, + headers: { "X-Custom-Header": "test-value", Authorization: "Bearer token123" }, + cookies: { session: "abc123", tracking: "xyz789" }, + }, + formats: [{ type: "html" as const }], + }; + + const res = await sdk.scrape(API_KEY, paramsWithConfig); + + expect(res.status).toBe("success"); + expectRequest(0, "POST", "/scrape", paramsWithConfig); + }); + + test("with fetchConfig - country geo targeting", async () => { + const body = { + results: { markdown: { data: ["# Localized content"] } }, + metadata: { contentType: "text/html" }, + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const paramsWithConfig = { + url: "https://example.com", + fetchConfig: { country: "de" }, + formats: [{ type: "markdown" as const }], + }; + + const res = await sdk.scrape(API_KEY, paramsWithConfig); + + expect(res.status).toBe("success"); + expectRequest(0, "POST", "/scrape", paramsWithConfig); + }); + + test("multiple formats - markdown, html, links, images", async () => { + const body = { + results: { + markdown: { data: ["# Title"] }, + html: { data: ["

Title

"] }, + links: { data: ["https://example.com/page1"], metadata: { count: 1 } }, + images: { data: ["https://example.com/image.png"], metadata: { count: 1 } }, + }, + metadata: { contentType: "text/html" }, + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const multiFormatParams = { + url: "https://example.com", + formats: [ + { type: "markdown" as const, mode: "reader" as const }, + { type: "html" as const, mode: "prune" as const }, + { type: "links" as const }, + { type: "images" as const }, + ], + }; + + const res = await sdk.scrape(API_KEY, multiFormatParams); + + expect(res.status).toBe("success"); + expect(res.data?.results.markdown).toBeDefined(); + expect(res.data?.results.html).toBeDefined(); + expect(res.data?.results.links).toBeDefined(); + expect(res.data?.results.images).toBeDefined(); + expectRequest(0, "POST", "/scrape", multiFormatParams); + }); + + test("screenshot format with options", async () => { + const body = { + results: { + screenshot: { + data: { url: "https://storage.example.com/shot.png", width: 1920, height: 1080 }, + metadata: { contentType: "image/png" }, + }, + }, + metadata: { contentType: "text/html" }, + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const screenshotParams = { + url: "https://example.com", + formats: [ + { + type: "screenshot" as const, + fullPage: true, + width: 1920, + height: 1080, + quality: 95, + }, + ], + }; + + const res = await sdk.scrape(API_KEY, screenshotParams); + + expect(res.status).toBe("success"); + expect(res.data?.results.screenshot?.data.url).toBeDefined(); + expectRequest(0, "POST", "/scrape", screenshotParams); + }); + + test("json format with prompt and schema", async () => { + const body = { + results: { + json: { + data: { title: "Example", price: 99.99 }, + metadata: { chunker: { chunks: [{ size: 500 }] } }, + }, + }, + metadata: { contentType: "text/html" }, + }; + fetchSpy = spyOn(globalThis, 
"fetch").mockResolvedValueOnce(json(body)); + + const jsonParams = { + url: "https://example.com/product", + formats: [ + { + type: "json" as const, + prompt: "Extract product title and price", + schema: { + type: "object", + properties: { + title: { type: "string" }, + price: { type: "number" }, + }, + }, + }, + ], + }; + + const res = await sdk.scrape(API_KEY, jsonParams); + + expect(res.status).toBe("success"); + expect(res.data?.results.json?.data).toEqual({ title: "Example", price: 99.99 }); + expectRequest(0, "POST", "/scrape", jsonParams); + }); + + test("summary format", async () => { + const body = { + results: { + summary: { + data: "This is a summary of the page content.", + metadata: { chunker: { chunks: [{ size: 1000 }] } }, + }, + }, + metadata: { contentType: "text/html" }, + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const summaryParams = { + url: "https://example.com/article", + formats: [{ type: "summary" as const }], + }; + + const res = await sdk.scrape(API_KEY, summaryParams); + + expect(res.status).toBe("success"); + expect(res.data?.results.summary?.data).toBe("This is a summary of the page content."); + expectRequest(0, "POST", "/scrape", summaryParams); + }); + + test("branding format", async () => { + const body = { + results: { + branding: { + data: { + colorScheme: "light", + colors: { + primary: "#0066cc", + accent: "#ff6600", + background: "#ffffff", + textPrimary: "#333333", + link: "#0066cc", + }, + typography: { + primary: { family: "Inter", fallback: "sans-serif" }, + heading: { family: "Inter", fallback: "sans-serif" }, + mono: { family: "Fira Code", fallback: "monospace" }, + sizes: { h1: "2.5rem", h2: "2rem", body: "1rem" }, + }, + images: { logo: "", favicon: "", ogImage: "" }, + spacing: { baseUnit: 8, borderRadius: "4px" }, + frameworkHints: ["react"], + personality: { tone: "professional", energy: "medium", targetAudience: "developers" }, + confidence: 0.85, + }, + metadata: { + 
branding: { + title: "Example", + description: "Example site", + favicon: "", + language: "en", + themeColor: "#0066cc", + ogTitle: "Example", + ogDescription: "Example site", + ogImage: "", + ogUrl: "https://example.com", + }, + }, + }, + }, + metadata: { contentType: "text/html" }, + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const brandingParams = { + url: "https://example.com", + formats: [{ type: "branding" as const }], + }; + + const res = await sdk.scrape(API_KEY, brandingParams); + + expect(res.status).toBe("success"); + expect(res.data?.results.branding?.data.colorScheme).toBe("light"); + expectRequest(0, "POST", "/scrape", brandingParams); + }); + + test("PDF document scraping", async () => { + const body = { + results: { + markdown: { data: ["# PDF Document\n\nThis is the content extracted from the PDF."] }, + }, + metadata: { + contentType: "application/pdf", + ocr: { + model: "gpt-4o", + pagesProcessed: 2, + pages: [ + { + index: 0, + images: [], + tables: [], + hyperlinks: [], + dimensions: { dpi: 72, height: 792, width: 612 }, + }, + { + index: 1, + images: [], + tables: [], + hyperlinks: [], + dimensions: { dpi: 72, height: 792, width: 612 }, + }, + ], + }, + }, + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const pdfParams = { + url: "https://pdfobject.com/pdf/sample.pdf", + contentType: "application/pdf" as const, + formats: [{ type: "markdown" as const }], + }; + + const res = await sdk.scrape(API_KEY, pdfParams); + + expect(res.status).toBe("success"); + expect(res.data?.metadata.contentType).toBe("application/pdf"); + expect(res.data?.metadata.ocr?.pagesProcessed).toBe(2); + expectRequest(0, "POST", "/scrape", pdfParams); + }); + + test("DOCX document scraping", async () => { + const body = { + results: { markdown: { data: ["# Word Document\n\nContent from DOCX file."] } }, + metadata: { + contentType: 
"application/vnd.openxmlformats-officedocument.wordprocessingml.document", + }, + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const docxParams = { + url: "https://example.com/document.docx", + contentType: + "application/vnd.openxmlformats-officedocument.wordprocessingml.document" as const, + formats: [{ type: "markdown" as const }], + }; + + const res = await sdk.scrape(API_KEY, docxParams); + + expect(res.status).toBe("success"); + expectRequest(0, "POST", "/scrape", docxParams); + }); + + test("image scraping with OCR", async () => { + const body = { + results: { markdown: { data: ["Text extracted from image via OCR"] } }, + metadata: { contentType: "image/png" }, + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const imageParams = { + url: "https://example.com/screenshot.png", + contentType: "image/png" as const, + formats: [{ type: "markdown" as const }], + }; + + const res = await sdk.scrape(API_KEY, imageParams); + + expect(res.status).toBe("success"); + expectRequest(0, "POST", "/scrape", imageParams); + }); + test("HTTP 401", async () => { fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce( json({ detail: "Invalid key" }, 401), @@ -134,6 +467,127 @@ describe("extract", () => { expect(res.data).toEqual(body); expectRequest(0, "POST", "/extract", params); }); + + test("with HTML input instead of URL", async () => { + const body = { + raw: null, + json: { title: "Test Page" }, + usage: { promptTokens: 50, completionTokens: 20 }, + metadata: { chunker: { chunks: [{ size: 200 }] } }, + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const htmlParams = { + html: "Test Page

Hello

", + prompt: "Extract the page title", + }; + + const res = await sdk.extract(API_KEY, htmlParams); + + expect(res.status).toBe("success"); + expectRequest(0, "POST", "/extract", htmlParams); + }); + + test("with markdown input instead of URL", async () => { + const body = { + raw: null, + json: { headings: ["Introduction", "Methods"] }, + usage: { promptTokens: 30, completionTokens: 15 }, + metadata: { chunker: { chunks: [{ size: 100 }] } }, + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const mdParams = { + markdown: "# Introduction\n\nSome content.\n\n# Methods\n\nMore content.", + prompt: "Extract all headings", + }; + + const res = await sdk.extract(API_KEY, mdParams); + + expect(res.status).toBe("success"); + expectRequest(0, "POST", "/extract", mdParams); + }); + + test("with schema for structured output", async () => { + const body = { + raw: null, + json: { products: [{ name: "Widget", price: 29.99, inStock: true }] }, + usage: { promptTokens: 150, completionTokens: 80 }, + metadata: { chunker: { chunks: [{ size: 500 }] } }, + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const schemaParams = { + url: "https://example.com/products", + prompt: "Extract all products with their names, prices, and availability", + schema: { + type: "object", + properties: { + products: { + type: "array", + items: { + type: "object", + properties: { + name: { type: "string" }, + price: { type: "number" }, + inStock: { type: "boolean" }, + }, + }, + }, + }, + }, + }; + + const res = await sdk.extract(API_KEY, schemaParams); + + expect(res.status).toBe("success"); + expect(res.data?.json?.products).toHaveLength(1); + expectRequest(0, "POST", "/extract", schemaParams); + }); + + test("with fetchConfig and contentType for PDF", async () => { + const body = { + raw: "Raw text from PDF", + json: { sections: ["Abstract", "Introduction", "Conclusion"] }, + usage: { promptTokens: 200, completionTokens: 50 }, + 
metadata: { chunker: { chunks: [{ size: 2000 }] }, fetch: { provider: "playwright" } }, + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const pdfParams = { + url: "https://pdfobject.com/pdf/sample.pdf", + contentType: "application/pdf" as const, + prompt: "List all section headings in this document", + fetchConfig: { timeout: 60000 }, + }; + + const res = await sdk.extract(API_KEY, pdfParams); + + expect(res.status).toBe("success"); + expect(res.data?.raw).toBe("Raw text from PDF"); + expectRequest(0, "POST", "/extract", pdfParams); + }); + + test("with html mode options", async () => { + const body = { + raw: null, + json: { mainContent: "Article text without boilerplate" }, + usage: { promptTokens: 100, completionTokens: 30 }, + metadata: { chunker: { chunks: [{ size: 800 }] } }, + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const modeParams = { + url: "https://example.com/article", + prompt: "Extract the main article content", + mode: "reader" as const, + }; + + const res = await sdk.extract(API_KEY, modeParams); + + expect(res.status).toBe("success"); + expectRequest(0, "POST", "/extract", modeParams); + }); }); describe("search", () => { @@ -152,6 +606,113 @@ describe("search", () => { expect(res.data).toEqual(body); expectRequest(0, "POST", "/search", params); }); + + test("with numResults and format options", async () => { + const body = { + results: [ + { url: "https://example1.com", title: "Result 1", content: "

HTML content 1

" }, + { url: "https://example2.com", title: "Result 2", content: "

HTML content 2

" }, + { url: "https://example3.com", title: "Result 3", content: "

HTML content 3

" }, + { url: "https://example4.com", title: "Result 4", content: "

HTML content 4

" }, + { url: "https://example5.com", title: "Result 5", content: "

HTML content 5

" }, + ], + metadata: { search: { provider: "google" }, pages: { requested: 5, scraped: 5 } }, + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const searchParams = { + query: "typescript best practices", + numResults: 5, + format: "html" as const, + }; + + const res = await sdk.search(API_KEY, searchParams); + + expect(res.status).toBe("success"); + expect(res.data?.results).toHaveLength(5); + expectRequest(0, "POST", "/search", searchParams); + }); + + test("with prompt and schema for structured extraction", async () => { + const body = { + results: [{ url: "https://example.com", title: "Product", content: "Widget $29.99" }], + json: { products: [{ name: "Widget", price: 29.99 }] }, + usage: { promptTokens: 100, completionTokens: 30 }, + metadata: { + search: {}, + pages: { requested: 3, scraped: 3 }, + chunker: { chunks: [{ size: 500 }] }, + }, + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const searchParams = { + query: "buy widgets online", + prompt: "Extract product names and prices from search results", + schema: { + type: "object", + properties: { + products: { + type: "array", + items: { + type: "object", + properties: { + name: { type: "string" }, + price: { type: "number" }, + }, + }, + }, + }, + }, + }; + + const res = await sdk.search(API_KEY, searchParams); + + expect(res.status).toBe("success"); + expect(res.data?.json).toBeDefined(); + expectRequest(0, "POST", "/search", searchParams); + }); + + test("with location and time range filters", async () => { + const body = { + results: [ + { url: "https://news.example.com", title: "Breaking News", content: "Recent event" }, + ], + metadata: { search: {}, pages: { requested: 3, scraped: 3 } }, + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const searchParams = { + query: "local news", + locationGeoCode: "us", + timeRange: "past_24_hours" as const, + }; + + const res = await sdk.search(API_KEY, 
searchParams); + + expect(res.status).toBe("success"); + expectRequest(0, "POST", "/search", searchParams); + }); + + test("with fetchConfig and html mode", async () => { + const body = { + results: [{ url: "https://example.com", title: "Test", content: "# Clean content" }], + metadata: { search: {}, pages: { requested: 2, scraped: 2 } }, + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const searchParams = { + query: "test query", + numResults: 2, + mode: "prune" as const, + fetchConfig: { mode: "js" as const, timeout: 45000 }, + }; + + const res = await sdk.search(API_KEY, searchParams); + + expect(res.status).toBe("success"); + expectRequest(0, "POST", "/search", searchParams); + }); }); describe("generateSchema", () => { @@ -275,6 +836,83 @@ describe("crawl", () => { expectRequest(0, "POST", "/crawl", params); }); + test("start with full config - formats and limits", async () => { + const body = { + id: "crawl-456", + status: "running", + total: 100, + finished: 0, + pages: [], + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const fullParams = { + url: "https://example.com", + formats: [ + { type: "markdown" as const, mode: "reader" as const }, + { type: "screenshot" as const, fullPage: false, width: 1280, height: 720, quality: 80 }, + ], + maxDepth: 3, + maxPages: 100, + maxLinksPerPage: 20, + }; + + const res = await sdk.crawl.start(API_KEY, fullParams); + + expect(res.status).toBe("success"); + expectRequest(0, "POST", "/crawl", fullParams); + }); + + test("start with include/exclude patterns", async () => { + const body = { + id: "crawl-789", + status: "running", + total: 30, + finished: 0, + pages: [], + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const patternParams = { + url: "https://example.com", + includePatterns: ["/blog/*", "/docs/*"], + excludePatterns: ["/admin/*", "*.pdf"], + allowExternal: false, + }; + + const res = await 
sdk.crawl.start(API_KEY, patternParams); + + expect(res.status).toBe("success"); + expectRequest(0, "POST", "/crawl", patternParams); + }); + + test("start with fetchConfig and contentTypes", async () => { + const body = { + id: "crawl-abc", + status: "running", + total: 50, + finished: 0, + pages: [], + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const configParams = { + url: "https://example.com", + contentTypes: ["text/html" as const, "application/pdf" as const], + fetchConfig: { + mode: "js" as const, + stealth: true, + timeout: 45000, + wait: 1000, + }, + }; + + const res = await sdk.crawl.start(API_KEY, configParams); + + expect(res.status).toBe("success"); + expectRequest(0, "POST", "/crawl", configParams); + }); + test("get success", async () => { const body = { id: "crawl-123", @@ -345,6 +983,63 @@ describe("monitor", () => { expectRequest(0, "POST", "/monitor", createParams); }); + test("create with multiple formats and webhook", async () => { + const fullParams = { + url: "https://example.com/prices", + name: "Price Monitor", + interval: "0 */6 * * *", + formats: [ + { type: "markdown" as const, mode: "reader" as const }, + { type: "json" as const, prompt: "Extract all product prices", mode: "normal" as const }, + { type: "screenshot" as const, fullPage: true, width: 1440, height: 900, quality: 90 }, + ], + webhookUrl: "https://hooks.example.com/notify", + }; + const body = { + cronId: "mon-456", + scheduleId: "sched-789", + interval: "0 */6 * * *", + status: "active", + config: fullParams, + createdAt: "2024-01-01T00:00:00Z", + updatedAt: "2024-01-01T00:00:00Z", + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const res = await sdk.monitor.create(API_KEY, fullParams); + + expect(res.status).toBe("success"); + expectRequest(0, "POST", "/monitor", fullParams); + }); + + test("create with fetchConfig", async () => { + const configParams = { + url: "https://spa-example.com", + 
interval: "0 0 * * *", + fetchConfig: { + mode: "js" as const, + stealth: true, + wait: 3000, + scrolls: 5, + }, + }; + const body = { + cronId: "mon-789", + scheduleId: "sched-abc", + interval: "0 0 * * *", + status: "active", + config: configParams, + createdAt: "2024-01-01T00:00:00Z", + updatedAt: "2024-01-01T00:00:00Z", + }; + fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); + + const res = await sdk.monitor.create(API_KEY, configParams); + + expect(res.status).toBe("success"); + expectRequest(0, "POST", "/monitor", configParams); + }); + test("list success", async () => { const body = [ { From 13578aaa26afc3942703f98b06c49a9716c2c6fd Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 14 Apr 2026 14:37:43 +0200 Subject: [PATCH 07/28] refactor: rename getHistory/getHistoryEntry to history.list/history.get Follow namespace pattern consistent with crawl.* and monitor.* Co-Authored-By: Claude Opus 4.5 --- CLAUDE.md | 15 +++++++++++ src/index.ts | 3 +-- src/scrapegraphai.ts | 52 +++++++++++++++++-------------------- tests/integration.test.ts | 8 +++--- tests/scrapegraphai.test.ts | 16 +++++------- 5 files changed, 51 insertions(+), 43 deletions(-) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..db61fe8 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,15 @@ +# Claude Code Instructions + +## Before completing any task + +Always run these commands before committing or saying a task is done: + +```bash +bun run format +bun run lint +bunx tsc --noEmit +bun run build +bun test +``` + +No exceptions. 
diff --git a/src/index.ts b/src/index.ts index de300ea..e557c1a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -5,8 +5,7 @@ export { generateSchema, getCredits, checkHealth, - getHistory, - getHistoryEntry, + history, crawl, monitor, } from "./scrapegraphai.js"; diff --git a/src/scrapegraphai.ts b/src/scrapegraphai.ts index 99dac94..e10089b 100644 --- a/src/scrapegraphai.ts +++ b/src/scrapegraphai.ts @@ -186,35 +186,31 @@ export async function checkHealth(apiKey: string): Promise> { - try { - const qs = new URLSearchParams(); - if (params?.page) qs.set("page", String(params.page)); - if (params?.limit) qs.set("limit", String(params.limit)); - if (params?.service) qs.set("service", params.service); - const query = qs.toString(); - const path = query ? `/history?${query}` : "/history"; - const { data, elapsedMs } = await request("GET", path, apiKey); - return ok(data, elapsedMs); - } catch (err) { - return fail(err); - } -} +export const history = { + async list(apiKey: string, params?: ApiHistoryFilter): Promise> { + try { + const qs = new URLSearchParams(); + if (params?.page) qs.set("page", String(params.page)); + if (params?.limit) qs.set("limit", String(params.limit)); + if (params?.service) qs.set("service", params.service); + const query = qs.toString(); + const path = query ? 
`/history?${query}` : "/history"; + const { data, elapsedMs } = await request("GET", path, apiKey); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); + } + }, -export async function getHistoryEntry( - apiKey: string, - id: string, -): Promise> { - try { - const { data, elapsedMs } = await request("GET", `/history/${id}`, apiKey); - return ok(data, elapsedMs); - } catch (err) { - return fail(err); - } -} + async get(apiKey: string, id: string): Promise> { + try { + const { data, elapsedMs } = await request("GET", `/history/${id}`, apiKey); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); + } + }, +}; export const crawl = { async start(apiKey: string, params: ApiCrawlRequest): Promise> { diff --git a/tests/integration.test.ts b/tests/integration.test.ts index 9ecef2e..405ace5 100644 --- a/tests/integration.test.ts +++ b/tests/integration.test.ts @@ -1,5 +1,5 @@ import { describe, expect, test } from "bun:test"; -import { crawl, extract, getCredits, getHistory, scrape, search } from "../src/index.js"; +import { crawl, extract, getCredits, history, scrape, search } from "../src/index.js"; const API_KEY = process.env.SGAI_API_KEY || "sgai-669918e5-55be-4752-a684-f6da788d1384"; @@ -73,9 +73,9 @@ describe("integration", () => { expect(res.data?.results.length).toBeGreaterThan(0); }); - test("getHistory", async () => { - const res = await getHistory(API_KEY, { limit: 5 }); - console.log("getHistory:", res.status, res.data?.pagination); + test("history.list", async () => { + const res = await history.list(API_KEY, { limit: 5 }); + console.log("history.list:", res.status, res.data?.pagination); expect(res.status).toBe("success"); }); diff --git a/tests/scrapegraphai.test.ts b/tests/scrapegraphai.test.ts index b51569b..6aa2372 100644 --- a/tests/scrapegraphai.test.ts +++ b/tests/scrapegraphai.test.ts @@ -765,29 +765,29 @@ describe("checkHealth", () => { }); }); -describe("getHistory", () => { - test("success without params", async () => { 
+describe("history", () => { + test("list success without params", async () => { const body = { data: [], pagination: { page: 1, limit: 20, total: 0 }, }; fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); - const res = await sdk.getHistory(API_KEY); + const res = await sdk.history.list(API_KEY); expect(res.status).toBe("success"); expect(res.data).toEqual(body); expectRequest(0, "GET", "/history"); }); - test("success with params", async () => { + test("list success with params", async () => { const body = { data: [], pagination: { page: 2, limit: 10, total: 50 }, }; fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); - const res = await sdk.getHistory(API_KEY, { page: 2, limit: 10, service: "scrape" }); + const res = await sdk.history.list(API_KEY, { page: 2, limit: 10, service: "scrape" }); expect(res.status).toBe("success"); const [url] = fetchSpy.mock.calls[0] as [string, RequestInit]; @@ -795,10 +795,8 @@ describe("getHistory", () => { expect(url).toContain("limit=10"); expect(url).toContain("service=scrape"); }); -}); -describe("getHistoryEntry", () => { - test("success", async () => { + test("get success", async () => { const body = { id: "abc-123", service: "scrape", @@ -808,7 +806,7 @@ describe("getHistoryEntry", () => { }; fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); - const res = await sdk.getHistoryEntry(API_KEY, "abc-123"); + const res = await sdk.history.get(API_KEY, "abc-123"); expect(res.status).toBe("success"); expect(res.data).toEqual(body); From 6844d3362ac9768b6e0283efcde00bb858be4c1b Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 14 Apr 2026 14:39:06 +0200 Subject: [PATCH 08/28] chore: split unit and integration tests for CI - Rename integration tests to *.spec.ts (excluded from CI) - `bun run test` runs only *.test.ts (unit tests for CI) - `bun run test:integration` runs *.spec.ts (live API tests) Co-Authored-By: Claude Opus 4.5 --- package.json | 4 
++-- tests/{integration.test.ts => integration.spec.ts} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename tests/{integration.test.ts => integration.spec.ts} (100%) diff --git a/package.json b/package.json index 801a314..f5cc9c1 100644 --- a/package.json +++ b/package.json @@ -16,8 +16,8 @@ "build": "tsup", "lint": "biome check .", "format": "biome format . --write", - "test": "bun test tests/", - "test:integration": "bun run integration_test.ts", + "test": "bun test tests/*.test.ts", + "test:integration": "bun test tests/*.spec.ts", "check": "tsc --noEmit && biome check .", "prepublishOnly": "tsup" }, diff --git a/tests/integration.test.ts b/tests/integration.spec.ts similarity index 100% rename from tests/integration.test.ts rename to tests/integration.spec.ts From d71e4d95bb3f7c540c1f25827221ef103e93f306 Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 14 Apr 2026 14:41:26 +0200 Subject: [PATCH 09/28] docs: update examples for v2 API - Remove old v1 examples (smartscraper, markdownify, searchscraper, sitemap, agenticscraper) - Add scrape examples (basic, multi-format, pdf, fetchConfig) - Add extract examples (basic, with-schema) - Add search examples (basic, with-extraction) - Add monitor examples (basic, with-webhook) - Update crawl examples for namespace API - Update schema examples for camelCase fields - Update utilities for v2 response shapes Co-Authored-By: Claude Opus 4.5 --- .../agenticscraper_ai_extraction.ts | 35 ------------- .../agenticscraper/agenticscraper_basic.ts | 22 -------- examples/crawl/crawl_basic.ts | 32 ++++++------ examples/crawl/crawl_markdown.ts | 28 ----------- examples/crawl/crawl_with_formats.ts | 23 +++++++++ examples/crawl/crawl_with_schema.ts | 50 ------------------- examples/extract/extract_basic.ts | 15 ++++++ examples/extract/extract_with_schema.ts | 22 ++++++++ examples/markdownify/markdownify_basic.ts | 13 ----- examples/markdownify/markdownify_stealth.ts | 17 ------- 
examples/monitor/monitor_basic.ts | 18 +++++++ examples/monitor/monitor_with_webhook.ts | 21 ++++++++ examples/schema/generate_schema_basic.ts | 7 ++- examples/schema/modify_existing_schema.ts | 6 +-- examples/scrape/scrape_basic.ts | 6 +-- examples/scrape/scrape_multi_format.ts | 22 ++++++++ examples/scrape/scrape_pdf.ts | 16 ++++++ examples/scrape/scrape_stealth.ts | 17 ------- examples/scrape/scrape_with_branding.ts | 16 ------ examples/scrape/scrape_with_fetchconfig.ts | 22 ++++++++ examples/search/search_basic.ts | 18 +++++++ examples/search/search_with_extraction.ts | 25 ++++++++++ examples/searchscraper/searchscraper_basic.ts | 16 ------ .../searchscraper/searchscraper_markdown.ts | 19 ------- .../searchscraper_with_schema.ts | 37 -------------- examples/sitemap/sitemap_basic.ts | 16 ------ examples/sitemap/sitemap_with_smartscraper.ts | 30 ----------- examples/smartscraper/smartscraper_basic.ts | 15 ------ examples/smartscraper/smartscraper_cookies.ts | 16 ------ examples/smartscraper/smartscraper_html.ts | 47 ----------------- .../smartscraper_infinite_scroll.ts | 16 ------ .../smartscraper/smartscraper_markdown.ts | 40 --------------- .../smartscraper/smartscraper_pagination.ts | 16 ------ examples/smartscraper/smartscraper_stealth.ts | 19 ------- .../smartscraper/smartscraper_with_schema.ts | 36 ------------- examples/utilities/credits.ts | 8 ++- examples/utilities/history.ts | 19 +++---- 37 files changed, 240 insertions(+), 561 deletions(-) delete mode 100644 examples/agenticscraper/agenticscraper_ai_extraction.ts delete mode 100644 examples/agenticscraper/agenticscraper_basic.ts delete mode 100644 examples/crawl/crawl_markdown.ts create mode 100644 examples/crawl/crawl_with_formats.ts delete mode 100644 examples/crawl/crawl_with_schema.ts create mode 100644 examples/extract/extract_basic.ts create mode 100644 examples/extract/extract_with_schema.ts delete mode 100644 examples/markdownify/markdownify_basic.ts delete mode 100644 
examples/markdownify/markdownify_stealth.ts create mode 100644 examples/monitor/monitor_basic.ts create mode 100644 examples/monitor/monitor_with_webhook.ts create mode 100644 examples/scrape/scrape_multi_format.ts create mode 100644 examples/scrape/scrape_pdf.ts delete mode 100644 examples/scrape/scrape_stealth.ts delete mode 100644 examples/scrape/scrape_with_branding.ts create mode 100644 examples/scrape/scrape_with_fetchconfig.ts create mode 100644 examples/search/search_basic.ts create mode 100644 examples/search/search_with_extraction.ts delete mode 100644 examples/searchscraper/searchscraper_basic.ts delete mode 100644 examples/searchscraper/searchscraper_markdown.ts delete mode 100644 examples/searchscraper/searchscraper_with_schema.ts delete mode 100644 examples/sitemap/sitemap_basic.ts delete mode 100644 examples/sitemap/sitemap_with_smartscraper.ts delete mode 100644 examples/smartscraper/smartscraper_basic.ts delete mode 100644 examples/smartscraper/smartscraper_cookies.ts delete mode 100644 examples/smartscraper/smartscraper_html.ts delete mode 100644 examples/smartscraper/smartscraper_infinite_scroll.ts delete mode 100644 examples/smartscraper/smartscraper_markdown.ts delete mode 100644 examples/smartscraper/smartscraper_pagination.ts delete mode 100644 examples/smartscraper/smartscraper_stealth.ts delete mode 100644 examples/smartscraper/smartscraper_with_schema.ts diff --git a/examples/agenticscraper/agenticscraper_ai_extraction.ts b/examples/agenticscraper/agenticscraper_ai_extraction.ts deleted file mode 100644 index db90aa5..0000000 --- a/examples/agenticscraper/agenticscraper_ai_extraction.ts +++ /dev/null @@ -1,35 +0,0 @@ -import { agenticScraper } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - -const schema = { - type: "object", - properties: { - username: { type: "string" }, - email: { type: "string" }, - available_sections: { type: "array", items: { type: "string" } }, - credits_remaining: { type: "number" }, - }, - 
required: ["username", "available_sections"], -}; - -const res = await agenticScraper(apiKey, { - url: "https://dashboard.scrapegraphai.com/", - steps: [ - "Type email@gmail.com in email input box", - "Type test-password@123 in password input box", - "Click on login", - "Wait for dashboard to load completely", - ], - use_session: true, - ai_extraction: true, - user_prompt: - "Extract the user's dashboard info: username, email, available sections, and remaining credits", - output_schema: schema, -}); - -if (res.status === "success") { - console.log("Dashboard Info:", JSON.stringify(res.data?.result, null, 2)); -} else { - console.error("Failed:", res.error); -} diff --git a/examples/agenticscraper/agenticscraper_basic.ts b/examples/agenticscraper/agenticscraper_basic.ts deleted file mode 100644 index 04f6ea9..0000000 --- a/examples/agenticscraper/agenticscraper_basic.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { agenticScraper } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - -const res = await agenticScraper(apiKey, { - url: "https://dashboard.scrapegraphai.com/", - steps: [ - "Type email@gmail.com in email input box", - "Type test-password@123 in password input box", - "Click on login", - ], - use_session: true, - ai_extraction: false, -}); - -if (res.status === "success") { - console.log("Request ID:", res.data?.request_id); - console.log("Status:", res.data?.status); - console.log("Result:", JSON.stringify(res.data?.result, null, 2)); -} else { - console.error("Failed:", res.error); -} diff --git a/examples/crawl/crawl_basic.ts b/examples/crawl/crawl_basic.ts index 5cd34f2..69acb80 100644 --- a/examples/crawl/crawl_basic.ts +++ b/examples/crawl/crawl_basic.ts @@ -2,22 +2,20 @@ import { crawl } from "scrapegraph-js"; const apiKey = process.env.SGAI_API_KEY!; -const res = await crawl( - apiKey, - { - url: "https://scrapegraphai.com", - prompt: "Extract the main content from each page", - max_pages: 5, - depth: 2, - sitemap: true, - }, - 
(status) => console.log(`Poll: ${status}`), -); +const startRes = await crawl.start(apiKey, { + url: "https://example.com", + maxPages: 5, + maxDepth: 2, +}); -if (res.status === "success") { - console.log("Pages crawled:", res.data?.crawled_urls?.length); - console.log("Result:", JSON.stringify(res.data?.llm_result, null, 2)); - console.log(`Took ${res.elapsedMs}ms`); -} else { - console.error("Failed:", res.error); +if (startRes.status !== "success") { + console.error("Failed to start:", startRes.error); + process.exit(1); } + +console.log("Crawl started:", startRes.data?.id); +console.log("Status:", startRes.data?.status); + +const getRes = await crawl.get(apiKey, startRes.data!.id); +console.log("\nProgress:", getRes.data?.finished, "/", getRes.data?.total); +console.log("Pages:", getRes.data?.pages.map((p) => p.url)); diff --git a/examples/crawl/crawl_markdown.ts b/examples/crawl/crawl_markdown.ts deleted file mode 100644 index e0021ef..0000000 --- a/examples/crawl/crawl_markdown.ts +++ /dev/null @@ -1,28 +0,0 @@ -import { crawl } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - -// extraction_mode: false returns raw markdown for each page -const res = await crawl( - apiKey, - { - url: "https://scrapegraphai.com", - extraction_mode: false, - max_pages: 5, - depth: 2, - sitemap: true, - }, - (status) => console.log(`Poll: ${status}`), -); - -if (res.status === "success") { - console.log(`Crawled ${res.data?.pages?.length ?? 0} pages\n`); - for (const page of res.data?.pages ?? 
[]) { - console.log(`--- ${page.url} ---`); - console.log(page.markdown.slice(0, 500)); - console.log("...\n"); - } - console.log(`Took ${res.elapsedMs}ms`); -} else { - console.error("Failed:", res.error); -} diff --git a/examples/crawl/crawl_with_formats.ts b/examples/crawl/crawl_with_formats.ts new file mode 100644 index 0000000..2265af8 --- /dev/null +++ b/examples/crawl/crawl_with_formats.ts @@ -0,0 +1,23 @@ +import { crawl } from "scrapegraph-js"; + +const apiKey = process.env.SGAI_API_KEY!; + +const res = await crawl.start(apiKey, { + url: "https://example.com", + formats: [ + { type: "markdown", mode: "reader" }, + { type: "screenshot", width: 1280, height: 720 }, + ], + maxPages: 10, + maxDepth: 2, + includePatterns: ["/blog/*", "/docs/*"], + excludePatterns: ["/admin/*"], +}); + +if (res.status === "success") { + console.log("Crawl ID:", res.data?.id); + console.log("Status:", res.data?.status); + console.log("Total pages to crawl:", res.data?.total); +} else { + console.error("Failed:", res.error); +} diff --git a/examples/crawl/crawl_with_schema.ts b/examples/crawl/crawl_with_schema.ts deleted file mode 100644 index f236b2a..0000000 --- a/examples/crawl/crawl_with_schema.ts +++ /dev/null @@ -1,50 +0,0 @@ -import { crawl } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - -const schema = { - type: "object", - properties: { - company: { - type: "object", - properties: { - name: { type: "string" }, - description: { type: "string" }, - features: { type: "array", items: { type: "string" } }, - }, - required: ["name", "description"], - }, - services: { - type: "array", - items: { - type: "object", - properties: { - service_name: { type: "string" }, - description: { type: "string" }, - }, - required: ["service_name", "description"], - }, - }, - }, - required: ["company", "services"], -}; - -const res = await crawl( - apiKey, - { - url: "https://scrapegraphai.com", - prompt: "Extract company info, services, and features", - schema, - 
max_pages: 3, - depth: 2, - sitemap: true, - }, - (status) => console.log(`Poll: ${status}`), -); - -if (res.status === "success") { - console.log("Result:", JSON.stringify(res.data?.llm_result, null, 2)); - console.log(`Took ${res.elapsedMs}ms`); -} else { - console.error("Failed:", res.error); -} diff --git a/examples/extract/extract_basic.ts b/examples/extract/extract_basic.ts new file mode 100644 index 0000000..9d2710b --- /dev/null +++ b/examples/extract/extract_basic.ts @@ -0,0 +1,15 @@ +import { extract } from "scrapegraph-js"; + +const apiKey = process.env.SGAI_API_KEY!; + +const res = await extract(apiKey, { + url: "https://example.com", + prompt: "What is this page about? Extract the main heading and description.", +}); + +if (res.status === "success") { + console.log("Extracted:", JSON.stringify(res.data?.json, null, 2)); + console.log("\nTokens used:", res.data?.usage); +} else { + console.error("Failed:", res.error); +} diff --git a/examples/extract/extract_with_schema.ts b/examples/extract/extract_with_schema.ts new file mode 100644 index 0000000..c274c54 --- /dev/null +++ b/examples/extract/extract_with_schema.ts @@ -0,0 +1,22 @@ +import { extract } from "scrapegraph-js"; + +const apiKey = process.env.SGAI_API_KEY!; + +const res = await extract(apiKey, { + url: "https://example.com", + prompt: "Extract the page title and description", + schema: { + type: "object", + properties: { + title: { type: "string" }, + description: { type: "string" }, + }, + required: ["title"], + }, +}); + +if (res.status === "success") { + console.log("Extracted:", JSON.stringify(res.data?.json, null, 2)); +} else { + console.error("Failed:", res.error); +} diff --git a/examples/markdownify/markdownify_basic.ts b/examples/markdownify/markdownify_basic.ts deleted file mode 100644 index b8bda56..0000000 --- a/examples/markdownify/markdownify_basic.ts +++ /dev/null @@ -1,13 +0,0 @@ -import { markdownify } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - 
-const res = await markdownify(apiKey, { - website_url: "https://scrapegraphai.com", -}); - -if (res.status === "success") { - console.log(res.data?.result); -} else { - console.error("Failed:", res.error); -} diff --git a/examples/markdownify/markdownify_stealth.ts b/examples/markdownify/markdownify_stealth.ts deleted file mode 100644 index 056d54d..0000000 --- a/examples/markdownify/markdownify_stealth.ts +++ /dev/null @@ -1,17 +0,0 @@ -import { markdownify } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - -const res = await markdownify(apiKey, { - website_url: "https://example.com", - stealth: true, - headers: { - "Accept-Language": "en-US,en;q=0.9", - }, -}); - -if (res.status === "success") { - console.log(res.data?.result); -} else { - console.error("Failed:", res.error); -} diff --git a/examples/monitor/monitor_basic.ts b/examples/monitor/monitor_basic.ts new file mode 100644 index 0000000..898feac --- /dev/null +++ b/examples/monitor/monitor_basic.ts @@ -0,0 +1,18 @@ +import { monitor } from "scrapegraph-js"; + +const apiKey = process.env.SGAI_API_KEY!; + +const res = await monitor.create(apiKey, { + url: "https://example.com", + name: "Example Monitor", + interval: "0 * * * *", + formats: [{ type: "markdown" }], +}); + +if (res.status === "success") { + console.log("Monitor created:", res.data?.cronId); + console.log("Status:", res.data?.status); + console.log("Interval:", res.data?.interval); +} else { + console.error("Failed:", res.error); +} diff --git a/examples/monitor/monitor_with_webhook.ts b/examples/monitor/monitor_with_webhook.ts new file mode 100644 index 0000000..b10173d --- /dev/null +++ b/examples/monitor/monitor_with_webhook.ts @@ -0,0 +1,21 @@ +import { monitor } from "scrapegraph-js"; + +const apiKey = process.env.SGAI_API_KEY!; + +const res = await monitor.create(apiKey, { + url: "https://example.com/prices", + name: "Price Monitor", + interval: "0 */6 * * *", + formats: [ + { type: "markdown" }, + { type: "json", 
prompt: "Extract all product prices" }, + ], + webhookUrl: "https://your-server.com/webhook", +}); + +if (res.status === "success") { + console.log("Monitor created:", res.data?.cronId); + console.log("Will notify:", res.data?.config.webhookUrl); +} else { + console.error("Failed:", res.error); +} diff --git a/examples/schema/generate_schema_basic.ts b/examples/schema/generate_schema_basic.ts index 4efca04..945e55a 100644 --- a/examples/schema/generate_schema_basic.ts +++ b/examples/schema/generate_schema_basic.ts @@ -3,14 +3,13 @@ import { generateSchema } from "scrapegraph-js"; const apiKey = process.env.SGAI_API_KEY!; const res = await generateSchema(apiKey, { - user_prompt: - "Find laptops with specifications like brand, processor, RAM, storage, and price", + prompt: "Find laptops with specifications like brand, processor, RAM, storage, and price", }); if (res.status === "success") { - console.log("Refined prompt:", res.data?.refined_prompt); + console.log("Refined prompt:", res.data?.refinedPrompt); console.log("\nGenerated schema:"); - console.log(JSON.stringify(res.data?.generated_schema, null, 2)); + console.log(JSON.stringify(res.data?.schema, null, 2)); } else { console.error("Failed:", res.error); } diff --git a/examples/schema/modify_existing_schema.ts b/examples/schema/modify_existing_schema.ts index d75e4a7..74fd0b8 100644 --- a/examples/schema/modify_existing_schema.ts +++ b/examples/schema/modify_existing_schema.ts @@ -22,13 +22,13 @@ const existingSchema = { }; const res = await generateSchema(apiKey, { - user_prompt: "Add brand, category, and rating fields to the existing product schema", - existing_schema: existingSchema, + prompt: "Add brand, category, and rating fields to the existing product schema", + existingSchema, }); if (res.status === "success") { console.log("Modified schema:"); - console.log(JSON.stringify(res.data?.generated_schema, null, 2)); + console.log(JSON.stringify(res.data?.schema, null, 2)); } else { console.error("Failed:", 
res.error); } diff --git a/examples/scrape/scrape_basic.ts b/examples/scrape/scrape_basic.ts index 7531f95..7bf1c42 100644 --- a/examples/scrape/scrape_basic.ts +++ b/examples/scrape/scrape_basic.ts @@ -3,12 +3,12 @@ import { scrape } from "scrapegraph-js"; const apiKey = process.env.SGAI_API_KEY!; const res = await scrape(apiKey, { - website_url: "https://example.com", + url: "https://example.com", + formats: [{ type: "markdown" }], }); if (res.status === "success") { - console.log(`HTML length: ${res.data?.html.length} chars`); - console.log("Preview:", res.data?.html.slice(0, 500)); + console.log("Markdown:", res.data?.results.markdown?.data); console.log(`\nTook ${res.elapsedMs}ms`); } else { console.error("Failed:", res.error); diff --git a/examples/scrape/scrape_multi_format.ts b/examples/scrape/scrape_multi_format.ts new file mode 100644 index 0000000..2ee73ff --- /dev/null +++ b/examples/scrape/scrape_multi_format.ts @@ -0,0 +1,22 @@ +import { scrape } from "scrapegraph-js"; + +const apiKey = process.env.SGAI_API_KEY!; + +const res = await scrape(apiKey, { + url: "https://example.com", + formats: [ + { type: "markdown", mode: "reader" }, + { type: "links" }, + { type: "images" }, + { type: "screenshot", fullPage: true, width: 1440, height: 900 }, + ], +}); + +if (res.status === "success") { + console.log("Markdown:", res.data?.results.markdown?.data?.slice(0, 200)); + console.log("\nLinks:", res.data?.results.links?.data?.slice(0, 5)); + console.log("\nImages:", res.data?.results.images?.data?.slice(0, 3)); + console.log("\nScreenshot URL:", res.data?.results.screenshot?.data.url); +} else { + console.error("Failed:", res.error); +} diff --git a/examples/scrape/scrape_pdf.ts b/examples/scrape/scrape_pdf.ts new file mode 100644 index 0000000..3f11bc2 --- /dev/null +++ b/examples/scrape/scrape_pdf.ts @@ -0,0 +1,16 @@ +import { scrape } from "scrapegraph-js"; + +const apiKey = process.env.SGAI_API_KEY!; + +const res = await scrape(apiKey, { + url: 
"https://pdfobject.com/pdf/sample.pdf", + contentType: "application/pdf", + formats: [{ type: "markdown" }], +}); + +if (res.status === "success") { + console.log("PDF Content:", res.data?.results.markdown?.data); + console.log("\nPages processed:", res.data?.metadata.ocr?.pagesProcessed); +} else { + console.error("Failed:", res.error); +} diff --git a/examples/scrape/scrape_stealth.ts b/examples/scrape/scrape_stealth.ts deleted file mode 100644 index 9bbf76e..0000000 --- a/examples/scrape/scrape_stealth.ts +++ /dev/null @@ -1,17 +0,0 @@ -import { scrape } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - -const res = await scrape(apiKey, { - website_url: "https://example.com", - stealth: true, - country_code: "us", -}); - -if (res.status === "success") { - console.log(`HTML length: ${res.data?.html.length} chars`); - console.log("Preview:", res.data?.html.slice(0, 500)); - console.log(`\nTook ${res.elapsedMs}ms`); -} else { - console.error("Failed:", res.error); -} diff --git a/examples/scrape/scrape_with_branding.ts b/examples/scrape/scrape_with_branding.ts deleted file mode 100644 index 9eac191..0000000 --- a/examples/scrape/scrape_with_branding.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { scrape } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - -const res = await scrape(apiKey, { - website_url: "https://example.com", - branding: true, -}); - -if (res.status === "success") { - console.log("Branding:", JSON.stringify(res.data?.branding, null, 2)); - console.log(`HTML length: ${res.data?.html.length} chars`); - console.log(`\nTook ${res.elapsedMs}ms`); -} else { - console.error("Failed:", res.error); -} diff --git a/examples/scrape/scrape_with_fetchconfig.ts b/examples/scrape/scrape_with_fetchconfig.ts new file mode 100644 index 0000000..efdcfe7 --- /dev/null +++ b/examples/scrape/scrape_with_fetchconfig.ts @@ -0,0 +1,22 @@ +import { scrape } from "scrapegraph-js"; + +const apiKey = process.env.SGAI_API_KEY!; + +const res 
= await scrape(apiKey, { + url: "https://example.com", + fetchConfig: { + mode: "js", + stealth: true, + timeout: 45000, + wait: 2000, + scrolls: 3, + }, + formats: [{ type: "markdown" }], +}); + +if (res.status === "success") { + console.log("Content:", res.data?.results.markdown?.data); + console.log("\nProvider:", res.data?.metadata.provider); +} else { + console.error("Failed:", res.error); +} diff --git a/examples/search/search_basic.ts b/examples/search/search_basic.ts new file mode 100644 index 0000000..4a0a412 --- /dev/null +++ b/examples/search/search_basic.ts @@ -0,0 +1,18 @@ +import { search } from "scrapegraph-js"; + +const apiKey = process.env.SGAI_API_KEY!; + +const res = await search(apiKey, { + query: "best programming languages 2024", + numResults: 3, +}); + +if (res.status === "success") { + for (const result of res.data?.results ?? []) { + console.log(`\n${result.title}`); + console.log(`URL: ${result.url}`); + console.log(`Content: ${result.content.slice(0, 200)}...`); + } +} else { + console.error("Failed:", res.error); +} diff --git a/examples/search/search_with_extraction.ts b/examples/search/search_with_extraction.ts new file mode 100644 index 0000000..e16e0ba --- /dev/null +++ b/examples/search/search_with_extraction.ts @@ -0,0 +1,25 @@ +import { search } from "scrapegraph-js"; + +const apiKey = process.env.SGAI_API_KEY!; + +const res = await search(apiKey, { + query: "typescript best practices", + numResults: 5, + prompt: "Extract the main tips and recommendations", + schema: { + type: "object", + properties: { + tips: { + type: "array", + items: { type: "string" }, + }, + }, + }, +}); + +if (res.status === "success") { + console.log("Search results:", res.data?.results.length); + console.log("\nExtracted tips:", JSON.stringify(res.data?.json, null, 2)); +} else { + console.error("Failed:", res.error); +} diff --git a/examples/searchscraper/searchscraper_basic.ts b/examples/searchscraper/searchscraper_basic.ts deleted file mode 100644 
index 78e56a2..0000000 --- a/examples/searchscraper/searchscraper_basic.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { searchScraper } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - -const res = await searchScraper(apiKey, { - user_prompt: "What is the latest version of Python and what are its main features?", - num_results: 3, -}); - -if (res.status === "success") { - console.log("Result:", JSON.stringify(res.data?.result, null, 2)); - console.log("\nReference URLs:"); - res.data?.reference_urls.forEach((url, i) => console.log(` ${i + 1}. ${url}`)); -} else { - console.error("Failed:", res.error); -} diff --git a/examples/searchscraper/searchscraper_markdown.ts b/examples/searchscraper/searchscraper_markdown.ts deleted file mode 100644 index 15f6789..0000000 --- a/examples/searchscraper/searchscraper_markdown.ts +++ /dev/null @@ -1,19 +0,0 @@ -import { searchScraper } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - -// extraction_mode: false returns raw markdown instead of AI-extracted data -// costs 2 credits per page vs 10 for AI extraction -const res = await searchScraper(apiKey, { - user_prompt: "Latest developments in artificial intelligence", - num_results: 3, - extraction_mode: false, -}); - -if (res.status === "success") { - console.log("Result:", JSON.stringify(res.data?.result, null, 2)); - console.log("\nReference URLs:"); - res.data?.reference_urls.forEach((url, i) => console.log(` ${i + 1}. 
${url}`)); -} else { - console.error("Failed:", res.error); -} diff --git a/examples/searchscraper/searchscraper_with_schema.ts b/examples/searchscraper/searchscraper_with_schema.ts deleted file mode 100644 index 085062d..0000000 --- a/examples/searchscraper/searchscraper_with_schema.ts +++ /dev/null @@ -1,37 +0,0 @@ -import { searchScraper } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - -const schema = { - type: "object", - properties: { - version: { type: "string" }, - release_date: { type: "string" }, - features: { - type: "array", - items: { - type: "object", - properties: { - name: { type: "string" }, - description: { type: "string" }, - }, - required: ["name", "description"], - }, - }, - }, - required: ["version", "features"], -}; - -const res = await searchScraper(apiKey, { - user_prompt: "What is the latest version of Python and its new features?", - num_results: 5, - output_schema: schema, -}); - -if (res.status === "success") { - console.log("Result:", JSON.stringify(res.data?.result, null, 2)); - console.log("\nReference URLs:"); - res.data?.reference_urls.forEach((url, i) => console.log(` ${i + 1}. ${url}`)); -} else { - console.error("Failed:", res.error); -} diff --git a/examples/sitemap/sitemap_basic.ts b/examples/sitemap/sitemap_basic.ts deleted file mode 100644 index a1ffdd4..0000000 --- a/examples/sitemap/sitemap_basic.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { sitemap } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - -const res = await sitemap(apiKey, { - website_url: "https://scrapegraphai.com", -}); - -if (res.status === "success") { - const urls = res.data?.urls ?? []; - console.log(`Found ${urls.length} URLs:\n`); - urls.slice(0, 20).forEach((url, i) => console.log(` ${i + 1}. ${url}`)); - if (urls.length > 20) console.log(` ... 
and ${urls.length - 20} more`); -} else { - console.error("Failed:", res.error); -} diff --git a/examples/sitemap/sitemap_with_smartscraper.ts b/examples/sitemap/sitemap_with_smartscraper.ts deleted file mode 100644 index 6c4a965..0000000 --- a/examples/sitemap/sitemap_with_smartscraper.ts +++ /dev/null @@ -1,30 +0,0 @@ -import { sitemap, smartScraper } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - -const sitemapRes = await sitemap(apiKey, { - website_url: "https://scrapegraphai.com", -}); - -if (sitemapRes.status !== "success") { - console.error("Sitemap failed:", sitemapRes.error); - process.exit(1); -} - -const urls = sitemapRes.data?.urls ?? []; -console.log(`Found ${urls.length} URLs, scraping first 3...\n`); - -for (const url of urls.slice(0, 3)) { - console.log(`Scraping: ${url}`); - const res = await smartScraper(apiKey, { - user_prompt: "Extract the page title and main content summary", - website_url: url, - }); - - if (res.status === "success") { - console.log(" Result:", JSON.stringify(res.data?.result, null, 2)); - } else { - console.error(" Failed:", res.error); - } - console.log(); -} diff --git a/examples/smartscraper/smartscraper_basic.ts b/examples/smartscraper/smartscraper_basic.ts deleted file mode 100644 index 90dda7f..0000000 --- a/examples/smartscraper/smartscraper_basic.ts +++ /dev/null @@ -1,15 +0,0 @@ -import { smartScraper } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - -const res = await smartScraper(apiKey, { - user_prompt: "What does the company do? 
Extract the main heading and description", - website_url: "https://scrapegraphai.com", -}); - -if (res.status === "success") { - console.log("Result:", JSON.stringify(res.data?.result, null, 2)); - console.log(`Took ${res.elapsedMs}ms`); -} else { - console.error("Failed:", res.error); -} diff --git a/examples/smartscraper/smartscraper_cookies.ts b/examples/smartscraper/smartscraper_cookies.ts deleted file mode 100644 index 9674fd8..0000000 --- a/examples/smartscraper/smartscraper_cookies.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { smartScraper } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - -const res = await smartScraper(apiKey, { - user_prompt: "Extract all cookies info", - website_url: "https://httpbin.org/cookies", - cookies: { session_id: "abc123", user_token: "xyz789" }, -}); - -if (res.status === "success") { - console.log("Cookies:", JSON.stringify(res.data?.result, null, 2)); - console.log(`Took ${res.elapsedMs}ms`); -} else { - console.error("Failed:", res.error); -} diff --git a/examples/smartscraper/smartscraper_html.ts b/examples/smartscraper/smartscraper_html.ts deleted file mode 100644 index b0cfed7..0000000 --- a/examples/smartscraper/smartscraper_html.ts +++ /dev/null @@ -1,47 +0,0 @@ -import { smartScraper } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - -const html = ` - - - -
-

Laptop Pro 15

-
TechCorp
-
$1,299.99
-
4.5/5
-
In Stock
-

High-performance laptop with 15-inch display, 16GB RAM, and 512GB SSD

-
-
-

Wireless Mouse Elite

-
PeripheralCo
-
$29.99
-
4.8/5
-
In Stock
-

Ergonomic wireless mouse with precision tracking

-
-
-

USB-C Hub Pro

-
ConnectTech
-
$49.99
-
4.3/5
-
Out of Stock
-

7-in-1 USB-C hub with HDMI, USB 3.0, and SD card reader

-
- - -`; - -const res = await smartScraper(apiKey, { - user_prompt: "Extract all products with name, brand, price, rating, and stock status", - website_html: html, -}); - -if (res.status === "success") { - console.log("Products:", JSON.stringify(res.data?.result, null, 2)); - console.log(`Took ${res.elapsedMs}ms`); -} else { - console.error("Failed:", res.error); -} diff --git a/examples/smartscraper/smartscraper_infinite_scroll.ts b/examples/smartscraper/smartscraper_infinite_scroll.ts deleted file mode 100644 index 3e7e008..0000000 --- a/examples/smartscraper/smartscraper_infinite_scroll.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { smartScraper } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - -const res = await smartScraper(apiKey, { - user_prompt: "Extract all post titles and authors", - website_url: "https://news.ycombinator.com", - number_of_scrolls: 5, -}); - -if (res.status === "success") { - console.log("Posts:", JSON.stringify(res.data?.result, null, 2)); - console.log(`Took ${res.elapsedMs}ms`); -} else { - console.error("Failed:", res.error); -} diff --git a/examples/smartscraper/smartscraper_markdown.ts b/examples/smartscraper/smartscraper_markdown.ts deleted file mode 100644 index 1fbacc3..0000000 --- a/examples/smartscraper/smartscraper_markdown.ts +++ /dev/null @@ -1,40 +0,0 @@ -import { smartScraper } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - -const markdown = ` -# Product Catalog - -## Laptop Pro 15 -- **Brand**: TechCorp -- **Price**: $1,299.99 -- **Rating**: 4.5/5 -- **In Stock**: Yes -- **Description**: High-performance laptop with 15-inch display, 16GB RAM, and 512GB SSD - -## Wireless Mouse Elite -- **Brand**: PeripheralCo -- **Price**: $29.99 -- **Rating**: 4.8/5 -- **In Stock**: Yes -- **Description**: Ergonomic wireless mouse with precision tracking - -## USB-C Hub Pro -- **Brand**: ConnectTech -- **Price**: $49.99 -- **Rating**: 4.3/5 -- **In Stock**: No -- **Description**: 7-in-1 USB-C hub 
with HDMI, USB 3.0, and SD card reader -`; - -const res = await smartScraper(apiKey, { - user_prompt: "Extract all products with name, brand, price, rating, and stock status", - website_markdown: markdown, -}); - -if (res.status === "success") { - console.log("Products:", JSON.stringify(res.data?.result, null, 2)); - console.log(`Took ${res.elapsedMs}ms`); -} else { - console.error("Failed:", res.error); -} diff --git a/examples/smartscraper/smartscraper_pagination.ts b/examples/smartscraper/smartscraper_pagination.ts deleted file mode 100644 index 93aa792..0000000 --- a/examples/smartscraper/smartscraper_pagination.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { smartScraper } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - -const res = await smartScraper(apiKey, { - user_prompt: "Extract all product info including name, price, rating, and image_url", - website_url: "https://www.amazon.in/s?k=tv", - total_pages: 3, -}); - -if (res.status === "success") { - console.log("Products:", JSON.stringify(res.data?.result, null, 2)); - console.log(`Took ${res.elapsedMs}ms`); -} else { - console.error("Failed:", res.error); -} diff --git a/examples/smartscraper/smartscraper_stealth.ts b/examples/smartscraper/smartscraper_stealth.ts deleted file mode 100644 index 48dd2da..0000000 --- a/examples/smartscraper/smartscraper_stealth.ts +++ /dev/null @@ -1,19 +0,0 @@ -import { smartScraper } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - -const res = await smartScraper(apiKey, { - user_prompt: "Extract the main content and headings", - website_url: "https://example.com", - stealth: true, - headers: { - "Accept-Language": "en-US,en;q=0.9", - }, -}); - -if (res.status === "success") { - console.log("Result:", JSON.stringify(res.data?.result, null, 2)); - console.log(`Took ${res.elapsedMs}ms`); -} else { - console.error("Failed:", res.error); -} diff --git a/examples/smartscraper/smartscraper_with_schema.ts 
b/examples/smartscraper/smartscraper_with_schema.ts deleted file mode 100644 index d9ca09a..0000000 --- a/examples/smartscraper/smartscraper_with_schema.ts +++ /dev/null @@ -1,36 +0,0 @@ -import { smartScraper } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - -const schema = { - type: "object", - properties: { - products: { - type: "array", - items: { - type: "object", - properties: { - name: { type: "string" }, - price: { type: "number" }, - rating: { type: "string" }, - image_url: { type: "string", format: "uri" }, - }, - required: ["name", "price"], - }, - }, - }, - required: ["products"], -}; - -const res = await smartScraper(apiKey, { - user_prompt: "Extract all product info including name, price, rating, and image_url", - website_url: "https://www.amazon.in/s?k=laptop", - output_schema: schema, -}); - -if (res.status === "success") { - console.log("Products:", JSON.stringify(res.data?.result, null, 2)); - console.log(`Took ${res.elapsedMs}ms`); -} else { - console.error("Failed:", res.error); -} diff --git a/examples/utilities/credits.ts b/examples/utilities/credits.ts index 0815236..99b0249 100644 --- a/examples/utilities/credits.ts +++ b/examples/utilities/credits.ts @@ -5,8 +5,12 @@ const apiKey = process.env.SGAI_API_KEY!; const res = await getCredits(apiKey); if (res.status === "success") { - console.log("Remaining credits:", res.data?.remaining_credits); - console.log("Total credits used:", res.data?.total_credits_used); + console.log("Plan:", res.data?.plan); + console.log("Remaining credits:", res.data?.remaining); + console.log("Used credits:", res.data?.used); + console.log("\nJob limits:"); + console.log(" Crawl:", res.data?.jobs.crawl.used, "/", res.data?.jobs.crawl.limit); + console.log(" Monitor:", res.data?.jobs.monitor.used, "/", res.data?.jobs.monitor.limit); } else { console.error("Failed:", res.error); } diff --git a/examples/utilities/history.ts b/examples/utilities/history.ts index 89244f4..67e4160 100644 --- 
a/examples/utilities/history.ts +++ b/examples/utilities/history.ts @@ -1,20 +1,17 @@ -import { history, HISTORY_SERVICES } from "scrapegraph-js"; +import { history } from "scrapegraph-js"; const apiKey = process.env.SGAI_API_KEY!; -console.log("Available services:", HISTORY_SERVICES.join(", ")); - -const res = await history(apiKey, { - service: "smartscraper", - page: 1, - page_size: 5, +const res = await history.list(apiKey, { + service: "scrape", + limit: 5, }); if (res.status === "success") { - console.log(`\nTotal requests: ${res.data?.total_count}`); - console.log(`Page ${res.data?.page} of ${Math.ceil((res.data?.total_count ?? 0) / (res.data?.page_size ?? 10))}\n`); - for (const entry of res.data?.requests ?? []) { - console.log(` [${entry.status}] ${entry.request_id}`); + console.log(`Total: ${res.data?.pagination.total}`); + console.log(`Page ${res.data?.pagination.page}\n`); + for (const entry of res.data?.data ?? []) { + console.log(` [${entry.status}] ${entry.service} - ${entry.id}`); } } else { console.error("Failed:", res.error); From e30cc5558cd691d9be840332a15fcdc69889d656 Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 14 Apr 2026 14:44:17 +0200 Subject: [PATCH 10/28] docs: rewrite README for v2 API - Update all API documentation for v2 endpoints - Add examples table with path and description - Add scrape_json_extraction example - Enhance scrape_pdf and scrape_multi_format examples - Update environment variables section Co-Authored-By: Claude Opus 4.5 --- README.md | 265 +++++++++++----------- examples/scrape/scrape_json_extraction.ts | 41 ++++ examples/scrape/scrape_multi_format.ts | 51 ++++- examples/scrape/scrape_pdf.ts | 22 +- src/scrapegraphai.ts | 9 +- 5 files changed, 247 insertions(+), 141 deletions(-) create mode 100644 examples/scrape/scrape_json_extraction.ts diff --git a/README.md b/README.md index 1af72b7..47774a9 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ ScrapeGraph API Banner

-Official TypeScript SDK for the [ScrapeGraph AI API](https://scrapegraphai.com). Zero dependencies. +Official TypeScript SDK for the [ScrapeGraph AI API](https://scrapegraphai.com) v2. ## Install @@ -20,15 +20,15 @@ bun add scrapegraph-js ## Quick Start ```ts -import { smartScraper } from "scrapegraph-js"; +import { scrape } from "scrapegraph-js"; -const result = await smartScraper("your-api-key", { - user_prompt: "Extract the page title and description", - website_url: "https://example.com", +const result = await scrape("your-api-key", { + url: "https://example.com", + formats: [{ type: "markdown" }], }); if (result.status === "success") { - console.log(result.data); + console.log(result.data?.results.markdown?.data); } else { console.error(result.error); } @@ -47,187 +47,187 @@ type ApiResult = { ## API -All functions take `(apiKey, params)` where `params` is a typed object. - -### smartScraper +### scrape -Extract structured data from a webpage using AI. +Scrape a webpage in multiple formats (markdown, html, screenshot, json, etc). 
```ts -const res = await smartScraper("key", { - user_prompt: "Extract product names and prices", - website_url: "https://example.com", - output_schema: { /* JSON schema */ }, // optional - number_of_scrolls: 5, // optional, 0-50 - total_pages: 3, // optional, 1-100 - stealth: true, // optional, +4 credits - cookies: { session: "abc" }, // optional - headers: { "Accept-Language": "en" }, // optional - steps: ["Click 'Load More'"], // optional, browser actions - wait_ms: 5000, // optional, default 3000 - country_code: "us", // optional, proxy routing - mock: true, // optional, testing mode +const res = await scrape("key", { + url: "https://example.com", + formats: [ + { type: "markdown", mode: "reader" }, + { type: "screenshot", fullPage: true, width: 1440, height: 900 }, + { type: "json", prompt: "Extract product info" }, + ], + contentType: "text/html", // optional, auto-detected + fetchConfig: { // optional + mode: "js", // "auto" | "fast" | "js" + stealth: true, + timeout: 30000, + wait: 2000, + scrolls: 3, + headers: { "Accept-Language": "en" }, + cookies: { session: "abc" }, + country: "us", + }, }); ``` -### searchScraper +**Formats:** +- `markdown` — Clean markdown (modes: `normal`, `reader`, `prune`) +- `html` — Raw HTML (modes: `normal`, `reader`, `prune`) +- `links` — All links on the page +- `images` — All image URLs +- `summary` — AI-generated summary +- `json` — Structured extraction with prompt/schema +- `branding` — Brand colors, typography, logos +- `screenshot` — Page screenshot (fullPage, width, height, quality) -Search the web and extract structured results. +### extract + +Extract structured data from a URL, HTML, or markdown using AI. 
```ts -const res = await searchScraper("key", { - user_prompt: "Latest TypeScript release features", - num_results: 5, // optional, 3-20 - extraction_mode: true, // optional, false for markdown - output_schema: { /* */ }, // optional - stealth: true, // optional, +4 credits - time_range: "past_week", // optional, past_hour|past_24_hours|past_week|past_month|past_year - location_geo_code: "us", // optional, geographic targeting - mock: true, // optional, testing mode +const res = await extract("key", { + url: "https://example.com", + prompt: "Extract product names and prices", + schema: { /* JSON schema */ }, // optional + mode: "reader", // optional + fetchConfig: { /* ... */ }, // optional }); -// res.data.result (extraction mode) or res.data.markdown_content (markdown mode) +// Or pass html/markdown directly instead of url ``` -### markdownify +### search -Convert a webpage to clean markdown. +Search the web and optionally extract structured data. ```ts -const res = await markdownify("key", { - website_url: "https://example.com", - stealth: true, // optional, +4 credits - wait_ms: 5000, // optional, default 3000 - country_code: "us", // optional, proxy routing - mock: true, // optional, testing mode +const res = await search("key", { + query: "best programming languages 2024", + numResults: 5, // 1-20, default 3 + format: "markdown", // "markdown" | "html" + prompt: "Extract key points", // optional, for AI extraction + schema: { /* ... */ }, // optional + timeRange: "past_week", // optional + locationGeoCode: "us", // optional + fetchConfig: { /* ... */ }, // optional }); -// res.data.result is the markdown string ``` -### scrape +### generateSchema -Get raw HTML from a webpage. +Generate a JSON schema from a natural language description. 
```ts -const res = await scrape("key", { - website_url: "https://example.com", - stealth: true, // optional, +4 credits - branding: true, // optional, extract brand design - country_code: "us", // optional, proxy routing - wait_ms: 5000, // optional, default 3000 +const res = await generateSchema("key", { + prompt: "Schema for a product with name, price, and rating", + existingSchema: { /* ... */ }, // optional, to modify }); -// res.data.html is the HTML string -// res.data.scrape_request_id is the request identifier ``` ### crawl -Crawl a website and its linked pages. Async — polls until completion. +Crawl a website and its linked pages. ```ts -const res = await crawl( - "key", - { - url: "https://example.com", - prompt: "Extract company info", // required when extraction_mode=true - max_pages: 10, // optional, default 10 - depth: 2, // optional, default 1 - breadth: 5, // optional, max links per depth - schema: { /* JSON schema */ }, // optional - sitemap: true, // optional - stealth: true, // optional, +4 credits - wait_ms: 5000, // optional, default 3000 - batch_size: 3, // optional, default 1 - same_domain_only: true, // optional, default true - cache_website: true, // optional - headers: { "Accept-Language": "en" }, // optional - }, - (status) => console.log(status), // optional poll callback -); -``` - -### agenticScraper +// Start a crawl +const start = await crawl.start("key", { + url: "https://example.com", + formats: [{ type: "markdown" }], + maxPages: 50, + maxDepth: 2, + maxLinksPerPage: 10, + includePatterns: ["/blog/*"], + excludePatterns: ["/admin/*"], + fetchConfig: { /* ... */ }, +}); -Automate browser actions (click, type, navigate) then extract data. 
+// Check status +const status = await crawl.get("key", start.data.id); -```ts -const res = await agenticScraper("key", { - url: "https://example.com/login", - steps: ["Type user@example.com in email", "Click login button"], // required - user_prompt: "Extract dashboard data", // required when ai_extraction=true - output_schema: { /* */ }, // required when ai_extraction=true - ai_extraction: true, // optional - use_session: true, // optional -}); +// Control +await crawl.stop("key", id); +await crawl.resume("key", id); +await crawl.delete("key", id); ``` -### generateSchema +### monitor -Generate a JSON schema from a natural language description. +Monitor a webpage for changes on a schedule. ```ts -const res = await generateSchema("key", { - user_prompt: "Schema for a product with name, price, and rating", - existing_schema: { /* modify this */ }, // optional +// Create a monitor +const mon = await monitor.create("key", { + url: "https://example.com", + name: "Price Monitor", + interval: "0 * * * *", // cron expression + formats: [{ type: "markdown" }], + webhookUrl: "https://...", // optional + fetchConfig: { /* ... */ }, }); + +// Manage monitors +await monitor.list("key"); +await monitor.get("key", cronId); +await monitor.update("key", cronId, { interval: "0 */6 * * *" }); +await monitor.pause("key", cronId); +await monitor.resume("key", cronId); +await monitor.delete("key", cronId); ``` -### sitemap +### history -Extract all URLs from a website's sitemap. +Fetch request history. 
```ts -const res = await sitemap("key", { - website_url: "https://example.com", - headers: { /* */ }, // optional - stealth: true, // optional, +4 credits - mock: true, // optional, testing mode +const list = await history.list("key", { + service: "scrape", // optional filter + page: 1, + limit: 20, }); -// res.data.urls is string[] + +const entry = await history.get("key", "request-id"); ``` ### getCredits / checkHealth ```ts const credits = await getCredits("key"); -// { remaining_credits: 420, total_credits_used: 69 } +// { remaining: 1000, used: 500, plan: "pro", jobs: { crawl: {...}, monitor: {...} } } const health = await checkHealth("key"); -// { status: "healthy" } -``` - -### history - -Fetch request history for any service. - -```ts -const res = await history("key", { - service: "smartscraper", - page: 1, // optional, default 1 - page_size: 10, // optional, default 10 -}); +// { status: "ok", uptime: 12345 } ``` ## Examples -Find complete working examples in the [`examples/`](https://github.com/ScrapeGraphAI/scrapegraph-js/tree/main/examples) directory: - -| Service | Examples | -|---|---| -| [SmartScraper](https://github.com/ScrapeGraphAI/scrapegraph-js/tree/main/examples/smartscraper) | basic, cookies, html input, infinite scroll, markdown input, pagination, stealth, with schema | -| [SearchScraper](https://github.com/ScrapeGraphAI/scrapegraph-js/tree/main/examples/searchscraper) | basic, markdown mode, with schema | -| [Markdownify](https://github.com/ScrapeGraphAI/scrapegraph-js/tree/main/examples/markdownify) | basic, stealth | -| [Scrape](https://github.com/ScrapeGraphAI/scrapegraph-js/tree/main/examples/scrape) | basic, stealth, with branding | -| [Crawl](https://github.com/ScrapeGraphAI/scrapegraph-js/tree/main/examples/crawl) | basic, markdown mode, with schema | -| [Agentic Scraper](https://github.com/ScrapeGraphAI/scrapegraph-js/tree/main/examples/agenticscraper) | basic, AI extraction | -| [Schema 
Generation](https://github.com/ScrapeGraphAI/scrapegraph-js/tree/main/examples/schema) | basic, modify existing | -| [Sitemap](https://github.com/ScrapeGraphAI/scrapegraph-js/tree/main/examples/sitemap) | basic, with smartscraper | -| [Utilities](https://github.com/ScrapeGraphAI/scrapegraph-js/tree/main/examples/utilities) | credits, health, history | +| Path | Description | +|------|-------------| +| [`scrape/scrape_basic.ts`](examples/scrape/scrape_basic.ts) | Basic markdown scraping | +| [`scrape/scrape_multi_format.ts`](examples/scrape/scrape_multi_format.ts) | Multiple formats (markdown, links, images, screenshot, summary) | +| [`scrape/scrape_json_extraction.ts`](examples/scrape/scrape_json_extraction.ts) | Structured JSON extraction with schema | +| [`scrape/scrape_pdf.ts`](examples/scrape/scrape_pdf.ts) | PDF document parsing with OCR metadata | +| [`scrape/scrape_with_fetchconfig.ts`](examples/scrape/scrape_with_fetchconfig.ts) | JS rendering, stealth mode, scrolling | +| [`extract/extract_basic.ts`](examples/extract/extract_basic.ts) | AI data extraction from URL | +| [`extract/extract_with_schema.ts`](examples/extract/extract_with_schema.ts) | Extraction with JSON schema | +| [`search/search_basic.ts`](examples/search/search_basic.ts) | Web search with results | +| [`search/search_with_extraction.ts`](examples/search/search_with_extraction.ts) | Search + AI extraction | +| [`crawl/crawl_basic.ts`](examples/crawl/crawl_basic.ts) | Start and monitor a crawl | +| [`crawl/crawl_with_formats.ts`](examples/crawl/crawl_with_formats.ts) | Crawl with screenshots and patterns | +| [`monitor/monitor_basic.ts`](examples/monitor/monitor_basic.ts) | Create a page monitor | +| [`monitor/monitor_with_webhook.ts`](examples/monitor/monitor_with_webhook.ts) | Monitor with webhook notifications | +| [`schema/generate_schema_basic.ts`](examples/schema/generate_schema_basic.ts) | Generate JSON schema from prompt | +| 
[`schema/modify_existing_schema.ts`](examples/schema/modify_existing_schema.ts) | Modify an existing schema | +| [`utilities/credits.ts`](examples/utilities/credits.ts) | Check account credits and limits | +| [`utilities/health.ts`](examples/utilities/health.ts) | API health check | +| [`utilities/history.ts`](examples/utilities/history.ts) | Request history | ## Environment Variables | Variable | Description | Default | -|---|---|---| -| `SGAI_API_URL` | Override API base URL | `https://api.scrapegraphai.com/v1` | +|----------|-------------|---------| +| `SGAI_API_URL` | Override API base URL | `https://api.scrapegraphai.com/v2` | | `SGAI_DEBUG` | Enable debug logging (`"1"`) | off | | `SGAI_TIMEOUT_S` | Request timeout in seconds | `120` | @@ -235,9 +235,10 @@ Find complete working examples in the [`examples/`](https://github.com/ScrapeGra ```bash bun install -bun test # 21 tests -bun run build # tsup → dist/ -bun run check # tsc --noEmit + biome +bun run test # unit tests +bun run test:integration # live API tests (requires SGAI_API_KEY) +bun run build # tsup → dist/ +bun run check # tsc --noEmit + biome ``` ## License diff --git a/examples/scrape/scrape_json_extraction.ts b/examples/scrape/scrape_json_extraction.ts new file mode 100644 index 0000000..7007aa3 --- /dev/null +++ b/examples/scrape/scrape_json_extraction.ts @@ -0,0 +1,41 @@ +import { scrape } from "scrapegraph-js"; + +const apiKey = process.env.SGAI_API_KEY!; + +const res = await scrape(apiKey, { + url: "https://scrapegraphai.com", + formats: [ + { + type: "json", + prompt: "Extract the company name, tagline, and list of features", + schema: { + type: "object", + properties: { + companyName: { type: "string" }, + tagline: { type: "string" }, + features: { + type: "array", + items: { type: "string" }, + }, + }, + required: ["companyName"], + }, + }, + ], +}); + +if (res.status === "success") { + const json = res.data?.results.json; + + console.log("=== JSON Extraction ===\n"); + 
console.log("Extracted data:"); + console.log(JSON.stringify(json?.data, null, 2)); + + if (json?.metadata?.chunker) { + console.log("\nChunker info:"); + console.log(" Chunks:", json.metadata.chunker.chunks.length); + console.log(" Total size:", json.metadata.chunker.chunks.reduce((a, c) => a + c.size, 0), "chars"); + } +} else { + console.error("Failed:", res.error); +} diff --git a/examples/scrape/scrape_multi_format.ts b/examples/scrape/scrape_multi_format.ts index 2ee73ff..e359c8f 100644 --- a/examples/scrape/scrape_multi_format.ts +++ b/examples/scrape/scrape_multi_format.ts @@ -3,20 +3,59 @@ import { scrape } from "scrapegraph-js"; const apiKey = process.env.SGAI_API_KEY!; const res = await scrape(apiKey, { - url: "https://example.com", + url: "https://scrapegraphai.com", formats: [ { type: "markdown", mode: "reader" }, + { type: "html", mode: "prune" }, { type: "links" }, { type: "images" }, - { type: "screenshot", fullPage: true, width: 1440, height: 900 }, + { type: "summary" }, + { type: "screenshot", fullPage: false, width: 1440, height: 900, quality: 90 }, ], }); if (res.status === "success") { - console.log("Markdown:", res.data?.results.markdown?.data?.slice(0, 200)); - console.log("\nLinks:", res.data?.results.links?.data?.slice(0, 5)); - console.log("\nImages:", res.data?.results.images?.data?.slice(0, 3)); - console.log("\nScreenshot URL:", res.data?.results.screenshot?.data.url); + const results = res.data?.results; + + console.log("=== Scrape Results ===\n"); + console.log("Provider:", res.data?.metadata.provider); + console.log("Content-Type:", res.data?.metadata.contentType); + console.log("Elapsed:", res.elapsedMs, "ms\n"); + + if (results?.markdown) { + console.log("--- Markdown ---"); + console.log("Length:", results.markdown.data?.join("").length, "chars"); + console.log("Preview:", results.markdown.data?.[0]?.slice(0, 200), "...\n"); + } + + if (results?.html) { + console.log("--- HTML ---"); + console.log("Length:", 
results.html.data?.join("").length, "chars\n"); + } + + if (results?.links) { + console.log("--- Links ---"); + console.log("Count:", results.links.metadata?.count); + console.log("Sample:", results.links.data?.slice(0, 5), "\n"); + } + + if (results?.images) { + console.log("--- Images ---"); + console.log("Count:", results.images.metadata?.count); + console.log("Sample:", results.images.data?.slice(0, 3), "\n"); + } + + if (results?.summary) { + console.log("--- Summary ---"); + console.log(results.summary.data, "\n"); + } + + if (results?.screenshot) { + console.log("--- Screenshot ---"); + console.log("URL:", results.screenshot.data.url); + console.log("Dimensions:", results.screenshot.data.width, "x", results.screenshot.data.height); + console.log("Format:", results.screenshot.metadata?.contentType, "\n"); + } } else { console.error("Failed:", res.error); } diff --git a/examples/scrape/scrape_pdf.ts b/examples/scrape/scrape_pdf.ts index 3f11bc2..459e344 100644 --- a/examples/scrape/scrape_pdf.ts +++ b/examples/scrape/scrape_pdf.ts @@ -9,8 +9,26 @@ const res = await scrape(apiKey, { }); if (res.status === "success") { - console.log("PDF Content:", res.data?.results.markdown?.data); - console.log("\nPages processed:", res.data?.metadata.ocr?.pagesProcessed); + const md = res.data?.results.markdown; + const ocr = res.data?.metadata.ocr; + + console.log("=== PDF Extraction ===\n"); + console.log("Content Type:", res.data?.metadata.contentType); + console.log("OCR Model:", ocr?.model); + console.log("Pages Processed:", ocr?.pagesProcessed); + + if (ocr?.pages) { + for (const page of ocr.pages) { + console.log(`\nPage ${page.index + 1}:`); + console.log(` Dimensions: ${page.dimensions.width}x${page.dimensions.height} @ ${page.dimensions.dpi}dpi`); + console.log(` Images: ${page.images.length}`); + console.log(` Tables: ${page.tables.length}`); + console.log(` Hyperlinks: ${page.hyperlinks.length}`); + } + } + + console.log("\n=== Extracted Markdown ===\n"); + 
console.log(md?.data?.join("\n\n")); } else { console.error("Failed:", res.error); } diff --git a/src/scrapegraphai.ts b/src/scrapegraphai.ts index e10089b..6d7cd20 100644 --- a/src/scrapegraphai.ts +++ b/src/scrapegraphai.ts @@ -62,6 +62,12 @@ function mapHttpError(status: number): string { } } +function parseServerTiming(header: string | null): number | null { + if (!header) return null; + const match = header.match(/dur=(\d+(?:\.\d+)?)/); + return match ? Math.round(Number.parseFloat(match[1])) : null; +} + type RequestResult = { data: T; elapsedMs: number }; async function request( @@ -99,7 +105,8 @@ async function request( } const data = (await res.json()) as T; - const elapsedMs = Math.round(performance.now() - start); + const serverTiming = parseServerTiming(res.headers.get("Server-Timing")); + const elapsedMs = serverTiming ?? Math.round(performance.now() - start); debug(`← ${res.status} (${elapsedMs}ms)`, data); return { data, elapsedMs }; } From c9153e47d0eacd3a14e9ef1bfba072e1e238e802 Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 14 Apr 2026 14:47:08 +0200 Subject: [PATCH 11/28] fix: address code smells in examples and README - Remove process.exit() from crawl example, use if/else instead - Fix non-null assertion in crawl example - Fix undefined variable references in README crawl section - Use consistent example.com URLs across all examples Co-Authored-By: Claude Opus 4.5 --- README.md | 10 +++++----- examples/crawl/crawl_basic.ts | 17 ++++++++--------- examples/scrape/scrape_json_extraction.ts | 2 +- examples/scrape/scrape_multi_format.ts | 2 +- 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 47774a9..7ab5b10 100644 --- a/README.md +++ b/README.md @@ -144,12 +144,12 @@ const start = await crawl.start("key", { }); // Check status -const status = await crawl.get("key", start.data.id); +const status = await crawl.get("key", start.data?.id!); -// Control -await crawl.stop("key", id); -await 
crawl.resume("key", id); -await crawl.delete("key", id); +// Control crawl by ID +await crawl.stop("key", start.data?.id!); +await crawl.resume("key", start.data?.id!); +await crawl.delete("key", start.data?.id!); ``` ### monitor diff --git a/examples/crawl/crawl_basic.ts b/examples/crawl/crawl_basic.ts index 69acb80..88f20fc 100644 --- a/examples/crawl/crawl_basic.ts +++ b/examples/crawl/crawl_basic.ts @@ -8,14 +8,13 @@ const startRes = await crawl.start(apiKey, { maxDepth: 2, }); -if (startRes.status !== "success") { +if (startRes.status !== "success" || !startRes.data) { console.error("Failed to start:", startRes.error); - process.exit(1); -} - -console.log("Crawl started:", startRes.data?.id); -console.log("Status:", startRes.data?.status); +} else { + console.log("Crawl started:", startRes.data.id); + console.log("Status:", startRes.data.status); -const getRes = await crawl.get(apiKey, startRes.data!.id); -console.log("\nProgress:", getRes.data?.finished, "/", getRes.data?.total); -console.log("Pages:", getRes.data?.pages.map((p) => p.url)); + const getRes = await crawl.get(apiKey, startRes.data.id); + console.log("\nProgress:", getRes.data?.finished, "/", getRes.data?.total); + console.log("Pages:", getRes.data?.pages.map((p) => p.url)); +} diff --git a/examples/scrape/scrape_json_extraction.ts b/examples/scrape/scrape_json_extraction.ts index 7007aa3..09adb7c 100644 --- a/examples/scrape/scrape_json_extraction.ts +++ b/examples/scrape/scrape_json_extraction.ts @@ -3,7 +3,7 @@ import { scrape } from "scrapegraph-js"; const apiKey = process.env.SGAI_API_KEY!; const res = await scrape(apiKey, { - url: "https://scrapegraphai.com", + url: "https://example.com", formats: [ { type: "json", diff --git a/examples/scrape/scrape_multi_format.ts b/examples/scrape/scrape_multi_format.ts index e359c8f..457e72c 100644 --- a/examples/scrape/scrape_multi_format.ts +++ b/examples/scrape/scrape_multi_format.ts @@ -3,7 +3,7 @@ import { scrape } from "scrapegraph-js"; const 
apiKey = process.env.SGAI_API_KEY!; const res = await scrape(apiKey, { - url: "https://scrapegraphai.com", + url: "https://example.com", formats: [ { type: "markdown", mode: "reader" }, { type: "html", mode: "prune" }, From 22b936efb1c2083dd668bc321a6d59dca2bf38da Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 14 Apr 2026 14:49:44 +0200 Subject: [PATCH 12/28] docs: use local banner from media folder Co-Authored-By: Claude Opus 4.5 --- README.md | 4 ++-- media/banner.png | Bin 0 -> 41489 bytes 2 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 media/banner.png diff --git a/README.md b/README.md index 7ab5b10..41dddd3 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,8 @@ [![npm version](https://badge.fury.io/js/scrapegraph-js.svg)](https://badge.fury.io/js/scrapegraph-js) [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) -

- ScrapeGraph API Banner +

+ ScrapeGraph JS SDK

Official TypeScript SDK for the [ScrapeGraph AI API](https://scrapegraphai.com) v2. diff --git a/media/banner.png b/media/banner.png new file mode 100644 index 0000000000000000000000000000000000000000..8b06be509d1593b7a4714b8304b4cc1e25e7057b GIT binary patch literal 41489 zcmeEuXIoQg)NYhfoIyYZM5#)bD!mgF0qHVGZz8>m)DVgeD$*tNt{}Zh7Xpch)X-b# z0SPsbP!k|P&Su{8{)F@8T$4}x%9Z`>y`R0xz3z3dL_O2jr2UKKFAxYstM&BB-yqPR zFF+uwd)F=l-w03-vVuT2Kw3{8JrBy>ny1dT@^6;iaW;~fK3IU>ZyUY*C;Nu+6Lk+W z)o5Bp;+_pt=j-htrb9tf)#uS%>I(UO*S-|XjzVXXLx;3(sP%XSto8S&9*2SV-)2ZU z%P2ZK4Q8BC2KP@bPnRN$F9BBsKFJ@P{`ctmrThQ)yBq%Ji% zEOX2{K*Fn}#3(}wDL9m%Y_jL3w zZnf>G2R@WZ3_KxmvEUm%P1S24+NbJi>{P{Gc;%W}bE=+InMx0tHN<2LhY0<}k3L^S zAJvHAve1bg23aB-T$&|a#2p1of7On(xN`A}VV-N(E`2_y&2DrYzL{kqwRqiYIoe6G z${ik%*!<@Ovi&^50IU(ZDfMc$XH2$nyOEnE;+VcM^OzL9o$_zhCuJjY8B{_qGHbFO z_`Azyd4Jf*Z)3DDhFMJi93FDK=Z~A*wwP-1$2EG+HoBulkG;`V)|zoFOc{q@2c0Iv zl%ey|?XqdatgtIT%i)gk=5*>Fe9!e%?wIZgwa(t-Fn3zf2P%6SAe zJ%{{j>{N|Czkarvq2Wz`(~(lQJqV6jX?*iI%NHVdvDh<0{hcdsUgG2fa8t70A6uX& z8^zwpGa{UTfR5y=Cr+0b77#Jvl!646;JNQUhJo&@vf+D8+l^j=G#BT8HoVwBLt3!i z*c-K`@s0baG%+N;1?SbglG+)=ta<4GhbGjI=_?+N8^Qf?CdJfVHb$uq?{H~S|Fi$Q zKvkfcSnpyjeO}3`KtF9lj?~RlIU^w{XP!;-x7B^NYPu2Rc>;&{%O^P;hvS4H}>`T3jjwcd&U%LTk4Rl3i%HEt}5h3&?aSGBmIQd|7F2CCKQT>31JIj)v_{+O1HL&|j<#=%725_ay$@V~tP!_o z3v5KpzAX-gpZo5w4)E)yKg&9VC(N|iY3awgup~pK>2F@zh{2FVlr&^%o8|Wk zKiBLXPM&3bp^zSgZ(bXeU&{vGBV0zvAc%Ytf4137ym@iOl8oC0pl1RFgLA|FQ;nfj z#3}m(Yrhe7A(M)aAKY-0l}(s1a+z^?kzSU`e&#@?CMnU989eO7Vn`&KQA*ldAnG+th4{g>=`^FbTf@?&%nF8*e* z$K0A5+6KDKjM#baqT~fwTS5qyn`~7%_SuuF*pu$!8lN8tk~^r&0k6@R#VcC)&y9Q; zYIGW~tqXxVPNyC^T24yOww6s78AEel`3}Fw&5#zlpND|YcN3~xHp^Vz73pT|MR1@U}E2&WL_xO_j#97Mbs1JZsf2t}DO;a57u! zw;XA+TM9AlS(fz+q&~&Qu9Lz6(?O4BKMfQRujs)(fVDS{pMJ_a#quLhSHucNGG#nx z0+HnTO!RbX%a$z|Z#G#`)?HFLuW8B`T>{Br_4>Z#ZcDb1dfm+#eyWn!xhm~gsknNix4goSa<;WB%C? 
z55|4&(0lstT|V5YQC<|n}9ZK2x@^DX`{|1?u9 z*_$eafG|<=e!<@UKr68hX$v?`;b&l{75(qFRKD7H@!Jw}V#1(9%>bAk959=z*1bM~ z^e`itq1HAozzl9*{Epm9fr8eLm1V%`F(MX*Aj99{0V>sA=AU-X2zLp-)g*&hPG7p99YX#HBI? zbOKInB6#i6BGYy-N|0Yo&sBzMT&mw_EHFn+-RBif5120?eP34* zRyc^hT{-&fn|X}OWNX*RT3N}=O~uA;qGwH;grIC!S#u4x*-VMM?N3$qWc9U6Blbal zceC-4mQ4nvACA3+9+(F&CWuol@-mfCv?%+5-)3$1QjzKSBp_7~wE#D%c3CK!fk3Je zC!2>r^hA8sOyE$9q+^%yFjMbDz-`YrO_3J$E{0OpmW>|AU9yPXc|ur|jL=YyVjyvA zy3T1}2FN3Ll!3y=*RV6<0&SVz8apn{4f;uT8>*gHHIRX=Zyk&6L)p zC)P{9C3&9mepaS?^L@?HMmp)KhYbVhN=ir}Tey=$jgPU?(SAv|qfb9SmvcGuuYxNc zWtTx$?%(_4jcKwmSS4`Y9}R@Z7uA7>_<3m0r~BL*v2rWPMkgc!<%G$k!fdyI^GqPK zg-k$Fa-n$n-ygYc!qGrf@F{&{7sKf}npnbhMl`c-S<$~%_Q6?F*wPUzzU z@V&m-Crdnub`ttIR#yrAG-$Dy3{sa4sYpE%#9Osiawpi#qGmqdpHY9W15T?awybY3 zG-`J8SPANGu}1ss8pEJA^2l+d)YgxkO~^^}%xzL>Y&?ZM_|I#XTtWbNvlG2NTh>ZVxBDydenh;12yKX%WZf-sw%+EEixI;mRA~;--pjw-(oU?pNhc~)>kh;Aa%YA zZy0}8WTHkw&F8%O`6nFHx{?Yw_Vlfc1|b4*fhs(-Yf3DospP2_aa%6sG6|Va_5~~)uFjcVqttT?e8v!9@DF+{bkHeQSi=M0V#ghv=QlEFx9TZ{k!PMkO=^L*K|jOtK6(t^*)N<2 zu!`K~XovS*H11c=R#BGMjD7EJH`;Bz=0jX-o5WPHDAjU(wI-ov6*)& zMSXmSSdzLdA4-5T+9vj}~MZ+|Wx~}Fw$Kpf0V@4W0HE4#vT~`vwv1lNNjRZB| zZt{md_`rS>)F4Gn?twR27pM_HplFSMZ^telK2v{3dFxw801|=;C8^6Kk+YBSR8?4x;iSpn+_Am zFtV&^rO<4EFD6u()w~4IDqKwdXfcA@)L1dr$fTS!btm+1DPCpG&XK%_GPF{D?BuQ} z+kVQaCG8^#h452n+F_~*p_p^B=9gsY2C*j4@TY$Rp7iq@_`1&ZuUbg~$m4@C1I?=7 zM%MGmC`{!_pL6W7_IT%rnwZyozP0CM$o@MN2^V07-5k;6oIhJV+Fj;0ReXNKq`*Hi zeJ|G~Sb=2FU37`1V!?hn-QdZ7XdtI|#mwQyWfhzi?F+&OcAF_gpIXsX&@;AwLrUsj zKbJZp31O0JZ@%fF^w-#TV)j-m-J8QO@dfUj%e>&zZ|!U9m2Lh%17}kLW3sny-e`22 zD6>Uoo@6$All(?++(h`sLT`FQO}lNhMcr- z(6fYefs)&m1qSn(UqK-GTQ@HM5OPb$dF3&^3ndfh&zuG?EzRbb&vtJ3C`ZmEEh03Yd*%j(P9J+seVSEEW@rV!` zJvj1?HkW1o!&TJ?iLH<6J6(T2sKiMK(1itpwyJQ8CF_juWmd6(?Ozm8)g^XS@Jyj= zk+iIh-7sd`5b;ef3c{Io!(z7i#Kz)$F%Yk~RGLO%@fQy~w8y{+U&H&)Fcwwduc${x zCwZ*?vhI@=%BkaQHyS<6w;eQrSAo!%5FHDD&G^jNWByJS`qznLjgwtPHbXh{S9B6@ z3Tq7Ww0_&?-qfd;;|zxq7025D9`&xZ3tp7&%HdES{$gs;eI3M@`H1-<;MZutu#2B(HX>0!7scU)}X_$3raA!BlIqfWT 
zU^T5E9)Q9aWH`mt8=;~xZyk5Z?pq|)LMr7XMFjV<*SBV?6Tk}|R?P`1mi6%*9R)z3 zaD3snXjIlGMlTkCus`aT`DP%_g&rMxDl{zF;-V@MVMBeb<-9uGBQ1HMYLTQE2-`>w2&V^x}(G}KH zAuAQP%bgmml&d@AD-HYTlw`qblD$qL#Ks`}WRKTK(gyF{Ul~CG*X5;#xHZXn4Er47 z4{68qYUPMyu6n+Ycq10eS36Ndzed;Wdk>ijOx^JJI~b0!XmF_;n&8N#L@L^xJi-pepa{s5 zU3#SFD2dd(+RF{zM`;emw3q>?9xtotxBd-1@qv~#b*=$gxn$$N4p(lzuHbZrT2-R*>E4sLGS%!ZF&B0Kw`RG)2h z+wRTy#b4GUW(%!#RghmU6v%HUmA~xVI@s&C9i{p;?7Bf$7dmvBb_5`5C*>O`=c~_Z zuj)mut{Cce#9vT1c0dRg5NYf3-d1+iN&7Opc~DXi5PZ1$^F0kvxb9kdxA~tOY-q7f z0ffT5=|W?r7g)*X1yYvao1^ZY*L=P)^aP=NxMxeUe06l`Y_JT{aMogg8wpq+rS-qV zGRJ){q~V`e*q+2lj^OpBNKO@}{`3;K!;5_MyFP3!#J{f7AXzN9eqvRccRaKn^GoSE z8S%e)g)XfL^GO$YIw^D_tWx{v>9=ukOR%-fXAvG{gPFMUyfynMYtRf!f6otb-f=sJ8qafBw^ zpu2#^vwgWH48nH2beiqDP~_{k#p-_hs;Xg1XL1YE5L)}ZpdGDkFn(m{$B}-%=h^ZE zk=zvXmb!Z`{K~U`6b?~q=U<6kIky9C06zFFqZ7c>x;%v4amALh(o^~~?{|9)>L`4V z>;nw-lxjXGHXK^EO*Ri^yOu9sJ}Y%gFsif z?)~xk0jr!RDw)1&8~P$wB6_B@tZeq#Tv4Svsd@073O@<=Q1q_?Pt>3hbsIFV+;gr) za5F!j3sX=geL>Iu3arR`g+DRb-shx}y|LY*8Do+NFZ&L~jOne8rTY5}(eFvh%-bVf z79>E?MfR9GLC>&;6U3l{&F^buh@x-4j7_T~$B{W+Z6E3~p>jgtR4sKKQRrZsGTy~< zF0VLqX*!CZAAmrAJyE@;IyMfF0uiECvRmKvI_a4Qe_!l~-Wd0c(A(X(K^C2mS)CBprLN%od#Ccc9!W`brE;j7;0=B0F4+!IlI z2ND8e92lqBlSy=Yx!iYzVt%PVPxtjWF&2f{9XOrv&AY2wde`@FJh@|n#(sZBx6YQ7 z#|FIj^oSd8_*{C#v#2?1m(|GE!S{V~hp7F) zCv8Ey_;uL}z!`fI8*^8dTJ`32MF~L#bwq`EvWA~w_~EuYb9W_0O#k$8vSV&llYeCW z^kMY$rQ~>F-xuIe`xA`6j{R9}{ap>Z%!b&D_(r}A>QMxetDbi8+gXjAni@Sc8`O0m zLvk(gf+kGYLt~Jp<|oDB|nAl zJGv4-dF+^M7eHc4%A&U|?K`3mZs|4p?jWSy6A|=DvUZRxmuZDYZJ>xe5VVJ_G+t!)i)nlY(g@U%$5N7Ji_)y@*8f_B#0MSJ-V zCfH5)Cg*Fo8$IIz#%DiMz;OUI27LELUu6Vn!mM;n!9q&VMW)(%wZ7#ui8cXJGa0pH zLLM}RjAgRjn-fCk!KLYfpjfA0H2@wKxOTDKPxeiWI}E1A11Snk!xl|m^FV<>+3BDM z15jVU$ z`J>$PtEq~hg~GQj6=_-l9(^u z9(d`hy$o=pHC(`V zDiOou?_%R?zfmToF%Qsy!aC4v{Ex>?z@}coG0n%48syE_t=tTr&LtDcKi56 z{{#pu9i3g1{%A)rCZa1bCmm0{_j1G6$97?Gv)8SCdBolCx{{p4raTu*e(n04m|)9R z|7L*G=75dtOyZV;O$-0)u_B$8!xs9O_+I4b{j()<2oB%(&Su)DG9krxEbYZgwu@U| z{?OFAxP$^x%V0$dJT9FeX#)`pw^%$brXd9H|h}tBso!jP7 
zoIEf8{AI}(0=LY1)PzhfO-;E_+X69Oj|r96jUr=EC3=oy>+GY>3Wi`_ zHSd_**pLlL(GR*hng^R9HX+l4UzTpW4yRndOuTawU<)hOH@|<|VD*ussPH zgT6&xRyXO-xoV&0qP}&Et&Jj_6#O+d@@1pLE{8vZAR7JQ!t_xicFb!WG`y6PGjP-#523I+@`qb|01@hndw><}*<7JD6sFwK9+AtJ(L9^XhnctmAqw*;o15%cUe& zb;H|QY|>jj>k0z2SxH;XGKy7L&iQ%;w+Hg7v2uKIyfEuf^Lc10KmD}7n@&qR^efo& z$Z%uUAKlJ%{Em;X5Zi7B%Vaw_@0o@N6ny$+8-_^+Cc41?Y} zo1-V0dD1`rLJ`|Wdn9GGZnbuk-Zj&Ae=1GP+I9Zb2EGQmhXi<9DX&UJlQ^YNHME_- zK*9{b`BXM71aCK>5l6pc66veyuR#yi4bg929ejm7+_@x_x zvA*eQ?6Eg0$z)!6#WLFDfY+{R7y%8 zHiHZoV)@KZxkKG1B%%GXj_01P3OYxkpCy}Z{58$BBzpZ?d#%4S*kFTB4K4!12+dff z`<2n|h94zk`bII}=7X$k1T=|$8d;yUtE0fwp20$?J0W z+IXmk3x|qlzrICJ@YeXiO#L}iBXcZY-_!rOjFmW#a}`>|GPFDlUwiB*tJ3i&gVyCX zO=-Nx%-fEt)+&Nl$s z%QbfAo?;=gn^pPw`A`zt;StSk>~{7J&>E%?^qsSPy40Di$D4iEW|Yd*^0p(x9JVFH z-Rh2Zvx8ZsK_U{TzeQMnB;``fCSBWLZ^_xbK8x!NV-`u-Ya^v?^e2OsUEHVwDqm~Cx_9{abA)f%?aK1n`T*xX=xARSGa+`(YslhRdWh)W zXTeH#k96;!#|cH80Q;Y)-ehMeHWLs-HK?cfnE8o$tjYsDF$oj1$qifGcz4YZ0g*>x z8@WN}@gEjs95WXn@`V9!VN->mNn;WP$>Yb20^{^7MpztZO<%rs|)L% zY#Ra}1sb|rHhCWp*fV=fBUCm;qn?t?U}=B-R6ac(3P)@=&i>!pt0|TKZs=;4jCIpc zYXKL_7xuedv}0F8bE>B;x&N077`tB_oE#hGbp6_xU{i~Z1}lH7_*D)wdvf(-0Ubp< zzK)N*Jp^!gMx&aCK;KJBQ;~7RmFTOA#JC4*OwufK*086vnJ^8b-mBq@eOE2q9v*zF zfA!79zfJ3o{NX=2;!6!zs`)C`8Iv%zzN*dZ#;6s~m;DdKH{Ycns#^S$WT~6(QLcZ+ z4Nt>+KIJ?{Lr#j)L-ZX+@{QveqE;&WyXB7Nuf$yC-rmh>^|u^1lHB8g=`OSt7{pG~ z7v+t8C=sR$(X?1Hm9mcfwVP*P|1&Z?YtUg%^*T7%S>5_A$s4ZFecQR%y3QdXb3aM% z=e)lTsa9#4!gk)?qxbq1Gf?7(N&gnL1SQuFsx?liuv^tS^g65TX8;}l&#D_Qwz3Cb zk+K;0^<%SzF>-5}{9qgMp{>RF3z4z*Uzr0joB*>?(m;X!=GK{9n|kj*USbHx%+)%g zII;w6WxVG8ZH&cvhb*^|7A#Sb*A+rgKU@!$xo!0&G}z&M_%h8D+}M|V_Sx4+>_hDo zCEOSymrtWppE@up_LO6HN$%iOL0`=DPR>bZsjY!?gwPWOvjrJCxIp~b^R~hWieajW zT+WHD>wWu((N@P3r}UrzWvsKS?$KMR8qqoOtF2O>(X_zEvkGKxS8tt;%3*JoQq!%b zGdF4G9Tgt(rqmaYA23EYO?Hbqg7pkblNl`ZNXu66xkCDrpISHkhvAzd>>^T)rcRIjl8wP}t8Ncce>`ksK{kL(q(Y6lBNqun}E{21!OOqSn@hASX z=zJHLr}Q=ztxI|XX-e)aYj>Rjgr9swtURv<;>)v$tOEU~DBI1@M$|m7`AGX(q3@#d zD@TsspSEB(qwK(DJ=Lo*ZfO&{PwJy~sY6K@l|Fcy*2 
z#JjB1yqx>7p87E_4*${B%VKEL=>Dg>U>IhwJ5KYL4A{l|Byme#kR=)G>o?~-I<4k> z%FFW4S$w=tF&c*e5C!5O=LYFCu&rTR@BEPGaaCGfNN|D<)fO_moa3+GWZjRc&4XcW zZ4#@>R@-f2^I0-91A2RU_U8ov9E|~FEi8EpNrILQ7}C`2{t!4EP|=+4M1qC=07cF3 zD1M@ugzMjUf1xJiQJW_7sVkr^QKHc6$hI>2*L-=sk|P=33UGP0LH2u!PpSZ{;k)>j z8jrvxYDV9Tnu8MBoby77oafKV1$!?UvD)6W8|1elz+%&jEAzEG){~ZhiOGEDigglM z{t?(UlJ^wV6YV|oUVQKSW7<9avmb+hC45MxLzH#*$ZfM;+0r%BNm_tum<+J~@c0ZA z)`pyMKF&`NY{{+V;K?_m?WTt;NvXF2Dz+=&8>yuw6WNW@=H%DnFYC5$E8V>7fA`M& z)K)J3u+nE6FCeq^iw$22BdP^r61$6Qo^_RSNotdrIA@!yT|i_aUE7#o7f*>00>}z$ zS1P(WK-(?FLv2KOZOGlEFRy&@YSQ!M)9q%1rs*opFMMzQcr(faFg3dy(qWu$F#re$ zWHAK9*|vB1(tGB7f!m^1-!|_lF)7z2Ot-iG#SMQqe0_~j0R+O)@5Li}A!R)ioj>8}FcRTghvfY2c%t}pI$6*&AlrAGbM^_w$e4%AV;#xgQKy=NqPzhk8mp z(E=rYn7!u5SB7#<3O8P;7rXSp7_mpF&9!&1qd*1AcXz%MVeI<1#%WcgqFs+hHj>*8 ztJKTuLj%R(?u{F;54G}T+WagP8|UnVPs7U>oJu^7&YTrcfxdLBqy@JF6 zjx%+8XPQZY{0nmv@rIxb!uFC_CS4~bES^gpalEQ-2!l8zJLGHHW=7u1xGMA~73|W= zW9b4^p2_y|JF(l|9>H$y!Rv*Nw6?BnYkU0O=LIU6rCQ#DFt(}%95bm5^R*sgKM0fE z>9vicqpi><9m3Mihe>F+*X>(I6`odi-EXTGJZlyQiq6|AiDbufdqX*R!0Huk>ZHJP z@jRCsE3M?`3!(#yG2sZEy;6C+WT#4EVJaW8A;?!SkezMW;G&fA^qui_VC+7+ZoDew z)IAwGz({%ip@`I`A^TA)=hA1_SI@3|-gTWXoq4n4n@64lU_Hy>(gKvAjQ_rD(>4w8 z&D`;Fzf&!}qOMnk{ZA?h&MfbRS~vHIN{QJl2>qIhBdQ$c zaPV%Z8(baTIaUy*RhwrI&LD@#t54Q)v7eulS87DlnzeB+us-hv3GNFLA}%hB5nJ%D zD_(i=mWX5xZgt6}I9HaD7l6M`nkCh>RU`zhHg@conFZOmB;@FVH9JJa!`m9Im)8@zbD9rgd#r1{y?fSf>B$KxgOqI- zz0{o6~=&9oB}04@id7g6=6Y z0dYE*j7ryph9)9?qa7wJvkuj;74qXE9a3C&zAhA8q zDa4MH#VTk@u{&|DEslsQl7_B`o7r)s2uPKAT)*2ntS<*IZ_P2zs<8-=NGL>qPO4%UF0nD=cJL&sf z9UFP0;NBKyPK#H14s}8Eb@W3>FZ5wI^TJUMGy&?+&dJ5OaMUG>5A$8=O99&F;p0gL zp_@kRiUELPz|)dt_Kw|nK!2gkiGJ)DA~Bvy=56{bQ%AD`n(~)x-eJEYX^6LyJ8yCd zFFt&Uigt;G2}5S6^RHz#>DnJ@e8GHG(5+2r#dn8rk!qGt8YuD;jnHjrJ_(DRC!efm zt6b+jm!w9Uy!Ot8^=$`uGTB3(O8rIM+V+OQjw8gT-m`>1-Plhs=!KF}N7nXxMR)jW zYotBY5nI%%_a(TPO0^W|gbW{5WLkSpKPD5m!KCIPb>-&c)hv?PUVLCH_nuwHWD12m z^&WiEot0}FfDsw8A)ktIs1P!XRT}MZWj&Gi)jq_s9A+ZXloNEYJ4gy( zpjo2jg#MMJA6@#>v|*MhpI4b5D(qq#TV~U9!LH{^}deeGAQrt~J8+tQSHfw&>bE?;0O5 
zbx#@5ARJ-zN8|a~;6&z){@S$LZdp>;$>EwhrBGx+fogEPEfO;*t0h6Z%W>c4gdy}wx2dsh z$`ZEi!xdOd$%*G2oY5u9koNw!|v_@>djp?sqb*n*-_rs1^-5McWQWS)4fMYNS`eQvy0KV2PivBmhzm zbwKap{CK7jSMS|*7o+dolNF+Wkn>4K$0HpOhlX}(GJG0jOh?ZJ&TYhf=c&I2d)7?e z>8a+qId?X?4s^kKy(?Yce~DC2do=P#{YGutQ-qQwYEC*PVv7joFKOT5-Vc=%g$~%1 z`v7s`fGx$OHRW^d>J`_ntS4SMC+dT_C%!o*34L)j5*?*X%LYI|;O@8zlF}BN8!g)Z zBt0?kW}?QvJ0bjZ+4krsGgsYYr6oO^+&6jx~~v;Zt%V{3v;@PIl~8*o2(Q^I;#E3Co*i|^I z5^pZzfgqZ&QiWZ)6Yj+L#Mr}v9hX|f-Dx>lOnCcbZ}g(mF;i3>mNj^cS&mbA-~>ME zL#DW3xN**}?3VQBU0Prj?j&|oIL8RWFohmJmCA89iL|}yU*l@rvOI&34jtp;OvTR6 z0;H;`RhYr}Q;9Fw64DOkDqJUEHN3QGmyi0HNOh{9izIYw+Rbh5UFSXR_KB38+8I0h#Xa7>Z<}*{vw+SM66YM;eH% zY3YZW91!eRt?G<#yuHU9Av^8XXnLicAAsbCm%^?Rf{|A2mC=Qv0Tt_J7pu1}S4T+! zkIyO4O|tCx%l)=g9_a;l1^&3nLGCqPZKLE_VuK5N4!JnZPV=j5lUHnm4nWk_1Zh;H zTZ0QEBN5ei)L&}_09r07_l0jXdwsu464VQlJZ5=3a(#R&$~vi)|GY*vh+ooN@jUs# z6R@c)dK-XP52oc516GTAc_E>ejrwEB^+6?{*JP4AlLNckz;h_&$lScO1l3z=gAf6c zs46|@L99AKR$kfsi+^ql)mPuj9L_+S-M#@lSzW{ToN3&+oEZ1IJB9828kl3~lvk#D zp48W?7|=6*M@ib-d;SvjuL6qcr!E-eSqi>kbgo`v-bdOy(S(-|j>m#5hS^PPJ(Ilo z#O4_a`KI|3l%;b|jIes<2U&OIvwVE9_m26R9j)Vy`1mbf9NpagJr5LYfEZBKzTqfp z&xSz|e!Vgv7Au)|+@G~@0BD3aA12NKV%#BR(i~5B*_oPx|E6ERe(z!U8O|1b+-q>b z2VT?=+q0B4vH7||1ss4BA%KFdVB-X+_*%PRwr88ByJ8ZU=iy=%ZDvuFw+eP50gkIpL0vYc)d(v-jsfY}xZywo}B{@-}$4-yN^!_)}%@ zQjur{@98!kmqBWFqNKu}KK|II4XZ>jjIOry7K0t}4PIr<0+IXP(^X(%8ajwGxL6JqB_$M3j z?w&1Z?J7cF8tGeqvPS1H z{XjVZ^5s#64`s83O%)#>QEOtfMw)FOv1H&gKUE*C3G%wGe^}X&7u|q+t=*J7o*Az7 zpDC&HCrce@+mMLLd$yM-WnUtC41`E%cS%I96H^*0g@Q+S;e!g(hNSFuiJ7XtLO zx_#$6k0y>lQYMuO&)GwK8san zWVY)JCY#d=wfb3h&XHMtju*=Tfy^1^Htlm`g})nvAFuDs4`{4HdAoDCF>cOs1rjbl#MQ&nxAUGf3!tSp(OJXj6nd-q$8>kR*au|KZ0 zEYa+(aoV5}Cww`0Wt7v}3QcPJt&ACxGM^FcNC1cyN6!rS)_Y3wG2ij>Qc z$1e8JU(N+TTH~!7Vx%%8Keh56*?quz?(5qJ-Sg$>wc+JFHYatrYK|sZyK(o@08M0s znI~LhK^ZRjb}%LhnF=k%?Kc&(m-!}zaKVmfuXb6(BD7iDl8h@2D9Z^ zWxcErArN4ULLp%5Xw$lP;I@c`(!sFW2sZYvEoJLl=ic>9+(xJ_(QgO483Qs!DZ2l9^XF9~{Ws@*~w`+Xdw%D*qD zW1khEJX*7y=>74_*r;}n1E?l-*`+~zXVI@h#5TYq?rwNgxK#$-Lu1eC4k{sZ@%s;A 
za~T1Kxbdc3cLm?ncMb+t%#;>EcAV}WjKwK!j#8|5|xhutWp#77Y zq_LBfcZSAKSNfqGs^o(Bgx{72&*%0d{3;;0E(Ng6@c^?yLIw$6;kKRIl>W`1d_aQu z4}(R_l;mo4Y^up&=_XSSO~8-_g|t4-k4yINeHOHLQ@`;vOw+mV(_~xrpo`f*GyVqU z)0>?qg}Xlt##WRrX*||C@t1q;;q-cWItU(k`x@bBXaE>;;Dt1KX({c^ z25n{5J3e--44mmjC_pxnLKk7qqop}Q3hErZ(5-qla2-$W{Dhhcv#}2)L9$=pnkna` zxb5!iu`kXGZf=rc3{M+>_%;F9Ow=Ow^6+<&u+~=eQ6@`V*bI4vI)^Zh5<;-AXf?U^WC4l;l6`3NjFIA1qtU1Bt&2bt?ZWn`-{k6iwu)KeRmfv zd$O7j*Xb%&IMk_Yx>{vS_RAJ+=AlyU?U~6AFQkgOJ8KswFTdw_Ua&jtEpeuz6~Bi) zbj36;gVXW^%N^dhCKxt42sGc=u!qH}_@dl3p1TkGKb_=rJr-<-7}u_NlY@blwJy?3 z;_tlHfEwRA^utP1377Pm^|eD-xM)uVzivQ&%nJW3xhg-<99qU7GuRls**#VHmnJ!RgIx4 zBt<@nnR*2;N^me2pw7wVq(2od3cW{)8_PF7fndIIwITd{Yi$2m&WNw|c&>NK3(D2A>@Qf_BYI4P3ucdpmd3F5$MzcTZ)?l5`6H`;HKA5VM zho`MgKVLf-_>@(@6ripG{t9~bH!!UFdBlCLM3E z(Fbr@-jh%gpva4$S262gi%hc)n9gv{Z6?D2>3L0anzP#$tvZflJ=mFA_=8ir07n@;SMqT1ud(JjiTuV^e>JL=7B!v) z1Ye^jFL0?EbiuJ7J4EAKflNBnW#2H8@1J?IanM4XHOr*a7I~D(%arr`YxR>bPxG`Q z_Y|BvJd7CN85ky%Q}TcSySdZ>z9SE9=ye!qp^f8KVT06%nJeg8HQ(P8fKYP;nuoa= z!FCRZI1Q@(IPPeM3rlFhS}9u2{34&IFgK|-ANuUzo_EsbY1^*~Z=5HgiyIzo*jNkm6@8)p%WwIM08*C-=I`LM$yXC`_KAcDh*Ez&ExA+7% z2aZ2=!5d+=y*jtPli}4?L%=Wu_9JAC$#o?d!Si;~Fm2UFvMCa4&Of9a<3Js>8NR~7 z&wu}9rZdrSk5+K)oA2b=?omTx4lrVx{5d3#ng*oaIJrk zFH~VXE2(sRUlt=(4!#t`D{P`{&-Z|-MRPTU^dkc0oS^);VlYQBoB+kR&o)Ml*R*YW zde3sR>stAAGD&}psHt1s1Cq4iUOtcfZ-6Ilvs25V?x9yc7uC`v%($4+2Q%U zLZwzVKsMd_qw-afpWv~vdmbHMaD1%rq_E&q;3rfO!iSdc*B><*^ooVxO!u?tx{#Q+ zkWwdi8;;Nb4jF#L1BnxK^rQ(4})f5y=M|17_#<8WK%*jNw4Nd+b7xUa%&Q? 
zi1$ya?VU@k)K)MmhlAs%T$;U;Pvf%^36lLuY8{@GL+$dl*cQ4z3))uGEOggE9_r#2 zv!ECvWiP;tE$Eqp6uy3LtR3raK=gZ9Q=SQyoo)jDZA|z=h}C|f2Og|&4I;$Yh7@wF zu@i#2fsx`oHOr9;PHrvRcMzbnP8K5?a$H=1;az}4T_c*w1??;nLijtHOKH0R01ZAF zNx;|a#4F;zA5PBbBgBE^NC+dri?d(%T_|gmVdJ50fZ_o*!sqIqCTb1KVz%}(960=_ zSJAwVhlv*y2n(b*f^rU_R6T=PV5>CCb!cpa@XfnScjOP4oPv8?z|MxTG=+f2}QBh%D z!n#(^uVua0Y$u3u28djY76%wG4x0%X3MPGOy>UU43^1LH))<7H@G$?R2`sXEmE$0s zt3zBfz%X8co$i>EGzV+b%Fq*|Yh%yqRAqz>B!4PBNpKWD?dhq;rf8F<-n+K?lLB!M z6YXdLPmitD<;$!#C#@>)1==1_LBNQ(yNeW{n4s{K>qva{ zS>9A??-Xg8+e^wjb=duFr5_)9Frpd08)Mr_Y9q{tE@69@c}^`Q&S!@gDeKelXt}`BmTB{%0KHHw zpa*yiaHiWH=IEM11Or!0gV>tyW;@za8q99o-~8&U-rDb!B|iIsK&#x+*CjG|vSI7e zm^tZc&I#UQK4U4;!<-p_Cebg(%IaByez9`NY6+U8dRneHeneg!6YW<6J(D$d-E0oF zYd!VoKi9O#wm?n_YZnWYm*YFQV|acDJv&|$6xAC?-@JrZ%E#{Payj+KHqx~ zw)x>sxQ4?x)Jp09wD*=#QE%b@=qL&*0-{KVQliAr9V&H@7U`01kj?>>ZfT@Ly1PNT zq?;k6yJ29c`R_S*{qD=V)?Mp&U);6MygX~o$l2fh-OuxUYCCJL$|}3vzSEJbn=V*Y z`?66pHkm*&q+|wn+-T;XBH$>>7(AnhA@_(n_=IFXPbX$)w&OUy@VEsAKz=lYi{C&R z8pfYFt5`oKFCw(P2~fL+Yi^i#Z9zMh6{mYfJo&tP$%F4=UJSPMH!)Pnj z&8B3LW=nPoW^Pv1|6l>8W7)E3|F^|{okj&3=|hhg?h7X=w66q1HJrP<@XhZtR)`|fUWbIRR#ZB@h{#U-fp3ug^?#T;!;C@KJ%Yaic; zk-}=BQVYWmFP)fvl1u57@Ya4is#Ydbuu=s?VpHTMT%pzTK}1i4ZMFGvbxeB&4t)g- zS`4UzX}=7(ra8sEcc3X89OLOc%(UAjNNDn@9DeRI5QI{7SLbiAiu^zyca+mTO0*of zW1uZuKehhiq2l(qI-{{r2$=~I$p`apge4M~%g~s1DL{LbnD0*>WL6~-w z70acYbZU5_Fa=dMzZgD&xGxqQAEY+ zPbajjUcwU;#BS@#UK_g|)y$XDDXN1&uRo|rt~uGcJB!F2G{3|d@v`GjH6jGU(ZNZm zbpQ8Yvys!yxYQMiWUIV>hm=rxb<$__0+m0{vmQDh3As6j;qMk+aa0{Kedh|!*!kNd zuAUuK#xI2^P7LPiH?}(!S+n@ber3#j^7E;X#zlqZ(=k+1wm!+wr+=iE>>oXdy7Ab@ zYRz}yFX3}r>jJ~}6rOm84^P+4eHO?2U$eW(!S;SVbe*?X%^WeJ3()k6Az=aCtQn-2wm+Rb&IRSm`R% zh8r*FlEy3S}Jo z*_)8h)SXG4JfrYD{F)^w+YaUyKge*C{{xS4gCMWg<7%>UmMfE6o0?~Zz0y&ej|X)& zrjA$@k`y~xc3Qmul^*!e-bO`oliq0V;}ANZ^?%O^Ra>1i^<2#ic{-g8@JKdmgVX{W*p8;j;Gd?uMtk4EE8CyvsJasO znEPfYzjm$PtWz!BM1au;@NBCkVh#K5Eb=B|{n_#9oir`7Jf4GbeI8S^beYAWM*|Ad zQb9i|f-*uXY}9Kws-YRQD!u8+mO7R?WYQ_2)Y5*SSfE;T3J{9az*?9oLfCowhk3-u 
z=;s_uvQzP4B!*;q&h%Wnl2=d&XgV4h*4+>C>gCJo{O1N8fZG5lc$1!I;2a$P+*)D> zeuM1}2xMCTa73@cv zK2S$F;Fbmen`nzcCM?`kEb?7}fyi?&q=)6al}>+%yxu^Y4BJQTB3(kSdQQ?XAQ+UL z_Iv#7s*2@9eMO~0cWlBRn9GKToOq3k+jy|@NsuaDeS?ZsFk6)&#z>hB`4{Vm{JuyZ z_x+ZPhyA1V6}-=V#CL+O^KtseGBNzrBM9ItWFQ-;^_HA88V#i@#S68=Evf)vBgM0dq`4C3#(~ zz)(m9Zq5D^K4MNfJH*FU#L3VizA~JKr?+6yL<<6`%K46VbaYp{Ec5Jjr8K4Q9BLTZ z3;_hBlSntoP*KqZcrk4dtLWM`n`gtmH5p{b$Lab#!z-5x+x?b??$!!bj9JQSQ zUt>bnp!HY)^(x|KR4s-Aj(Fl^JN8N3_W_ILk%BQsDe_S-)F#hs01(F3Ij=JNA%uY3 zt?Aez=VZq){%fhZBonhf*IQAg)%Doj^yUT67&3#CdUC9=3SuX*@!;5^Or7WrnjfYz zKqY6te48qk-sIq{5I$O`y|ePzyAH4Oj!plU4rssV)Z$v5bM4Sb|B%?!%ZLi{!8Fn@ zpWWzHtRo=@b@0r$ku>YeBqJsstHys`m*uA9Pv1#nLzfgzE4UV-1657tvAX_J@*uN# zwQeJ&VKXC`8@TN{fo?6*xoJmRYK{xExM)g$P~ae>>jkoE@oM25fO-)Fi99G!L-=5B zil~cQ0Q73As5pTXo$L73H3hk-nLpyhQH_ey9dH8#i?+yRy!c1!Zl2zS1 zrO}0u6RJ?kD{(^LntYwO2m$Ke><aPFr-Y2Qf8LG~4AKjXMckyGnPtCv{Q1 z6DR*7C2m_58sR}4p+?@L_J+}b^E|>f>8{2sTeOCA&0ADiqYj!T2c?@cHmyZ7Uup_7 zZc8eEvhMj5pGap0H(G0CC=ZD+#C@KXDHo8G^-$|&`wB2pxxbC~NVgix%4J@=lcoqV zs)nPhwoAjy3V=`d2z?f^jkRinaoKAvMpeRq!y+g5)gnl80nsA1h#8ZqQn?*D0jJg3 zm|RXegD=p9WF057MaIjJ(Eau?_3Y8W8Bz%ym)~`4sItNp7y#;e^Ji14iJ6<_ZZfBG z9qtK|6oEQZ{?z(XQ$6JFS(E0mme4()tZe%h20kM*suo{)iKW7l@Zz3cYrx))*o!$( zB*m8ID67+XbSVlA7SPDOWk>d9DEJMM3da2T0D>{1&x6kx;u46*>e4NQ_$?z?3ZMZ@J8=Hpvr zIVfTHCr3J_z=#wtzT_LDxG-3Tb0iUNFfMGH18O-*Es+`>HepXwu_PzRZ|hcvAdHtg67r1QVSUj z7MqDVbp}1By7-L+bQ8}w-vC;}1mKn~Ku@Mp^FDk;(>-jyeswGMov{UBR@{cQ`L4J}tHV^qqdLfPfHQQ{nj7+nt1ko`PO2(t(anuQFaG=C8NX_sB z-G#1O-iw?u=k4*W+|020lR*J8X{}4Zbnc>)Q-H23UsJE+HJR$*ZmDC`2w-U|Sepc$ zcNlpD&cYWAbb7HhtHQsV>rg&Vrgl~~llWL(T^RiR!vaJJ`cKYvI)CD}<3>HK;A?3q zh<|z2mkmZjYlhP4IdVJC@H`*_a%A+j5c~$9BgbDMN2m zkkB>Si9H_FLU70Q`z&I5t+f>C}pyOR2mV7&0!FzC2*V$dQNngl_Q%N ztECGlSimwxU9SR>rXc$cVE=(JDfxO8w4!u8=(ELhQl>(#_ynWd$gQ=R4W^%2%7*g}KQ=chVxzC+IyK0c!)Cb96Ec za7kFKDnG4G2BJ{|P${|0KLHQ2Fn|LjQak{0k}BwWT)YFK2VihHGHy#SL=mOTZ-E0h zk;6pJ%Jb94U^2)p5;D(;ppisu9wm@yxFL3t6{$B&fe$wZ5{Z7&A$R1xF2?{M3+fdI 
z5aQQ@9YF<-;ht0=&-d66SVYck2&>LNy1`vwKmVqZsLF#Kq<=LTe^qiCxyjBTkvd(f zAxqz)^BS%dZ;M!1Sx$5Qb(3WFx7zCABdg(Pm-Q0GkB_utHPXI*IqhZJ=H1}rylcj% zU97vDV8Nl=i|kXS_R5w^z67w< z+7D6O8Ap2%Y(=E?<=xX+ap5EwVx}b9y8(bxd%u#M=C46$R>Ut`E)<$uLf2QWlCeCc z9I(3$sDOr)rUH<~=nx=j&w>#FI(b?tHZ=XT=rAT56B84GhaMi;y2(Mo^=ew`wn^}m zg{N(*3A>6{q!9O=UQw zDE|eWmcQ=P`EzVi-~7G1ZQqGu`q2R_7#WS)pNjDyX_&wjk&tjBmutpwx&wCbfj)ArX1CsmgKo(3#+J06Rize=l=fa1-G0x}gpbL%s95O=g&QD^;0fNO6Dt)~V~!iRtxEw(DFF{Ibp&sL?Yz#2O)3-5sE&@gvX~4K_=^bf8*zi_z97 zfKVDKlOK*ZWKb;14=1@t=DYep=OgAW6tJH#RyI6{Y~NoRi#Jr)fUtP}vuTal1_|b7 z6zKE&7)yX`4Nl_)&a>2#nEw8L-Ox>h+2P;T2$2HSl3VZ~o4eaqssB zJMRUXi*Y$@#qzp6NtM*GT5N7YFxt#(jH&(8oPE>WfW_x5akjhmpbrDpZfW$&3~!(UW!2;7QvacW;Rt@q*8(PUA~I8l0QDjq_bI z-qz#9JmLJH{>JUTk2}}$^yTiFg02KeEc{A}*5dMCQYu&xRDvuov=kuX-B5}UT0Ma7 zBT8Ba%{y5aJ`i7F)OZ$Z(7c z9rw_L3nA1T&k;W>vs{bqr+?@M=X5XUWa^lf#S{5!)0z(`syD4myfL-NdIMa!OdzY_ zhEx+~6(x@yNo2Hr|6SO&ZMSmlmhR&ITxk=DlUQ^bcXFmu3j~sHAWR5J2mm>fZnC)u zpQS|WdNZ)IBMOSO8+2UPVqllMP46bAfU5w6%1JiUiJXAHkAFKChQ}k9DhvbcH;4@C zXRgr#Ra-IG>-6^OApCCa^>IHaa+3|t2STyv!VM7j5%eU9E_HT}16)%wfCwvb2J)09 zdTaHI(nkrHOG~Iwr4}7L(NQ{=K92>*Z7OZd`*mg$EZd3^2J?F{Wv<$s{A95?+7j+z zK}>#B6wo=6(0`&zSSal6F1fzWeR2qpEy&QRw5C$GEEwt{!TD6erl^x&VHYmKv48Up z0&xfFuW5msT+P`|?}2aofJSw1wyNMIMdM1C09tCp5P~*(RXuzF;AyIT`(*Tku(EO2 z1Qp3bXAd;^uCcUL#^_cT!H;7IR2|LWM^XX&0d|(w>MEEDv_OpL&RDm$TPS??AB;AY zJ%i;$_6bTMC#z5EB8`Ldld@$V#tu}Johq?%Z>x+Ie^jg%#XBp*$C|^oj5H0W-oU@w zy{p=Pa+akYtdHe`ohl=L-!$Y>u7~sqJ=9^UwM^fMlMr|~&B)x;R@Ig3`#+Sg?!iZe z)Fh@xxLol+`$e`$wS&$V}3nCT|Xiiiy3_X2uvQoG3)IzhJwb3{ zN!a%~z;psZ(+Jw7edetE>64Zxat&|;a>woxa*_8=MX4@_bq=;cWo)1nn1ot+h#LZP zk!KWjuo1ND&sD~|pI0T94<1CNqd;!pTvl0kT90==5U2o`fpn_8XmsQ1_Xb+FX&_d> zETC-f4+&JouB9c47>MYPJxDiUT2Q8zxmk>IHKkhU$Q{vsS-Ozy!8) zv(zzdFZ5Lk1&#`U;B>(;!o*z?d`y3{P+So7)TTn6&_F&AzzmSzJ7zWa-YS74U*+!G ziIkoj^dx6@)_mS%(--}EVm`6M3_v2`?=OJ^>=qQ_l6rU0eWZGVD6X*Cx`XB`bZb+r zRcBXnzF^nSjD|`we2>mJv1O&ICZ0 zHBhqs{cTFn2j@H@%%lkq4vC!qRNivABDqq0&=Kt1wS6BcgO#ac5p0o}af>KO#*!ek 
zAiUQ==H(V$`)=Zv4GWSb2cR#dVJpL`8*zYg#oaTAhyrJ!E|8&dU9SQ~A&;U9`t(L; zl%NAeC)QIz>_|OOv!EdYT>z9hBBg9WSxa@CR7cy5REt2|SsO6Eps;l>-oG^dIM)t( zUyB|KJmA2n7#cj%x|5g7QRYgd0qm%YM*a4Mr2egX%0$?Rn* z`V5FDKZUzU1`uk}Vwix$oEIl(V0I zZ$zK#z_lsi_-~8;;qH8#t|#C2J_2r2Qao3oJ+#NH!PU%X#$D7mdr+KV!1)$lBObi2 zXxKZg;M(BFMPrZ|5mI5&QxmChXrmj-ypxf}j~C#_JNX=18Pa9ke<@QD7sCa@WIE^kGv1l0$O|!brBb4?TC?TE4AtUi zuYoq;{Px8VjMqNO$z*_a@b)hhI|a9E@Xi{h_2J7SnIgFdy*YQVCWmSdf3%ZYyj82B zp?J6N_12Kn(7b+SV1d5=%dCUhvVEjc9^nV4M&RFS9cvZa!E}8(R1lflU{y1%Fs*Xw z&IXeA^t&hZ8=XS`dHN*MbTQL&VF)t(+l=ke+svtE`6`-CserQ{nz~bLMkrK1vXoS% z_I$xzwNMSeuKutMJIaBz?qmBse;fr+$^9Rbuv^d7qMKDvJ%Y7v7Ew34IOi$A=GEKs z)UgIseNvr$ISjJ5qaU5g?7ZTd^iQQmhR3IV?%te3ofcbK{UzYVd^+{GVdZ`!S*+3) zf%o_PnQ)oq}7atCdE#m=RoB!wdy)SHT?lW!y)kw7vkM_ZbSwR^?wK619>`$tj^kkBM8r?SXp+cxl;YGr)kOf&`O=ITi z0~WwDI51i)IUZh4%+IXTC_tLInpvYSm+fK}x4s4f#ztp&$kGNeGS8|7{$cHi+qi84_=t;w#wM2Us4X8&7z0&^H*H2WX>rNk3)veD%(t#DVH(7UA0jFpx7YF%!KEvr=iu+X}7ZqoJ$JutY6~EhJQObzn8;NRU)wWVPd z+Aa-kB5-tWd-!U+@%`YMH*nM?f<&UG?l#&NyTj?CoGvBRz;Sq=XjRRxbZG0PJpb$N zyc6TWB=vc1t9q_x>bAX@Er_hsRSUC{Qntryo5ZS(rO^$VOhe1ut3!rL+JYX_)@LJc zSLgDe(l_dRdy5QxSg5@p=i+SWN6+C5EetZL2+auNf0| z>tzKxD2(Odxt+a}e5SDaN@9WEelIq-W8ijtcM8M$M&%_9fRrdifF>wS`&>{avEDTq zGZLi|yOk8!0oWl-(}m0k@niAIjUEU@2i+9gf-X3Ti?Mq>1c+1(2N*$;zK|U-nyU2}7k$ z<_W8+7MtU4l>ebiDz2N!zGG_1hXtSD)WPUuZVP;@RH9DXVFC8NMWBT=f;JoXj&2~d zBE=8NY%j2O-0`#syFK4n7}>w>VRzMImgN^n&M3#9IoJfu!y{&BYQ!3cx3(W;tQqy{ z>XYFw8p#LNm!VroMi2+g4Ed^x&Jmb=mfQun#_du{{m!yRx10uE!LZi{fu=Fx7& z(Eel*T<3rvKmZL+1Cr2+?_ew!{y49_ksP-IITSVtNCKXX@%cvAll+DP*(nJ=96k1b znz*Q#Q#70dRzy^%KNlK{W*b@Eud}mu>uu(WjF>UdJlj$DA1pvvF?Ti_(R9o@p{28) z@+WR?$E=T?C%8ay>ZREve3KS=&QXF|*&ZxmN?q}0c7SRb}wc0<`39AeQGXY(B%To+U1A26rJEnZH&k^kgyv@|F zxK96A&exxT*a`xi2_W~$k@@*?zDbhYR;ybC}O$0r|?FN5(M;Kx{6{)0N@i6-U z!3&ID%&FHz<^B?jHm30-t*G!J>2opf7(#qwMU`L0b&z2O!%cAc1g2Me%Wpob~^GHaGI4}ysBgfBjGvPxYz<r`G!CYNBtp1VjW~tX zOt}%bA_I2$NpqxhXiC2%G8G&f%o;{JQlGpc-)%2RuD`+teC@&)b;0WvHW4^2iz6V= zDmU~HBqQs>Nw+8<+4v(h!eoD=%dY0NOtf5l0HU-bh(HQ+1XXW4wAxy#(4CJoY=8vt 
zNF)iKqO00)Yfw5fnBez|E+_P*>>M!D~sj(Xq*OsjsofT+S zF72^7wdKcL88m3-Hd{M;xZXcGF!E-q$<{6a**!#afJq+j|IA2-%f-BEOo|>WLi>)F zC!J(XCNwn1rq9GmUA+QCYl1MD)y_u$@7nB`pRp;2Hp*Tf#5ToKruT6WTp<$UJ)c?k z4B>Zdmq#8e3hcH+V78H}TzU zl;&S;zn0#x5m;_VnyCu^{mOf`<0_36b%2RvMp`v$sEF0P z1hIuK+e0J2o8pUpvmqa`?FK*~@6mRcslv-gk$rty^na;^dJf=Wyk@rj}$A? zn0XBZ*#>ItA-vl@yeDwiL`$>tY0ItwWFlE16Gf)r&2e%Ld?Q+)9#3SCY^(C4>+7RJ zEvn$uFifNNpdS74qY(qKRq;d?v&Y=XA~4kMs6I`jA7Z#6#toXIg@fp>S7vnp<&UUi(({4S3gQkhi8_xG+#{XFUS8 zJ{o_jX-g`Tp+e2LeyRa3&e6Gf5IQXku8TkidWT{D9TvrFT8)%8uK;){U)adLy@3;O z>=g&@Zcj|a3RGD>51p!yTba>35ph+{@w6T9)`!P}4B6HoesEq+=*3d*MLY&s@n zxvLeE$Fc}d6l4V{;ACv)`f*Y)nc3z7zsy*1R^0-)NAg0H&9L-(!{#B`B31t}1Bd67R-QU3YD}^G`Kh*~j+{C*= z^vSXu2XMnl40jeOon|0Do}t_NE2Rrk<~moV))pjrdTBxM5ax+1aa*TZyE-ND3;H1@%@N|l4hQK@fSNFgq1@*zP!p__$!z2XP z&SNj}h+p2laZyYrs3c1INxe+JsM;L9^8I60IEMCAL3z_IC`=DJTeEbh&ob+ujj1Xc5L5Y-*P0Y1l|y%78xpXJQ(v zvrv}stFC(9Y+KV~*h-s%8!D^%-s6#5>*-NGC#vBn;FL}H4tI$3P(8*-AmeIuHLCR+XBQ{gmy0v@MJb2*DFXuj=R;Zf8;)Kdc=w97zBSs5y48gceYqxv zZiTHYII9gkQ&kFLJ9<+@TKc?De`~S&&6Cc1ntj+}@cmL`uLAzEjUSy{OISz{`{kjI+y^PS^qqMoYuC#|mtDW(BRGICC)3lMY=7YgXj(dsFNR@aT>^8p*riZmM% z5l;{NZ>3plPYT|Jk}Cg+weBop(Zt`sk#ITL!1@wU4zk?^cm+~FMDgK_cT*=iQ9$=YYRAs-y;`VzNL;p6~ zs57kUVsq?$puU!CXTzA-ZzKD*S;_a(2lB+WhRB|%Hfh~BVMKp*SPb35%?)1r%d*W$ z3|$|*8&>A1>2N)}KOL1CmV*2mAH8SlsrQ^_8fjo5QcOC`W!_zc#L{Y(pN~Wg-tL|M zL*CtbZn*`QZD`+xW^ILIc)nA~5+Dnw$;n=GO}%PyJ_v3}J!L9jxrdPTSPGEb>i5}C z{YzF7)mf8+Ovd`DMX8Gu&c{KH%4fBnc5cN8boT1uZrsJi(F9i}@_0r>_))d9 zg&Vdwn5G1leNNZO7{~7Z;QRLEt6o9vTbF;7l{-T&I}DXIHGkiYLj!arb?XT`#bn>_ zgikf?I|7qHoandN+1d9?=H^VkD)E!rOXobk98hfUUvJL7x;(bZbX{%1y++QHjcEkj zRCO=?fLFg^w$eyh4$#~B_N9fF_`E!L41aIu!5*{O6A|NZ*wuS017hX4wu*b~pJvZ} z)|{y>q_6nL6wAFERsR|OZYx-AXaL;cIjwc2FvD)WK?DUQJZ<|RAh@2R$bzTC@~PF=Z&i+B4syt`fG@#%u5IEX%p_hAX{r6wse|dgp!7M+`GT|UqEhMgRp@iQ^R7|@?c4{Nu`PcCEe5>5 z2`9KG(@~>dWbb5V`+a1(5YOecpnQ}S@Jg2E-V6J?FChdVTAVHY%g*F;J_#Od+bXOWd?fCAxs@ZT*cD1f#Gs`rS-TRvG*hML5==AYjJis;&T~ z`EotdxP0G(UOVWy$%s{!PA=X4>;p)Q9AJUl$Q{QO)4!Z-#BqK-`iAH67hFCBT0x0T 
z8?^chp!(<(g3HTG1(4_D&P1?QyAp;?kRx!7$QPAA&(oUMHv3%}K6cm1=JC1eZc@rc zoY$d-JJFTa zp0)0=(#BVrC-SSg@KIR4m-(%_gr=z|T~N1$@=A)qWXlT22$jznFXv*!ApXYcd2R*Q zzyqlGN`zUmJ)|cVTrlDLlU46MgI3@-R<|u_-G8~8NlhCVP}T5&NCbs)0Pvi?0)Fld z;q$t{XVNz&N8qMH(ja!DzUa3P_glCNDZ{WMI`1AOLVZo92o9MOWBnv;avLRT4Pdn= zi*MFTn#`oshcva1cf74s`6_1<+hL2Zr>Gp9T{b0s_ts{vEo(Qn`z(Ftm%gDkQ$wZC zt#K-D`D7&u6>L8>C7lcHNro?~bz$>5`@6S1pADYraAhq^O8UR=5$GVAG;;PmCh*FJ zd1;E`N^to^&@sUmN65LOdws6jn6Hf(Mz*~0Aulw+27F%J(ydvLk6Xq+1e74p9-)Jh z!IfTqk)47_bRGmq$>Np-oU_Y?q?uIv`_50-JJ4T;>hG4L+UHes)<<;pr|Ql)^t2B zb_DCS^`^TSLcR*R;@nGXiz%Jyy5s0frT=&fK-xIsNYGGUkA!2es`nY3Rt z7rP-$gqEp5oR~9Uth&QNHJ+)q(saMV26LCHDi6qx0qF{z;jhjg_`=H#$0KqaP7xF6 z`vsAH0)Ap*%hBh#6{()uV=F*QXxr9oNT2x zA{+Xu@%A%WtF{*`Lf|)`g8069KJ~wtLJ23G4SuESQ4XcT32++tGc zw`yD4`oBXk-MU6GYmKLr*Pgi#j=tHSPb}ehsS@U}-0G0(ejRVUeO}V_AyuB->s!)o zY~-7*hb2!R-06v?lem-nr3vp5m0j6z#&e?8gv7)W5boeT_qh*T<)%GVyF7TQK;_kf&T$-7I1Z^Gi;=|Tc=Su=B!DW+}Mi`Vh4I{z(t7GQH@^g@+KA< zzfJ>nvv@k=5-*$^Pz}$uu+D?ME9{#!2v-6N4)7|3;}VQO4cQBW5>$M*Mwu^0WpBr- zVWpn@_WG5%8Q6khHJ@qLJo0;@^=4U~Tey_JM{aqBMU$ocGlftJ_bV@Vx96X* z4^CRs2{9q`)4kPpD-o5HX5ax8x8-(YxLRP<@(jZ)Yv!fkL-*=m5)Dq)xx7a+!onLi z>V$q0&T0-88l=u?(q4WDOF8+N_TS%i{hl`MW7h&p_ z3Kb8==f|BIY+#cOATfli`2O@topdbJvMqHVR2A+1tMjugU-)vRk_7&?ve(5*1o&Ax zV%pX08+kW$;$lT8B6|}U_iZ1KZ)`LeduC=)auz;rsinrT#>E^pWJbou z;0~+W&(%TfTrvlv4n+#c;IL~8w`9M`(a~dItrj5I$p~(gb^hw;FJ6JEL(_U=&Z?E{;6CY@JEyy8qcy~T8y2OHJm! 
zPJsE&-CpCH(}NtX^URBj*;>v<#Iy9JL)PKh^IzS^_1$+depNWCB`;i%I=A|ZzFvj~ zL4J^-PgaY+h!-IXi0T?`*cmt)RJ9!^-f`1}h=1{V@mL6Dt!pcJli2BHoEd5K{>*frQ>Nw)Aos2qcw#yyA)25r)WVM3nW%0%q zo^;8RB>1|D4tX>aZ^th1aRwiBzASYixBbYwvbQC>i~)OEf9@+Q-QK2~oyF_l#Hn=B zZ&rFyS>u^rx zZp4tTi>~B!Gm9*VmI(B5b8BD(=*tD-e6lm!whkUhri_bB88#Bs6hG=ekPE*|f7*X^ z>->sbm@esJn^kWX@@H~eF3Lew^R>K8H?nya;<4rd>r~y{pUytxjyRsnFjDELED*^xvl1{T zqsmc^*pSOOP&16k^65Ij=yJXeGE{-g1y39a6rvNFFG9iuvsRGb4*AMfJ2R{E+?UPv z{AIVuIqE915BYrArgz=kRtFtvYM80&46I689+ErFwPF&XM{|Wj(00X?2hz|5Wp-Ve zRPL&d#j8*WnVWww!tpS z5vC5sClY)!o{S)xt9xnb$sXE&F!L1iJvm1c@>x2&14?h4H>H=@2fRR%NyMb#46CEt zxXD)Ou9DN${Dt*UW_9(Ti;8BX&3kbGUCm8ZSla(Fd!_ZG_z!VZ{-YUo;jZx}c>Ca) z8PVv=t-K~zrvWNL)sXYei<2=5%Y+|K?bzgbg~DsK6FF+3hMvTl7g=?aF&um|n@R zqx;sV5*!Q4?IFIBEgZ&}byZc&50#sf4iE_J6}Wk7sotH!rxu7<RM10^1i3E;hjPJoE;G9tPI6dHY4Nw}%aUKfmgHs*C2f&Pm33tfTm{yqe@TADwi^XrN?1ZVCenPN!hQi25!J*M;P zifZb}p)>Al{-M9g)duC%(Nd#PXX$(z5hRWJfe++UwKr`lPt%sH-V51y)+MyE8y#h) zP+MPL-_@m4V@0AoHPsqft*s0amT)_;-OhS+P`J#@%pj(HuC^@xj2hgtg1$w%{-CDi zbW~atWj3$eW0=I$IXO`-L@^WO_#ut-f$cPG%8$v(a97us_+|^aMI5{QOpb}MbHJU< zb9sn=pd%yc*SW8u&TUY+vnD^dI^D~!7m{l^0KaU!9)kpYW6dsCNJFOMQ``bJc4&x-Z?L56r zOC9z%=pTEF05$22rit`K*L^O%&BmipnBnf~GN|U5$1%A` z@9m!3UT;IeX`NbAYv=auh@STkz2tVIdhi*bpv zDT2uSg5pbbf)Gd?QhN1o)c5k%qGh^uo-5 zm{nd#?W(6rcF)~yKotZMxPzFZ)11_jygXN6;k`n~T7lRP$<4$`3gv!ln+I!pE8i2C zrLXGgH%Ls9j>Z6Z04zDXAdDNCqZ=A?3Pl}5@s9WUZEV!Ov_o#6)G(1$?WfP8|6K^! zd5`hns-YeinzG<2BOR@DCDlMnbgXVhJ>TaomvaTo1GgDNTEGj$v*>XG77G9sQ~S@! 
zRB>vspY!5Q+@dvh&LS4UO#{Cdc1`B`di$-&vZ;#AEBXN$9x zP#Q{bD@E+y&9&(6wB}idY$nmQAvg}f$1UW-YvWc4^0jZOx#sd`w|Rl21CQ)(J|+H# z8e7kSHZn;YZG``8hXx+NSVo5>bnRm-xPVQjt*s4iVhr{Y2A7N%*Kg;eUmBRa_efY3 z4JAquwNdtTZ>MmUwI$oOt=W$TadxCB5k~F`ypX!!s#F1|T{Un?>E=&8WS%@MzXtAH z?R#IuYHli`+!S8CZ9-YHXPHOJRo_sZClGDCQIF3i#L`@6d(LrV`wl{4!7C`k2PLrT zKQ47Nf)LAq$+(MxEU5arMlWsZ5DW6zRQYCiB`d7?&f7Y^-Ra`|?MR)!PC^{V^5+F) z&K?`;UR60YHfGk=x6|Fi2y_M^j*W{bgP5fBMBj{~yvWB%?9Xz%MX>FV*n|<^GD^)RMUH@ILrcfA&~ZIpfhS849meZ?3zYQ(9{U?Z3mc9 zfN~=$+K$eGQAh)$XK`Ila|LsBoh-I_HA~UY|BXInays@t}=FTez(|g)~g6ty%M^I{<7cKZ+u`@yY#mJ|2x7?c7Pqx z5~(qsNV*J*4VtkoeX&EC4_iN)UAeW{Dch(C+l0^cRL?61+pxi&{QG@B0F8F4eGy$z zKUfsH3=l0Wt~2V*eeOKTV^e_JuA%C##+Ae3H1dPE8DEI{rUgg;lG;;=5H9GxBHq<++%U#&evfsf|P44=k z`;JY;R-mH%0!T?@R8h3D}Lb zCFqG*l=8UOfa9unGvaLiSxya==lhLm1Azibr<(ozFcV|2Y6F@aK)r?jyM)nym(Vphku>Aji48>mjyQYw@A>e@e_wxh)+rP^M zfv{6Q0UH;5-b#Sq{rA%o0M_o`4+;K*|A!y@1l~swGWvgg$D8DT^QZq;|IYt@?En3< z{cp|wTQkr|{eOOj{I}2k+h_mnGjs|4Un$A|y-xqVPXGUXo#GylgI@HGoRrwRPyZVY u>VI#;TZvmRJEBlMTXkrd2NS)r7!Z0LI$@7sWjFM(BPA{;R{Z|+_x}Uo4q2f9 literal 0 HcmV?d00001 From b5926d10c824d1b24b0484df275ebc498cffd61e Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 14 Apr 2026 14:58:08 +0200 Subject: [PATCH 13/28] fix: remove hardcoded API key from integration tests Require SGAI_API_KEY env var instead of fallback Co-Authored-By: Claude Opus 4.5 --- tests/integration.spec.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration.spec.ts b/tests/integration.spec.ts index 405ace5..5380eb9 100644 --- a/tests/integration.spec.ts +++ b/tests/integration.spec.ts @@ -1,7 +1,8 @@ import { describe, expect, test } from "bun:test"; import { crawl, extract, getCredits, history, scrape, search } from "../src/index.js"; -const API_KEY = process.env.SGAI_API_KEY || "sgai-669918e5-55be-4752-a684-f6da788d1384"; +const API_KEY = process.env.SGAI_API_KEY; +if (!API_KEY) throw new Error("SGAI_API_KEY env var required for integration tests"); 
describe("integration", () => { test("getCredits", async () => { From 46bed0e88d53c1ad87cf89152b0dfb51929f3406 Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 14 Apr 2026 14:58:47 +0200 Subject: [PATCH 14/28] fix: add node types to tsconfig Co-Authored-By: Claude Opus 4.5 --- tsconfig.json | 1 + 1 file changed, 1 insertion(+) diff --git a/tsconfig.json b/tsconfig.json index ab488c9..234c173 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -3,6 +3,7 @@ "target": "ES2024", "module": "nodenext", "moduleResolution": "nodenext", + "types": ["node"], "strict": true, "declaration": true, "declarationMap": true, From 276713be2ff291ac1efdafe6a7611fee8977ae58 Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 14 Apr 2026 14:59:52 +0200 Subject: [PATCH 15/28] chore: remove .claude and CLAUDE.md, add to gitignore Co-Authored-By: Claude Opus 4.5 --- .claude/commands/gh-pr.md | 2 - .claude/rules/coding-style.md | 714 ---------------------------------- .claude/rules/git-style.md | 29 -- .claude/skills/gh-pr.md | 187 --------- .gitignore | 3 +- CLAUDE.md | 15 - 6 files changed, 2 insertions(+), 948 deletions(-) delete mode 100644 .claude/commands/gh-pr.md delete mode 100644 .claude/rules/coding-style.md delete mode 100644 .claude/rules/git-style.md delete mode 100644 .claude/skills/gh-pr.md delete mode 100644 CLAUDE.md diff --git a/.claude/commands/gh-pr.md b/.claude/commands/gh-pr.md deleted file mode 100644 index 8d95e37..0000000 --- a/.claude/commands/gh-pr.md +++ /dev/null @@ -1,2 +0,0 @@ -Create a PR on github with an accurate description following our naming convention for the current changes. $ARGUMENTS - diff --git a/.claude/rules/coding-style.md b/.claude/rules/coding-style.md deleted file mode 100644 index 364aa43..0000000 --- a/.claude/rules/coding-style.md +++ /dev/null @@ -1,714 +0,0 @@ -# Coding Style Guide - -Universal TypeScript patterns. Framework-specific rules live in `api.md` and `web.md`. - ---- - -## 1. 
File Organization - -Every piece of code has exactly one home. No exceptions. Framework-specific layouts in `api.md` and `web.md`. - -### Placement Rules (Non-Negotiable) - -| What | Where | Rule | -|---|---|---| -| Hand-written types, interfaces, type aliases | `types/index.ts` | Types NEVER live in `lib/`, `app/`, hooks, components (error classes in `lib/errors.ts` are code, not types) | -| ORM-inferred types (`$inferSelect`, `$inferInsert`) | Co-located with schema in `db/schema.ts` | The ONE exception — generated from schema, not hand-written | -| Constants (maps, sets, numbers, config) | `lib/constants.ts` (frontend) or on the module namespace (backend, e.g. `redis.KEYS`). Cross-cutting backend constants with no domain home → `lib/constants.ts` | No functions in constants files | -| Utility functions | `lib/utils.ts` | Pure helpers only — no DB, no API calls, no side effects | -| Zod schemas | `lib/schemas.ts` (frontend) or `routes/*/schemas.ts` (backend) | Never inline schemas in route handlers or components | -| DB queries | `db/*.ts` | Drizzle typed builders, no raw SQL for WHERE/ORDER | - -**The moment a second file needs a type, constant, or schema — move it to its canonical home.** - -**All hand-written types go in `types/index.ts`.** Three exceptions: - -1. **ORM-inferred types** (`$inferSelect`, `$inferInsert`) stay in `db/schema.ts` -2. **Schema-inferred types** (`z.infer`) can stay co-located when used by a single file. When a second file needs it, move to `types/index.ts` -3. **Component-local props interfaces** can stay in a component file if used only within that component - -**Scaling**: Start with `types/index.ts`. Past ~50 types, split by domain (`types/api.ts`, `types/dashboard.ts`) with barrel re-exports. - -### Monorepo (shared package) - -When both apps need the same types, schemas, or DB definitions, extract into a shared package with subpath imports (`@myapp/shared/db`, `@myapp/shared/types`, etc.). 
Never barrel-import from the package root. - ---- - -## 2. Module Export & Import Patterns - -### Multi-Function Modules - -Two patterns — both result in `module.verb()` at the call site: - -**Pattern A: Default namespace object** (domain modules with shared constants) - -```ts -// lib/redis.ts -function get(key: string) { ... } -function set(key: string, value: unknown, ttl?: number) { ... } -const KEYS = { html: (hash: string) => `cache:html:${hash}` } - -export default { get, set, KEYS } - -// consumer -import redis from "@/lib/redis" -redis.get(redis.KEYS.html(hash)) -``` - -**Pattern B: Named exports with star import** (when you need to export types alongside functions) - -```ts -// lib/email.ts -export function sendVerification(to: string, url: string) { ... } -export function sendPasswordReset(to: string, url: string) { ... } - -// consumer -import * as email from "@/lib/email" -email.sendVerification(to, url) -``` - -Pick one per module, don't mix default + named exports. **Namespace is mandatory for domain modules** — bare function names are ambiguous: - -```ts -// BANNED -import { sendVerification } from "@/lib/email" -sendVerification(to, url) - -// REQUIRED -import * as email from "@/lib/email" -email.sendVerification(to, url) -``` - -**When named imports are OK** (no namespace needed): - -- Error classes: `import { HttpError } from "@/lib/errors"` -- Singletons/instances: `import { stripe } from "@/lib/stripe"` -- Grab-bag utility modules (`utils.ts`): functions are already unambiguous — `slugify()`, `cn()`, `formatDate()` don't need a `utils.` prefix -- Server actions consumed by Next.js `action={fn}` prop (needs bare reference) -- Co-located same-directory imports (`import { createTaskSchema } from "./schemas"`) — no namespace stuttering -- Re-exports from barrel files (`types/index.ts`) - -### Single-Function Modules - -Default export. Import alias matches the module domain or the function name. 
Don't repeat file context — `send` in `email.ts`, not `sendEmail`. - -```ts -// lib/email.ts -export default async function send(to: string, subject: string) { ... } - -// consumer -import send from "@/lib/email" -send(to, "Welcome") -``` - -### Nested Namespace Objects - -When a module has multiple related sub-domains, group them as nested objects. The call site reads like `module.subdomain.verb()`. - -```ts -// lib/payment.ts -const customer = { - async create(email: string): Promise { ... }, - async get(id: string): Promise { ... }, -} - -const charge = { - async create(input: ChargeInput): Promise { ... }, - async refund(chargeId: string): Promise { ... }, -} - -const webhook = { - verify(payload: string, signature: string): boolean { ... }, - async process(event: WebhookEvent): Promise { ... }, -} - -export default { customer, charge, webhook } - -// consumer -import payment from "@/lib/payment" -payment.customer.create(email) -payment.charge.refund(chargeId) -payment.webhook.verify(payload, sig) -``` - -**When to nest**: the module covers a single domain but has distinct sub-concerns (customers, charges, webhooks). Without nesting you'd get flat functions like `createCustomer`, `getCustomer`, `createCharge`, `refundCharge`, `verifyWebhook` — zero structure, zero discoverability. - -```ts -// BANNED — flat loose functions with prefixes to disambiguate -export function createCustomer(email: string) { ... } -export function getCustomer(id: string) { ... } -export function createCharge(input: ChargeInput) { ... } -export function refundCharge(chargeId: string) { ... } -export function verifyWebhook(payload: string, sig: string) { ... } - -// REQUIRED — nested namespaces -export default { customer, charge, webhook } -``` - -**Rule of thumb**: if you're prefixing function names to disambiguate (`create*`, `get*`, `verify*`) — you need sub-objects instead. 
- -### Decision Matrix - -| Module exports | Export style | Import style | Call site | -|---|---|---|---| -| Multiple functions | `export default { fn1, fn2 }` or named exports | `import mod from` or `import * as mod from` | `mod.fn1()` | -| Single function (lib) | `export default function name()` | `import name from` | `name()` | -| React component | `export function Component()` | `import { Component } from` | `` | -| Single instance | `export const thing = ...` | `import { thing } from` | `thing.method()` | -| Types only | `export type / export interface` | `import type { T } from` | — | -| Error classes | `export class FooError` | `import { FooError } from` | `instanceof FooError` | -| Library integration | Semantic export (`*` or full object) | `import * as name from` | `name.method()` | - ---- - -## 3. Naming - -### API Fields (camelCase — Non-Negotiable) - -All JSON over the wire — request schemas, response bodies, SSE event payloads, webhook payloads — uses **camelCase**. Matches Drizzle ORM convention so `c.json(row)` works with no mapping. No snake_case anywhere in the API contract. - -### Files - -| Type | Convention | Examples | -|---|---|---| -| Modules | `kebab-case.ts` | `rate-limit.ts`, `auth-client.ts` | -| Components | `kebab-case.tsx` | `nav-bar.tsx`, `pricing-card.tsx` | -| Hooks | `use-*.ts` | `use-oauth.ts`, `use-debounce.ts` | -| Types | `index.ts` in `types/` | One file, all types | -| Tests | `*.test.ts` | `credits.test.ts` | - -### Functions - -**Never repeat the module name in the function name.** `module.verb()`. - -```ts -// BANNED // REQUIRED -tokens.countTokens() tokens.count() -email.sendVerificationEmail() email.sendVerification() -cache.getCacheEntry() cache.get() -``` - -### Types - -Every type name must make sense in isolation. 
- -| Layer | Pattern | Examples | -|---|---|---| -| DB rows (read) | `*Select` | `UserSelect`, `OrderSelect` | -| DB rows (write) | `*Insert` | `UserInsert`, `OrderInsert` | -| API requests | `Api*Request` | `ApiCreateOrderRequest` | -| API responses | `Api*Response` | `ApiOrderResponse` | -| Discriminated entries | `Api*Entry` | `ApiHistoryEntry` | -| Paginated wrappers | `ApiPageResponse` | `ApiPageResponse` aliased as `ApiHistoryPage` | -| UI/domain types | Domain prefix | `DashboardProps`, `StripeInvoice` | -| Config objects | `*Config` / `*Options` | `FetchConfig`, `RetryOptions` | -| Generic utilities | No prefix | `ActionResponse`, `ApiPageResponse` | - -**DB types come from ORM schema inference** — never hand-roll interfaces. Use `Pick<>` / `Omit<>` to derive subsets. - -```ts -// BANNED -interface User { id: string; email: string; name: string } - -// REQUIRED -import type { UserSelect } from "@sgai/shared/db" -type UserSummary = Pick -``` - ---- - -## 4. Code Patterns - -### Early Returns - -Flip the condition, return early, keep the happy path flat. - -```ts -// BANNED // REQUIRED -if (user) { if (!user) return null - if (user.isActive) { if (!user.isActive) return null - if (user.hasPermission) { if (!user.hasPermission) return null - return doThing(user) return doThing(user) - } - } -} -return null -``` - -### Resolve Pattern (Kill Duplicate Paths) - -### Helper Bloat (Banned) - -Do not stack tiny helpers that only rename, normalize, or forward data once. - -```ts -// BANNED -function normalizeMonitorDiffs(diffs?: Partial): ApiMonitorDiffs { - return { - markdown: diffs?.markdown ?? [], - json: diffs?.json ?? [], - } -} - -function countMonitorDiffs(diffs?: Partial): number { - const normalized = normalizeMonitorDiffs(diffs) - return normalized.markdown.length + normalized.json.length -} - -// REQUIRED -function countMonitorDiffs(diffs?: Partial): number { - return (diffs?.markdown?.length ?? 0) + (diffs?.json?.length ?? 
0) -} -``` - -Rules: - -- If a helper is called once, inline it unless it removes real complexity -- If a helper only adds defaults, rename indirection, or one property shuffle, inline it -- Do not create `normalize*`, `build*`, `create*`, `to*` wrappers unless they hide real branching or repeated logic -- A helper must pay rent: repeated use, meaningful branching, or domain logic worth naming - -When branching logic feeds into the same response, extract a `resolve` function returning a unified shape. - -```ts -async function resolve(url: string) { - const cached = await cache.get(url) - if (cached) return { content: cached, provider: "cache", cached: true } - const result = await fetcher.fetch(url) - await cache.set(url, result.content) - return { content: result.content, provider: result.provider, cached: false } -} -``` - -### No Wrapper Abstractions - -Keep modules generic. Call sites are explicit about keys. - -```ts -// BANNED: redis.getHtml(hash) -// REQUIRED: redis.get(redis.KEYS.html(hash)) -``` - ---- - -## 5. Functions - -Small functions, small names, one thing. But don't abstract two obvious lines. - -Refactor into a function when: -1. **Readability** — the function name explains a non-obvious implementation -2. **Redundancy** — the same logic appears in 3+ places - -Three similar lines in one file is better than a premature abstraction. - -**No over-engineering**: No factory-of-factories, no abstractions used < 3 times, no config objects for things that could be arguments, no feature flags for hypothetical futures. - -**Component-local state/event logic stays inline**: If a reducer/helper exists only to support one component or one `useEvent(...)` handler, keep it inside the handler or component body. Do not extract tiny `isXEvent`, `applyXEvent`, `upsertX`, or `countX` helpers unless the exact logic is reused in 3+ places or the extracted name removes real complexity. 
- -**Use clear verbs for mutations**: If a function changes state, name it like an action: `complete`, `pause`, `resume`, `flush`, `setStatus`. Do not hide writes behind vague names like `done`, `handle`, `process`, `finalize`, or enterprise sludge like `finishPendingJob` when a plain verb says the same thing. - -**Reads and writes must be obvious from the name**: Read-only functions use `get*`/`list*`/`find*`. Mutating functions use a verb. Never make a name sound like a read when it writes, and never split one simple state transition across multiple vaguely named helpers. - -**Do not re-declare existing shared shapes**: If an event, API payload, or domain object already has a shared type, import it and narrow it with `Extract<>`, indexed access, or helpers from the shared type. Do not hand-write local duplicates of existing contracts. - -Do: -```ts -let event: Extract -``` - -Don't: -```ts -let event: - | { type: "crawl.page.failed"; crawlId: string; page: ApiCrawlPage; error: string } - | { type: "crawl.page.skipped"; crawlId: string; page: ApiCrawlPage; reason: string } -``` - ---- - -## 6. Comments - -Code says "what" — comments say "why". Plain `//` with tag and `@Claude` annotation. 
- -**Tag format**: `// [TAG] @Claude ` - -| Tag | When to use | -|---|---| -| `[NOTE]` | Non-obvious logic — race conditions, ordering dependencies, cache invalidation | -| `[TODO]` | Known improvement or missing piece | -| `[BUG]` | Known bug or workaround for upstream issue | -| `[REFACTOR]` | Tech debt — works but should be restructured | - -```ts -// [NOTE] @Claude invalidate cache before DB write — stale reads on concurrent requests otherwise -await redis.del(redis.KEYS.task(taskId)) -await db.update(tasks).set({ status: "completed" }).where(eq(tasks.id, taskId)) - -// [BUG] @Claude Readability returns empty string for SPAs — fall back to raw HTML -if (!extracted.length) return raw -``` - -**`@Claude` is mandatory** — team standard for AI context attribution and auditability. - -### Strictly Forbidden - -- Comments restating what the code does -- Comments without a tag (`[NOTE]`, `[TODO]`, `[BUG]`, `[REFACTOR]`) -- Tagged comments without `@Claude` -- JSDoc on internal functions (types ARE the docs) -- Commented-out code (git has history) -- `@param` / `@returns` except on shared package public API - ---- - -## 7. Error Handling - -### Backend - -Define a base error class, extend per domain. Routes throw, middleware catches — no try/catch in route handlers. See `api.md` for error classes and framework wiring. - -### Frontend - -Server actions catch errors internally and return `{ data: null, error: "message" }`. Opposite of backend where you throw and let middleware catch. - -### ActionResponse (frontend only) - -Define once in `types/index.ts`. Used by **server actions and frontend code only** — API endpoints use HTTP status codes. - -```ts -export type ActionResponse = - | { data: T; error: null } - | { data: null; error: string } -``` - -- **Server Actions** (`"use server"`): Always return `ActionResponse`. -- **API Endpoints**: HTTP status codes + JSON body. Frontend callers wrap fetch results in `ActionResponse`. 
-- **Pure backend projects**: You don't need `ActionResponse` — it's a frontend contract. - ---- - -## 8. Types & Schemas - -### Discriminated Unions - -Use discriminated unions for polymorphic data. Pick a discriminator field and be consistent. - -```ts -interface OrderBase { id: string; createdAt: string; status: OrderStatus } - -interface PhysicalOrder extends OrderBase { - type: "physical" - shippingAddress: string -} - -interface DigitalOrder extends OrderBase { - type: "digital" - downloadUrl: string -} - -type Order = PhysicalOrder | DigitalOrder -``` - -### Event Types - -Discriminate by `type` field with **dotted namespace**: `{domain}.{resource}.{verb}`. - -**Standard verbs** (use these, nothing else): - -| Verb | Meaning | -|---|---| -| `started` | Operation began | -| `completed` | Operation finished successfully | -| `failed` | Operation errored | -| `detected` | Something was observed (e.g. change detected) | -| `paused` | Resource was paused/suspended | - -**Type naming**: `Api{Domain}Event` — no `Streaming` suffix. The transport (SSE, Redis pub/sub, webhook) is irrelevant to the type name. - -```ts -type ApiOrderEvent = - | { type: "order.payment.started" } - | { type: "order.payment.completed"; transactionId: string } - | { type: "order.result"; data: OrderResponse } - | { type: "order.failed"; error: string; code: string } -``` - -**Webhook payloads** use the same `type` strings as events but wrap richer data in a `data` field. Defined as a separate discriminated union (`ApiWebhookPayload`). - -```ts -type ApiWebhookPayload = - | { type: "order.change.detected"; data: { ... } } - | { type: "order.test"; data: { ... } } -``` - -### Provider Interfaces (Swappable Implementations) - -When multiple things do the same job, define an interface contract. Each implementation satisfies the interface — swap them without touching consumers. 
- -```ts -// types/index.ts -interface StorageProvider { - name: string - async upload(key: string, data: Buffer): Promise - async download(key: string): Promise - async delete(key: string): Promise -} - -// lib/storage/s3.ts -const s3: StorageProvider = { - name: "s3", - async upload(key, data) { ... }, - async download(key) { ... }, - async delete(key) { ... }, -} - -// lib/storage/local.ts -const local: StorageProvider = { - name: "local", - async upload(key, data) { ... }, - async download(key) { ... }, - async delete(key) { ... }, -} - -// consumer — doesn't care which provider -async function saveReport(storage: StorageProvider, report: Buffer) { - const url = await storage.upload("reports/latest.pdf", report) - ... -} -``` - -**When to use**: 2+ implementations with the same shape. Classic examples: -- Storage backends (S3 vs local filesystem vs GCS) -- Notification channels (email vs Slack vs Discord) -- Cache layers (Redis vs in-memory vs SQLite) -- Queue drivers (SQS vs RabbitMQ vs in-process) - -**When NOT to use**: one implementation with a hypothetical future second. YAGNI — inline it. Extract the interface when the second implementation actually exists. - -**Type the constant, not just the function signatures.** Annotating `const x: MyInterface = { ... }` catches mismatches at definition, not at the call site 3 files away. - -```ts -// BANNED — no contract, errors surface at call site -const s3 = { - name: "s3", - async upload(key: string, data: Buffer) { ... }, -} - -// REQUIRED — interface enforced at definition -const s3: StorageProvider = { - name: "s3", - async upload(key, data) { ... }, -} -``` - -### Zod Schemas - -Compose from small reusable sub-schemas. Infer types alongside. 
- -```ts -const paginationSchema = z.object({ - page: z.coerce.number().int().positive().default(1), - limit: z.coerce.number().int().positive().max(100).default(20), -}) - -const orderFilterSchema = paginationSchema.extend({ - status: z.enum(["pending", "completed", "failed"]).optional(), -}) - -type OrderFilter = z.infer -``` - -Schema-inferred types (`z.infer<...>`) can stay co-located when used by a single file. When a second file needs it, move to `types/index.ts`. - ---- - -## 9. Database (Drizzle ORM) - -### Typed Query Builders - -Always use Drizzle's typed methods. Raw `sql` only for Postgres functions Drizzle doesn't wrap. - -```ts -// BANNED -db.select().from(schema.orders).where(sql`${schema.orders.userId} = ${userId}`) - -// REQUIRED -import { and, desc, eq, gte } from "drizzle-orm" -db.select().from(schema.orders) - .where(and(eq(schema.orders.userId, userId), gte(schema.orders.createdAt, since))) - .orderBy(desc(schema.orders.createdAt)) -``` - -**Raw `sql` OK for**: `date_trunc`, `COALESCE`, `CASE WHEN`, `NULLIF`, window functions, custom aggregates. Always check Drizzle docs first. - -### Schema as Source of Truth - -```ts -export const orders = pgTable("orders", { - id: uuid("id").primaryKey().defaultRandom(), - userId: text("user_id").notNull().references(() => user.id), - status: text("status", { enum: ["pending", "completed", "failed"] }).notNull(), - total: integer("total").notNull(), - createdAt: timestamp("created_at").defaultNow().notNull(), -}) - -export type OrderSelect = typeof orders.$inferSelect -export type OrderInsert = typeof orders.$inferInsert -``` - -### Table Imports (Namespace Required) - -Always import table definitions via `import * as schema from "@sgai/shared/db"`. Access tables as `schema.tableName`. This avoids name clashes with domain modules (e.g. `cron` module vs `cron` table) and makes it instantly clear what's a table reference vs a function call. 
- -```ts -// BANNED — bare table imports clash with domain modules -import { cron, subscriptions, apiKeys } from "@sgai/shared/db" - -// REQUIRED — schema namespace -import * as schema from "@sgai/shared/db" - -schema.cron.userId -schema.subscriptions.remainingCredits -schema.apiKeys.apiKey -``` - -Types and factory functions stay as named imports — only table constants use the namespace: - -```ts -import * as schema from "@sgai/shared/db" -import type { ApiKeySelect, CronSelect } from "@sgai/shared/db" -import { createDb, type Database } from "@sgai/shared/db" -``` - -### Migrations - -Always generate + migrate. Never `db:push` in production. - ---- - -## 10. Logging (Pino) - -Structured logging with pino. One `lib/logger.ts`, child loggers per module. - -### Event Naming - -The pino message string (second argument) is a namespaced event name using **dot-separated** segments: `{domain}.{action}.{status}` — same convention as streaming/event types (Section 8). One delimiter everywhere, no cognitive overhead. Data goes in the first argument object. - -```ts -// BANNED — free-form prose messages -log.info({ count: 5 }, "finished processing batch") -log.error({ error: err.message }, "failed to save record") - -// REQUIRED — namespaced event as message, data in object -log.info({ count: 5 }, "job.batch.completed") -log.error({ error: err.message }, "job.record.save.failed") - -// No data? Message-only is fine -log.debug("job.parse.fallback") -``` - -### Naming Convention - -`{domain}.{resource}.{action}` — always lowercase, dot-separated. 
- -| Pattern | Examples | -|---|---| -| `{domain}.started` | `job.started`, `sync.started` | -| `{domain}.completed` | `job.completed`, `job.batch.completed` | -| `{domain}.failed` | `job.fetch.failed`, `job.record.save.failed` | -| `{domain}.{resource}.{action}` | `cron.schedule.created`, `queue.task.enqueued` | -| `{domain}.{action}.{status}` | `email.send.started`, `email.send.failed`, `cache.lookup.miss` | - -### Rules - -- **Event name is the pino message** (second arg) — `log.info({ data }, "domain.action.status")` -- **Child loggers** per module: `logger.child({ module: "email", recipient })` -- **No `console.log`** — use pino everywhere -- **Log level**: `debug` for internal flow details, `info` for operations completing, `warn` for recoverable issues, `error` for failures -- **pino-pretty** in dev, structured JSON in production - ---- - -## 11. Environment Variables - -Validate at startup with Zod. Crash on bad config — fail loud, fail early. - -```ts -const envSchema = z.object({ - DATABASE_URL: z.string().url(), - STRIPE_SECRET_KEY: z.string().startsWith("sk_"), - NODE_ENV: z.enum(["development", "production", "test"]).default("development"), -}) - -export const env = envSchema.parse(process.env) -``` - -No `process.env.THING` scattered across files. One `env.ts`, one import. **Monorepos**: each app has its own `env.ts` — shared package does NOT validate env vars. - ---- - -## 12. TypeScript Rules - -- `strict: true` always -- Never `any` — use `unknown` + narrowing, generics, or proper types -- `as const` for literal objects/arrays (not for objects with function values) -- `satisfies` for type-checked literals that keep narrow type. Combine both when you need narrowing + type checking: `const X = { ... } as const satisfies Record` -- Path aliases: `@/*` maps to source root -- `interface` for object shapes, `type` for unions/intersections - ---- - -## 13. Linting, Formatting & Validation Flow - -Biome replaces ESLint + Prettier. 
Single `biome.json` at project root: - -- **Tabs** for indentation, **100 char line width** -- **Recommended rules** enabled -- **Import organization** enabled -- **Ignores**: `node_modules`, `dist`, `.next`, `drizzle`, `components/ui` - -### Before Every Commit (MANDATORY) - -```bash -bun run format # Auto-fix formatting + imports -bun run lint # Check for remaining errors -bunx tsc --noEmit # TypeScript type checking -bun test # Tests (if applicable) -``` - -No exceptions. Web also runs `bun run build` for production validation. - ---- - -## 14. Testing Strategy - -Tests live alongside source: `email.ts` → `email.test.ts`. Use Bun test (API) or Vitest (Next.js). - -**Test**: Pure functions, API calls (mocked), business logic, edge cases. -**Don't test**: UI rendering, DB queries directly, third-party library behavior. - -Mock at the boundary (API calls, external services). Use factories for complex test objects: - -```ts -function makeUser(overrides?: Partial): UserSelect { - return { id: "test-id", email: "test@example.com", name: "Test User", ...overrides } -} -``` - ---- - -## 15. 
What NOT to Do - -- **No `any`** — `unknown`, generics, or proper types -- **No raw `sql`** for WHERE/ORDER — use ORM typed builders -- **No hand-written types outside `types/index.ts`** — exceptions: ORM-inferred, schema-inferred (single consumer), component-local props -- **No functions in constants files** -- **No manual DB type definitions** — derive from ORM schema -- **No JSDoc on internal functions** — types are the docs -- **No commented-out code** — git has history -- **No module name in function name** — `tokens.count()` not `tokens.countTokens()` -- **No wrapper abstractions** — generic modules, explicit call sites -- **No premature abstractions** — not used 3+ times → inline it -- **No extracted one-off UI event reducers/helpers** — keep tiny component-only event/state updates inline -- **No scattered `process.env`** — one `env.ts` -- **No `db:push` in production** — generate + migrate diff --git a/.claude/rules/git-style.md b/.claude/rules/git-style.md deleted file mode 100644 index 0c7cef5..0000000 --- a/.claude/rules/git-style.md +++ /dev/null @@ -1,29 +0,0 @@ -# Git Commit Style - -Conventional Commits. Format: `(): ` - -## Types - -| Type | When | -|------|------| -| `feat` | New feature | -| `fix` | Bug fix | -| `refactor` | Code change (not fix, not feature) | -| `chore` | Maintenance (deps, config, build) | -| `docs` | Documentation only | -| `style` | Formatting, whitespace | -| `perf` | Performance improvement | -| `test` | Adding/fixing tests | -| `content` | Content changes (blog, copy) | - -## Scope - -Optional. Area affected: `auth`, `payments`, `ui`, `api`, `referral`, `seo`, `web`, `shared`. - -## Rules - -1. Lowercase everything -2. No period at the end -3. Imperative mood ("add" not "added") -4. First line under 72 chars -5. 
Scope optional but helps changelogs diff --git a/.claude/skills/gh-pr.md b/.claude/skills/gh-pr.md deleted file mode 100644 index ac4aa25..0000000 --- a/.claude/skills/gh-pr.md +++ /dev/null @@ -1,187 +0,0 @@ ---- -name: creating-pr -description: Use when creating or updating pull requests with comprehensive descriptions and meaningful commits - streamlines PR workflow with branch management and commit best practices ---- - -You are an expert Git and GitHub workflow automation specialist with deep knowledge of version control best practices and pull request management. Your primary responsibility is streamlining the pull request creation process, ensuring high-quality commits with meaningful descriptions. - -## Common Operations - -### GitHub CLI Commands Reference - -```bash -# PR Management -gh pr view # View current branch PR -gh pr list # List open PRs -gh pr view --json number -q .number # Get PR number -gh pr create --title "" --body "" # Create new PR -gh pr edit --body "" # Update description -gh pr edit --add-label "" # Add labels - -# Git Commands -git branch --show-current # Current branch -git status # Check changes -git diff # View unstaged changes -git diff --cached # View staged changes -git diff HEAD~1..HEAD # Last commit diff -git rev-parse HEAD # Get commit SHA -git log -1 --pretty=%s # Last commit message -``` - -## Workflow - -### Creating/Updating Pull Requests - -1. **Branch Management**: - - - Check current branch: `git branch --show-current` - - If on main/master/next, create feature branch with conventional naming - - Branch convention: `//` (e.g., `fzuppichini/features/new-feature`) - - Switch to new branch: `git checkout -b //` - -2. **Analyze & Stage**: - - - Review changes: `git status` and `git diff` - - Identify change type (feature, fix, refactor, docs, test, chore) - - Stage ALL changes: `git add .` (preferred due to slow Husky hooks) - - Verify: `git diff --cached` - -3. 
**Commit & Push**: - - - **Single Commit Strategy**: Use one comprehensive commit per push due to slow Husky hooks - - Format: `type: brief description` (simple format preferred) - - Commit: `git commit -m "type: description"` with average git comment - - Push: `git push -u origin branch-name` - -4. **PR Management**: - - - Check existing: `gh pr view` - - If exists: push updates, **add update comment** (preserve original description) - - If not: `gh pr create` with title and description - -## Update Comment Templates - -When updating existing PRs, use these comment templates to preserve the original description: - -### General PR Update Template - -```markdown -## 🔄 PR Update - -**Commit**: `` - `` - -### Changes Made - -- [List specific changes in this update] -- [Highlight any breaking changes] -- [Note new features or fixes] - -### Impact - -- [Areas of code affected] -- [Performance/behavior changes] -- [Dependencies updated] - -### Testing - -- [How to test these changes] -- [Regression testing notes] - -### Next Steps - -- [Remaining work if any] -- [Items for review focus] - -🤖 Generated with [Claude Code](https://claude.ai/code) -``` - -### Critical Fix Update Template - -```markdown -## 🚨 Critical Fix Applied - -**Commit**: `` - `` - -### Issue Addressed - -[Description of critical issue fixed] - -### Solution - -[Technical approach taken] - -### Verification Steps - -1. [Step to reproduce original issue] -2. [Step to verify fix] -3. 
[Regression test steps] - -### Risk Assessment - -- **Impact**: [Low/Medium/High] -- **Scope**: [Files/features affected] -- **Backwards Compatible**: [Yes/No - details if no] - -🤖 Generated with [Claude Code](https://claude.ai/code) -``` - -### Feature Enhancement Template - -```markdown -## ✨ Feature Enhancement - -**Commit**: `` - `` - -### Enhancement Details - -[Description of feature improvement/addition] - -### Technical Implementation - -- [Key architectural decisions] -- [New dependencies or patterns] -- [Performance considerations] - -### User Experience Impact - -[How this affects end users] - -### Testing Strategy - -[Approach to testing this enhancement] - -🤖 Generated with [Claude Code](https://claude.ai/code) -``` - -## Example Usage Patterns - -### Creating PR: - -1. Create branch and make changes -2. Stage, commit, push → triggers PR creation -3. Each subsequent push triggers update comment -4. By default assume the PR is *wip* (work in progress) so open it appropriately - -### Commit Message Conventions - -See **[docs/GIT_STYLE.md](docs/GIT_STYLE.md)** for full guide. 
- -- `feat:` - New features -- `fix:` - Bug fixes -- `refactor:` - Code refactoring -- `docs:` - Documentation changes -- `test:` - Test additions/modifications -- `chore:` - Maintenance tasks -- `style:` - Formatting changes -- `content:` - Content changes (blog, copy) -- `perf:` - Performance improvements - -### Branch Naming Conventions - -Always use `//` format: - -- `username/features/description` - New features -- `username/fix/description` - Bug fixes -- `username/refactor/description` - Code refactoring -- `username/docs/description` - Documentation updates -- `username/test/description` - Test additions \ No newline at end of file diff --git a/.gitignore b/.gitignore index f483273..8a3b82e 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ bun.lock *.tsbuildinfo .env doc/ -.DS_Store +.claude/ +CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index db61fe8..0000000 --- a/CLAUDE.md +++ /dev/null @@ -1,15 +0,0 @@ -# Claude Code Instructions - -## Before completing any task - -Always run these commands before committing or saying a task is done: - -```bash -bun run format -bun run lint -bunx tsc --noEmit -bun run build -bun test -``` - -No exceptions. From 07387860fd090f85f290c27285f57cd40243b7fd Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 14 Apr 2026 15:01:45 +0200 Subject: [PATCH 16/28] minor changes --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 41dddd3..5d0c1a0 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ ScrapeGraph JS SDK

-Official TypeScript SDK for the [ScrapeGraph AI API](https://scrapegraphai.com) v2. +Official TypeScript SDK for the [ScrapeGraph AI API](https://scrapegraphai.com). ## Install From 3703f8c51e1eace445a6c817fd592c6697bb5d3c Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 14 Apr 2026 15:02:39 +0200 Subject: [PATCH 17/28] docs: reorganize examples table by service, add SGAI_API_KEY env var Co-Authored-By: Claude Opus 4.5 --- README.md | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 5d0c1a0..656bd9e 100644 --- a/README.md +++ b/README.md @@ -202,31 +202,32 @@ const health = await checkHealth("key"); ## Examples -| Path | Description | -|------|-------------| -| [`scrape/scrape_basic.ts`](examples/scrape/scrape_basic.ts) | Basic markdown scraping | -| [`scrape/scrape_multi_format.ts`](examples/scrape/scrape_multi_format.ts) | Multiple formats (markdown, links, images, screenshot, summary) | -| [`scrape/scrape_json_extraction.ts`](examples/scrape/scrape_json_extraction.ts) | Structured JSON extraction with schema | -| [`scrape/scrape_pdf.ts`](examples/scrape/scrape_pdf.ts) | PDF document parsing with OCR metadata | -| [`scrape/scrape_with_fetchconfig.ts`](examples/scrape/scrape_with_fetchconfig.ts) | JS rendering, stealth mode, scrolling | -| [`extract/extract_basic.ts`](examples/extract/extract_basic.ts) | AI data extraction from URL | -| [`extract/extract_with_schema.ts`](examples/extract/extract_with_schema.ts) | Extraction with JSON schema | -| [`search/search_basic.ts`](examples/search/search_basic.ts) | Web search with results | -| [`search/search_with_extraction.ts`](examples/search/search_with_extraction.ts) | Search + AI extraction | -| [`crawl/crawl_basic.ts`](examples/crawl/crawl_basic.ts) | Start and monitor a crawl | -| [`crawl/crawl_with_formats.ts`](examples/crawl/crawl_with_formats.ts) | Crawl with screenshots and patterns | -| 
[`monitor/monitor_basic.ts`](examples/monitor/monitor_basic.ts) | Create a page monitor | -| [`monitor/monitor_with_webhook.ts`](examples/monitor/monitor_with_webhook.ts) | Monitor with webhook notifications | -| [`schema/generate_schema_basic.ts`](examples/schema/generate_schema_basic.ts) | Generate JSON schema from prompt | -| [`schema/modify_existing_schema.ts`](examples/schema/modify_existing_schema.ts) | Modify an existing schema | -| [`utilities/credits.ts`](examples/utilities/credits.ts) | Check account credits and limits | -| [`utilities/health.ts`](examples/utilities/health.ts) | API health check | -| [`utilities/history.ts`](examples/utilities/history.ts) | Request history | +| Service | Example | Description | +|---------|---------|-------------| +| scrape | [`scrape_basic.ts`](examples/scrape/scrape_basic.ts) | Basic markdown scraping | +| scrape | [`scrape_multi_format.ts`](examples/scrape/scrape_multi_format.ts) | Multiple formats (markdown, links, images, screenshot, summary) | +| scrape | [`scrape_json_extraction.ts`](examples/scrape/scrape_json_extraction.ts) | Structured JSON extraction with schema | +| scrape | [`scrape_pdf.ts`](examples/scrape/scrape_pdf.ts) | PDF document parsing with OCR metadata | +| scrape | [`scrape_with_fetchconfig.ts`](examples/scrape/scrape_with_fetchconfig.ts) | JS rendering, stealth mode, scrolling | +| extract | [`extract_basic.ts`](examples/extract/extract_basic.ts) | AI data extraction from URL | +| extract | [`extract_with_schema.ts`](examples/extract/extract_with_schema.ts) | Extraction with JSON schema | +| search | [`search_basic.ts`](examples/search/search_basic.ts) | Web search with results | +| search | [`search_with_extraction.ts`](examples/search/search_with_extraction.ts) | Search + AI extraction | +| crawl | [`crawl_basic.ts`](examples/crawl/crawl_basic.ts) | Start and monitor a crawl | +| crawl | [`crawl_with_formats.ts`](examples/crawl/crawl_with_formats.ts) | Crawl with screenshots and patterns | +| 
monitor | [`monitor_basic.ts`](examples/monitor/monitor_basic.ts) | Create a page monitor | +| monitor | [`monitor_with_webhook.ts`](examples/monitor/monitor_with_webhook.ts) | Monitor with webhook notifications | +| schema | [`generate_schema_basic.ts`](examples/schema/generate_schema_basic.ts) | Generate JSON schema from prompt | +| schema | [`modify_existing_schema.ts`](examples/schema/modify_existing_schema.ts) | Modify an existing schema | +| utilities | [`credits.ts`](examples/utilities/credits.ts) | Check account credits and limits | +| utilities | [`health.ts`](examples/utilities/health.ts) | API health check | +| utilities | [`history.ts`](examples/utilities/history.ts) | Request history | ## Environment Variables | Variable | Description | Default | |----------|-------------|---------| +| `SGAI_API_KEY` | Your ScrapeGraph API key | — | | `SGAI_API_URL` | Override API base URL | `https://api.scrapegraphai.com/v2` | | `SGAI_DEBUG` | Enable debug logging (`"1"`) | off | | `SGAI_TIMEOUT_S` | Request timeout in seconds | `120` | From 33ea3e68637218ca36307c9240d2a41728b07ef0 Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 14 Apr 2026 15:20:57 +0200 Subject: [PATCH 18/28] feat: add ScrapeGraphAI client pattern, remove generateSchema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add ScrapeGraphAI({ apiKey? 
}) factory that reads SGAI_API_KEY from env - Rename client methods: getCredits → credits, checkHealth → healthy - Remove generateSchema (no longer in API) - Update all examples to use new client pattern - Update README with client usage Co-Authored-By: Claude Opus 4.5 --- README.md | 72 ++++++++++------------ examples/crawl/crawl_basic.ts | 9 +-- examples/crawl/crawl_with_formats.ts | 7 ++- examples/extract/extract_basic.ts | 7 ++- examples/extract/extract_with_schema.ts | 7 ++- examples/monitor/monitor_basic.ts | 7 ++- examples/monitor/monitor_with_webhook.ts | 7 ++- examples/schema/generate_schema_basic.ts | 15 ----- examples/schema/modify_existing_schema.ts | 34 ---------- examples/scrape/scrape_basic.ts | 7 ++- examples/scrape/scrape_json_extraction.ts | 7 ++- examples/scrape/scrape_multi_format.ts | 7 ++- examples/scrape/scrape_pdf.ts | 7 ++- examples/scrape/scrape_with_fetchconfig.ts | 7 ++- examples/search/search_basic.ts | 7 ++- examples/search/search_with_extraction.ts | 7 ++- examples/utilities/credits.ts | 7 ++- examples/utilities/health.ts | 7 ++- examples/utilities/history.ts | 7 ++- src/index.ts | 7 +-- src/schemas.ts | 14 +---- src/scrapegraphai.ts | 62 +++++++++++++------ src/types.ts | 17 +---- tests/scrapegraphai.test.ts | 19 ------ 24 files changed, 144 insertions(+), 210 deletions(-) delete mode 100644 examples/schema/generate_schema_basic.ts delete mode 100644 examples/schema/modify_existing_schema.ts diff --git a/README.md b/README.md index 656bd9e..bd8b669 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,13 @@ -# ScrapeGraph JS SDK +# ScrapeGraphAI JS SDK [![npm version](https://badge.fury.io/js/scrapegraph-js.svg)](https://badge.fury.io/js/scrapegraph-js) [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)

- ScrapeGraph JS SDK + ScrapeGraphAI JS SDK

-Official TypeScript SDK for the [ScrapeGraph AI API](https://scrapegraphai.com). +Official TypeScript SDK for the [ScrapeGraphAI API](https://scrapegraphai.com). ## Install @@ -20,9 +20,12 @@ bun add scrapegraph-js ## Quick Start ```ts -import { scrape } from "scrapegraph-js"; +import { ScrapeGraphAI } from "scrapegraph-js"; -const result = await scrape("your-api-key", { +// reads SGAI_API_KEY from env, or pass explicitly: ScrapeGraphAI({ apiKey: "..." }) +const sgai = ScrapeGraphAI(); + +const result = await sgai.scrape({ url: "https://example.com", formats: [{ type: "markdown" }], }); @@ -52,7 +55,7 @@ type ApiResult = { Scrape a webpage in multiple formats (markdown, html, screenshot, json, etc). ```ts -const res = await scrape("key", { +const res = await sgai.scrape({ url: "https://example.com", formats: [ { type: "markdown", mode: "reader" }, @@ -88,7 +91,7 @@ const res = await scrape("key", { Extract structured data from a URL, HTML, or markdown using AI. ```ts -const res = await extract("key", { +const res = await sgai.extract({ url: "https://example.com", prompt: "Extract product names and prices", schema: { /* JSON schema */ }, // optional @@ -103,7 +106,7 @@ const res = await extract("key", { Search the web and optionally extract structured data. ```ts -const res = await search("key", { +const res = await sgai.search({ query: "best programming languages 2024", numResults: 5, // 1-20, default 3 format: "markdown", // "markdown" | "html" @@ -115,24 +118,13 @@ const res = await search("key", { }); ``` -### generateSchema - -Generate a JSON schema from a natural language description. - -```ts -const res = await generateSchema("key", { - prompt: "Schema for a product with name, price, and rating", - existingSchema: { /* ... */ }, // optional, to modify -}); -``` - ### crawl Crawl a website and its linked pages. 
```ts // Start a crawl -const start = await crawl.start("key", { +const start = await sgai.crawl.start({ url: "https://example.com", formats: [{ type: "markdown" }], maxPages: 50, @@ -144,12 +136,12 @@ const start = await crawl.start("key", { }); // Check status -const status = await crawl.get("key", start.data?.id!); +const status = await sgai.crawl.get(start.data?.id!); -// Control crawl by ID -await crawl.stop("key", start.data?.id!); -await crawl.resume("key", start.data?.id!); -await crawl.delete("key", start.data?.id!); +// Control +await sgai.crawl.stop(id); +await sgai.crawl.resume(id); +await sgai.crawl.delete(id); ``` ### monitor @@ -158,7 +150,7 @@ Monitor a webpage for changes on a schedule. ```ts // Create a monitor -const mon = await monitor.create("key", { +const mon = await sgai.monitor.create({ url: "https://example.com", name: "Price Monitor", interval: "0 * * * *", // cron expression @@ -168,12 +160,12 @@ const mon = await monitor.create("key", { }); // Manage monitors -await monitor.list("key"); -await monitor.get("key", cronId); -await monitor.update("key", cronId, { interval: "0 */6 * * *" }); -await monitor.pause("key", cronId); -await monitor.resume("key", cronId); -await monitor.delete("key", cronId); +await sgai.monitor.list(); +await sgai.monitor.get(cronId); +await sgai.monitor.update(cronId, { interval: "0 */6 * * *" }); +await sgai.monitor.pause(cronId); +await sgai.monitor.resume(cronId); +await sgai.monitor.delete(cronId); ``` ### history @@ -181,22 +173,22 @@ await monitor.delete("key", cronId); Fetch request history. 
```ts -const list = await history.list("key", { +const list = await sgai.history.list({ service: "scrape", // optional filter page: 1, limit: 20, }); -const entry = await history.get("key", "request-id"); +const entry = await sgai.history.get("request-id"); ``` -### getCredits / checkHealth +### credits / healthy ```ts -const credits = await getCredits("key"); +const credits = await sgai.credits(); // { remaining: 1000, used: 500, plan: "pro", jobs: { crawl: {...}, monitor: {...} } } -const health = await checkHealth("key"); +const health = await sgai.healthy(); // { status: "ok", uptime: 12345 } ``` @@ -217,8 +209,6 @@ const health = await checkHealth("key"); | crawl | [`crawl_with_formats.ts`](examples/crawl/crawl_with_formats.ts) | Crawl with screenshots and patterns | | monitor | [`monitor_basic.ts`](examples/monitor/monitor_basic.ts) | Create a page monitor | | monitor | [`monitor_with_webhook.ts`](examples/monitor/monitor_with_webhook.ts) | Monitor with webhook notifications | -| schema | [`generate_schema_basic.ts`](examples/schema/generate_schema_basic.ts) | Generate JSON schema from prompt | -| schema | [`modify_existing_schema.ts`](examples/schema/modify_existing_schema.ts) | Modify an existing schema | | utilities | [`credits.ts`](examples/utilities/credits.ts) | Check account credits and limits | | utilities | [`health.ts`](examples/utilities/health.ts) | API health check | | utilities | [`history.ts`](examples/utilities/history.ts) | Request history | @@ -227,7 +217,7 @@ const health = await checkHealth("key"); | Variable | Description | Default | |----------|-------------|---------| -| `SGAI_API_KEY` | Your ScrapeGraph API key | — | +| `SGAI_API_KEY` | Your ScrapeGraphAI API key | — | | `SGAI_API_URL` | Override API base URL | `https://api.scrapegraphai.com/v2` | | `SGAI_DEBUG` | Enable debug logging (`"1"`) | off | | `SGAI_TIMEOUT_S` | Request timeout in seconds | `120` | @@ -244,4 +234,4 @@ bun run check # tsc --noEmit + biome ## License -MIT - 
[ScrapeGraph AI](https://scrapegraphai.com) +MIT - [ScrapeGraphAI](https://scrapegraphai.com) diff --git a/examples/crawl/crawl_basic.ts b/examples/crawl/crawl_basic.ts index 88f20fc..f0aeb57 100644 --- a/examples/crawl/crawl_basic.ts +++ b/examples/crawl/crawl_basic.ts @@ -1,8 +1,9 @@ -import { crawl } from "scrapegraph-js"; +import { ScrapeGraphAI } from "scrapegraph-js"; -const apiKey = process.env.SGAI_API_KEY!; +// reads SGAI_API_KEY from env, or pass explicitly: ScrapeGraphAI({ apiKey: "..." }) +const sgai = ScrapeGraphAI(); -const startRes = await crawl.start(apiKey, { +const startRes = await sgai.crawl.start({ url: "https://example.com", maxPages: 5, maxDepth: 2, @@ -14,7 +15,7 @@ if (startRes.status !== "success" || !startRes.data) { console.log("Crawl started:", startRes.data.id); console.log("Status:", startRes.data.status); - const getRes = await crawl.get(apiKey, startRes.data.id); + const getRes = await sgai.crawl.get(startRes.data.id); console.log("\nProgress:", getRes.data?.finished, "/", getRes.data?.total); console.log("Pages:", getRes.data?.pages.map((p) => p.url)); } diff --git a/examples/crawl/crawl_with_formats.ts b/examples/crawl/crawl_with_formats.ts index 2265af8..aab74c1 100644 --- a/examples/crawl/crawl_with_formats.ts +++ b/examples/crawl/crawl_with_formats.ts @@ -1,8 +1,9 @@ -import { crawl } from "scrapegraph-js"; +import { ScrapeGraphAI } from "scrapegraph-js"; -const apiKey = process.env.SGAI_API_KEY!; +// reads SGAI_API_KEY from env, or pass explicitly: ScrapeGraphAI({ apiKey: "..." 
}) +const sgai = ScrapeGraphAI(); -const res = await crawl.start(apiKey, { +const res = await sgai.crawl.start({ url: "https://example.com", formats: [ { type: "markdown", mode: "reader" }, diff --git a/examples/extract/extract_basic.ts b/examples/extract/extract_basic.ts index 9d2710b..73992ef 100644 --- a/examples/extract/extract_basic.ts +++ b/examples/extract/extract_basic.ts @@ -1,8 +1,9 @@ -import { extract } from "scrapegraph-js"; +import { ScrapeGraphAI } from "scrapegraph-js"; -const apiKey = process.env.SGAI_API_KEY!; +// reads SGAI_API_KEY from env, or pass explicitly: ScrapeGraphAI({ apiKey: "..." }) +const sgai = ScrapeGraphAI(); -const res = await extract(apiKey, { +const res = await sgai.extract({ url: "https://example.com", prompt: "What is this page about? Extract the main heading and description.", }); diff --git a/examples/extract/extract_with_schema.ts b/examples/extract/extract_with_schema.ts index c274c54..c09611e 100644 --- a/examples/extract/extract_with_schema.ts +++ b/examples/extract/extract_with_schema.ts @@ -1,8 +1,9 @@ -import { extract } from "scrapegraph-js"; +import { ScrapeGraphAI } from "scrapegraph-js"; -const apiKey = process.env.SGAI_API_KEY!; +// reads SGAI_API_KEY from env, or pass explicitly: ScrapeGraphAI({ apiKey: "..." }) +const sgai = ScrapeGraphAI(); -const res = await extract(apiKey, { +const res = await sgai.extract({ url: "https://example.com", prompt: "Extract the page title and description", schema: { diff --git a/examples/monitor/monitor_basic.ts b/examples/monitor/monitor_basic.ts index 898feac..cdb227c 100644 --- a/examples/monitor/monitor_basic.ts +++ b/examples/monitor/monitor_basic.ts @@ -1,8 +1,9 @@ -import { monitor } from "scrapegraph-js"; +import { ScrapeGraphAI } from "scrapegraph-js"; -const apiKey = process.env.SGAI_API_KEY!; +// reads SGAI_API_KEY from env, or pass explicitly: ScrapeGraphAI({ apiKey: "..." 
}) +const sgai = ScrapeGraphAI(); -const res = await monitor.create(apiKey, { +const res = await sgai.monitor.create({ url: "https://example.com", name: "Example Monitor", interval: "0 * * * *", diff --git a/examples/monitor/monitor_with_webhook.ts b/examples/monitor/monitor_with_webhook.ts index b10173d..ddbaa77 100644 --- a/examples/monitor/monitor_with_webhook.ts +++ b/examples/monitor/monitor_with_webhook.ts @@ -1,8 +1,9 @@ -import { monitor } from "scrapegraph-js"; +import { ScrapeGraphAI } from "scrapegraph-js"; -const apiKey = process.env.SGAI_API_KEY!; +// reads SGAI_API_KEY from env, or pass explicitly: ScrapeGraphAI({ apiKey: "..." }) +const sgai = ScrapeGraphAI(); -const res = await monitor.create(apiKey, { +const res = await sgai.monitor.create({ url: "https://example.com/prices", name: "Price Monitor", interval: "0 */6 * * *", diff --git a/examples/schema/generate_schema_basic.ts b/examples/schema/generate_schema_basic.ts deleted file mode 100644 index 945e55a..0000000 --- a/examples/schema/generate_schema_basic.ts +++ /dev/null @@ -1,15 +0,0 @@ -import { generateSchema } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - -const res = await generateSchema(apiKey, { - prompt: "Find laptops with specifications like brand, processor, RAM, storage, and price", -}); - -if (res.status === "success") { - console.log("Refined prompt:", res.data?.refinedPrompt); - console.log("\nGenerated schema:"); - console.log(JSON.stringify(res.data?.schema, null, 2)); -} else { - console.error("Failed:", res.error); -} diff --git a/examples/schema/modify_existing_schema.ts b/examples/schema/modify_existing_schema.ts deleted file mode 100644 index 74fd0b8..0000000 --- a/examples/schema/modify_existing_schema.ts +++ /dev/null @@ -1,34 +0,0 @@ -import { generateSchema } from "scrapegraph-js"; - -const apiKey = process.env.SGAI_API_KEY!; - -const existingSchema = { - title: "ProductList", - type: "object", - properties: { - products: { - type: "array", - 
items: { - type: "object", - properties: { - name: { type: "string" }, - price: { type: "number" }, - }, - required: ["name", "price"], - }, - }, - }, - required: ["products"], -}; - -const res = await generateSchema(apiKey, { - prompt: "Add brand, category, and rating fields to the existing product schema", - existingSchema, -}); - -if (res.status === "success") { - console.log("Modified schema:"); - console.log(JSON.stringify(res.data?.schema, null, 2)); -} else { - console.error("Failed:", res.error); -} diff --git a/examples/scrape/scrape_basic.ts b/examples/scrape/scrape_basic.ts index 7bf1c42..0d34e05 100644 --- a/examples/scrape/scrape_basic.ts +++ b/examples/scrape/scrape_basic.ts @@ -1,8 +1,9 @@ -import { scrape } from "scrapegraph-js"; +import { ScrapeGraphAI } from "scrapegraph-js"; -const apiKey = process.env.SGAI_API_KEY!; +// reads SGAI_API_KEY from env, or pass explicitly: ScrapeGraphAI({ apiKey: "..." }) +const sgai = ScrapeGraphAI(); -const res = await scrape(apiKey, { +const res = await sgai.scrape({ url: "https://example.com", formats: [{ type: "markdown" }], }); diff --git a/examples/scrape/scrape_json_extraction.ts b/examples/scrape/scrape_json_extraction.ts index 09adb7c..60430d6 100644 --- a/examples/scrape/scrape_json_extraction.ts +++ b/examples/scrape/scrape_json_extraction.ts @@ -1,8 +1,9 @@ -import { scrape } from "scrapegraph-js"; +import { ScrapeGraphAI } from "scrapegraph-js"; -const apiKey = process.env.SGAI_API_KEY!; +// reads SGAI_API_KEY from env, or pass explicitly: ScrapeGraphAI({ apiKey: "..." 
}) +const sgai = ScrapeGraphAI(); -const res = await scrape(apiKey, { +const res = await sgai.scrape({ url: "https://example.com", formats: [ { diff --git a/examples/scrape/scrape_multi_format.ts b/examples/scrape/scrape_multi_format.ts index 457e72c..52783db 100644 --- a/examples/scrape/scrape_multi_format.ts +++ b/examples/scrape/scrape_multi_format.ts @@ -1,8 +1,9 @@ -import { scrape } from "scrapegraph-js"; +import { ScrapeGraphAI } from "scrapegraph-js"; -const apiKey = process.env.SGAI_API_KEY!; +// reads SGAI_API_KEY from env, or pass explicitly: ScrapeGraphAI({ apiKey: "..." }) +const sgai = ScrapeGraphAI(); -const res = await scrape(apiKey, { +const res = await sgai.scrape({ url: "https://example.com", formats: [ { type: "markdown", mode: "reader" }, diff --git a/examples/scrape/scrape_pdf.ts b/examples/scrape/scrape_pdf.ts index 459e344..4a771d9 100644 --- a/examples/scrape/scrape_pdf.ts +++ b/examples/scrape/scrape_pdf.ts @@ -1,8 +1,9 @@ -import { scrape } from "scrapegraph-js"; +import { ScrapeGraphAI } from "scrapegraph-js"; -const apiKey = process.env.SGAI_API_KEY!; +// reads SGAI_API_KEY from env, or pass explicitly: ScrapeGraphAI({ apiKey: "..." }) +const sgai = ScrapeGraphAI(); -const res = await scrape(apiKey, { +const res = await sgai.scrape({ url: "https://pdfobject.com/pdf/sample.pdf", contentType: "application/pdf", formats: [{ type: "markdown" }], diff --git a/examples/scrape/scrape_with_fetchconfig.ts b/examples/scrape/scrape_with_fetchconfig.ts index efdcfe7..30fbf49 100644 --- a/examples/scrape/scrape_with_fetchconfig.ts +++ b/examples/scrape/scrape_with_fetchconfig.ts @@ -1,8 +1,9 @@ -import { scrape } from "scrapegraph-js"; +import { ScrapeGraphAI } from "scrapegraph-js"; -const apiKey = process.env.SGAI_API_KEY!; +// reads SGAI_API_KEY from env, or pass explicitly: ScrapeGraphAI({ apiKey: "..." 
}) +const sgai = ScrapeGraphAI(); -const res = await scrape(apiKey, { +const res = await sgai.scrape({ url: "https://example.com", fetchConfig: { mode: "js", diff --git a/examples/search/search_basic.ts b/examples/search/search_basic.ts index 4a0a412..f224aa8 100644 --- a/examples/search/search_basic.ts +++ b/examples/search/search_basic.ts @@ -1,8 +1,9 @@ -import { search } from "scrapegraph-js"; +import { ScrapeGraphAI } from "scrapegraph-js"; -const apiKey = process.env.SGAI_API_KEY!; +// reads SGAI_API_KEY from env, or pass explicitly: ScrapeGraphAI({ apiKey: "..." }) +const sgai = ScrapeGraphAI(); -const res = await search(apiKey, { +const res = await sgai.search({ query: "best programming languages 2024", numResults: 3, }); diff --git a/examples/search/search_with_extraction.ts b/examples/search/search_with_extraction.ts index e16e0ba..967bd5f 100644 --- a/examples/search/search_with_extraction.ts +++ b/examples/search/search_with_extraction.ts @@ -1,8 +1,9 @@ -import { search } from "scrapegraph-js"; +import { ScrapeGraphAI } from "scrapegraph-js"; -const apiKey = process.env.SGAI_API_KEY!; +// reads SGAI_API_KEY from env, or pass explicitly: ScrapeGraphAI({ apiKey: "..." }) +const sgai = ScrapeGraphAI(); -const res = await search(apiKey, { +const res = await sgai.search({ query: "typescript best practices", numResults: 5, prompt: "Extract the main tips and recommendations", diff --git a/examples/utilities/credits.ts b/examples/utilities/credits.ts index 99b0249..bef2949 100644 --- a/examples/utilities/credits.ts +++ b/examples/utilities/credits.ts @@ -1,8 +1,9 @@ -import { getCredits } from "scrapegraph-js"; +import { ScrapeGraphAI } from "scrapegraph-js"; -const apiKey = process.env.SGAI_API_KEY!; +// reads SGAI_API_KEY from env, or pass explicitly: ScrapeGraphAI({ apiKey: "..." 
}) +const sgai = ScrapeGraphAI(); -const res = await getCredits(apiKey); +const res = await sgai.credits(); if (res.status === "success") { console.log("Plan:", res.data?.plan); diff --git a/examples/utilities/health.ts b/examples/utilities/health.ts index 8e17af0..c68a293 100644 --- a/examples/utilities/health.ts +++ b/examples/utilities/health.ts @@ -1,8 +1,9 @@ -import { checkHealth } from "scrapegraph-js"; +import { ScrapeGraphAI } from "scrapegraph-js"; -const apiKey = process.env.SGAI_API_KEY!; +// reads SGAI_API_KEY from env, or pass explicitly: ScrapeGraphAI({ apiKey: "..." }) +const sgai = ScrapeGraphAI(); -const res = await checkHealth(apiKey); +const res = await sgai.healthy(); if (res.status === "success") { console.log("API Status:", res.data?.status); diff --git a/examples/utilities/history.ts b/examples/utilities/history.ts index 67e4160..f6cb220 100644 --- a/examples/utilities/history.ts +++ b/examples/utilities/history.ts @@ -1,8 +1,9 @@ -import { history } from "scrapegraph-js"; +import { ScrapeGraphAI } from "scrapegraph-js"; -const apiKey = process.env.SGAI_API_KEY!; +// reads SGAI_API_KEY from env, or pass explicitly: ScrapeGraphAI({ apiKey: "..." 
}) +const sgai = ScrapeGraphAI(); -const res = await history.list(apiKey, { +const res = await sgai.history.list({ service: "scrape", limit: 5, }); diff --git a/src/index.ts b/src/index.ts index e557c1a..f55a044 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,8 +1,10 @@ export { + ScrapeGraphAI, + type ScrapeGraphAIClient, + type ScrapeGraphAIInput, scrape, extract, search, - generateSchema, getCredits, checkHealth, history, @@ -21,8 +23,6 @@ export type { ApiScrapeResultMap, ApiExtractRequest, ApiExtractResponse, - ApiGenerateSchemaRequest, - ApiGenerateSchemaResponse, ApiSearchRequest, ApiSearchResponse, ApiSearchResult, @@ -53,7 +53,6 @@ export type { export { apiScrapeRequestSchema, apiExtractRequestBaseSchema, - apiGenerateSchemaRequestSchema, apiSearchRequestSchema, apiCrawlRequestSchema, apiMonitorCreateSchema, diff --git a/src/schemas.ts b/src/schemas.ts index 617d1d6..dd8e2ab 100644 --- a/src/schemas.ts +++ b/src/schemas.ts @@ -1,13 +1,6 @@ import { z } from "zod"; -export const apiServiceEnumSchema = z.enum([ - "scrape", - "extract", - "schema", - "search", - "monitor", - "crawl", -]); +export const apiServiceEnumSchema = z.enum(["scrape", "extract", "search", "monitor", "crawl"]); export const apiStatusEnumSchema = z.enum(["completed", "failed"]); @@ -202,11 +195,6 @@ export const apiExtractRequestBaseSchema = z message: "Either url, html, or markdown is required", }); -export const apiGenerateSchemaRequestSchema = z.object({ - prompt: apiUserPromptSchema, - existingSchema: z.record(z.string(), z.unknown()).optional(), -}); - export const apiSearchRequestSchema = z .object({ query: z.string().min(1).max(500), diff --git a/src/scrapegraphai.ts b/src/scrapegraphai.ts index 6d7cd20..7225154 100644 --- a/src/scrapegraphai.ts +++ b/src/scrapegraphai.ts @@ -5,8 +5,6 @@ import type { ApiCreditsResponse, ApiExtractRequest, ApiExtractResponse, - ApiGenerateSchemaRequest, - ApiGenerateSchemaResponse, ApiHealthResponse, ApiHistoryEntry, ApiHistoryFilter, @@ 
-152,23 +150,6 @@ export async function search( } } -export async function generateSchema( - apiKey: string, - params: ApiGenerateSchemaRequest, -): Promise> { - try { - const { data, elapsedMs } = await request( - "POST", - "/schema", - apiKey, - params, - ); - return ok(data, elapsedMs); - } catch (err) { - return fail(err); - } -} - export async function getCredits(apiKey: string): Promise> { try { const { data, elapsedMs } = await request("GET", "/credits", apiKey); @@ -371,3 +352,46 @@ export const monitor = { } }, }; + +export interface ScrapeGraphAIInput { + apiKey?: string; +} + +function resolveApiKey(opts?: ScrapeGraphAIInput): string { + const key = opts?.apiKey ?? process.env.SGAI_API_KEY; + if (!key) throw new Error("API key required: pass { apiKey } or set SGAI_API_KEY env var"); + return key; +} + +export function ScrapeGraphAI(opts?: ScrapeGraphAIInput) { + const key = resolveApiKey(opts); + return { + scrape: (params: ApiScrapeRequest) => scrape(key, params), + extract: (params: ApiExtractRequest) => extract(key, params), + search: (params: ApiSearchRequest) => search(key, params), + credits: () => getCredits(key), + healthy: () => checkHealth(key), + history: { + list: (params?: ApiHistoryFilter) => history.list(key, params), + get: (id: string) => history.get(key, id), + }, + crawl: { + start: (params: ApiCrawlRequest) => crawl.start(key, params), + get: (id: string) => crawl.get(key, id), + stop: (id: string) => crawl.stop(key, id), + resume: (id: string) => crawl.resume(key, id), + delete: (id: string) => crawl.delete(key, id), + }, + monitor: { + create: (params: ApiMonitorCreateInput) => monitor.create(key, params), + list: () => monitor.list(key), + get: (id: string) => monitor.get(key, id), + update: (id: string, params: ApiMonitorUpdateInput) => monitor.update(key, id, params), + delete: (id: string) => monitor.delete(key, id), + pause: (id: string) => monitor.pause(key, id), + resume: (id: string) => monitor.resume(key, id), + }, + }; +} 
+ +export type ScrapeGraphAIClient = ReturnType; diff --git a/src/types.ts b/src/types.ts index 84d5d87..726d243 100644 --- a/src/types.ts +++ b/src/types.ts @@ -4,7 +4,6 @@ import type { apiExtractRequestBaseSchema, apiFetchConfigSchema, apiFetchContentTypeSchema, - apiGenerateSchemaRequestSchema, apiHistoryFilterSchema, apiHtmlModeSchema, apiMonitorCreateSchema, @@ -21,7 +20,6 @@ export type ApiScrapeFormatEntry = z.input; export type ApiScrapeRequest = z.input; export type ApiExtractRequest = z.input; -export type ApiGenerateSchemaRequest = z.input; export type ApiSearchRequest = z.input; export type ApiCrawlRequest = z.input; export type ApiMonitorCreateInput = z.input; @@ -191,12 +189,6 @@ export interface ApiExtractResponse { }; } -export interface ApiGenerateSchemaResponse { - refinedPrompt: string; - schema: Record; - usage: ApiTokenUsage; -} - export interface ApiSearchResult { url: string; title: string; @@ -306,7 +298,7 @@ export interface ApiMonitorResponse { updatedAt: string; } -export type ApiHistoryService = "scrape" | "extract" | "schema" | "search" | "monitor" | "crawl"; +export type ApiHistoryService = "scrape" | "extract" | "search" | "monitor" | "crawl"; export type ApiHistoryStatus = "completed" | "failed" | "running" | "paused" | "deleted"; interface ApiHistoryBase { @@ -330,12 +322,6 @@ export interface ApiExtractHistoryEntry extends ApiHistoryBase { result: ApiExtractResponse; } -export interface ApiSchemaHistoryEntry extends ApiHistoryBase { - service: "schema"; - params: ApiGenerateSchemaRequest; - result: ApiGenerateSchemaResponse; -} - export interface ApiSearchHistoryEntry extends ApiHistoryBase { service: "search"; params: ApiSearchRequest; @@ -357,7 +343,6 @@ export interface ApiCrawlHistoryEntry extends ApiHistoryBase { export type ApiHistoryEntry = | ApiScrapeHistoryEntry | ApiExtractHistoryEntry - | ApiSchemaHistoryEntry | ApiSearchHistoryEntry | ApiMonitorHistoryEntry | ApiCrawlHistoryEntry; diff --git 
a/tests/scrapegraphai.test.ts b/tests/scrapegraphai.test.ts index 6aa2372..69a6695 100644 --- a/tests/scrapegraphai.test.ts +++ b/tests/scrapegraphai.test.ts @@ -715,25 +715,6 @@ describe("search", () => { }); }); -describe("generateSchema", () => { - const params = { prompt: "Schema for product listing" }; - - test("success", async () => { - const body = { - refinedPrompt: "Extract product details", - schema: { type: "object", properties: {} }, - usage: { promptTokens: 50, completionTokens: 100 }, - }; - fetchSpy = spyOn(globalThis, "fetch").mockResolvedValueOnce(json(body)); - - const res = await sdk.generateSchema(API_KEY, params); - - expect(res.status).toBe("success"); - expect(res.data).toEqual(body); - expectRequest(0, "POST", "/schema", params); - }); -}); - describe("getCredits", () => { test("success", async () => { const body = { From 0522abc1e3a2b8b5611c0df650d535f0e128c5da Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 14 Apr 2026 15:27:19 +0200 Subject: [PATCH 19/28] test: update integration tests to use ScrapeGraphAI client - Use new client pattern instead of standalone functions - Add test for scrape with no formats (defaults to markdown) - Rename tests for clarity Co-Authored-By: Claude Opus 4.5 --- tests/integration.spec.ts | 49 +++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/tests/integration.spec.ts b/tests/integration.spec.ts index 5380eb9..8ba5038 100644 --- a/tests/integration.spec.ts +++ b/tests/integration.spec.ts @@ -1,30 +1,39 @@ import { describe, expect, test } from "bun:test"; -import { crawl, extract, getCredits, history, scrape, search } from "../src/index.js"; +import { ScrapeGraphAI } from "../src/index.js"; -const API_KEY = process.env.SGAI_API_KEY; -if (!API_KEY) throw new Error("SGAI_API_KEY env var required for integration tests"); +if (!process.env.SGAI_API_KEY) + throw new Error("SGAI_API_KEY env var required for integration tests"); + +const sgai = 
ScrapeGraphAI(); describe("integration", () => { - test("getCredits", async () => { - const res = await getCredits(API_KEY); - console.log("getCredits:", res); + test("credits", async () => { + const res = await sgai.credits(); + console.log("credits:", res); expect(res.status).toBe("success"); expect(res.data).toHaveProperty("remaining"); expect(res.data).toHaveProperty("plan"); }); - test("scrape markdown", async () => { - const res = await scrape(API_KEY, { + test("scrape - no formats (defaults to markdown)", async () => { + const res = await sgai.scrape({ url: "https://example.com" }); + console.log("scrape default:", res.status, res.error); + expect(res.status).toBe("success"); + expect(res.data?.results.markdown).toBeDefined(); + }); + + test("scrape - single format", async () => { + const res = await sgai.scrape({ url: "https://example.com", formats: [{ type: "markdown" }], }); - console.log("scrape:", res.status, res.error); + console.log("scrape single:", res.status, res.error); expect(res.status).toBe("success"); expect(res.data?.results.markdown).toBeDefined(); }); - test("scrape with multiple formats", async () => { - const res = await scrape(API_KEY, { + test("scrape - multiple formats", async () => { + const res = await sgai.scrape({ url: "https://example.com", formats: [{ type: "markdown", mode: "reader" }, { type: "links" }, { type: "images" }], }); @@ -34,8 +43,8 @@ describe("integration", () => { expect(res.data?.results.links).toBeDefined(); }); - test("scrape PDF document", async () => { - const res = await scrape(API_KEY, { + test("scrape - PDF document", async () => { + const res = await sgai.scrape({ url: "https://pdfobject.com/pdf/sample.pdf", contentType: "application/pdf", formats: [{ type: "markdown" }], @@ -45,8 +54,8 @@ describe("integration", () => { expect(res.data?.metadata.contentType).toBe("application/pdf"); }); - test("scrape with fetchConfig", async () => { - const res = await scrape(API_KEY, { + test("scrape - with 
fetchConfig", async () => { + const res = await sgai.scrape({ url: "https://example.com", fetchConfig: { mode: "fast", timeout: 15000 }, formats: [{ type: "markdown" }], @@ -56,7 +65,7 @@ describe("integration", () => { }); test("extract", async () => { - const res = await extract(API_KEY, { + const res = await sgai.extract({ url: "https://example.com", prompt: "What is this page about?", }); @@ -65,7 +74,7 @@ describe("integration", () => { }); test("search", async () => { - const res = await search(API_KEY, { + const res = await sgai.search({ query: "anthropic claude", numResults: 2, }); @@ -75,13 +84,13 @@ describe("integration", () => { }); test("history.list", async () => { - const res = await history.list(API_KEY, { limit: 5 }); + const res = await sgai.history.list({ limit: 5 }); console.log("history.list:", res.status, res.data?.pagination); expect(res.status).toBe("success"); }); test("crawl.start and crawl.get", async () => { - const startRes = await crawl.start(API_KEY, { + const startRes = await sgai.crawl.start({ url: "https://example.com", maxPages: 2, }); @@ -98,7 +107,7 @@ describe("integration", () => { expect(startRes.status).toBe("success"); if (startRes.data?.id) { - const getRes = await crawl.get(API_KEY, startRes.data.id); + const getRes = await sgai.crawl.get(startRes.data.id); console.log("crawl.get:", getRes.status, getRes.data?.status); expect(getRes.status).toBe("success"); } From 7c7d31cfc296d3db3eb8e7572b6d70f88d9d0bd9 Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 14 Apr 2026 16:11:46 +0200 Subject: [PATCH 20/28] chore: ignore scripts directory --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 8a3b82e..6f96c24 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +scripts/ node_modules dist/ .DS_Store From d4bad8144ba41719ee0a73e882e3567b6b2889ce Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 14 Apr 2026 16:13:27 +0200 Subject: [PATCH 21/28] 
chore: bump version to 2.0.0 --- package.json | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/package.json b/package.json index f5cc9c1..6fb163a 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "scrapegraph-js", - "version": "1.0.0", + "version": "2.0.0", "description": "Official JavaScript/TypeScript SDK for the ScrapeGraph AI API — smart web scraping powered by AI", "type": "module", "main": "dist/index.js", @@ -21,7 +21,15 @@ "check": "tsc --noEmit && biome check .", "prepublishOnly": "tsup" }, - "keywords": ["scraping", "ai", "scrapegraph", "typescript", "sdk", "web-scraping", "api"], + "keywords": [ + "scraping", + "ai", + "scrapegraph", + "typescript", + "sdk", + "web-scraping", + "api" + ], "author": "ScrapeGraph Team", "license": "MIT", "repository": { @@ -32,7 +40,9 @@ "engines": { "node": ">=22" }, - "files": ["dist"], + "files": [ + "dist" + ], "devDependencies": { "@biomejs/biome": "^1.9.4", "@types/bun": "^1.3.9", From b9037f57739e19009889e2d61052526d994f6646 Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Tue, 14 Apr 2026 23:01:40 +0200 Subject: [PATCH 22/28] docs: update CONTRIBUTING.md for JS SDK --- CONTRIBUTING.MD | 104 ++++++++++++++++++------------------------------ 1 file changed, 39 insertions(+), 65 deletions(-) diff --git a/CONTRIBUTING.MD b/CONTRIBUTING.MD index aab0da0..1ba2814 100644 --- a/CONTRIBUTING.MD +++ b/CONTRIBUTING.MD @@ -1,83 +1,57 @@ -# Contributing to ScrapeGraphAI +# Contributing to scrapegraph-js -Thank you for your interest in contributing to **ScrapeGraphAI**! We welcome contributions from the community to help improve and grow the project. This document outlines the guidelines and steps for contributing. 
+## Setup -## Table of Contents +```bash +bun install +``` -- [Getting Started](#getting-started) -- [Contributing Guidelines](#contributing-guidelines) -- [Code Style](#code-style) -- [Submitting a Pull Request](#submitting-a-pull-request) -- [Reporting Issues](#reporting-issues) -- [License](#license) +## Development -## Getting Started +```bash +bun run build # build to dist/ +bun run check # tsc + biome lint +``` -To get started with contributing, follow these steps: +## Before committing -1. Fork the repository on GitHub **(FROM pre/beta branch)**. -2. Clone your forked repository to your local machine. -3. Install the necessary dependencies from requirements.txt or via pyproject.toml as you prefere :). -4. Make your changes or additions. -5. Test your changes thoroughly. -6. Commit your changes with descriptive commit messages. -7. Push your changes to your forked repository. -8. Submit a pull request to the pre/beta branch. +Run all checks: -N.B All the pull request to the main branch will be rejected! +```bash +bun run format # auto-fix formatting +bun run lint # check for errors +bunx tsc --noEmit # type check +bun run build # verify build +bun test # unit tests +``` -## Contributing Guidelines +## Testing -Please adhere to the following guidelines when contributing to ScrapeGraphAI: +```bash +bun test # unit tests only +bun run test:integration # live API tests (requires SGAI_API_KEY) +``` -- Follow the code style and formatting guidelines specified in the [Code Style](#code-style) section. -- Make sure your changes are well-documented and include any necessary updates to the project's documentation and requirements if needed. 
-- Write clear and concise commit messages that describe the purpose of your changes and the last commit before the pull request has to follow the following format: - - `feat: Add new feature` - - `fix: Correct issue with existing feature` - - `docs: Update documentation` - - `style: Improve formatting and style` - - `refactor: Restructure code` - - `test: Add or update tests` - - `perf: Improve performance` -- Be respectful and considerate towards other contributors and maintainers. +For integration tests, set `SGAI_API_KEY` in your environment or `.env` file. -## Code Style +## Commit messages -Please make sure to format your code accordingly before submitting a pull request. +Use conventional commits: -### Python +- `feat:` new feature +- `fix:` bug fix +- `refactor:` code change (no new feature, no bug fix) +- `chore:` maintenance (deps, config) +- `test:` add/update tests +- `docs:` documentation -- [Style Guide for Python Code](https://www.python.org/dev/peps/pep-0008/) -- [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html) -- [The Hitchhiker's Guide to Python](https://docs.python-guide.org/writing/style/) -- [Pylint style of code for the documentation](https://pylint.pycqa.org/en/1.6.0/tutorial.html) +## Pull requests -## Submitting a Pull Request - -To submit your changes for review, please follow these steps: - -1. Ensure that your changes are pushed to your forked repository. -2. Go to the main repository on GitHub and navigate to the "Pull Requests" tab. -3. Click on the "New Pull Request" button. -4. Select your forked repository and the branch containing your changes. -5. Provide a descriptive title and detailed description for your pull request. -6. Reviewers will provide feedback and discuss any necessary changes. -7. Once your pull request is approved, it will be merged into the pre/beta branch. 
- -## Reporting Issues - -If you encounter any issues or have suggestions for improvements, please open an issue on the GitHub repository. Provide a clear and detailed description of the problem or suggestion, along with any relevant information or steps to reproduce the issue. +1. Fork and create a branch from `main` +2. Make changes +3. Run all checks (see above) +4. Submit PR to `main` ## License -ScrapeGraphAI is licensed under the **MIT License**. See the [LICENSE](LICENSE) file for more information. -By contributing to this project, you agree to license your contributions under the same license. - -ScrapeGraphAI uses code from the Langchain -frameworks. You find their original licenses below. - -LANGCHAIN LICENSE -https://github.com/langchain-ai/langchain/blob/master/LICENSE - -Can't wait to see your contributions! :smile: +MIT - contributions are licensed under the same license. From 91f25cef4a2f5ea4b3eb7b053e88d76f4944859b Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Wed, 15 Apr 2026 11:21:36 +0200 Subject: [PATCH 23/28] style: format package.json arrays Co-Authored-By: Claude Opus 4.5 --- package.json | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/package.json b/package.json index 6fb163a..dcaed96 100644 --- a/package.json +++ b/package.json @@ -21,15 +21,7 @@ "check": "tsc --noEmit && biome check .", "prepublishOnly": "tsup" }, - "keywords": [ - "scraping", - "ai", - "scrapegraph", - "typescript", - "sdk", - "web-scraping", - "api" - ], + "keywords": ["scraping", "ai", "scrapegraph", "typescript", "sdk", "web-scraping", "api"], "author": "ScrapeGraph Team", "license": "MIT", "repository": { @@ -40,9 +32,7 @@ "engines": { "node": ">=22" }, - "files": [ - "dist" - ], + "files": ["dist"], "devDependencies": { "@biomejs/biome": "^1.9.4", "@types/bun": "^1.3.9", From 93a1b15f9ce1ba4110ec26305114311cac2de4c4 Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Wed, 15 Apr 2026 11:29:55 +0200 
Subject: [PATCH 24/28] feat(examples): add polling loop to crawl examples Co-Authored-By: Claude Opus 4.5 --- examples/crawl/crawl_basic.ts | 27 +++++++++++++++++++------ examples/crawl/crawl_with_formats.ts | 30 ++++++++++++++++++++++------ 2 files changed, 45 insertions(+), 12 deletions(-) diff --git a/examples/crawl/crawl_basic.ts b/examples/crawl/crawl_basic.ts index f0aeb57..2f15b97 100644 --- a/examples/crawl/crawl_basic.ts +++ b/examples/crawl/crawl_basic.ts @@ -4,7 +4,7 @@ import { ScrapeGraphAI } from "scrapegraph-js"; const sgai = ScrapeGraphAI(); const startRes = await sgai.crawl.start({ - url: "https://example.com", + url: "https://scrapegraphai.com/", maxPages: 5, maxDepth: 2, }); @@ -12,10 +12,25 @@ const startRes = await sgai.crawl.start({ if (startRes.status !== "success" || !startRes.data) { console.error("Failed to start:", startRes.error); } else { - console.log("Crawl started:", startRes.data.id); - console.log("Status:", startRes.data.status); + const crawlId = startRes.data.id; + console.log("Crawl started:", crawlId); - const getRes = await sgai.crawl.get(startRes.data.id); - console.log("\nProgress:", getRes.data?.finished, "/", getRes.data?.total); - console.log("Pages:", getRes.data?.pages.map((p) => p.url)); + let status = startRes.data.status; + while (status === "running") { + await new Promise((r) => setTimeout(r, 2000)); + const getRes = await sgai.crawl.get(crawlId); + if (getRes.status !== "success" || !getRes.data) { + console.error("Failed to get status:", getRes.error); + break; + } + status = getRes.data.status; + console.log(`Progress: ${getRes.data.finished}/${getRes.data.total} - ${status}`); + + if (status === "completed" || status === "failed") { + console.log("\nPages crawled:"); + for (const page of getRes.data.pages) { + console.log(` ${page.url} - ${page.status}`); + } + } + } } diff --git a/examples/crawl/crawl_with_formats.ts b/examples/crawl/crawl_with_formats.ts index aab74c1..4a3e265 100644 --- 
a/examples/crawl/crawl_with_formats.ts +++ b/examples/crawl/crawl_with_formats.ts @@ -4,7 +4,7 @@ import { ScrapeGraphAI } from "scrapegraph-js"; const sgai = ScrapeGraphAI(); const res = await sgai.crawl.start({ - url: "https://example.com", + url: "https://scrapegraphai.com/", formats: [ { type: "markdown", mode: "reader" }, { type: "screenshot", width: 1280, height: 720 }, @@ -15,10 +15,28 @@ const res = await sgai.crawl.start({ excludePatterns: ["/admin/*"], }); -if (res.status === "success") { - console.log("Crawl ID:", res.data?.id); - console.log("Status:", res.data?.status); - console.log("Total pages to crawl:", res.data?.total); +if (res.status !== "success" || !res.data) { + console.error("Failed to start:", res.error); } else { - console.error("Failed:", res.error); + const crawlId = res.data.id; + console.log("Crawl started:", crawlId); + + let status = res.data.status; + while (status === "running") { + await new Promise((r) => setTimeout(r, 2000)); + const getRes = await sgai.crawl.get(crawlId); + if (getRes.status !== "success" || !getRes.data) { + console.error("Failed to get status:", getRes.error); + break; + } + status = getRes.data.status; + console.log(`Progress: ${getRes.data.finished}/${getRes.data.total} - ${status}`); + + if (status === "completed" || status === "failed") { + console.log("\nPages crawled:"); + for (const page of getRes.data.pages) { + console.log(` ${page.url} - ${page.status}`); + } + } + } } From bebf149a83979f789f009de8cfc7085d8dc508f5 Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Wed, 15 Apr 2026 11:35:30 +0200 Subject: [PATCH 25/28] chore: update CLAUDE.md, fix health endpoint, add playground script - Update CLAUDE.md to match Python SDK structure - Fix health endpoint path: /healthz -> /api/v2/health - Add playground script with .env loading - Track CLAUDE.md in git Co-Authored-By: Claude Opus 4.5 --- .gitignore | 1 - CLAUDE.md | 143 ++++++++++++++++++++++++++++++++++++ package.json | 3 +- 
src/scrapegraphai.ts | 2 +- tests/scrapegraphai.test.ts | 2 +- 5 files changed, 147 insertions(+), 4 deletions(-) create mode 100644 CLAUDE.md diff --git a/.gitignore b/.gitignore index 6f96c24..b08971a 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,3 @@ bun.lock .env doc/ .claude/ -CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..bc37dc4 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,143 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Before completing any task + +Always run these commands before committing or saying a task is done: + +```bash +bun run format +bun run lint +bunx tsc --noEmit +bun run build +bun test +``` + +No exceptions. + +## Project Overview + +**scrapegraph-js** is the official JavaScript/TypeScript SDK for the ScrapeGraph AI API. It provides a TypeScript client for intelligent web scraping powered by AI. + +## Repository Structure + +``` +scrapegraph-js/ +├── src/ # TypeScript SDK source +├── tests/ # Test suite +├── examples/ # Usage examples +├── scripts/ # Development utilities +└── .github/workflows/ # CI/CD +``` + +## Tech Stack + +- **Language**: TypeScript (Node.js 22+) +- **Runtime**: Bun +- **Core Dependencies**: zod (validation) +- **Testing**: Bun test +- **Code Quality**: Biome (lint + format) +- **Build**: tsup + +## Commands + +```bash +# Install +bun install + +# Dev (watch mode) +bun run dev + +# Test +bun test # unit tests +bun run test:integration # integration tests + +# Format +bun run format + +# Lint +bun run lint + +# Type check +bunx tsc --noEmit + +# Build +bun run build + +# Playground (loads .env) +bun run playground +``` + +## Architecture + +**Core Components:** + +1. **Client** (`src/scrapegraphai.ts`): + - `ScrapeGraphAI()` - Factory function returning namespaced client + - Handles all API communication + +2. 
**Types** (`src/types.ts`): + - Request/response types for all endpoints + - Zod schema inference + +3. **Schemas** (`src/schemas.ts`): + - Zod validation schemas + +4. **Config** (`src/env.ts`): + - Environment variable handling + +## API Methods + +| Method | Purpose | +|--------|---------| +| `sgai.scrape()` | AI data extraction from URL | +| `sgai.extract()` | Extract from raw HTML/text | +| `sgai.search()` | Web search + extraction | +| `sgai.crawl.start()` | Start crawl job | +| `sgai.crawl.get()` | Get crawl status | +| `sgai.monitor.create()` | Create monitoring job | +| `sgai.monitor.get()` | Get monitor status | +| `sgai.monitor.update()` | Update monitor config | +| `sgai.monitor.delete()` | Delete monitor | +| `sgai.credits()` | Check API credits | +| `sgai.healthy()` | Health check | +| `sgai.history.list()` | List request history | +| `sgai.history.get()` | Get specific request | + +## Adding New Endpoint + +1. Add types in `src/types.ts` +2. Add Zod schema in `src/schemas.ts` +3. Add function in `src/scrapegraphai.ts` +4. Wire into `ScrapeGraphAI()` client object +5. Export types in `src/index.ts` +6. Add tests in `tests/` +7. 
Add example in `examples/` + +## Environment Variables + +- `SGAI_API_KEY` - API key for authentication +- `SGAI_DEBUG` - Enable debug logging (optional) + +## Usage + +```typescript +import { ScrapeGraphAI } from "scrapegraph-js"; + +const sgai = ScrapeGraphAI(); // reads SGAI_API_KEY from env + +const res = await sgai.scrape({ + url: "https://example.com", + prompt: "Extract the main heading", +}); + +if (res.status === "success") { + console.log(res.data?.result); +} +``` + +## Links + +- [API Docs](https://docs.scrapegraphai.com) +- [npm](https://www.npmjs.com/package/scrapegraph-js) diff --git a/package.json b/package.json index dcaed96..7e127a4 100644 --- a/package.json +++ b/package.json @@ -19,7 +19,8 @@ "test": "bun test tests/*.test.ts", "test:integration": "bun test tests/*.spec.ts", "check": "tsc --noEmit && biome check .", - "prepublishOnly": "tsup" + "prepublishOnly": "tsup", + "playground": "bun --env-file=.env scripts/playground.ts" }, "keywords": ["scraping", "ai", "scrapegraph", "typescript", "sdk", "web-scraping", "api"], "author": "ScrapeGraph Team", diff --git a/src/scrapegraphai.ts b/src/scrapegraphai.ts index 7225154..d1c3b35 100644 --- a/src/scrapegraphai.ts +++ b/src/scrapegraphai.ts @@ -163,7 +163,7 @@ export async function checkHealth(apiKey: string): Promise( "GET", - "/healthz", + "/api/v2/health", apiKey, undefined, HEALTH_URL, diff --git a/tests/scrapegraphai.test.ts b/tests/scrapegraphai.test.ts index 69a6695..e3c70c7 100644 --- a/tests/scrapegraphai.test.ts +++ b/tests/scrapegraphai.test.ts @@ -742,7 +742,7 @@ describe("checkHealth", () => { expect(res.status).toBe("success"); expect(res.data).toEqual(body); - expectRequest(0, "GET", "/healthz", undefined, HEALTH_BASE); + expectRequest(0, "GET", "/api/v2/health", undefined, HEALTH_BASE); }); }); From 2eba148108614a4448ea34ff4d4c50bc64ca18a2 Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Wed, 15 Apr 2026 11:41:42 +0200 Subject: [PATCH 26/28] fix: update API base URL 
and rename timeout env var - Change base URL to https://api.scrapegraphai.com/api/v2 - Rename SGAI_TIMEOUT_S to SGAI_TIMEOUT - Simplify health endpoint to use BASE_URL Co-Authored-By: Claude Opus 4.5 --- README.md | 4 ++-- src/env.ts | 2 +- src/scrapegraphai.ts | 15 +++------------ tests/scrapegraphai.test.ts | 7 ++----- 4 files changed, 8 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index bd8b669..70f3ee8 100644 --- a/README.md +++ b/README.md @@ -218,9 +218,9 @@ const health = await sgai.healthy(); | Variable | Description | Default | |----------|-------------|---------| | `SGAI_API_KEY` | Your ScrapeGraphAI API key | — | -| `SGAI_API_URL` | Override API base URL | `https://api.scrapegraphai.com/v2` | +| `SGAI_API_URL` | Override API base URL | `https://api.scrapegraphai.com/api/v2` | | `SGAI_DEBUG` | Enable debug logging (`"1"`) | off | -| `SGAI_TIMEOUT_S` | Request timeout in seconds | `120` | +| `SGAI_TIMEOUT` | Request timeout in seconds | `120` | ## Development diff --git a/src/env.ts b/src/env.ts index a400712..dac5bce 100644 --- a/src/env.ts +++ b/src/env.ts @@ -1,4 +1,4 @@ export const env = { debug: process.env.SGAI_DEBUG === "1", - timeoutS: process.env.SGAI_TIMEOUT_S ? Number(process.env.SGAI_TIMEOUT_S) : 120, + timeout: process.env.SGAI_TIMEOUT ? Number(process.env.SGAI_TIMEOUT) : 120, }; diff --git a/src/scrapegraphai.ts b/src/scrapegraphai.ts index d1c3b35..539280d 100644 --- a/src/scrapegraphai.ts +++ b/src/scrapegraphai.ts @@ -19,10 +19,7 @@ import type { ApiSearchResponse, } from "./types.js"; -const BASE_URL = process.env.SGAI_API_URL || "https://api.scrapegraphai.com/v2"; -const HEALTH_URL = process.env.SGAI_API_URL - ? `${process.env.SGAI_API_URL.replace(/\/v\d+$/, "")}` - : "https://api.scrapegraphai.com"; +const BASE_URL = process.env.SGAI_API_URL || "https://api.scrapegraphai.com/api/v2"; function debug(label: string, data?: unknown) { if (!env.debug) return; @@ -86,7 +83,7 @@ async function request( ...(body ? 
{ "Content-Type": "application/json" } : {}), }, body: body ? JSON.stringify(body) : undefined, - signal: AbortSignal.timeout(env.timeoutS * 1000), + signal: AbortSignal.timeout(env.timeout * 1000), }); if (!res.ok) { @@ -161,13 +158,7 @@ export async function getCredits(apiKey: string): Promise> { try { - const { data, elapsedMs } = await request( - "GET", - "/api/v2/health", - apiKey, - undefined, - HEALTH_URL, - ); + const { data, elapsedMs } = await request("GET", "/health", apiKey); return ok(data, elapsedMs); } catch (err) { return fail(err); diff --git a/tests/scrapegraphai.test.ts b/tests/scrapegraphai.test.ts index e3c70c7..6bdde81 100644 --- a/tests/scrapegraphai.test.ts +++ b/tests/scrapegraphai.test.ts @@ -2,10 +2,7 @@ import { afterEach, describe, expect, spyOn, test } from "bun:test"; import * as sdk from "../src/scrapegraphai.js"; const API_KEY = "test-sgai-key"; -const BASE = process.env.SGAI_API_URL || "https://api.scrapegraphai.com/v2"; -const HEALTH_BASE = process.env.SGAI_API_URL - ? process.env.SGAI_API_URL.replace(/\/v\d+$/, "") - : "https://api.scrapegraphai.com"; +const BASE = process.env.SGAI_API_URL || "https://api.scrapegraphai.com/api/v2"; function json(body: unknown, status = 200): Response { return new Response(JSON.stringify(body), { @@ -742,7 +739,7 @@ describe("checkHealth", () => { expect(res.status).toBe("success"); expect(res.data).toEqual(body); - expectRequest(0, "GET", "/api/v2/health", undefined, HEALTH_BASE); + expectRequest(0, "GET", "/health"); }); }); From 096c1107af1e2e2031643b4b8e8d9045d6943803 Mon Sep 17 00:00:00 2001 From: FrancescoSaverioZuppichini Date: Wed, 15 Apr 2026 12:45:33 +0200 Subject: [PATCH 27/28] feat: add monitor.activity endpoint and update examples - Add ApiMonitorTickStatus, ApiMonitorTickEntry, ApiMonitorActivityResponse types - Add monitor.activity(id, params?) 
method for paginated tick history - Update monitor examples to poll activity and display diffs - Track seen ticks to avoid duplicate output - Use throw for proper TypeScript type narrowing Co-Authored-By: Claude Opus 4.5 --- examples/monitor/monitor_basic.ts | 65 ++++++++++++++++++++---- examples/monitor/monitor_with_webhook.ts | 64 +++++++++++++++++++---- src/index.ts | 4 ++ src/scrapegraphai.ts | 22 ++++++++ src/types.ts | 22 ++++++++ 5 files changed, 156 insertions(+), 21 deletions(-) diff --git a/examples/monitor/monitor_basic.ts b/examples/monitor/monitor_basic.ts index cdb227c..337d694 100644 --- a/examples/monitor/monitor_basic.ts +++ b/examples/monitor/monitor_basic.ts @@ -1,19 +1,62 @@ import { ScrapeGraphAI } from "scrapegraph-js"; -// reads SGAI_API_KEY from env, or pass explicitly: ScrapeGraphAI({ apiKey: "..." }) const sgai = ScrapeGraphAI(); const res = await sgai.monitor.create({ - url: "https://example.com", - name: "Example Monitor", - interval: "0 * * * *", - formats: [{ type: "markdown" }], + url: "https://time.is/", + name: "Time Monitor", + interval: "*/10 * * * *", + formats: [ + { + type: "json", + prompt: "Extract the current time", + schema: { + type: "object", + properties: { + time: { type: "string" }, + }, + required: ["time"], + }, + }, + ], }); -if (res.status === "success") { - console.log("Monitor created:", res.data?.cronId); - console.log("Status:", res.data?.status); - console.log("Interval:", res.data?.interval); -} else { - console.error("Failed:", res.error); +if (res.status !== "success" || !res.data) { + throw new Error(`Failed to create monitor: ${res.error}`); +} + +const { cronId: monitorId, interval } = res.data; +console.log(`Monitor created: ${monitorId}`); +console.log(`Interval: ${interval}`); +console.log("\nPolling for activity (Ctrl+C to stop)...\n"); + +function cleanup() { + console.log("\nStopping monitor..."); + sgai.monitor.delete(monitorId).then(() => { + console.log("Monitor deleted"); + process.exit(0); + 
}); +} + +process.on("SIGINT", cleanup); + +const seenIds = new Set(); + +while (true) { + const activity = await sgai.monitor.activity(monitorId); + if (activity.status === "success" && activity.data) { + for (const tick of activity.data.ticks) { + if (seenIds.has(tick.id)) continue; + seenIds.add(tick.id); + + const changes = tick.changed ? "CHANGED" : "no change"; + console.log(`[${tick.createdAt}] ${tick.status} - ${changes} (${tick.elapsedMs}ms)`); + if (tick.diffs && Object.keys(tick.diffs).length > 0) { + console.log(" Diffs:", JSON.stringify(tick.diffs, null, 2)); + } else if (tick.changed) { + console.log(" (no diffs data)"); + } + } + } + await new Promise((r) => setTimeout(r, 30000)); } diff --git a/examples/monitor/monitor_with_webhook.ts b/examples/monitor/monitor_with_webhook.ts index ddbaa77..6645896 100644 --- a/examples/monitor/monitor_with_webhook.ts +++ b/examples/monitor/monitor_with_webhook.ts @@ -1,22 +1,66 @@ import { ScrapeGraphAI } from "scrapegraph-js"; -// reads SGAI_API_KEY from env, or pass explicitly: ScrapeGraphAI({ apiKey: "..." 
}) const sgai = ScrapeGraphAI(); const res = await sgai.monitor.create({ - url: "https://example.com/prices", - name: "Price Monitor", - interval: "0 */6 * * *", + url: "https://time.is/", + name: "Time Monitor with Webhook", + interval: "*/10 * * * *", formats: [ { type: "markdown" }, - { type: "json", prompt: "Extract all product prices" }, + { + type: "json", + prompt: "Extract the current time and timezone", + schema: { + type: "object", + properties: { + time: { type: "string" }, + timezone: { type: "string" }, + }, + required: ["time"], + }, + }, ], webhookUrl: "https://your-server.com/webhook", }); -if (res.status === "success") { - console.log("Monitor created:", res.data?.cronId); - console.log("Will notify:", res.data?.config.webhookUrl); -} else { - console.error("Failed:", res.error); +if (res.status !== "success" || !res.data) { + throw new Error(`Failed to create monitor: ${res.error}`); +} + +const { cronId: monitorId, interval, config } = res.data; +console.log(`Monitor created: ${monitorId}`); +console.log(`Interval: ${interval}`); +console.log(`Webhook: ${config.webhookUrl}`); +console.log("\nPolling for activity (Ctrl+C to stop)...\n"); + +function cleanup() { + console.log("\nStopping monitor..."); + sgai.monitor.delete(monitorId).then(() => { + console.log("Monitor deleted"); + process.exit(0); + }); +} + +process.on("SIGINT", cleanup); + +const seenIds = new Set(); + +while (true) { + const activity = await sgai.monitor.activity(monitorId); + if (activity.status === "success" && activity.data) { + for (const tick of activity.data.ticks) { + if (seenIds.has(tick.id)) continue; + seenIds.add(tick.id); + + const changes = tick.changed ? 
"CHANGED" : "no change"; + console.log(`[${tick.createdAt}] ${tick.status} - ${changes} (${tick.elapsedMs}ms)`); + if (tick.diffs && Object.keys(tick.diffs).length > 0) { + console.log(" Diffs:", JSON.stringify(tick.diffs, null, 2)); + } else if (tick.changed) { + console.log(" (no diffs data - first tick establishes baseline)"); + } + } + } + await new Promise((r) => setTimeout(r, 30000)); } diff --git a/src/index.ts b/src/index.ts index f55a044..b43f67e 100644 --- a/src/index.ts +++ b/src/index.ts @@ -37,6 +37,10 @@ export type { ApiMonitorResponse, ApiMonitorResult, ApiMonitorDiffs, + ApiMonitorActivityParams, + ApiMonitorActivityResponse, + ApiMonitorTickEntry, + ApiMonitorTickStatus, ApiHistoryFilter, ApiHistoryEntry, ApiHistoryPage, diff --git a/src/scrapegraphai.ts b/src/scrapegraphai.ts index 539280d..4acf8b9 100644 --- a/src/scrapegraphai.ts +++ b/src/scrapegraphai.ts @@ -9,6 +9,8 @@ import type { ApiHistoryEntry, ApiHistoryFilter, ApiHistoryPage, + ApiMonitorActivityParams, + ApiMonitorActivityResponse, ApiMonitorCreateInput, ApiMonitorResponse, ApiMonitorUpdateInput, @@ -342,6 +344,24 @@ export const monitor = { return fail(err); } }, + + async activity( + apiKey: string, + id: string, + params?: ApiMonitorActivityParams, + ): Promise> { + try { + const qs = new URLSearchParams(); + if (params?.limit) qs.set("limit", String(params.limit)); + if (params?.cursor) qs.set("cursor", params.cursor); + const query = qs.toString(); + const path = query ? 
`/monitor/${id}/activity?${query}` : `/monitor/${id}/activity`; + const { data, elapsedMs } = await request("GET", path, apiKey); + return ok(data, elapsedMs); + } catch (err) { + return fail(err); + } + }, }; export interface ScrapeGraphAIInput { @@ -381,6 +401,8 @@ export function ScrapeGraphAI(opts?: ScrapeGraphAIInput) { delete: (id: string) => monitor.delete(key, id), pause: (id: string) => monitor.pause(key, id), resume: (id: string) => monitor.resume(key, id), + activity: (id: string, params?: ApiMonitorActivityParams) => + monitor.activity(key, id, params), }, }; } diff --git a/src/types.ts b/src/types.ts index 726d243..d27747e 100644 --- a/src/types.ts +++ b/src/types.ts @@ -298,6 +298,28 @@ export interface ApiMonitorResponse { updatedAt: string; } +export type ApiMonitorTickStatus = "completed" | "failed" | "paused" | "running"; + +export interface ApiMonitorTickEntry { + id: string; + status: ApiMonitorTickStatus; + createdAt: string; + elapsedMs: number; + changed: boolean; + diffs: ApiMonitorDiffs; + error?: string; +} + +export interface ApiMonitorActivityResponse { + ticks: ApiMonitorTickEntry[]; + nextCursor: string | null; +} + +export interface ApiMonitorActivityParams { + limit?: number; + cursor?: string; +} + export type ApiHistoryService = "scrape" | "extract" | "search" | "monitor" | "crawl"; export type ApiHistoryStatus = "completed" | "failed" | "running" | "paused" | "deleted"; From 50a59f99d7c40a52c68df21ba82e0f4e3486c15e Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Thu, 16 Apr 2026 15:27:24 +0200 Subject: [PATCH 28/28] chore: link banner image to scrapegraphai.com and remove tracked .DS_Store Co-Authored-By: Claude Opus 4.6 (1M context) --- .DS_Store | Bin 6148 -> 0 bytes README.md | 4 +++- 2 files changed, 3 insertions(+), 1 deletion(-) delete mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 5008ddfcf53c02e82d7eee2e57c38e5672ef89f6..0000000000000000000000000000000000000000 GIT binary 
patch literal 0 HcmV?d00001 literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 - ScrapeGraphAI JS SDK + + ScrapeGraphAI JS SDK +

Official TypeScript SDK for the [ScrapeGraphAI AI API](https://scrapegraphai.com).