|
| 1 | +# Migrating from v1 to v2 |
| 2 | + |
| 3 | +This guide covers every breaking change in `scrapegraph-js` v2 and shows how to |
| 4 | +update your code. |
| 5 | + |
| 6 | +## Minimum runtime |
| 7 | + |
| 8 | +v2 requires **Node.js >= 22** (v1 worked on Node 18+). |
| 9 | + |
| 10 | +## Client initialization |
| 11 | + |
| 12 | +v1 passed the API key to every function call. v2 uses a single client instance. |
| 13 | + |
| 14 | +```diff |
| 15 | +- import { smartScraper, getCredits } from "scrapegraph-js"; |
| 16 | +- |
| 17 | +- const result = await smartScraper(apiKey, { ... }); |
| 18 | +- const credits = await getCredits(apiKey); |
| 19 | ++ import { scrapegraphai } from "scrapegraph-js"; |
| 20 | ++ |
| 21 | ++ const sgai = scrapegraphai({ apiKey: "your-api-key" }); |
| 22 | ++ |
| 23 | ++ const result = await sgai.extract("https://example.com", { prompt: "..." }); |
| 24 | ++ const credits = await sgai.credits(); |
| 25 | +``` |
| 26 | + |
| 27 | +The factory accepts optional settings that used to come from environment |
| 28 | +variables: |
| 29 | + |
| 30 | +```ts |
| 31 | +const sgai = scrapegraphai({ |
| 32 | + apiKey: "your-api-key", |
| 33 | + baseUrl: "https://api.scrapegraphai.com", // was SGAI_API_URL env var |
| 34 | + timeout: 30000, |
| 35 | + maxRetries: 2, |
| 36 | +}); |
| 37 | +``` |
| 38 | + |
| 39 | +## Return type |
| 40 | + |
| 41 | +Every method now returns `{ data, requestId }` and **throws** on error. |
| 42 | + |
| 43 | +```diff |
| 44 | +- const result = await smartScraper(apiKey, params); |
| 45 | +- if (result.status === "error") { |
| 46 | +- console.error(result.error); |
| 47 | +- } else { |
| 48 | +- console.log(result.data); |
| 49 | +- } |
| 50 | ++ try { |
| 51 | ++ const { data, requestId } = await sgai.extract(url, { prompt: "..." }); |
| 52 | ++ console.log(data); |
| 53 | ++ } catch (err) { |
| 54 | ++ console.error(err.message); |
| 55 | ++ } |
| 56 | +``` |
| 57 | + |
| 58 | +| v1 field | v2 equivalent | |
| 59 | +|---|---| |
| 60 | +| `result.status` | not needed, errors throw | |
| 61 | +| `result.data` | `result.data` | |
| 62 | +| `result.error` | caught via `try/catch` | |
| 63 | +| `result.elapsedMs` | removed | |
| 64 | + |
| 65 | +## Renamed and replaced functions |
| 66 | + |
| 67 | +| v1 function | v2 method | Notes | |
| 68 | +|---|---|---| |
| 69 | +| `smartScraper(apiKey, params)` | `sgai.extract(url, { prompt, schema })` | Renamed; takes URL as first arg | |
| 70 | +| `searchScraper(apiKey, params)` | `sgai.search(query, options)` | Renamed | |
| 71 | +| `markdownify(apiKey, params)` | `sgai.scrape(url, { format: "markdown" })` | Merged into `scrape` | |
| 72 | +| `scrape(apiKey, params)` | `sgai.scrape(url, options)` | Same concept, new signature | |
| 73 | +| `crawl(apiKey, params, onPoll?)` | `sgai.crawl.start(url, options)` | No built-in polling (see below) | |
| 74 | +| `getCredits(apiKey)` | `sgai.credits()` | Renamed | |
| 75 | +| `history(apiKey, params)` | `sgai.history(filter)` | Simplified params | |
| 76 | +| `createSiteMonitor(apiKey, params)` | `sgai.monitor.create(input)` | Moved under `monitor` namespace | |
| 77 | +| `listSiteMonitors(apiKey, params?)` | `sgai.monitor.list()` | Moved under `monitor` namespace | |
| 78 | +| `getSiteMonitor(apiKey, id)` | `sgai.monitor.get(id)` | Moved under `monitor` namespace | |
| 79 | +| `deleteSiteMonitor(apiKey, id)` | `sgai.monitor.delete(id)` | Moved under `monitor` namespace | |
| 80 | + |
| 81 | +### Removed (no v2 equivalent) |
| 82 | + |
| 83 | +| v1 function | Reason | |
| 84 | +|---|---| |
| 85 | +| `agenticScraper()` | Removed from the API | |
| 86 | +| `generateSchema()` | Removed from the API | |
| 87 | +| `sitemap()` | Removed from the API | |
| 88 | +| `checkHealth()` | Removed from the API | |
| 89 | +| `updateSiteMonitor()` | Use `monitor.pause()` / `monitor.resume()` instead | |
| 90 | + |
| 91 | +## Parameter changes |
| 92 | + |
| 93 | +v1 used `snake_case` parameter names. v2 uses `camelCase`. |
| 94 | + |
| 95 | +### smartScraper -> extract |
| 96 | + |
| 97 | +```diff |
| 98 | +- await smartScraper(apiKey, { |
| 99 | +- website_url: "https://example.com", |
| 100 | +- user_prompt: "Extract the title", |
| 101 | +- output_schema: { type: "object", properties: { title: { type: "string" } } }, |
| 102 | +- number_of_scrolls: 3, |
| 103 | +- country_code: "US", |
| 104 | +- wait_ms: 2000, |
| 105 | +- }); |
| 106 | ++ await sgai.extract("https://example.com", { |
| 107 | ++ prompt: "Extract the title", |
| 108 | ++ schema: { type: "object", properties: { title: { type: "string" } } }, |
| 109 | ++ fetchConfig: { |
| 110 | ++ scrolls: 3, |
| 111 | ++ country: "US", |
| 112 | ++ wait: 2000, |
| 113 | ++ }, |
| 114 | ++ }); |
| 115 | +``` |
| 116 | + |
| 117 | +v2 also accepts Zod schemas directly: |
| 118 | + |
| 119 | +```ts |
| 120 | +import { z } from "zod"; |
| 121 | + |
| 122 | +await sgai.extract("https://example.com", { |
| 123 | + prompt: "Extract the title", |
| 124 | + schema: z.object({ title: z.string() }), |
| 125 | +}); |
| 126 | +``` |
| 127 | + |
| 128 | +### searchScraper -> search |
| 129 | + |
| 130 | +```diff |
| 131 | +- await searchScraper(apiKey, { |
| 132 | +- user_prompt: "Latest news about AI", |
| 133 | +- num_results: 5, |
| 134 | +- extraction_mode: true, |
| 135 | +- output_schema: schema, |
| 136 | +- }); |
| 137 | ++ await sgai.search("Latest news about AI", { |
| 138 | ++ numResults: 5, |
| 139 | ++ schema: schema, |
| 140 | ++ prompt: "Extract key points", |
| 141 | ++ }); |
| 142 | +``` |
| 143 | + |
| 144 | +### markdownify -> scrape |
| 145 | + |
| 146 | +```diff |
| 147 | +- await markdownify(apiKey, { |
| 148 | +- website_url: "https://example.com", |
| 149 | +- wait_ms: 1000, |
| 150 | +- country_code: "US", |
| 151 | +- }); |
| 152 | ++ await sgai.scrape("https://example.com", { |
| 153 | ++ format: "markdown", |
| 154 | ++ fetchConfig: { |
| 155 | ++ wait: 1000, |
| 156 | ++ country: "US", |
| 157 | ++ }, |
| 158 | ++ }); |
| 159 | +``` |
| 160 | + |
| 161 | +### scrape |
| 162 | + |
| 163 | +```diff |
| 164 | +- await scrape(apiKey, { |
| 165 | +- website_url: "https://example.com", |
| 166 | +- branding: true, |
| 167 | +- country_code: "US", |
| 168 | +- }); |
| 169 | ++ await sgai.scrape("https://example.com", { |
| 170 | ++ format: "branding", // or "html", "screenshot" |
| 171 | ++ fetchConfig: { |
| 172 | ++ country: "US", |
| 173 | ++ }, |
| 174 | ++ }); |
| 175 | +``` |
| 176 | + |
| 177 | +### getCredits -> credits |
| 178 | + |
| 179 | +```diff |
| 180 | +- const result = await getCredits(apiKey); |
| 181 | +- console.log(result.data.remaining_credits); |
| 182 | ++ const { data } = await sgai.credits(); |
| 183 | ++ console.log(data); |
| 184 | +``` |
| 185 | + |
| 186 | +### history |
| 187 | + |
| 188 | +```diff |
| 189 | +- await history(apiKey, { |
| 190 | +- service: "smartscraper", |
| 191 | +- page: 1, |
| 192 | +- page_size: 10, |
| 193 | +- }); |
| 194 | ++ await sgai.history({ |
| 195 | ++ service: "extract", // service names changed too |
| 196 | ++ page: 1, |
| 197 | ++ limit: 10, // was page_size |
| 198 | ++ }); |
| 199 | +``` |
| 200 | + |
| 201 | +History service names mapping: |
| 202 | + |
| 203 | +| v1 service | v2 service | |
| 204 | +|---|---| |
| 205 | +| `"smartscraper"` | `"extract"` | |
| 206 | +| `"searchscraper"` | `"search"` | |
| 207 | +| `"markdownify"` | `"scrape"` | |
| 208 | +| `"scrape"` | `"scrape"` | |
| 209 | +| `"crawl"` | `"crawl"` | |
| 210 | +| `"agentic-scraper"` | removed | |
| 211 | +| `"sitemap"` | removed | |
| 212 | + |
| 213 | +## Crawling |
| 214 | + |
| 215 | +v1 had built-in polling that blocked until the crawl finished. v2 separates |
| 216 | +the crawl lifecycle into discrete calls.
| 217 | + |
| 218 | +```diff |
| 219 | +- const result = await crawl(apiKey, { |
| 220 | +- url: "https://example.com", |
| 221 | +- max_pages: 10, |
| 222 | +- depth: 2, |
| 223 | +- }, (status) => console.log(status)); |
| 224 | +- |
| 225 | +- console.log(result.data.pages); |
| 226 | ++ // Start the crawl |
| 227 | ++ const job = await sgai.crawl.start("https://example.com", { |
| 228 | ++ maxPages: 10, |
| 229 | ++ maxDepth: 2, |
| 230 | ++ }); |
| 231 | ++ |
| 232 | ++ // Poll manually |
| 233 | ++ const status = await sgai.crawl.status(job.data.id); |
| 234 | ++ |
| 235 | ++ // Control the crawl |
| 236 | ++ await sgai.crawl.stop(job.data.id); |
| 237 | ++ await sgai.crawl.resume(job.data.id); |
| 238 | +``` |
| 239 | + |
| 240 | +## Site monitors |
| 241 | + |
| 242 | +Monitor functions moved from top-level to the `monitor` namespace and gained |
| 243 | +pause/resume support. |
| 244 | + |
| 245 | +```diff |
| 246 | +- import { |
| 247 | +- createSiteMonitor, |
| 248 | +- listSiteMonitors, |
| 249 | +- getSiteMonitor, |
| 250 | +- deleteSiteMonitor, |
| 251 | +- } from "scrapegraph-js"; |
| 252 | +- |
| 253 | +- await createSiteMonitor(apiKey, { |
| 254 | +- website_url: "https://example.com", |
| 255 | +- webhook_url: "https://hook.example.com", |
| 256 | +- cron_expression: "0 * * * *", |
| 257 | +- }); |
| 258 | +- |
| 259 | +- await listSiteMonitors(apiKey); |
| 260 | +- await getSiteMonitor(apiKey, monitorId); |
| 261 | +- await deleteSiteMonitor(apiKey, monitorId); |
| 262 | ++ await sgai.monitor.create({ |
| 263 | ++ url: "https://example.com", |
| 264 | ++ prompt: "Notify me when the price changes", |
| 265 | ++ interval: "1h", |
| 266 | ++ }); |
| 267 | ++ |
| 268 | ++ await sgai.monitor.list(); |
| 269 | ++ await sgai.monitor.get(monitorId); |
| 270 | ++ await sgai.monitor.delete(monitorId); |
| 271 | ++ |
| 272 | ++ // New in v2 |
| 273 | ++ await sgai.monitor.pause(monitorId); |
| 274 | ++ await sgai.monitor.resume(monitorId); |
| 275 | +``` |
| 276 | + |
| 277 | +## Authentication header |
| 278 | + |
| 279 | +v1 sent `SGAI-APIKEY` as the header. v2 sends `Authorization: Bearer <key>`. |
| 280 | +This is handled internally and requires no code changes if you were using the |
| 281 | +SDK functions directly. If you had custom middleware inspecting headers, update |
| 282 | +accordingly. |
| 283 | + |
| 284 | +## Type exports |
| 285 | + |
| 286 | +v1 exported many granular types (`SmartScraperParams`, `SmartScraperResponse`, |
| 287 | +etc.). v2 exports a smaller, unified set: |
| 288 | + |
| 289 | +```ts |
| 290 | +import type { |
| 291 | + ClientConfig, |
| 292 | + RequestOptions, |
| 293 | + ApiScrapeOptions, |
| 294 | + ApiExtractOptions, |
| 295 | + ApiSearchOptions, |
| 296 | + ApiCrawlOptions, |
| 297 | + ApiMonitorCreateInput, |
| 298 | + ApiHistoryService, |
| 299 | +} from "scrapegraph-js"; |
| 300 | +``` |
| 301 | + |
| 302 | +## Quick checklist |
| 303 | + |
| 304 | +- [ ] Update to Node.js >= 22 |
| 305 | +- [ ] Replace individual function imports with `scrapegraphai()` factory |
| 306 | +- [ ] Wrap calls in `try/catch` instead of checking `result.status` |
| 307 | +- [ ] Rename functions: `smartScraper` -> `extract`, `searchScraper` -> `search`, `markdownify` -> `scrape` |
| 308 | +- [ ] Convert `snake_case` params to `camelCase` |
| 309 | +- [ ] Replace `crawl()` polling with `crawl.start()` + `crawl.status()` |
| 310 | +- [ ] Move site monitor calls to `monitor.*` namespace |
| 311 | +- [ ] Update type imports |
| 312 | +- [ ] Remove usages of `agenticScraper`, `generateSchema`, `sitemap`, `checkHealth` |