
Commit e9876d6

VinciGit00 and claude committed

docs: sync SDK pages with final v2 API surface

Align sdks/javascript.mdx and sdks/python.mdx with the current schemas from scrapegraph-js#11 and scrapegraph-py#82:

- search(): add locationGeoCode/location_geo_code, timeRange/time_range, prompt, format, mode; correct numResults default to 3
- extract(): drop llmConfig from params (ignored by v2 route); document mode, contentType, html, markdown alternatives to url
- scrape(): document the formats[] array (tagged format entries with per-entry config) and add a multi-format example
- crawl.start(): document maxDepth/max_depth, maxPages/max_pages, maxLinksPerPage, allowExternal, contentTypes
- monitor.create(): drop prompt (not in v2 schema); add formats and webhookUrl/webhook_url
- LlmConfig: clarify it belongs inside scrape json/summary format entries, not on extract/search

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

1 parent cec634f · commit e9876d6

2 files changed: 131 additions & 45 deletions

sdks/javascript.mdx (65 additions & 23 deletions)
@@ -100,11 +100,18 @@ const { data, requestId } = await sgai.extract(
 
 | Parameter            | Type        | Required | Description |
 | -------------------- | ----------- | -------- | -------------------------------------------------------- |
-| url                  | string      | Yes      | The URL of the webpage to scrape |
+| url                  | string      | Yes\*    | The URL of the webpage to scrape |
 | options.prompt       | string      | Yes      | A description of what you want to extract |
 | options.schema       | ZodSchema / object | No | Zod schema or JSON schema for structured response |
+| options.mode         | string      | No       | HTML processing mode: `"normal"`, `"reader"`, `"prune"` |
+| options.contentType  | string      | No       | Override the detected content type (e.g. `"text/html"`) |
 | options.fetchConfig  | FetchConfig | No       | Fetch configuration |
-| options.llmConfig    | LlmConfig   | No       | LLM configuration |
+| options.html         | string      | No       | Raw HTML input (alternative to `url`) |
+| options.markdown     | string      | No       | Raw markdown input (alternative to `url`) |
+
+<Note>
+\*One of `url`, `html`, or `markdown` is required.
+</Note>
 
 <Accordion title="With Zod Schema" icon="code">
 ```javascript
@@ -130,7 +137,7 @@ console.log(`Author: ${data.author}`);
 ```
 </Accordion>
 
-<Accordion title="With FetchConfig and LlmConfig" icon="code">
+<Accordion title="With FetchConfig" icon="code">
 ```javascript
 const { data } = await sgai.extract(
   "https://example.com",
@@ -141,10 +148,6 @@ const { data } = await sgai.extract(
       wait: 2000,
       scrolls: 3,
     },
-    llmConfig: {
-      temperature: 0.3,
-      maxTokens: 1000,
-    },
   }
 );
 ```
@@ -163,13 +166,17 @@ const { data } = await sgai.search(
 
 #### Parameters
 
-| Parameter            | Type        | Required | Description |
-| -------------------- | ----------- | -------- | -------------------------------------------------------- |
-| query                | string      | Yes      | The search query |
-| options.numResults   | number      | No       | Number of results (3-20). Default: 5 |
-| options.schema       | ZodSchema / object | No | Schema for structured response |
-| options.fetchConfig  | FetchConfig | No       | Fetch configuration |
-| options.llmConfig    | LlmConfig   | No       | LLM configuration |
+| Parameter               | Type        | Required | Description |
+| ----------------------- | ----------- | -------- | -------------------------------------------------------- |
+| query                   | string      | Yes      | The search query (1-500 chars) |
+| options.numResults      | number      | No       | Number of results (1-20). Default: 3 |
+| options.prompt          | string      | No       | Prompt used when extracting structured results |
+| options.schema          | ZodSchema / object | No | Schema for structured response (requires `prompt`) |
+| options.format          | string      | No       | `"markdown"` (default) or `"html"` |
+| options.mode            | string      | No       | HTML processing mode: `"normal"`, `"reader"`, `"prune"` (default) |
+| options.locationGeoCode | string      | No       | Geo code for localized search (e.g. `"us"`, `"it"`, `"gb"`) |
+| options.timeRange       | string      | No       | Recency filter: `"past_hour"`, `"past_24_hours"`, `"past_week"`, `"past_month"`, `"past_year"` |
+| options.fetchConfig     | FetchConfig | No       | Fetch configuration |
 
 <Accordion title="Schema Example" icon="code">
 ```javascript
@@ -196,7 +203,7 @@ console.log(`Price: ${data.price}`);
 
 ### scrape()
 
-Convert any webpage to markdown, HTML, screenshot, or branding format.
+Convert any webpage into one or more output formats in a single request.
 
 ```javascript
 const { data } = await sgai.scrape("https://example.com");
@@ -205,11 +212,30 @@ console.log(data);
 
 #### Parameters
 
-| Parameter            | Type        | Required | Description |
-| -------------------- | ----------- | -------- | -------------------------------------------------------- |
-| url                  | string      | Yes      | The URL of the webpage to scrape |
-| options.format       | string      | No       | `"markdown"`, `"html"`, `"screenshot"`, `"branding"` |
-| options.fetchConfig  | FetchConfig | No       | Fetch configuration |
+| Parameter            | Type          | Required | Description |
+| -------------------- | ------------- | -------- | -------------------------------------------------------- |
+| url                  | string        | Yes      | The URL of the webpage to scrape |
+| options.formats      | FormatEntry[] | No       | Array of format entries. Defaults to `[{ type: "markdown", mode: "normal" }]` |
+| options.contentType  | string        | No       | Override the detected content type |
+| options.fetchConfig  | FetchConfig   | No       | Fetch configuration |
+
+Each format entry is a tagged object. Supported `type` values: `"markdown"`, `"html"`, `"screenshot"`, `"links"`, `"images"`, `"summary"`, `"json"`, `"branding"`. Entries can carry their own config:
+
+<Accordion title="Multi-format Example" icon="code">
+```javascript
+const { data } = await sgai.scrape("https://example.com", {
+  formats: [
+    { type: "markdown", mode: "normal" },
+    { type: "screenshot", fullPage: true, width: 1440, height: 900 },
+    {
+      type: "json",
+      prompt: "Extract the product list",
+      schema: { products: [{ name: "string", price: "string" }] },
+    },
+  ],
+});
+```
+</Accordion>
 
 ### crawl
 
@@ -219,7 +245,7 @@ Manage multi-page crawl operations asynchronously.
 // Start a crawl
 const job = await sgai.crawl.start("https://example.com", {
   maxDepth: 2,
-  maxPages: 10,
+  maxPages: 50,
   includePatterns: ["/blog/*", "/docs/**"],
   excludePatterns: ["/admin/*", "/api/*"],
 });
@@ -234,6 +260,21 @@ await sgai.crawl.stop(job.data.id);
 await sgai.crawl.resume(job.data.id);
 ```
 
+#### crawl.start() Parameters
+
+| Parameter               | Type          | Required | Description |
+| ----------------------- | ------------- | -------- | -------------------------------------------------------- |
+| url                     | string        | Yes      | The starting URL |
+| options.formats         | FormatEntry[] | No       | Output formats per page. Defaults to `[{ type: "markdown", mode: "normal" }]` |
+| options.maxDepth        | number        | No       | Maximum crawl depth. Default: `2` |
+| options.maxPages        | number        | No       | Maximum pages to crawl (1-1000). Default: `50` |
+| options.maxLinksPerPage | number        | No       | Maximum links followed per page. Default: `10` |
+| options.allowExternal   | boolean       | No       | Allow crossing domains. Default: `false` |
+| options.includePatterns | string[]      | No       | URL patterns to include |
+| options.excludePatterns | string[]      | No       | URL patterns to exclude |
+| options.contentTypes    | string[]      | No       | Allowed content types |
+| options.fetchConfig     | FetchConfig   | No       | Fetch configuration |
+
 ### monitor
 
 Create and manage site monitoring jobs.
@@ -243,8 +284,9 @@ Create and manage site monitoring jobs.
 const monitor = await sgai.monitor.create({
   name: "Price Tracker",
   url: "https://example.com",
-  prompt: "Track price changes",
   interval: "0 9 * * *", // Daily at 9 AM
+  formats: [{ type: "markdown", mode: "normal" }],
+  webhookUrl: "https://example.com/webhook",
 });
 
 // List all monitors
@@ -305,7 +347,7 @@ Controls how pages are fetched. See the [proxy configuration guide](/services/ad
 
 ### LlmConfig
 
-Controls LLM behavior for AI-powered methods.
+Controls LLM behavior for format entries that run an LLM (scrape `json` and `summary` formats). Pass it inside the format entry; it is not accepted at the top level of `extract` or `search` in v2.
 
 ```javascript
 {

sdks/python.mdx (66 additions & 22 deletions)
@@ -85,10 +85,19 @@ print(response)
 
 | Parameter     | Type        | Required | Description |
 | ------------- | ----------- | -------- | -------------------------------------------------------- |
-| url           | string      | Yes      | The URL of the webpage to scrape |
+| url           | string      | Yes\*    | The URL of the webpage to extract from |
 | prompt        | string      | Yes      | A description of what you want to extract |
-| output_schema | object      | No       | Pydantic model for structured response |
+| output_schema | object      | No       | Pydantic model for structured response (alias for `schema`) |
+| schema        | dict        | No       | JSON schema for structured response |
+| mode          | string      | No       | HTML processing mode: `"normal"`, `"reader"`, `"prune"` |
+| content_type  | string      | No       | Override the detected content type |
 | fetch_config  | FetchConfig | No       | Fetch configuration (stealth, rendering, etc.) |
+| html          | string      | No       | Raw HTML input (alternative to `url`) |
+| markdown      | string      | No       | Raw markdown input (alternative to `url`) |
+
+<Note>
+\*One of `url`, `html`, or `markdown` is required.
+</Note>
 
 <Accordion title="Schema Example" icon="code">
 ```python
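The input rule above can be sketched as a small client-side check. The helper name and the exactly-one assumption are illustrative, not part of the SDK:

```python
def resolve_extract_input(url=None, html=None, markdown=None):
    """Pick the single input source for extract().

    Assumes the three inputs are mutually exclusive, which is an
    interpretation of the documented "one of url, html, or markdown
    is required" note, not confirmed SDK behavior.
    """
    provided = {name: value for name, value in
                [("url", url), ("html", html), ("markdown", markdown)]
                if value is not None}
    if len(provided) != 1:
        raise ValueError("exactly one of url, html, or markdown is required")
    return next(iter(provided.items()))
```

For example, `resolve_extract_input(html="<p>hi</p>")` yields `("html", "<p>hi</p>")`, while calling it with no inputs raises `ValueError`.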
@@ -123,12 +132,18 @@ response = client.search(
 
 #### Parameters
 
-| Parameter     | Type        | Required | Description |
-| ------------- | ----------- | -------- | -------------------------------------------------------- |
-| query         | string      | Yes      | The search query |
-| num_results   | number      | No       | Number of results (3-20). Default: 5 |
-| output_schema | object      | No       | Pydantic model for structured response |
-| fetch_config  | FetchConfig | No       | Fetch configuration |
+| Parameter         | Type        | Required | Description |
+| ----------------- | ----------- | -------- | -------------------------------------------------------- |
+| query             | string      | Yes      | The search query (1-500 chars) |
+| num_results       | int         | No       | Number of results (1-20). Default: 5 |
+| prompt            | string      | No       | Prompt used when extracting structured results |
+| output_schema     | object      | No       | Pydantic model for structured response (alias for `schema`) |
+| schema            | dict        | No       | JSON schema for structured response (requires `prompt`) |
+| format            | string      | No       | `"markdown"` (default) or `"html"` |
+| mode              | string      | No       | HTML processing mode: `"normal"`, `"reader"`, `"prune"` (default) |
+| location_geo_code | string      | No       | Geo code for localized results (e.g. `"us"`, `"it"`, `"gb"`) |
+| time_range        | string      | No       | Recency filter: `"past_hour"`, `"past_24_hours"`, `"past_week"`, `"past_month"`, `"past_year"` |
+| fetch_config      | FetchConfig | No       | Fetch configuration |
 
 <Accordion title="Schema Example" icon="code">
 ```python
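The documented ranges above (query length, `num_results` bounds, the `time_range` vocabulary) can be sketched as a pre-flight validation step. The helper and its error messages are hypothetical; only the ranges come from the table:

```python
ALLOWED_TIME_RANGES = {
    "past_hour", "past_24_hours", "past_week", "past_month", "past_year",
}

def build_search_payload(query, num_results=5, time_range=None,
                         location_geo_code=None):
    """Validate search parameters against the documented ranges."""
    if not 1 <= len(query) <= 500:
        raise ValueError("query must be 1-500 characters")
    if not 1 <= num_results <= 20:
        raise ValueError("num_results must be between 1 and 20")
    if time_range is not None and time_range not in ALLOWED_TIME_RANGES:
        raise ValueError(f"unsupported time_range: {time_range}")
    payload = {"query": query, "num_results": num_results}
    if time_range is not None:
        payload["time_range"] = time_range
    if location_geo_code is not None:
        payload["location_geo_code"] = location_geo_code
    return payload
```

Calling `build_search_payload("best laptops", time_range="past_week", location_geo_code="us")` produces a dict with both optional keys set, while an out-of-range `num_results` raises early instead of failing at the API.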
@@ -154,7 +169,7 @@ print(f"Price: {response['data']['price']}")
 
 ### Scrape
 
-Convert any webpage into markdown, HTML, screenshot, or branding format.
+Convert any webpage into one or more output formats in a single request.
 
 ```python
 response = client.scrape(
@@ -167,9 +182,30 @@ response = client.scrape(
 | Parameter     | Type        | Required | Description |
 | ------------- | ----------- | -------- | -------------------------------------------------------- |
 | url           | string      | Yes      | The URL of the webpage to scrape |
-| format        | string      | No       | Output format: `"markdown"`, `"html"`, `"screenshot"`, `"branding"` |
+| formats       | list[dict]  | No       | Array of format entries. Defaults to `[{"type": "markdown", "mode": "normal"}]` |
+| format        | string      | No       | Legacy single-format shortcut (`"markdown"`, `"html"`, `"screenshot"`, `"branding"`) |
+| content_type  | string      | No       | Override the detected content type |
 | fetch_config  | FetchConfig | No       | Fetch configuration |
 
+Each format entry is a dict with a `type` key. Supported types: `"markdown"`, `"html"`, `"screenshot"`, `"links"`, `"images"`, `"summary"`, `"json"`, `"branding"`. Entries can carry their own config:
+
+<Accordion title="Multi-format Example" icon="code">
+```python
+response = client.scrape(
+    url="https://example.com",
+    formats=[
+        {"type": "markdown", "mode": "normal"},
+        {"type": "screenshot", "fullPage": True, "width": 1440, "height": 900},
+        {
+            "type": "json",
+            "prompt": "Extract the product list",
+            "schema": {"products": [{"name": "string", "price": "string"}]},
+        },
+    ],
+)
+```
+</Accordion>
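The default and the supported `type` values above can be captured in a small normalization helper. This is an illustrative sketch of the documented contract, not the SDK's actual internals:

```python
SUPPORTED_FORMAT_TYPES = {
    "markdown", "html", "screenshot", "links",
    "images", "summary", "json", "branding",
}

def normalize_formats(formats=None):
    """Apply the documented default and validate each tagged entry."""
    if formats is None:
        # Documented default when no formats are given.
        formats = [{"type": "markdown", "mode": "normal"}]
    for entry in formats:
        if entry.get("type") not in SUPPORTED_FORMAT_TYPES:
            raise ValueError(f"unsupported format type: {entry.get('type')!r}")
    return formats
```

With no argument it returns the documented default; an entry like `{"type": "pdf"}` raises before any request is made.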
+
 ### Crawl
 
 Manage multi-page crawl operations asynchronously.
@@ -178,7 +214,8 @@ Manage multi-page crawl operations asynchronously.
 # Start a crawl
 job = client.crawl.start(
     url="https://example.com",
-    depth=2,
+    max_depth=2,
+    max_pages=50,
     include_patterns=["/blog/*", "/docs/**"],
     exclude_patterns=["/admin/*", "/api/*"],
 )
@@ -197,13 +234,19 @@ client.crawl.resume(job["id"])
 
 #### crawl.start() Parameters
 
-| Parameter        | Type        | Required | Description |
-| ---------------- | ----------- | -------- | -------------------------------------------------------- |
-| url              | string      | Yes      | The starting URL to crawl |
-| depth            | int         | No       | Crawl depth level |
-| include_patterns | list[str]   | No       | URL patterns to include (`*` any chars, `**` any path) |
-| exclude_patterns | list[str]   | No       | URL patterns to exclude |
-| fetch_config     | FetchConfig | No       | Fetch configuration |
+| Parameter          | Type        | Required | Description |
+| ------------------ | ----------- | -------- | -------------------------------------------------------- |
+| url                | string      | Yes      | The starting URL to crawl |
+| formats            | list[dict]  | No       | Output formats per page. Defaults to `[{"type": "markdown", "mode": "normal"}]` |
+| max_depth          | int         | No       | Maximum crawl depth. Default: `2` |
+| max_pages          | int         | No       | Maximum pages to crawl (1-1000). Default: `10` |
+| max_links_per_page | int         | No       | Maximum links followed per page. Default: `10` |
+| allow_external     | bool        | No       | Allow crossing domains. Default: `False` |
+| include_patterns   | list[str]   | No       | URL patterns to include (`*` any chars, `**` any path) |
+| exclude_patterns   | list[str]   | No       | URL patterns to exclude |
+| content_types      | list[str]   | No       | Allowed content types |
+| fetch_config       | FetchConfig | No       | Fetch configuration |
+| depth              | int         | No       | Legacy alias for `max_depth` |
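One way to picture the legacy `depth` alias and the documented `max_pages` bound is a small parameter builder. The precedence of `depth` over the `max_depth` default is an assumption here, not documented behavior:

```python
def build_crawl_params(url, max_depth=2, max_pages=10, depth=None):
    """Map the legacy depth alias onto max_depth and enforce the page cap."""
    if depth is not None:
        # Assumed: the legacy alias fills in for max_depth when supplied.
        max_depth = depth
    if not 1 <= max_pages <= 1000:
        raise ValueError("max_pages must be between 1 and 1000")
    return {"url": url, "max_depth": max_depth, "max_pages": max_pages}
```

So `build_crawl_params("https://example.com", depth=3)` carries `max_depth=3`, and `max_pages=0` is rejected before the request is sent.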
 
 ### Monitor
 
@@ -214,8 +257,9 @@ Create and manage site monitoring jobs.
 monitor = client.monitor.create(
     name="Price Tracker",
     url="https://example.com",
-    prompt="Track price changes",
     interval="0 9 * * *",  # Daily at 9 AM
+    formats=[{"type": "markdown", "mode": "normal"}],
+    webhook_url="https://example.com/webhook",
 )
 
 # List all monitors
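The `interval` value is a standard five-field cron expression; a minimal splitter shows how `"0 9 * * *"` maps to 09:00 daily. This is illustrative only and not part of the SDK:

```python
def parse_cron(expr):
    """Split a five-field cron expression into named fields."""
    fields = expr.split()
    if len(fields) != 5:
        raise ValueError("expected 5 cron fields: minute hour day month weekday")
    names = ("minute", "hour", "day_of_month", "month", "day_of_week")
    return dict(zip(names, fields))
```

Here `parse_cron("0 9 * * *")` gives `minute="0"` and `hour="9"` with the remaining fields as wildcards, i.e. every day at 9 AM.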
@@ -273,14 +317,14 @@ config = FetchConfig(
 
 ### LlmConfig
 
-Controls LLM behavior for AI-powered methods.
+Controls LLM behavior for format entries that run an LLM (scrape `json` and `summary` formats). Pass it inside the format entry; it is deprecated at the top level of `extract` and `search` in v2 and is ignored by the API.
 
 ```python
 from scrapegraph_py import LlmConfig
 
 config = LlmConfig(
     model="gpt-4o-mini",   # LLM model to use
-    temperature=0.3,       # Response creativity (0.0-2.0)
+    temperature=0.3,       # Response creativity (0.0-1.0)
     max_tokens=1000,       # Maximum response tokens
     chunker="auto",        # Content chunking strategy ("auto" or custom config)
 )
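The documented ranges for `temperature` and `max_tokens` can be checked before attaching settings to a `json` or `summary` format entry. The helper below is a sketch under those documented ranges, not SDK code:

```python
def validate_llm_settings(temperature=0.3, max_tokens=1000):
    """Check the documented LlmConfig ranges before use in a format entry."""
    if not 0.0 <= temperature <= 1.0:
        raise ValueError("temperature must be within 0.0-1.0")
    if max_tokens < 1:
        raise ValueError("max_tokens must be a positive integer")
    return {"temperature": temperature, "max_tokens": max_tokens}
```

A value like `temperature=1.5` (valid under the old 0.0-2.0 range) now fails this check, matching the corrected comment above.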
@@ -304,7 +348,7 @@ async def main():
     print(response)
 
     # Crawl
-    job = await client.crawl.start("https://example.com", depth=2)
+    job = await client.crawl.start("https://example.com", max_depth=2)
     status = await client.crawl.status(job["id"])
     print(status)
 