Skip to content

Commit f3eed23

Browse files
VinciGit00 and claude authored and committed
refactor: split fetch mode into mode + stealth toggle
Align with scrapegraph-py PR #82 commit f5dc6e63. Replace compound fetch modes (direct+stealth, js+stealth) with separate mode (auto/fast/js) + stealth boolean on all tools and fetchConfig builder. Tested against local dev API (localhost:3002) — all endpoints verified working. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 41994d0 commit f3eed23

1 file changed

Lines changed: 46 additions & 25 deletions

File tree

src/scrapegraph_mcp/server.py

Lines changed: 46 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ def _fetch_config(
187187
self,
188188
*,
189189
mode: Optional[str] = None,
190+
stealth: Optional[bool] = None,
190191
timeout: Optional[int] = None,
191192
wait: Optional[int] = None,
192193
headers: Optional[Dict[str, str]] = None,
@@ -198,6 +199,8 @@ def _fetch_config(
198199
cfg: Dict[str, Any] = {}
199200
if mode is not None:
200201
cfg["mode"] = mode
202+
if stealth is not None:
203+
cfg["stealth"] = stealth
201204
if timeout is not None:
202205
cfg["timeout"] = timeout
203206
if wait is not None:
@@ -239,6 +242,7 @@ def markdownify(
239242
self,
240243
website_url: str,
241244
mode: Optional[str] = None,
245+
stealth: Optional[bool] = None,
242246
headers: Optional[Dict[str, str]] = None,
243247
cookies: Optional[Dict[str, str]] = None,
244248
country: Optional[str] = None,
@@ -248,7 +252,7 @@ def markdownify(
248252
mock: Optional[bool] = None,
249253
) -> Dict[str, Any]:
250254
fc = self._fetch_config(
251-
mode=mode, timeout=timeout, wait=wait, headers=headers,
255+
mode=mode, stealth=stealth, timeout=timeout, wait=wait, headers=headers,
252256
cookies=cookies, country=country, scrolls=scrolls, mock=mock,
253257
)
254258
return self.scrape_v2(website_url, "markdown", fetch_config_dict=fc)
@@ -980,14 +984,20 @@ def parameter_reference_guide() -> str:
980984
- `"auto"`: Automatically selects the best provider chain
981985
- `"fast"`: Direct HTTP fetch via impit (fastest, no JS)
982986
- `"js"`: Headless browser rendering for JS-heavy pages
983-
- `"direct+stealth"`: Residential proxy with stealth headers (no JS)
984-
- `"js+stealth"`: JS rendering combined with stealth/residential proxy
985987
- **Performance Impact**:
986988
- `fast`: 2-5 seconds
987989
- `js`: 15-30 seconds
988-
- `direct+stealth`/`js+stealth`: variable, depends on proxy
989990
- **Cost**: Same regardless of setting
990991
992+
### `stealth`
993+
**Used in**: markdownify, scrape, smartscraper, smartcrawler_initiate, monitor_create
994+
995+
- **Type**: `Optional[bool]`
996+
- **Default**: `false`
997+
- **Purpose**: Use residential proxies to bypass bot detection
998+
- **Cost**: +5 credits per request
999+
- **Combine with any mode**: e.g. `mode="js"` + `stealth=True` for JS rendering with residential proxy
1000+
9911001
### `timeout`
9921002
**Used in**: markdownify, scrape, smartscraper, smartcrawler_initiate, monitor_create
9931003
- **Type**: `Optional[int]`
@@ -1200,7 +1210,7 @@ def parameter_reference_guide() -> str:
12001210
### For Dynamic Content
12011211
```
12021212
Tool: smartscraper or scrape
1203-
Parameters: mode="js" (or mode="js+stealth" if bot detection is present)
1213+
Parameters: mode="js" (add stealth=True if bot detection is present)
12041214
```
12051215
12061216
### For Multi-Page Content
@@ -1392,7 +1402,8 @@ def tool_comparison_guide() -> str:
13921402
def markdownify(
13931403
website_url: str,
13941404
ctx: Context,
1395-
mode: Optional[Literal["auto", "fast", "js", "direct+stealth", "js+stealth"]] = None,
1405+
mode: Optional[Literal["auto", "fast", "js"]] = None,
1406+
stealth: Optional[bool] = None,
13961407
headers: Optional[Dict[str, str]] = None,
13971408
cookies: Optional[Dict[str, str]] = None,
13981409
country: Optional[str] = None,
@@ -1410,8 +1421,7 @@ def markdownify(
14101421
- auto: Automatically selects the best provider chain (default).
14111422
- fast: Direct HTTP fetch via impit (fastest, no JS).
14121423
- js: Headless browser rendering for JavaScript-heavy pages.
1413-
- direct+stealth: Residential proxy with stealth headers (no JS).
1414-
- js+stealth: JS rendering combined with stealth/residential proxy.
1424+
stealth: Use residential proxies to bypass bot detection (+5 credits).
14151425
headers: Custom HTTP headers to send with the request.
14161426
cookies: Cookies to send with the request.
14171427
country: Two-letter country code for geo-located requests (e.g. 'us').
@@ -1424,8 +1434,9 @@ def markdownify(
14241434
api_key = get_api_key(ctx)
14251435
client = ScapeGraphClient(api_key)
14261436
return client.markdownify(
1427-
website_url=website_url, mode=mode, headers=headers, cookies=cookies,
1428-
country=country, timeout=timeout, wait=wait, scrolls=scrolls, mock=mock,
1437+
website_url=website_url, mode=mode, stealth=stealth, headers=headers,
1438+
cookies=cookies, country=country, timeout=timeout, wait=wait,
1439+
scrolls=scrolls, mock=mock,
14291440
)
14301441
except Exception as e:
14311442
return {"error": str(e)}
@@ -1447,7 +1458,8 @@ def smartscraper(
14471458
),
14481459
]
14491460
] = None,
1450-
mode: Optional[Literal["auto", "fast", "js", "direct+stealth", "js+stealth"]] = None,
1461+
mode: Optional[Literal["auto", "fast", "js"]] = None,
1462+
stealth: Optional[bool] = None,
14511463
headers: Optional[Dict[str, str]] = None,
14521464
cookies: Optional[Dict[str, str]] = None,
14531465
country: Optional[str] = None,
@@ -1464,7 +1476,8 @@ def smartscraper(
14641476
website_url: URL to extract data from (must include http:// or https://).
14651477
output_schema: JSON schema (dict or JSON string) defining the expected output structure.
14661478
If "required" field is missing, it will be automatically added as [].
1467-
mode: Fetch/proxy mode — auto, fast, js, direct+stealth, js+stealth.
1479+
mode: Fetch/proxy mode — auto, fast, js.
1480+
stealth: Use residential proxies to bypass bot detection (+5 credits).
14681481
headers: Custom HTTP headers.
14691482
cookies: Cookies to send with the request.
14701483
country: Two-letter country code for geo-located requests (e.g. 'us').
@@ -1496,7 +1509,7 @@ def smartscraper(
14961509
normalized_schema["required"] = []
14971510

14981511
fc = client._fetch_config(
1499-
mode=mode, timeout=timeout, wait=wait, headers=headers,
1512+
mode=mode, stealth=stealth, timeout=timeout, wait=wait, headers=headers,
15001513
cookies=cookies, country=country, scrolls=scrolls, mock=mock,
15011514
)
15021515

@@ -1531,7 +1544,8 @@ def searchscraper(
15311544
time_range: Optional[str] = None,
15321545
search_format: Literal["markdown", "html"] = "markdown",
15331546
search_mode: Literal["prune", "normal"] = "prune",
1534-
mode: Optional[Literal["auto", "fast", "js", "direct+stealth", "js+stealth"]] = None,
1547+
mode: Optional[Literal["auto", "fast", "js"]] = None,
1548+
stealth: Optional[bool] = None,
15351549
headers: Optional[Dict[str, str]] = None,
15361550
cookies: Optional[Dict[str, str]] = None,
15371551
country: Optional[str] = None,
@@ -1554,7 +1568,8 @@ def searchscraper(
15541568
time_range: Relative recency filter for results (e.g. 'past_day').
15551569
search_format: Per-result scrape format — 'markdown' (default) or 'html'.
15561570
search_mode: HTML processing mode — 'prune' (default) or 'normal'.
1557-
mode: Fetch/proxy mode — auto, fast, js, direct+stealth, js+stealth.
1571+
mode: Fetch/proxy mode — auto, fast, js.
1572+
stealth: Use residential proxies to bypass bot detection (+5 credits).
15581573
headers: Custom HTTP headers.
15591574
cookies: Cookies to send with the request.
15601575
country: Two-letter country code for geo-located fetches (fetch_config).
@@ -1584,7 +1599,7 @@ def searchscraper(
15841599
return {"error": "`prompt` is required when `output_schema` is provided"}
15851600

15861601
fc = client._fetch_config(
1587-
mode=mode, timeout=timeout, wait=wait, headers=headers,
1602+
mode=mode, stealth=stealth, timeout=timeout, wait=wait, headers=headers,
15881603
cookies=cookies, country=country, scrolls=scrolls, mock=mock,
15891604
)
15901605

@@ -1618,7 +1633,8 @@ def smartcrawler_initiate(
16181633
content_types: Optional[List[str]] = None,
16191634
include_patterns: Optional[List[str]] = None,
16201635
exclude_patterns: Optional[List[str]] = None,
1621-
mode: Optional[Literal["auto", "fast", "js", "direct+stealth", "js+stealth"]] = None,
1636+
mode: Optional[Literal["auto", "fast", "js"]] = None,
1637+
stealth: Optional[bool] = None,
16221638
headers: Optional[Dict[str, str]] = None,
16231639
cookies: Optional[Dict[str, str]] = None,
16241640
country: Optional[str] = None,
@@ -1643,7 +1659,8 @@ def smartcrawler_initiate(
16431659
content_types: Allowed response content types for crawled pages.
16441660
include_patterns: URL patterns to include.
16451661
exclude_patterns: URL patterns to exclude.
1646-
mode: Fetch/proxy mode — auto, fast, js, direct+stealth, js+stealth.
1662+
mode: Fetch/proxy mode — auto, fast, js.
1663+
stealth: Use residential proxies to bypass bot detection (+5 credits).
16471664
headers: Custom HTTP headers.
16481665
cookies: Cookies to send with the request.
16491666
country: Two-letter country code for geo-located requests.
@@ -1667,7 +1684,7 @@ def smartcrawler_initiate(
16671684
raise ValueError("max_links_per_page must be >= 1")
16681685

16691686
fc = client._fetch_config(
1670-
mode=mode, timeout=timeout, wait=wait, headers=headers,
1687+
mode=mode, stealth=stealth, timeout=timeout, wait=wait, headers=headers,
16711688
cookies=cookies, country=country, scrolls=scrolls, mock=mock,
16721689
)
16731690
return client.crawl_start(
@@ -1803,7 +1820,8 @@ def monitor_create(
18031820
),
18041821
]
18051822
] = None,
1806-
mode: Optional[Literal["auto", "fast", "js", "direct+stealth", "js+stealth"]] = None,
1823+
mode: Optional[Literal["auto", "fast", "js"]] = None,
1824+
stealth: Optional[bool] = None,
18071825
headers: Optional[Dict[str, str]] = None,
18081826
cookies: Optional[Dict[str, str]] = None,
18091827
country: Optional[str] = None,
@@ -1825,7 +1843,8 @@ def monitor_create(
18251843
name: Optional monitor name.
18261844
webhook_url: Optional webhook URL invoked when changes are detected.
18271845
output_schema: JSON schema (dict or JSON string) for structured output.
1828-
mode: Fetch/proxy mode — auto, fast, js, direct+stealth, js+stealth.
1846+
mode: Fetch/proxy mode — auto, fast, js.
1847+
stealth: Use residential proxies to bypass bot detection (+5 credits).
18291848
headers: Custom HTTP headers.
18301849
cookies: Cookies to send with the request.
18311850
country: Two-letter country code for geo-located requests.
@@ -1851,7 +1870,7 @@ def monitor_create(
18511870
return {"error": f"Invalid JSON for output_schema: {e}"}
18521871

18531872
fc = client._fetch_config(
1854-
mode=mode, timeout=timeout, wait=wait, headers=headers,
1873+
mode=mode, stealth=stealth, timeout=timeout, wait=wait, headers=headers,
18551874
cookies=cookies, country=country, scrolls=scrolls, mock=mock,
18561875
)
18571876
return client.monitor_create(
@@ -1979,7 +1998,8 @@ def scrape(
19791998
] = "markdown",
19801999
screenshot_full_page: bool = False,
19812000
content_type: Optional[str] = None,
1982-
mode: Optional[Literal["auto", "fast", "js", "direct+stealth", "js+stealth"]] = None,
2001+
mode: Optional[Literal["auto", "fast", "js"]] = None,
2002+
stealth: Optional[bool] = None,
19832003
headers: Optional[Dict[str, str]] = None,
19842004
cookies: Optional[Dict[str, str]] = None,
19852005
country: Optional[str] = None,
@@ -1997,7 +2017,8 @@ def scrape(
19972017
links, images, or summary.
19982018
screenshot_full_page: Capture full page screenshot (screenshot format only).
19992019
content_type: Optional contentType override passed through to the API.
2000-
mode: Fetch/proxy mode — auto, fast, js, direct+stealth, js+stealth.
2020+
mode: Fetch/proxy mode — auto, fast, js.
2021+
stealth: Use residential proxies to bypass bot detection (+5 credits).
20012022
headers: Custom HTTP headers.
20022023
cookies: Cookies to send with the request.
20032024
country: Two-letter country code for geo-located requests (e.g. 'us').
@@ -2010,7 +2031,7 @@ def scrape(
20102031
api_key = get_api_key(ctx)
20112032
client = ScapeGraphClient(api_key)
20122033
fc = client._fetch_config(
2013-
mode=mode, timeout=timeout, wait=wait, headers=headers,
2034+
mode=mode, stealth=stealth, timeout=timeout, wait=wait, headers=headers,
20142035
cookies=cookies, country=country, scrolls=scrolls, mock=mock,
20152036
)
20162037
return client.scrape(

0 commit comments

Comments (0)