Skip to content

Commit bc76497

Browse files
committed
refactor(api): align python sdk with v2 schema surface
1 parent 6959035 commit bc76497

11 files changed

Lines changed: 45 additions & 139 deletions

File tree

scrapegraph-py/MIGRATION_V2.md

Lines changed: 7 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ from scrapegraph_py import FetchConfig
9595
response = client.extract(
9696
url="https://example.com",
9797
prompt="Extract the main heading and description",
98-
output_schema=MyPydanticModel,
98+
schema=MyPydanticModel,
9999
fetch_config=FetchConfig(
100100
mode="js+stealth",
101101
headers={"User-Agent": "MyBot"},
@@ -110,7 +110,7 @@ response = client.extract(
110110
|---|---|
111111
| `website_url` | `url` |
112112
| `user_prompt` | `prompt` |
113-
| `output_schema` | `output_schema` (unchanged) |
113+
| `output_schema` | `schema` |
114114
| `headers` | `fetch_config=FetchConfig(headers=...)` |
115115
| `cookies` | `fetch_config=FetchConfig(cookies=...)` |
116116
| `number_of_scrolls` | `fetch_config=FetchConfig(scrolls=...)` |
@@ -152,15 +152,15 @@ result = client.get_searchscraper(request_id)
152152
response = client.search(
153153
query="What is the latest version of Python?",
154154
num_results=5,
155-
output_schema=MyModel,
155+
schema=MyModel,
156156
)
157157
```
158158

159159
| v1 parameter | v2 equivalent |
160160
|---|---|
161161
| `user_prompt` | `query` |
162162
| `num_results` | `num_results` (unchanged) |
163-
| `output_schema` | `output_schema` (unchanged) |
163+
| `output_schema` | `schema` |
164164
| `extraction_mode` | Removed (always AI extraction) |
165165
| `stealth` | Removed (use `fetch_config=FetchConfig(mode=...)` on other endpoints) |
166166
| `location_geo_code` | Removed |
@@ -390,17 +390,16 @@ execs = client.get_job_executions(job_id, page=1, page_size=20)
390390
#### v2
391391

392392
```python
393-
from scrapegraph_py import FetchConfig, LlmConfig
393+
from scrapegraph_py import FetchConfig
394394

395395
# Create
396396
monitor = client.monitor.create(
397397
name="Daily Scraper",
398398
url="https://example.com",
399399
prompt="Extract company info",
400400
interval="0 9 * * *",
401-
output_schema={"type": "object", "properties": {"name": {"type": "string"}}},
401+
schema={"type": "object", "properties": {"name": {"type": "string"}}},
402402
fetch_config=FetchConfig(mode="direct+stealth"),
403-
llm_config=LlmConfig(temperature=0.1),
404403
)
405404

406405
# List
@@ -517,7 +516,7 @@ The following v1 endpoints have been **removed** in v2:
517516

518517
## Shared Configuration Models
519518

520-
v2 introduces `FetchConfig` and `LlmConfig` — reusable configuration objects that replace the scattered per-method parameters from v1.
519+
v2 introduces `FetchConfig` — a reusable configuration object that replaces the scattered per-method fetch parameters from v1.
521520

522521
### FetchConfig
523522

@@ -548,21 +547,6 @@ config = FetchConfig(
548547
| `direct+stealth` | Residential proxy with stealth headers, no JS |
549548
| `js+stealth` | JS rendering combined with stealth/residential proxy |
550549

551-
### LlmConfig
552-
553-
Controls the AI model used for extraction. Used by `extract()`, `search()`, and `monitor.create()`.
554-
555-
```python
556-
from scrapegraph_py import LlmConfig
557-
558-
config = LlmConfig(
559-
model="gpt-4", # LLM model to use
560-
temperature=0.3, # Sampling temperature (0.0-2.0)
561-
max_tokens=1000, # Max tokens in response
562-
chunker="auto", # Chunking strategy for large pages
563-
)
564-
```
565-
566550
---
567551

568552
## Removed Features

scrapegraph-py/scrapegraph_py/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
from .models.monitor import MonitorCreateRequest
3333
from .models.scrape import ScrapeFormat, ScrapeRequest
3434
from .models.search import SearchRequest
35-
from .models.shared import FetchConfig, FetchMode, LlmConfig
35+
from .models.shared import FetchConfig, FetchMode
3636

3737
__version__ = VERSION
3838

@@ -42,7 +42,6 @@
4242
# Shared config
4343
"FetchConfig",
4444
"FetchMode",
45-
"LlmConfig",
4645
# Scrape
4746
"ScrapeFormat",
4847
"ScrapeRequest",

scrapegraph-py/scrapegraph_py/async_client.py

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
from scrapegraph_py.config import API_BASE_URL, DEFAULT_HEADERS
2424
from scrapegraph_py.exceptions import APIError
2525
from scrapegraph_py.logger import sgai_logger as logger
26-
from scrapegraph_py.models.shared import FetchConfig, LlmConfig
26+
from scrapegraph_py.models.shared import FetchConfig
2727
from scrapegraph_py.utils.helpers import handle_async_response, validate_api_key
2828
from scrapegraph_py.utils.request_builders import (
2929
build_crawl_payload,
@@ -113,9 +113,7 @@ async def create(
113113
url: str,
114114
prompt: Optional[str],
115115
interval: str,
116-
output_schema: Optional[Dict[str, Any]] = None,
117116
fetch_config: Optional[FetchConfig] = None,
118-
llm_config: Optional[LlmConfig] = None,
119117
schema: Optional[Any] = None,
120118
formats: Optional[List[Dict[str, Any]]] = None,
121119
webhook_url: Optional[str] = None,
@@ -130,9 +128,7 @@ async def create(
130128
url=url,
131129
prompt=prompt,
132130
interval=interval,
133-
output_schema=output_schema,
134131
fetch_config=fetch_config,
135-
llm_config=llm_config,
136132
schema=schema,
137133
formats=formats,
138134
webhook_url=webhook_url,
@@ -335,9 +331,7 @@ async def extract(
335331
self,
336332
url: Optional[str],
337333
prompt: str,
338-
output_schema: Optional[Any] = None,
339334
fetch_config: Optional[FetchConfig] = None,
340-
llm_config: Optional[LlmConfig] = None,
341335
*,
342336
schema: Optional[Any] = None,
343337
mode: str = "normal",
@@ -350,9 +344,7 @@ async def extract(
350344
Args:
351345
url: URL to extract data from
352346
prompt: Natural language prompt describing what to extract
353-
output_schema: Legacy alias for schema
354347
fetch_config: Fetch configuration options
355-
llm_config: Deprecated and ignored by the SGAI v2 extract route
356348
"""
357349
logger.info(f"Extracting from {url}")
358350
return await self._make_request(
@@ -361,9 +353,7 @@ async def extract(
361353
json=build_extract_payload(
362354
url=url,
363355
prompt=prompt,
364-
output_schema=output_schema,
365356
fetch_config=fetch_config,
366-
llm_config=llm_config,
367357
schema=schema,
368358
mode=mode,
369359
content_type=content_type,
@@ -380,9 +370,7 @@ async def search(
380370
self,
381371
query: str,
382372
num_results: int = 5,
383-
output_schema: Optional[Any] = None,
384373
location_geo_code: Optional[str] = None,
385-
llm_config: Optional[LlmConfig] = None,
386374
*,
387375
schema: Optional[Any] = None,
388376
prompt: Optional[str] = None,
@@ -396,9 +384,7 @@ async def search(
396384
Args:
397385
query: The search query
398386
num_results: Number of results (3-20, default 5)
399-
output_schema: Legacy alias for schema
400387
location_geo_code: Geo code for geo-targeted results
401-
llm_config: Deprecated and ignored by the SGAI v2 search route
402388
"""
403389
logger.info(f"Searching: {query}")
404390
return await self._make_request(
@@ -407,9 +393,7 @@ async def search(
407393
json=build_search_payload(
408394
query=query,
409395
num_results=num_results,
410-
output_schema=output_schema,
411396
location_geo_code=location_geo_code,
412-
llm_config=llm_config,
413397
schema=schema,
414398
prompt=prompt,
415399
format=format,

scrapegraph-py/scrapegraph_py/client.py

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from scrapegraph_py.config import API_BASE_URL, DEFAULT_HEADERS
2525
from scrapegraph_py.exceptions import APIError
2626
from scrapegraph_py.logger import sgai_logger as logger
27-
from scrapegraph_py.models.shared import FetchConfig, LlmConfig
27+
from scrapegraph_py.models.shared import FetchConfig
2828
from scrapegraph_py.utils.helpers import handle_sync_response, validate_api_key
2929
from scrapegraph_py.utils.request_builders import (
3030
build_crawl_payload,
@@ -136,9 +136,7 @@ def create(
136136
url: str,
137137
prompt: Optional[str],
138138
interval: str,
139-
output_schema: Optional[Dict[str, Any]] = None,
140139
fetch_config: Optional[FetchConfig] = None,
141-
llm_config: Optional[LlmConfig] = None,
142140
schema: Optional[Any] = None,
143141
formats: Optional[List[Dict[str, Any]]] = None,
144142
webhook_url: Optional[str] = None,
@@ -150,9 +148,7 @@ def create(
150148
url: URL to monitor
151149
prompt: Legacy prompt for JSON extraction monitors
152150
interval: Cron expression (5 fields)
153-
output_schema: Legacy alias for schema
154151
fetch_config: Fetch configuration options
155-
llm_config: LLM configuration options for JSON formats
156152
"""
157153
logger.info(f"Creating monitor '{name}' for {url}")
158154
return self._client._make_request(
@@ -163,9 +159,7 @@ def create(
163159
url=url,
164160
prompt=prompt,
165161
interval=interval,
166-
output_schema=output_schema,
167162
fetch_config=fetch_config,
168-
llm_config=llm_config,
169163
schema=schema,
170164
formats=formats,
171165
webhook_url=webhook_url,
@@ -381,9 +375,7 @@ def extract(
381375
self,
382376
url: Optional[str],
383377
prompt: str,
384-
output_schema: Optional[Any] = None,
385378
fetch_config: Optional[FetchConfig] = None,
386-
llm_config: Optional[LlmConfig] = None,
387379
*,
388380
schema: Optional[Any] = None,
389381
mode: str = "normal",
@@ -396,9 +388,7 @@ def extract(
396388
Args:
397389
url: URL to extract data from
398390
prompt: Natural language prompt describing what to extract
399-
output_schema: Legacy alias for schema
400391
fetch_config: Fetch configuration options
401-
llm_config: Deprecated and ignored by the SGAI v2 extract route
402392
"""
403393
logger.info(f"Extracting from {url}")
404394
return self._make_request(
@@ -407,9 +397,7 @@ def extract(
407397
json=build_extract_payload(
408398
url=url,
409399
prompt=prompt,
410-
output_schema=output_schema,
411400
fetch_config=fetch_config,
412-
llm_config=llm_config,
413401
schema=schema,
414402
mode=mode,
415403
content_type=content_type,
@@ -426,9 +414,7 @@ def search(
426414
self,
427415
query: str,
428416
num_results: int = 5,
429-
output_schema: Optional[Any] = None,
430417
location_geo_code: Optional[str] = None,
431-
llm_config: Optional[LlmConfig] = None,
432418
*,
433419
schema: Optional[Any] = None,
434420
prompt: Optional[str] = None,
@@ -442,9 +428,7 @@ def search(
442428
Args:
443429
query: The search query
444430
num_results: Number of results (1-20, default 5)
445-
output_schema: Legacy alias for schema
446431
location_geo_code: Geo code for geo-targeted results
447-
llm_config: Deprecated and ignored by the SGAI v2 search route
448432
"""
449433
logger.info(f"Searching: {query}")
450434
return self._make_request(
@@ -453,9 +437,7 @@ def search(
453437
json=build_search_payload(
454438
query=query,
455439
num_results=num_results,
456-
output_schema=output_schema,
457440
location_geo_code=location_geo_code,
458-
llm_config=llm_config,
459441
schema=schema,
460442
prompt=prompt,
461443
format=format,

scrapegraph-py/scrapegraph_py/models/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,12 @@
88
from .monitor import MonitorCreateRequest
99
from .scrape import GetScrapeRequest, ScrapeFormat, ScrapeRequest
1010
from .search import SearchRequest
11-
from .shared import FetchConfig, FetchMode, LlmConfig
11+
from .shared import FetchConfig, FetchMode
1212

1313
__all__ = [
1414
# Shared
1515
"FetchConfig",
1616
"FetchMode",
17-
"LlmConfig",
1817
# Scrape
1918
"ScrapeFormat",
2019
"ScrapeRequest",

scrapegraph-py/scrapegraph_py/models/shared.py

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
22
Shared configuration models for the ScrapeGraphAI v2 API.
33
4-
These models are used across multiple endpoints for fetch and LLM configuration.
4+
These models are used across multiple endpoints for fetch configuration.
55
"""
66

77
from enum import Enum
@@ -72,23 +72,3 @@ class FetchConfig(CamelModel):
7272
default=None, ge=0, le=100, description="Number of scrolls to perform (0-100)"
7373
)
7474
mock: bool = Field(default=False, description="Use mock mode for testing")
75-
76-
77-
class LlmConfig(CamelModel):
78-
"""Configuration for the LLM used in extraction."""
79-
80-
model: Optional[str] = Field(
81-
default=None, description="LLM model to use for extraction"
82-
)
83-
temperature: Optional[float] = Field(
84-
default=None,
85-
ge=0.0,
86-
le=2.0,
87-
description="Sampling temperature (0.0-2.0)",
88-
)
89-
max_tokens: Optional[int] = Field(
90-
default=None, ge=1, description="Maximum tokens in the response"
91-
)
92-
chunker: Optional[Dict[str, Any]] = Field(
93-
default=None, description="Chunking strategy for large pages"
94-
)

0 commit comments

Comments (0)