Skip to content

Commit e1d8033

Browse files
refactor: merge types into schemas, all Pydantic
- Delete types.py and merge everything into schemas.py - Remove Api prefix from response models - Pre-compile server timing regex - Fix json field shadowing with aliases Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 25dbdd8 commit e1d8033

5 files changed

Lines changed: 356 additions & 419 deletions

File tree

src/scrapegraph_py/__init__.py

Lines changed: 29 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -1,34 +1,29 @@
11
from .client import ScrapeGraphAI
22
from .async_client import AsyncScrapeGraphAI
33

4-
from .types import (
5-
ApiResult,
6-
ApiScrapeResponse,
7-
ApiExtractResponse,
8-
ApiSearchResponse,
9-
ApiCrawlResponse,
10-
ApiCrawlResult,
11-
ApiCrawlPage,
12-
ApiMonitorResponse,
13-
ApiMonitorResult,
14-
ApiMonitorDiffs,
15-
ApiHistoryPage,
16-
ApiHistoryEntry,
17-
ApiCreditsResponse,
18-
ApiHealthResponse,
19-
ApiTokenUsage,
20-
ApiSearchResult,
21-
ApiBranding,
22-
)
23-
244
from .schemas import (
5+
ApiResult,
256
ScrapeRequest,
7+
ScrapeResponse,
268
ExtractRequest,
9+
ExtractResponse,
2710
SearchRequest,
11+
SearchResponse,
12+
SearchResult,
2813
CrawlRequest,
14+
CrawlResponse,
15+
CrawlPage,
2916
MonitorCreateRequest,
3017
MonitorUpdateRequest,
18+
MonitorResponse,
19+
MonitorResult,
20+
MonitorDiffs,
3121
HistoryFilter,
22+
HistoryPage,
23+
HistoryEntry,
24+
CreditsResponse,
25+
HealthResponse,
26+
TokenUsage,
3227
FetchConfig,
3328
MarkdownFormatConfig,
3429
HtmlFormatConfig,
@@ -44,29 +39,27 @@
4439
"ScrapeGraphAI",
4540
"AsyncScrapeGraphAI",
4641
"ApiResult",
47-
"ApiScrapeResponse",
48-
"ApiExtractResponse",
49-
"ApiSearchResponse",
50-
"ApiCrawlResponse",
51-
"ApiCrawlResult",
52-
"ApiCrawlPage",
53-
"ApiMonitorResponse",
54-
"ApiMonitorResult",
55-
"ApiMonitorDiffs",
56-
"ApiHistoryPage",
57-
"ApiHistoryEntry",
58-
"ApiCreditsResponse",
59-
"ApiHealthResponse",
60-
"ApiTokenUsage",
61-
"ApiSearchResult",
62-
"ApiBranding",
6342
"ScrapeRequest",
43+
"ScrapeResponse",
6444
"ExtractRequest",
45+
"ExtractResponse",
6546
"SearchRequest",
47+
"SearchResponse",
48+
"SearchResult",
6649
"CrawlRequest",
50+
"CrawlResponse",
51+
"CrawlPage",
6752
"MonitorCreateRequest",
6853
"MonitorUpdateRequest",
54+
"MonitorResponse",
55+
"MonitorResult",
56+
"MonitorDiffs",
6957
"HistoryFilter",
58+
"HistoryPage",
59+
"HistoryEntry",
60+
"CreditsResponse",
61+
"HealthResponse",
62+
"TokenUsage",
7063
"FetchConfig",
7164
"MarkdownFormatConfig",
7265
"HtmlFormatConfig",

src/scrapegraph_py/async_client.py

Lines changed: 45 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -1,30 +1,31 @@
11
from __future__ import annotations
22
import os
3+
import re
34
import time
45
import httpx
56
from pydantic import BaseModel
67

78
from .env import env
8-
from .types import (
9-
ApiResult,
10-
ApiScrapeResponse,
11-
ApiExtractResponse,
12-
ApiSearchResponse,
13-
ApiCrawlResponse,
14-
ApiMonitorResponse,
15-
ApiHistoryPage,
16-
ApiHistoryEntry,
17-
ApiCreditsResponse,
18-
ApiHealthResponse,
19-
)
9+
10+
_SERVER_TIMING_RE = re.compile(r"dur=(\d+(?:\.\d+)?)")
2011
from .schemas import (
12+
ApiResult,
2113
ScrapeRequest,
14+
ScrapeResponse,
2215
ExtractRequest,
16+
ExtractResponse,
2317
SearchRequest,
18+
SearchResponse,
2419
CrawlRequest,
20+
CrawlResponse,
2521
MonitorCreateRequest,
2622
MonitorUpdateRequest,
23+
MonitorResponse,
2724
HistoryFilter,
25+
HistoryPage,
26+
HistoryEntry,
27+
CreditsResponse,
28+
HealthResponse,
2829
)
2930

3031

@@ -78,11 +79,11 @@ class AsyncCrawlResource:
7879
def __init__(self, client: AsyncScrapeGraphAI):
7980
self._client = client
8081

81-
async def start(self, params: CrawlRequest) -> ApiResult[ApiCrawlResponse]:
82-
return await self._client._post("/crawl", params, ApiCrawlResponse)
82+
async def start(self, params: CrawlRequest) -> ApiResult[CrawlResponse]:
83+
return await self._client._post("/crawl", params, CrawlResponse)
8384

84-
async def get(self, id: str) -> ApiResult[ApiCrawlResponse]:
85-
return await self._client._get(f"/crawl/{id}", ApiCrawlResponse)
85+
async def get(self, id: str) -> ApiResult[CrawlResponse]:
86+
return await self._client._get(f"/crawl/{id}", CrawlResponse)
8687

8788
async def stop(self, id: str) -> ApiResult[dict]:
8889
return await self._client._post_empty(f"/crawl/{id}/stop")
@@ -98,33 +99,33 @@ class AsyncMonitorResource:
9899
def __init__(self, client: AsyncScrapeGraphAI):
99100
self._client = client
100101

101-
async def create(self, params: MonitorCreateRequest) -> ApiResult[ApiMonitorResponse]:
102-
return await self._client._post("/monitor", params, ApiMonitorResponse)
102+
async def create(self, params: MonitorCreateRequest) -> ApiResult[MonitorResponse]:
103+
return await self._client._post("/monitor", params, MonitorResponse)
103104

104-
async def list(self) -> ApiResult[list[ApiMonitorResponse]]:
105-
return await self._client._get("/monitor", list[ApiMonitorResponse])
105+
async def list(self) -> ApiResult[list[MonitorResponse]]:
106+
return await self._client._get("/monitor", list[MonitorResponse])
106107

107-
async def get(self, id: str) -> ApiResult[ApiMonitorResponse]:
108-
return await self._client._get(f"/monitor/{id}", ApiMonitorResponse)
108+
async def get(self, id: str) -> ApiResult[MonitorResponse]:
109+
return await self._client._get(f"/monitor/{id}", MonitorResponse)
109110

110-
async def update(self, id: str, params: MonitorUpdateRequest) -> ApiResult[ApiMonitorResponse]:
111-
return await self._client._patch(f"/monitor/{id}", params, ApiMonitorResponse)
111+
async def update(self, id: str, params: MonitorUpdateRequest) -> ApiResult[MonitorResponse]:
112+
return await self._client._patch(f"/monitor/{id}", params, MonitorResponse)
112113

113114
async def delete(self, id: str) -> ApiResult[dict]:
114115
return await self._client._delete(f"/monitor/{id}")
115116

116-
async def pause(self, id: str) -> ApiResult[ApiMonitorResponse]:
117-
return await self._client._post_empty(f"/monitor/{id}/pause", ApiMonitorResponse)
117+
async def pause(self, id: str) -> ApiResult[MonitorResponse]:
118+
return await self._client._post_empty(f"/monitor/{id}/pause", MonitorResponse)
118119

119-
async def resume(self, id: str) -> ApiResult[ApiMonitorResponse]:
120-
return await self._client._post_empty(f"/monitor/{id}/resume", ApiMonitorResponse)
120+
async def resume(self, id: str) -> ApiResult[MonitorResponse]:
121+
return await self._client._post_empty(f"/monitor/{id}/resume", MonitorResponse)
121122

122123

123124
class AsyncHistoryResource:
124125
def __init__(self, client: AsyncScrapeGraphAI):
125126
self._client = client
126127

127-
async def list(self, params: HistoryFilter | None = None) -> ApiResult[ApiHistoryPage]:
128+
async def list(self, params: HistoryFilter | None = None) -> ApiResult[HistoryPage]:
128129
qs = {}
129130
if params:
130131
if params.page:
@@ -133,10 +134,10 @@ async def list(self, params: HistoryFilter | None = None) -> ApiResult[ApiHistor
133134
qs["limit"] = str(params.limit)
134135
if params.service:
135136
qs["service"] = params.service
136-
return await self._client._get("/history", ApiHistoryPage, params=qs if qs else None)
137+
return await self._client._get("/history", HistoryPage, params=qs if qs else None)
137138

138-
async def get(self, id: str) -> ApiResult[ApiHistoryEntry]:
139-
return await self._client._get(f"/history/{id}", ApiHistoryEntry)
139+
async def get(self, id: str) -> ApiResult[HistoryEntry]:
140+
return await self._client._get(f"/history/{id}", HistoryEntry)
140141

141142

142143
class AsyncScrapeGraphAI:
@@ -184,10 +185,8 @@ async def _request[T](
184185
resp = await self._http.request(method, path, json=json_body, params=params)
185186

186187
server_timing = resp.headers.get("Server-Timing")
187-
if server_timing:
188-
import re
189-
match = re.search(r"dur=(\d+(?:\.\d+)?)", server_timing)
190-
elapsed_ms = int(float(match.group(1))) if match else int((time.perf_counter() - start) * 1000)
188+
if server_timing and (match := _SERVER_TIMING_RE.search(server_timing)):
189+
elapsed_ms = int(float(match.group(1)))
191190
else:
192191
elapsed_ms = int((time.perf_counter() - start) * 1000)
193192

@@ -228,20 +227,20 @@ async def _patch[T](self, path: str, body: BaseModel, response_type: type[T]) ->
228227
async def _delete(self, path: str) -> ApiResult[dict]:
229228
return await self._request("DELETE", path, dict)
230229

231-
async def scrape(self, params: ScrapeRequest) -> ApiResult[ApiScrapeResponse]:
232-
return await self._post("/scrape", params, ApiScrapeResponse)
230+
async def scrape(self, params: ScrapeRequest) -> ApiResult[ScrapeResponse]:
231+
return await self._post("/scrape", params, ScrapeResponse)
233232

234-
async def extract(self, params: ExtractRequest) -> ApiResult[ApiExtractResponse]:
235-
return await self._post("/extract", params, ApiExtractResponse)
233+
async def extract(self, params: ExtractRequest) -> ApiResult[ExtractResponse]:
234+
return await self._post("/extract", params, ExtractResponse)
236235

237-
async def search(self, params: SearchRequest) -> ApiResult[ApiSearchResponse]:
238-
return await self._post("/search", params, ApiSearchResponse)
236+
async def search(self, params: SearchRequest) -> ApiResult[SearchResponse]:
237+
return await self._post("/search", params, SearchResponse)
239238

240-
async def credits(self) -> ApiResult[ApiCreditsResponse]:
241-
return await self._get("/credits", ApiCreditsResponse)
239+
async def credits(self) -> ApiResult[CreditsResponse]:
240+
return await self._get("/credits", CreditsResponse)
242241

243-
async def health(self) -> ApiResult[ApiHealthResponse]:
244-
return await self._request("GET", "/healthz", ApiHealthResponse, base_url=env.health_url)
242+
async def health(self) -> ApiResult[HealthResponse]:
243+
return await self._request("GET", "/healthz", HealthResponse, base_url=env.health_url)
245244

246245
async def close(self) -> None:
247246
await self._http.aclose()

0 commit comments

Comments (0)