Commit b7822f2

fix: update examples for Pydantic models and add dotenv loading

- Convert dict-style access to Pydantic attribute access in all examples
- Add polling loop to crawl examples (matches JS SDK behavior)
- Add dotenv loading to all examples for easier local testing
- Fix health endpoint to use /health instead of /healthz
- Update CLAUDE.md with pre-commit checklist using ruff

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 500b442 commit b7822f2

36 files changed: 228 additions & 94 deletions
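The dict-to-attribute conversion is the heart of this commit. Below is a minimal sketch of the pattern, assuming a response model shaped roughly like the ones the diffs touch; `ExtractData`, its fields, and the `json` alias are illustrative stand-ins, not the SDK's actual definitions.

```python
# Sketch only: ExtractData is a stand-in for the SDK's response model.
# The alias from the API's "json" key to the json_data attribute is an
# assumption inferred from the diffs below, not the SDK's real code.
from pydantic import BaseModel, Field


class ExtractData(BaseModel):
    json_data: dict = Field(default_factory=dict, alias="json")
    raw: str | None = None
    usage: dict | None = None


data = ExtractData.model_validate({"json": {"title": "Example"}, "usage": {"tokens": 42}})

# Before this commit the examples used dict-style access, which breaks on a
# Pydantic model (BaseModel has no .get()):
#   res.data.get("json")
# After, they use attribute access:
print(data.json_data)  # {'title': 'Example'}
print(data.usage)      # {'tokens': 42}
```

A field literally named `json` would collide with `BaseModel`'s own `json` attribute, which is presumably why the examples read `json_data` while the wire key stays `json`.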

CLAUDE.md

Lines changed: 18 additions & 7 deletions
````diff
@@ -37,20 +37,31 @@ uv sync
 # Test
 uv run pytest tests/ -v
 
-# Format
-uv run black scrapegraph_py tests
-uv run isort scrapegraph_py tests
-
-# Lint
-uv run ruff check scrapegraph_py tests
+# Format & lint
+uv run ruff format src tests
+uv run ruff check src tests --fix
 
 # Type check
-uv run mypy scrapegraph_py
+uv run mypy src
 
 # Build
 uv build
 ```
 
+## Before completing any task
+
+Always run these commands before committing or saying a task is done:
+
+```bash
+uv run ruff format src tests
+uv run ruff check src tests --fix
+uv run mypy src
+uv build
+uv run pytest tests/ -v
+```
+
+No exceptions.
+
 ## Architecture
 
 **Core Components:**
````

examples/crawl/crawl_basic.py

Lines changed: 21 additions & 7 deletions
```diff
@@ -1,20 +1,34 @@
+from dotenv import load_dotenv
+load_dotenv()
+
+import time
 from scrapegraph_py import ScrapeGraphAI, CrawlRequest
 
 sgai = ScrapeGraphAI()
 
 start_res = sgai.crawl.start(CrawlRequest(
-    url="https://example.com",
+    url="https://scrapegraphai.com/",
     max_pages=5,
     max_depth=2,
 ))
 
 if start_res.status != "success" or not start_res.data:
     print("Failed to start:", start_res.error)
 else:
-    print("Crawl started:", start_res.data.id)
-    print("Status:", start_res.data.status)
+    crawl_id = start_res.data.id
+    print("Crawl started:", crawl_id)
+
+    status = start_res.data.status
+    while status == "running":
+        time.sleep(2)
+        get_res = sgai.crawl.get(crawl_id)
+        if get_res.status != "success" or not get_res.data:
+            print("Failed to get status:", get_res.error)
+            break
+        status = get_res.data.status
+        print(f"Progress: {get_res.data.finished}/{get_res.data.total} - {status}")
 
-    get_res = sgai.crawl.get(start_res.data.id)
-    if get_res.status == "success":
-        print("\nProgress:", get_res.data.finished, "/", get_res.data.total)
-        print("Pages:", [p["url"] for p in get_res.data.get("pages", [])])
+    if status in ("completed", "failed"):
+        print("\nPages crawled:")
+        for page in get_res.data.pages:
+            print(f"  {page.url} - {page.status}")
```
examples/crawl/crawl_basic_async.py

Lines changed: 20 additions & 7 deletions
```diff
@@ -1,23 +1,36 @@
+from dotenv import load_dotenv
+load_dotenv()
+
 import asyncio
 from scrapegraph_py import AsyncScrapeGraphAI, CrawlRequest
 
 async def main():
     async with AsyncScrapeGraphAI() as sgai:
         start_res = await sgai.crawl.start(CrawlRequest(
-            url="https://example.com",
+            url="https://scrapegraphai.com/",
             max_pages=5,
             max_depth=2,
         ))
 
         if start_res.status != "success" or not start_res.data:
             print("Failed to start:", start_res.error)
         else:
-            print("Crawl started:", start_res.data.id)
-            print("Status:", start_res.data.status)
+            crawl_id = start_res.data.id
+            print("Crawl started:", crawl_id)
+
+            status = start_res.data.status
+            while status == "running":
+                await asyncio.sleep(2)
+                get_res = await sgai.crawl.get(crawl_id)
+                if get_res.status != "success" or not get_res.data:
+                    print("Failed to get status:", get_res.error)
+                    break
+                status = get_res.data.status
+                print(f"Progress: {get_res.data.finished}/{get_res.data.total} - {status}")
 
-            get_res = await sgai.crawl.get(start_res.data.id)
-            if get_res.status == "success":
-                print("\nProgress:", get_res.data.finished, "/", get_res.data.total)
-                print("Pages:", [p["url"] for p in get_res.data.get("pages", [])])
+            if status in ("completed", "failed"):
+                print("\nPages crawled:")
+                for page in get_res.data.pages:
+                    print(f"  {page.url} - {page.status}")
 
 asyncio.run(main())
```
examples/crawl/crawl_with_formats.py

Lines changed: 20 additions & 9 deletions
```diff
@@ -1,3 +1,7 @@
+from dotenv import load_dotenv
+load_dotenv()
+
+import time
 from scrapegraph_py import (
     ScrapeGraphAI,
     CrawlRequest,
@@ -8,7 +12,7 @@
 sgai = ScrapeGraphAI()
 
 start_res = sgai.crawl.start(CrawlRequest(
-    url="https://example.com",
+    url="https://scrapegraphai.com/",
     max_pages=3,
     max_depth=1,
     formats=[
@@ -22,13 +26,20 @@
 else:
     crawl_id = start_res.data.id
     print("Crawl started:", crawl_id)
-    print("Status:", start_res.data.status)
 
-    get_res = sgai.crawl.get(crawl_id)
-    if get_res.status == "success":
-        print("\nProgress:", get_res.data.finished, "/", get_res.data.total)
+    status = start_res.data.status
+    while status == "running":
+        time.sleep(2)
+        get_res = sgai.crawl.get(crawl_id)
+        if get_res.status != "success" or not get_res.data:
+            print("Failed to get status:", get_res.error)
+            break
+        status = get_res.data.status
+        print(f"Progress: {get_res.data.finished}/{get_res.data.total} - {status}")
 
-        for page in get_res.data.get("pages", []):
-            print(f"\n  Page: {page['url']}")
-            print(f"  Status: {page['status']}")
-            print(f"  Depth: {page['depth']}")
+    if status in ("completed", "failed"):
+        print("\nPages crawled:")
+        for page in get_res.data.pages:
+            print(f"\n  Page: {page.url}")
+            print(f"  Status: {page.status}")
+            print(f"  Depth: {page.depth}")
```

examples/crawl/crawl_with_formats_async.py

Lines changed: 19 additions & 9 deletions
```diff
@@ -1,3 +1,6 @@
+from dotenv import load_dotenv
+load_dotenv()
+
 import asyncio
 from scrapegraph_py import (
     AsyncScrapeGraphAI,
@@ -9,7 +12,7 @@
 async def main():
     async with AsyncScrapeGraphAI() as sgai:
         start_res = await sgai.crawl.start(CrawlRequest(
-            url="https://example.com",
+            url="https://scrapegraphai.com/",
             max_pages=3,
             max_depth=1,
             formats=[
@@ -23,15 +26,22 @@ async def main():
         else:
             crawl_id = start_res.data.id
             print("Crawl started:", crawl_id)
-            print("Status:", start_res.data.status)
 
-            get_res = await sgai.crawl.get(crawl_id)
-            if get_res.status == "success":
-                print("\nProgress:", get_res.data.finished, "/", get_res.data.total)
+            status = start_res.data.status
+            while status == "running":
+                await asyncio.sleep(2)
+                get_res = await sgai.crawl.get(crawl_id)
+                if get_res.status != "success" or not get_res.data:
+                    print("Failed to get status:", get_res.error)
+                    break
+                status = get_res.data.status
+                print(f"Progress: {get_res.data.finished}/{get_res.data.total} - {status}")
 
-            for page in get_res.data.get("pages", []):
-                print(f"\n  Page: {page['url']}")
-                print(f"  Status: {page['status']}")
-                print(f"  Depth: {page['depth']}")
+            if status in ("completed", "failed"):
+                print("\nPages crawled:")
+                for page in get_res.data.pages:
+                    print(f"\n  Page: {page.url}")
+                    print(f"  Status: {page.status}")
+                    print(f"  Depth: {page.depth}")
 
 asyncio.run(main())
```

examples/extract/extract_basic.py

Lines changed: 5 additions & 2 deletions
```diff
@@ -1,3 +1,6 @@
+from dotenv import load_dotenv
+load_dotenv()
+
 import json
 from scrapegraph_py import ScrapeGraphAI, ExtractRequest
 
@@ -9,7 +12,7 @@
 ))
 
 if res.status == "success":
-    print("Extracted:", json.dumps(res.data.get("json"), indent=2))
-    print("\nTokens used:", res.data.get("usage"))
+    print("Extracted:", json.dumps(res.data.json_data, indent=2))
+    print("\nTokens used:", res.data.usage)
 else:
     print("Failed:", res.error)
```

examples/extract/extract_basic_async.py

Lines changed: 5 additions & 2 deletions
```diff
@@ -1,3 +1,6 @@
+from dotenv import load_dotenv
+load_dotenv()
+
 import asyncio
 import json
 from scrapegraph_py import AsyncScrapeGraphAI, ExtractRequest
@@ -10,8 +13,8 @@ async def main():
         ))
 
         if res.status == "success":
-            print("Extracted:", json.dumps(res.data.get("json"), indent=2))
-            print("\nTokens used:", res.data.get("usage"))
+            print("Extracted:", json.dumps(res.data.json_data, indent=2))
+            print("\nTokens used:", res.data.usage)
         else:
             print("Failed:", res.error)
 
```

examples/extract/extract_with_schema.py

Lines changed: 6 additions & 3 deletions
```diff
@@ -1,3 +1,6 @@
+from dotenv import load_dotenv
+load_dotenv()
+
 import json
 from scrapegraph_py import ScrapeGraphAI, ExtractRequest
 
@@ -21,8 +24,8 @@
 ))
 
 if res.status == "success":
-    print("Extracted:", json.dumps(res.data.get("json"), indent=2))
-    print("\nRaw:", res.data.get("raw"))
-    print("\nTokens used:", res.data.get("usage"))
+    print("Extracted:", json.dumps(res.data.json_data, indent=2))
+    print("\nRaw:", res.data.raw)
+    print("\nTokens used:", res.data.usage)
 else:
     print("Failed:", res.error)
```

examples/extract/extract_with_schema_async.py

Lines changed: 6 additions & 3 deletions
```diff
@@ -1,3 +1,6 @@
+from dotenv import load_dotenv
+load_dotenv()
+
 import asyncio
 import json
 from scrapegraph_py import AsyncScrapeGraphAI, ExtractRequest
@@ -22,9 +25,9 @@ async def main():
         ))
 
         if res.status == "success":
-            print("Extracted:", json.dumps(res.data.get("json"), indent=2))
-            print("\nRaw:", res.data.get("raw"))
-            print("\nTokens used:", res.data.get("usage"))
+            print("Extracted:", json.dumps(res.data.json_data, indent=2))
+            print("\nRaw:", res.data.raw)
+            print("\nTokens used:", res.data.usage)
         else:
             print("Failed:", res.error)
 
```

examples/monitor/monitor_basic.py

Lines changed: 3 additions & 0 deletions
```diff
@@ -1,3 +1,6 @@
+from dotenv import load_dotenv
+load_dotenv()
+
 from scrapegraph_py import ScrapeGraphAI, MonitorCreateRequest, MarkdownFormatConfig
 
 sgai = ScrapeGraphAI()
```
