
Commit 84400ff

feat: use Pydantic alias_generator, add async examples, update README
- Replace manual _to_camel with Pydantic's built-in alias_generator
- CamelModel base class handles snake_case -> camelCase conversion
- Simplify _serialize to single model_dump call
- Add async versions of all 16 examples
- Update README with expanded async client docs and examples table
- Add banner from JS SDK

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent d1faf57 commit 84400ff
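
For context on the serialization bullets above: Pydantic v2 ships alias_generators.to_camel, so a base model can declare the alias generator once instead of carrying a hand-rolled _to_camel helper. Below is a minimal sketch of the idea, not the SDK's actual code; the CrawlRequest fields and the exclude_none policy are illustrative assumptions.

# Sketch only: approximates the CamelModel / _serialize change described in
# the commit message. Field names and exclude_none are assumptions.
from pydantic import BaseModel, ConfigDict
from pydantic.alias_generators import to_camel

class CamelModel(BaseModel):
    # snake_case attributes in Python, camelCase keys on the wire
    model_config = ConfigDict(
        alias_generator=to_camel,  # built-in generator replaces a manual _to_camel
        populate_by_name=True,     # still accept snake_case constructor kwargs
    )

def _serialize(model: CamelModel) -> dict:
    # A single model_dump call; aliases handle the camelCase conversion
    return model.model_dump(by_alias=True, exclude_none=True)

class CrawlRequest(CamelModel):  # hypothetical fields, for illustration only
    url: str
    max_pages: int = 1
    max_depth: int = 1

print(_serialize(CrawlRequest(url="https://example.com", max_pages=5)))
# {'url': 'https://example.com', 'maxPages': 5, 'maxDepth': 1}

populate_by_name=True keeps the Python-side API in snake_case while payloads go out in camelCase, which is presumably what the async examples below rely on.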

21 files changed

Lines changed: 707 additions & 260 deletions

README.md

Lines changed: 272 additions & 213 deletions (large diff not rendered by default)
Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
import asyncio
from scrapegraph_py import AsyncScrapeGraphAI, CrawlRequest

async def main():
    async with AsyncScrapeGraphAI() as sgai:
        start_res = await sgai.crawl.start(CrawlRequest(
            url="https://example.com",
            max_pages=5,
            max_depth=2,
        ))

        if start_res.status != "success" or not start_res.data:
            print("Failed to start:", start_res.error)
        else:
            print("Crawl started:", start_res.data["id"])
            print("Status:", start_res.data["status"])

            get_res = await sgai.crawl.get(start_res.data["id"])
            if get_res.status == "success":
                print("\nProgress:", get_res.data["finished"], "/", get_res.data["total"])
                print("Pages:", [p["url"] for p in get_res.data.get("pages", [])])

asyncio.run(main())
Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
import asyncio
from scrapegraph_py import (
    AsyncScrapeGraphAI,
    CrawlRequest,
    MarkdownFormatConfig,
    LinksFormatConfig,
)

async def main():
    async with AsyncScrapeGraphAI() as sgai:
        start_res = await sgai.crawl.start(CrawlRequest(
            url="https://example.com",
            max_pages=3,
            max_depth=1,
            formats=[
                MarkdownFormatConfig(),
                LinksFormatConfig(),
            ],
        ))

        if start_res.status != "success" or not start_res.data:
            print("Failed to start:", start_res.error)
        else:
            crawl_id = start_res.data["id"]
            print("Crawl started:", crawl_id)
            print("Status:", start_res.data["status"])

            get_res = await sgai.crawl.get(crawl_id)
            if get_res.status == "success":
                print("\nProgress:", get_res.data["finished"], "/", get_res.data["total"])

                for page in get_res.data.get("pages", []):
                    print(f"\n  Page: {page['url']}")
                    print(f"  Status: {page['status']}")
                    print(f"  Depth: {page['depth']}")

asyncio.run(main())
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
import asyncio
import json
from scrapegraph_py import AsyncScrapeGraphAI, ExtractRequest

async def main():
    async with AsyncScrapeGraphAI() as sgai:
        res = await sgai.extract(ExtractRequest(
            url="https://example.com",
            prompt="What is this page about? Extract the main heading and description.",
        ))

        if res.status == "success":
            print("Extracted:", json.dumps(res.data.get("json"), indent=2))
            print("\nTokens used:", res.data.get("usage"))
        else:
            print("Failed:", res.error)

asyncio.run(main())
Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
import asyncio
import json
from scrapegraph_py import AsyncScrapeGraphAI, ExtractRequest

async def main():
    async with AsyncScrapeGraphAI() as sgai:
        res = await sgai.extract(ExtractRequest(
            url="https://example.com",
            prompt="Extract structured information about this page",
            schema={
                "type": "object",
                "properties": {
                    "title": {"type": "string"},
                    "description": {"type": "string"},
                    "links": {
                        "type": "array",
                        "items": {"type": "string"},
                    },
                },
                "required": ["title"],
            },
        ))

        if res.status == "success":
            print("Extracted:", json.dumps(res.data.get("json"), indent=2))
            print("\nRaw:", res.data.get("raw"))
            print("\nTokens used:", res.data.get("usage"))
        else:
            print("Failed:", res.error)

asyncio.run(main())
Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
import asyncio
from scrapegraph_py import AsyncScrapeGraphAI, MonitorCreateRequest, MarkdownFormatConfig

async def main():
    async with AsyncScrapeGraphAI() as sgai:
        res = await sgai.monitor.create(MonitorCreateRequest(
            url="https://example.com",
            name="Example Monitor",
            interval="0 * * * *",
            formats=[MarkdownFormatConfig()],
        ))

        if res.status == "success":
            print("Monitor created:", res.data["cronId"])
            print("Status:", res.data["status"])
            print("Interval:", res.data["interval"])
        else:
            print("Failed:", res.error)

asyncio.run(main())
Lines changed: 22 additions & 0 deletions
@@ -0,0 +1,22 @@
import asyncio
from scrapegraph_py import AsyncScrapeGraphAI, MonitorCreateRequest, MarkdownFormatConfig

async def main():
    async with AsyncScrapeGraphAI() as sgai:
        res = await sgai.monitor.create(MonitorCreateRequest(
            url="https://example.com",
            name="Example Monitor with Webhook",
            interval="0 */6 * * *",
            webhook_url="https://your-webhook-endpoint.com/hook",
            formats=[MarkdownFormatConfig()],
        ))

        if res.status == "success":
            print("Monitor created:", res.data["cronId"])
            print("Status:", res.data["status"])
            print("Interval:", res.data["interval"])
            print("Webhook configured")
        else:
            print("Failed:", res.error)

asyncio.run(main())
Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
import asyncio
from scrapegraph_py import AsyncScrapeGraphAI, ScrapeRequest, MarkdownFormatConfig

async def main():
    async with AsyncScrapeGraphAI() as sgai:
        res = await sgai.scrape(ScrapeRequest(
            url="https://example.com",
            formats=[MarkdownFormatConfig()],
        ))

        if res.status == "success":
            print("Markdown:", res.data["results"].get("markdown", {}).get("data"))
            print(f"\nTook {res.elapsed_ms}ms")
        else:
            print("Failed:", res.error)

asyncio.run(main())
Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
import asyncio
import json
from scrapegraph_py import AsyncScrapeGraphAI, ScrapeRequest, JsonFormatConfig

async def main():
    async with AsyncScrapeGraphAI() as sgai:
        res = await sgai.scrape(ScrapeRequest(
            url="https://example.com",
            formats=[
                JsonFormatConfig(
                    prompt="Extract the company name, tagline, and list of features",
                    schema={
                        "type": "object",
                        "properties": {
                            "companyName": {"type": "string"},
                            "tagline": {"type": "string"},
                            "features": {
                                "type": "array",
                                "items": {"type": "string"},
                            },
                        },
                        "required": ["companyName"],
                    },
                ),
            ],
        ))

        if res.status == "success":
            json_result = res.data["results"].get("json", {})

            print("=== JSON Extraction ===\n")
            print("Extracted data:")
            print(json.dumps(json_result.get("data"), indent=2))

            chunker = json_result.get("metadata", {}).get("chunker")
            if chunker:
                chunks = chunker.get("chunks", [])
                print("\nChunker info:")
                print("  Chunks:", len(chunks))
                print("  Total size:", sum(c.get("size", 0) for c in chunks), "chars")
        else:
            print("Failed:", res.error)

asyncio.run(main())
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
import asyncio
from scrapegraph_py import (
    AsyncScrapeGraphAI,
    ScrapeRequest,
    MarkdownFormatConfig,
    LinksFormatConfig,
    ScreenshotFormatConfig,
)

async def main():
    async with AsyncScrapeGraphAI() as sgai:
        res = await sgai.scrape(ScrapeRequest(
            url="https://example.com",
            formats=[
                MarkdownFormatConfig(),
                LinksFormatConfig(),
                ScreenshotFormatConfig(width=1280, height=720),
            ],
        ))

        if res.status == "success":
            results = res.data["results"]

            print("=== Markdown ===")
            print(results.get("markdown", {}).get("data", [""])[0][:500], "...")

            print("\n=== Links ===")
            links = results.get("links", {}).get("data", [])
            print(f"Found {len(links)} links")
            for link in links[:5]:
                print(f"  - {link}")

            print("\n=== Screenshot ===")
            screenshot = results.get("screenshot", {}).get("data", {})
            print(f"URL: {screenshot.get('url')}")
            print(f"Size: {screenshot.get('width')}x{screenshot.get('height')}")
        else:
            print("Failed:", res.error)

asyncio.run(main())
