
Commit 84400ff

feat: use Pydantic alias_generator, add async examples, update README
- Replace manual _to_camel with Pydantic's built-in alias_generator
- CamelModel base class handles snake_case -> camelCase conversion
- Simplify _serialize to single model_dump call
- Add async versions of all 16 examples
- Update README with expanded async client docs and examples table
- Add banner from JS SDK

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent d1faf57 commit 84400ff
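
For context on the serialization bullets above: Pydantic v2 ships alias_generators.to_camel, so a base model can declare the alias generator once instead of carrying a hand-rolled _to_camel helper. Below is a minimal sketch of the idea, not the SDK's actual code; the CrawlRequest fields and the exclude_none policy are illustrative assumptions.

# Sketch only: approximates the CamelModel / _serialize change described in
# the commit message. Field names and exclude_none are assumptions.
from pydantic import BaseModel, ConfigDict
from pydantic.alias_generators import to_camel

class CamelModel(BaseModel):
    # snake_case attributes in Python, camelCase keys on the wire
    model_config = ConfigDict(
        alias_generator=to_camel,  # built-in generator replaces a manual _to_camel
        populate_by_name=True,     # still accept snake_case constructor kwargs
    )

def _serialize(model: CamelModel) -> dict:
    # A single model_dump call; aliases handle the camelCase conversion
    return model.model_dump(by_alias=True, exclude_none=True)

class CrawlRequest(CamelModel):  # hypothetical fields, for illustration only
    url: str
    max_pages: int = 1
    max_depth: int = 1

print(_serialize(CrawlRequest(url="https://example.com", max_pages=5)))
# {'url': 'https://example.com', 'maxPages': 5, 'maxDepth': 1}

populate_by_name=True keeps the Python-side API in snake_case while payloads go out in camelCase, which is presumably what the async examples below rely on.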

21 files changed

Lines changed: 707 additions & 260 deletions

README.md

Lines changed: 272 additions & 213 deletions (large diff not rendered by default)
Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
import asyncio
from scrapegraph_py import AsyncScrapeGraphAI, CrawlRequest

async def main():
    async with AsyncScrapeGraphAI() as sgai:
        start_res = await sgai.crawl.start(CrawlRequest(
            url="https://example.com",
            max_pages=5,
            max_depth=2,
        ))

        if start_res.status != "success" or not start_res.data:
            print("Failed to start:", start_res.error)
        else:
            print("Crawl started:", start_res.data["id"])
            print("Status:", start_res.data["status"])

            get_res = await sgai.crawl.get(start_res.data["id"])
            if get_res.status == "success":
                print("\nProgress:", get_res.data["finished"], "/", get_res.data["total"])
                print("Pages:", [p["url"] for p in get_res.data.get("pages", [])])

asyncio.run(main())
Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
import asyncio
from scrapegraph_py import (
    AsyncScrapeGraphAI,
    CrawlRequest,
    MarkdownFormatConfig,
    LinksFormatConfig,
)

async def main():
    async with AsyncScrapeGraphAI() as sgai:
        start_res = await sgai.crawl.start(CrawlRequest(
            url="https://example.com",
            max_pages=3,
            max_depth=1,
            formats=[
                MarkdownFormatConfig(),
                LinksFormatConfig(),
            ],
        ))

        if start_res.status != "success" or not start_res.data:
            print("Failed to start:", start_res.error)
        else:
            crawl_id = start_res.data["id"]
            print("Crawl started:", crawl_id)
            print("Status:", start_res.data["status"])

            get_res = await sgai.crawl.get(crawl_id)
            if get_res.status == "success":
                print("\nProgress:", get_res.data["finished"], "/", get_res.data["total"])

                for page in get_res.data.get("pages", []):
                    print(f"\n  Page: {page['url']}")
                    print(f"  Status: {page['status']}")
                    print(f"  Depth: {page['depth']}")

asyncio.run(main())
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
import asyncio
import json
from scrapegraph_py import AsyncScrapeGraphAI, ExtractRequest

async def main():
    async with AsyncScrapeGraphAI() as sgai:
        res = await sgai.extract(ExtractRequest(
            url="https://example.com",
            prompt="What is this page about? Extract the main heading and description.",
        ))

        if res.status == "success":
            print("Extracted:", json.dumps(res.data.get("json"), indent=2))
            print("\nTokens used:", res.data.get("usage"))
        else:
            print("Failed:", res.error)

asyncio.run(main())
Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
import asyncio
import json
from scrapegraph_py import AsyncScrapeGraphAI, ExtractRequest

async def main():
    async with AsyncScrapeGraphAI() as sgai:
        res = await sgai.extract(ExtractRequest(
            url="https://example.com",
            prompt="Extract structured information about this page",
            schema={
                "type": "object",
                "properties": {
                    "title": {"type": "string"},
                    "description": {"type": "string"},
                    "links": {
                        "type": "array",
                        "items": {"type": "string"},
                    },
                },
                "required": ["title"],
            },
        ))

        if res.status == "success":
            print("Extracted:", json.dumps(res.data.get("json"), indent=2))
            print("\nRaw:", res.data.get("raw"))
            print("\nTokens used:", res.data.get("usage"))
        else:
            print("Failed:", res.error)

asyncio.run(main())
Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
import asyncio
from scrapegraph_py import AsyncScrapeGraphAI, MonitorCreateRequest, MarkdownFormatConfig

async def main():
    async with AsyncScrapeGraphAI() as sgai:
        res = await sgai.monitor.create(MonitorCreateRequest(
            url="https://example.com",
            name="Example Monitor",
            interval="0 * * * *",
            formats=[MarkdownFormatConfig()],
        ))

        if res.status == "success":
            print("Monitor created:", res.data["cronId"])
            print("Status:", res.data["status"])
            print("Interval:", res.data["interval"])
        else:
            print("Failed:", res.error)

asyncio.run(main())
Lines changed: 22 additions & 0 deletions
@@ -0,0 +1,22 @@
import asyncio
from scrapegraph_py import AsyncScrapeGraphAI, MonitorCreateRequest, MarkdownFormatConfig

async def main():
    async with AsyncScrapeGraphAI() as sgai:
        res = await sgai.monitor.create(MonitorCreateRequest(
            url="https://example.com",
            name="Example Monitor with Webhook",
            interval="0 */6 * * *",
            webhook_url="https://your-webhook-endpoint.com/hook",
            formats=[MarkdownFormatConfig()],
        ))

        if res.status == "success":
            print("Monitor created:", res.data["cronId"])
            print("Status:", res.data["status"])
            print("Interval:", res.data["interval"])
            print("Webhook configured")
        else:
            print("Failed:", res.error)

asyncio.run(main())
Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
import asyncio
from scrapegraph_py import AsyncScrapeGraphAI, ScrapeRequest, MarkdownFormatConfig

async def main():
    async with AsyncScrapeGraphAI() as sgai:
        res = await sgai.scrape(ScrapeRequest(
            url="https://example.com",
            formats=[MarkdownFormatConfig()],
        ))

        if res.status == "success":
            print("Markdown:", res.data["results"].get("markdown", {}).get("data"))
            print(f"\nTook {res.elapsed_ms}ms")
        else:
            print("Failed:", res.error)

asyncio.run(main())
Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
import asyncio
import json
from scrapegraph_py import AsyncScrapeGraphAI, ScrapeRequest, JsonFormatConfig

async def main():
    async with AsyncScrapeGraphAI() as sgai:
        res = await sgai.scrape(ScrapeRequest(
            url="https://example.com",
            formats=[
                JsonFormatConfig(
                    prompt="Extract the company name, tagline, and list of features",
                    schema={
                        "type": "object",
                        "properties": {
                            "companyName": {"type": "string"},
                            "tagline": {"type": "string"},
                            "features": {
                                "type": "array",
                                "items": {"type": "string"},
                            },
                        },
                        "required": ["companyName"],
                    },
                ),
            ],
        ))

        if res.status == "success":
            json_result = res.data["results"].get("json", {})

            print("=== JSON Extraction ===\n")
            print("Extracted data:")
            print(json.dumps(json_result.get("data"), indent=2))

            chunker = json_result.get("metadata", {}).get("chunker")
            if chunker:
                chunks = chunker.get("chunks", [])
                print("\nChunker info:")
                print("  Chunks:", len(chunks))
                print("  Total size:", sum(c.get("size", 0) for c in chunks), "chars")
        else:
            print("Failed:", res.error)

asyncio.run(main())
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
import asyncio
from scrapegraph_py import (
    AsyncScrapeGraphAI,
    ScrapeRequest,
    MarkdownFormatConfig,
    LinksFormatConfig,
    ScreenshotFormatConfig,
)

async def main():
    async with AsyncScrapeGraphAI() as sgai:
        res = await sgai.scrape(ScrapeRequest(
            url="https://example.com",
            formats=[
                MarkdownFormatConfig(),
                LinksFormatConfig(),
                ScreenshotFormatConfig(width=1280, height=720),
            ],
        ))

        if res.status == "success":
            results = res.data["results"]

            print("=== Markdown ===")
            print(results.get("markdown", {}).get("data", [""])[0][:500], "...")

            print("\n=== Links ===")
            links = results.get("links", {}).get("data", [])
            print(f"Found {len(links)} links")
            for link in links[:5]:
                print(f"  - {link}")

            print("\n=== Screenshot ===")
            screenshot = results.get("screenshot", {}).get("data", {})
            print(f"URL: {screenshot.get('url')}")
            print(f"Size: {screenshot.get('width')}x{screenshot.get('height')}")
        else:
            print("Failed:", res.error)

asyncio.run(main())
