Commit b7822f2

fix: update examples for Pydantic models and add dotenv loading

- Convert dict-style access to Pydantic attribute access in all examples
- Add polling loop to crawl examples (matches JS SDK behavior)
- Add dotenv loading to all examples for easier local testing
- Fix health endpoint to use /health instead of /healthz
- Update CLAUDE.md with pre-commit checklist using ruff

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 500b442 commit b7822f2

36 files changed: 228 additions & 94 deletions
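The dict-to-attribute conversion is the heart of this commit. Below is a minimal sketch of the pattern, assuming a response model shaped roughly like the ones the diffs touch; `ExtractData`, its fields, and the `json` alias are illustrative stand-ins, not the SDK's actual definitions.

```python
# Sketch only: ExtractData is a stand-in for the SDK's response model.
# The alias from the API's "json" key to the json_data attribute is an
# assumption inferred from the diffs below, not the SDK's real code.
from pydantic import BaseModel, Field


class ExtractData(BaseModel):
    json_data: dict = Field(default_factory=dict, alias="json")
    raw: str | None = None
    usage: dict | None = None


data = ExtractData.model_validate({"json": {"title": "Example"}, "usage": {"tokens": 42}})

# Before this commit the examples used dict-style access, which breaks on a
# Pydantic model (BaseModel has no .get()):
#   res.data.get("json")
# After, they use attribute access:
print(data.json_data)  # {'title': 'Example'}
print(data.usage)      # {'tokens': 42}
```

A field literally named `json` would collide with `BaseModel`'s own `json` attribute, which is presumably why the examples read `json_data` while the wire key stays `json`.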

CLAUDE.md

Lines changed: 18 additions & 7 deletions
````diff
@@ -37,20 +37,31 @@ uv sync
 # Test
 uv run pytest tests/ -v
 
-# Format
-uv run black scrapegraph_py tests
-uv run isort scrapegraph_py tests
-
-# Lint
-uv run ruff check scrapegraph_py tests
+# Format & lint
+uv run ruff format src tests
+uv run ruff check src tests --fix
 
 # Type check
-uv run mypy scrapegraph_py
+uv run mypy src
 
 # Build
 uv build
 ```
 
+## Before completing any task
+
+Always run these commands before committing or saying a task is done:
+
+```bash
+uv run ruff format src tests
+uv run ruff check src tests --fix
+uv run mypy src
+uv build
+uv run pytest tests/ -v
+```
+
+No exceptions.
+
 ## Architecture
 
 **Core Components:**
````

examples/crawl/crawl_basic.py

Lines changed: 21 additions & 7 deletions
```diff
@@ -1,20 +1,34 @@
+from dotenv import load_dotenv
+load_dotenv()
+
+import time
 from scrapegraph_py import ScrapeGraphAI, CrawlRequest
 
 sgai = ScrapeGraphAI()
 
 start_res = sgai.crawl.start(CrawlRequest(
-    url="https://example.com",
+    url="https://scrapegraphai.com/",
     max_pages=5,
     max_depth=2,
 ))
 
 if start_res.status != "success" or not start_res.data:
     print("Failed to start:", start_res.error)
 else:
-    print("Crawl started:", start_res.data.id)
-    print("Status:", start_res.data.status)
+    crawl_id = start_res.data.id
+    print("Crawl started:", crawl_id)
+
+    status = start_res.data.status
+    while status == "running":
+        time.sleep(2)
+        get_res = sgai.crawl.get(crawl_id)
+        if get_res.status != "success" or not get_res.data:
+            print("Failed to get status:", get_res.error)
+            break
+        status = get_res.data.status
+        print(f"Progress: {get_res.data.finished}/{get_res.data.total} - {status}")
 
-    get_res = sgai.crawl.get(start_res.data.id)
-    if get_res.status == "success":
-        print("\nProgress:", get_res.data.finished, "/", get_res.data.total)
-        print("Pages:", [p["url"] for p in get_res.data.get("pages", [])])
+    if status in ("completed", "failed"):
+        print("\nPages crawled:")
+        for page in get_res.data.pages:
+            print(f"  {page.url} - {page.status}")
```
examples/crawl/crawl_basic_async.py

Lines changed: 20 additions & 7 deletions
```diff
@@ -1,23 +1,36 @@
+from dotenv import load_dotenv
+load_dotenv()
+
 import asyncio
 from scrapegraph_py import AsyncScrapeGraphAI, CrawlRequest
 
 async def main():
     async with AsyncScrapeGraphAI() as sgai:
         start_res = await sgai.crawl.start(CrawlRequest(
-            url="https://example.com",
+            url="https://scrapegraphai.com/",
             max_pages=5,
             max_depth=2,
         ))
 
         if start_res.status != "success" or not start_res.data:
             print("Failed to start:", start_res.error)
         else:
-            print("Crawl started:", start_res.data.id)
-            print("Status:", start_res.data.status)
+            crawl_id = start_res.data.id
+            print("Crawl started:", crawl_id)
+
+            status = start_res.data.status
+            while status == "running":
+                await asyncio.sleep(2)
+                get_res = await sgai.crawl.get(crawl_id)
+                if get_res.status != "success" or not get_res.data:
+                    print("Failed to get status:", get_res.error)
+                    break
+                status = get_res.data.status
+                print(f"Progress: {get_res.data.finished}/{get_res.data.total} - {status}")
 
-            get_res = await sgai.crawl.get(start_res.data.id)
-            if get_res.status == "success":
-                print("\nProgress:", get_res.data.finished, "/", get_res.data.total)
-                print("Pages:", [p["url"] for p in get_res.data.get("pages", [])])
+            if status in ("completed", "failed"):
+                print("\nPages crawled:")
+                for page in get_res.data.pages:
+                    print(f"  {page.url} - {page.status}")
 
 asyncio.run(main())
```
examples/crawl/crawl_with_formats.py

Lines changed: 20 additions & 9 deletions
```diff
@@ -1,3 +1,7 @@
+from dotenv import load_dotenv
+load_dotenv()
+
+import time
 from scrapegraph_py import (
     ScrapeGraphAI,
     CrawlRequest,
@@ -8,7 +12,7 @@
 sgai = ScrapeGraphAI()
 
 start_res = sgai.crawl.start(CrawlRequest(
-    url="https://example.com",
+    url="https://scrapegraphai.com/",
     max_pages=3,
     max_depth=1,
     formats=[
@@ -22,13 +26,20 @@
 else:
     crawl_id = start_res.data.id
     print("Crawl started:", crawl_id)
-    print("Status:", start_res.data.status)
 
-    get_res = sgai.crawl.get(crawl_id)
-    if get_res.status == "success":
-        print("\nProgress:", get_res.data.finished, "/", get_res.data.total)
+    status = start_res.data.status
+    while status == "running":
+        time.sleep(2)
+        get_res = sgai.crawl.get(crawl_id)
+        if get_res.status != "success" or not get_res.data:
+            print("Failed to get status:", get_res.error)
+            break
+        status = get_res.data.status
+        print(f"Progress: {get_res.data.finished}/{get_res.data.total} - {status}")
 
-        for page in get_res.data.get("pages", []):
-            print(f"\n  Page: {page['url']}")
-            print(f"  Status: {page['status']}")
-            print(f"  Depth: {page['depth']}")
+    if status in ("completed", "failed"):
+        print("\nPages crawled:")
+        for page in get_res.data.pages:
+            print(f"\n  Page: {page.url}")
+            print(f"  Status: {page.status}")
+            print(f"  Depth: {page.depth}")
```

examples/crawl/crawl_with_formats_async.py

Lines changed: 19 additions & 9 deletions
```diff
@@ -1,3 +1,6 @@
+from dotenv import load_dotenv
+load_dotenv()
+
 import asyncio
 from scrapegraph_py import (
     AsyncScrapeGraphAI,
@@ -9,7 +12,7 @@
 async def main():
     async with AsyncScrapeGraphAI() as sgai:
         start_res = await sgai.crawl.start(CrawlRequest(
-            url="https://example.com",
+            url="https://scrapegraphai.com/",
             max_pages=3,
             max_depth=1,
             formats=[
@@ -23,15 +26,22 @@ async def main():
         else:
             crawl_id = start_res.data.id
             print("Crawl started:", crawl_id)
-            print("Status:", start_res.data.status)
 
-            get_res = await sgai.crawl.get(crawl_id)
-            if get_res.status == "success":
-                print("\nProgress:", get_res.data.finished, "/", get_res.data.total)
+            status = start_res.data.status
+            while status == "running":
+                await asyncio.sleep(2)
+                get_res = await sgai.crawl.get(crawl_id)
+                if get_res.status != "success" or not get_res.data:
+                    print("Failed to get status:", get_res.error)
+                    break
+                status = get_res.data.status
+                print(f"Progress: {get_res.data.finished}/{get_res.data.total} - {status}")
 
-            for page in get_res.data.get("pages", []):
-                print(f"\n  Page: {page['url']}")
-                print(f"  Status: {page['status']}")
-                print(f"  Depth: {page['depth']}")
+            if status in ("completed", "failed"):
+                print("\nPages crawled:")
+                for page in get_res.data.pages:
+                    print(f"\n  Page: {page.url}")
+                    print(f"  Status: {page.status}")
+                    print(f"  Depth: {page.depth}")
 
 asyncio.run(main())
```

examples/extract/extract_basic.py

Lines changed: 5 additions & 2 deletions
```diff
@@ -1,3 +1,6 @@
+from dotenv import load_dotenv
+load_dotenv()
+
 import json
 from scrapegraph_py import ScrapeGraphAI, ExtractRequest
 
@@ -9,7 +12,7 @@
 ))
 
 if res.status == "success":
-    print("Extracted:", json.dumps(res.data.get("json"), indent=2))
-    print("\nTokens used:", res.data.get("usage"))
+    print("Extracted:", json.dumps(res.data.json_data, indent=2))
+    print("\nTokens used:", res.data.usage)
 else:
     print("Failed:", res.error)
```

examples/extract/extract_basic_async.py

Lines changed: 5 additions & 2 deletions
```diff
@@ -1,3 +1,6 @@
+from dotenv import load_dotenv
+load_dotenv()
+
 import asyncio
 import json
 from scrapegraph_py import AsyncScrapeGraphAI, ExtractRequest
@@ -10,8 +13,8 @@ async def main():
         ))
 
         if res.status == "success":
-            print("Extracted:", json.dumps(res.data.get("json"), indent=2))
-            print("\nTokens used:", res.data.get("usage"))
+            print("Extracted:", json.dumps(res.data.json_data, indent=2))
+            print("\nTokens used:", res.data.usage)
         else:
             print("Failed:", res.error)
 
```

examples/extract/extract_with_schema.py

Lines changed: 6 additions & 3 deletions
```diff
@@ -1,3 +1,6 @@
+from dotenv import load_dotenv
+load_dotenv()
+
 import json
 from scrapegraph_py import ScrapeGraphAI, ExtractRequest
 
@@ -21,8 +24,8 @@
 ))
 
 if res.status == "success":
-    print("Extracted:", json.dumps(res.data.get("json"), indent=2))
-    print("\nRaw:", res.data.get("raw"))
-    print("\nTokens used:", res.data.get("usage"))
+    print("Extracted:", json.dumps(res.data.json_data, indent=2))
+    print("\nRaw:", res.data.raw)
+    print("\nTokens used:", res.data.usage)
 else:
     print("Failed:", res.error)
```

examples/extract/extract_with_schema_async.py

Lines changed: 6 additions & 3 deletions
```diff
@@ -1,3 +1,6 @@
+from dotenv import load_dotenv
+load_dotenv()
+
 import asyncio
 import json
 from scrapegraph_py import AsyncScrapeGraphAI, ExtractRequest
@@ -22,9 +25,9 @@ async def main():
         ))
 
         if res.status == "success":
-            print("Extracted:", json.dumps(res.data.get("json"), indent=2))
-            print("\nRaw:", res.data.get("raw"))
-            print("\nTokens used:", res.data.get("usage"))
+            print("Extracted:", json.dumps(res.data.json_data, indent=2))
+            print("\nRaw:", res.data.raw)
+            print("\nTokens used:", res.data.usage)
         else:
             print("Failed:", res.error)
 
```

examples/monitor/monitor_basic.py

Lines changed: 3 additions & 0 deletions
```diff
@@ -1,3 +1,6 @@
+from dotenv import load_dotenv
+load_dotenv()
+
 from scrapegraph_py import ScrapeGraphAI, MonitorCreateRequest, MarkdownFormatConfig
 
 sgai = ScrapeGraphAI()
```
