Skip to content

Commit fd23bb0

Browse files
VinciGit00 and claude committed
feat!: migrate to scrapegraph-py v2 API surface
Update all SDK usage to match the new v2 API from ScrapeGraphAI/scrapegraph-py#82:

- smartscraper() → extract(url=, prompt=)
- searchscraper() → search(query=)
- markdownify() → scrape(url=)
- Bump dependency to scrapegraph-py>=2.0.0

BREAKING CHANGE: requires scrapegraph-py v2.0.0+

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 7b5733d commit fd23bb0

5 files changed

Lines changed: 32 additions & 77 deletions

File tree

examples/markdownify/markdownify_scrapegraphai.py

Lines changed: 8 additions & 10 deletions
Original file line number · Diff line number · Diff line change
@@ -1,7 +1,8 @@
11
"""
2-
Example script demonstrating the markdownify functionality
2+
Example script demonstrating the scrape functionality (v2 API - replaces markdownify)
33
"""
44

5+
import json
56
import os
67
from dotenv import load_dotenv
78
from scrapegraph_py import Client
@@ -20,16 +21,13 @@ def main():
2021
raise ValueError("SCRAPEGRAPH_API_KEY environment variable not found")
2122
sgai_client = Client(api_key=api_key)
2223

23-
# Example 1: Convert a website to Markdown
24-
print("Example 1: Converting website to Markdown")
24+
# Scrape a website as markdown (v2 API - replaces markdownify)
25+
print("Scraping website as Markdown")
2526
print("-" * 50)
26-
response = sgai_client.markdownify(
27-
website_url="https://example.com"
27+
response = sgai_client.scrape(
28+
url="https://example.com"
2829
)
29-
print("Markdown output:")
30-
print(response["result"]) # Access the result key from the dictionary
31-
print("\nMetadata:")
32-
print(response.get("metadata", {})) # Use get() with default value
33-
print("\n" + "=" * 50 + "\n")
30+
print(json.dumps(response, indent=2))
31+
3432
if __name__ == "__main__":
3533
main()

examples/search_graph/scrapegraphai/searchscraper_scrapegraphai.py

Lines changed: 7 additions & 47 deletions
Original file line number · Diff line number · Diff line change
@@ -1,54 +1,14 @@
11
"""
2-
Example implementation of search-based scraping using Scrapegraph AI.
3-
This example demonstrates how to use the searchscraper to extract information from the web.
2+
Example implementation of search-based scraping using Scrapegraph AI v2 API.
3+
This example demonstrates how to use the search endpoint to extract information from the web.
44
"""
55

6+
import json
67
import os
7-
from typing import Dict, Any
88
from dotenv import load_dotenv
99
from scrapegraph_py import Client
1010
from scrapegraph_py.logger import sgai_logger
1111

12-
def format_response(response: Dict[str, Any]) -> None:
13-
"""
14-
Format and print the search response in a readable way.
15-
16-
Args:
17-
response (Dict[str, Any]): The response from the search API
18-
"""
19-
print("\n" + "="*50)
20-
print("SEARCH RESULTS")
21-
print("="*50)
22-
23-
# Print request ID
24-
print(f"\nRequest ID: {response['request_id']}")
25-
26-
# Print number of sources
27-
urls = response.get('reference_urls', [])
28-
print(f"\nSources Processed: {len(urls)}")
29-
30-
# Print the extracted information
31-
print("\nExtracted Information:")
32-
print("-"*30)
33-
if isinstance(response['result'], dict):
34-
for key, value in response['result'].items():
35-
print(f"\n{key.upper()}:")
36-
if isinstance(value, list):
37-
for item in value:
38-
print(f" • {item}")
39-
else:
40-
print(f" {value}")
41-
else:
42-
print(response['result'])
43-
44-
# Print source URLs
45-
if urls:
46-
print("\nSources:")
47-
print("-"*30)
48-
for i, url in enumerate(urls, 1):
49-
print(f"{i}. {url}")
50-
print("\n" + "="*50)
51-
5212
def main():
5313
# Load environment variables
5414
load_dotenv()
@@ -65,13 +25,13 @@ def main():
6525
sgai_client = Client(api_key=api_key)
6626

6727
try:
68-
# Basic search scraper example
28+
# Search request (v2 API - replaces searchscraper)
6929
print("\nSearching for information...")
7030

71-
search_response = sgai_client.searchscraper(
72-
user_prompt="Extract webpage information"
31+
search_response = sgai_client.search(
32+
query="Extract webpage information"
7333
)
74-
format_response(search_response)
34+
print(json.dumps(search_response, indent=2))
7535

7636
except Exception as e:
7737
print(f"\nError occurred: {str(e)}")

examples/smart_scraper_graph/scrapegraphai/smartscraper_scrapegraphai.py

Lines changed: 8 additions & 10 deletions
Original file line number · Diff line number · Diff line change
@@ -1,7 +1,8 @@
11
"""
2-
Example implementation using scrapegraph-py client directly.
2+
Example implementation using scrapegraph-py v2 client directly.
33
"""
44

5+
import json
56
import os
67
from dotenv import load_dotenv
78
from scrapegraph_py import Client
@@ -14,7 +15,7 @@ def main():
1415
# Get API key from environment variables
1516
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
1617
if not api_key:
17-
raise ValueError("SCRAPEGRAPH_API_KEY non trovato nelle variabili d'ambiente")
18+
raise ValueError("SCRAPEGRAPH_API_KEY not found in environment variables")
1819

1920
# Set up logging
2021
sgai_logger.set_logging(level="INFO")
@@ -23,17 +24,14 @@ def main():
2324
sgai_client = Client(api_key=api_key)
2425

2526
try:
26-
# SmartScraper request
27-
response = sgai_client.smartscraper(
28-
website_url="https://scrapegraphai.com",
29-
user_prompt="Extract the founders' informations"
27+
# Extract request (v2 API - replaces smartscraper)
28+
response = sgai_client.extract(
29+
url="https://scrapegraphai.com",
30+
prompt="Extract the founders' informations"
3031
)
3132

3233
# Print the response
33-
print(f"Request ID: {response['request_id']}")
34-
print(f"Result: {response['result']}")
35-
if response.get('reference_urls'):
36-
print(f"Reference URLs: {response['reference_urls']}")
34+
print(json.dumps(response, indent=2))
3735

3836
except Exception as e:
3937
print(f"Error occurred: {str(e)}")

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -32,7 +32,7 @@ dependencies = [
3232
"jsonschema>=4.25.1",
3333
"duckduckgo-search>=8.1.1",
3434
"pydantic>=2.12.5",
35-
"scrapegraph-py>=1.44.0",
35+
"scrapegraph-py>=2.0.0",
3636
]
3737

3838
readme = "README.md"

scrapegraphai/graphs/smart_scraper_graph.py

Lines changed: 8 additions & 9 deletions
Original file line number · Diff line number · Diff line change
@@ -90,18 +90,17 @@ def _create_graph(self) -> BaseGraph:
9090
# Initialize the client with explicit API key
9191
sgai_client = Client(api_key=self.config.get("api_key"))
9292

93-
# SmartScraper request
94-
response = sgai_client.smartscraper(
95-
website_url=self.source,
96-
user_prompt=self.prompt,
93+
# Extract request (v2 API)
94+
response = sgai_client.extract(
95+
url=self.source,
96+
prompt=self.prompt,
9797
)
9898

9999
# Use logging instead of print for better production practices
100-
if "request_id" in response and "result" in response:
101-
logger.info(f"Request ID: {response['request_id']}")
102-
logger.info(f"Result: {response['result']}")
103-
else:
104-
logger.warning("Missing expected keys in response.")
100+
if "id" in response:
101+
logger.info(f"Request ID: {response['id']}")
102+
if "data" in response:
103+
logger.info(f"Result: {response['data']}")
105104

106105
sgai_client.close()
107106

0 commit comments

Comments (0)