Skip to content

Commit fd23bb0

Browse files
VinciGit00 and claude committed
feat!: migrate to scrapegraph-py v2 API surface
Update all SDK usage to match the new v2 API from ScrapeGraphAI/scrapegraph-py#82:

- smartscraper() → extract(url=, prompt=)
- searchscraper() → search(query=)
- markdownify() → scrape(url=)
- Bump dependency to scrapegraph-py>=2.0.0

BREAKING CHANGE: requires scrapegraph-py v2.0.0+

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 7b5733d commit fd23bb0

5 files changed

Lines changed: 32 additions & 77 deletions

File tree

examples/markdownify/markdownify_scrapegraphai.py

Lines changed: 8 additions & 10 deletions
Original file line number · Diff line number · Diff line change
@@ -1,7 +1,8 @@
11
"""
2-
Example script demonstrating the markdownify functionality
2+
Example script demonstrating the scrape functionality (v2 API - replaces markdownify)
33
"""
44

5+
import json
56
import os
67
from dotenv import load_dotenv
78
from scrapegraph_py import Client
@@ -20,16 +21,13 @@ def main():
2021
raise ValueError("SCRAPEGRAPH_API_KEY environment variable not found")
2122
sgai_client = Client(api_key=api_key)
2223

23-
# Example 1: Convert a website to Markdown
24-
print("Example 1: Converting website to Markdown")
24+
# Scrape a website as markdown (v2 API - replaces markdownify)
25+
print("Scraping website as Markdown")
2526
print("-" * 50)
26-
response = sgai_client.markdownify(
27-
website_url="https://example.com"
27+
response = sgai_client.scrape(
28+
url="https://example.com"
2829
)
29-
print("Markdown output:")
30-
print(response["result"]) # Access the result key from the dictionary
31-
print("\nMetadata:")
32-
print(response.get("metadata", {})) # Use get() with default value
33-
print("\n" + "=" * 50 + "\n")
30+
print(json.dumps(response, indent=2))
31+
3432
if __name__ == "__main__":
3533
main()

examples/search_graph/scrapegraphai/searchscraper_scrapegraphai.py

Lines changed: 7 additions & 47 deletions
Original file line number · Diff line number · Diff line change
@@ -1,54 +1,14 @@
11
"""
2-
Example implementation of search-based scraping using Scrapegraph AI.
3-
This example demonstrates how to use the searchscraper to extract information from the web.
2+
Example implementation of search-based scraping using Scrapegraph AI v2 API.
3+
This example demonstrates how to use the search endpoint to extract information from the web.
44
"""
55

6+
import json
67
import os
7-
from typing import Dict, Any
88
from dotenv import load_dotenv
99
from scrapegraph_py import Client
1010
from scrapegraph_py.logger import sgai_logger
1111

12-
def format_response(response: Dict[str, Any]) -> None:
13-
"""
14-
Format and print the search response in a readable way.
15-
16-
Args:
17-
response (Dict[str, Any]): The response from the search API
18-
"""
19-
print("\n" + "="*50)
20-
print("SEARCH RESULTS")
21-
print("="*50)
22-
23-
# Print request ID
24-
print(f"\nRequest ID: {response['request_id']}")
25-
26-
# Print number of sources
27-
urls = response.get('reference_urls', [])
28-
print(f"\nSources Processed: {len(urls)}")
29-
30-
# Print the extracted information
31-
print("\nExtracted Information:")
32-
print("-"*30)
33-
if isinstance(response['result'], dict):
34-
for key, value in response['result'].items():
35-
print(f"\n{key.upper()}:")
36-
if isinstance(value, list):
37-
for item in value:
38-
print(f" • {item}")
39-
else:
40-
print(f" {value}")
41-
else:
42-
print(response['result'])
43-
44-
# Print source URLs
45-
if urls:
46-
print("\nSources:")
47-
print("-"*30)
48-
for i, url in enumerate(urls, 1):
49-
print(f"{i}. {url}")
50-
print("\n" + "="*50)
51-
5212
def main():
5313
# Load environment variables
5414
load_dotenv()
@@ -65,13 +25,13 @@ def main():
6525
sgai_client = Client(api_key=api_key)
6626

6727
try:
68-
# Basic search scraper example
28+
# Search request (v2 API - replaces searchscraper)
6929
print("\nSearching for information...")
7030

71-
search_response = sgai_client.searchscraper(
72-
user_prompt="Extract webpage information"
31+
search_response = sgai_client.search(
32+
query="Extract webpage information"
7333
)
74-
format_response(search_response)
34+
print(json.dumps(search_response, indent=2))
7535

7636
except Exception as e:
7737
print(f"\nError occurred: {str(e)}")

examples/smart_scraper_graph/scrapegraphai/smartscraper_scrapegraphai.py

Lines changed: 8 additions & 10 deletions
Original file line number · Diff line number · Diff line change
@@ -1,7 +1,8 @@
11
"""
2-
Example implementation using scrapegraph-py client directly.
2+
Example implementation using scrapegraph-py v2 client directly.
33
"""
44

5+
import json
56
import os
67
from dotenv import load_dotenv
78
from scrapegraph_py import Client
@@ -14,7 +15,7 @@ def main():
1415
# Get API key from environment variables
1516
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
1617
if not api_key:
17-
raise ValueError("SCRAPEGRAPH_API_KEY non trovato nelle variabili d'ambiente")
18+
raise ValueError("SCRAPEGRAPH_API_KEY not found in environment variables")
1819

1920
# Set up logging
2021
sgai_logger.set_logging(level="INFO")
@@ -23,17 +24,14 @@ def main():
2324
sgai_client = Client(api_key=api_key)
2425

2526
try:
26-
# SmartScraper request
27-
response = sgai_client.smartscraper(
28-
website_url="https://scrapegraphai.com",
29-
user_prompt="Extract the founders' informations"
27+
# Extract request (v2 API - replaces smartscraper)
28+
response = sgai_client.extract(
29+
url="https://scrapegraphai.com",
30+
prompt="Extract the founders' informations"
3031
)
3132

3233
# Print the response
33-
print(f"Request ID: {response['request_id']}")
34-
print(f"Result: {response['result']}")
35-
if response.get('reference_urls'):
36-
print(f"Reference URLs: {response['reference_urls']}")
34+
print(json.dumps(response, indent=2))
3735

3836
except Exception as e:
3937
print(f"Error occurred: {str(e)}")

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -32,7 +32,7 @@ dependencies = [
3232
"jsonschema>=4.25.1",
3333
"duckduckgo-search>=8.1.1",
3434
"pydantic>=2.12.5",
35-
"scrapegraph-py>=1.44.0",
35+
"scrapegraph-py>=2.0.0",
3636
]
3737

3838
readme = "README.md"

scrapegraphai/graphs/smart_scraper_graph.py

Lines changed: 8 additions & 9 deletions
Original file line number · Diff line number · Diff line change
@@ -90,18 +90,17 @@ def _create_graph(self) -> BaseGraph:
9090
# Initialize the client with explicit API key
9191
sgai_client = Client(api_key=self.config.get("api_key"))
9292

93-
# SmartScraper request
94-
response = sgai_client.smartscraper(
95-
website_url=self.source,
96-
user_prompt=self.prompt,
93+
# Extract request (v2 API)
94+
response = sgai_client.extract(
95+
url=self.source,
96+
prompt=self.prompt,
9797
)
9898

9999
# Use logging instead of print for better production practices
100-
if "request_id" in response and "result" in response:
101-
logger.info(f"Request ID: {response['request_id']}")
102-
logger.info(f"Result: {response['result']}")
103-
else:
104-
logger.warning("Missing expected keys in response.")
100+
if "id" in response:
101+
logger.info(f"Request ID: {response['id']}")
102+
if "data" in response:
103+
logger.info(f"Result: {response['data']}")
105104

106105
sgai_client.close()
107106

0 commit comments

Comments (0)