Skip to content

Commit d1faf57

Browse files
feat: add integration tests matching JS SDK
- Test credits, scrape, extract, search, history, crawl
- Fix HttpUrl serialization (mode="json" in model_dump)
- Add python-dotenv for loading .env

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 45846ba commit d1faf57

5 files changed

Lines changed: 133 additions & 2 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,5 +42,6 @@ packages = ["src/scrapegraph_py"]
4242
dev = [
4343
"pytest>=8.0.0",
4444
"pytest-asyncio>=0.23.0",
45+
"python-dotenv>=1.0.0",
4546
"ruff>=0.4.0",
4647
]

src/scrapegraph_py/async_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def _to_camel(s: str) -> str:
6363

6464

6565
def _serialize(model: BaseModel) -> dict:
66-
data = model.model_dump(exclude_none=True, by_alias=True)
66+
data = model.model_dump(mode="json", exclude_none=True, by_alias=True)
6767

6868
def convert_keys(obj):
6969
if isinstance(obj, dict):

src/scrapegraph_py/client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def _to_camel(s: str) -> str:
6363

6464

6565
def _serialize(model: BaseModel) -> dict:
66-
data = model.model_dump(exclude_none=True, by_alias=True)
66+
data = model.model_dump(mode="json", exclude_none=True, by_alias=True)
6767

6868
def convert_keys(obj):
6969
if isinstance(obj, dict):

tests/test_integration.py

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
"""Live integration tests for the ScrapeGraphAI Python SDK.

These tests talk to the real API and therefore need a valid SGAI_API_KEY,
supplied either via the environment or a local .env file. When no key is
configured the entire module is skipped.
"""
import os

import pytest
from dotenv import load_dotenv

# Pull SGAI_API_KEY (and any other settings) out of a local .env file, if one exists.
load_dotenv()

# Bail out of the whole module early when no API key is available — every test
# below requires live credentials.
if not os.environ.get("SGAI_API_KEY"):
    pytest.skip("SGAI_API_KEY env var required for integration tests", allow_module_level=True)

from scrapegraph_py import (
    CrawlRequest,
    ExtractRequest,
    FetchConfig,
    HistoryFilter,
    ImagesFormatConfig,
    LinksFormatConfig,
    MarkdownFormatConfig,
    ScrapeGraphAI,
    ScrapeRequest,
    SearchRequest,
)

# A single shared client instance, reused by every test in this module.
sgai = ScrapeGraphAI()
24+
25+
26+
class TestIntegration:
    """End-to-end tests against the live API, mirroring the JS SDK suite.

    Each test exercises one SDK surface (credits, scrape, extract, search,
    history, crawl) and asserts on the response envelope; diagnostic values
    are printed so failures are easy to triage from CI logs.
    """

    def test_credits(self):
        response = sgai.credits()
        print("credits:", response)
        assert response.status == "success"
        assert "remaining" in response.data
        assert "plan" in response.data

    def test_scrape_default_format(self):
        # No explicit formats: markdown is the server-side default.
        response = sgai.scrape(ScrapeRequest(url="https://example.com"))
        print("scrape default:", response.status, response.error)
        assert response.status == "success"
        assert response.data["results"].get("markdown") is not None

    def test_scrape_single_format(self):
        request = ScrapeRequest(
            url="https://example.com",
            formats=[MarkdownFormatConfig()],
        )
        response = sgai.scrape(request)
        print("scrape single:", response.status, response.error)
        assert response.status == "success"
        assert response.data["results"].get("markdown") is not None

    def test_scrape_multiple_formats(self):
        requested_formats = [
            MarkdownFormatConfig(mode="reader"),
            LinksFormatConfig(),
            ImagesFormatConfig(),
        ]
        response = sgai.scrape(
            ScrapeRequest(url="https://example.com", formats=requested_formats)
        )
        print("scrape multi:", response.status, response.error)
        assert response.status == "success"
        results = response.data["results"]
        assert results.get("markdown") is not None
        assert results.get("links") is not None

    def test_scrape_pdf(self):
        request = ScrapeRequest(
            url="https://pdfobject.com/pdf/sample.pdf",
            content_type="application/pdf",
            formats=[MarkdownFormatConfig()],
        )
        response = sgai.scrape(request)
        print("scrape PDF:", response.status, response.error)
        assert response.status == "success"
        assert response.data["metadata"]["contentType"] == "application/pdf"

    def test_scrape_with_fetch_config(self):
        request = ScrapeRequest(
            url="https://example.com",
            fetch_config=FetchConfig(mode="fast", timeout=15000),
            formats=[MarkdownFormatConfig()],
        )
        response = sgai.scrape(request)
        print("scrape fetchConfig:", response.status, response.error)
        assert response.status == "success"

    def test_extract(self):
        request = ExtractRequest(
            url="https://example.com",
            prompt="What is this page about?",
        )
        response = sgai.extract(request)
        print("extract:", response.status, response.error)
        assert response.status == "success"

    def test_search(self):
        response = sgai.search(
            SearchRequest(query="anthropic claude", num_results=2)
        )
        print("search:", response.status, response.error)
        assert response.status == "success"
        assert len(response.data["results"]) > 0

    def test_history_list(self):
        response = sgai.history.list(HistoryFilter(limit=5))
        print("history.list:", response.status, response.data.get("pagination") if response.data else None)
        assert response.status == "success"

    def test_crawl_start_and_get(self):
        start = sgai.crawl.start(
            CrawlRequest(url="https://example.com", max_pages=2)
        )
        print("crawl.start:", start.status, start.data.get("id") if start.data else None, start.error)

        # Crawl jobs can be rejected server-side for concurrency/rate limits;
        # treat that as a skip rather than a hard failure.
        error_text = start.error or ""
        if start.status == "error" and ("Max" in error_text or "Rate" in error_text):
            pytest.skip("Rate limited")

        assert start.status == "success"

        # Only poll the job when the start call actually returned an id.
        if start.data and start.data.get("id"):
            job = sgai.crawl.get(start.data["id"])
            print("crawl.get:", job.status, job.data.get("status") if job.data else None)
            assert job.status == "success"

uv.lock

Lines changed: 11 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)