Skip to content

Commit d358fa0

Browse files
committed
validate host value, sanetize user agent string
1 parent 74ef8ee commit d358fa0

File tree

5 files changed

+75
-5
lines changed

5 files changed

+75
-5
lines changed

opencage/geocoder.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,36 @@
1818
DEFAULT_DOMAIN = 'api.opencagedata.com'
1919

2020

21+
def _validate_domain(domain):
22+
"""Validate that the API domain is an allowed hostname.
23+
24+
Only subdomains of opencagedata.com, localhost, and 0.0.0.0 are
25+
permitted. An optional port suffix (e.g. ``localhost:8080``) is allowed.
26+
27+
Args:
28+
domain: Hostname string, optionally with a port.
29+
30+
Returns:
31+
The validated domain string.
32+
33+
Raises:
34+
ValueError: If the domain is not in the allow-list.
35+
"""
36+
# Strip optional port
37+
host = domain.rsplit(':', 1)[0] if ':' in domain else domain
38+
39+
if host in ('localhost', '0.0.0.0'):
40+
return domain
41+
42+
if host.endswith('.opencagedata.com'):
43+
return domain
44+
45+
raise ValueError(
46+
f"Invalid API domain '{domain}'. "
47+
f"Must be a subdomain of opencagedata.com, localhost, or 0.0.0.0."
48+
)
49+
50+
2151
def backoff_max_time():
2252
"""Return the maximum backoff time in seconds for retrying API requests.
2353
@@ -146,6 +176,7 @@ def __init__(
146176

147177
if protocol and protocol not in ('http', 'https'):
148178
protocol = 'https'
179+
_validate_domain(domain)
149180
self.url = protocol + '://' + domain + '/geocode/v1/json'
150181

151182
# https://docs.aiohttp.org/en/stable/client_advanced.html#ssl-control-for-tcp-sockets
@@ -353,7 +384,8 @@ def _opencage_headers(self, client):
353384

354385
comment = ''
355386
if self.user_agent_comment:
356-
comment = f" ({self.user_agent_comment})"
387+
clean = self.user_agent_comment.replace('\r', '').replace('\n', '')
388+
comment = f" ({clean})"
357389

358390
return {
359391
'User-Agent': f"opencage-python/{__version__} Python/{py_version} {client}/{client_version}{comment}"

test/fixtures/cli/.DS_Store

6 KB
Binary file not shown.

test/test_error_ssl.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,11 @@
1111

1212
@pytest.mark.asyncio
1313
async def test_sslerror():
14-
bad_domain = 'wrong.host.badssl.com'
15-
async with OpenCageGeocode('6d0e711d72d74daeb2b0bfd2a5cdfdba', domain=bad_domain) as geocoder:
14+
# Use a bad certificate (from badssl.com) against the real OpenCage domain
15+
# to trigger an SSL error without needing a non-allowed domain
16+
sslcontext = ssl.create_default_context(cafile='test/fixtures/badssl-com-chain.pem')
17+
18+
async with OpenCageGeocode('6d0e711d72d74daeb2b0bfd2a5cdfdba', sslcontext=sslcontext) as geocoder:
1619
with pytest.raises(SSLError) as excinfo:
1720
await geocoder.geocode_async("something")
1821
assert str(excinfo.value).startswith('SSL Certificate error')

test/test_geocoder_args.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,5 +21,18 @@ def test_api_key_env_var():
2121

2222
def test_custom_domain():
2323
"""Test that custom domain can be set"""
24-
geocoder = OpenCageGeocode('abcde', domain='example.com')
25-
assert geocoder.url == 'https://example.com/geocode/v1/json'
24+
geocoder = OpenCageGeocode('abcde', domain='api2.opencagedata.com')
25+
assert geocoder.url == 'https://api2.opencagedata.com/geocode/v1/json'
26+
27+
28+
def test_custom_domain_localhost():
29+
"""Test that localhost domain can be set"""
30+
geocoder = OpenCageGeocode('abcde', domain='localhost:8080')
31+
assert geocoder.url == 'https://localhost:8080/geocode/v1/json'
32+
33+
34+
def test_custom_domain_invalid():
35+
"""Test that invalid domains are rejected"""
36+
import pytest
37+
with pytest.raises(ValueError, match="Invalid API domain"):
38+
OpenCageGeocode('abcde', domain='www.example.com')

test/test_headers.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,25 @@ def test_sync():
3333
user_agent = request.headers['User-Agent']
3434

3535
assert user_agent_format.match(user_agent) is not None
36+
37+
38+
@responses.activate
39+
def test_user_agent_comment_crlf_stripped():
40+
"""Test that CR/LF characters are stripped from user_agent_comment to prevent header injection."""
41+
geocoder_crlf = OpenCageGeocode('abcde', user_agent_comment="bad\r\nInjected-Header: value")
42+
43+
responses.add(
44+
responses.GET,
45+
geocoder_crlf.url,
46+
body=Path('test/fixtures/uk_postcode.json').read_text(encoding="utf-8"),
47+
status=200
48+
)
49+
50+
geocoder_crlf.geocode("EC1M 5RF")
51+
52+
request = responses.calls[-1].request
53+
user_agent = request.headers['User-Agent']
54+
55+
assert '\r' not in user_agent
56+
assert '\n' not in user_agent
57+
assert 'Injected-Header' in user_agent # still present, just on the same line

0 commit comments

Comments
 (0)