Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 23 additions & 11 deletions dataretrieval/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def query(url, payload, delimiter=",", ssl_check=True):
url: string
URL to query
payload: dict
query parameters passed to ``requests.get``
query parameters passed to ``requests.get``. Not mutated.
delimiter: string
Comment on lines 163 to 167
delimiter to use with lists
ssl_check: bool
Expand All @@ -172,20 +172,23 @@ def query(url, payload, delimiter=",", ssl_check=True):

Returns
-------
string: query response
The response from the API query ``requests.get`` function call.
response : ``requests.Response``
The response object from the underlying ``requests.get`` call.

Raises
------
ValueError
For any non-success HTTP status (4xx/5xx). The message includes
the status code, reason, and URL. Specific guidance is included
for 400, 404, 414, and 500/502/503 statuses; any other 4xx/5xx
falls through to a generic ValueError so callers don't silently
receive an HTML error page as if it were data.
"""
params = {key: to_str(value, delimiter) for key, value in payload.items()}

for key, value in payload.items():
payload[key] = to_str(value, delimiter)
# for index in range(len(payload)):
# key, value = payload[index]
# payload[index] = (key, to_str(value))

# define the user agent for the query
user_agent = {"user-agent": f"python-dataretrieval/{dataretrieval.__version__}"}

response = requests.get(url, params=payload, headers=user_agent, verify=ssl_check)
response = requests.get(url, params=params, headers=user_agent, verify=ssl_check)

if response.status_code == 400:
raise ValueError(
Expand Down Expand Up @@ -218,6 +221,15 @@ def query(url, payload, delimiter=",", ssl_check=True):
+ f"The service at {response.url} may be down or experiencing issues."
)

# Catch-all for any other 4xx/5xx (401, 403, 405, 408, 429, 501, 504, ...)
# so callers don't silently receive an HTML error page as if it were data.
# Re-raise as ValueError to keep the exception contract uniform with the
# explicit status branches above.
if not response.ok:
raise ValueError(
f"HTTP {response.status_code} {response.reason} for {response.url}"
)

if response.text.startswith("No sites/data"):
raise NoSitesError(response.url)

Expand Down
31 changes: 31 additions & 0 deletions tests/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,37 @@ def test_header(self):
assert response.status_code == 200 # GET was successful
assert "user-agent" in response.request.headers

def test_does_not_mutate_caller_payload(self, requests_mock):
"""`query` must not mutate the caller's payload dict.

Regression: previously the function did
``payload[key] = to_str(value, delimiter)`` in place, so list
values were overwritten with their stringified joins after the
call returned.
"""
url = "https://example.com/svc"
requests_mock.get(url, text="ok")
payload = {"sites": ["A", "B"], "stateCd": "MD"}
original = {k: v for k, v in payload.items()}

utils.query(url, payload)

assert payload == original
assert payload["sites"] is original["sites"]

@pytest.mark.parametrize("status", [401, 403, 405, 408, 429, 501, 504])
def test_unhandled_4xx_5xx_raises(self, requests_mock, status):
"""`query` must surface every 4xx/5xx, not just the few it formats.

Regression: codes outside {400, 404, 414, 500, 502, 503} used to
pass through silently — callers received the error body as if
it were data.
"""
url = "https://example.com/svc"
requests_mock.get(url, status_code=status, text="<html>denied</html>")
with pytest.raises(ValueError, match=str(status)):
utils.query(url, {"k": "v"})


class Test_BaseMetadata:
"""Tests of BaseMetadata"""
Expand Down
Loading