From c71b86541b839024523ec9ffd1e9551010fd6172 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Sun, 3 May 2026 15:18:12 -0500 Subject: [PATCH 1/2] utils.query: surface unhandled 4xx/5xx and stop mutating the caller's payload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two related correctness fixes to the shared HTTP wrapper used by every module in the package. * The function only formatted explicit messages for 400, 404, 414, and 500/502/503. Any other status (401, 403, 405, 408, 429, 501, 504, …) fell through and the response was returned as if it had succeeded — callers parsed an HTML error page as RDB or CSV. Add a ``raise_for_status()`` after the explicit branches so every non-success surfaces, while keeping the friendlier messages for the codes we already format. * The body did ``for key, value in payload.items(): payload[key] = to_str(...)``, mutating the caller's dict. List values came back replaced with their stringified joins, breaking any caller that re-used the dict. Build a fresh ``params`` dict in a comprehension and leave the input alone. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/utils.py | 16 +++++++--------- tests/utils_test.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py index 4aa76a61..36cd0d74 100644 --- a/dataretrieval/utils.py +++ b/dataretrieval/utils.py @@ -163,7 +163,7 @@ def query(url, payload, delimiter=",", ssl_check=True): url: string URL to query payload: dict - query parameters passed to ``requests.get`` + query parameters passed to ``requests.get``. Not mutated. delimiter: string delimiter to use with lists ssl_check: bool @@ -175,17 +175,11 @@ def query(url, payload, delimiter=",", ssl_check=True): string: query response The response from the API query ``requests.get`` function call. """ + params = {key: to_str(value, delimiter) for key, value in payload.items()} - for key, value in payload.items(): - payload[key] = to_str(value, delimiter) - # for index in range(len(payload)): - # key, value = payload[index] - # payload[index] = (key, to_str(value)) - - # define the user agent for the query user_agent = {"user-agent": f"python-dataretrieval/{dataretrieval.__version__}"} - response = requests.get(url, params=payload, headers=user_agent, verify=ssl_check) + response = requests.get(url, params=params, headers=user_agent, verify=ssl_check) if response.status_code == 400: raise ValueError( @@ -218,6 +212,10 @@ def query(url, payload, delimiter=",", ssl_check=True): + f"The service at {response.url} may be down or experiencing issues." ) + # Catch-all for any other 4xx/5xx (401, 403, 405, 408, 429, 501, 504, ...) + # so callers don't silently receive an HTML error page as if it were data. + response.raise_for_status() + if response.text.startswith("No sites/data"): raise NoSitesError(response.url) diff --git a/tests/utils_test.py b/tests/utils_test.py index 4cb9b383..a4edc7c2 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -41,6 +41,37 @@ def test_header(self): assert response.status_code == 200 # GET was successful assert "user-agent" in response.request.headers + def test_does_not_mutate_caller_payload(self, requests_mock): + """`query` must not mutate the caller's payload dict. + + Regression: previously the function did + ``payload[key] = to_str(value, delimiter)`` in place, so list + values were overwritten with their stringified joins after the + call returned. + """ + url = "https://example.com/svc" + requests_mock.get(url, text="ok") + payload = {"sites": ["A", "B"], "stateCd": "MD"} + original = {k: v for k, v in payload.items()} + + utils.query(url, payload) + + assert payload == original + assert payload["sites"] is original["sites"] + + @pytest.mark.parametrize("status", [401, 403, 405, 408, 429, 501, 504]) + def test_unhandled_4xx_5xx_raises(self, requests_mock, status): + """`query` must surface every 4xx/5xx, not just the few it formats. + + Regression: codes outside {400, 404, 414, 500, 502, 503} used to + pass through silently — callers received the error body as if + it were data. + """ + url = "https://example.com/svc" + requests_mock.get(url, status_code=status, text="denied") + with pytest.raises(Exception): # noqa: B017 -- HTTPError or ValueError + utils.query(url, {"k": "v"}) + class Test_BaseMetadata: """Tests of BaseMetadata""" From 5b8a6614bf75c0e9cbe3496f94e559d0fd8548f1 Mon Sep 17 00:00:00 2001 From: thodson-usgs Date: Mon, 4 May 2026 10:10:48 -0500 Subject: [PATCH 2/2] Standardize exception type and document return for query() Per copilot review on PR #253: - Re-raise the catch-all 4xx/5xx as ValueError with status/reason/url for parity with the explicit 400/404/414/500/502/503 branches; callers no longer need to handle both ValueError and HTTPError from the same helper. - Update the Returns section to document the actual return type (requests.Response) and the ValueError raise contract. - Narrow the test from pytest.raises(Exception) to pytest.raises( ValueError) with a match on the status code to assert the contract. Co-Authored-By: Claude Opus 4.7 (1M context) --- dataretrieval/utils.py | 20 +++++++++++++++++--- tests/utils_test.py | 2 +- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py index 36cd0d74..ebc35537 100644 --- a/dataretrieval/utils.py +++ b/dataretrieval/utils.py @@ -172,8 +172,17 @@ def query(url, payload, delimiter=",", ssl_check=True): Returns ------- - string: query response - The response from the API query ``requests.get`` function call. + response : ``requests.Response`` + The response object from the underlying ``requests.get`` call. + + Raises + ------ + ValueError + For any non-success HTTP status (4xx/5xx). The message includes + the status code, reason, and URL. Specific guidance is included + for 400, 404, 414, and 500/502/503 statuses; any other 4xx/5xx + falls through to a generic ValueError so callers don't silently + receive an HTML error page as if it were data. """ params = {key: to_str(value, delimiter) for key, value in payload.items()} @@ -214,7 +223,12 @@ def query(url, payload, delimiter=",", ssl_check=True): # Catch-all for any other 4xx/5xx (401, 403, 405, 408, 429, 501, 504, ...) # so callers don't silently receive an HTML error page as if it were data. - response.raise_for_status() + # Re-raise as ValueError to keep the exception contract uniform with the + # explicit status branches above. + if not response.ok: + raise ValueError( + f"HTTP {response.status_code} {response.reason} for {response.url}" + ) if response.text.startswith("No sites/data"): raise NoSitesError(response.url) diff --git a/tests/utils_test.py b/tests/utils_test.py index a4edc7c2..28c610f7 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -69,7 +69,7 @@ def test_unhandled_4xx_5xx_raises(self, requests_mock, status): """ url = "https://example.com/svc" requests_mock.get(url, status_code=status, text="denied") - with pytest.raises(Exception): # noqa: B017 -- HTTPError or ValueError + with pytest.raises(ValueError, match=str(status)): utils.query(url, {"k": "v"})