diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py index 4aa76a61..ebc35537 100644 --- a/dataretrieval/utils.py +++ b/dataretrieval/utils.py @@ -163,7 +163,7 @@ def query(url, payload, delimiter=",", ssl_check=True): url: string URL to query payload: dict - query parameters passed to ``requests.get`` + query parameters passed to ``requests.get``. Not mutated. delimiter: string delimiter to use with lists ssl_check: bool @@ -172,20 +172,23 @@ def query(url, payload, delimiter=",", ssl_check=True): Returns ------- - string: query response - The response from the API query ``requests.get`` function call. + response : ``requests.Response`` + The response object from the underlying ``requests.get`` call. + + Raises + ------ + ValueError + For any non-success HTTP status (4xx/5xx). The message includes + the status code, reason, and URL. Specific guidance is included + for 400, 404, 414, and 500/502/503 statuses; any other 4xx/5xx + falls through to a generic ValueError so callers don't silently + receive an HTML error page as if it were data. """ + params = {key: to_str(value, delimiter) for key, value in payload.items()} - for key, value in payload.items(): - payload[key] = to_str(value, delimiter) - # for index in range(len(payload)): - # key, value = payload[index] - # payload[index] = (key, to_str(value)) - - # define the user agent for the query user_agent = {"user-agent": f"python-dataretrieval/{dataretrieval.__version__}"} - response = requests.get(url, params=payload, headers=user_agent, verify=ssl_check) + response = requests.get(url, params=params, headers=user_agent, verify=ssl_check) if response.status_code == 400: raise ValueError( @@ -218,6 +221,15 @@ def query(url, payload, delimiter=",", ssl_check=True): + f"The service at {response.url} may be down or experiencing issues." ) + # Catch-all for any other 4xx/5xx (401, 403, 405, 408, 429, 501, 504, ...) + # so callers don't silently receive an HTML error page as if it were data. + # Re-raise as ValueError to keep the exception contract uniform with the + # explicit status branches above. + if not response.ok: + raise ValueError( + f"HTTP {response.status_code} {response.reason} for {response.url}" + ) + if response.text.startswith("No sites/data"): raise NoSitesError(response.url) diff --git a/tests/utils_test.py b/tests/utils_test.py index 4cb9b383..28c610f7 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -41,6 +41,37 @@ def test_header(self): assert response.status_code == 200 # GET was successful assert "user-agent" in response.request.headers + def test_does_not_mutate_caller_payload(self, requests_mock): + """`query` must not mutate the caller's payload dict. + + Regression: previously the function did + ``payload[key] = to_str(value, delimiter)`` in place, so list + values were overwritten with their stringified joins after the + call returned. + """ + url = "https://example.com/svc" + requests_mock.get(url, text="ok") + payload = {"sites": ["A", "B"], "stateCd": "MD"} + original = {k: v for k, v in payload.items()} + + utils.query(url, payload) + + assert payload == original + assert payload["sites"] is original["sites"] + + @pytest.mark.parametrize("status", [401, 403, 405, 408, 429, 501, 504]) + def test_unhandled_4xx_5xx_raises(self, requests_mock, status): + """`query` must surface every 4xx/5xx, not just the few it formats. + + Regression: codes outside {400, 404, 414, 500, 502, 503} used to + pass through silently — callers received the error body as if + it were data. + """ + url = "https://example.com/svc" + requests_mock.get(url, status_code=status, text="denied") + with pytest.raises(ValueError, match=str(status)): + utils.query(url, {"k": "v"}) + class Test_BaseMetadata: """Tests of BaseMetadata"""