diff --git a/dataretrieval/wqp.py b/dataretrieval/wqp.py index 24e1737e..ea40dc65 100644 --- a/dataretrieval/wqp.py +++ b/dataretrieval/wqp.py @@ -147,7 +147,7 @@ def get_results( response = query(url, kwargs, delimiter=";", ssl_check=ssl_check) df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_sites( @@ -202,7 +202,7 @@ def what_sites( df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_organizations( @@ -253,7 +253,7 @@ def what_organizations( df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_projects(ssl_check=True, legacy=True, **kwargs): @@ -300,7 +300,7 @@ def what_projects(ssl_check=True, legacy=True, **kwargs): df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_activities( @@ -364,7 +364,7 @@ def what_activities( df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_detection_limits( @@ -422,7 +422,7 @@ def what_detection_limits( df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_habitat_metrics( @@ -473,7 +473,7 @@ def what_habitat_metrics( df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_project_weights(ssl_check=True, legacy=True, **kwargs): @@ -525,7 +525,7 @@ def what_project_weights(ssl_check=True, legacy=True, **kwargs): df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def what_activity_metrics(ssl_check=True, legacy=True, **kwargs): @@ -577,7 +577,7 @@ def what_activity_metrics(ssl_check=True, legacy=True, **kwargs): df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) - return df, WQP_Metadata(response) + return df, WQP_Metadata(response, legacy=legacy, ssl_check=ssl_check, **kwargs) def wqp_url(service): @@ -615,17 +615,20 @@ class WQP_Metadata(BaseMetadata): ---------- url : str Response url - query_time : datetme.timedelta + query_time : datetime.timedelta Response elapsed time header : requests.structures.CaseInsensitiveDict Response headers - comments : None - Metadata comments. WQP does not return comments. - site_info : tuple[pd.DataFrame, NWIS_Metadata] | None - Site information if the query included `sites`, `site` or `site_no`. + comment : None + Metadata comment. WQP does not return comments. + site_info : tuple[pd.DataFrame, WQP_Metadata] | None + Site information if the query included a site filter (`siteid`, + `sites`, `site`, or `site_no`). """ - def __init__(self, response, **parameters) -> None: + def __init__( + self, response, legacy: bool = True, ssl_check: bool = True, **parameters + ) -> None: """Generates a standard set of metadata informed by the response with specific metadata for WQP data. @@ -633,9 +636,17 @@ def __init__(self, response, **parameters) -> None: ---------- response : Response Response object from requests module - + legacy : bool + Whether the originating request used the legacy WQX endpoint. + Forwarded to ``what_sites`` when ``site_info`` is accessed so the + resolved station metadata uses the same profile as the original + query. + ssl_check : bool + Whether the originating request verified SSL. Forwarded to + ``what_sites`` for consistency. parameters : dict - Unpacked dictionary of the parameters supplied in the request + Unpacked dictionary of the remaining parameters supplied in the + request. Returns ------- @@ -647,15 +658,21 @@ def __init__(self, response, **parameters) -> None: super().__init__(response) self._parameters = parameters - - @property - def site_info(self): - if "sites" in self._parameters: - return what_sites(sites=parameters["sites"]) - elif "site" in self._parameters: - return what_sites(sites=parameters["site"]) - elif "site_no" in self._parameters: - return what_sites(sites=parameters["site_no"]) + self._legacy = legacy + self._ssl_check = ssl_check + + @property + def site_info(self): + # Walk WQP-native key first, then legacy NWIS-style aliases. Whichever + # matched, pass the value as `siteid` -- that's what_sites' native arg. + for key in ("siteid", "sites", "site", "site_no"): + if key in self._parameters: + return what_sites( + siteid=self._parameters[key], + legacy=self._legacy, + ssl_check=self._ssl_check, + ) + return None def _check_kwargs(kwargs): diff --git a/tests/wqp_test.py b/tests/wqp_test.py index a337f7ec..7b3e6a7e 100644 --- a/tests/wqp_test.py +++ b/tests/wqp_test.py @@ -218,6 +218,69 @@ def test_check_kwargs(): kwargs = _check_kwargs(kwargs) +def test_wqp_metadata_site_info_property_resolves(requests_mock): + """`site_info` must be a real property bound to the class. + + Regression: previously `site_info` was defined as a closure inside + `__init__`, so `md.site_info` fell through to `BaseMetadata.site_info` + and raised `NotImplementedError`. Also verify the parameters are + threaded through from `get_results`. + """ + results_url = ( + "https://www.waterqualitydata.us/data/Result/Search?" + "siteid=WIDNR_WQX-10032762&mimeType=csv" + ) + sites_url = ( + "https://www.waterqualitydata.us/data/Station/Search?" + "siteid=WIDNR_WQX-10032762&mimeType=csv" + ) + mock_request(requests_mock, results_url, "tests/data/wqp_results.txt") + mock_request(requests_mock, sites_url, "tests/data/wqp_sites.txt") + + _df, md = get_results(siteid="WIDNR_WQX-10032762") + + # site_info must be a bound property — accessing it should return the + # what_sites() tuple, not raise NotImplementedError. + site_df, site_md = md.site_info + assert isinstance(site_df, DataFrame) + assert site_md.url == sites_url + + +def test_wqp_metadata_site_info_returns_none_without_site_filter(requests_mock): + """`site_info` returns None when no site filter was supplied.""" + results_url = ( + "https://www.waterqualitydata.us/data/Result/Search?" + "characteristicName=Chloride&mimeType=csv" + ) + mock_request(requests_mock, results_url, "tests/data/wqp_results.txt") + _df, md = get_results(characteristicName="Chloride") + assert md.site_info is None + + +def test_wqp_metadata_site_info_uses_wqx3_when_originating_query_was_wqx3( + requests_mock, +): + """`site_info` must use the same legacy/WQX3.0 profile as the originating query. + + Regression: previously `site_info` always called `what_sites()` with default + `legacy=True`, so a WQX3.0 results query produced a legacy Station lookup. + """ + results_url = ( + "https://www.waterqualitydata.us/wqx3/Result/search?" + "siteid=UTAHDWQ_WQX-4993795&mimeType=csv&dataProfile=fullPhysChem" + ) + sites_wqx3_url = ( + "https://www.waterqualitydata.us/wqx3/Station/search?" + "siteid=UTAHDWQ_WQX-4993795&mimeType=csv" + ) + mock_request(requests_mock, results_url, "tests/data/wqp3_results.txt") + mock_request(requests_mock, sites_wqx3_url, "tests/data/wqp_sites.txt") + + _df, md = get_results(legacy=False, siteid="UTAHDWQ_WQX-4993795") + _site_df, site_md = md.site_info + assert site_md.url == sites_wqx3_url + + def test_get_results_wqx3_preserves_user_dataProfile(requests_mock): """A valid user-supplied WQX3.0 profile must not be overwritten.