diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py
index 7070da50..444c1a4b 100644
--- a/dataretrieval/waterdata/utils.py
+++ b/dataretrieval/waterdata/utils.py
@@ -892,11 +892,13 @@ def _handle_stats_nesting(
     # otherwise return a geodataframe
     if not geopd:
         df = pd.json_normalize(body["features"]).drop(
-            columns=["type", "properties.data"]
+            columns=["type", "properties.data"], errors="ignore"
         )
         df.columns = df.columns.str.split(".").str[-1]
     else:
-        df = gpd.GeoDataFrame.from_features(body["features"]).drop(columns=["data"])
+        df = gpd.GeoDataFrame.from_features(body["features"]).drop(
+            columns=["data"], errors="ignore"
+        )
 
     # Unnest json features, properties, data, and values while retaining necessary
     # metadata to merge with main dataframe.
diff --git a/tests/waterdata_utils_test.py b/tests/waterdata_utils_test.py
index 36150be8..c533431d 100644
--- a/tests/waterdata_utils_test.py
+++ b/tests/waterdata_utils_test.py
@@ -4,6 +4,7 @@
 
 from dataretrieval.waterdata.utils import (
     _get_args,
+    _handle_stats_nesting,
     _walk_pages,
 )
 
@@ -80,3 +81,33 @@ def test_walk_pages_multiple_mocked():
     assert mock_client.send.called
     assert mock_client.request.called
     assert mock_client.request.call_args[0][1] == "https://example.com/page2"
+
+
+def test_handle_stats_nesting_tolerates_missing_drop_columns():
+    """Check that a missing drop column does not raise ``KeyError``.
+
+    The feature below has no "type" key, so "type" is absent after flattening.
+    """
+    body = {
+        "next": None,
+        "features": [
+            {  # no top-level "type" key in this feature
+                "properties": {
+                    "monitoring_location_id": "USGS-12345",
+                    "data": [
+                        {
+                            "parameter_code": "00060",
+                            "unit_of_measure": "ft^3/s",
+                            "parent_time_series_id": "ts-1",
+                            "values": [{"statistic_id": "mean", "value": 10.0}],
+                        }
+                    ],
+                },
+            }
+        ],
+    }
+
+    df = _handle_stats_nesting(body, geopd=False)  # pandas (non-geopandas) branch
+
+    assert len(df) == 1
+    assert df["monitoring_location_id"].iloc[0] == "USGS-12345"