Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 78 additions & 49 deletions dataretrieval/waterdata/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,54 @@ def _switch_properties_id(properties: list[str] | None, id_name: str, service: s
return [p for p in properties if p not in ["geometry", service_id]]


_DATETIME_FORMATS = (
"%Y-%m-%dT%H:%M:%S.%f%z",
"%Y-%m-%dT%H:%M:%S%z",
"%Y-%m-%dT%H:%M:%S.%f",
"%Y-%m-%dT%H:%M:%S",
"%Y-%m-%d %H:%M:%S.%f",
"%Y-%m-%d %H:%M:%S",
"%Y-%m-%d",
)

# Anchored to ``[Pp]\d`` so a normal word containing ``p`` (e.g. ``"Apr"``)
# doesn't get mis-classified as an ISO 8601 duration; the optional ``T``
# admits time-only forms like ``PT36H``.
_DURATION_RE = re.compile(r"^[Pp]T?\d")


def _parse_datetime(value: str) -> datetime | None:
"""Parse a single datetime string against the supported formats.

Returns a ``datetime`` (tz-aware iff the input carried a UTC offset),
or ``None`` if no format matched.
"""
# ``datetime.strptime`` accepts a numeric offset like ``+00:00`` but not
# the ``Z`` shorthand, so normalize trailing ``Z`` first.
candidate = value[:-1] + "+00:00" if value.endswith("Z") else value
for fmt in _DATETIME_FORMATS:
try:
return datetime.strptime(candidate, fmt)
except ValueError:
continue
return None


def _format_one(dt, *, date: bool, local_tz) -> str | None:
"""Format a single datetime element for inclusion in the API time arg."""
if pd.isna(dt) or dt == "" or dt is None:
return ".."
parsed = _parse_datetime(dt)
if parsed is None:
return None
if date:
return parsed.strftime("%Y-%m-%d")
aware = parsed if parsed.tzinfo is not None else parsed.replace(tzinfo=local_tz)
return aware.astimezone(ZoneInfo("UTC")).strftime("%Y-%m-%dT%H:%M:%SZ")


def _format_api_dates(
datetime_input: str | list[str], date: bool = False
datetime_input: str | list[str | None] | None, date: bool = False
) -> str | None:
"""
Formats date or datetime input(s) for use with an API.
Expand All @@ -139,10 +185,12 @@ def _format_api_dates(

Parameters
----------
datetime_input : Union[str, List[str]]
datetime_input : Union[str, List[Optional[str]], None]
A single date/datetime string or a list of one or two date/datetime
strings. Accepts formats like "%Y-%m-%d %H:%M:%S", ISO 8601, or relative
periods (e.g., "P7D").
strings. Accepts formats like "%Y-%m-%d %H:%M:%S", ISO 8601 (with or
without ``Z``/numeric offset), or relative periods (e.g., "P7D" /
"PT36H"). Range endpoints may be ``None``/``NaN``/empty to denote a
half-bounded range.
date : bool, optional
If True, uses only the date portion ("YYYY-MM-DD"). If False (default),
returns full datetime in UTC ISO 8601 format ("YYYY-MM-DDTHH:MM:SSZ").
Expand All @@ -164,12 +212,16 @@ def _format_api_dates(

Notes
-----
- Handles blank or NA values by returning None.
- Supports relative period strings (e.g., "P7D") and passes them through
unchanged.
- A single blank/NA value returns None. In a two-value range, a blank/NA
endpoint is rendered as ``".."`` to denote an open bound (e.g.
``"2024-01-01/.."``); the range is only None when *every* element is
blank/NA or any non-NA element fails to parse.
- Supports ISO 8601 durations such as "P7D" and "PT36H" and pre-formatted
intervals containing ``"/"``; both are passed through unchanged.
- Converts datetimes to UTC and formats as ISO 8601 with 'Z' suffix when
`date` is False.
- For date ranges, replaces "nan" with ".." in the output.
`date` is False. Inputs with an explicit offset (``Z`` or ``+HH:MM``) are
converted from that offset to UTC; naive inputs are interpreted in the
local time zone for backwards compatibility.
"""
# Get timezone
local_timezone = datetime.now().astimezone().tzinfo
Expand All @@ -182,48 +234,25 @@ def _format_api_dates(
if all(pd.isna(dt) or dt == "" or dt is None for dt in datetime_input):
return None

if len(datetime_input) <= 2:
# If the list is of length 1, first look for things like "P7D" or dates
# already formatted in ISO08601. Otherwise, try to coerce to datetime
if len(datetime_input) == 1 and (
re.search(r"P", datetime_input[0], re.IGNORECASE)
or "/" in datetime_input[0]
):
return datetime_input[0]
# Otherwise, use list comprehension to parse dates
else:
try:
# Parse to naive datetime
parsed_dates = [
datetime.strptime(dt, "%Y-%m-%d %H:%M:%S") # noqa: DTZ007
for dt in datetime_input
]
except ValueError:
# Parse to date only
try:
parsed_dates = [
datetime.strptime(dt, "%Y-%m-%d") # noqa: DTZ007
for dt in datetime_input
]
except ValueError:
return None
# If the service only accepts dates for this input, not
# datetimes (e.g. "daily"), return just the dates separated by a
# "/", otherwise, return the datetime in UTC format.
if date:
return "/".join(dt.strftime("%Y-%m-%d") for dt in parsed_dates)
else:
parsed_locals = [
dt.replace(tzinfo=local_timezone) for dt in parsed_dates
]
formatted = "/".join(
dt.astimezone(ZoneInfo("UTC")).strftime("%Y-%m-%dT%H:%M:%SZ")
for dt in parsed_locals
)
return formatted
else:
if len(datetime_input) > 2:
raise ValueError("datetime_input should only include 1-2 values")

# Pass through duration ("P7D", "PT36H") and pre-formatted interval ("a/b")
# strings untouched.
if len(datetime_input) == 1 and isinstance(datetime_input[0], str):
single = datetime_input[0]
if _DURATION_RE.match(single) or "/" in single:
return single

# Half-bounded ranges: NA endpoints render as ".."; any unparseable non-NA
# element invalidates the range.
formatted = [
_format_one(dt, date=date, local_tz=local_timezone) for dt in datetime_input
]
if any(f is None for f in formatted):
return None
return "/".join(formatted)


def _cql2_param(args: dict[str, Any]) -> str:
"""
Expand Down
64 changes: 64 additions & 0 deletions tests/waterdata_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import requests

from dataretrieval.waterdata.utils import (
_format_api_dates,
_get_args,
_handle_stats_nesting,
_walk_pages,
Expand Down Expand Up @@ -111,3 +112,66 @@ def test_handle_stats_nesting_tolerates_missing_drop_columns():

assert len(df) == 1
assert df["monitoring_location_id"].iloc[0] == "USGS-12345"


# --- _format_api_dates -------------------------------------------------------


def test_format_api_dates_iso8601_with_z():
"""ISO 8601 datetimes with a 'Z' suffix must be parsed, not dropped to None."""
assert _format_api_dates("2018-02-12T23:20:50Z") == "2018-02-12T23:20:50Z"


def test_format_api_dates_iso8601_with_fractional_seconds():
assert _format_api_dates("2018-02-12T23:20:50.123Z") == "2018-02-12T23:20:50Z"


def test_format_api_dates_iso8601_with_offset():
"""Numeric offsets must be converted to UTC."""
assert _format_api_dates("2018-02-12T19:20:50-04:00") == "2018-02-12T23:20:50Z"


def test_format_api_dates_iso8601_pair():
"""A list of two ISO 8601 datetimes must be parsed into a UTC interval."""
result = _format_api_dates(["2018-02-12T23:20:50Z", "2018-03-18T12:31:12Z"])
assert result == "2018-02-12T23:20:50Z/2018-03-18T12:31:12Z"


def test_format_api_dates_passthrough_interval():
assert _format_api_dates("2018-02-12T00:00:00Z/..") == "2018-02-12T00:00:00Z/.."


def test_format_api_dates_passthrough_duration():
assert _format_api_dates("P7D") == "P7D"


Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Already covered — see test_format_api_dates_passthrough_time_only_duration at tests/waterdata_utils_test.py:148, which asserts _format_api_dates("PT36H") == "PT36H".

def test_format_api_dates_passthrough_time_only_duration():
"""ISO 8601 time-only durations (PT...) are passed through unchanged."""
assert _format_api_dates("PT36H") == "PT36H"


def test_format_api_dates_word_with_p_is_not_a_duration():
"""Strings containing the letter 'p' must not be misclassified as durations."""
assert _format_api_dates("Apr") is None


def test_format_api_dates_date_only():
assert _format_api_dates("2024-01-01", date=True) == "2024-01-01"


def test_format_api_dates_date_only_pair():
assert (
_format_api_dates(["2024-01-01", "2024-02-01"], date=True)
== "2024-01-01/2024-02-01"
)


def test_format_api_dates_space_separated_still_works():
"""The legacy space-separated format must still parse."""
assert _format_api_dates("2024-01-01 00:00:00", date=True) == "2024-01-01"


def test_format_api_dates_open_ended_range_with_none():
"""A None / NaN endpoint becomes '..' in the output range."""
assert _format_api_dates(["2024-01-01", None], date=True) == "2024-01-01/.."
assert _format_api_dates([None, "2024-01-01"], date=True) == "../2024-01-01"
Loading