From 8aae7b41532c0fa3250bbd8b3aafac5b5d56dd88 Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Thu, 7 May 2026 07:03:50 -0500
Subject: [PATCH 01/12] Parse Date/Time/TimeZone triplets in samples and WQP
 responses

Add a shared utils.attach_datetime_columns helper that scans a CSV-derived
DataFrame for <prefix>Date / <prefix>Time / <prefix>TimeZone triplets and
appends a derived <prefix>DateTime UTC column for each one, leaving the
original triplet columns intact. Recognizes both the WQX3 / Samples
naming (Activity_StartDate, Activity_StartTime, Activity_StartTimeZone)
and the legacy WQP naming (ActivityStartDate, ActivityStartTime/Time,
ActivityStartTime/TimeZoneCode). Mirrors R dataRetrieval's create_dateTime.

Wired into waterdata.get_samples and wqp.get_results. Closes #266.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 NEWS.md                        |   2 +
 dataretrieval/utils.py         |  92 ++++++++++++++++++++++++++++
 dataretrieval/waterdata/api.py |  11 +++-
 dataretrieval/wqp.py           |  10 ++-
 tests/utils_test.py            | 107 +++++++++++++++++++++++++++++++++
 tests/waterdata_test.py        |  15 ++++-
 tests/wqp_test.py              |  12 +++-
 7 files changed, 241 insertions(+), 8 deletions(-)
diff --git a/NEWS.md b/NEWS.md
index 2faaeb42..8ddd3282 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,5 @@
+**05/07/2026:** `waterdata.get_samples()` and `wqp.get_results()` now append a derived `<prefix>DateTime` UTC column for every Date/Time/TimeZone triplet in the response (e.g. `Activity_StartDate` + `Activity_StartTime` + `Activity_StartTimeZone` → `Activity_StartDateTime`). Both the WQX3 (`<X>Date`/`<X>Time`/`<X>TimeZone`) and legacy WQP (`<X>Date`/`<X>Time/Time`/`<X>Time/TimeZoneCode`) shapes are recognized; abbreviations like EST/EDT/CST/PST resolve to a UTC `Timestamp`, unknown codes resolve to `NaT`, and the original triplet columns are preserved. Mirrors R's `create_dateTime` behavior. Closes #266.
+
 **05/06/2026:** Each remaining active function in `dataretrieval.nwis` now emits a per-function `DeprecationWarning` naming the `waterdata` replacement to migrate to (visible the first time users call each getter). The `nwis` module is scheduled for removal on or after **2027-05-06**.
 
 **05/06/2026:** Added `waterdata.get_ratings(...)` — wraps the new Water Data STAC catalog (`api.waterdata.usgs.gov/stac/v0/search`) for USGS stage-discharge rating curves. Returns parsed `exsa` / `base` / `corr` rating tables as a dict of DataFrames keyed by feature ID, or just the list of available STAC features when `download_and_parse=False`. Mirrors R's `read_waterdata_ratings`.
diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py
index 4aa76a61..d8827bf4 100644
--- a/dataretrieval/utils.py
+++ b/dataretrieval/utils.py
@@ -94,6 +94,98 @@ def format_datetime(df, date_field, time_field, tz_field):
     return df
 
 
+# Triplet patterns we recognize in WQP and Samples CSV responses. Each entry
+# defines how to derive the time/timezone column names from a date column, and
+# the suffix to strip when forming the new <prefix>DateTime column name.
+_DATETIME_TRIPLET_PATTERNS = (
+    # WQX3 / Samples: Activity_StartDate, Activity_StartTime, Activity_StartTimeZone
+    {
+        "date_suffix": "Date",
+        "time_from_date": lambda d: d[: -len("Date")] + "Time",
+        "tz_from_date": lambda d: d[: -len("Date")] + "TimeZone",
+    },
+    # Legacy WQP: <X>Date, <X>Time/Time, <X>Time/TimeZoneCode
+    {
+        "date_suffix": "Date",
+        "time_from_date": lambda d: d[: -len("Date")] + "Time/Time",
+        "tz_from_date": lambda d: d[: -len("Date")] + "Time/TimeZoneCode",
+    },
+)
+
+
+def _build_utc_datetime(date_series, time_series, tz_series):
+    """Combine date + time + tz-abbreviation columns into a UTC pandas Series.
+
+    Unknown timezone codes (and rows missing any of the three values) yield
+    ``NaT``. The input columns are not mutated.
+    """
+    offsets = tz_series.map(tz)
+    combined = (
+        date_series.astype("string")
+        + " "
+        + time_series.astype("string")
+        + " "
+        + offsets.astype("string")
+    )
+    # Rows where any input is missing produce a string containing "<NA>"; mark
+    # those so pd.to_datetime returns NaT rather than guessing.
+    invalid = (
+        date_series.isna() | time_series.isna() | tz_series.isna() | offsets.isna()
+    )
+    combined = combined.mask(invalid)
+    return pd.to_datetime(combined, format="mixed", utc=True, errors="coerce")
+
+
+def attach_datetime_columns(df):
+    """Add ``<prefix>DateTime`` UTC columns for any Date/Time/TimeZone triplets.
+
+    Detects two naming patterns that appear in USGS Samples and Water Quality
+    Portal CSV responses:
+
+    * **WQX3** — ``<prefix>Date``, ``<prefix>Time``, ``<prefix>TimeZone``
+    * **Legacy WQP** — ``<prefix>Date``, ``<prefix>Time/Time``,
+      ``<prefix>Time/TimeZoneCode``
+
+    For every triplet present, a new ``<prefix>DateTime`` column is appended
+    holding a UTC ``Timestamp`` (offsets resolved via
+    :data:`dataretrieval.codes.tz`). The original Date/Time/TimeZone columns
+    are left intact, and an existing ``<prefix>DateTime`` column is never
+    overwritten.
+
+    Parameters
+    ----------
+    df : ``pandas.DataFrame``
+        DataFrame returned from a Samples or WQP CSV endpoint.
+
+    Returns
+    -------
+    df : ``pandas.DataFrame``
+        A DataFrame with any derivable ``<prefix>DateTime`` columns appended.
+        Callers should use the returned value (the helper may concatenate
+        rather than mutate in place).
+    """
+    columns = set(df.columns)
+    new_columns = {}
+    for col in df.columns:
+        if not col.endswith("Date"):
+            continue
+        for pattern in _DATETIME_TRIPLET_PATTERNS:
+            time_col = pattern["time_from_date"](col)
+            tz_col = pattern["tz_from_date"](col)
+            if time_col not in columns or tz_col not in columns:
+                continue
+            target = col[: -len("Date")] + "DateTime"
+            if target in columns or target in new_columns:
+                break
+            new_columns[target] = _build_utc_datetime(df[col], df[time_col], df[tz_col])
+            break
+    if not new_columns:
+        return df
+    # Concat in one shot — appending columns one-by-one to a wide CSV-derived
+    # frame triggers pandas' fragmentation PerformanceWarning.
+    return pd.concat([df, pd.DataFrame(new_columns, index=df.index)], axis=1)
+
+
 class BaseMetadata:
     """Base class for metadata.
 
diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py
index 886a989c..60ce70d1 100644
--- a/dataretrieval/waterdata/api.py
+++ b/dataretrieval/waterdata/api.py
@@ -16,7 +16,7 @@
 import requests
 from requests.models import PreparedRequest
 
-from dataretrieval.utils import BaseMetadata, to_str
+from dataretrieval.utils import BaseMetadata, attach_datetime_columns, to_str
 from dataretrieval.waterdata.filters import FILTER_LANG
 from dataretrieval.waterdata.types import (
     CODE_SERVICES,
@@ -2266,7 +2266,13 @@ def get_samples(
     Returns
     -------
     df : ``pandas.DataFrame``
-        Formatted data returned from the API query.
+        Formatted data returned from the API query. For each
+        ``<prefix>Date`` / ``<prefix>Time`` / ``<prefix>TimeZone`` triplet in
+        the response (e.g. ``Activity_StartDate``, ``Activity_StartTime``,
+        ``Activity_StartTimeZone``), an additional ``<prefix>DateTime`` column
+        is appended holding a UTC ``Timestamp`` derived from the three. The
+        original Date/Time/TimeZone columns are left intact; rows whose
+        timezone abbreviation is not recognized resolve to ``NaT``.
     md : :obj:`dataretrieval.utils.Metadata`
         Custom ``dataretrieval`` metadata object pertaining to the query.
 
@@ -2323,6 +2329,7 @@ def get_samples(
     response.raise_for_status()
 
     df = pd.read_csv(StringIO(response.text), delimiter=",")
+    df = attach_datetime_columns(df)
 
     return df, BaseMetadata(response)
 
diff --git a/dataretrieval/wqp.py b/dataretrieval/wqp.py
index 24e1737e..6df145e6 100644
--- a/dataretrieval/wqp.py
+++ b/dataretrieval/wqp.py
@@ -17,7 +17,7 @@
 
 import pandas as pd
 
-from .utils import BaseMetadata, query
+from .utils import BaseMetadata, attach_datetime_columns, query
 
 if TYPE_CHECKING:
     from pandas import DataFrame
@@ -101,7 +101,12 @@ def get_results(
     Returns
     -------
     df : ``pandas.DataFrame``
-        Formatted data returned from the API query.
+        Formatted data returned from the API query. For each
+        ``<prefix>Date`` / ``<prefix>Time`` / ``<prefix>TimeZone`` triplet in
+        the response (legacy WQP uses ``<prefix>Time/Time`` and
+        ``<prefix>Time/TimeZoneCode``), an additional ``<prefix>DateTime``
+        column is appended holding a UTC ``Timestamp``. Original triplet
+        columns are preserved; unrecognized timezone codes yield ``NaT``.
     md : :obj:`dataretrieval.utils.Metadata`
         Custom ``dataretrieval`` metadata object pertaining to the query.
 
@@ -147,6 +152,7 @@ def get_results(
     response = query(url, kwargs, delimiter=";", ssl_check=ssl_check)
 
     df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)
+    df = attach_datetime_columns(df)
     return df, WQP_Metadata(response)
 
 
diff --git a/tests/utils_test.py b/tests/utils_test.py
index 4cb9b383..7a15af60 100644
--- a/tests/utils_test.py
+++ b/tests/utils_test.py
@@ -97,3 +97,110 @@ def test_to_str_custom_delimiter(self):
 
     def test_to_str_non_iterable(self):
         assert utils.to_str(123) is None
+
+
+class Test_attach_datetime_columns:
+    """Tests of attach_datetime_columns, which derives <prefix>DateTime UTC
+    columns from Date/Time/TimeZone triplets in Samples and WQP CSVs."""
+
+    def test_wqx3_triplet_resolves_to_utc(self):
+        """The Samples / WQX3 pattern (Activity_Start*) is detected and the
+        resulting DateTime is converted to UTC."""
+        df = pd.DataFrame(
+            {
+                "Activity_StartDate": ["2024-01-09", "2024-02-15"],
+                "Activity_StartTime": ["10:00:00", "14:30:00"],
+                "Activity_StartTimeZone": ["PST", "EST"],
+            }
+        )
+        df = utils.attach_datetime_columns(df)
+        assert "Activity_StartDateTime" in df.columns
+        # PST is UTC-8 → 10:00 PST is 18:00 UTC
+        assert df["Activity_StartDateTime"][0] == pd.Timestamp(
+            "2024-01-09 18:00:00", tz="UTC"
+        )
+        # EST is UTC-5 → 14:30 EST is 19:30 UTC
+        assert df["Activity_StartDateTime"][1] == pd.Timestamp(
+            "2024-02-15 19:30:00", tz="UTC"
+        )
+        # Original columns are preserved
+        assert df["Activity_StartTimeZone"].tolist() == ["PST", "EST"]
+
+    def test_legacy_wqp_triplet_resolves_to_utc(self):
+        """The legacy WQP pattern (slash-separated time/tz columns) is also
+        detected."""
+        df = pd.DataFrame(
+            {
+                "ActivityStartDate": ["2024-01-09"],
+                "ActivityStartTime/Time": ["10:00:00"],
+                "ActivityStartTime/TimeZoneCode": ["PST"],
+            }
+        )
+        df = utils.attach_datetime_columns(df)
+        assert "ActivityStartDateTime" in df.columns
+        assert df["ActivityStartDateTime"][0] == pd.Timestamp(
+            "2024-01-09 18:00:00", tz="UTC"
+        )
+
+    def test_unknown_timezone_is_NaT(self):
+        """Unknown timezone codes resolve to NaT rather than raising."""
+        df = pd.DataFrame(
+            {
+                "Activity_StartDate": ["2024-01-09"],
+                "Activity_StartTime": ["10:00:00"],
+                "Activity_StartTimeZone": ["BOGUS"],
+            }
+        )
+        df = utils.attach_datetime_columns(df)
+        assert df["Activity_StartDateTime"].isna().all()
+
+    def test_missing_time_or_tz_is_NaT(self):
+        """Rows with a missing time or tz produce NaT but don't poison others."""
+        df = pd.DataFrame(
+            {
+                "Activity_StartDate": ["2024-01-09", "2024-02-15"],
+                "Activity_StartTime": ["10:00:00", None],
+                "Activity_StartTimeZone": ["PST", "EST"],
+            }
+        )
+        df = utils.attach_datetime_columns(df)
+        assert df["Activity_StartDateTime"][0] == pd.Timestamp(
+            "2024-01-09 18:00:00", tz="UTC"
+        )
+        assert pd.isna(df["Activity_StartDateTime"][1])
+
+    def test_existing_datetime_column_not_overwritten(self):
+        """An existing <prefix>DateTime column is left alone."""
+        df = pd.DataFrame(
+            {
+                "Activity_StartDate": ["2024-01-09"],
+                "Activity_StartTime": ["10:00:00"],
+                "Activity_StartTimeZone": ["PST"],
+                "Activity_StartDateTime": ["preexisting"],
+            }
+        )
+        df = utils.attach_datetime_columns(df)
+        assert df["Activity_StartDateTime"].tolist() == ["preexisting"]
+
+    def test_multiple_triplets_handled(self):
+        """All Date/Time/TimeZone triplets in the frame get DateTime columns."""
+        df = pd.DataFrame(
+            {
+                "Activity_StartDate": ["2024-01-09"],
+                "Activity_StartTime": ["10:00:00"],
+                "Activity_StartTimeZone": ["PST"],
+                "LabInfo_AnalysisStartDate": ["2024-01-10"],
+                "LabInfo_AnalysisStartTime": ["09:00:00"],
+                "LabInfo_AnalysisStartTimeZone": ["EST"],
+            }
+        )
+        df = utils.attach_datetime_columns(df)
+        assert "Activity_StartDateTime" in df.columns
+        assert "LabInfo_AnalysisStartDateTime" in df.columns
+
+    def test_lone_date_column_left_alone(self):
+        """A Date column without matching Time/TimeZone columns is ignored."""
+        df = pd.DataFrame({"LastChangeDate": ["2024-01-09"]})
+        df = utils.attach_datetime_columns(df)
+        assert "LastChangeDateTime" not in df.columns
+        assert list(df.columns) == ["LastChangeDate"]
diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py
index 1edf012e..4ddb3155 100644
--- a/tests/waterdata_test.py
+++ b/tests/waterdata_test.py
@@ -1,6 +1,7 @@
 import datetime
 import sys
 
+import pandas as pd
 import pytest
 from pandas import DataFrame
 
@@ -54,11 +55,20 @@ def test_mock_get_samples(requests_mock):
         monitoringLocationIdentifier="USGS-05406500",
     )
     assert type(df) is DataFrame
-    assert df.size == 12127
+    # 67 rows × 181 source columns + 6 derived <prefix>DateTime columns
+    assert df.shape == (67, 187)
     assert md.url == request_url
     assert isinstance(md.query_time, datetime.timedelta)
     assert md.header == {"mock_header": "value"}
     assert md.comment is None
+    # The Activity start triplet is parsed into a UTC Timestamp column.
+    assert "Activity_StartDateTime" in df.columns
+    # Row 0 is "2023-08-22 08:50:00 CDT" → 13:50 UTC.
+    assert df["Activity_StartDateTime"].iloc[0] == pd.Timestamp(
+        "2023-08-22 13:50:00", tz="UTC"
+    )
+    # Original triplet columns are preserved.
+    assert df["Activity_StartTimeZone"].iloc[0] == "CDT"
 
 
 def test_mock_get_samples_summary(requests_mock):
@@ -127,7 +137,8 @@ def test_samples_activity():
         monitoringLocationIdentifier="USGS-06719505",
     )
     assert len(df) > 0
-    assert len(df.columns) == 95
+    # 95 columns from the API plus 2 derived <prefix>DateTime columns.
+    assert len(df.columns) == 97
     assert "Location_HUCTwelveDigitCode" in df.columns
 
 
diff --git a/tests/wqp_test.py b/tests/wqp_test.py
index a337f7ec..cbd772ff 100644
--- a/tests/wqp_test.py
+++ b/tests/wqp_test.py
@@ -33,11 +33,15 @@ def test_get_results(requests_mock):
         startDateHi="09-30-2011",
     )
     assert type(df) is DataFrame
-    assert df.size == 315
+    # 5 rows × 63 source columns + 2 derived <prefix>DateTime columns
+    assert df.shape == (5, 65)
     assert md.url == request_url
     assert isinstance(md.query_time, datetime.timedelta)
     assert md.header == {"mock_header": "value"}
     assert md.comment is None
+    # Legacy WQP triplets (slash-separated) are parsed into UTC.
+    assert "ActivityStartDateTime" in df.columns
+    assert df["ActivityStartDateTime"].notna().all()
 
 
 def test_get_results_WQX3(requests_mock):
@@ -58,11 +62,15 @@ def test_get_results_WQX3(requests_mock):
         startDateHi="09-30-2011",
     )
     assert type(df) is DataFrame
-    assert df.size == 900
+    # 5 rows × 180 source columns + 6 derived <prefix>DateTime columns
+    assert df.shape == (5, 186)
     assert md.url == request_url
     assert isinstance(md.query_time, datetime.timedelta)
     assert md.header == {"mock_header": "value"}
     assert md.comment is None
+    # WQX3 WQP triplets are parsed into UTC.
+    assert "Activity_StartDateTime" in df.columns
+    assert df["Activity_StartDateTime"].notna().all()
 
 
 def test_what_sites(requests_mock):

From 68eb573fd6cbe8af86c774e337637ab074cf2352 Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Thu, 7 May 2026 07:14:29 -0500
Subject: [PATCH 02/12] Address /simplify findings on attach_datetime_columns

- Replace the lambda-laden _DATETIME_TRIPLET_PATTERNS tuple of dicts with
  a flat _TIME_TZ_SUFFIXES tuple of (time_suffix, tz_suffix) pairs; the
  unused date_suffix field is gone.
- Use str.removesuffix("Date") for the prefix swap and resolve the target
  column name once before iterating patterns, hoisting the existence check
  out of the inner loop.
- Drop the redundant <NA>-mask in _build_utc_datetime; errors="coerce"
  already turns rows with missing inputs into NaT.
- Switch pd.to_datetime from format="mixed" to a fixed
  "%Y-%m-%d %H:%M:%S %z" so pandas doesn't probe formats per row.
- Trim comments that merely restate what the code does, along with the
  per-test docstrings, so the new tests match the noise level of the
  surrounding Test_to_str class.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 dataretrieval/utils.py  | 57 +++++++++++++++--------------------------
 tests/utils_test.py     | 15 -----------
 tests/waterdata_test.py |  7 +----
 tests/wqp_test.py       |  6 -----
 4 files changed, 21 insertions(+), 64 deletions(-)

diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py
index d8827bf4..4be188a1 100644
--- a/dataretrieval/utils.py
+++ b/dataretrieval/utils.py
@@ -94,23 +94,9 @@ def format_datetime(df, date_field, time_field, tz_field):
     return df
 
 
-# Triplet patterns we recognize in WQP and Samples CSV responses. Each entry
-# defines how to derive the time/timezone column names from a date column, and
-# the suffix to strip when forming the new <prefix>DateTime column name.
-_DATETIME_TRIPLET_PATTERNS = (
-    # WQX3 / Samples: Activity_StartDate, Activity_StartTime, Activity_StartTimeZone
-    {
-        "date_suffix": "Date",
-        "time_from_date": lambda d: d[: -len("Date")] + "Time",
-        "tz_from_date": lambda d: d[: -len("Date")] + "TimeZone",
-    },
-    # Legacy WQP: <X>Date, <X>Time/Time, <X>Time/TimeZoneCode
-    {
-        "date_suffix": "Date",
-        "time_from_date": lambda d: d[: -len("Date")] + "Time/Time",
-        "tz_from_date": lambda d: d[: -len("Date")] + "Time/TimeZoneCode",
-    },
-)
+# (time-suffix, tz-suffix) pairs that follow a "<prefix>Date" column.
+# First entry is WQX3 / Samples, second is legacy WQP (slash-separated).
+_TIME_TZ_SUFFIXES = (("Time", "TimeZone"), ("Time/Time", "Time/TimeZoneCode"))
 
 
 def _build_utc_datetime(date_series, time_series, tz_series):
@@ -127,13 +113,9 @@ def _build_utc_datetime(date_series, time_series, tz_series):
         + " "
         + offsets.astype("string")
     )
-    # Rows where any input is missing produce a string containing "<NA>"; mark
-    # those so pd.to_datetime returns NaT rather than guessing.
-    invalid = (
-        date_series.isna() | time_series.isna() | tz_series.isna() | offsets.isna()
+    return pd.to_datetime(
+        combined, format="%Y-%m-%d %H:%M:%S %z", utc=True, errors="coerce"
     )
-    combined = combined.mask(invalid)
-    return pd.to_datetime(combined, format="mixed", utc=True, errors="coerce")
 
 
 def attach_datetime_columns(df):
@@ -160,29 +142,30 @@ def attach_datetime_columns(df):
     Returns
     -------
     df : ``pandas.DataFrame``
-        A DataFrame with any derivable ``<prefix>DateTime`` columns appended.
-        Callers should use the returned value (the helper may concatenate
-        rather than mutate in place).
+        A new DataFrame with any derivable ``<prefix>DateTime`` columns
+        appended (or the original frame if no triplets were found).
     """
     columns = set(df.columns)
     new_columns = {}
     for col in df.columns:
         if not col.endswith("Date"):
             continue
-        for pattern in _DATETIME_TRIPLET_PATTERNS:
-            time_col = pattern["time_from_date"](col)
-            tz_col = pattern["tz_from_date"](col)
-            if time_col not in columns or tz_col not in columns:
-                continue
-            target = col[: -len("Date")] + "DateTime"
-            if target in columns or target in new_columns:
+        prefix = col.removesuffix("Date")
+        target = prefix + "DateTime"
+        if target in columns or target in new_columns:
+            continue
+        for time_suffix, tz_suffix in _TIME_TZ_SUFFIXES:
+            time_col = prefix + time_suffix
+            tz_col = prefix + tz_suffix
+            if time_col in columns and tz_col in columns:
+                new_columns[target] = _build_utc_datetime(
+                    df[col], df[time_col], df[tz_col]
+                )
                 break
-            new_columns[target] = _build_utc_datetime(df[col], df[time_col], df[tz_col])
-            break
     if not new_columns:
         return df
-    # Concat in one shot — appending columns one-by-one to a wide CSV-derived
-    # frame triggers pandas' fragmentation PerformanceWarning.
+    # Concat in one shot — per-column assignment on a wide CSV-derived frame
+    # triggers pandas' fragmentation PerformanceWarning.
     return pd.concat([df, pd.DataFrame(new_columns, index=df.index)], axis=1)
 
 
diff --git a/tests/utils_test.py b/tests/utils_test.py
index 7a15af60..8f52acd9 100644
--- a/tests/utils_test.py
+++ b/tests/utils_test.py
@@ -104,8 +104,6 @@ class Test_attach_datetime_columns:
     columns from Date/Time/TimeZone triplets in Samples and WQP CSVs."""
 
     def test_wqx3_triplet_resolves_to_utc(self):
-        """The Samples / WQX3 pattern (Activity_Start*) is detected and the
-        resulting DateTime is converted to UTC."""
         df = pd.DataFrame(
             {
                 "Activity_StartDate": ["2024-01-09", "2024-02-15"],
@@ -114,21 +112,15 @@ def test_wqx3_triplet_resolves_to_utc(self):
             }
         )
         df = utils.attach_datetime_columns(df)
-        assert "Activity_StartDateTime" in df.columns
-        # PST is UTC-8 → 10:00 PST is 18:00 UTC
         assert df["Activity_StartDateTime"][0] == pd.Timestamp(
             "2024-01-09 18:00:00", tz="UTC"
         )
-        # EST is UTC-5 → 14:30 EST is 19:30 UTC
         assert df["Activity_StartDateTime"][1] == pd.Timestamp(
             "2024-02-15 19:30:00", tz="UTC"
         )
-        # Original columns are preserved
         assert df["Activity_StartTimeZone"].tolist() == ["PST", "EST"]
 
     def test_legacy_wqp_triplet_resolves_to_utc(self):
-        """The legacy WQP pattern (slash-separated time/tz columns) is also
-        detected."""
         df = pd.DataFrame(
             {
                 "ActivityStartDate": ["2024-01-09"],
@@ -137,13 +129,11 @@ def test_legacy_wqp_triplet_resolves_to_utc(self):
             }
         )
         df = utils.attach_datetime_columns(df)
-        assert "ActivityStartDateTime" in df.columns
         assert df["ActivityStartDateTime"][0] == pd.Timestamp(
             "2024-01-09 18:00:00", tz="UTC"
         )
 
     def test_unknown_timezone_is_NaT(self):
-        """Unknown timezone codes resolve to NaT rather than raising."""
         df = pd.DataFrame(
             {
                 "Activity_StartDate": ["2024-01-09"],
@@ -155,7 +145,6 @@ def test_unknown_timezone_is_NaT(self):
         assert df["Activity_StartDateTime"].isna().all()
 
     def test_missing_time_or_tz_is_NaT(self):
-        """Rows with a missing time or tz produce NaT but don't poison others."""
         df = pd.DataFrame(
             {
                 "Activity_StartDate": ["2024-01-09", "2024-02-15"],
@@ -170,7 +159,6 @@ def test_missing_time_or_tz_is_NaT(self):
         assert pd.isna(df["Activity_StartDateTime"][1])
 
     def test_existing_datetime_column_not_overwritten(self):
-        """An existing <prefix>DateTime column is left alone."""
         df = pd.DataFrame(
             {
                 "Activity_StartDate": ["2024-01-09"],
@@ -183,7 +171,6 @@ def test_existing_datetime_column_not_overwritten(self):
         assert df["Activity_StartDateTime"].tolist() == ["preexisting"]
 
     def test_multiple_triplets_handled(self):
-        """All Date/Time/TimeZone triplets in the frame get DateTime columns."""
         df = pd.DataFrame(
             {
                 "Activity_StartDate": ["2024-01-09"],
@@ -199,8 +186,6 @@ def test_multiple_triplets_handled(self):
         assert "LabInfo_AnalysisStartDateTime" in df.columns
 
     def test_lone_date_column_left_alone(self):
-        """A Date column without matching Time/TimeZone columns is ignored."""
         df = pd.DataFrame({"LastChangeDate": ["2024-01-09"]})
         df = utils.attach_datetime_columns(df)
-        assert "LastChangeDateTime" not in df.columns
         assert list(df.columns) == ["LastChangeDate"]
diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py
index 4ddb3155..493c73ff 100644
--- a/tests/waterdata_test.py
+++ b/tests/waterdata_test.py
@@ -55,19 +55,15 @@ def test_mock_get_samples(requests_mock):
         monitoringLocationIdentifier="USGS-05406500",
     )
     assert type(df) is DataFrame
-    # 67 rows × 181 source columns + 6 derived <prefix>DateTime columns
     assert df.shape == (67, 187)
     assert md.url == request_url
     assert isinstance(md.query_time, datetime.timedelta)
     assert md.header == {"mock_header": "value"}
     assert md.comment is None
-    # The Activity start triplet is parsed into a UTC Timestamp column.
-    assert "Activity_StartDateTime" in df.columns
-    # Row 0 is "2023-08-22 08:50:00 CDT" → 13:50 UTC.
+    # Row 0 of the fixture is "2023-08-22 08:50:00 CDT" → 13:50 UTC.
     assert df["Activity_StartDateTime"].iloc[0] == pd.Timestamp(
         "2023-08-22 13:50:00", tz="UTC"
     )
-    # Original triplet columns are preserved.
     assert df["Activity_StartTimeZone"].iloc[0] == "CDT"
 
 
@@ -137,7 +133,6 @@ def test_samples_activity():
         monitoringLocationIdentifier="USGS-06719505",
     )
     assert len(df) > 0
-    # 95 columns from the API plus 2 derived <prefix>DateTime columns.
     assert len(df.columns) == 97
     assert "Location_HUCTwelveDigitCode" in df.columns
 
diff --git a/tests/wqp_test.py b/tests/wqp_test.py
index cbd772ff..f432ab26 100644
--- a/tests/wqp_test.py
+++ b/tests/wqp_test.py
@@ -33,14 +33,11 @@ def test_get_results(requests_mock):
         startDateHi="09-30-2011",
     )
     assert type(df) is DataFrame
-    # 5 rows × 63 source columns + 2 derived <prefix>DateTime columns
     assert df.shape == (5, 65)
     assert md.url == request_url
     assert isinstance(md.query_time, datetime.timedelta)
     assert md.header == {"mock_header": "value"}
     assert md.comment is None
-    # Legacy WQP triplets (slash-separated) are parsed into UTC.
-    assert "ActivityStartDateTime" in df.columns
     assert df["ActivityStartDateTime"].notna().all()
 
 
@@ -62,14 +59,11 @@ def test_get_results_WQX3(requests_mock):
         startDateHi="09-30-2011",
     )
     assert type(df) is DataFrame
-    # 5 rows × 180 source columns + 6 derived <prefix>DateTime columns
     assert df.shape == (5, 186)
     assert md.url == request_url
     assert isinstance(md.query_time, datetime.timedelta)
     assert md.header == {"mock_header": "value"}
     assert md.comment is None
-    # WQX3 WQP triplets are parsed into UTC.
-    assert "Activity_StartDateTime" in df.columns
     assert df["Activity_StartDateTime"].notna().all()
 
 

From 69594758f97565bc75e4539160fc87848828f9a6 Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Thu, 7 May 2026 07:42:18 -0500
Subject: [PATCH 03/12] Drop str.removesuffix to keep py38 compatibility
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The /simplify pass introduced col.removesuffix("Date"), but the project
declares requires-python = ">=3.8" (and the ruff target is py38), and
str.removesuffix was added in Python 3.9 — so the helper would raise
AttributeError on its first call under a 3.8 interpreter. Revert to the
slice form.

Reported by Copilot review on PR #272.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 dataretrieval/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py
index 4be188a1..018e47aa 100644
--- a/dataretrieval/utils.py
+++ b/dataretrieval/utils.py
@@ -150,7 +150,7 @@ def attach_datetime_columns(df):
     for col in df.columns:
         if not col.endswith("Date"):
             continue
-        prefix = col.removesuffix("Date")
+        prefix = col[: -len("Date")]
         target = prefix + "DateTime"
         if target in columns or target in new_columns:
             continue

From 3a6cad9a8aa8d550acbd51725747156892ffbe80 Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Thu, 7 May 2026 07:52:09 -0500
Subject: [PATCH 04/12] Bump declared Python floor to 3.9 to match CI

CI's matrix already tests only Python 3.9 / 3.13 / 3.14 (and the
waterdata test module skips itself on <3.10), but pyproject.toml still
declared requires-python = ">=3.8" and ruff was targeting py38. Bring
the manifest in line with reality:

- requires-python = ">=3.9"
- [tool.ruff] target-version = "py39"

That unblocks col.removesuffix("Date") in attach_datetime_columns
(restored), and surfaces two pre-existing pyupgrade fixes that ruff now
applies under the py39 target:

- dataretrieval/waterdata/ratings.py: import Iterable from
  collections.abc instead of typing.
- dataretrieval/waterdata/utils.py: zoneinfo is stdlib on 3.9+, so the
  ZoneInfo import moves into the stdlib group.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 dataretrieval/utils.py             | 2 +-
 dataretrieval/waterdata/ratings.py | 3 ++-
 dataretrieval/waterdata/utils.py   | 2 +-
 pyproject.toml                     | 4 ++--
 4 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py
index 018e47aa..4be188a1 100644
--- a/dataretrieval/utils.py
+++ b/dataretrieval/utils.py
@@ -150,7 +150,7 @@ def attach_datetime_columns(df):
     for col in df.columns:
         if not col.endswith("Date"):
             continue
-        prefix = col[: -len("Date")]
+        prefix = col.removesuffix("Date")
         target = prefix + "DateTime"
         if target in columns or target in new_columns:
             continue
diff --git a/dataretrieval/waterdata/ratings.py b/dataretrieval/waterdata/ratings.py
index a1d0a3bb..f5a1a0ff 100644
--- a/dataretrieval/waterdata/ratings.py
+++ b/dataretrieval/waterdata/ratings.py
@@ -14,7 +14,8 @@
 
 import logging
 import os
-from typing import Any, Iterable, Literal, get_args
+from collections.abc import Iterable
+from typing import Any, Literal, get_args
 
 import pandas as pd
 import requests
diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py
index 784f2969..413da7dd 100644
--- a/dataretrieval/waterdata/utils.py
+++ b/dataretrieval/waterdata/utils.py
@@ -6,10 +6,10 @@
 import re
 from datetime import datetime
 from typing import Any, get_args
+from zoneinfo import ZoneInfo
 
 import pandas as pd
 import requests
-from zoneinfo import ZoneInfo
 
 from dataretrieval import __version__
 from dataretrieval.utils import BaseMetadata
diff --git a/pyproject.toml b/pyproject.toml
index 1322dcc3..35edcc5e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 name = "dataretrieval"
 description = "Discover and retrieve water data from U.S. federal hydrologic web services."
 readme = "README.md"
-requires-python = ">=3.8"
+requires-python = ">=3.9"
 keywords = ["USGS", "water data"]
 license = "CC0-1.0"
 license-files = ["LICENSE.md"]
@@ -63,7 +63,7 @@ repository = "https://github.com/DOI-USGS/dataretrieval-python.git"
 write_to = "dataretrieval/_version.py"
 
 [tool.ruff]
-target-version = "py38"
+target-version = "py39"
 extend-exclude = ["demos"]
 
 [tool.ruff.lint]

From 2f7cf10661f25da360a162cc50034e171ca5a96b Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Thu, 7 May 2026 08:19:00 -0500
Subject: [PATCH 05/12] Apply ruff format under the new py39 target

Bumping ruff target-version from py38 to py39 made the formatter prefer
parenthesized context managers (a 3.9-PEG-parser feature). The CI lint
job picked up the resulting drift in tests/waterdata_filters_test.py;
apply the formatter to bring it back in line.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 tests/waterdata_filters_test.py | 79 ++++++++++++++++++++-------------
 1 file changed, 47 insertions(+), 32 deletions(-)

diff --git a/tests/waterdata_filters_test.py b/tests/waterdata_filters_test.py
index 21eb6c1b..545f7039 100644
--- a/tests/waterdata_filters_test.py
+++ b/tests/waterdata_filters_test.py
@@ -190,14 +190,18 @@ def fake_walk_pages(*_args, **_kwargs):
         frame = pd.DataFrame({"id": [f"chunk-{idx}"], "value": [idx]})
         return frame, _fake_response()
 
-    with mock.patch(
-        "dataretrieval.waterdata.utils._construct_api_requests",
-        side_effect=fake_construct_api_requests,
-    ), mock.patch(
-        "dataretrieval.waterdata.utils._walk_pages", side_effect=fake_walk_pages
-    ), mock.patch(
-        "dataretrieval.waterdata.filters._effective_filter_budget",
-        return_value=_CQL_FILTER_CHUNK_LEN,
+    with (
+        mock.patch(
+            "dataretrieval.waterdata.utils._construct_api_requests",
+            side_effect=fake_construct_api_requests,
+        ),
+        mock.patch(
+            "dataretrieval.waterdata.utils._walk_pages", side_effect=fake_walk_pages
+        ),
+        mock.patch(
+            "dataretrieval.waterdata.filters._effective_filter_budget",
+            return_value=_CQL_FILTER_CHUNK_LEN,
+        ),
     ):
         df, _ = get_continuous(
             monitoring_location_id="USGS-07374525",
@@ -239,14 +243,18 @@ def fake_walk_pages(*_args, **_kwargs):
         frame = pd.DataFrame({"id": ["shared-feature"], "value": [1]})
         return frame, _fake_response()
 
-    with mock.patch(
-        "dataretrieval.waterdata.utils._construct_api_requests",
-        return_value=_fake_prepared_request(),
-    ), mock.patch(
-        "dataretrieval.waterdata.utils._walk_pages", side_effect=fake_walk_pages
-    ), mock.patch(
-        "dataretrieval.waterdata.filters._effective_filter_budget",
-        return_value=_CQL_FILTER_CHUNK_LEN,
+    with (
+        mock.patch(
+            "dataretrieval.waterdata.utils._construct_api_requests",
+            return_value=_fake_prepared_request(),
+        ),
+        mock.patch(
+            "dataretrieval.waterdata.utils._walk_pages", side_effect=fake_walk_pages
+        ),
+        mock.patch(
+            "dataretrieval.waterdata.filters._effective_filter_budget",
+            return_value=_CQL_FILTER_CHUNK_LEN,
+        ),
     ):
         df, _ = get_continuous(
             monitoring_location_id="USGS-07374525",
@@ -293,14 +301,18 @@ def fake_walk_pages(*_args, **_kwargs):
             )
         return frame, _fake_response()
 
-    with mock.patch(
-        "dataretrieval.waterdata.utils._construct_api_requests",
-        return_value=_fake_prepared_request(),
-    ), mock.patch(
-        "dataretrieval.waterdata.utils._walk_pages", side_effect=fake_walk_pages
-    ), mock.patch(
-        "dataretrieval.waterdata.filters._effective_filter_budget",
-        return_value=_CQL_FILTER_CHUNK_LEN,
+    with (
+        mock.patch(
+            "dataretrieval.waterdata.utils._construct_api_requests",
+            return_value=_fake_prepared_request(),
+        ),
+        mock.patch(
+            "dataretrieval.waterdata.utils._walk_pages", side_effect=fake_walk_pages
+        ),
+        mock.patch(
+            "dataretrieval.waterdata.filters._effective_filter_budget",
+            return_value=_CQL_FILTER_CHUNK_LEN,
+        ),
     ):
         df, _ = get_continuous(
             monitoring_location_id="USGS-07374525",
@@ -434,14 +446,17 @@ def fake_construct_api_requests(**kwargs):
         sent_filters.append(kwargs.get("filter"))
         return _fake_prepared_request()
 
-    with mock.patch(
-        "dataretrieval.waterdata.utils._construct_api_requests",
-        side_effect=fake_construct_api_requests,
-    ), mock.patch(
-        "dataretrieval.waterdata.utils._walk_pages",
-        return_value=(
-            pd.DataFrame({"id": ["row-1"], "value": [1]}),
-            _fake_response(),
+    with (
+        mock.patch(
+            "dataretrieval.waterdata.utils._construct_api_requests",
+            side_effect=fake_construct_api_requests,
+        ),
+        mock.patch(
+            "dataretrieval.waterdata.utils._walk_pages",
+            return_value=(
+                pd.DataFrame({"id": ["row-1"], "value": [1]}),
+                _fake_response(),
+            ),
         ),
     ):
         get_continuous(

From c63561949f23d2d5b77a135f1b3eb73785669c87 Mon Sep 17 00:00:00 2001
From: Timothy Hodson <34148978+thodson-usgs@users.noreply.github.com>
Date: Thu, 7 May 2026 08:45:28 -0500
Subject: [PATCH 06/12] Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
 tests/waterdata_filters_test.py | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/tests/waterdata_filters_test.py b/tests/waterdata_filters_test.py
index 545f7039..2e378ffa 100644
--- a/tests/waterdata_filters_test.py
+++ b/tests/waterdata_filters_test.py
@@ -446,17 +446,14 @@ def fake_construct_api_requests(**kwargs):
         sent_filters.append(kwargs.get("filter"))
         return _fake_prepared_request()
 
-    with (
-        mock.patch(
-            "dataretrieval.waterdata.utils._construct_api_requests",
-            side_effect=fake_construct_api_requests,
-        ),
-        mock.patch(
-            "dataretrieval.waterdata.utils._walk_pages",
-            return_value=(
-                pd.DataFrame({"id": ["row-1"], "value": [1]}),
-                _fake_response(),
-            ),
+    with mock.patch(
+        "dataretrieval.waterdata.utils._construct_api_requests",
+        side_effect=fake_construct_api_requests,
+    ), mock.patch(
+        "dataretrieval.waterdata.utils._walk_pages",
+        return_value=(
+            pd.DataFrame({"id": ["row-1"], "value": [1]}),
+            _fake_response(),
         ),
     ):
         get_continuous(

From dd03edf8c36a0a56c6791b6f7154eab2d55f2485 Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Thu, 7 May 2026 08:46:58 -0500
Subject: [PATCH 07/12] Note Python 3.9 floor bump in NEWS

The previous commit (3a6cad9) raised requires-python from >=3.8 to
>=3.9 to align pyproject with what CI actually tested. That is a
breaking change for any downstream user still on 3.8, so call it out
in the changelog.

Reported by Copilot review on PR #272.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 NEWS.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 8ddd3282..50440268 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,5 @@
+**05/07/2026:** Bumped the declared minimum Python version from **3.8** to **3.9** (`pyproject.toml`'s `requires-python` and the ruff target). This brings the manifest in line with what was already being tested — CI's matrix has long covered only 3.9, 3.13, and 3.14, the `waterdata` test module already skipped itself on Python < 3.10, and several modules already use 3.9-only stdlib (e.g. `zoneinfo`). Users on 3.8 will no longer be able to install the package; please upgrade.
+
 **05/07/2026:** `waterdata.get_samples()` and `wqp.get_results()` now append a derived `<prefix>DateTime` UTC column for every Date/Time/TimeZone triplet in the response (e.g. `Activity_StartDate` + `Activity_StartTime` + `Activity_StartTimeZone` → `Activity_StartDateTime`). Both the WQX3 (`<X>Date`/`<X>Time`/`<X>TimeZone`) and legacy WQP (`<X>Date`/`<X>Time/Time`/`<X>Time/TimeZoneCode`) shapes are recognized; abbreviations like EST/EDT/CST/PST resolve to a UTC `Timestamp`, unknown codes resolve to `NaT`, and the original triplet columns are preserved. Mirrors R's `create_dateTime` behavior. Closes #266.
 
 **05/06/2026:** Each remaining active function in `dataretrieval.nwis` now emits a per-function `DeprecationWarning` naming the `waterdata` replacement to migrate to (visible the first time users call each getter). The `nwis` module is scheduled for removal on or after **2027-05-06**.

From f198449a83df8d856882c79fd715984cd4ca673c Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Thu, 7 May 2026 08:49:16 -0500
Subject: [PATCH 08/12] Restore consistent parenthesized-with form

The Copilot autofix in c635619 reverted one of four parenthesized-with
blocks back to the chained form, leaving the file inconsistent under
the project's ruff target (py39 prefers the parenthesized form per the
3.9 PEG parser). Re-running ruff format restores all four blocks to
the canonical form so ruff format --check passes again.

The "parenthesized with is 3.10+ only" concern is technically
incorrect on this codebase: the 3.9 PEG parser accepts it, and the
last CI run on 3a6cad9 / 2f7cf10 passed test (ubuntu-latest, 3.9) and
test (windows-latest, 3.9) with this syntax in place.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 tests/waterdata_filters_test.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/tests/waterdata_filters_test.py b/tests/waterdata_filters_test.py
index 2e378ffa..545f7039 100644
--- a/tests/waterdata_filters_test.py
+++ b/tests/waterdata_filters_test.py
@@ -446,14 +446,17 @@ def fake_construct_api_requests(**kwargs):
         sent_filters.append(kwargs.get("filter"))
         return _fake_prepared_request()
 
-    with mock.patch(
-        "dataretrieval.waterdata.utils._construct_api_requests",
-        side_effect=fake_construct_api_requests,
-    ), mock.patch(
-        "dataretrieval.waterdata.utils._walk_pages",
-        return_value=(
-            pd.DataFrame({"id": ["row-1"], "value": [1]}),
-            _fake_response(),
+    with (
+        mock.patch(
+            "dataretrieval.waterdata.utils._construct_api_requests",
+            side_effect=fake_construct_api_requests,
+        ),
+        mock.patch(
+            "dataretrieval.waterdata.utils._walk_pages",
+            return_value=(
+                pd.DataFrame({"id": ["row-1"], "value": [1]}),
+                _fake_response(),
+            ),
         ),
     ):
         get_continuous(

From c1e937a6cfde7fdc3c070237e6d9860d1b71b93d Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Thu, 7 May 2026 10:47:12 -0500
Subject: [PATCH 09/12] Mark attach_datetime_columns as private and add type
 hints
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The helper is purely an internal post-processing step inside
get_samples / get_results — users have no reason to call it directly,
and dataretrieval/__init__.py's `from dataretrieval.utils import *`
was leaking it into the public API surface as
`dataretrieval.attach_datetime_columns`. Underscore-prefix it and
update the two call sites plus the unit tests.

Also annotate both helpers, matching the typing style already used in
dataretrieval/waterdata/utils.py: _attach_datetime_columns takes and
returns a pd.DataFrame, and _build_utc_datetime takes three pd.Series
(date, time, tz) and returns a pd.Series.

Addresses self-review of PR #272.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 dataretrieval/utils.py         |  6 ++++--
 dataretrieval/waterdata/api.py |  4 ++--
 dataretrieval/wqp.py           |  4 ++--
 tests/utils_test.py            | 16 ++++++++--------
 4 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py
index 4be188a1..ede32dc7 100644
--- a/dataretrieval/utils.py
+++ b/dataretrieval/utils.py
@@ -99,7 +99,9 @@ def format_datetime(df, date_field, time_field, tz_field):
 _TIME_TZ_SUFFIXES = (("Time", "TimeZone"), ("Time/Time", "Time/TimeZoneCode"))
 
 
-def _build_utc_datetime(date_series, time_series, tz_series):
+def _build_utc_datetime(
+    date_series: pd.Series, time_series: pd.Series, tz_series: pd.Series
+) -> pd.Series:
     """Combine date + time + tz-abbreviation columns into a UTC pandas Series.
 
     Unknown timezone codes (and rows missing any of the three values) yield
@@ -118,7 +120,7 @@ def _build_utc_datetime(date_series, time_series, tz_series):
     )
 
 
-def attach_datetime_columns(df):
+def _attach_datetime_columns(df: pd.DataFrame) -> pd.DataFrame:
     """Add ``<prefix>DateTime`` UTC columns for any Date/Time/TimeZone triplets.
 
     Detects two naming patterns that appear in USGS Samples and Water Quality
diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py
index 60ce70d1..a5e35c7a 100644
--- a/dataretrieval/waterdata/api.py
+++ b/dataretrieval/waterdata/api.py
@@ -16,7 +16,7 @@
 import requests
 from requests.models import PreparedRequest
 
-from dataretrieval.utils import BaseMetadata, attach_datetime_columns, to_str
+from dataretrieval.utils import BaseMetadata, _attach_datetime_columns, to_str
 from dataretrieval.waterdata.filters import FILTER_LANG
 from dataretrieval.waterdata.types import (
     CODE_SERVICES,
@@ -2329,7 +2329,7 @@ def get_samples(
     response.raise_for_status()
 
     df = pd.read_csv(StringIO(response.text), delimiter=",")
-    df = attach_datetime_columns(df)
+    df = _attach_datetime_columns(df)
 
     return df, BaseMetadata(response)
 
diff --git a/dataretrieval/wqp.py b/dataretrieval/wqp.py
index 6df145e6..dd822310 100644
--- a/dataretrieval/wqp.py
+++ b/dataretrieval/wqp.py
@@ -17,7 +17,7 @@
 
 import pandas as pd
 
-from .utils import BaseMetadata, attach_datetime_columns, query
+from .utils import BaseMetadata, _attach_datetime_columns, query
 
 if TYPE_CHECKING:
     from pandas import DataFrame
@@ -152,7 +152,7 @@ def get_results(
     response = query(url, kwargs, delimiter=";", ssl_check=ssl_check)
 
     df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False)
-    df = attach_datetime_columns(df)
+    df = _attach_datetime_columns(df)
     return df, WQP_Metadata(response)
 
 
diff --git a/tests/utils_test.py b/tests/utils_test.py
index 8f52acd9..760401ca 100644
--- a/tests/utils_test.py
+++ b/tests/utils_test.py
@@ -100,7 +100,7 @@ def test_to_str_non_iterable(self):
 
 
 class Test_attach_datetime_columns:
-    """Tests of attach_datetime_columns, which derives <prefix>DateTime UTC
+    """Tests of _attach_datetime_columns, which derives <prefix>DateTime UTC
     columns from Date/Time/TimeZone triplets in Samples and WQP CSVs."""
 
     def test_wqx3_triplet_resolves_to_utc(self):
@@ -111,7 +111,7 @@ def test_wqx3_triplet_resolves_to_utc(self):
                 "Activity_StartTimeZone": ["PST", "EST"],
             }
         )
-        df = utils.attach_datetime_columns(df)
+        df = utils._attach_datetime_columns(df)
         assert df["Activity_StartDateTime"][0] == pd.Timestamp(
             "2024-01-09 18:00:00", tz="UTC"
         )
@@ -128,7 +128,7 @@ def test_legacy_wqp_triplet_resolves_to_utc(self):
                 "ActivityStartTime/TimeZoneCode": ["PST"],
             }
         )
-        df = utils.attach_datetime_columns(df)
+        df = utils._attach_datetime_columns(df)
         assert df["ActivityStartDateTime"][0] == pd.Timestamp(
             "2024-01-09 18:00:00", tz="UTC"
         )
@@ -141,7 +141,7 @@ def test_unknown_timezone_is_NaT(self):
                 "Activity_StartTimeZone": ["BOGUS"],
             }
         )
-        df = utils.attach_datetime_columns(df)
+        df = utils._attach_datetime_columns(df)
         assert df["Activity_StartDateTime"].isna().all()
 
     def test_missing_time_or_tz_is_NaT(self):
@@ -152,7 +152,7 @@ def test_missing_time_or_tz_is_NaT(self):
                 "Activity_StartTimeZone": ["PST", "EST"],
             }
         )
-        df = utils.attach_datetime_columns(df)
+        df = utils._attach_datetime_columns(df)
         assert df["Activity_StartDateTime"][0] == pd.Timestamp(
             "2024-01-09 18:00:00", tz="UTC"
         )
@@ -167,7 +167,7 @@ def test_existing_datetime_column_not_overwritten(self):
                 "Activity_StartDateTime": ["preexisting"],
             }
         )
-        df = utils.attach_datetime_columns(df)
+        df = utils._attach_datetime_columns(df)
         assert df["Activity_StartDateTime"].tolist() == ["preexisting"]
 
     def test_multiple_triplets_handled(self):
@@ -181,11 +181,11 @@ def test_multiple_triplets_handled(self):
                 "LabInfo_AnalysisStartTimeZone": ["EST"],
             }
         )
-        df = utils.attach_datetime_columns(df)
+        df = utils._attach_datetime_columns(df)
         assert "Activity_StartDateTime" in df.columns
         assert "LabInfo_AnalysisStartDateTime" in df.columns
 
     def test_lone_date_column_left_alone(self):
         df = pd.DataFrame({"LastChangeDate": ["2024-01-09"]})
-        df = utils.attach_datetime_columns(df)
+        df = utils._attach_datetime_columns(df)
         assert list(df.columns) == ["LastChangeDate"]

From 2ba6084ebe33cc72a2d47eed51951b2f110d21ce Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Thu, 7 May 2026 10:57:42 -0500
Subject: [PATCH 10/12] Self-document _TIME_TZ_SUFFIXES with example column
 names
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The two-tuple constant identifying the WQP/Samples Date/Time/TimeZone
naming patterns was previously labeled "WQX3 / Samples" and "legacy
WQP" — accurate for someone steeped in USGS jargon, opaque for a
maintainer reading the file cold. Spell out an example column-name
triplet next to each entry so the constant is self-explanatory.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 dataretrieval/utils.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py
index ede32dc7..a0b19348 100644
--- a/dataretrieval/utils.py
+++ b/dataretrieval/utils.py
@@ -95,8 +95,14 @@ def format_datetime(df, date_field, time_field, tz_field):
 
 
 # (time-suffix, tz-suffix) pairs that follow a "<prefix>Date" column.
-# First entry is WQX3 / Samples, second is legacy WQP (slash-separated).
-_TIME_TZ_SUFFIXES = (("Time", "TimeZone"), ("Time/Time", "Time/TimeZoneCode"))
+_TIME_TZ_SUFFIXES = (
+    # WQX3 / Samples, e.g.
+    #   Activity_StartDate / Activity_StartTime / Activity_StartTimeZone
+    ("Time", "TimeZone"),
+    # Legacy WQP (slash-separated), e.g.
+    #   ActivityStartDate / ActivityStartTime/Time / ActivityStartTime/TimeZoneCode
+    ("Time/Time", "Time/TimeZoneCode"),
+)
 
 
 def _build_utc_datetime(

From 1d69a3429f07bb4587bd78c6f85ee3d1904ec857 Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Fri, 8 May 2026 16:17:52 -0500
Subject: [PATCH 11/12] Sort samples/WQP rows by activity-start datetime

Per ldecicco-USGS's review of #272: R dataRetrieval ends its WQP/Samples
pipeline by sorting the returned table on the activity-start datetime,
because the API's natural order is unstable. Mirror that here.

_attach_datetime_columns now picks a sort key with the same precedence
R uses:
  1. Activity_StartDateTime  (WQX3 / Samples)
  2. ActivityStartDateTime   (legacy WQP)
  3. first detected *Date column (fallback)

The sort runs in addition to (and after) the DateTime-column derivation,
and uses ignore_index=True to match the convention in
dataretrieval/waterdata/utils.py::_sort_rows. Three new unit tests cover
each branch of the precedence; the existing mock samples test was
updated to assert that iloc[0] is now the earliest sample in the
fixture (2023-06-20 09:25 CDT) and that Activity_StartDateTime is
monotonically increasing.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 NEWS.md                        |  2 +-
 dataretrieval/utils.py         | 35 ++++++++++++++++++++++++--------
 dataretrieval/waterdata/api.py |  4 +++-
 dataretrieval/wqp.py           |  2 ++
 tests/utils_test.py            | 37 ++++++++++++++++++++++++++++++++++
 tests/waterdata_test.py        |  6 ++++--
 6 files changed, 74 insertions(+), 12 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 50440268..246ede15 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,6 @@
 **05/07/2026:** Bumped the declared minimum Python version from **3.8** to **3.9** (`pyproject.toml`'s `requires-python` and the ruff target). This brings the manifest in line with what was already being tested — CI's matrix has long covered only 3.9, 3.13, and 3.14, the `waterdata` test module already skipped itself on Python < 3.10, and several modules already use 3.9-only stdlib (e.g. `zoneinfo`). Users on 3.8 will no longer be able to install the package; please upgrade.
 
-**05/07/2026:** `waterdata.get_samples()` and `wqp.get_results()` now append a derived `<prefix>DateTime` UTC column for every Date/Time/TimeZone triplet in the response (e.g. `Activity_StartDate` + `Activity_StartTime` + `Activity_StartTimeZone` → `Activity_StartDateTime`). Both the WQX3 (`<X>Date`/`<X>Time`/`<X>TimeZone`) and legacy WQP (`<X>Date`/`<X>Time/Time`/`<X>Time/TimeZoneCode`) shapes are recognized; abbreviations like EST/EDT/CST/PST resolve to a UTC `Timestamp`, unknown codes resolve to `NaT`, and the original triplet columns are preserved. Mirrors R's `create_dateTime` behavior. Closes #266.
+**05/07/2026:** `waterdata.get_samples()` and `wqp.get_results()` now append a derived `<prefix>DateTime` UTC column for every Date/Time/TimeZone triplet in the response (e.g. `Activity_StartDate` + `Activity_StartTime` + `Activity_StartTimeZone` → `Activity_StartDateTime`). Both the WQX3 (`<X>Date`/`<X>Time`/`<X>TimeZone`) and legacy WQP (`<X>Date`/`<X>Time/Time`/`<X>Time/TimeZoneCode`) shapes are recognized; abbreviations like EST/EDT/CST/PST resolve to a UTC `Timestamp`, unknown codes resolve to `NaT`, and the original triplet columns are preserved. Returned rows are also now sorted by `Activity_StartDateTime` (or the legacy `ActivityStartDateTime`) — the underlying APIs return rows in an unstable order. Mirrors R's `create_dateTime` and end-of-pipeline sort. Closes #266.
 
 **05/06/2026:** Each remaining active function in `dataretrieval.nwis` now emits a per-function `DeprecationWarning` naming the `waterdata` replacement to migrate to (visible the first time users call each getter). The `nwis` module is scheduled for removal on or after **2027-05-06**.
 
diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py
index a0b19348..76bbb6ad 100644
--- a/dataretrieval/utils.py
+++ b/dataretrieval/utils.py
@@ -127,7 +127,8 @@ def _build_utc_datetime(
 
 
 def _attach_datetime_columns(df: pd.DataFrame) -> pd.DataFrame:
-    """Add ``<prefix>DateTime`` UTC columns for any Date/Time/TimeZone triplets.
+    """Add ``<prefix>DateTime`` UTC columns for any Date/Time/TimeZone triplets
+    and sort the frame by the activity-start datetime.
 
     Detects two naming patterns that appear in USGS Samples and Water Quality
     Portal CSV responses:
@@ -142,6 +143,12 @@ def _attach_datetime_columns(df: pd.DataFrame) -> pd.DataFrame:
     are left intact, and an existing ``<prefix>DateTime`` column is never
     overwritten.
 
+    Rows are sorted (and the index reset) by the canonical activity-start
+    datetime when present — ``Activity_StartDateTime`` (WQX3) or
+    ``ActivityStartDateTime`` (legacy WQP) — falling back to the first
+    detected ``*Date`` column. Mirrors R ``dataRetrieval``'s
+    end-of-pipeline sort in ``importWQP.R``.
+
     Parameters
     ----------
     df : ``pandas.DataFrame``
@@ -150,14 +157,18 @@ def _attach_datetime_columns(df: pd.DataFrame) -> pd.DataFrame:
     Returns
     -------
     df : ``pandas.DataFrame``
-        A new DataFrame with any derivable ``<prefix>DateTime`` columns
-        appended (or the original frame if no triplets were found).
+        A new DataFrame with derivable ``<prefix>DateTime`` columns appended
+        and rows sorted by the activity-start datetime (if any date column
+        was detected).
     """
     columns = set(df.columns)
     new_columns = {}
+    first_date_col = None
     for col in df.columns:
         if not col.endswith("Date"):
             continue
+        if first_date_col is None:
+            first_date_col = col
         prefix = col.removesuffix("Date")
         target = prefix + "DateTime"
         if target in columns or target in new_columns:
@@ -170,11 +181,19 @@ def _attach_datetime_columns(df: pd.DataFrame) -> pd.DataFrame:
                     df[col], df[time_col], df[tz_col]
                 )
                 break
-    if not new_columns:
-        return df
-    # Concat in one shot — per-column assignment on a wide CSV-derived frame
-    # triggers pandas' fragmentation PerformanceWarning.
-    return pd.concat([df, pd.DataFrame(new_columns, index=df.index)], axis=1)
+    if new_columns:
+        # Concat in one shot — per-column assignment on a wide CSV-derived
+        # frame triggers pandas' fragmentation PerformanceWarning.
+        df = pd.concat([df, pd.DataFrame(new_columns, index=df.index)], axis=1)
+    if "Activity_StartDateTime" in df.columns:
+        sort_key = "Activity_StartDateTime"
+    elif "ActivityStartDateTime" in df.columns:
+        sort_key = "ActivityStartDateTime"
+    else:
+        sort_key = first_date_col
+    if sort_key is not None:
+        df = df.sort_values(by=sort_key, ignore_index=True)
+    return df
 
 
 class BaseMetadata:
diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py
index a5e35c7a..bca8dc40 100644
--- a/dataretrieval/waterdata/api.py
+++ b/dataretrieval/waterdata/api.py
@@ -2272,7 +2272,9 @@ def get_samples(
         ``Activity_StartTimeZone``), an additional ``<prefix>DateTime`` column
         is appended holding a UTC ``Timestamp`` derived from the three. The
         original Date/Time/TimeZone columns are left intact; rows whose
-        timezone abbreviation is not recognized resolve to ``NaT``.
+        timezone abbreviation is not recognized resolve to ``NaT``. Rows are
+        sorted by ``Activity_StartDateTime`` when present (the API's default
+        order is unstable).
     md : :obj:`dataretrieval.utils.Metadata`
         Custom ``dataretrieval`` metadata object pertaining to the query.
 
diff --git a/dataretrieval/wqp.py b/dataretrieval/wqp.py
index dd822310..8cfc6ca1 100644
--- a/dataretrieval/wqp.py
+++ b/dataretrieval/wqp.py
@@ -107,6 +107,8 @@ def get_results(
         ``<prefix>Time/TimeZoneCode``), an additional ``<prefix>DateTime``
         column is appended holding a UTC ``Timestamp``. Original triplet
         columns are preserved; unrecognized timezone codes yield ``NaT``.
+        Rows are sorted by ``ActivityStartDateTime`` (or ``Activity_StartDateTime``
+        for WQX3 responses) when present.
     md : :obj:`dataretrieval.utils.Metadata`
         Custom ``dataretrieval`` metadata object pertaining to the query.
 
diff --git a/tests/utils_test.py b/tests/utils_test.py
index 760401ca..a2c4cf9b 100644
--- a/tests/utils_test.py
+++ b/tests/utils_test.py
@@ -189,3 +189,40 @@ def test_lone_date_column_left_alone(self):
         df = pd.DataFrame({"LastChangeDate": ["2024-01-09"]})
         df = utils._attach_datetime_columns(df)
         assert list(df.columns) == ["LastChangeDate"]
+
+    def test_rows_sorted_by_wqx3_activity_start(self):
+        df = pd.DataFrame(
+            {
+                "Activity_StartDate": ["2024-03-01", "2024-01-09", "2024-02-15"],
+                "Activity_StartTime": ["10:00:00", "10:00:00", "10:00:00"],
+                "Activity_StartTimeZone": ["UTC", "UTC", "UTC"],
+                "marker": ["c", "a", "b"],
+            }
+        )
+        df = utils._attach_datetime_columns(df)
+        assert df["marker"].tolist() == ["a", "b", "c"]
+        assert df.index.tolist() == [0, 1, 2]
+
+    def test_rows_sorted_by_legacy_activity_start_when_wqx3_absent(self):
+        df = pd.DataFrame(
+            {
+                "ActivityStartDate": ["2024-03-01", "2024-01-09"],
+                "ActivityStartTime/Time": ["10:00:00", "10:00:00"],
+                "ActivityStartTime/TimeZoneCode": ["UTC", "UTC"],
+                "marker": ["b", "a"],
+            }
+        )
+        df = utils._attach_datetime_columns(df)
+        assert df["marker"].tolist() == ["a", "b"]
+
+    def test_rows_sorted_by_first_date_column_as_fallback(self):
+        # No triplet → no DateTime column added, but rows still sort by the
+        # first *Date column found (mirrors R's importWQP.R fallback).
+        df = pd.DataFrame(
+            {
+                "LastChangeDate": ["2024-03-01", "2024-01-09"],
+                "marker": ["b", "a"],
+            }
+        )
+        df = utils._attach_datetime_columns(df)
+        assert df["marker"].tolist() == ["a", "b"]
diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py
index 493c73ff..7ae7d557 100644
--- a/tests/waterdata_test.py
+++ b/tests/waterdata_test.py
@@ -60,11 +60,13 @@ def test_mock_get_samples(requests_mock):
     assert isinstance(md.query_time, datetime.timedelta)
     assert md.header == {"mock_header": "value"}
     assert md.comment is None
-    # Row 0 of the fixture is "2023-08-22 08:50:00 CDT" → 13:50 UTC.
+    # Rows now come back sorted by Activity_StartDateTime; the earliest in
+    # the fixture is "2023-06-20 09:25:00 CDT" → 14:25 UTC.
     assert df["Activity_StartDateTime"].iloc[0] == pd.Timestamp(
-        "2023-08-22 13:50:00", tz="UTC"
+        "2023-06-20 14:25:00", tz="UTC"
     )
     assert df["Activity_StartTimeZone"].iloc[0] == "CDT"
+    assert df["Activity_StartDateTime"].is_monotonic_increasing
 
 
 def test_mock_get_samples_summary(requests_mock):

From 59df88b115a2d52adba1d2dc40f76fa5f344e530 Mon Sep 17 00:00:00 2001
From: thodson-usgs <thodson@usgs.gov>
Date: Fri, 8 May 2026 16:24:15 -0500
Subject: [PATCH 12/12] Trim trivial tests on _attach_datetime_columns
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop 6 unit tests and a handful of sort-specific integration assertions:

- test_missing_time_or_tz_is_NaT: redundant with test_unknown_timezone_is_NaT —
  both exercise the same NaT coercion path through pd.to_datetime.
- test_multiple_triplets_handled: the samples_results.txt mock fixture
  already has 6 triplets, so the integration test exercises this.
- test_lone_date_column_left_alone: trivially obvious from the loop body.
- test_rows_sorted_by_wqx3_activity_start, _legacy_*, _first_date_column_*:
  per maintainer feedback, the sort behavior doesn't need dedicated test
  coverage on a private helper.

In test_mock_get_samples, drop the iloc[0]/is_monotonic_increasing
assertions and replace with a minimal "DateTime column has at least one
parsed timestamp" check. The shape assertion (67, 187) still proves all
6 derived DateTime columns were appended.

Also drop the now-unused `import pandas as pd` from tests/waterdata_test.py.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 tests/utils_test.py     | 71 -----------------------------------------
 tests/waterdata_test.py | 10 ++----
 2 files changed, 2 insertions(+), 79 deletions(-)

diff --git a/tests/utils_test.py b/tests/utils_test.py
index a2c4cf9b..2c350b2b 100644
--- a/tests/utils_test.py
+++ b/tests/utils_test.py
@@ -144,20 +144,6 @@ def test_unknown_timezone_is_NaT(self):
         df = utils._attach_datetime_columns(df)
         assert df["Activity_StartDateTime"].isna().all()
 
-    def test_missing_time_or_tz_is_NaT(self):
-        df = pd.DataFrame(
-            {
-                "Activity_StartDate": ["2024-01-09", "2024-02-15"],
-                "Activity_StartTime": ["10:00:00", None],
-                "Activity_StartTimeZone": ["PST", "EST"],
-            }
-        )
-        df = utils._attach_datetime_columns(df)
-        assert df["Activity_StartDateTime"][0] == pd.Timestamp(
-            "2024-01-09 18:00:00", tz="UTC"
-        )
-        assert pd.isna(df["Activity_StartDateTime"][1])
-
     def test_existing_datetime_column_not_overwritten(self):
         df = pd.DataFrame(
             {
@@ -169,60 +155,3 @@ def test_existing_datetime_column_not_overwritten(self):
         )
         df = utils._attach_datetime_columns(df)
         assert df["Activity_StartDateTime"].tolist() == ["preexisting"]
-
-    def test_multiple_triplets_handled(self):
-        df = pd.DataFrame(
-            {
-                "Activity_StartDate": ["2024-01-09"],
-                "Activity_StartTime": ["10:00:00"],
-                "Activity_StartTimeZone": ["PST"],
-                "LabInfo_AnalysisStartDate": ["2024-01-10"],
-                "LabInfo_AnalysisStartTime": ["09:00:00"],
-                "LabInfo_AnalysisStartTimeZone": ["EST"],
-            }
-        )
-        df = utils._attach_datetime_columns(df)
-        assert "Activity_StartDateTime" in df.columns
-        assert "LabInfo_AnalysisStartDateTime" in df.columns
-
-    def test_lone_date_column_left_alone(self):
-        df = pd.DataFrame({"LastChangeDate": ["2024-01-09"]})
-        df = utils._attach_datetime_columns(df)
-        assert list(df.columns) == ["LastChangeDate"]
-
-    def test_rows_sorted_by_wqx3_activity_start(self):
-        df = pd.DataFrame(
-            {
-                "Activity_StartDate": ["2024-03-01", "2024-01-09", "2024-02-15"],
-                "Activity_StartTime": ["10:00:00", "10:00:00", "10:00:00"],
-                "Activity_StartTimeZone": ["UTC", "UTC", "UTC"],
-                "marker": ["c", "a", "b"],
-            }
-        )
-        df = utils._attach_datetime_columns(df)
-        assert df["marker"].tolist() == ["a", "b", "c"]
-        assert df.index.tolist() == [0, 1, 2]
-
-    def test_rows_sorted_by_legacy_activity_start_when_wqx3_absent(self):
-        df = pd.DataFrame(
-            {
-                "ActivityStartDate": ["2024-03-01", "2024-01-09"],
-                "ActivityStartTime/Time": ["10:00:00", "10:00:00"],
-                "ActivityStartTime/TimeZoneCode": ["UTC", "UTC"],
-                "marker": ["b", "a"],
-            }
-        )
-        df = utils._attach_datetime_columns(df)
-        assert df["marker"].tolist() == ["a", "b"]
-
-    def test_rows_sorted_by_first_date_column_as_fallback(self):
-        # No triplet → no DateTime column added, but rows still sort by the
-        # first *Date column found (mirrors R's importWQP.R fallback).
-        df = pd.DataFrame(
-            {
-                "LastChangeDate": ["2024-03-01", "2024-01-09"],
-                "marker": ["b", "a"],
-            }
-        )
-        df = utils._attach_datetime_columns(df)
-        assert df["marker"].tolist() == ["a", "b"]
diff --git a/tests/waterdata_test.py b/tests/waterdata_test.py
index 7ae7d557..b53ee296 100644
--- a/tests/waterdata_test.py
+++ b/tests/waterdata_test.py
@@ -1,7 +1,6 @@
 import datetime
 import sys
 
-import pandas as pd
 import pytest
 from pandas import DataFrame
 
@@ -55,18 +54,13 @@ def test_mock_get_samples(requests_mock):
         monitoringLocationIdentifier="USGS-05406500",
     )
     assert type(df) is DataFrame
+    # 181 source columns + 6 derived <prefix>DateTime columns
     assert df.shape == (67, 187)
     assert md.url == request_url
     assert isinstance(md.query_time, datetime.timedelta)
     assert md.header == {"mock_header": "value"}
     assert md.comment is None
-    # Rows now come back sorted by Activity_StartDateTime; the earliest in
-    # the fixture is "2023-06-20 09:25:00 CDT" → 14:25 UTC.
-    assert df["Activity_StartDateTime"].iloc[0] == pd.Timestamp(
-        "2023-06-20 14:25:00", tz="UTC"
-    )
-    assert df["Activity_StartTimeZone"].iloc[0] == "CDT"
-    assert df["Activity_StartDateTime"].is_monotonic_increasing
+    assert df["Activity_StartDateTime"].notna().any()
 
 
 def test_mock_get_samples_summary(requests_mock):