Implement cog3pio backend entrypoint to read TIFFs

weiji14 · weiji14 · commit 2012579574d6 · 2026-03-05T15:14:37.000+13:00
Read TIFF data into xarray via cog3pio's experimental CudaCogReader struct that uses nvTIFF as its backend. Use cupy.from_dlpack to read the DLPack tensor, and reshape the 1-D array into a 3-D array (CHW form), setting the coordinates as appropriate. Added some API docs and basic unit tests. Cherry-picked from weiji14/cog3pio#71
diff --git a/ci/doc.yml b/ci/doc.yml
@@ -4,7 +4,7 @@ channels:
 dependencies:
   - cupy-core
   - pip
-  - python=3.10
+  - python=3.13
   - sphinx
   - sphinx-design
   - sphinx-copybutton
@@ -18,3 +18,4 @@ dependencies:
   - pip:
       # relative to this file. Needs to be editable to be accepted.
       - --editable ..
+      - cog3pio[cuda] @ git+https://github.com/weiji14/cog3pio.git@178a3ffb8163c97f7af9e71bc68b6545a4e8e192 # https://github.com/weiji14/cog3pio/pull/71
diff --git a/cupy_xarray/__init__.py b/cupy_xarray/__init__.py
@@ -1,4 +1,9 @@
 from . import _version
-from .accessors import CupyDataArrayAccessor, CupyDatasetAccessor  # noqa
+from .accessors import CupyDataArrayAccessor, CupyDatasetAccessor  # noqa: F401
+
+try:  # cog3pio is not a declared hard dependency; keep `import cupy_xarray` working without it
+    from .cog3pio import Cog3pioBackendEntrypoint  # noqa: F401
+except ImportError:
+    pass
 
 __version__ = _version.get_versions()["version"]
diff --git a/cupy_xarray/cog3pio.py b/cupy_xarray/cog3pio.py
@@ -0,0 +1,100 @@
+"""
+`cog3pio` backend for xarray to read TIFF files directly into CuPy arrays in GPU memory.
+"""
+
+import os
+from collections.abc import Iterable
+
+import cupy as cp  # type: ignore[import-untyped]
+import numpy as np
+import xarray as xr
+from cog3pio import CudaCogReader
+from xarray.backends import BackendEntrypoint
+
+
+# %%
+class Cog3pioBackendEntrypoint(BackendEntrypoint):
+    """
+    Xarray backend to read GeoTIFF files using 'cog3pio' engine.
+
+    When using :py:func:`xarray.open_dataarray` with ``engine="cog3pio"``, the
+    ``device_id`` parameter can be set to the CUDA GPU id to do the decoding on.
+
+    Examples
+    --------
+    Read a GeoTIFF from an HTTP URL into an :py:class:`xarray.DataArray`:
+
+    >>> import xarray as xr
+    >>> dataarray: xr.DataArray = xr.open_dataarray(
+    ...     filename_or_obj="https://github.com/OSGeo/gdal/raw/v3.11.0/autotest/gcore/data/byte_zstd.tif",
+    ...     engine="cog3pio",
+    ...     device_id=0,  # cuda:0
+    ... )
+    >>> dataarray.sizes
+    Frozen({'band': 1, 'y': 20, 'x': 20})
+    >>> dataarray.dtype
+    dtype('uint8')
+    """
+
+    description = "Use .tif files in Xarray"
+    open_dataset_parameters = ("filename_or_obj", "drop_variables", "device_id")
+    url = "https://github.com/weiji14/cog3pio"
+
+    def open_dataset(  # type: ignore[override]
+        self,
+        filename_or_obj: str,
+        *,
+        drop_variables: str | Iterable[str] | None = None,
+        device_id: int,
+        # other backend specific keyword arguments
+        # `chunks` and `cache` DO NOT go here, they are handled by xarray
+        mask_and_scale=None,
+    ) -> xr.Dataset:
+        """
+        Backend open_dataset method used by Xarray in :py:func:`xarray.open_dataset`.
+
+        Parameters
+        ----------
+        filename_or_obj : str
+            File path or url to a TIFF (.tif) image file that can be read by the
+            nvTIFF or image-tiff backend library.
+        drop_variables : str or iterable of str, optional
+            Accepted but currently unused by this backend.
+        device_id : int
+            CUDA device ID on which to place the created cupy array.
+        mask_and_scale : optional
+            Accepted but currently unused by this backend.
+
+        Returns
+        -------
+        xarray.Dataset
+            Dataset holding one ``raster`` variable with coords band, y and x.
+        """
+        with cp.cuda.Stream(ptds=True):
+            cog = CudaCogReader(path=filename_or_obj, device_id=device_id)
+            array_: cp.ndarray = cp.from_dlpack(cog)  # 1-D Array
+            x_coords, y_coords = cog.xy_coords()  # TODO consider using rasterix
+            height, width = (len(y_coords), len(x_coords))
+            channels: int = len(array_) // (height * width)
+            # TODO make API to get proper 3-D shape directly, or use cuTENSOR
+            array_ = array_.reshape(height, width, channels)  # HWC
+            array = array_.transpose(2, 0, 1)  # CHW
+
+        dataarray: xr.DataArray = xr.DataArray(
+            data=array,
+            coords={
+                "band": np.arange(channels, dtype=np.uint8),
+                "y": y_coords,
+                "x": x_coords,
+            },
+        )
+
+        return dataarray.to_dataset(name="raster")
+
+    def guess_can_open(self, filename_or_obj):
+        """Return True for paths ending in .tif/.tiff, matched case-insensitively."""
+        try:
+            _, ext = os.path.splitext(filename_or_obj)
+        except TypeError:
+            return False
+        return ext.lower() in {".tif", ".tiff"}
diff --git a/cupy_xarray/tests/test_cog3pio.py b/cupy_xarray/tests/test_cog3pio.py
@@ -0,0 +1,34 @@
+"""
+Tests for xarray 'cog3pio' backend engine.
+"""
+
+import cupy as cp
+import pytest
+import xarray as xr
+
+cog3pio = pytest.importorskip("cog3pio")  # must run before importing the backend
+
+from cupy_xarray.cog3pio import Cog3pioBackendEntrypoint  # noqa: E402
+
+
+def test_entrypoint():
+    assert "cog3pio" in xr.backends.list_engines()  # engine name is discoverable by xarray
+
+
+def test_xarray_backend_open_dataarray():
+    """
+    Open a Cloud-optimized GeoTIFF from a http url with the cog3pio backend class
+    passed as ``engine`` to xarray.open_dataarray, then check the cupy-backed result.
+    """
+    with xr.open_dataarray(
+        filename_or_obj="https://github.com/developmentseed/titiler/raw/1.2.0/src/titiler/mosaic/tests/fixtures/TCI.tif",
+        engine=Cog3pioBackendEntrypoint,
+        device_id=0,
+    ) as raster:
+        assert isinstance(raster.data, cp.ndarray)
+        assert raster.dtype == "uint8"
+        assert raster.sizes == {"band": 3, "y": 1098, "x": 1098}
+        assert raster.x.min() == 700010.0
+        assert raster.x.max() == 809710.0
+        assert raster.y.min() == 3490250.0
+        assert raster.y.max() == 3599950.0
diff --git a/docs/api.rst b/docs/api.rst
@@ -51,3 +51,19 @@ Methods
 
     Dataset.cupy.as_cupy
     Dataset.cupy.as_numpy
+
+
+Backends
+--------
+
+cog3pio
+~~~~~~~
+
+.. currentmodule:: cupy_xarray
+
+.. automodule:: cupy_xarray.cog3pio
+
+.. autosummary::
+   :toctree: generated/
+
+   Cog3pioBackendEntrypoint
diff --git a/docs/conf.py b/docs/conf.py
@@ -54,9 +54,10 @@
 nb_execution_mode = "off"
 
 intersphinx_mapping = {
-    "python": ("https://docs.python.org/3/", None),
-    "dask": ("https://docs.dask.org/en/latest", None),
+    "cog3pio": ("https://cog3pio.readthedocs.io/en/latest", None),
     "cupy": ("https://docs.cupy.dev/en/latest", None),
+    "dask": ("https://docs.dask.org/en/latest", None),
+    "python": ("https://docs.python.org/3/", None),
     "xarray": ("http://docs.xarray.dev/en/latest/", None),
 }
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -19,6 +19,9 @@ dependencies = [
     "xarray>=2024.02.0",
 ]
 
+[project.entry-points."xarray.backends"]
+cog3pio = "cupy_xarray.cog3pio:Cog3pioBackendEntrypoint"
+
 [project.optional-dependencies]
 test = [
     "dask",

Original file line number	Diff line number	Diff line change
`@@ -19,6 +19,9 @@ dependencies = [`
`19`	`19`	`"xarray>=2024.02.0",`
`20`	`20`	`]`
`21`	`21`
	`22`	`+[project.entry-points."xarray.backends"]`
	`23`	`+cog3pio = "cupy_xarray.cog3pio:Cog3pioBackendEntrypoint"`
	`24`	`+`
`22`	`25`	`[project.optional-dependencies]`
`23`	`26`	`test = [`
`24`	`27`	`"dask",`