Skip to content

Commit 2012579

Browse files
committed
Implement cog3pio backend entrypoint to read TIFFs
Read TIFF data into xarray via cog3pio's experimental CudaCogReader struct that uses nvTIFF as its backend. Use cupy.from_dlpack to read the DLPack tensor, and reshape the 1-D array into a 3-D array (CHW form), setting the coordinates as appropriate. Added some API docs and basic unit tests. Cherry-picked from weiji14/cog3pio#71
1 parent 7ab75c0 commit 2012579

7 files changed

Lines changed: 160 additions & 4 deletions

File tree

ci/doc.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ channels:
44
dependencies:
55
- cupy-core
66
- pip
7-
- python=3.10
7+
- python=3.13
88
- sphinx
99
- sphinx-design
1010
- sphinx-copybutton
@@ -18,3 +18,4 @@ dependencies:
1818
- pip:
1919
# relative to this file. Needs to be editable to be accepted.
2020
- --editable ..
21+
- cog3pio[cuda] @ git+https://github.com/weiji14/cog3pio.git@178a3ffb8163c97f7af9e71bc68b6545a4e8e192 # https://github.com/weiji14/cog3pio/pull/71

cupy_xarray/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from . import _version
2-
from .accessors import CupyDataArrayAccessor, CupyDatasetAccessor # noqa
2+
from .accessors import CupyDataArrayAccessor, CupyDatasetAccessor # noqa: F401
3+
from .cog3pio import Cog3pioBackendEntrypoint # noqa: F401
34

45
__version__ = _version.get_versions()["version"]

cupy_xarray/cog3pio.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
"""
2+
`cog3pio` backend for xarray to read TIFF files directly into CuPy arrays in GPU memory.
3+
"""
4+
5+
import os
6+
from collections.abc import Iterable
7+
8+
import cupy as cp # type: ignore[import-untyped]
9+
import numpy as np
10+
import xarray as xr
11+
from cog3pio import CudaCogReader
12+
from xarray.backends import BackendEntrypoint
13+
14+
15+
# %%
16+
class Cog3pioBackendEntrypoint(BackendEntrypoint):
    """
    Xarray backend to read GeoTIFF files using 'cog3pio' engine.

    When using :py:func:`xarray.open_dataarray` with ``engine="cog3pio"``, the
    ``device_id`` parameter can be set to the CUDA GPU id to do the decoding on.

    Examples
    --------
    Read a GeoTIFF from a HTTP url into an :py:class:`xarray.DataArray`:

    >>> import xarray as xr
    >>> # Read GeoTIFF into an xarray.DataArray
    >>> dataarray: xr.DataArray = xr.open_dataarray(
    ...     filename_or_obj="https://github.com/OSGeo/gdal/raw/v3.11.0/autotest/gcore/data/byte_zstd.tif",
    ...     engine="cog3pio",
    ...     device_id=0,  # cuda:0
    ... )
    >>> dataarray.sizes
    Frozen({'band': 1, 'y': 20, 'x': 20})
    >>> dataarray.dtype
    dtype('uint8')

    """

    description = "Use .tif files in Xarray"
    open_dataset_parameters = ("filename_or_obj", "drop_variables", "device_id")
    url = "https://github.com/weiji14/cog3pio"

    def open_dataset(  # type: ignore[override]
        self,
        filename_or_obj: str,
        *,
        drop_variables: str | Iterable[str] | None = None,
        device_id: int,
        # other backend specific keyword arguments
        # `chunks` and `cache` DO NOT go here, they are handled by xarray
        mask_and_scale=None,
    ) -> xr.Dataset:
        """
        Backend open_dataset method used by Xarray in :py:func:`xarray.open_dataset`.

        Parameters
        ----------
        filename_or_obj : str
            File path or url to a TIFF (.tif) image file that can be read by the
            nvTIFF or image-tiff backend library.
        drop_variables : str or Iterable of str, optional
            Accepted for compatibility with the xarray backend interface. It is
            currently not used by this method.
        device_id : int
            CUDA device ID on which to place the created cupy array.
        mask_and_scale : optional
            Accepted as a keyword argument, but currently not used by this
            method.

        Returns
        -------
        xarray.Dataset
            Dataset holding a single data variable named ``raster`` with
            dimensions ``(band, y, x)``.

        """
        with cp.cuda.Stream(ptds=True):
            cog = CudaCogReader(path=filename_or_obj, device_id=device_id)
            array_: cp.ndarray = cp.from_dlpack(cog)  # 1-D Array
            x_coords, y_coords = cog.xy_coords()  # TODO consider using rasterix
            height, width = (len(y_coords), len(x_coords))
            # Number of channels is derived from the flat array length, since
            # only the 1-D buffer and the x/y coordinates are available here.
            channels: int = len(array_) // (height * width)
            # TODO make API to get proper 3-D shape directly, or use cuTENSOR
            array_ = array_.reshape(height, width, channels)  # HWC
            array = array_.transpose(2, 0, 1)  # CHW

        dataarray: xr.DataArray = xr.DataArray(
            data=array,
            # Name the dimensions explicitly rather than relying on xarray
            # inferring them from the key order of the ``coords`` mapping.
            dims=("band", "y", "x"),
            coords={
                "band": np.arange(channels, dtype=np.uint8),
                "y": y_coords,
                "x": x_coords,
            },
            name=None,
            attrs=None,
        )

        return dataarray.to_dataset(name="raster")

    def guess_can_open(self, filename_or_obj):
        """
        Guess whether this backend can open the given file, based on its extension.

        Parameters
        ----------
        filename_or_obj : str or path-like
            File path or url to check. Objects that ``os.path.splitext`` cannot
            handle are reported as not openable.

        Returns
        -------
        bool
            True if the extension is ``.tif`` or ``.tiff`` (compared
            case-insensitively, so ``.TIF`` also matches), False otherwise.

        """
        try:
            _, ext = os.path.splitext(filename_or_obj)
        except TypeError:
            return False
        # Lower-case the extension so that e.g. 'IMAGE.TIF' is recognized too
        return ext.lower() in {".tif", ".tiff"}

cupy_xarray/tests/test_cog3pio.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
"""
2+
Tests for xarray 'cog3pio' backend engine.
3+
"""
4+
5+
import cupy as cp
6+
import pytest
7+
import xarray as xr
8+
9+
from cupy_xarray.cog3pio import Cog3pioBackendEntrypoint
10+
11+
cog3pio = pytest.importorskip("cog3pio")
12+
13+
14+
def test_entrypoint():
    """
    Ensure that 'cog3pio' is among the backend engines listed by xarray.
    """
    available_engines = xr.backends.list_engines()
    assert "cog3pio" in available_engines
16+
17+
18+
def test_xarray_backend_open_dataarray():
    """
    Check that xarray.open_dataarray, given the cog3pio backend entrypoint as its
    engine, can read a Cloud-optimized GeoTIFF from a http url.
    """
    tci_url = "https://github.com/developmentseed/titiler/raw/1.2.0/src/titiler/mosaic/tests/fixtures/TCI.tif"
    with xr.open_dataarray(
        filename_or_obj=tci_url,
        engine=Cog3pioBackendEntrypoint,
        device_id=0,
    ) as raster:
        # The underlying data must be a CuPy array of the expected type and shape
        assert isinstance(raster.data, cp.ndarray)
        assert raster.dtype == "uint8"
        assert raster.sizes == {"band": 3, "y": 1098, "x": 1098}
        # Extent of the x and y coordinates
        assert raster.x.min() == 700010.0
        assert raster.x.max() == 809710.0
        assert raster.y.min() == 3490250.0
        assert raster.y.max() == 3599950.0

docs/api.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,19 @@ Methods
5151

5252
Dataset.cupy.as_cupy
5353
Dataset.cupy.as_numpy
54+
55+
56+
Backends
57+
--------
58+
59+
cog3pio
60+
~~~~~~~
61+
62+
.. currentmodule:: cupy_xarray
63+
64+
.. automodule:: cupy_xarray.cog3pio
65+
66+
.. autosummary::
67+
:toctree: generated/
68+
69+
Cog3pioBackendEntrypoint

docs/conf.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,10 @@
5454
nb_execution_mode = "off"
5555

5656
intersphinx_mapping = {
57-
"python": ("https://docs.python.org/3/", None),
58-
"dask": ("https://docs.dask.org/en/latest", None),
57+
"cog3pio": ("https://cog3pio.readthedocs.io/en/latest", None),
5958
"cupy": ("https://docs.cupy.dev/en/latest", None),
59+
"dask": ("https://docs.dask.org/en/latest", None),
60+
"python": ("https://docs.python.org/3/", None),
6061
"xarray": ("http://docs.xarray.dev/en/latest/", None),
6162
}
6263

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ dependencies = [
1919
"xarray>=2024.02.0",
2020
]
2121

22+
[project.entry-points."xarray.backends"]
23+
cog3pio = "cupy_xarray.cog3pio:Cog3pioBackendEntrypoint"
24+
2225
[project.optional-dependencies]
2326
test = [
2427
"dask",

0 commit comments

Comments
 (0)