|
| 1 | +""" |
| 2 | +`cog3pio` backend for xarray to read TIFF files directly into CuPy arrays in GPU memory. |
| 3 | +""" |
| 4 | + |
| 5 | +import os |
| 6 | +from collections.abc import Iterable |
| 7 | + |
| 8 | +import cupy as cp # type: ignore[import-untyped] |
| 9 | +import numpy as np |
| 10 | +import xarray as xr |
| 11 | +from cog3pio import CudaCogReader |
| 12 | +from xarray.backends import BackendEntrypoint |
| 13 | + |
| 14 | + |
| 15 | +# %% |
class Cog3pioBackendEntrypoint(BackendEntrypoint):
    """
    Xarray backend to read GeoTIFF files using 'cog3pio' engine.

    When using :py:func:`xarray.open_dataarray` with ``engine="cog3pio"``, the
    ``device_id`` parameter can be set to the CUDA GPU id to do the decoding on.

    Examples
    --------
    Read a GeoTIFF from a HTTP url into an [xarray.DataArray][]:

    >>> import xarray as xr
    >>> # Read GeoTIFF into an xarray.DataArray
    >>> dataarray: xr.DataArray = xr.open_dataarray(
    ...     filename_or_obj="https://github.com/OSGeo/gdal/raw/v3.11.0/autotest/gcore/data/byte_zstd.tif",
    ...     engine="cog3pio",
    ...     device_id=0,  # cuda:0
    ... )
    >>> dataarray.sizes
    Frozen({'band': 1, 'y': 20, 'x': 20})
    >>> dataarray.dtype
    dtype('uint8')

    """

    description = "Use .tif files in Xarray"
    open_dataset_parameters = ("filename_or_obj", "drop_variables", "device_id")
    url = "https://github.com/weiji14/cog3pio"

    def open_dataset(  # type: ignore[override]
        self,
        filename_or_obj: str,
        *,
        drop_variables: str | Iterable[str] | None = None,
        device_id: int,
        # other backend specific keyword arguments
        # `chunks` and `cache` DO NOT go here, they are handled by xarray
        mask_and_scale=None,
    ) -> xr.Dataset:
        """
        Backend open_dataset method used by Xarray in [xarray.open_dataset][].

        Parameters
        ----------
        filename_or_obj : str
            File path or url to a TIFF (.tif) image file that can be read by the
            nvTIFF or image-tiff backend library.
        drop_variables : str or Iterable of str, optional
            Name(s) of variables to leave out of the returned dataset. Names
            that are not present in the dataset are ignored.
        device_id : int
            CUDA device ID on which to place the created cupy array.
        mask_and_scale : optional
            Accepted as part of the signature, but currently not used by this
            backend.

        Returns
        -------
        xarray.Dataset
            Dataset holding a single ``raster`` variable with dimensions
            ``(band, y, x)``.

        """
        # Do the decoding and array reshaping on a per-thread default stream.
        with cp.cuda.Stream(ptds=True):
            cog = CudaCogReader(path=filename_or_obj, device_id=device_id)
            array_: cp.ndarray = cp.from_dlpack(cog)  # 1-D Array
            x_coords, y_coords = cog.xy_coords()  # TODO consider using rasterix
            height, width = (len(y_coords), len(x_coords))
            # The flat buffer carries no shape information, so the number of
            # channels is derived from its length and the raster dimensions.
            channels: int = len(array_) // (height * width)
            # TODO make API to get proper 3-D shape directly, or use cuTENSOR
            array_ = array_.reshape(height, width, channels)  # HWC
            array = array_.transpose(2, 0, 1)  # CHW

        dataarray: xr.DataArray = xr.DataArray(
            data=array,
            coords={
                # NOTE(review): uint8 band indices cannot represent more than
                # 256 distinct bands - confirm this limit is acceptable.
                "band": np.arange(channels, dtype=np.uint8),
                "y": y_coords,
                "x": x_coords,
            },
            name=None,
            attrs=None,
        )

        dataset: xr.Dataset = dataarray.to_dataset(name="raster")
        # Honour the `drop_variables` argument that xarray forwards to the
        # backend instead of silently ignoring it.
        if drop_variables is not None:
            dataset = dataset.drop_vars(drop_variables, errors="ignore")
        return dataset

    def guess_can_open(self, filename_or_obj) -> bool:
        """
        Guess from the file extension whether this backend can open the input.

        Parameters
        ----------
        filename_or_obj
            Candidate input handed over by xarray.

        Returns
        -------
        bool
            True when the input has a ``.tif`` or ``.tiff`` extension (compared
            case-insensitively). False otherwise, including when the input is
            not something an extension can be extracted from.

        """
        try:
            _, ext = os.path.splitext(filename_or_obj)
        except TypeError:
            # Not a str/bytes/PathLike input (e.g. an open file object).
            return False
        # Compare case-insensitively so that e.g. `IMAGE.TIF` is recognised too.
        return ext.lower() in {".tif", ".tiff"}
0 commit comments