Skip to content

Commit 3339ea5

Browse files
authored
Add allocation guards to GPU read and VRT read paths (#1196)
* Fix unbounded allocation DoS and VRT path traversal in geotiff

  Two security fixes for the geotiff subpackage:

  1. Add a configurable max_pixels guard to read_to_array() and all internal read functions (_read_strips, _read_tiles, _read_cog_http). A crafted TIFF with fabricated header dimensions could previously trigger multi-TB allocations. The default limit is 1 billion pixels (~4 GB for float32 single-band), overridable via the max_pixels kwarg. Fixes #1184.

  2. Canonicalize VRT source filenames with os.path.realpath() after resolving relative paths. Previously, a VRT file with "../" in SourceFilename could read arbitrary files outside the VRT directory. Fixes #1185.

* Fix VRT parser test failure on Windows

  os.path.realpath() converts Unix-style paths to Windows paths on Windows (e.g. /data/tile.tif becomes D:\data\tile.tif). Use os.path.realpath() in the assertion so it matches the production code's canonicalization on all platforms.

* Add allocation guards to GPU read and VRT read paths (#1195)

  _check_dimensions() was added to CPU read paths in 521956a but missed two allocation sites: read_geotiff_gpu() allocated from IFD dimensions without a pixel limit, and read_vrt() allocated from VRT XML dimensions without one. Both now call _check_dimensions() before allocating. Adds a max_pixels kwarg to open_geotiff, read_geotiff_gpu, and read_vrt for consistency with read_to_array.
1 parent 5ef4237 commit 3339ea5

File tree

3 files changed

+112
-8
lines changed

3 files changed

+112
-8
lines changed

xrspatial/geotiff/__init__.py

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,8 @@ def open_geotiff(source: str, *, dtype=None, window=None,
185185
band: int | None = None,
186186
name: str | None = None,
187187
chunks: int | tuple | None = None,
188-
gpu: bool = False) -> xr.DataArray:
188+
gpu: bool = False,
189+
max_pixels: int | None = None) -> xr.DataArray:
189190
"""Read a GeoTIFF, COG, or VRT file into an xarray.DataArray.
190191
191192
Automatically dispatches to the best backend:
@@ -216,6 +217,10 @@ def open_geotiff(source: str, *, dtype=None, window=None,
216217
Chunk size for Dask lazy reading.
217218
gpu : bool
218219
Use GPU-accelerated decompression (requires cupy + nvCOMP).
220+
max_pixels : int or None
221+
Maximum allowed pixel count (width * height * samples). None
222+
uses the default (~1 billion). Raise to read legitimately
223+
large files.
219224
220225
Returns
221226
-------
@@ -225,22 +230,28 @@ def open_geotiff(source: str, *, dtype=None, window=None,
225230
# VRT files
226231
if source.lower().endswith('.vrt'):
227232
return read_vrt(source, dtype=dtype, window=window, band=band,
228-
name=name, chunks=chunks, gpu=gpu)
233+
name=name, chunks=chunks, gpu=gpu,
234+
max_pixels=max_pixels)
229235

230236
# GPU path
231237
if gpu:
232238
return read_geotiff_gpu(source, dtype=dtype,
233239
overview_level=overview_level,
234-
name=name, chunks=chunks)
240+
name=name, chunks=chunks,
241+
max_pixels=max_pixels)
235242

236243
# Dask path (CPU)
237244
if chunks is not None:
238245
return read_geotiff_dask(source, dtype=dtype, chunks=chunks,
239246
overview_level=overview_level, name=name)
240247

248+
kwargs = {}
249+
if max_pixels is not None:
250+
kwargs['max_pixels'] = max_pixels
241251
arr, geo_info = read_to_array(
242252
source, window=window,
243253
overview_level=overview_level, band=band,
254+
**kwargs,
244255
)
245256

246257
height, width = arr.shape[:2]
@@ -995,7 +1006,8 @@ def read_geotiff_gpu(source: str, *,
9951006
dtype=None,
9961007
overview_level: int | None = None,
9971008
name: str | None = None,
998-
chunks: int | tuple | None = None) -> xr.DataArray:
1009+
chunks: int | tuple | None = None,
1010+
max_pixels: int | None = None) -> xr.DataArray:
9991011
"""Read a GeoTIFF with GPU-accelerated decompression via Numba CUDA.
10001012
10011013
Decompresses all tiles in parallel on the GPU and returns a
@@ -1018,6 +1030,9 @@ def read_geotiff_gpu(source: str, *,
10181030
chunks, (row, col) tuple for rectangular.
10191031
name : str or None
10201032
Name for the DataArray.
1033+
max_pixels : int or None
1034+
Maximum allowed pixel count (width * height * samples). None
1035+
uses the default (~1 billion).
10211036
10221037
Returns
10231038
-------
@@ -1031,12 +1046,15 @@ def read_geotiff_gpu(source: str, *,
10311046
"cupy is required for GPU reads. "
10321047
"Install it with: pip install cupy-cuda12x")
10331048

1034-
from ._reader import _FileSource
1049+
from ._reader import _FileSource, _check_dimensions, MAX_PIXELS_DEFAULT
10351050
from ._header import parse_header, parse_all_ifds
10361051
from ._dtypes import tiff_dtype_to_numpy
10371052
from ._geotags import extract_geo_info
10381053
from ._gpu_decode import gpu_decode_tiles
10391054

1055+
if max_pixels is None:
1056+
max_pixels = MAX_PIXELS_DEFAULT
1057+
10401058
# Parse metadata on CPU (fast, <1ms)
10411059
src = _FileSource(source)
10421060
data = src.read_all()
@@ -1088,6 +1106,8 @@ def read_geotiff_gpu(source: str, *,
10881106
width = ifd.width
10891107
height = ifd.height
10901108

1109+
_check_dimensions(width, height, samples, max_pixels)
1110+
10911111
finally:
10921112
src.close()
10931113

@@ -1326,7 +1346,8 @@ def read_vrt(source: str, *, dtype=None, window=None,
13261346
band: int | None = None,
13271347
name: str | None = None,
13281348
chunks: int | tuple | None = None,
1329-
gpu: bool = False) -> xr.DataArray:
1349+
gpu: bool = False,
1350+
max_pixels: int | None = None) -> xr.DataArray:
13301351
"""Read a GDAL Virtual Raster Table (.vrt) into an xarray.DataArray.
13311352
13321353
The VRT's source GeoTIFFs are read via windowed reads and assembled
@@ -1358,7 +1379,8 @@ def read_vrt(source: str, *, dtype=None, window=None,
13581379
"""
13591380
from ._vrt import read_vrt as _read_vrt_internal
13601381

1361-
arr, vrt = _read_vrt_internal(source, window=window, band=band)
1382+
arr, vrt = _read_vrt_internal(source, window=window, band=band,
1383+
max_pixels=max_pixels)
13621384

13631385
if name is None:
13641386
import os

xrspatial/geotiff/_vrt.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,8 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset:
192192

193193

194194
def read_vrt(vrt_path: str, *, window=None,
195-
band: int | None = None) -> tuple[np.ndarray, VRTDataset]:
195+
band: int | None = None,
196+
max_pixels: int | None = None) -> tuple[np.ndarray, VRTDataset]:
196197
"""Read a VRT file by assembling pixel data from its source files.
197198
198199
Parameters
@@ -228,6 +229,12 @@ def read_vrt(vrt_path: str, *, window=None,
228229
out_h = r1 - r0
229230
out_w = c1 - c0
230231

232+
from ._reader import _check_dimensions, MAX_PIXELS_DEFAULT
233+
if max_pixels is None:
234+
max_pixels = MAX_PIXELS_DEFAULT
235+
n_bands = len([vrt.bands[band]] if band is not None else vrt.bands)
236+
_check_dimensions(out_w, out_h, n_bands, max_pixels)
237+
231238
# Select bands
232239
if band is not None:
233240
selected_bands = [vrt.bands[band]]

xrspatial/geotiff/tests/test_security.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
Tests for:
44
- Unbounded allocation guard (issue #1184)
55
- VRT path traversal prevention (issue #1185)
6+
- GPU read and VRT read allocation guards (issue #1195)
67
"""
78
from __future__ import annotations
89

@@ -116,6 +117,80 @@ def test_normal_read_unaffected(self, tmp_path):
116117
arr, _ = read_to_array(path)
117118
np.testing.assert_array_equal(arr, expected)
118119

120+
def test_open_geotiff_max_pixels(self, tmp_path):
121+
"""open_geotiff passes max_pixels through to the reader."""
122+
from xrspatial.geotiff import open_geotiff
123+
124+
expected = np.arange(16, dtype=np.float32).reshape(4, 4)
125+
data = make_minimal_tiff(4, 4, np.dtype('float32'), pixel_data=expected)
126+
path = str(tmp_path / "small_1195.tif")
127+
with open(path, 'wb') as f:
128+
f.write(data)
129+
130+
# Should succeed with generous limit
131+
da = open_geotiff(path, max_pixels=1_000_000)
132+
np.testing.assert_array_equal(da.values, expected)
133+
134+
# Should fail with tiny limit
135+
with pytest.raises(ValueError, match="exceed the safety limit"):
136+
open_geotiff(path, max_pixels=10)
137+
138+
139+
# ---------------------------------------------------------------------------
140+
# Cat 1b: VRT allocation guard (issue #1195)
141+
# ---------------------------------------------------------------------------
142+
143+
class TestVRTAllocationGuard:
144+
def test_read_vrt_rejects_huge_dimensions(self, tmp_path):
145+
"""read_vrt refuses to allocate when VRT XML claims huge dims."""
146+
from xrspatial.geotiff._vrt import read_vrt as _read_vrt_internal
147+
148+
# Create a VRT with oversized dimensions but no actual source data
149+
# needed -- _check_dimensions fires before any file reads
150+
vrt_xml = '''<VRTDataset rasterXSize="100000" rasterYSize="100000">
151+
<VRTRasterBand dataType="Float32" band="1">
152+
</VRTRasterBand>
153+
</VRTDataset>'''
154+
155+
vrt_path = str(tmp_path / "huge_1195.vrt")
156+
with open(vrt_path, 'w') as f:
157+
f.write(vrt_xml)
158+
159+
with pytest.raises(ValueError, match="exceed the safety limit"):
160+
_read_vrt_internal(vrt_path, max_pixels=1_000_000)
161+
162+
def test_read_vrt_normal_size_ok(self, tmp_path):
163+
"""Normal-sized VRT passes the allocation guard."""
164+
from xrspatial.geotiff._vrt import read_vrt as _read_vrt_internal
165+
166+
vrt_xml = '''<VRTDataset rasterXSize="4" rasterYSize="4">
167+
<VRTRasterBand dataType="Float32" band="1">
168+
</VRTRasterBand>
169+
</VRTDataset>'''
170+
171+
vrt_path = str(tmp_path / "small_1195.vrt")
172+
with open(vrt_path, 'w') as f:
173+
f.write(vrt_xml)
174+
175+
# Should not raise -- 4x4x1 = 16 pixels
176+
arr, vrt = _read_vrt_internal(vrt_path, max_pixels=1_000_000)
177+
assert arr.shape == (4, 4)
178+
179+
def test_open_geotiff_vrt_max_pixels(self, tmp_path):
180+
"""open_geotiff passes max_pixels through to VRT reader."""
181+
from xrspatial.geotiff import open_geotiff
182+
183+
vrt_xml = '''<VRTDataset rasterXSize="100000" rasterYSize="100000">
184+
<VRTRasterBand dataType="Float32" band="1">
185+
</VRTRasterBand>
186+
</VRTDataset>'''
187+
188+
vrt_path = str(tmp_path / "huge_vrt_1195.vrt")
189+
with open(vrt_path, 'w') as f:
190+
f.write(vrt_xml)
191+
192+
with pytest.raises(ValueError, match="exceed the safety limit"):
193+
open_geotiff(vrt_path, max_pixels=1_000_000)
119194

120195
# ---------------------------------------------------------------------------
121196
# Cat 5: VRT path traversal

0 commit comments

Comments
 (0)