Skip to content

Commit b7e7d1e

Browse files
authored
Fix geotiff unbounded allocation DoS and VRT path traversal (#1189)
* Fix unbounded allocation DoS and VRT path traversal in geotiff

  Two security fixes for the geotiff subpackage:

  1. Add a configurable max_pixels guard to read_to_array() and all internal read functions (_read_strips, _read_tiles, _read_cog_http). A crafted TIFF with fabricated header dimensions could previously trigger multi-TB allocations. The default limit is 1 billion pixels (~4 GB for float32 single-band), overridable via the max_pixels kwarg. Fixes #1184.

  2. Canonicalize VRT source filenames with os.path.realpath() after resolving relative paths. Previously, a VRT file with "../" in SourceFilename could read arbitrary files outside the VRT directory. Fixes #1185.

* Fix VRT parser test failure on Windows

  os.path.realpath() converts Unix-style paths to Windows paths on Windows (e.g. /data/tile.tif becomes D:\data\tile.tif). Use os.path.realpath() in the assertion so it matches the production code's canonicalization on all platforms.
1 parent f3e8603 commit b7e7d1e

4 files changed

Lines changed: 252 additions & 10 deletions

File tree

xrspatial/geotiff/_reader.py

Lines changed: 53 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,27 @@
1919
from ._geotags import GeoInfo, GeoTransform, extract_geo_info
2020
from ._header import IFD, TIFFHeader, parse_all_ifds, parse_header
2121

22+
# ---------------------------------------------------------------------------
23+
# Allocation guard: reject TIFF dimensions that would exhaust memory
24+
# ---------------------------------------------------------------------------
25+
26+
#: Default maximum total pixel count (width * height * samples).
27+
#: ~1 billion pixels, which is ~4 GB for float32 single-band.
28+
#: Override per-call via the ``max_pixels`` keyword argument.
29+
MAX_PIXELS_DEFAULT = 1_000_000_000
30+
31+
32+
def _check_dimensions(width, height, samples, max_pixels):
33+
"""Raise ValueError if the requested allocation exceeds *max_pixels*."""
34+
total = width * height * samples
35+
if total > max_pixels:
36+
raise ValueError(
37+
f"TIFF image dimensions ({width} x {height} x {samples} = "
38+
f"{total:,} pixels) exceed the safety limit of "
39+
f"{max_pixels:,} pixels. Pass a larger max_pixels value to "
40+
f"read_to_array() if this file is legitimate."
41+
)
42+
2243

2344
# ---------------------------------------------------------------------------
2445
# Data source abstraction
@@ -292,7 +313,8 @@ def _decode_strip_or_tile(data_slice, compression, width, height, samples,
292313
# ---------------------------------------------------------------------------
293314

294315
def _read_strips(data: bytes, ifd: IFD, header: TIFFHeader,
295-
dtype: np.dtype, window=None) -> np.ndarray:
316+
dtype: np.dtype, window=None,
317+
max_pixels: int = MAX_PIXELS_DEFAULT) -> np.ndarray:
296318
"""Read a strip-organized TIFF image.
297319
298320
Parameters
@@ -307,6 +329,8 @@ def _read_strips(data: bytes, ifd: IFD, header: TIFFHeader,
307329
Output pixel dtype.
308330
window : tuple or None
309331
(row_start, col_start, row_stop, col_stop) or None for full image.
332+
max_pixels : int
333+
Maximum allowed pixel count (width * height * samples).
310334
311335
Returns
312336
-------
@@ -344,6 +368,8 @@ def _read_strips(data: bytes, ifd: IFD, header: TIFFHeader,
344368
out_h = r1 - r0
345369
out_w = c1 - c0
346370

371+
_check_dimensions(out_w, out_h, samples, max_pixels)
372+
347373
if samples > 1:
348374
result = np.empty((out_h, out_w, samples), dtype=dtype)
349375
else:
@@ -408,7 +434,8 @@ def _read_strips(data: bytes, ifd: IFD, header: TIFFHeader,
408434
# ---------------------------------------------------------------------------
409435

410436
def _read_tiles(data: bytes, ifd: IFD, header: TIFFHeader,
411-
dtype: np.dtype, window=None) -> np.ndarray:
437+
dtype: np.dtype, window=None,
438+
max_pixels: int = MAX_PIXELS_DEFAULT) -> np.ndarray:
412439
"""Read a tile-organized TIFF image.
413440
414441
Parameters
@@ -423,6 +450,8 @@ def _read_tiles(data: bytes, ifd: IFD, header: TIFFHeader,
423450
Output pixel dtype.
424451
window : tuple or None
425452
(row_start, col_start, row_stop, col_stop) or None for full image.
453+
max_pixels : int
454+
Maximum allowed pixel count (width * height * samples).
426455
427456
Returns
428457
-------
@@ -462,6 +491,8 @@ def _read_tiles(data: bytes, ifd: IFD, header: TIFFHeader,
462491
out_h = r1 - r0
463492
out_w = c1 - c0
464493

494+
_check_dimensions(out_w, out_h, samples, max_pixels)
495+
465496
_alloc = np.zeros if window is not None else np.empty
466497
if samples > 1:
467498
result = _alloc((out_h, out_w, samples), dtype=dtype)
@@ -545,7 +576,9 @@ def _decode_one(job):
545576
# ---------------------------------------------------------------------------
546577

547578
def _read_cog_http(url: str, overview_level: int | None = None,
548-
band: int | None = None) -> tuple[np.ndarray, GeoInfo]:
579+
band: int | None = None,
580+
max_pixels: int = MAX_PIXELS_DEFAULT,
581+
) -> tuple[np.ndarray, GeoInfo]:
549582
"""Read a COG via HTTP range requests.
550583
551584
Parameters
@@ -556,6 +589,8 @@ def _read_cog_http(url: str, overview_level: int | None = None,
556589
Which overview to read (0 = full res, 1 = first overview, etc.).
557590
band : int
558591
Band index (0-based, for multi-band files).
592+
max_pixels : int
593+
Maximum allowed pixel count (width * height * samples).
559594
560595
Returns
561596
-------
@@ -613,6 +648,8 @@ def _read_cog_http(url: str, overview_level: int | None = None,
613648
tiles_across = math.ceil(width / tw)
614649
tiles_down = math.ceil(height / th)
615650

651+
_check_dimensions(width, height, samples, max_pixels)
652+
616653
if samples > 1:
617654
result = np.empty((height, width, samples), dtype=dtype)
618655
else:
@@ -653,7 +690,9 @@ def _read_cog_http(url: str, overview_level: int | None = None,
653690
# ---------------------------------------------------------------------------
654691

655692
def read_to_array(source: str, *, window=None, overview_level: int | None = None,
656-
band: int | None = None) -> tuple[np.ndarray, GeoInfo]:
693+
band: int | None = None,
694+
max_pixels: int = MAX_PIXELS_DEFAULT,
695+
) -> tuple[np.ndarray, GeoInfo]:
657696
"""Read a GeoTIFF/COG to a numpy array.
658697
659698
Parameters
@@ -666,13 +705,18 @@ def read_to_array(source: str, *, window=None, overview_level: int | None = None
666705
Overview level (0 = full res).
667706
band : int
668707
Band index for multi-band files.
708+
max_pixels : int
709+
Maximum allowed total pixel count (width * height * samples).
710+
Prevents memory exhaustion from crafted TIFF headers.
711+
Default is 1 billion (~4 GB for float32 single-band).
669712
670713
Returns
671714
-------
672715
(np.ndarray, GeoInfo) tuple
673716
"""
674717
if source.startswith(('http://', 'https://')):
675-
return _read_cog_http(source, overview_level=overview_level, band=band)
718+
return _read_cog_http(source, overview_level=overview_level, band=band,
719+
max_pixels=max_pixels)
676720

677721
# Local file or cloud storage: read all bytes then parse
678722
if _is_fsspec_uri(source):
@@ -701,9 +745,11 @@ def read_to_array(source: str, *, window=None, overview_level: int | None = None
701745
geo_info = extract_geo_info(ifd, data, header.byte_order)
702746

703747
if ifd.is_tiled:
704-
arr = _read_tiles(data, ifd, header, dtype, window)
748+
arr = _read_tiles(data, ifd, header, dtype, window,
749+
max_pixels=max_pixels)
705750
else:
706-
arr = _read_strips(data, ifd, header, dtype, window)
751+
arr = _read_strips(data, ifd, header, dtype, window,
752+
max_pixels=max_pixels)
707753

708754
# For multi-band with band selection, extract single band
709755
if arr.ndim == 3 and ifd.samples_per_pixel > 1 and band is not None:

xrspatial/geotiff/_vrt.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,8 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset:
136136
relative.get('relativeToVRT', '0') == '1')
137137
if is_relative and not os.path.isabs(filename):
138138
filename = os.path.join(vrt_dir, filename)
139+
# Canonicalize to prevent path traversal (e.g. ../)
140+
filename = os.path.realpath(filename)
139141

140142
src_band = int(_text(src_elem, 'SourceBand') or '1')
141143

xrspatial/geotiff/tests/test_features.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
"""Tests for new features: multi-band, integer nodata, packbits, zstd, dask, BigTIFF."""
22
from __future__ import annotations
33

4+
import os
5+
46
import numpy as np
57
import pytest
68
import xarray as xr
@@ -823,12 +825,10 @@ def test_vrt_parser(self):
823825
assert vrt.bands[0].nodata == 0.0
824826
assert len(vrt.bands[0].sources) == 1
825827
src = vrt.bands[0].sources[0]
826-
assert src.filename == '/data/tile.tif'
828+
assert src.filename == os.path.realpath('/data/tile.tif')
827829
assert src.src_rect.x_off == 10
828830

829831

830-
import os
831-
832832
class TestCloudStorage:
833833

834834
def test_cloud_scheme_detection(self):
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
"""Security tests for the geotiff subpackage.
2+
3+
Tests for:
4+
- Unbounded allocation guard (issue #1184)
5+
- VRT path traversal prevention (issue #1185)
6+
"""
7+
from __future__ import annotations
8+
9+
import os
10+
import struct
11+
import tempfile
12+
13+
import numpy as np
14+
import pytest
15+
16+
from xrspatial.geotiff._reader import (
17+
MAX_PIXELS_DEFAULT,
18+
_check_dimensions,
19+
_read_strips,
20+
_read_tiles,
21+
read_to_array,
22+
)
23+
from xrspatial.geotiff._header import parse_header, parse_all_ifds
24+
from xrspatial.geotiff._dtypes import tiff_dtype_to_numpy
25+
from .conftest import make_minimal_tiff
26+
27+
28+
# ---------------------------------------------------------------------------
29+
# Cat 1: Unbounded allocation guard
30+
# ---------------------------------------------------------------------------
31+
32+
class TestDimensionGuard:
    """Checks for the pixel-count allocation guard (issue #1184)."""

    @staticmethod
    def _parse_with_claimed_size(tiff_bytes, side):
        """Parse *tiff_bytes* and overwrite tags 256 and 257 with *side*.

        Simulates a crafted header: the first IFD is made to claim a
        ``side`` x ``side`` image while the pixel data stays as small as
        it was built.  Returns ``(header, ifd, dtype)``.
        """
        from xrspatial.geotiff._header import IFDEntry

        header = parse_header(tiff_bytes)
        first_ifd = parse_all_ifds(tiff_bytes, header)[0]
        # Tags 256 / 257 hold the image width / height.
        for tag in (256, 257):
            first_ifd.entries[tag] = IFDEntry(
                tag=tag, type_id=3, count=1, value=side)
        dtype = tiff_dtype_to_numpy(
            first_ifd.bits_per_sample, first_ifd.sample_format)
        return header, first_ifd, dtype

    def test_check_dimensions_rejects_oversized(self):
        """A request above the default limit makes _check_dimensions raise."""
        with pytest.raises(ValueError, match="exceed the safety limit"):
            _check_dimensions(100_000, 100_000, 1, MAX_PIXELS_DEFAULT)

    def test_check_dimensions_accepts_normal(self):
        """An ordinary size passes through _check_dimensions silently."""
        _check_dimensions(1000, 1000, 1, MAX_PIXELS_DEFAULT)

    def test_check_dimensions_considers_samples(self):
        """The sample count is included in the pixel total."""
        # 50_000 x 50_000 x 3 = 7.5 billion, which is over the default.
        with pytest.raises(ValueError, match="exceed the safety limit"):
            _check_dimensions(50_000, 50_000, 3, MAX_PIXELS_DEFAULT)

    def test_custom_limit(self):
        """Callers can tighten or relax the limit through max_pixels."""
        # 20 x 20 = 400 pixels is too many for a 100-pixel budget.
        with pytest.raises(ValueError, match="exceed the safety limit"):
            _check_dimensions(20, 20, 1, max_pixels=100)

        # The same size that the default rejects fits a large budget.
        _check_dimensions(100_000, 100_000, 1, max_pixels=100_000_000_000)

    def test_read_strips_rejects_huge_header(self):
        """_read_strips refuses to allocate when the header claims huge dims."""
        # Real payload is 4x4; the patched IFD claims 100000x100000.
        tiff_bytes = make_minimal_tiff(4, 4, np.dtype('float32'))
        header, ifd, dtype = self._parse_with_claimed_size(
            tiff_bytes, 100_000)

        with pytest.raises(ValueError, match="exceed the safety limit"):
            _read_strips(tiff_bytes, ifd, header, dtype, max_pixels=1_000_000)

    def test_read_tiles_rejects_huge_header(self):
        """_read_tiles refuses to allocate when the header claims huge dims."""
        tiff_bytes = make_minimal_tiff(
            8, 8, np.dtype('float32'), tiled=True, tile_size=4)
        header, ifd, dtype = self._parse_with_claimed_size(
            tiff_bytes, 100_000)

        with pytest.raises(ValueError, match="exceed the safety limit"):
            _read_tiles(tiff_bytes, ifd, header, dtype, max_pixels=1_000_000)

    def test_read_to_array_max_pixels_kwarg(self, tmp_path):
        """read_to_array forwards max_pixels to the internal readers."""
        pixels = np.arange(16, dtype=np.float32).reshape(4, 4)
        tif_file = tmp_path / "small.tif"
        tif_file.write_bytes(
            make_minimal_tiff(4, 4, np.dtype('float32'), pixel_data=pixels))
        path = str(tif_file)

        # 16 pixels fit comfortably within a one-million-pixel budget.
        arr, _ = read_to_array(path, max_pixels=1_000_000)
        np.testing.assert_array_equal(arr, pixels)

        # ...but not within a ten-pixel budget.
        with pytest.raises(ValueError, match="exceed the safety limit"):
            read_to_array(path, max_pixels=10)

    def test_normal_read_unaffected(self, tmp_path):
        """A read within the default limit returns the written pixels."""
        pixels = np.arange(64, dtype=np.float32).reshape(8, 8)
        tif_file = tmp_path / "normal.tif"
        tif_file.write_bytes(
            make_minimal_tiff(8, 8, np.dtype('float32'), pixel_data=pixels))

        arr, _ = read_to_array(str(tif_file))
        np.testing.assert_array_equal(arr, pixels)
118+
119+
120+
# ---------------------------------------------------------------------------
121+
# Cat 5: VRT path traversal
122+
# ---------------------------------------------------------------------------
123+
124+
class TestVRTPathTraversal:
    """Checks that VRT SourceFilename values are canonicalized (issue #1185)."""

    # Single-band, single-source VRT; only the relativeToVRT flag and the
    # SourceFilename text vary between the tests below.
    _VRT_TEMPLATE = "\n".join([
        '<VRTDataset rasterXSize="4" rasterYSize="4">',
        '<VRTRasterBand dataType="Float32" band="1">',
        '<SimpleSource>',
        '<SourceFilename relativeToVRT="{flag}">{filename}</SourceFilename>',
        '<SourceBand>1</SourceBand>',
        '<SrcRect xOff="0" yOff="0" xSize="4" ySize="4"/>',
        '<DstRect xOff="0" yOff="0" xSize="4" ySize="4"/>',
        '</SimpleSource>',
        '</VRTRasterBand>',
        '</VRTDataset>',
    ])

    @classmethod
    def _parsed_source_filename(cls, filename, flag, vrt_dir):
        """Parse a one-source VRT and return that source's filename."""
        from xrspatial.geotiff._vrt import parse_vrt

        vrt_xml = cls._VRT_TEMPLATE.format(flag=flag, filename=filename)
        dataset = parse_vrt(vrt_xml, vrt_dir)
        return dataset.bands[0].sources[0].filename

    def test_relative_path_canonicalized(self, tmp_path):
        """Relative paths in VRT SourceFilename are canonicalized."""
        vrt_dir = str(tmp_path / "subdir")
        os.makedirs(vrt_dir)

        resolved = self._parsed_source_filename(
            "../../../etc/shadow", "1", vrt_dir)

        # No ".." may survive canonicalization.
        assert ".." not in resolved
        # The result is an absolute path.
        assert os.path.isabs(resolved)
        # Running realpath again changes nothing.
        assert resolved == os.path.realpath(resolved)

    def test_normal_relative_path_still_works(self, tmp_path):
        """Normal relative paths without traversal still resolve correctly."""
        vrt_dir = str(tmp_path)

        resolved = self._parsed_source_filename("data/tile.tif", "1", vrt_dir)

        assert resolved == os.path.realpath(
            os.path.join(vrt_dir, "data", "tile.tif"))

    def test_absolute_path_also_canonicalized(self, tmp_path):
        """Absolute paths in VRT are also canonicalized."""
        resolved = self._parsed_source_filename(
            "/tmp/../tmp/test.tif", "0", str(tmp_path))

        assert ".." not in resolved
        assert resolved == os.path.realpath("/tmp/../tmp/test.tif")

0 commit comments

Comments
 (0)