Skip to content

Commit 5ef4237

Browse files
authored
Fix crop=True dropping boundary pixels when all_touched=True (#1197) (#1200)
* Fix unbounded allocation DoS and VRT path traversal in geotiff Two security fixes for the geotiff subpackage: 1. Add a configurable max_pixels guard to read_to_array() and all internal read functions (_read_strips, _read_tiles, _read_cog_http). A crafted TIFF with fabricated header dimensions could previously trigger multi-TB allocations. The default limit is 1 billion pixels (~4 GB for float32 single-band), overridable via max_pixels kwarg. Fixes #1184. 2. Canonicalize VRT source filenames with os.path.realpath() after resolving relative paths. Previously, a VRT file with "../" in SourceFilename could read arbitrary files outside the VRT directory. Fixes #1185. * Fix VRT parser test failure on Windows os.path.realpath() converts Unix-style paths to Windows paths on Windows (e.g. /data/tile.tif becomes D:\data\tile.tif). Use os.path.realpath() in the assertion so it matches the production code's canonicalization on all platforms. * Fix crop=True dropping boundary pixels when all_touched=True (#1197) _crop_to_bbox compared pixel center coordinates against the geometry bounding box without accounting for pixel cell extent. When all_touched=True, pixels whose centers fell just outside the bbox were excluded even though their cells overlapped the polygon. Now _crop_to_bbox receives the all_touched flag and expands the bbox comparison by half a pixel on each side when set, so rasterize gets to see every pixel whose cell intersects the geometry. Also removed dead ascending/descending branches that computed the same mask regardless of coordinate order.
1 parent 32f28da commit 5ef4237

2 files changed

Lines changed: 76 additions & 14 deletions

File tree

xrspatial/polygon_clip.py

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,14 @@ def _resolve_geometry(geometry):
7979
)
8080

8181

82-
def _crop_to_bbox(raster, geom_pairs):
82+
def _crop_to_bbox(raster, geom_pairs, all_touched=False):
8383
"""Slice the raster to the bounding box of the geometry.
8484
8585
Returns the sliced DataArray and the geometry pairs (unchanged).
86+
87+
When *all_touched* is True the bounding-box comparison is expanded by
88+
half a pixel on every side so that pixels whose cells overlap the
89+
geometry boundary are not prematurely excluded.
8690
"""
8791
from shapely.ops import unary_union
8892
merged = unary_union([g for g, _ in geom_pairs])
@@ -91,19 +95,24 @@ def _crop_to_bbox(raster, geom_pairs):
9195
y_coords = raster.coords[raster.dims[-2]].values
9296
x_coords = raster.coords[raster.dims[-1]].values
9397

94-
# Determine coordinate ordering (ascending or descending)
95-
y_ascending = y_coords[-1] >= y_coords[0]
96-
x_ascending = x_coords[-1] >= x_coords[0]
97-
98-
if y_ascending:
99-
y_mask = (y_coords >= miny) & (y_coords <= maxy)
100-
else:
101-
y_mask = (y_coords >= miny) & (y_coords <= maxy)
98+
# When all_touched is set, expand the bbox by half a pixel so that
99+
# pixels whose cells overlap the geometry survive the crop.
100+
if all_touched:
101+
if len(x_coords) > 1:
102+
half_px_x = abs(float(x_coords[1] - x_coords[0])) / 2.0
103+
else:
104+
half_px_x = 0.0
105+
if len(y_coords) > 1:
106+
half_py_y = abs(float(y_coords[1] - y_coords[0])) / 2.0
107+
else:
108+
half_py_y = 0.0
109+
minx -= half_px_x
110+
maxx += half_px_x
111+
miny -= half_py_y
112+
maxy += half_py_y
102113

103-
if x_ascending:
104-
x_mask = (x_coords >= minx) & (x_coords <= maxx)
105-
else:
106-
x_mask = (x_coords >= minx) & (x_coords <= maxx)
114+
y_mask = (y_coords >= miny) & (y_coords <= maxy)
115+
x_mask = (x_coords >= minx) & (x_coords <= maxx)
107116

108117
y_idx = np.where(y_mask)[0]
109118
x_idx = np.where(x_mask)[0]
@@ -186,7 +195,7 @@ def clip_polygon(
186195

187196
# Optionally crop to bounding box first (reduces rasterize cost)
188197
if crop:
189-
raster = _crop_to_bbox(raster, geom_pairs)
198+
raster = _crop_to_bbox(raster, geom_pairs, all_touched=all_touched)
190199

191200
# Build a binary mask via rasterize, aligned to the (possibly cropped)
192201
# raster grid. Always produce a plain numpy mask first, then convert

xrspatial/tests/test_polygon_clip.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,42 @@ def test_all_touched(self):
152152
n_touched = np.count_nonzero(np.isfinite(result_touched.values))
153153
assert n_touched >= n_default
154154

155+
def test_all_touched_crop_matches_nocrop(self):
156+
"""crop=True with all_touched=True must not drop boundary pixels.
157+
158+
Regression test for #1197: _crop_to_bbox was comparing pixel
159+
centers against the geometry bounding box without accounting for
160+
pixel cell extent, so pixels whose centers fell just outside the
161+
bbox were excluded even though their cells overlapped the polygon.
162+
"""
163+
raster = _make_raster()
164+
# Polygon whose edges land between pixel centers on all four
165+
# sides. Pixel spacing is 0.5, so the left edge at x=0.15
166+
# sits inside the cell of pixel x=0.0 (cell [-0.25, 0.25]).
167+
poly = Polygon([(0.15, 0.15), (0.15, 3.35), (2.35, 3.35),
168+
(2.35, 0.15)])
169+
170+
result_crop = clip_polygon(raster, poly, crop=True,
171+
all_touched=True)
172+
result_nocrop = clip_polygon(raster, poly, crop=False,
173+
all_touched=True)
174+
175+
# The crop path must keep every pixel the nocrop path keeps.
176+
n_crop = np.count_nonzero(np.isfinite(result_crop.values))
177+
n_nocrop = np.count_nonzero(np.isfinite(result_nocrop.values))
178+
assert n_crop == n_nocrop, (
179+
f"crop=True lost {n_nocrop - n_crop} boundary pixels"
180+
)
181+
182+
# Pixel values must match wherever both arrays have data.
183+
# Align the cropped result back into the full grid for comparison.
184+
aligned = result_nocrop.copy()
185+
crop_y = result_crop.coords['y'].values
186+
crop_x = result_crop.coords['x'].values
187+
aligned_slice = aligned.sel(y=crop_y, x=crop_x)
188+
np.testing.assert_array_equal(result_crop.values,
189+
aligned_slice.values)
190+
155191
def test_single_cell_raster(self):
156192
"""1x1 raster with polygon that covers it."""
157193
data = np.array([[42.0]])
@@ -207,6 +243,23 @@ def test_crop_matches_numpy(self):
207243
dk_result.values, np_result.values, equal_nan=True
208244
)
209245

246+
def test_all_touched_crop_matches_nocrop(self):
247+
"""Dask: crop=True with all_touched=True keeps boundary pixels (#1197)."""
248+
dk_raster = _make_raster(backend='dask+numpy', chunks=(4, 3))
249+
poly = Polygon([(0.15, 0.15), (0.15, 3.35), (2.35, 3.35),
250+
(2.35, 0.15)])
251+
252+
result_crop = clip_polygon(dk_raster, poly, crop=True,
253+
all_touched=True)
254+
result_nocrop = clip_polygon(dk_raster, poly, crop=False,
255+
all_touched=True)
256+
257+
n_crop = np.count_nonzero(np.isfinite(result_crop.values))
258+
n_nocrop = np.count_nonzero(np.isfinite(result_nocrop.values))
259+
assert n_crop == n_nocrop, (
260+
f"crop=True lost {n_nocrop - n_crop} boundary pixels"
261+
)
262+
210263

211264
# ---------------------------------------------------------------------------
212265
# CuPy backend

0 commit comments

Comments
 (0)