From 1920f9a997e2c935cf6b696bbe5f23165185d2ea Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 5 Apr 2026 19:41:01 -0700 Subject: [PATCH 1/2] Fix OOM in visibility module for dask-backed rasters _extract_transect was calling .compute() on the full dask array just to read a handful of transect cells. Now uses vindex fancy indexing so only the relevant chunks are materialized. cumulative_viewshed was allocating a full-size np.zeros count array and calling .values on each viewshed result, forcing materialization every iteration. Now accumulates lazily with da.zeros and dask array addition when the input is dask-backed. --- xrspatial/visibility.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/xrspatial/visibility.py b/xrspatial/visibility.py index ab4de453..65139ba0 100644 --- a/xrspatial/visibility.py +++ b/xrspatial/visibility.py @@ -61,7 +61,9 @@ def _extract_transect(raster, cells): if has_dask_array(): import dask.array as da if isinstance(data, da.Array): - data = data.compute() + # Only compute the needed cells, not the entire array + elevations = data.vindex[rows, cols].compute().astype(np.float64) + return elevations, x_coords, y_coords if has_cuda_and_cupy() and is_cupy_array(data): data = data.get() @@ -217,7 +219,16 @@ def cumulative_viewshed( if not observers: raise ValueError("observers list must not be empty") - count = np.zeros(raster.shape, dtype=np.int32) + # Detect dask backend to keep accumulation lazy + _is_dask = False + if has_dask_array(): + import dask.array as da + _is_dask = isinstance(raster.data, da.Array) + + if _is_dask: + count = da.zeros(raster.shape, dtype=np.int32, chunks=raster.data.chunks) + else: + count = np.zeros(raster.shape, dtype=np.int32) for obs in observers: ox = obs['x'] @@ -229,11 +240,17 @@ def cumulative_viewshed( vs = viewshed(raster, x=ox, y=oy, observer_elev=oe, target_elev=te, max_distance=md) - vs_np = vs.values - count += (vs_np != 
INVISIBLE).astype(np.int32) - - return xarray.DataArray(count, coords=raster.coords, - dims=raster.dims, attrs=raster.attrs) + vs_data = vs.data + if _is_dask and not isinstance(vs_data, da.Array): + vs_data = da.from_array(vs_data, chunks=raster.data.chunks) + count = count + (vs_data != INVISIBLE).astype(np.int32) + + result = xarray.DataArray(count, coords=raster.coords, + dims=raster.dims, attrs=raster.attrs) + if _is_dask: + chunk_dict = dict(zip(raster.dims, raster.data.chunks)) + result = result.chunk(chunk_dict) + return result def visibility_frequency( From 71ce918e46f813cdc5c8a4cb033c6261c633ac59 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 5 Apr 2026 19:43:11 -0700 Subject: [PATCH 2/2] Tighten viewshed Tier B memory estimate and avoid needless copy The dask Tier B memory guard underestimated peak usage at 280 bytes/pixel. Actual peak during lexsort reaches ~360 bytes/pixel (sorted + unsorted event_list coexist) plus 8 bytes/pixel for the computed raster. Updated estimate to 368 bytes/pixel to prevent borderline OOM. Also use astype(copy=False) to skip the float64 copy when data is already float64. 
--- xrspatial/viewshed.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xrspatial/viewshed.py b/xrspatial/viewshed.py index 96b095f6..0e282052 100644 --- a/xrspatial/viewshed.py +++ b/xrspatial/viewshed.py @@ -1560,7 +1560,7 @@ def _viewshed_cpu( num_events = 3 * (n_rows * n_cols - 1) event_list = np.zeros((num_events, 7), dtype=np.float64) - raster.data = raster.data.astype(np.float64) + raster.data = raster.data.astype(np.float64, copy=False) _init_event_list(event_list=event_list, raster=raster.data, vp_row=viewpoint_row, vp_col=viewpoint_col, @@ -2167,7 +2167,9 @@ def _viewshed_dask(raster, x, y, observer_elev, target_elev): cupy_backed = is_dask_cupy(raster) # --- Tier B: full grid fits in memory → compute and run exact algo --- - r2_bytes = 280 * height * width + # Peak working set: lexsort holds sorted + unsorted event_list (2x 168*H*W) + + # plus visibility_grid 8*H*W ≈ 352, padded to 360 bytes/pixel; raster counted below. + r2_bytes = 360 * height * width + 8 * height * width # working + raster avail = _available_memory_bytes() if r2_bytes < 0.5 * avail: raster_mem = raster.copy()