From 1920f9a997e2c935cf6b696bbe5f23165185d2ea Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 5 Apr 2026 19:41:01 -0700 Subject: [PATCH 1/2] Fix OOM in visibility module for dask-backed rasters _extract_transect was calling .compute() on the full dask array just to read a handful of transect cells. Now uses vindex fancy indexing so only the relevant chunks are materialized. cumulative_viewshed was allocating a full-size np.zeros count array and calling .values on each viewshed result, forcing materialization every iteration. Now accumulates lazily with da.zeros and dask array addition when the input is dask-backed. --- xrspatial/visibility.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/xrspatial/visibility.py b/xrspatial/visibility.py index ab4de453..65139ba0 100644 --- a/xrspatial/visibility.py +++ b/xrspatial/visibility.py @@ -61,7 +61,9 @@ def _extract_transect(raster, cells): if has_dask_array(): import dask.array as da if isinstance(data, da.Array): - data = data.compute() + # Only compute the needed cells, not the entire array + elevations = data.vindex[rows, cols].compute().astype(np.float64) + return elevations, x_coords, y_coords if has_cuda_and_cupy() and is_cupy_array(data): data = data.get() @@ -217,7 +219,16 @@ def cumulative_viewshed( if not observers: raise ValueError("observers list must not be empty") - count = np.zeros(raster.shape, dtype=np.int32) + # Detect dask backend to keep accumulation lazy + _is_dask = False + if has_dask_array(): + import dask.array as da + _is_dask = isinstance(raster.data, da.Array) + + if _is_dask: + count = da.zeros(raster.shape, dtype=np.int32, chunks=raster.data.chunks) + else: + count = np.zeros(raster.shape, dtype=np.int32) for obs in observers: ox = obs['x'] @@ -229,11 +240,17 @@ def cumulative_viewshed( vs = viewshed(raster, x=ox, y=oy, observer_elev=oe, target_elev=te, max_distance=md) - vs_np = vs.values - count += (vs_np != 
INVISIBLE).astype(np.int32) - - return xarray.DataArray(count, coords=raster.coords, - dims=raster.dims, attrs=raster.attrs) + vs_data = vs.data + if _is_dask and not isinstance(vs_data, da.Array): + vs_data = da.from_array(vs_data, chunks=raster.data.chunks) + count = count + (vs_data != INVISIBLE).astype(np.int32) + + result = xarray.DataArray(count, coords=raster.coords, + dims=raster.dims, attrs=raster.attrs) + if _is_dask: + chunk_dict = dict(zip(raster.dims, raster.data.chunks)) + result = result.chunk(chunk_dict) + return result def visibility_frequency( From 71ce918e46f813cdc5c8a4cb033c6261c633ac59 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sun, 5 Apr 2026 19:43:11 -0700 Subject: [PATCH 2/2] Tighten viewshed Tier B memory estimate and avoid needless copy The dask Tier B memory guard underestimated peak usage at 280 bytes/pixel. Actual peak during lexsort reaches ~360 bytes/pixel (sorted + unsorted event_list coexist) plus 8 bytes/pixel for the computed raster. Updated estimate to 368 bytes/pixel to prevent borderline OOM. Also use astype(copy=False) to skip the float64 copy when data is already float64. 
--- xrspatial/viewshed.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xrspatial/viewshed.py b/xrspatial/viewshed.py index 96b095f6..0e282052 100644 --- a/xrspatial/viewshed.py +++ b/xrspatial/viewshed.py @@ -1560,7 +1560,7 @@ def _viewshed_cpu( num_events = 3 * (n_rows * n_cols - 1) event_list = np.zeros((num_events, 7), dtype=np.float64) - raster.data = raster.data.astype(np.float64) + raster.data = raster.data.astype(np.float64, copy=False) _init_event_list(event_list=event_list, raster=raster.data, vp_row=viewpoint_row, vp_col=viewpoint_col, @@ -2167,7 +2167,9 @@ def _viewshed_dask(raster, x, y, observer_elev, target_elev): cupy_backed = is_dask_cupy(raster) # --- Tier B: full grid fits in memory → compute and run exact algo --- - r2_bytes = 280 * height * width + # Peak working set: lexsort holds sorted + unsorted event_list (2x 168*H*W) + + # plus visibility_grid 8*H*W ≈ 352, padded to 360 bytes/pixel; raster counted below. + r2_bytes = 360 * height * width + 8 * height * width # working + raster avail = _available_memory_bytes() if r2_bytes < 0.5 * avail: raster_mem = raster.copy()