diff --git a/docs/source/reference/dask_laziness.rst b/docs/source/reference/dask_laziness.rst new file mode 100644 index 00000000..40dbf2c6 --- /dev/null +++ b/docs/source/reference/dask_laziness.rst @@ -0,0 +1,250 @@ +.. _reference.dask_laziness: + +********************* +Dask backend behavior +********************* + +When you pass a dask-backed ``DataArray`` to an xarray-spatial function, the +result *should* also be dask-backed so your pipeline stays lazy until you call +``.compute()``. Most functions do this, but some algorithms need random access +to the full array and have to materialize intermediate results. + +This page lists every public function and its laziness level so you can plan +dask pipelines without reading source code. + +Laziness levels +=============== + +**Fully lazy** -- the function returns a dask array without triggering any +computation. Safe for arbitrarily large out-of-core datasets. + +**Partially lazy** -- the function computes small bounded statistics (scalars, +quartiles, a ~20K sample) during setup, then returns a dask array for the main +result. The statistics are cheap; the heavy work stays lazy. + +**Fully materialized** -- the algorithm needs the entire array in memory +(connected-component labeling, A* search, viewshed sweepline, etc.). The +result may be re-wrapped as dask, but the function calls ``.compute()`` +internally. Watch your memory on large inputs. + + +Terrain metrics +=============== + +.. list-table:: + :header-rows: 1 + :widths: 30 20 50 + + * - Function + - Laziness + - Notes + * - ``slope`` + - Fully lazy + - ``map_overlap``, planar and geodesic + * - ``aspect`` + - Fully lazy + - ``map_overlap``, planar and geodesic + * - ``curvature`` + - Fully lazy + - ``map_overlap`` + * - ``hillshade`` + - Fully lazy + - ``map_overlap`` + * - ``northness`` + - Fully lazy + - Uses ``da.cos`` / ``da.deg2rad`` on aspect output + * - ``eastness`` + - Fully lazy + - Uses ``da.sin`` / ``da.deg2rad`` on aspect output + + +Focal operations +================ + +.. list-table:: + :header-rows: 1 + :widths: 30 20 50 + + * - Function + - Laziness + - Notes + * - ``mean`` + - Fully lazy + - Iterative ``map_overlap`` + * - ``apply`` + - Fully lazy + - ``map_overlap`` with user kernel + * - ``focal_stats`` + - Fully lazy + - Multiple stats via ``map_overlap``, 3D output + * - ``hotspots`` + - Partially lazy + - Computes global mean and std, result is dask + + +Classification +============== + +.. list-table:: + :header-rows: 1 + :widths: 30 20 50 + + * - Function + - Laziness + - Notes + * - ``binary`` + - Fully lazy + - ``map_blocks`` + * - ``reclassify`` + - Fully lazy + - ``map_blocks`` + * - ``quantile`` + - Partially lazy + - Computes percentiles from ~20K sample + * - ``natural_breaks`` + - Partially lazy + - Computes Jenks breaks from ~20K sample + scalar max + * - ``equal_interval`` + - Partially lazy + - Computes scalar min/max + * - ``std_mean`` + - Partially lazy + - Computes scalar mean/std/max + * - ``head_tail_breaks`` + - Partially lazy + - Computes O(log N) scalar means + * - ``percentiles`` + - Partially lazy + - Computes percentiles from ~20K sample + * - ``maximum_breaks`` + - Partially lazy + - Computes breaks from ~20K sample + * - ``box_plot`` + - Partially lazy + - Computes scalar quartiles and max + + +Normalization +============= + +.. list-table:: + :header-rows: 1 + :widths: 30 20 50 + + * - Function + - Laziness + - Notes + * - ``rescale`` + - Fully lazy + - ``da.nanmin`` / ``da.nanmax`` (lazy reductions) + * - ``standardize`` + - Fully lazy + - ``da.nanmean`` / ``da.nanstd`` (lazy reductions) + + +Visibility +========== + +.. list-table:: + :header-rows: 1 + :widths: 30 20 50 + + * - Function + - Laziness + - Notes + * - ``viewshed`` + - Fully materialized + - Sweepline algorithm needs random access + * - ``line_of_sight`` + - Fully materialized + - Extracts 1D transect via ``.compute()`` + * - ``cumulative_viewshed`` + - Fully materialized + - Runs multiple viewshed calls + * - ``visibility_frequency`` + - Fully materialized + - Wraps ``cumulative_viewshed`` + + +Morphology +========== + +.. list-table:: + :header-rows: 1 + :widths: 30 20 50 + + * - Function + - Laziness + - Notes + * - ``sieve`` + - Fully materialized + - Connected-component labeling needs the full array; result re-wrapped as dask + + +Proximity +========= + +.. list-table:: + :header-rows: 1 + :widths: 30 20 50 + + * - Function + - Laziness + - Notes + * - ``proximity`` + - Fully materialized + - Distance computation needs full array + * - ``allocation`` + - Fully materialized + - Nearest-source allocation + * - ``direction`` + - Fully materialized + - Direction to nearest source + + +Zonal +===== + +.. list-table:: + :header-rows: 1 + :widths: 30 20 50 + + * - Function + - Laziness + - Notes + * - ``zonal_stats`` / ``stats`` + - Partially lazy + - Groupby aggregation via dask dataframe + * - ``zonal_crosstab`` / ``crosstab`` + - Partially lazy + - Groupby cross-tabulation + * - ``zonal_apply`` / ``apply`` + - Fully lazy + - ``map_blocks`` per zone + * - ``regions`` + - Fully materialized + - Connected-component labeling + * - ``trim`` + - Fully lazy + - Lazy slicing + * - ``crop`` + - Fully lazy + - Lazy slicing + + +Pathfinding +=========== + +.. list-table:: + :header-rows: 1 + :widths: 30 20 50 + + * - Function + - Laziness + - Notes + * - ``a_star_search`` + - Fully materialized + - A* needs random access and visited-set tracking + * - ``multi_stop_search`` + - Fully materialized + - Iterative A* diff --git a/docs/source/reference/index.rst b/docs/source/reference/index.rst index 792cb0b7..3b3eef32 100644 --- a/docs/source/reference/index.rst +++ b/docs/source/reference/index.rst @@ -7,6 +7,7 @@ Reference .. toctree:: :maxdepth: 2 + dask_laziness classification dasymetric diffusion diff --git a/xrspatial/tests/test_dask_laziness.py b/xrspatial/tests/test_dask_laziness.py new file mode 100644 index 00000000..da4216f8 --- /dev/null +++ b/xrspatial/tests/test_dask_laziness.py @@ -0,0 +1,180 @@ +"""Verify that dask-backed DataArrays stay lazy through fully-lazy functions. + +Each test calls a function with a dask input and asserts the output is still a +dask collection (no accidental .compute()). We do NOT test correctness here -- +other test modules cover that. We only test that the task graph survives. +""" + +from __future__ import annotations + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.tests.general_checks import create_test_raster, dask_array_available + +try: + import dask + import dask.array as da +except ImportError: + da = None + + +pytestmark = dask_array_available + + +def _is_lazy(result): + """Return True if *result* is a dask-backed DataArray (not computed).""" + if isinstance(result, xr.DataArray): + return dask.is_dask_collection(result) + if isinstance(result, da.Array): + return True + return False + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture +def elev(): + """8x6 dask-backed elevation raster.""" + data = np.array([ + [870.5, 283.0, 845.3, 51.2, 990.8, 600.6], + [704.2, 242.2, 429.3, 779.9, 193.3, 984.7], + [226.6, 815.7, 290.6, 76.5, 820.9, 32.3], + [344.8, 256.3, 806.8, 602.0, 721.2, 497.0], + [185.4, 834.1, 387.1, 716.0, 49.6, 753.0], + [302.4, 151.5, 442.3, 358.5, 659.8, 447.1], + [148.0, 819.2, 469.0, 977.1, 597.7, 999.1], + [268.2, 626.0, 840.3, 448.3, 859.3, 528.0], + ], dtype=np.float32) + return create_test_raster(data, backend='dask+numpy', + attrs={'res': (1, 1)}, chunks=(4, 3)) + + +@pytest.fixture +def classify_elev(): + """Larger raster for classification functions that need more data.""" + rng = np.random.default_rng(12345) + data = rng.random((20, 20), dtype=np.float32) * 1000 + return create_test_raster(data, backend='dask+numpy', + attrs={'res': (1, 1)}, chunks=(10, 10)) + + +# --------------------------------------------------------------------------- +# Terrain metrics +# --------------------------------------------------------------------------- + +class TestTerrainLazy: + def test_slope(self, elev): + from xrspatial import slope + assert _is_lazy(slope(elev)) + + def test_aspect(self, elev): + from xrspatial import aspect + assert _is_lazy(aspect(elev)) + + def test_curvature(self, elev): + from xrspatial import curvature + assert _is_lazy(curvature(elev)) + + def test_hillshade(self, elev): + from xrspatial import hillshade + assert _is_lazy(hillshade(elev)) + + def test_northness(self, elev): + from xrspatial import northness + assert _is_lazy(northness(elev)) + + def test_eastness(self, elev): + from xrspatial import eastness + assert _is_lazy(eastness(elev)) + + +# --------------------------------------------------------------------------- +# Focal +# --------------------------------------------------------------------------- + +class TestFocalLazy: + def test_mean(self, elev): + from xrspatial import mean + assert _is_lazy(mean(elev)) + + def test_apply(self, elev): + from xrspatial.focal import apply as focal_apply + kernel = np.ones((3, 3), dtype=np.float32) / 9 + assert _is_lazy(focal_apply(elev, kernel)) + + def test_focal_stats(self, elev): + from xrspatial.focal import focal_stats + kernel = np.ones((3, 3), dtype=np.float32) / 9 + result = focal_stats(elev, kernel) + assert _is_lazy(result) + + def test_hotspots(self, elev): + from xrspatial.focal import hotspots + kernel = np.ones((3, 3), dtype=np.float32) / 9 + assert _is_lazy(hotspots(elev, kernel)) + + +# --------------------------------------------------------------------------- +# Classification (partially lazy -- returns dask after computing small stats) +# --------------------------------------------------------------------------- + +class TestClassifyLazy: + def test_binary(self, classify_elev): + from xrspatial import binary + assert _is_lazy(binary(classify_elev, values=[0, 500])) + + def test_reclassify(self, classify_elev): + from xrspatial import reclassify + assert _is_lazy(reclassify(classify_elev, + bins=[250, 500, 750, 1000], + new_values=[1, 2, 3, 4])) + + def test_quantile(self, classify_elev): + from xrspatial import quantile + assert _is_lazy(quantile(classify_elev, k=4)) + + def test_natural_breaks(self, classify_elev): + from xrspatial import natural_breaks + assert _is_lazy(natural_breaks(classify_elev, k=4)) + + def test_equal_interval(self, classify_elev): + from xrspatial import equal_interval + assert _is_lazy(equal_interval(classify_elev, k=4)) + + def test_std_mean(self, classify_elev): + from xrspatial import std_mean + assert _is_lazy(std_mean(classify_elev)) + + def test_head_tail_breaks(self, classify_elev): + from xrspatial import head_tail_breaks + assert _is_lazy(head_tail_breaks(classify_elev)) + + def test_percentiles(self, classify_elev): + from xrspatial import percentiles + assert _is_lazy(percentiles(classify_elev)) + + def test_maximum_breaks(self, classify_elev): + from xrspatial import maximum_breaks + assert _is_lazy(maximum_breaks(classify_elev, k=4)) + + def test_box_plot(self, classify_elev): + from xrspatial import box_plot + assert _is_lazy(box_plot(classify_elev)) + + +# --------------------------------------------------------------------------- +# Normalization +# --------------------------------------------------------------------------- + +class TestNormalizeLazy: + def test_rescale(self, elev): + from xrspatial import rescale + assert _is_lazy(rescale(elev)) + + def test_standardize(self, elev): + from xrspatial import standardize + assert _is_lazy(standardize(elev))