diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 1f856a82..978691cb 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -37,7 +37,7 @@ jobs: run: | asv continuous origin/master HEAD \ --factor $ASV_FACTOR \ - --bench "^(Slope|Proximity|Zonal|CostDistance|Focal)" \ + --bench "^(Slope|Proximity|Zonal|CostDistance|Focal|Rescale|Standardize|Diffusion|Dasymetric)" \ | tee bench_output.txt - name: Run benchmarks (push to master) @@ -45,7 +45,7 @@ jobs: working-directory: benchmarks run: | asv run HEAD^! \ - --bench "^(Slope|Proximity|Zonal|CostDistance|Focal)" \ + --bench "^(Slope|Proximity|Zonal|CostDistance|Focal|Rescale|Standardize|Diffusion|Dasymetric)" \ | tee bench_output.txt - name: Check for regressions diff --git a/benchmarks/benchmarks/balanced_allocation.py b/benchmarks/benchmarks/balanced_allocation.py new file mode 100644 index 00000000..74e1d608 --- /dev/null +++ b/benchmarks/benchmarks/balanced_allocation.py @@ -0,0 +1,46 @@ +import numpy as np +import xarray as xr + +from xrspatial.balanced_allocation import balanced_allocation + +from .common import get_xr_dataarray + + +class BalancedAllocation: + # Memory-intensive: holds N_sources cost surfaces simultaneously. + # Keep grids small to avoid OOM during benchmarking. + params = ([100, 300], ["numpy", "dask"]) + param_names = ("nx", "type") + + def setup(self, nx, type): + ny = nx // 2 + # Friction surface: positive values everywhere. + friction = get_xr_dataarray((ny, nx), type) + if type == "dask": + import dask.array as da + friction.data = da.fabs(friction.data) + 0.1 + else: + friction.data = np.abs(friction.data) + 0.1 + self.friction = friction + + # Source raster: place 4 source points in corners. + sources = np.zeros((ny, nx), dtype=np.float32) + margin = max(1, nx // 10) + sources[margin, margin] = 1 + sources[margin, nx - margin - 1] = 2 + sources[ny - margin - 1, margin] = 3 + sources[ny - margin - 1, nx - margin - 1] = 4 + + x = np.linspace(-180, 180, nx) + y = np.linspace(-90, 90, ny) + + if type == "dask": + import dask.array as da + data = da.from_array(sources, chunks=(max(1, ny // 2), max(1, nx // 2))) + else: + data = sources + + self.raster = xr.DataArray(data, coords=dict(y=y, x=x), dims=["y", "x"]) + + def time_balanced_allocation(self, nx, type): + balanced_allocation(self.raster, self.friction, max_iterations=10) diff --git a/benchmarks/benchmarks/dasymetric.py b/benchmarks/benchmarks/dasymetric.py new file mode 100644 index 00000000..453bee16 --- /dev/null +++ b/benchmarks/benchmarks/dasymetric.py @@ -0,0 +1,60 @@ +import numpy as np +import xarray as xr + +from xrspatial.dasymetric import disaggregate + +from .common import get_xr_dataarray + + +class Dasymetric: + params = ([100, 300, 1000], ["numpy", "cupy", "dask"]) + param_names = ("nx", "type") + + def setup(self, nx, type): + ny = nx // 2 + + # Zones: 4 rectangular blocks. + zones_np = np.zeros((ny, nx), dtype=np.int32) + zones_np[: ny // 2, : nx // 2] = 1 + zones_np[: ny // 2, nx // 2 :] = 2 + zones_np[ny // 2 :, : nx // 2] = 3 + zones_np[ny // 2 :, nx // 2 :] = 4 + + x = np.linspace(-180, 180, nx) + y = np.linspace(-90, 90, ny) + + if type == "dask": + import dask.array as da + zdata = da.from_array(zones_np, chunks=(max(1, ny // 2), max(1, nx // 2))) + elif type == "cupy": + from xrspatial.utils import has_cuda_and_cupy + if not has_cuda_and_cupy(): + raise NotImplementedError() + import cupy + zdata = cupy.asarray(zones_np) + else: + zdata = zones_np + + self.zones = xr.DataArray(zdata, coords=dict(y=y, x=x), dims=["y", "x"]) + + # Values: one total per zone. + self.values = {1: 1000.0, 2: 2000.0, 3: 1500.0, 4: 2500.0} + + # Weight surface: use the standard Gaussian bump. + weight = get_xr_dataarray((ny, nx), type) + # Make weights non-negative. + if type == "dask": + import dask.array as da + weight.data = da.fabs(weight.data) + 0.01 + elif type == "cupy": + import cupy + weight.data = cupy.abs(weight.data) + 0.01 + else: + weight.data = np.abs(weight.data) + 0.01 + self.weight = weight + + def time_disaggregate_weighted(self, nx, type): + disaggregate(self.zones, self.values, self.weight, method="weighted") + + def time_disaggregate_binary(self, nx, type): + disaggregate(self.zones, self.values, self.weight, method="binary") diff --git a/benchmarks/benchmarks/diffusion.py b/benchmarks/benchmarks/diffusion.py new file mode 100644 index 00000000..55a6f181 --- /dev/null +++ b/benchmarks/benchmarks/diffusion.py @@ -0,0 +1,17 @@ +from xrspatial.diffusion import diffuse + +from .common import Benchmarking + + +class Diffusion(Benchmarking): + params = ([100, 300, 1000], ["numpy", "cupy", "dask"]) + param_names = ("nx", "type") + + def __init__(self): + super().__init__(func=None) + + def time_diffuse_1step(self, nx, type): + diffuse(self.xr, diffusivity=1.0, steps=1) + + def time_diffuse_10steps(self, nx, type): + diffuse(self.xr, diffusivity=1.0, steps=10) diff --git a/benchmarks/benchmarks/erosion.py b/benchmarks/benchmarks/erosion.py new file mode 100644 index 00000000..9787fffa --- /dev/null +++ b/benchmarks/benchmarks/erosion.py @@ -0,0 +1,21 @@ +from xrspatial.erosion import erode + +from .common import get_xr_dataarray + + +class Erosion: + # Erosion is a global operation that materialises dask arrays. + # Keep grid sizes small and iteration counts low so benchmarks + # finish in reasonable time. + params = ([100, 300], ["numpy", "cupy", "dask"]) + param_names = ("nx", "type") + + def setup(self, nx, type): + ny = nx // 2 + self.xr = get_xr_dataarray((ny, nx), type) + + def time_erode_500(self, nx, type): + erode(self.xr, iterations=500, seed=42) + + def time_erode_5000(self, nx, type): + erode(self.xr, iterations=5000, seed=42) diff --git a/benchmarks/benchmarks/normalize.py b/benchmarks/benchmarks/normalize.py new file mode 100644 index 00000000..9224d623 --- /dev/null +++ b/benchmarks/benchmarks/normalize.py @@ -0,0 +1,19 @@ +from xrspatial.normalize import rescale, standardize + +from .common import Benchmarking + + +class Rescale(Benchmarking): + def __init__(self): + super().__init__(func=rescale) + + def time_rescale(self, nx, type): + return self.time(nx, type) + + +class Standardize(Benchmarking): + def __init__(self): + super().__init__(func=standardize) + + def time_standardize(self, nx, type): + return self.time(nx, type) diff --git a/benchmarks/benchmarks/reproject.py b/benchmarks/benchmarks/reproject.py new file mode 100644 index 00000000..0ee5f1e0 --- /dev/null +++ b/benchmarks/benchmarks/reproject.py @@ -0,0 +1,27 @@ +from .common import get_xr_dataarray + +try: + from xrspatial.reproject import reproject as _reproject + _HAS_PYPROJ = True +except ImportError: + _HAS_PYPROJ = False + + +class Reproject: + params = ([100, 300, 1000], ["numpy", "dask"]) + param_names = ("nx", "type") + + def setup(self, nx, type): + if not _HAS_PYPROJ: + raise NotImplementedError("pyproj required") + + ny = nx // 2 + self.xr = get_xr_dataarray((ny, nx), type) + # Tag with WGS84 so reproject knows the source CRS. + self.xr.attrs["crs"] = "EPSG:4326" + + def time_reproject_to_mercator(self, nx, type): + _reproject(self.xr, "EPSG:3857") + + def time_reproject_to_utm(self, nx, type): + _reproject(self.xr, "EPSG:32610")