diff --git a/eis_toolkit/cli.py b/eis_toolkit/cli.py index 347a1666..c34be4c1 100644 --- a/eis_toolkit/cli.py +++ b/eis_toolkit/cli.py @@ -1340,8 +1340,6 @@ def distance_to_anomaly_cli( Uses only the first band of the raster. """ - # from sys import platform - from eis_toolkit.raster_processing.distance_to_anomaly import distance_to_anomaly if second_threshold_criteria_value is not None: @@ -1350,29 +1348,27 @@ def distance_to_anomaly_cli( threshold_criteria_value = first_threshold_criteria_value with ProgressLog.reading_input_files(): - raster = rasterio.open(input_raster) - # # Use optimized version if Windows - # if platform == "win32": - # out_image, out_meta = distance_to_anomaly_gdal( - # anomaly_raster_profile=raster.profile, - # anomaly_raster_data=raster.read(1), - # threshold_criteria_value=threshold_criteria_value, - # threshold_criteria=get_enum_values(threshold_criteria), - # max_distance=max_distance, - # ) - # else: - with ProgressLog.running_algorithm(): - out_image, out_meta = distance_to_anomaly( - anomaly_raster_profile=raster.profile, - anomaly_raster_data=raster.read(1), + with rasterio.open(input_raster) as raster: + raster_array = raster.read(1) + profile = raster.profile.copy() + + # Create nodata mask + mask = (raster_array == profile["nodata"]) | np.isnan(raster_array) + + with ProgressLog.running_algorithm(): + out_image, out_profile = distance_to_anomaly( + anomaly_raster_profile=profile, + anomaly_raster_data=raster_array, threshold_criteria_value=threshold_criteria_value, threshold_criteria=get_enum_values(threshold_criteria), max_distance=max_distance, ) - raster.close() + + # Apply nodata mask after processing + out_image[mask] = out_profile["nodata"] with ProgressLog.saving_output_files(output_raster): - with rasterio.open(output_raster, "w", **out_meta) as dest: + with rasterio.open(output_raster, "w", **out_profile) as dest: dest.write(out_image, 1) ProgressLog.finish() @@ -1395,8 +1391,6 @@ def proximity_to_anomaly_cli( Uses only the first band of the raster. """ - # from sys import platform - from eis_toolkit.raster_processing.proximity_to_anomaly import proximity_to_anomaly if second_threshold_criteria_value is not None: @@ -1405,31 +1399,28 @@ def proximity_to_anomaly_cli( threshold_criteria_value = first_threshold_criteria_value with ProgressLog.reading_input_files(): - raster = rasterio.open(input_raster) - # Use optimized version if Windows - # if platform == "win32": - # out_image, out_meta = proximity_to_anomaly_gdal( - # anomaly_raster_profile=raster.profile, - # anomaly_raster_data=raster.read(1), - # threshold_criteria_value=threshold_criteria_value, - # threshold_criteria=get_enum_values(threshold_criteria), - # max_distance=max_distance, - # scaling_range=(anomaly_value, max_distance_value), - # ) - # else: - with ProgressLog.running_algorithm(): - out_image, out_meta = proximity_to_anomaly( - anomaly_raster_profile=raster.profile, - anomaly_raster_data=raster.read(1), + with rasterio.open(input_raster) as raster: + raster_array = raster.read(1) + profile = raster.profile.copy() + + # Create nodata mask + mask = (raster_array == profile["nodata"]) | np.isnan(raster_array) + + with ProgressLog.running_algorithm(): + out_image, out_profile = proximity_to_anomaly( + anomaly_raster_profile=profile, + anomaly_raster_data=raster_array, threshold_criteria_value=threshold_criteria_value, threshold_criteria=get_enum_values(threshold_criteria), max_distance=max_distance, scaling_range=(anomaly_value, max_distance_value), ) - raster.close() + + # Apply nodata mask + out_image[mask] = out_profile["nodata"] with ProgressLog.saving_output_files(output_raster): - with rasterio.open(output_raster, "w", **out_meta) as dest: + with rasterio.open(output_raster, "w", **out_profile) as dest: dest.write(out_image, 1) ProgressLog.finish() @@ -2225,16 +2216,24 @@ def distance_computation_cli( profile["crs"] = geodataframe.crs profile["driver"] = "GTiff" profile["dtype"] = "float32" + mask = None else: with rasterio.open(base_raster) as raster: profile = raster.profile.copy() + raster_array = raster.read(1) + mask = (raster_array == profile["nodata"]) | np.isnan(raster_array) with ProgressLog.running_algorithm(): - out_image = distance_computation(geodataframe=geodataframe, raster_profile=profile, max_distance=max_distance) - profile["count"] = 1 + out_image, out_profile = distance_computation( + geodataframe=geodataframe, raster_profile=profile, max_distance=max_distance + ) + + # Apply nodata mask + if mask is not None: + out_image[mask] = out_profile["nodata"] with ProgressLog.saving_output_files(output_raster): - with rasterio.open(output_raster, "w", **profile) as dst: + with rasterio.open(output_raster, "w", **out_profile) as dst: dst.write(out_image, 1) ProgressLog.finish() @@ -2307,21 +2306,27 @@ def proximity_computation_cli( profile["crs"] = geodataframe.crs profile["driver"] = "GTiff" profile["dtype"] = "float32" + mask = None else: with rasterio.open(base_raster) as raster: profile = raster.profile.copy() + raster_array = raster.read(1) + mask = (raster_array == profile["nodata"]) | np.isnan(raster_array) with ProgressLog.running_algorithm(): - out_image = proximity_computation( + out_image, out_profile = proximity_computation( geodataframe=geodataframe, raster_profile=profile, maximum_distance=max_distance, scale_range=(geometries_value, max_distance_value), ) - profile["count"] = 1 + + # Apply nodata mask + if mask is not None: + out_image[mask] = out_profile["nodata"] with ProgressLog.saving_output_files(output_raster): - with rasterio.open(output_raster, "w", **profile) as dst: + with rasterio.open(output_raster, "w", **out_profile) as dst: dst.write(out_image, 1) ProgressLog.finish() diff --git a/eis_toolkit/raster_processing/distance_to_anomaly.py b/eis_toolkit/raster_processing/distance_to_anomaly.py index c082d594..0236b340 100644 --- a/eis_toolkit/raster_processing/distance_to_anomaly.py +++ b/eis_toolkit/raster_processing/distance_to_anomaly.py @@ -1,38 +1,20 @@ from itertools import chain from numbers import Number +from os import path +from pathlib import Path +from tempfile import TemporaryDirectory import geopandas as gpd import numpy as np +import rasterio from beartype import beartype from beartype.typing import Literal, Optional, Tuple, Union -from osgeo import gdal from rasterio import profiles from eis_toolkit.exceptions import EmptyDataException, InvalidParameterValueException, NumericValueSignException from eis_toolkit.utilities.checks.raster import check_raster_profile -from eis_toolkit.utilities.miscellaneous import row_points, toggle_gdal_exceptions -from eis_toolkit.vector_processing.distance_computation import distance_computation - -# Enabling gdal exceptions globally -toggle_gdal_exceptions() - - -def _check_threshold_criteria_and_value(threshold_criteria, threshold_criteria_value): - if threshold_criteria in {"lower", "higher"} and not isinstance(threshold_criteria_value, Number): - raise InvalidParameterValueException( - f"Expected threshold_criteria_value {threshold_criteria_value} " - "to be a single number rather than a tuple." - ) - if threshold_criteria in {"in_between", "outside"}: - if not isinstance(threshold_criteria_value, tuple): - raise InvalidParameterValueException( - f"Expected threshold_criteria_value ({threshold_criteria_value}) to be a tuple rather than a number." - ) - if threshold_criteria_value[0] >= threshold_criteria_value[1]: - raise InvalidParameterValueException( - f"Expected first value in threshold_criteria_value ({threshold_criteria_value})" - "tuple to be lower than the second." - ) +from eis_toolkit.utilities.miscellaneous import row_points +from eis_toolkit.vector_processing._distance_computation_numba import distance_computation_numba @beartype @@ -45,6 +27,8 @@ def distance_to_anomaly( ) -> Tuple[np.ndarray, Union[profiles.Profile, dict]]: """Calculate distance from raster cell to nearest anomaly. + If `osgeo_utils.gdal_proximity` is not available, will print a warning and fallback to a slower implementation. + The criteria for what is anomalous can be defined as a single number and criteria text of "higher" or "lower". Alternatively, the definition can be a range where values inside (criteria text of "within") or outside are @@ -66,59 +50,17 @@ def distance_to_anomaly( Returns: A 2D numpy array with the distances to anomalies computed and the original anomaly raster profile. - """ - check_raster_profile(raster_profile=anomaly_raster_profile) - _check_threshold_criteria_and_value( - threshold_criteria=threshold_criteria, threshold_criteria_value=threshold_criteria_value - ) - - out_image = _distance_to_anomaly( - anomaly_raster_profile=anomaly_raster_profile, - anomaly_raster_data=anomaly_raster_data, - threshold_criteria=threshold_criteria, - threshold_criteria_value=threshold_criteria_value, - max_distance=max_distance, - ) - return out_image, anomaly_raster_profile - + import typer -@beartype -def distance_to_anomaly_gdal( - anomaly_raster_profile: Union[profiles.Profile, dict], - anomaly_raster_data: np.ndarray, - threshold_criteria_value: Union[Tuple[Number, Number], Number], - threshold_criteria: Literal["lower", "higher", "in_between", "outside"], - max_distance: Optional[Number] = None, -) -> Tuple[np.ndarray, profiles.Profile]: - """Calculate distance from raster cell to nearest anomaly. - - This tool is much faster than `distance_to_anomaly` but only available on - Windows. - - The criteria for what is anomalous can be defined as a single number and - criteria text of "higher" or "lower". Alternatively, the definition can be - a range where values inside (criteria text of "within") or outside are - marked as anomalous (criteria text of "outside"). If anomaly_raster_profile does - contain "nodata" key, np.nan is assumed to correspond to nodata values. - - Args: - anomaly_raster_profile: The raster profile in which the distances - to the nearest anomalous value are determined. - anomaly_raster_data: The raster data in which the distances - to the nearest anomalous value are determined. - threshold_criteria_value: Value(s) used to define anomalous. - If the threshold criteria requires a tuple of values, - the first value should be the minimum and the second - the maximum value. - threshold_criteria: Method to define anomalous. - max_distance: The maximum distance in the output array. + try: + from osgeo_utils import gdal_proximity # noqa: F401 - Returns: - A 2D numpy array with the distances to anomalies computed - and the original anomaly raster profile. + func = _distance_to_anomaly_gdal + except ModuleNotFoundError: + func = _distance_to_anomaly_numba + typer.echo("WARNING: Falling back to a slower implementation as 'gdal_proximity' was not available!") - """ check_raster_profile(raster_profile=anomaly_raster_profile) _check_threshold_criteria_and_value( threshold_criteria=threshold_criteria, threshold_criteria_value=threshold_criteria_value @@ -126,15 +68,45 @@ def distance_to_anomaly_gdal( if max_distance is not None and max_distance <= 0: raise NumericValueSignException("Expected max distance to be a positive number.") - out_array, out_meta = _distance_to_anomaly_gdal( - anomaly_raster_profile=anomaly_raster_profile, - anomaly_raster_data=anomaly_raster_data, + distance_raster_data = anomaly_raster_data.copy() + distance_raster_data.astype(np.float64) + out_profile = anomaly_raster_profile.copy() + out_profile["dtype"] = np.float32 + out_profile["count"] = 1 + + # We need to convert nodata to a negative number in case it is non-negative in the input raster + if out_profile["nodata"] >= 0.0: + old_nodata = out_profile["nodata"] + new_nodata = -9999 + distance_raster_data = np.where(np.isin(distance_raster_data, old_nodata), new_nodata, distance_raster_data) + out_profile["nodata"] = new_nodata + + out_array, out_profile = func( + anomaly_raster_profile=out_profile, + anomaly_raster_data=distance_raster_data, threshold_criteria=threshold_criteria, threshold_criteria_value=threshold_criteria_value, max_distance=max_distance, ) + return out_array, out_profile - return out_array, out_meta + +def _check_threshold_criteria_and_value(threshold_criteria, threshold_criteria_value): + if threshold_criteria in {"lower", "higher"} and not isinstance(threshold_criteria_value, Number): + raise InvalidParameterValueException( + f"Expected threshold_criteria_value {threshold_criteria_value} " + "to be a single number rather than a tuple." + ) + if threshold_criteria in {"in_between", "outside"}: + if not isinstance(threshold_criteria_value, tuple): + raise InvalidParameterValueException( + f"Expected threshold_criteria_value ({threshold_criteria_value}) to be a tuple rather than a number." + ) + if threshold_criteria_value[0] >= threshold_criteria_value[1]: + raise InvalidParameterValueException( + f"Expected first value in threshold_criteria_value ({threshold_criteria_value})" + "tuple to be lower than the second." + ) def _fits_criteria( @@ -187,13 +159,30 @@ def _validate_threshold_criteria( return data_fits_criteria -def _distance_to_anomaly( +def _write_binary_anomaly_raster(tmp_dir: Path, anomaly_raster_profile, data_fits_criteria: np.ndarray): + anomaly_raster_binary_path = tmp_dir / "anomaly_raster_binary.tif" + + anomaly_raster_binary_profile = {**anomaly_raster_profile, **dict(dtype=rasterio.uint8, count=1, nodata=None)} + with rasterio.open(anomaly_raster_binary_path, mode="w", **anomaly_raster_binary_profile) as anomaly_raster_binary: + anomaly_raster_binary.write(data_fits_criteria.astype(rasterio.uint8), 1) + + return anomaly_raster_binary_path + + +def _distance_to_anomaly_gdal( anomaly_raster_profile: Union[profiles.Profile, dict], anomaly_raster_data: np.ndarray, threshold_criteria_value: Union[Tuple[Number, Number], Number], threshold_criteria: Literal["lower", "higher", "in_between", "outside"], max_distance: Optional[Number], -) -> np.ndarray: + # output_path: Path, + verbose: bool = False, +) -> Tuple[np.ndarray, Union[profiles.Profile, dict]]: + + from osgeo_utils import gdal_proximity + + from eis_toolkit.utilities.gdal import toggle_gdal_exceptions + data_fits_criteria = _validate_threshold_criteria( anomaly_raster_profile, anomaly_raster_data, @@ -201,168 +190,118 @@ def _distance_to_anomaly( threshold_criteria, ) - cols = np.arange(anomaly_raster_data.shape[1]) - rows = np.arange(anomaly_raster_data.shape[0]) - - all_points_by_rows = [ - row_points(row=row, cols=cols[data_fits_criteria[row]], raster_transform=anomaly_raster_profile["transform"]) - for row in rows - ] - all_points = list(chain(*all_points_by_rows)) - all_points_gdf = gpd.GeoDataFrame(geometry=all_points, crs=anomaly_raster_profile["crs"]) + with TemporaryDirectory() as tmp_dir_str: + tmp_dir = Path(tmp_dir_str) + anomaly_raster_binary_path = _write_binary_anomaly_raster( + tmp_dir=tmp_dir, anomaly_raster_profile=anomaly_raster_profile, data_fits_criteria=data_fits_criteria + ) + tmp_out_raster = path.join(tmp_dir, "temp_output.tif") + with toggle_gdal_exceptions(): + gdal_proximity.gdal_proximity( + src_filename=str(anomaly_raster_binary_path), + dst_filename=tmp_out_raster, + alg_options=("VALUES=1", "DISTUNITS=GEO"), + quiet=not verbose, + ) + with rasterio.open(tmp_out_raster) as out_raster: + out_array = out_raster.read(1) - distance_array = distance_computation( - raster_profile=anomaly_raster_profile, geodataframe=all_points_gdf, max_distance=max_distance - ) + if max_distance is not None: + out_array[out_array > max_distance] = max_distance - return distance_array + return out_array, anomaly_raster_profile -def _distance_to_anomaly_gdal( +def _distance_to_anomaly_numba( anomaly_raster_profile: Union[profiles.Profile, dict], anomaly_raster_data: np.ndarray, threshold_criteria_value: Union[Tuple[Number, Number], Number], threshold_criteria: Literal["lower", "higher", "in_between", "outside"], max_distance: Optional[Number], ) -> Tuple[np.ndarray, profiles.Profile]: - data_fits_criteria = _validate_threshold_criteria( anomaly_raster_profile, anomaly_raster_data, threshold_criteria_value, threshold_criteria, ) - # converting True False values to binary formant. - converted_values = np.where(data_fits_criteria, 1, 0) - driver = gdal.GetDriverByName("MEM") - - width = anomaly_raster_profile["width"] - height = anomaly_raster_profile["height"] - temp_raster = driver.Create("", width, height, 1, gdal.GDT_Float32) - transformation = anomaly_raster_profile["transform"] - x_geo = (transformation.c, transformation.a, transformation.b) - y_geo = (transformation.f, transformation.d, transformation.e) - temp_raster.SetGeoTransform(x_geo + y_geo) - crs = anomaly_raster_profile["crs"].to_wkt() - band = temp_raster.GetRasterBand(1) - band.WriteArray(converted_values) - nodatavalue = anomaly_raster_profile["nodata"] - band.SetNoDataValue(nodatavalue) - - # Create empty proximity raster - out_raster = driver.Create("", width, height, 1, gdal.GDT_Float32) - out_raster.SetGeoTransform(x_geo + y_geo) - out_raster.SetProjection(crs) - out_band = out_raster.GetRasterBand(1) - out_band.SetNoDataValue(nodatavalue) - options = ["values=1", "distunits=GEO"] - - # Compute proximity - gdal.ComputeProximity(band, out_band, options) - - # Create outputs - out_array = out_band.ReadAsArray() - if max_distance is not None: - out_array[out_array > max_distance] = max_distance + + cols = np.arange(anomaly_raster_data.shape[1]) + rows = np.arange(anomaly_raster_data.shape[0]) + + all_points_by_rows = [ + row_points(row=row, cols=cols[data_fits_criteria[row]], raster_transform=anomaly_raster_profile["transform"]) + for row in rows + ] + all_points = list(chain(*all_points_by_rows)) + all_points_gdf = gpd.GeoDataFrame(geometry=all_points, crs=anomaly_raster_profile["crs"]) + + distance_array = distance_computation_numba( + raster_profile=anomaly_raster_profile, geodataframe=all_points_gdf, max_distance=max_distance + ) out_meta = anomaly_raster_profile.copy() # Update metadata - out_meta["dtype"] = out_array.dtype.name + out_meta["dtype"] = distance_array.dtype.name out_meta["count"] = 1 - return out_array, out_meta + return distance_array, out_meta -# @beartype -# def distance_to_anomaly_gdal( -# anomaly_raster_profile: Union[profiles.Profile, dict], -# anomaly_raster_data: np.ndarray, -# threshold_criteria_value: Union[Tuple[Number, Number], Number], -# threshold_criteria: Literal["lower", "higher", "in_between", "outside"], -# output_path: Path, -# verbose: bool = False, -# ) -> Path: -# """Calculate distance from raster cell to nearest anomaly. - -# Distance is calculated for each cell in the anomaly raster and saved to a -# new raster at output_path. The criteria for what is anomalous can be -# defined as a single number and criteria text of "higher" or "lower". -# Alternatively, the definition can be a range where values inside -# (criteria text of "within") or outside are marked as anomalous -# (criteria text of "outside"). If anomaly_raster_profile does -# contain "nodata" key, np.nan is assumed to correspond to nodata values. - -# Does not work on Windows. - -# Args: -# anomaly_raster_profile: The raster profile in which the distances -# to the nearest anomalous value are determined. -# anomaly_raster_data: The raster data in which the distances -# to the nearest anomalous value are determined. -# threshold_criteria_value: Value(s) used to define anomalous. -# threshold_criteria: Method to define anomalous. -# output_path: The path to the raster with the distances to anomalies -# calculated. -# verbose: Whether to print gdal_proximity output. - -# Returns: -# The path to the raster with the distances to anomalies calculated. -# """ -# check_raster_profile(raster_profile=anomaly_raster_profile) -# _check_threshold_criteria_and_value( -# threshold_criteria=threshold_criteria, threshold_criteria_value=threshold_criteria_value -# ) - -# return _distance_to_anomaly_gdal( -# output_path=output_path, -# anomaly_raster_profile=anomaly_raster_profile, -# anomaly_raster_data=anomaly_raster_data, -# threshold_criteria=threshold_criteria, -# threshold_criteria_value=threshold_criteria_value, -# verbose=verbose, -# ) - - -# def _write_binary_anomaly_raster(tmp_dir: Path, anomaly_raster_profile, data_fits_criteria: np.ndarray): -# anomaly_raster_binary_path = tmp_dir / "anomaly_raster_binary.tif" - -# anomaly_raster_binary_profile = {**anomaly_raster_profile, **dict(dtype=rasterio.uint8, count=1, nodata=None)} -# with rasterio.open( -# anomaly_raster_binary_path, mode="w", **anomaly_raster_binary_profile -# ) as anomaly_raster_binary: -# anomaly_raster_binary.write(data_fits_criteria.astype(rasterio.uint8), 1) - -# return anomaly_raster_binary_path - - -# def _distance_to_anomaly_gdal( +# def _distance_to_anomaly_gdal_alt( # anomaly_raster_profile: Union[profiles.Profile, dict], # anomaly_raster_data: np.ndarray, # threshold_criteria_value: Union[Tuple[Number, Number], Number], # threshold_criteria: Literal["lower", "higher", "in_between", "outside"], -# output_path: Path, -# verbose: bool, -# ): -# from osgeo_utils import gdal_proximity - -# data_fits_criteria = _fits_criteria( -# threshold_criteria=threshold_criteria, -# threshold_criteria_value=threshold_criteria_value, -# anomaly_raster_data=anomaly_raster_data, -# nodata_value=anomaly_raster_profile.get("nodata"), -# ) - -# with TemporaryDirectory() as tmp_dir_str: -# tmp_dir = Path(tmp_dir_str) -# anomaly_raster_binary_path = _write_binary_anomaly_raster( -# tmp_dir=tmp_dir, anomaly_raster_profile=anomaly_raster_profile, data_fits_criteria=data_fits_criteria +# max_distance: Optional[Number], +# ) -> Tuple[np.ndarray, profiles.Profile]: + +# from osgeo import gdal +# from eis_toolkit.utilities.gdal import toggle_gdal_exceptions + +# with toggle_gdal_exceptions(): +# data_fits_criteria = _validate_threshold_criteria( +# anomaly_raster_profile, +# anomaly_raster_data, +# threshold_criteria_value, +# threshold_criteria, # ) -# with toggle_gdal_exceptions(): -# gdal_proximity.gdal_proximity( -# src_filename=str(anomaly_raster_binary_path), -# dst_filename=str(output_path), -# alg_options=("VALUES=1", "DISTUNITS=GEO"), -# quiet=not verbose, -# ) - -# return output_path +# # converting True False values to binary formant. +# converted_values = np.where(data_fits_criteria, 1, 0) +# driver = gdal.GetDriverByName("MEM") + +# width = anomaly_raster_profile["width"] +# height = anomaly_raster_profile["height"] +# temp_raster = driver.Create("", width, height, 1, gdal.GDT_Float32) +# transformation = anomaly_raster_profile["transform"] +# x_geo = (transformation.c, transformation.a, transformation.b) +# y_geo = (transformation.f, transformation.d, transformation.e) +# temp_raster.SetGeoTransform(x_geo + y_geo) +# crs = anomaly_raster_profile["crs"].to_wkt() +# band = temp_raster.GetRasterBand(1) +# band.WriteArray(converted_values) +# nodatavalue = anomaly_raster_profile["nodata"] +# band.SetNoDataValue(nodatavalue) + +# # Create empty proximity raster +# out_raster = driver.Create("", width, height, 1, gdal.GDT_Float32) +# out_raster.SetGeoTransform(x_geo + y_geo) +# out_raster.SetProjection(crs) +# out_band = out_raster.GetRasterBand(1) +# out_band.SetNoDataValue(nodatavalue) +# options = ["values=1", "distunits=GEO"] + +# # Compute proximity +# gdal.ComputeProximity(band, out_band, options) + +# # Create outputs +# out_array = out_band.ReadAsArray() +# if max_distance is not None: +# out_array[out_array > max_distance] = max_distance +# out_meta = anomaly_raster_profile.copy() + +# # Update metadata +# out_meta["dtype"] = out_array.dtype.name +# out_meta["count"] = 1 + +# return out_array, out_meta diff --git a/eis_toolkit/raster_processing/proximity_to_anomaly.py b/eis_toolkit/raster_processing/proximity_to_anomaly.py index 50a1a0a4..be8529b7 100644 --- a/eis_toolkit/raster_processing/proximity_to_anomaly.py +++ b/eis_toolkit/raster_processing/proximity_to_anomaly.py @@ -5,7 +5,7 @@ from beartype.typing import Literal, Tuple, Union from rasterio import profiles -from eis_toolkit.raster_processing.distance_to_anomaly import distance_to_anomaly, distance_to_anomaly_gdal +from eis_toolkit.raster_processing.distance_to_anomaly import distance_to_anomaly from eis_toolkit.transformations.linear import _min_max_scaling @@ -54,53 +54,3 @@ def proximity_to_anomaly( out_image = _min_max_scaling(out_image, scaling_range) return out_image, anomaly_raster_profile - - -@beartype -def proximity_to_anomaly_gdal( - anomaly_raster_profile: Union[profiles.Profile, dict], - anomaly_raster_data: np.ndarray, - threshold_criteria_value: Union[Tuple[Number, Number], Number], - threshold_criteria: Literal["lower", "higher", "in_between", "outside"], - max_distance: Number, - scaling_range: Tuple[Number, Number] = (1, 0), -) -> Tuple[np.ndarray, Union[profiles.Profile, dict]]: - """Calculate proximity from raster cell to nearest anomaly. - - Uses an optimized, faster version of `distance_to_anomaly` in the background. - Available only on Windows. - - The criteria for what is anomalous can be defined as a single number and - criteria text of "higher" or "lower". Alternatively, the definition can be - a range where values inside (criteria text of "within") or outside are - marked as anomalous (criteria text of "outside"). If anomaly_raster_profile does - contain "nodata" key, np.nan is assumed to correspond to nodata values. - - Scales proximity values linearly in the given range. The first number in scale_range - denotes the value at the anomaly cells, the second at given maximum_distance. - - Args: - anomaly_raster_profile: The raster profile in which the distances - to the nearest anomalous value are determined. - anomaly_raster_data: The raster data in which the distances - to the nearest anomalous value are determined. - threshold_criteria_value: Value(s) used to define anomalous. - If the threshold criteria requires a tuple of values, - the first value should be the minimum and the second - the maximum value. - threshold_criteria: Method to define anomalous. - max_distance: The maximum distance in the output array beyond which - proximity is considered 0. - scaling_range: Min and max values used for scaling the proximity values. - Defaults to (1, 0). - - Returns: - A 2D numpy array with the distances to anomalies computed - and the original anomaly raster profile. - """ - out_image, anomaly_raster_profile = distance_to_anomaly_gdal( - anomaly_raster_profile, anomaly_raster_data, threshold_criteria_value, threshold_criteria, max_distance - ) - out_image = _min_max_scaling(out_image, scaling_range) - - return out_image, anomaly_raster_profile diff --git a/eis_toolkit/utilities/gdal.py b/eis_toolkit/utilities/gdal.py new file mode 100644 index 00000000..7ce98be3 --- /dev/null +++ b/eis_toolkit/utilities/gdal.py @@ -0,0 +1,20 @@ +from contextlib import contextmanager + +from osgeo import gdal + + +@contextmanager +def toggle_gdal_exceptions(): + """Toggle GDAL exceptions using a context manager. + + If the exceptions are already enabled, this function will do nothing. + """ + already_has_exceptions_enabled = False + try: + if gdal.GetUseExceptions() != 0: + already_has_exceptions_enabled = True + gdal.UseExceptions() + yield + finally: + if not already_has_exceptions_enabled: + gdal.DontUseExceptions() diff --git a/eis_toolkit/utilities/miscellaneous.py b/eis_toolkit/utilities/miscellaneous.py index 7e0fcb02..145bc8fd 100644 --- a/eis_toolkit/utilities/miscellaneous.py +++ b/eis_toolkit/utilities/miscellaneous.py @@ -1,11 +1,9 @@ -from contextlib import contextmanager from numbers import Number import numpy as np import pandas as pd from beartype import beartype from beartype.typing import Any, List, Optional, Sequence, Tuple, Union -from osgeo import gdal from rasterio import transform from shapely.geometry import Point @@ -358,20 +356,3 @@ def row_points( point_xs, point_ys = zip(*[raster_transform * (col + 0.5, row + 0.5) for col in cols]) # point_xs, point_ys = transform.xy(transform=raster_transform, cols=cols, rows=row) return [Point(x, y) for x, y in zip(point_xs, point_ys)] - - -@contextmanager -def toggle_gdal_exceptions(): - """Toggle GDAL exceptions using a context manager. - - If the exceptions are already enabled, this function will do nothing. - """ - already_has_exceptions_enabled = False - try: - if gdal.GetUseExceptions() != 0: - already_has_exceptions_enabled = True - gdal.UseExceptions() - yield - finally: - if not already_has_exceptions_enabled: - gdal.DontUseExceptions() diff --git a/eis_toolkit/vector_processing/_distance_computation_numba.py b/eis_toolkit/vector_processing/_distance_computation_numba.py new file mode 100644 index 00000000..9a109ba5 --- /dev/null +++ b/eis_toolkit/vector_processing/_distance_computation_numba.py @@ -0,0 +1,218 @@ +from numbers import Number + +import geopandas as gpd +import numpy as np +from beartype import beartype +from beartype.typing import Optional, Union +from numba import njit, prange +from rasterio import profiles, transform + +from eis_toolkit import exceptions +from eis_toolkit.utilities.checks.raster import check_raster_profile + + +@beartype +def distance_computation_numba( + geodataframe: gpd.GeoDataFrame, raster_profile: Union[profiles.Profile, dict], max_distance: Optional[Number] = None +) -> np.ndarray: + """ + Calculate distance from each raster cell (centre) to the nearest input geometry. + + Pixels on top of input geometries are assigned distance of 0. + + Args: + geodataframe: The GeoDataFrame with geometries to determine distance to. + raster_profile: The raster profile of the raster in which the distances + to the nearest geometry are determined. + max_distance: The maximum distance in the output array. Pixels beyond this + distance will be assigned `max_distance` value. + + Returns: + A 2D numpy array with the distances computed. + + Raises: + NonMatchingCrsException: The input raster profile and geodataframe have mismatching CRS. + EmptyDataFrameException: The input geodataframe is empty. + NumericValueSignException: Max distance is defined and is not a positive number. + """ + if raster_profile.get("crs") != geodataframe.crs: + raise exceptions.NonMatchingCrsException( + "Expected coordinate systems to match between raster and GeoDataFrame." + ) + if geodataframe.empty: + raise exceptions.EmptyDataFrameException("Expected GeoDataFrame to not be empty.") + if max_distance is not None and max_distance <= 0: + raise exceptions.NumericValueSignException("Expected max distance to be a positive number.") + + check_raster_profile(raster_profile=raster_profile) + + raster_width = raster_profile.get("width") + raster_height = raster_profile.get("height") + raster_transform = raster_profile.get("transform") + + # Generate the grid of raster cell center points + raster_points = _generate_raster_points(raster_width, raster_height, raster_transform) + + # Initialize lists needed for Numba-compatible calculations + segment_coords = [] # These will also contain points coords, if present + segment_indices = [0] # Start index + polygon_coords = [] + polygon_indices = [0] # Start index + + for geometry in geodataframe.geometry: + if geometry.geom_type == "Polygon": + coords = list(geometry.exterior.coords) + for x, y in coords: + polygon_coords.extend([x, y]) + polygon_indices.append(len(polygon_coords) // 2) + segments = [ + (coords[i][0], coords[i][1], coords[i + 1][0], coords[i + 1][1]) for i in range(len(coords) - 1) + ] + + elif geometry.geom_type == "MultiPolygon": + # For MultiPolygon, iterate over each polygon + segments = [] + for poly in geometry.geoms: + coords = list(poly.exterior.coords) + for x, y in coords: + polygon_coords.extend([x, y]) + polygon_indices.append(len(polygon_coords) // 2) + + # Add polygon boundary as segments for distance calculations + segments.extend( + [(coords[i][0], coords[i][1], coords[i + 1][0], coords[i + 1][1]) for i in range(len(coords) - 1)] + ) + + elif geometry.geom_type == "LineString": + coords = list(geometry.coords) + segments = [ + (coords[i][0], coords[i][1], coords[i + 1][0], coords[i + 1][1]) for i in range(len(coords) - 1) + ] + + elif geometry.geom_type == "MultiLineString": + # For MultiLineString, iterate through each line string component + segments = [] + for line in geometry.geoms: + coords = list(line.coords) + segments.extend( + [(coords[i][0], coords[i][1], coords[i + 1][0], coords[i + 1][1]) for i in range(len(coords) - 1)] + ) + + elif geometry.geom_type == "Point": + segments = [(geometry.x, geometry.y)] + + elif geometry.geom_type == "MultiPoint": + # For MultiPoint, iterate over each point and add as individual (x, y) tuples + segments = [(point.x, point.y) for point in geometry.geoms] + + else: + raise exceptions.GeometryTypeException(f"Encountered unsupported geometry type: {geometry.geom_type}.") + + segment_coords.extend(segments) + segment_indices.append(len(segment_coords)) # End index for this geometry's segments + + # Convert all lists to numpy arrays + segment_coords = np.array(segment_coords, dtype=np.float64) + segment_indices = np.array(segment_indices, dtype=np.int64) + polygon_coords = np.array(polygon_coords, dtype=np.float64) + polygon_indices = np.array(polygon_indices, dtype=np.int64) + + distance_matrix = _compute_distances_core( + raster_points, + segment_coords, + segment_indices, + polygon_coords, + polygon_indices, + raster_width, + raster_height, + max_distance, + ) + + return distance_matrix + + +@njit(parallel=True) +def _compute_distances_core( + raster_points: np.ndarray, + segment_coords: np.ndarray, + segment_indices: np.ndarray, + polygon_coords: np.ndarray, + polygon_indices: np.ndarray, + width: int, + height: int, + max_distance: Optional[Number], +) -> np.ndarray: + distance_matrix = np.full((height, width), np.inf) + for i in prange(len(raster_points)): + px, py = raster_points[i] + min_dist = np.inf + + # Check if the point is inside any polygon, if polygons are present + if len(polygon_indices) > 1 and _point_in_polygon(px, py, polygon_coords, polygon_indices): + min_dist = 0 # Set distance to zero if point is inside a polygon + else: + # Only calculate distance to segments if point is outside all polygons + for j in range(len(segment_indices) - 1): + for k in range(segment_indices[j], segment_indices[j + 1]): + # Case 1: Point + if len(segment_coords[k]) == 2: + x1, y1 = segment_coords[k] + dist = np.sqrt((px - x1) ** 2 + (py - y1) ** 2) + # Case 2: Line segment + else: + x1, y1, x2, y2 = segment_coords[k] + dist = _point_to_segment_distance(px, py, x1, y1, x2, y2) + if dist < min_dist: + min_dist = dist + + # Apply max_distance threshold if specified + if max_distance is not None: + min_dist = min(min_dist, max_distance) + + # Update the distance matrix + distance_matrix[i // width, i % width] = min_dist + return distance_matrix + + +def _generate_raster_points(width: int, height: int, affine_transform: transform.Affine) -> np.ndarray: + """Generate a full grid of points from the raster dimensions and affine transform.""" + cols, rows = np.meshgrid(np.arange(width), np.arange(height)) + cols = cols.ravel() + rows = rows.ravel() + xs, ys = transform.xy(affine_transform, rows, cols, offset="center") + return np.column_stack([xs, ys]) + + +@njit +def _point_to_segment_distance(px: Number, py: Number, x1: Number, y1: Number, x2: Number, y2: Number) -> np.ndarray: + """Calculate the minimum distance from a point to a line segment.""" + dx, dy = x2 - x1, y2 - y1 + if dx == 0 and dy == 0: + # Segment is a point (Should not happen) + return np.sqrt((px - x1) ** 2 + (py - y1) ** 2) + t = max(0, min(1, ((px - x1) * dx + (py - y1) * dy) / (dx * dx + dy * dy))) + nearest_x, nearest_y = x1 + t * dx, y1 + t * dy + return np.sqrt((px - nearest_x) ** 2 + (py - nearest_y) ** 2) + + +@njit +def _point_in_polygon(px: Number, py: Number, polygon_coords: np.ndarray, polygon_indices: np.ndarray) -> bool: + """Determine if a point is inside any polygon using the ray-casting algorithm.""" + for p_start, p_end in zip(polygon_indices[:-1], polygon_indices[1:]): + inside = False + xints = 0.0 + n = p_end - p_start + p1x, p1y = polygon_coords[2 * p_start], polygon_coords[2 * p_start + 1] + for i in range(n + 1): + p2x, p2y = polygon_coords[2 * (p_start + i % n)], polygon_coords[2 * (p_start + i % n) + 1] + if py > min(p1y, p2y): + if py <= max(p1y, p2y): + if px <= max(p1x, p2x): + if p1y != p2y: + xints = (py - p1y) * (p2x - p1x) / (p2y - p1y) + p1x + if p1x == p2x or px <= xints: + inside = not inside + p1x, p1y = p2x, p2y + if inside: + return True + return False diff --git a/eis_toolkit/vector_processing/distance_computation.py b/eis_toolkit/vector_processing/distance_computation.py index 4b7476d5..e51b39e3 100644 --- a/eis_toolkit/vector_processing/distance_computation.py +++ b/eis_toolkit/vector_processing/distance_computation.py @@ -3,26 +3,27 @@ import geopandas as gpd import numpy as np from beartype import beartype -from beartype.typing import Optional, Union -from numba import njit, prange -from rasterio import profiles, transform +from beartype.typing import Optional, Tuple, Union +from rasterio import profiles from eis_toolkit import exceptions +from eis_toolkit.raster_processing.distance_to_anomaly import distance_to_anomaly from eis_toolkit.utilities.checks.raster import check_raster_profile +from eis_toolkit.vector_processing.rasterize_vector import rasterize_vector @beartype def distance_computation( - geodataframe: gpd.GeoDataFrame, raster_profile: Union[profiles.Profile, dict], max_distance: Optional[Number] = None -) -> np.ndarray: + geodataframe: gpd.GeoDataFrame, + raster_profile: Union[profiles.Profile, dict], + max_distance: Optional[Number] = None, +) -> Tuple[np.ndarray, Union[profiles.Profile, dict]]: """ Calculate distance from each raster cell (centre) to the nearest input geometry. Pixels on top of input geometries are assigned distance of 0. - Uses Numba to perform calculations quickly. The computation time increases (roughly) - linearly with the amount of raster pixels defined by given `raster_profile`. Supports - Polygon, MultiPolygon, LineString, MultiLineString, Point and MultiPoint geometries. + Rasterizes geometries and uses `Distance to anomaly` tool for computation. Args: geodataframe: The GeoDataFrame with geometries to determine distance to. @@ -32,7 +33,7 @@ def distance_computation( distance will be assigned `max_distance` value. Returns: - A 2D numpy array with the distances computed. + A 2D numpy array with the distances computed and raster profile. Raises: NonMatchingCrsException: The input raster profile and geodataframe have mismatching CRS. @@ -50,176 +51,12 @@ def distance_computation( check_raster_profile(raster_profile=raster_profile) - raster_width = raster_profile.get("width") - raster_height = raster_profile.get("height") - raster_transform = raster_profile.get("transform") - - # Generate the grid of raster cell center points - raster_points = _generate_raster_points(raster_width, raster_height, raster_transform) - - # Initialize lists needed for Numba-compatible calculations - segment_coords = [] # These will also contain points coords, if present - segment_indices = [0] # Start index - polygon_coords = [] - polygon_indices = [0] # Start index - - for geometry in geodataframe.geometry: - if geometry.geom_type == "Polygon": - coords = list(geometry.exterior.coords) - for x, y in coords: - polygon_coords.extend([x, y]) - polygon_indices.append(len(polygon_coords) // 2) - segments = [ - (coords[i][0], coords[i][1], coords[i + 1][0], coords[i + 1][1]) for i in range(len(coords) - 1) - ] - - elif geometry.geom_type == "MultiPolygon": - # For MultiPolygon, iterate over each polygon - segments = [] - for poly in geometry.geoms: - coords = list(poly.exterior.coords) - for x, y in coords: - polygon_coords.extend([x, y]) - polygon_indices.append(len(polygon_coords) // 2) - - # Add polygon boundary as segments for distance calculations - segments.extend( - [(coords[i][0], coords[i][1], coords[i + 1][0], coords[i + 1][1]) for i in range(len(coords) - 1)] - ) - - elif geometry.geom_type == "LineString": - coords = list(geometry.coords) - segments = [ - (coords[i][0], coords[i][1], coords[i + 1][0], coords[i + 1][1]) for i in range(len(coords) - 1) - ] - - elif geometry.geom_type == "MultiLineString": - # For MultiLineString, iterate through each line string component - segments = [] - for line in geometry.geoms: - coords = list(line.coords) - segments.extend( - [(coords[i][0], coords[i][1], coords[i + 1][0], coords[i + 1][1]) for i in range(len(coords) - 1)] - ) - - elif geometry.geom_type == "Point": - segments = [(geometry.x, geometry.y)] - - elif geometry.geom_type == "MultiPoint": - # For MultiPoint, iterate over each point and add as individual (x, y) tuples - segments = [(point.x, point.y) for point in geometry.geoms] - - else: - raise exceptions.GeometryTypeException(f"Encountered unsupported geometry type: {geometry.geom_type}.") - - segment_coords.extend(segments) - segment_indices.append(len(segment_coords)) # End index for this geometry's segments - - # Convert all lists to numpy arrays - segment_coords = np.array(segment_coords, dtype=np.float64) - segment_indices = np.array(segment_indices, dtype=np.int64) - polygon_coords = np.array(polygon_coords, dtype=np.float64) - polygon_indices = np.array(polygon_indices, dtype=np.int64) - - distance_matrix = _compute_distances_core( - raster_points, - segment_coords, - segment_indices, - polygon_coords, - polygon_indices, - raster_width, - raster_height, - max_distance, + # NOTE: Default value is set to 2 and fill value to 1 for cases where given raster_profile has 0 as nodata + rasterized_data = rasterize_vector( + geodataframe, raster_profile, value_column=None, default_value=2.0, fill_value=1.0 ) - - return distance_matrix - - -@njit(parallel=True) -def _compute_distances_core( - raster_points: np.ndarray, - segment_coords: np.ndarray, - segment_indices: np.ndarray, - polygon_coords: np.ndarray, - polygon_indices: np.ndarray, - width: int, - height: int, - max_distance: Optional[Number], -) -> np.ndarray: - distance_matrix = np.full((height, width), np.inf) - for i in prange(len(raster_points)): - px, py = raster_points[i] - min_dist = np.inf - - # Check if the point is inside any polygon, if polygons are present - if len(polygon_indices) > 1 and _point_in_polygon(px, py, polygon_coords, polygon_indices): - min_dist = 0 # Set distance to zero if point is inside a polygon - else: - # Only calculate distance to segments if point is outside all polygons - for j in range(len(segment_indices) - 1): - for k in range(segment_indices[j], segment_indices[j + 1]): - # Case 1: Point - if len(segment_coords[k]) == 2: - x1, y1 = segment_coords[k] - dist = np.sqrt((px - x1) ** 2 + (py - y1) ** 2) - # Case 2: Line segment - else: - x1, y1, x2, y2 = segment_coords[k] - dist = _point_to_segment_distance(px, py, x1, y1, x2, y2) - if dist < min_dist: - min_dist = dist - - # Apply max_distance threshold if specified - if max_distance is not None: - min_dist = min(min_dist, max_distance) - - # Update the distance matrix - distance_matrix[i // width, i % width] = min_dist - return distance_matrix - - -def _generate_raster_points(width: int, height: int, affine_transform: transform.Affine) -> np.ndarray: - """Generate a full grid of points from the raster dimensions and affine transform.""" - cols, rows = np.meshgrid(np.arange(width), np.arange(height)) - cols = cols.ravel() - rows = rows.ravel() - xs, ys = transform.xy(affine_transform, rows, cols, offset="center") - return np.column_stack([xs, ys]) - - -@njit -def _point_to_segment_distance(px: Number, py: Number, x1: Number, y1: Number, x2: Number, y2: Number) -> np.ndarray: - """Calculate the minimum distance from a point to a line segment.""" - dx, dy = x2 - x1, y2 - y1 - if dx == 0 and dy == 0: - # Segment is a point (Should not happen) - return np.sqrt((px - x1) ** 2 + (py - y1) ** 2) - t = max(0, min(1, ((px - x1) * dx + (py - y1) * dy) / (dx * dx + dy * dy))) - nearest_x, nearest_y = x1 + t * dx, y1 + t * dy - return np.sqrt((px - nearest_x) ** 2 + (py - nearest_y) ** 2) - - -@njit -def _point_in_polygon(px: Number, py: Number, polygon_coords: np.ndarray, polygon_indices: np.ndarray) -> bool: - """Determine if a point is inside any polygon using the ray-casting algorithm.""" - for p_start, p_end in zip(polygon_indices[:-1], polygon_indices[1:]): - inside = False - xints = 0.0 - n = p_end - p_start - p1x, p1y = polygon_coords[2 * p_start], polygon_coords[2 * p_start + 1] - for i in range(n + 1): - p2x, p2y = polygon_coords[2 * (p_start + i % n)], polygon_coords[2 * (p_start + i % n) + 1] - if py > min(p1y, p2y): - if py <= max(p1y, p2y): - if px <= max(p1x, p2x): - if p1y != p2y: - xints = (py - p1y) * (p2x - p1x) / (p2y - p1y) + p1x - if p1x == p2x or px <= xints: - inside = not inside - p1x, p1y = p2x, p2y - if inside: - return True - return False + out_array, out_profile = distance_to_anomaly(raster_profile, rasterized_data, 1.5, "higher", max_distance) + return out_array, out_profile # @beartype diff --git a/eis_toolkit/vector_processing/proximity_computation.py b/eis_toolkit/vector_processing/proximity_computation.py index 472faf6a..8bd6400c 100644 --- a/eis_toolkit/vector_processing/proximity_computation.py +++ b/eis_toolkit/vector_processing/proximity_computation.py @@ -16,7 +16,7 @@ def proximity_computation( raster_profile: Union[profiles.Profile, dict], maximum_distance: Number, scale_range: Tuple[Number, Number] = (1, 0), -) -> np.ndarray: +) -> Tuple[np.ndarray, Union[profiles.Profile, dict]]: """Compute proximity to the nearest geometries. Scales proximity values linearly in the given range. The first number in scale_range @@ -30,15 +30,15 @@ def proximity_computation( scaling_range: Min and max values used for scaling the proximity values. Defaults to (1,0). Returns: - A 2D numpy array with the scaled values. + A 2D numpy array with the linearly scaled proximity values and raster profile.. Raises: NonMatchingCrsException: The input raster profile and geodataframe have mismatching CRS. EmptyDataFrameException: The input geodataframe is empty. """ - out_matrix = _linear_proximity_computation(geodataframe, raster_profile, maximum_distance, scale_range) + out_image, out_profile = _linear_proximity_computation(geodataframe, raster_profile, maximum_distance, scale_range) - return out_matrix + return out_image, out_profile @beartype @@ -47,25 +47,9 @@ def _linear_proximity_computation( raster_profile: Union[profiles.Profile, dict], maximum_distance: Number, scaling_range: Tuple[Number, Number], -) -> np.ndarray: - """Compute proximity to the nearest geometries. - - Args: - geodataframe: The GeoDataFrame with geometries to determine proximity to. - raster_profile: The raster profile of the raster in which the distances - to the nearest geometry are determined. - max_distance: The maximum distance in the output array. - scaling_range: a tuple of maximum value in the scaling and minimum value. - - Returns: - A 2D numpy array with the linearly scaled values. - - Raises: - NonMatchingCrsException: The input raster profile and geodataframe have mismatching CRS. - EmptyDataFrameException: The input geodataframe is empty. - """ - out_matrix = distance_computation(geodataframe, raster_profile, maximum_distance) +) -> Tuple[np.ndarray, Union[profiles.Profile, dict]]: + out_image, out_profile = distance_computation(geodataframe, raster_profile, maximum_distance) - out_matrix = _min_max_scaling(out_matrix, scaling_range) + out_image = _min_max_scaling(out_image, scaling_range) - return out_matrix + return out_image, out_profile diff --git a/tests/raster_processing/test_distance_to_anomaly.py b/tests/raster_processing/test_distance_to_anomaly.py index 19585988..e93bbf3a 100644 --- a/tests/raster_processing/test_distance_to_anomaly.py +++ b/tests/raster_processing/test_distance_to_anomaly.py @@ -1,4 +1,3 @@ -import sys from contextlib import nullcontext from functools import partial @@ -26,7 +25,7 @@ 5.0, "lower", EXPECTED_SMALL_RASTER_SHAPE, - 5.6949, + 5.6953, id="small_raster_lower", ), pytest.param( @@ -35,7 +34,7 @@ 5.0, "higher", EXPECTED_SMALL_RASTER_SHAPE, - 6.451948, + 6.4521, id="small_raster_higher", ), pytest.param( @@ -44,7 +43,7 @@ (2.5, 7.5), "in_between", EXPECTED_SMALL_RASTER_SHAPE, - 2.114331, + 2.1145, id="small_raster_in_between", ), pytest.param( @@ -53,7 +52,7 @@ (2.5, 7.5), "outside", EXPECTED_SMALL_RASTER_SHAPE, - 32.490106, + 32.4904, id="small_raster_outside", ), ] @@ -101,7 +100,7 @@ def test_distance_to_anomaly_expected( assert out_image.shape == expected_shape if expected_mean is not None: - assert np.isclose(np.mean(out_image), expected_mean) + assert np.isclose(np.mean(out_image), expected_mean, atol=0.0001) @pytest.mark.parametrize( @@ -262,203 +261,209 @@ def test_distance_to_anomaly_nodata_handling( assert not np.isclose(np.mean(out_image), expected_mean_without_nodata) -@pytest.mark.parametrize( - ",".join( - [ - "anomaly_raster_profile", - "anomaly_raster_data", - "threshold_criteria_value", - "threshold_criteria", - "expected_shape", - "expected_mean", - ] - ), - EXPECTED_PYTESTPARAMS, -) -@pytest.mark.xfail(sys.platform != "win32", reason="gdal_array available only on Windows.", raises=ModuleNotFoundError) -def test_distance_to_anomaly_gdal_expected( - anomaly_raster_profile, - anomaly_raster_data, - threshold_criteria_value, - threshold_criteria, - expected_shape, - expected_mean, -): - """Test distance_to_anomaly_gdal with expected result.""" - - assert not np.any(np.isnan(anomaly_raster_data)) - out_image, out_profile = distance_to_anomaly.distance_to_anomaly_gdal( - anomaly_raster_profile=anomaly_raster_profile, - anomaly_raster_data=anomaly_raster_data, - threshold_criteria_value=threshold_criteria_value, - threshold_criteria=threshold_criteria, - ) - - _check_result(out_image=out_image, out_profile=out_profile) - - assert out_image.shape == expected_shape - if expected_mean is not None: - # adding a relative tolerance and absolute tolerance to accomodate the very small error - # that arises due to the floating point errors - assert np.isclose(np.mean(out_image), expected_mean, rtol=1e-2, atol=1e-2) - - -@pytest.mark.parametrize( - ",".join( - [ - "anomaly_raster_profile", - "anomaly_raster_data", - "threshold_criteria_value", - "threshold_criteria", - "expected_shape", - "expected_mean_without_nodata", - "nodata_mask_value", - ] - ), - [ - pytest.param( - SMALL_RASTER_PROFILE, - SMALL_RASTER_DATA, - 5.0, - "higher", - EXPECTED_SMALL_RASTER_SHAPE, - 6.451948, - # Part of values over 5 will now be masked as nodata - 9.0, - id="small_raster_with_inserted_nodata", - ), - ], -) -@pytest.mark.xfail(sys.platform != "win32", reason="gdal_array available only on Windows.", raises=ModuleNotFoundError) -def test_distance_to_anomaly_gdal_nodata_handling( - anomaly_raster_profile, - anomaly_raster_data, - threshold_criteria_value, - threshold_criteria, - expected_shape, - expected_mean_without_nodata, - nodata_mask_value, -): - """Test distance_to_anomaly_gdal with expected result.""" - - anomaly_raster_data_with_nodata = np.where(anomaly_raster_data > nodata_mask_value, np.nan, anomaly_raster_data) - assert np.any(np.isnan(anomaly_raster_data_with_nodata)) - - out_image, out_profile = distance_to_anomaly.distance_to_anomaly_gdal( - anomaly_raster_profile=anomaly_raster_profile, - anomaly_raster_data=anomaly_raster_data_with_nodata, - threshold_criteria_value=threshold_criteria_value, - threshold_criteria=threshold_criteria, - ) - - _check_result(out_image=out_image, out_profile=out_profile) - - assert out_image.shape == expected_shape - - # Result should not be same as without nodata addition - assert not np.isclose(np.mean(out_image), expected_mean_without_nodata) - - -@pytest.mark.parametrize( - ",".join( - [ - "anomaly_raster_profile", - "anomaly_raster_data", - "threshold_criteria_value", - "threshold_criteria", - "profile_additions", - "raises", - ] - ), - [ - pytest.param( - SMALL_RASTER_PROFILE, - SMALL_RASTER_DATA, - (100.5, 122.5), - "in_between", - dict, - partial(pytest.raises, EmptyDataException), - id="expected_empty_data_due_to_threshold_range_outside_values2", - ), - pytest.param( - SMALL_RASTER_PROFILE, - SMALL_RASTER_DATA, - 5.0, - "lower", - dict, - nullcontext, - id="no_expected_exception", - ), - pytest.param( - SMALL_RASTER_PROFILE, - SMALL_RASTER_DATA, - 5.0, - "higher", - partial(dict, height=2.2), - partial(pytest.raises, InvalidParameterValueException), - id="expected_invalid_param_due_to_float_value_in_profile", - ), - pytest.param( - SMALL_RASTER_PROFILE, - SMALL_RASTER_DATA, - 5.0, - "higher", - partial(dict, transform=None), - partial(pytest.raises, InvalidParameterValueException), - id="expected_invalid_param_due_to_none_transform_value", - ), - pytest.param( - SMALL_RASTER_PROFILE, - SMALL_RASTER_DATA, - 5.0, - "in_between", - dict, - partial(pytest.raises, InvalidParameterValueException), - id="expected_invalid_param_due_to_number_rather_than_range", - ), - pytest.param( - SMALL_RASTER_PROFILE, - SMALL_RASTER_DATA, - (7.5, 2.5), - "in_between", - dict, - partial(pytest.raises, InvalidParameterValueException), - id="expected_invalid_param_due_to_invalid_order_in_tuple", - ), - pytest.param( - SMALL_RASTER_PROFILE, - SMALL_RASTER_DATA, - (1.5, 2.5, 7.5), - "in_between", - dict, - partial(pytest.raises, BeartypeCallHintParamViolation), - id="expected_invalid_param_due_to_tuple_of_length_three", - ), - ], -) -@pytest.mark.xfail(sys.platform != "win32", reason="gdal_array available only on Windows.", raises=ModuleNotFoundError) -def test_distance_to_anomaly_gdal_expected_check( - anomaly_raster_profile, - anomaly_raster_data, - threshold_criteria_value, - threshold_criteria, - profile_additions, - raises, -): - """Test distance_to_anomaly_gdal checks.""" - - anomaly_raster_profile.update(profile_additions()) - anomaly_raster_profile_with_additions = anomaly_raster_profile - with raises() as exc_info: - out_image, out_profile = distance_to_anomaly.distance_to_anomaly_gdal( - anomaly_raster_profile=anomaly_raster_profile_with_additions, - anomaly_raster_data=anomaly_raster_data, - threshold_criteria_value=threshold_criteria_value, - threshold_criteria=threshold_criteria, - ) - - if exc_info is not None: - # Expected error - return - - _check_result(out_image=out_image, out_profile=out_profile) +# @pytest.mark.parametrize( +# ",".join( +# [ +# "anomaly_raster_profile", +# "anomaly_raster_data", +# "threshold_criteria_value", +# "threshold_criteria", +# "expected_shape", +# "expected_mean", +# ] +# ), +# EXPECTED_PYTESTPARAMS, +# ) +# @pytest.mark.xfail( +# sys.platform != "win32", reason="gdal_array available only on Windows.", raises=ModuleNotFoundError +# ) +# def test_distance_to_anomaly_gdal_expected( +# anomaly_raster_profile, +# anomaly_raster_data, +# threshold_criteria_value, +# threshold_criteria, +# expected_shape, +# expected_mean, +# ): +# """Test distance_to_anomaly_gdal with expected result.""" + +# assert not np.any(np.isnan(anomaly_raster_data)) +# out_image, out_profile = distance_to_anomaly.distance_to_anomaly_gdal( +# anomaly_raster_profile=anomaly_raster_profile, +# anomaly_raster_data=anomaly_raster_data, +# threshold_criteria_value=threshold_criteria_value, +# threshold_criteria=threshold_criteria, +# ) + +# _check_result(out_image=out_image, out_profile=out_profile) + +# assert out_image.shape == expected_shape +# if expected_mean is not None: +# # adding a relative tolerance and absolute tolerance to accomodate the very small error +# # that arises due to the floating point errors +# assert np.isclose(np.mean(out_image), expected_mean, rtol=1e-2, atol=1e-2) + + +# @pytest.mark.parametrize( +# ",".join( +# [ +# "anomaly_raster_profile", +# "anomaly_raster_data", +# "threshold_criteria_value", +# "threshold_criteria", +# "expected_shape", +# "expected_mean_without_nodata", +# "nodata_mask_value", +# ] +# ), +# [ +# pytest.param( +# SMALL_RASTER_PROFILE, +# SMALL_RASTER_DATA, +# 5.0, +# "higher", +# EXPECTED_SMALL_RASTER_SHAPE, +# 6.451948, +# # Part of values over 5 will now be masked as nodata +# 9.0, +# id="small_raster_with_inserted_nodata", +# ), +# ], +# ) +# @pytest.mark.xfail( +# sys.platform != "win32", reason="gdal_array available only on Windows.", raises=ModuleNotFoundError +# ) +# def test_distance_to_anomaly_gdal_nodata_handling( +# anomaly_raster_profile, +# anomaly_raster_data, +# threshold_criteria_value, +# threshold_criteria, +# expected_shape, +# expected_mean_without_nodata, +# nodata_mask_value, +# ): +# """Test distance_to_anomaly_gdal with expected result.""" + +# anomaly_raster_data_with_nodata = np.where(anomaly_raster_data > nodata_mask_value, np.nan, anomaly_raster_data) +# assert np.any(np.isnan(anomaly_raster_data_with_nodata)) + +# out_image, out_profile = distance_to_anomaly.distance_to_anomaly_gdal( +# anomaly_raster_profile=anomaly_raster_profile, +# anomaly_raster_data=anomaly_raster_data_with_nodata, +# threshold_criteria_value=threshold_criteria_value, +# threshold_criteria=threshold_criteria, +# ) + +# _check_result(out_image=out_image, out_profile=out_profile) + +# assert out_image.shape == expected_shape + +# # Result should not be same as without nodata addition +# assert not np.isclose(np.mean(out_image), expected_mean_without_nodata) + + +# @pytest.mark.parametrize( +# ",".join( +# [ +# "anomaly_raster_profile", +# "anomaly_raster_data", +# "threshold_criteria_value", +# "threshold_criteria", +# "profile_additions", +# "raises", +# ] +# ), +# [ +# pytest.param( +# SMALL_RASTER_PROFILE, +# SMALL_RASTER_DATA, +# (100.5, 122.5), +# "in_between", +# dict, +# partial(pytest.raises, EmptyDataException), +# id="expected_empty_data_due_to_threshold_range_outside_values2", +# ), +# pytest.param( +# SMALL_RASTER_PROFILE, +# SMALL_RASTER_DATA, +# 5.0, +# "lower", +# dict, +# nullcontext, +# id="no_expected_exception", +# ), +# pytest.param( +# SMALL_RASTER_PROFILE, +# SMALL_RASTER_DATA, +# 5.0, +# "higher", +# partial(dict, height=2.2), +# partial(pytest.raises, InvalidParameterValueException), +# id="expected_invalid_param_due_to_float_value_in_profile", +# ), +# pytest.param( +# SMALL_RASTER_PROFILE, +# SMALL_RASTER_DATA, +# 5.0, +# "higher", +# partial(dict, transform=None), +# partial(pytest.raises, InvalidParameterValueException), +# id="expected_invalid_param_due_to_none_transform_value", +# ), +# pytest.param( +# SMALL_RASTER_PROFILE, +# SMALL_RASTER_DATA, +# 5.0, +# "in_between", +# dict, +# partial(pytest.raises, InvalidParameterValueException), +# id="expected_invalid_param_due_to_number_rather_than_range", +# ), +# pytest.param( +# SMALL_RASTER_PROFILE, +# SMALL_RASTER_DATA, +# (7.5, 2.5), +# "in_between", +# dict, +# partial(pytest.raises, InvalidParameterValueException), +# id="expected_invalid_param_due_to_invalid_order_in_tuple", +# ), +# pytest.param( +# SMALL_RASTER_PROFILE, +# SMALL_RASTER_DATA, +# (1.5, 2.5, 7.5), +# "in_between", +# dict, +# partial(pytest.raises, BeartypeCallHintParamViolation), +# id="expected_invalid_param_due_to_tuple_of_length_three", +# ), +# ], +# ) +# @pytest.mark.xfail( +# sys.platform != "win32", reason="gdal_array available only on Windows.", raises=ModuleNotFoundError +# ) +# def test_distance_to_anomaly_gdal_expected_check( +# anomaly_raster_profile, +# anomaly_raster_data, +# threshold_criteria_value, +# threshold_criteria, +# profile_additions, +# raises, +# ): +# """Test distance_to_anomaly_gdal checks.""" + +# anomaly_raster_profile.update(profile_additions()) +# anomaly_raster_profile_with_additions = anomaly_raster_profile +# with raises() as exc_info: +# out_image, out_profile = distance_to_anomaly.distance_to_anomaly_gdal( +# anomaly_raster_profile=anomaly_raster_profile_with_additions, +# anomaly_raster_data=anomaly_raster_data, +# threshold_criteria_value=threshold_criteria_value, +# threshold_criteria=threshold_criteria, +# ) + +# if exc_info is not None: +# # Expected error +# return + +# _check_result(out_image=out_image, out_profile=out_profile) diff --git a/tests/vector_processing/distance_computation_test.py b/tests/vector_processing/distance_computation_test.py index 5e0e85f5..57c9febc 100644 --- a/tests/vector_processing/distance_computation_test.py +++ b/tests/vector_processing/distance_computation_test.py @@ -47,7 +47,7 @@ POINT_GEOMETRIES_WITHIN_SMALL_RASTER, EXPECTED_SMALL_RASTER_SHAPE, 0.0, - 107.83784122468327, + 107.51744, id="point_geometries_within_small_raster", ), pytest.param( @@ -55,7 +55,7 @@ LINE_GEOMETRIES_WITHIN_SMALL_RASTER, EXPECTED_SMALL_RASTER_SHAPE, 0.0, - 107.83784122468327, + 107.51744, id="line_geometries_within_small_raster", ), pytest.param( @@ -63,7 +63,7 @@ POLYGON_GEOMETRIES_WITHIN_SMALL_RASTER, EXPECTED_SMALL_RASTER_SHAPE, 0.0, - 91.0, + 92.0, id="polygon_geometries_within_small_raster", ), ], @@ -73,12 +73,12 @@ def test_distance_computation_with_expected_results( ): """Test distance_computation.""" - result = distance_computation(raster_profile=raster_profile, geodataframe=geodataframe) + out_image, _ = distance_computation(raster_profile=raster_profile, geodataframe=geodataframe) - assert isinstance(result, np.ndarray) - assert result.shape == expected_shape - assert np.isclose(result.min(), expected_min) - assert np.isclose(result.max(), expected_max) + assert isinstance(out_image, np.ndarray) + assert out_image.shape == expected_shape + assert np.isclose(out_image.min(), expected_min, atol=0.0001) + assert np.isclose(out_image.max(), expected_max, atol=0.0001) @pytest.mark.parametrize( @@ -115,6 +115,6 @@ def test_distance_computation(raster_profile, geodataframe, expected_error): """Test distance_computation.""" with expected_error: - result = distance_computation(raster_profile=raster_profile, geodataframe=geodataframe) - assert isinstance(result, np.ndarray) - assert len(result.shape) == 2 + out_image, _ = distance_computation(raster_profile=raster_profile, geodataframe=geodataframe) + assert isinstance(out_image, np.ndarray) + assert len(out_image.shape) == 2 diff --git a/tests/vector_processing/proximity_computation_test.py b/tests/vector_processing/proximity_computation_test.py index 743019bb..7af782a3 100644 --- a/tests/vector_processing/proximity_computation_test.py +++ b/tests/vector_processing/proximity_computation_test.py @@ -45,11 +45,11 @@ def test_proximity_computation_inversion_with_expected_result( ): """Tests if the enteries in the output matrix are between the minimum and maximum value.""" - result = proximity_computation(geodataframe, raster_profile, maximum_distance, scaling_range) + out_image, _ = proximity_computation(geodataframe, raster_profile, maximum_distance, scaling_range) - assert result.shape == expected_shape + assert out_image.shape == expected_shape # Assert that all values in result within scaling_range - assert np.all((result >= scaling_range[1]) & (result <= scaling_range[0])), "Scaling out of scaling_range" + assert np.all((out_image >= scaling_range[1]) & (out_image <= scaling_range[0])), "Scaling out of scaling_range" @pytest.mark.parametrize( @@ -78,16 +78,16 @@ def test_proximity_computation_with_expected_result( ): """Tests if the enteries in the output matrix are between the minimum and maximum value.""" - result = proximity_computation(geodataframe, raster_profile, maximum_distance, scaling_range) + out_image, _ = proximity_computation(geodataframe, raster_profile, maximum_distance, scaling_range) - assert result.shape == expected_shape + assert out_image.shape == expected_shape # Assert that all values in result within scaling_range - assert np.all((result <= scaling_range[1]) & (result >= scaling_range[0])), "Scaling out of scaling_range" + assert np.all((out_image <= scaling_range[1]) & (out_image >= scaling_range[0])), "Scaling out of scaling_range" def test_proximity_computation_with_expected_error(): """Tests if an exception is raised for a negative maximum distance.""" with pytest.raises(NumericValueSignException, match="Expected max distance to be a positive number."): - result = proximity_computation(gdf, raster_profile, -25, (1, 0)) - assert np.all((result >= 0) & (result <= 1)), "Scaling out of scaling_range" + out_image, _ = proximity_computation(gdf, raster_profile, -25, (1, 0)) + assert np.all((out_image >= 0) & (out_image <= 1)), "Scaling out of scaling_range"