diff --git a/eis_toolkit/training_data_tools/points_to_raster.py b/eis_toolkit/training_data_tools/points_to_raster.py index bf5d8b61..a6600ae0 100644 --- a/eis_toolkit/training_data_tools/points_to_raster.py +++ b/eis_toolkit/training_data_tools/points_to_raster.py @@ -1,16 +1,29 @@ +import math from numbers import Number import geopandas import numpy as np +import pandas as pd from beartype import beartype from beartype.typing import Literal, Optional, Tuple, Union from rasterio import profiles, transform from scipy.ndimage import binary_dilation -from eis_toolkit.exceptions import EmptyDataFrameException, NonMatchingCrsException +from eis_toolkit.exceptions import ( + EmptyDataFrameException, + InvalidColumnException, + NonMatchingCrsException, + NonNumericDataException, +) from eis_toolkit.utilities.checks.raster import check_raster_profile +def _convert_radius(radius: Number, x: Number, y: Number) -> int: + raster_radius = math.sqrt(x**2 + y**2) # Diagonal length of a single pixel + r = radius / raster_radius + return math.ceil(r) if r - math.floor(r) >= 0.5 else math.floor(r) # Round half up to whole pixels + + def _get_kernel_size(radius: int) -> tuple[int, int]: size = 1 + (radius * 2) return size, radius @@ -48,7 +61,7 @@ def _create_local_buffer( def _create_buffer_around_labels( array: np.ndarray, radius: int = 1, - target_value: int = 1, + target_value: Number = 1, buffer: Optional[str] = None, overwrite_nodata: bool = False, ) -> np.ndarray: @@ -104,6 +117,9 @@ def _point_to_raster(raster_array, raster_meta, geodataframe, attribute, radius, unique_values = list(set(values)) if radius is not None: + x = raster_transform[0] + y = raster_transform[4] + radius = _convert_radius(radius, x, y) for target_value in unique_values: raster_array = _create_buffer_around_labels(raster_array, radius, target_value, buffer) @@ -115,18 +131,24 @@ def points_to_raster( geodataframe: geopandas.GeoDataFrame, raster_profile: Union[profiles.Profile, dict], attribute: Optional[str] = None, - radius: Optional[int] = 
None, + radius: Optional[Number] = None, buffer: Optional[Literal["min", "avg", "max"]] = None, ) -> Tuple[np.ndarray, Union[profiles.Profile, dict]]: - """Convert a point data set into a binary raster. + """Convert a GeoDataFrame of points into a binary raster using a provided base raster profile. - Assigs attribute values if provided else 1 to pixels corresponding to the points and 0 elsewhere. + Accepts a base raster profile and a GeoDataFrame with points to be converted to a binary raster. By default, + the points are assigned a value of 1, and all other areas are set to 0. If an attribute is provided, the + raster takes the corresponding values from the attribute column in the GeoDataFrame instead of 1. The base + raster profile defines the template for the raster's extent, resolution, and projection. Optionally, a + radius can be applied around each point (with units consistent with the raster profile) to expand the + point's influence within the raster. If radii with different attribute values overlap, a buffer can be + used to resolve the conflict by selecting the minimum, maximum, or average of the overlapping values. Args: geodataframe: The geodataframe points set to be converted into raster. raster_profile: The raster profile determining the output raster grid properties. - attribute: Values to be be assigned to the geodataframe. - radius: Radius to be applied around the geodataframe in [m]. + attribute: Name of the column whose values are assigned to the raster instead of 1. + radius: Radius to be applied around each point, with units consistent with the raster profile. buffer: Buffers the matrix value when two or more radii with different attribute value overlap. 'avg': performs averaging of the two attribute value 'min': minimum of the two attribute values @@ -138,6 +160,8 @@ def points_to_raster( Raises: EmptyDataFrameException: The input GeoDataFrame is empty. NonMatchingCrsException: The raster and geodataframe are not in the same CRS. 
+ InvalidColumnException: The attribute column was not found in geodataframe. + NonNumericDataException: The attribute column contains non-numeric values. """ if geodataframe.empty: @@ -146,6 +170,14 @@ def points_to_raster( if raster_profile.get("crs") != geodataframe.crs: raise NonMatchingCrsException("Expected coordinate systems to match between raster and GeoDataFrame.") + if attribute is not None: + + if attribute not in geodataframe.columns: + raise InvalidColumnException(f"Attribute '{attribute}' not found in the geodataframe") + + if not pd.to_numeric(geodataframe[attribute], errors="coerce").notna().all(): + raise NonNumericDataException(f"Values in the '{attribute}' column are non numeric type") + check_raster_profile(raster_profile=raster_profile) raster_width = raster_profile.get("width") diff --git a/eis_toolkit/training_data_tools/random_sampling.py b/eis_toolkit/training_data_tools/random_sampling.py index 8dc979b3..e785be2f 100644 --- a/eis_toolkit/training_data_tools/random_sampling.py +++ b/eis_toolkit/training_data_tools/random_sampling.py @@ -1,5 +1,3 @@ -from numbers import Number - import geopandas as gpd import numpy as np import rasterio @@ -9,13 +7,13 @@ from rasterio import profiles from shapely.geometry import Point -from eis_toolkit.exceptions import EmptyDataException +from eis_toolkit.exceptions import EmptyDataFrameException, NumericValueSignException def _random_sampling( indices: np.ndarray, values: np.ndarray, - sample_number: Number, + sample_number: int, random_seed: int, ) -> np.ndarray: @@ -34,15 +32,20 @@ def _random_sampling( def generate_negatives( raster_array: np.ndarray, raster_profile: Union[profiles.Profile, dict], - sample_number: Number, + sample_number: int, random_seed: int = 48, ) -> Tuple[gpd.GeoDataFrame, np.ndarray, Union[profiles.Profile, dict]]: - """Generate probable negatives from raster array with marked positives. + """Generate probable negatives from a binary raster array with marked positives. 
+ + Generates a list of random negative points from a binary raster array, + ensuring that these negatives do not overlap with the already marked positive + points. The positives can include points with or without attribute and radius, + as in the points_to_raster tool. Args: - raster_array: Raster array with marked positives. + raster_array: Binary raster array with marked positives. raster_profile: The raster profile determining the output raster grid properties. - sample_number: Maximum number of negatives to be generated. + sample_number: Maximum number of negatives to be generated. Must be greater than zero. random_seed: Seed for generating random negatives. Returns: @@ -50,10 +53,14 @@ def generate_negatives( Raises: - EmptyDataException: The raster array is empty. + EmptyDataFrameException: The raster array is empty. + NumericValueSignException: The sample number is negative or zero. """ if raster_array.size == 0: - raise EmptyDataException + raise EmptyDataFrameException("Expected non empty raster array.") + + if sample_number <= 0: + raise NumericValueSignException("The sample number should always be greater than zero") out_array = np.copy(raster_array) diff --git a/notebooks/testing_points_to_raster.ipynb b/notebooks/testing_points_to_raster.ipynb index ffeddb61..b8707c1a 100644 --- a/notebooks/testing_points_to_raster.ipynb +++ b/notebooks/testing_points_to_raster.ipynb @@ -109,7 +109,7 @@ "def plot_binary_raster_from_template_raster(template_raster_path):\n", "\n", " attribute = 'value'\n", - " radius = 3\n", + " radius = 8\n", " \n", " with rasterio.open(template_raster_path) as temp_raster:\n", "\n", diff --git a/notebooks/testing_random_sampling.ipynb b/notebooks/testing_random_sampling.ipynb index 64d7f5be..9e629711 100644 --- a/notebooks/testing_random_sampling.ipynb +++ b/notebooks/testing_random_sampling.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 2, + 
"execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -89,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -102,7 +102,7 @@ " raster_profile=raster_profile)\n", " \n", " sampled_negatives, outarray2, _ = generate_negatives(raster_array=outarray,\n", - " raster_meta=outmeta,\n", + " raster_profile=outmeta,\n", " sample_number=10,\n", " random_seed=30)\n", "\n", @@ -140,7 +140,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 10, "metadata": {}, "outputs": [ { diff --git a/tests/data/remote/nonsquared_pixelsize_raster.tif.aux.xml b/tests/data/remote/nonsquared_pixelsize_raster.tif.aux.xml new file mode 100644 index 00000000..e2093664 --- /dev/null +++ b/tests/data/remote/nonsquared_pixelsize_raster.tif.aux.xml @@ -0,0 +1,11 @@ + + + + 0.12201571464539 + 999.97509765625 + 494.99426819916 + 291.24802892273 + 100 + + + diff --git a/tests/training_data_tools/points_to_raster_test.py b/tests/training_data_tools/points_to_raster_test.py index 11a1b04d..3853d124 100644 --- a/tests/training_data_tools/points_to_raster_test.py +++ b/tests/training_data_tools/points_to_raster_test.py @@ -4,7 +4,12 @@ import pytest import rasterio -from eis_toolkit.exceptions import NonMatchingCrsException +from eis_toolkit.exceptions import ( + EmptyDataFrameException, + InvalidColumnException, + NonMatchingCrsException, + NonNumericDataException, +) from eis_toolkit.training_data_tools.points_to_raster import points_to_raster from tests.raster_processing.clip_test import raster_path as SMALL_RASTER_PATH @@ -12,6 +17,7 @@ PATH_LABELS_GPKG = test_dir.joinpath("data/remote/interpolating/interpolation_test_data_small.gpkg") geodataframe = gpd.read_file(PATH_LABELS_GPKG) +gdf = gpd.GeoDataFrame() @pytest.mark.parametrize("geodataframe", [geodataframe]) # 
Case where CRS matches @@ -31,6 +37,45 @@ def test_points_to_raster(geodataframe): ), f"Expected output array shape {(temp_raster.height,temp_raster.width)} but got {outarray.shape}" +@pytest.mark.parametrize("geodataframe", [geodataframe]) +def test_invalid_column_exception(geodataframe): + """Test that incorrect attribute raises InvalidColumnException.""" + with pytest.raises(InvalidColumnException): + with rasterio.open(SMALL_RASTER_PATH) as temp_raster: + + raster_profile = temp_raster.profile + + outarray, outmeta = points_to_raster( + geodataframe=geodataframe, attribute="data", raster_profile=raster_profile + ) + + +@pytest.mark.parametrize("geodataframe", [geodataframe]) +def test_non_numeric_data(geodataframe): + """Test that non-numeric values in the attribute column raise NonNumericDataException.""" + with pytest.raises(NonNumericDataException): + with rasterio.open(SMALL_RASTER_PATH) as temp_raster: + + raster_profile = temp_raster.profile + + outarray, outmeta = points_to_raster( + geodataframe=geodataframe, attribute="id", raster_profile=raster_profile + ) + + +@pytest.mark.parametrize("geodataframe", [gdf]) +def test_empty_dataframe(geodataframe): + """Test that empty geodataframe raises EmptyDataFrameException.""" + with pytest.raises(EmptyDataFrameException): + with rasterio.open(SMALL_RASTER_PATH) as temp_raster: + + raster_profile = temp_raster.profile + + outarray, outmeta = points_to_raster( + geodataframe=geodataframe, attribute="id", raster_profile=raster_profile + ) + + @pytest.mark.parametrize("geodataframe", [geodataframe.to_crs(epsg=4326)]) # Case where CRS do not matches def test_non_matching_crs_error(geodataframe): """Test that different crs raises NonMatchingCrsException.""" diff --git a/tests/training_data_tools/random_sampling_test.py b/tests/training_data_tools/random_sampling_test.py index c1a2da56..16e89b01 100644 --- a/tests/training_data_tools/random_sampling_test.py +++ b/tests/training_data_tools/random_sampling_test.py @@ -1,9 
+1,11 @@ from pathlib import Path import geopandas as gpd +import numpy as np import pytest import rasterio +from eis_toolkit.exceptions import EmptyDataFrameException, NumericValueSignException from eis_toolkit.training_data_tools.points_to_raster import points_to_raster from eis_toolkit.training_data_tools.random_sampling import generate_negatives from tests.raster_processing.clip_test import raster_path as SMALL_RASTER_PATH @@ -14,16 +16,18 @@ gdf = gpd.read_file(PATH_LABELS_GPKG) -@pytest.mark.parametrize("geodataframe", [gdf]) -def test_points_to_raster(geodataframe): +@pytest.mark.parametrize("geodataframe, sample_number, random_seed", [(gdf, 10, 30)]) +def test_generate_negatives(geodataframe, sample_number, random_seed): """Test that generate_negatives function works as expected.""" with rasterio.open(SMALL_RASTER_PATH) as temp_raster: raster_profile = temp_raster.profile - outarray, outmeta = points_to_raster(geodataframe=gdf, attribute="value", raster_profile=raster_profile) + outarray, outmeta = points_to_raster( + geodataframe=geodataframe, attribute="value", raster_profile=raster_profile + ) sampled_negatives, outarray2, outmeta2 = generate_negatives( - raster_array=outarray, raster_meta=outmeta, sample_number=10, random_seed=30 + raster_array=outarray, raster_profile=outmeta, sample_number=sample_number, random_seed=random_seed ) row, col = rasterio.transform.rowcol( @@ -36,3 +40,37 @@ def test_points_to_raster(geodataframe): ), f"Expected output array shape {(temp_raster.height, temp_raster.width)} but got {outarray2.shape}" assert (outarray2[row, col] == -1).all() + + +@pytest.mark.parametrize("geodataframe, sample_number, random_seed", [(gdf, 10, 30)]) +def test_empty_raster_array_exception(geodataframe, sample_number, random_seed): + """Test that generate_negatives function raises EmptyDataFrameException for an empty raster array.""" + with pytest.raises(EmptyDataFrameException): + with rasterio.open(SMALL_RASTER_PATH) as temp_raster: + raster_profile 
= temp_raster.profile + + outarray, outmeta = points_to_raster( + geodataframe=geodataframe, attribute="value", raster_profile=raster_profile + ) + + outarray = np.array([]) + + sampled_negatives, outarray2, outmeta2 = generate_negatives( + raster_array=outarray, raster_profile=outmeta, sample_number=sample_number, random_seed=random_seed + ) + + +@pytest.mark.parametrize("geodataframe, sample_number, random_seed", [(gdf, -10, 30), (gdf, 0, 30)]) +def test_numeric_value_sign_exception(geodataframe, sample_number, random_seed): + """Test that generate_negatives function raises NumericValueSignException for negative and zero sample number.""" + with pytest.raises(NumericValueSignException): + with rasterio.open(SMALL_RASTER_PATH) as temp_raster: + raster_profile = temp_raster.profile + + outarray, outmeta = points_to_raster( + geodataframe=geodataframe, attribute="value", raster_profile=raster_profile + ) + + sampled_negatives, outarray2, outmeta2 = generate_negatives( + raster_array=outarray, raster_profile=outmeta, sample_number=sample_number, random_seed=random_seed + )