Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 39 additions & 7 deletions eis_toolkit/training_data_tools/points_to_raster.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,29 @@
import math
from numbers import Number

import geopandas
import numpy as np
import pandas as pd
from beartype import beartype
from beartype.typing import Literal, Optional, Tuple, Union
from rasterio import profiles, transform
from scipy.ndimage import binary_dilation

from eis_toolkit.exceptions import EmptyDataFrameException, NonMatchingCrsException
from eis_toolkit.exceptions import (
EmptyDataFrameException,
InvalidColumnException,
NonMatchingCrsException,
NonNumericDataException,
)
from eis_toolkit.utilities.checks.raster import check_raster_profile


def _convert_radius(radius: int, x: Number, y: Number) -> int:
raster_radius = math.sqrt(x**2 + y**2) # RADIUS OF A SINGLE PIXEL
r = radius / raster_radius
return math.ceil(r) if r - math.floor(r) >= 0.5 else math.floor(r)


def _get_kernel_size(radius: int) -> tuple[int, int]:
size = 1 + (radius * 2)
return size, radius
Expand Down Expand Up @@ -48,7 +61,7 @@ def _create_local_buffer(
def _create_buffer_around_labels(
array: np.ndarray,
radius: int = 1,
target_value: int = 1,
target_value: Number = 1,
buffer: Optional[str] = None,
overwrite_nodata: bool = False,
) -> np.ndarray:
Expand Down Expand Up @@ -104,6 +117,9 @@ def _point_to_raster(raster_array, raster_meta, geodataframe, attribute, radius,
unique_values = list(set(values))

if radius is not None:
x = raster_transform[0]
y = raster_transform[4]
radius = _convert_radius(radius, x, y)
for target_value in unique_values:
raster_array = _create_buffer_around_labels(raster_array, radius, target_value, buffer)

Expand All @@ -115,18 +131,24 @@ def points_to_raster(
geodataframe: geopandas.GeoDataFrame,
raster_profile: Union[profiles.Profile, dict],
attribute: Optional[str] = None,
radius: Optional[int] = None,
radius: Optional[Number] = None,
buffer: Optional[Literal["min", "avg", "max"]] = None,
) -> Tuple[np.ndarray, Union[profiles.Profile, dict]]:
"""Convert a point data set into a binary raster.
"""Convert a GeoDataFrame of points into a binary raster using a provided base raster profile.

Assigs attribute values if provided else 1 to pixels corresponding to the points and 0 elsewhere.
Accepts a base raster profile and a geodataframe with points to be converted to binary raster.
By default, the points are assigned a value of 1, and all other areas are set to 0. If an
attribute is provided, the raster will take the corresponding values from the attribute column
in the GeoDataFrame instead of 1. The base raster profile defines the template for the raster's
extent, resolution, and projection. Optionally, a radius can be applied around each point (with
units consistent with the raster profile) to expand the point's influence within the raster. In
the case of overlapping radii with different attribute values, a buffer can be used to resolve
the conflict by selecting the minimum, maximum, or average value from the overlapping pixels.

Args:
geodataframe: The geodataframe points set to be converted into raster.
raster_profile: The raster profile determining the output raster grid properties.
attribute: Name of the geodataframe column whose values are assigned to the raster instead of 1.
radius: Radius to be applied around the geodataframe in [m].
radius: Radius to be applied around the geodataframe with units consistent with raster profile.
buffer: Buffers the matrix value when two or more radii with different attribute value overlap.
'avg': performs averaging of the two attribute value
'min': minimum of the two attribute values
Expand All @@ -138,6 +160,8 @@ def points_to_raster(
Raises:
EmptyDataFrameException: The input GeoDataFrame is empty.
NonMatchingCrsException: The raster and geodataframe are not in the same CRS.
InvalidColumnException: The attribute column was not found in geodataframe.
NonNumericDataException: The attribute column contains non-numeric values.
"""

if geodataframe.empty:
Expand All @@ -146,6 +170,14 @@ def points_to_raster(
if raster_profile.get("crs") != geodataframe.crs:
raise NonMatchingCrsException("Expected coordinate systems to match between raster and GeoDataFrame.")

if attribute is not None:

if attribute not in geodataframe.columns:
raise InvalidColumnException(f"Attribute '{attribute}' not found in the geodataframe")

if not pd.to_numeric(geodataframe[attribute], errors="coerce").notna().all():
raise NonNumericDataException(f"Values in the '{attribute}' column are non numeric type")

check_raster_profile(raster_profile=raster_profile)

raster_width = raster_profile.get("width")
Expand Down
25 changes: 16 additions & 9 deletions eis_toolkit/training_data_tools/random_sampling.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from numbers import Number

import geopandas as gpd
import numpy as np
import rasterio
Expand All @@ -9,13 +7,13 @@
from rasterio import profiles
from shapely.geometry import Point

from eis_toolkit.exceptions import EmptyDataException
from eis_toolkit.exceptions import EmptyDataFrameException, NumericValueSignException


def _random_sampling(
indices: np.ndarray,
values: np.ndarray,
sample_number: Number,
sample_number: int,
random_seed: int,
) -> np.ndarray:

Expand All @@ -34,26 +32,35 @@ def _random_sampling(
def generate_negatives(
raster_array: np.ndarray,
raster_profile: Union[profiles.Profile, dict],
sample_number: Number,
sample_number: int,
random_seed: int = 48,
) -> Tuple[gpd.GeoDataFrame, np.ndarray, Union[profiles.Profile, dict]]:
"""Generate probable negatives from raster array with marked positives.
"""Generate probable negatives from binary raster array with marked positives.

Generates a list of random negative points from a binary raster array,
ensuring that these negatives do not overlap with the already marked positive
points. The positives can include points with or without attribute and radius,
as in the points_to_raster tool.

Args:
raster_array: Raster array with marked positives.
raster_array: Binary raster array with marked positives.
raster_profile: The raster profile determining the output raster grid properties.
sample_number: Maximum number of negatives to be generated.
sample_number: maximum number of negatives to be generated.
random_seed: Seed for generating random negatives.

Returns:
A tuple containing the shapely points, output raster as a NumPy array and updated metadata.

Raises:
EmptyDataFrameException: The raster array is empty.
NumericValueSignException: The sample number is negative or zero.
"""

if raster_array.size == 0:
raise EmptyDataException
raise EmptyDataFrameException("Expected non empty raster array.")

if sample_number <= 0:
raise NumericValueSignException("The sample number should be always be greater than zero")

out_array = np.copy(raster_array)

Expand Down
2 changes: 1 addition & 1 deletion notebooks/testing_points_to_raster.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@
"def plot_binary_raster_from_template_raster(template_raster_path):\n",
"\n",
" attribute = 'value'\n",
" radius = 3\n",
" radius = 8\n",
" \n",
" with rasterio.open(template_raster_path) as temp_raster:\n",
"\n",
Expand Down
12 changes: 6 additions & 6 deletions notebooks/testing_random_sampling.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -13,7 +13,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -35,7 +35,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 8,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -89,7 +89,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -102,7 +102,7 @@
" raster_profile=raster_profile)\n",
" \n",
" sampled_negatives, outarray2, _ = generate_negatives(raster_array=outarray,\n",
" raster_meta=outmeta,\n",
" raster_profile=outmeta,\n",
" sample_number=10,\n",
" random_seed=30)\n",
"\n",
Expand Down Expand Up @@ -140,7 +140,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 10,
"metadata": {},
"outputs": [
{
Expand Down
11 changes: 11 additions & 0 deletions tests/data/remote/nonsquared_pixelsize_raster.tif.aux.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<PAMDataset>
<PAMRasterBand band="1">
<Metadata>
<MDI key="STATISTICS_MINIMUM">0.12201571464539</MDI>
<MDI key="STATISTICS_MAXIMUM">999.97509765625</MDI>
<MDI key="STATISTICS_MEAN">494.99426819916</MDI>
<MDI key="STATISTICS_STDDEV">291.24802892273</MDI>
<MDI key="STATISTICS_VALID_PERCENT">100</MDI>
</Metadata>
</PAMRasterBand>
</PAMDataset>
47 changes: 46 additions & 1 deletion tests/training_data_tools/points_to_raster_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,20 @@
import pytest
import rasterio

from eis_toolkit.exceptions import NonMatchingCrsException
from eis_toolkit.exceptions import (
EmptyDataFrameException,
InvalidColumnException,
NonMatchingCrsException,
NonNumericDataException,
)
from eis_toolkit.training_data_tools.points_to_raster import points_to_raster
from tests.raster_processing.clip_test import raster_path as SMALL_RASTER_PATH

test_dir = Path(__file__).parent.parent
PATH_LABELS_GPKG = test_dir.joinpath("data/remote/interpolating/interpolation_test_data_small.gpkg")

geodataframe = gpd.read_file(PATH_LABELS_GPKG)
gdf = gpd.GeoDataFrame()


@pytest.mark.parametrize("geodataframe", [geodataframe]) # Case where CRS matches
Expand All @@ -31,6 +37,45 @@ def test_points_to_raster(geodataframe):
), f"Expected output array shape {(temp_raster.height,temp_raster.width)} but got {outarray.shape}"


@pytest.mark.parametrize("geodataframe", [geodataframe])
def test_InvalidColumnException(geodataframe):
    """Test that incorrect attribute raises InvalidColumnException."""
    with rasterio.open(SMALL_RASTER_PATH) as temp_raster:
        raster_profile = temp_raster.profile

    # Only the call under test sits inside pytest.raises, so a failure while
    # reading the template raster is reported as a setup error rather than
    # being evaluated against the expected exception.
    with pytest.raises(InvalidColumnException):
        points_to_raster(geodataframe=geodataframe, attribute="data", raster_profile=raster_profile)


@pytest.mark.parametrize("geodataframe", [geodataframe])
def test_Nonnumeric_Data(geodataframe):
    """Test that non numeric values in attribute column raises NonNumericDataException."""
    with rasterio.open(SMALL_RASTER_PATH) as temp_raster:
        raster_profile = temp_raster.profile

    # Only the call under test sits inside pytest.raises, so a failure while
    # reading the template raster is reported as a setup error rather than
    # being evaluated against the expected exception.
    with pytest.raises(NonNumericDataException):
        points_to_raster(geodataframe=geodataframe, attribute="id", raster_profile=raster_profile)


@pytest.mark.parametrize("geodataframe", [gdf])
def test_Empty_Dataframe(geodataframe):
    """Test that empty geodataframe raises EmptyDataFrameException."""
    with rasterio.open(SMALL_RASTER_PATH) as temp_raster:
        raster_profile = temp_raster.profile

    # Only the call under test sits inside pytest.raises, so a failure while
    # reading the template raster is reported as a setup error rather than
    # being evaluated against the expected exception.
    with pytest.raises(EmptyDataFrameException):
        points_to_raster(geodataframe=geodataframe, attribute="id", raster_profile=raster_profile)


@pytest.mark.parametrize("geodataframe", [geodataframe.to_crs(epsg=4326)]) # Case where CRS do not matches
def test_non_matching_crs_error(geodataframe):
"""Test that different crs raises NonMatchingCrsException."""
Expand Down
46 changes: 42 additions & 4 deletions tests/training_data_tools/random_sampling_test.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from pathlib import Path

import geopandas as gpd
import numpy as np
import pytest
import rasterio

from eis_toolkit.exceptions import EmptyDataFrameException, NumericValueSignException
from eis_toolkit.training_data_tools.points_to_raster import points_to_raster
from eis_toolkit.training_data_tools.random_sampling import generate_negatives
from tests.raster_processing.clip_test import raster_path as SMALL_RASTER_PATH
Expand All @@ -14,16 +16,18 @@
gdf = gpd.read_file(PATH_LABELS_GPKG)


@pytest.mark.parametrize("geodataframe", [gdf])
def test_points_to_raster(geodataframe):
@pytest.mark.parametrize("geodataframe, sample_number, random_seed", [(gdf, 10, 30)])
def test_points_to_raster(geodataframe, sample_number, random_seed):
"""Test that generate_negatives function works as expected."""
with rasterio.open(SMALL_RASTER_PATH) as temp_raster:
raster_profile = temp_raster.profile

outarray, outmeta = points_to_raster(geodataframe=gdf, attribute="value", raster_profile=raster_profile)
outarray, outmeta = points_to_raster(
geodataframe=geodataframe, attribute="value", raster_profile=raster_profile
)

sampled_negatives, outarray2, outmeta2 = generate_negatives(
raster_array=outarray, raster_meta=outmeta, sample_number=10, random_seed=30
raster_array=outarray, raster_profile=outmeta, sample_number=sample_number, random_seed=random_seed
)

row, col = rasterio.transform.rowcol(
Expand All @@ -36,3 +40,37 @@ def test_points_to_raster(geodataframe):
), f"Expected output array shape {(temp_raster.height, temp_raster.width)} but got {outarray2.shape}"

assert (outarray2[row, col] == -1).all()


@pytest.mark.parametrize("geodataframe, sample_number, random_seed", [(gdf, 10, 30)])
def test_Empty_Data_Frame_exception(geodataframe, sample_number, random_seed):
    """Test that generate_negatives function raises EmptyDataFrameException for an empty raster array."""
    with rasterio.open(SMALL_RASTER_PATH) as temp_raster:
        raster_profile = temp_raster.profile

    # Build the profile outside pytest.raises: points_to_raster can itself raise
    # EmptyDataFrameException, which would let this test pass without
    # generate_negatives ever being called.
    _, outmeta = points_to_raster(geodataframe=geodataframe, attribute="value", raster_profile=raster_profile)

    with pytest.raises(EmptyDataFrameException):
        generate_negatives(
            raster_array=np.array([]), raster_profile=outmeta, sample_number=sample_number, random_seed=random_seed
        )


@pytest.mark.parametrize("geodataframe, sample_number, random_seed", [(gdf, -10, 30), (gdf, 0, 30)])
def test_Numeric_value_sign_exception(geodataframe, sample_number, random_seed):
    """Test that generate_negatives function raises NumericValueSignException for negative and zero sample number."""
    with rasterio.open(SMALL_RASTER_PATH) as temp_raster:
        raster_profile = temp_raster.profile

    # Prepare the inputs outside pytest.raises so that only generate_negatives
    # is checked for the expected exception.
    outarray, outmeta = points_to_raster(geodataframe=geodataframe, attribute="value", raster_profile=raster_profile)

    with pytest.raises(NumericValueSignException):
        generate_negatives(
            raster_array=outarray, raster_profile=outmeta, sample_number=sample_number, random_seed=random_seed
        )
Loading