Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 37 additions & 18 deletions eis_toolkit/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3091,9 +3091,10 @@ def gamma_overlay_cli(input_rasters: INPUT_FILES_ARGUMENT, output_raster: OUTPUT
# WOFE
@app.command()
def weights_of_evidence_calculate_weights_cli(
input_raster: INPUT_FILE_OPTION,
input_vector: INPUT_FILE_OPTION,
output_dir: OUTPUT_DIR_OPTION,
evidential_raster: INPUT_FILE_OPTION,
deposits: INPUT_FILE_OPTION,
output_results_table: OUTPUT_FILE_OPTION,
output_raster_dir: OUTPUT_DIR_OPTION,
raster_nodata: Optional[float] = None,
weights_type: Annotated[WeightsType, typer.Option(case_sensitive=False)] = WeightsType.unique,
studentized_contrast_threshold: float = 1,
Expand All @@ -3102,6 +3103,9 @@ def weights_of_evidence_calculate_weights_cli(
"""
Calculate weights of spatial associations.

Save path for resulting CSV is set using --output-results-table parameter. Output rasters are saved to directory
set with --output-raster-dir parameter.

Parameter --studentized-contrast-threshold is used with 'categorical', 'ascending' and 'descending' weight types.

Parameter --arrays-to-generate controls which columns in the weights dataframe are returned as arrays. All column
Expand All @@ -3115,8 +3119,13 @@ def weights_of_evidence_calculate_weights_cli(

typer.echo("Progress: 10%")

evidential_raster = rasterio.open(input_raster)
deposits = gpd.read_file(input_vector)
evidential_raster = rasterio.open(evidential_raster)

if deposits.suffix in (".tif", ".tiff", ".asc", ".img", ".vrt", ".grd"):
deposits = rasterio.open(deposits)
else:
deposits = gpd.read_file(deposits)

typer.echo("Progress: 25%")

if arrays_to_generate == []:
Expand All @@ -3132,36 +3141,42 @@ def weights_of_evidence_calculate_weights_cli(
)
typer.echo("Progress: 75%")

df.to_csv(output_dir.joinpath("wofe_results.csv"))
df.to_csv(output_results_table)

out_rasters_dict = {}
file_name = input_raster.name.split(".")[0]
file_name = evidential_raster.name.split("/")[-1].split(".")[0]
raster_meta.pop("dtype") # Remove dtype from metadata to set it individually

for key, array in arrays.items():
# Set correct dtype for the array
if key in ["Class", "Pixel count", "Deposit count"]:
dtype = np.uint8
else:
dtype = np.float32

array = nan_to_nodata(array, raster_meta["nodata"])
output_raster_name = file_name + "_weights_" + weights_type + "_" + key
output_raster_path = output_dir.joinpath(output_raster_name + ".tif")
with rasterio.open(output_raster_path, "w", **raster_meta) as dst:
output_raster_path = output_raster_dir.joinpath(output_raster_name + ".tif")
with rasterio.open(output_raster_path, "w", dtype=dtype, **raster_meta) as dst:
dst.write(array, 1)
out_rasters_dict[output_raster_name] = str(output_raster_path)

json_str = json.dumps(out_rasters_dict)
typer.echo("Progress 100%")

typer.echo(f"Number of deposit pixels: {nr_of_deposits}")
typer.echo(f"Number of all evidence pixels: {nr_of_pixels}")
typer.echo(f"Output rasters: {json_str}")
typer.echo(f"Weight calculations completed, rasters and CSV saved to {output_dir}.")
typer.echo(f"Weight calculations completed, rasters saved to {output_raster_dir}.")
typer.echo(f"Weights table saved to {output_results_table}.")


@app.command()
def weights_of_evidence_calculate_responses_cli(
input_rasters_weights: INPUT_FILES_OPTION,
input_rasters_standard_deviations: INPUT_FILES_OPTION,
input_weights_table: INPUT_FILE_OPTION,
output_probabilities: OUTPUT_FILE_OPTION,
output_probabilities_std: OUTPUT_FILE_OPTION,
output_confidence_array: OUTPUT_FILE_OPTION,
nr_of_deposits: Annotated[int, typer.Option()],
nr_of_pixels: Annotated[int, typer.Option()],
):
"""
Calculate the posterior probabilities for the given generalized weight arrays.
Expand Down Expand Up @@ -3198,10 +3213,12 @@ def weights_of_evidence_calculate_responses_cli(

dict_array.append({"W+": array_W, "S_W+": array_S_W})

weights_df = pd.read_csv(input_weights_table)

typer.echo("Progress: 25%")

posterior_probabilities, posterior_probabilies_std, confidence_array = weights_of_evidence_calculate_responses(
output_arrays=dict_array, nr_of_deposits=nr_of_deposits, nr_of_pixels=nr_of_pixels
output_arrays=dict_array, weights_df=weights_df
)
typer.echo("Progress: 75%")

Expand All @@ -3220,7 +3237,7 @@ def weights_of_evidence_calculate_responses_cli(
typer.echo("Progress: 100%")

typer.echo(
f"Responses calculations finished, writing output rasters to {output_probabilities}, \
f"Posterior probability calculations finished, output rasters saved to {output_probabilities}, \
{output_probabilities_std} and {output_confidence_array}"
)

Expand All @@ -3229,7 +3246,7 @@ def weights_of_evidence_calculate_responses_cli(
def agterberg_cheng_CI_test_cli(
input_posterior_probabilities: INPUT_FILE_OPTION,
input_posterior_probabilities_std: INPUT_FILE_OPTION,
nr_of_deposits: Annotated[int, typer.Option()],
input_weights_table: INPUT_FILE_OPTION,
):
"""Perform the conditional independence test presented by Agterberg-Cheng (2002)."""
from eis_toolkit.prediction.weights_of_evidence import agterberg_cheng_CI_test
Expand All @@ -3244,12 +3261,14 @@ def agterberg_cheng_CI_test_cli(
posterior_probabilities_std = src.read(1)
posterior_probabilities_std = nodata_to_nan(posterior_probabilities_std, src.nodata)

weights_df = pd.read_csv(input_weights_table)

typer.echo("Progress: 25%")

_, _, _, _, summary = agterberg_cheng_CI_test(
posterior_probabilities=posterior_probabilities,
posterior_probabilities_std=posterior_probabilities_std,
nr_of_deposits=nr_of_deposits,
weights_df=weights_df,
)

typer.echo("Progress: 100%")
Expand Down
73 changes: 52 additions & 21 deletions eis_toolkit/prediction/weights_of_evidence.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,15 @@
import pandas as pd
import rasterio
from beartype import beartype
from beartype.typing import Dict, List, Literal, Optional, Sequence, Tuple

from eis_toolkit.exceptions import ClassificationFailedException, InvalidColumnException, InvalidParameterValueException
from beartype.typing import Dict, List, Literal, Optional, Sequence, Tuple, Union

from eis_toolkit.exceptions import (
ClassificationFailedException,
InvalidColumnException,
InvalidParameterValueException,
NonMatchingRasterMetadataException,
)
from eis_toolkit.utilities.checks.raster import check_raster_grids
from eis_toolkit.vector_processing.rasterize_vector import rasterize_vector
from eis_toolkit.warnings import ClassificationFailedWarning, InvalidColumnWarning

Expand Down Expand Up @@ -68,7 +74,7 @@
}


def _read_and_preprocess_evidence(
def _read_and_preprocess_raster_data(
raster: rasterio.io.DatasetReader, nodata: Optional[Number] = None, band: int = 1
) -> np.ndarray:
"""Read raster data and handle NoData values."""
Expand Down Expand Up @@ -244,6 +250,22 @@ def _generate_arrays_from_metrics(
return array_dict


def _calculate_nr_of_deposit_pixels(array: np.ndarray, df: pd.DataFrame) -> Tuple[int, int]:
masked_array = array[~np.isnan(array)]
nr_of_pixels = int(np.size(masked_array))

pixels_column = df["Pixel count"]

match = pixels_column == nr_of_pixels
if match.any():
nr_of_deposits = df.loc[match, "Deposit count"].iloc[0]
else:
nr_of_pixels = df["Pixel count"].sum()
nr_of_deposits = df["Deposit count"].sum()

return nr_of_deposits, nr_of_pixels


@beartype
def generalize_weights_cumulative(
df: pd.DataFrame,
Expand Down Expand Up @@ -349,7 +371,7 @@ def generalize_weights_cumulative(
@beartype
def weights_of_evidence_calculate_weights(
evidential_raster: rasterio.io.DatasetReader,
deposits: gpd.GeoDataFrame,
deposits: Union[gpd.GeoDataFrame, rasterio.io.DatasetReader],
raster_nodata: Optional[Number] = None,
weights_type: Literal["unique", "categorical", "ascending", "descending"] = "unique",
studentized_contrast_threshold: Number = 1,
Expand All @@ -360,7 +382,7 @@ def weights_of_evidence_calculate_weights(

Args:
evidential_raster: The evidential raster.
deposits: Vector data representing the mineral deposits or occurences point data.
deposits: Vector or raster data representing the mineral deposits or occurrences point data.
raster_nodata: If nodata value of raster is wanted to specify manually. Optional parameter, defaults to None
(nodata from raster metadata is used).
weights_type: Accepted values are 'unique', 'categorical', 'ascending' and 'descending'.
Expand Down Expand Up @@ -409,13 +431,22 @@ def weights_of_evidence_calculate_weights(
metrics_to_arrays = arrays_to_generate.copy()

# 1. Preprocess data
evidence_array = _read_and_preprocess_evidence(evidential_raster, raster_nodata)
evidence_array = _read_and_preprocess_raster_data(evidential_raster, raster_nodata)
raster_meta = evidential_raster.meta
raster_profile = evidential_raster.profile

# Rasterize deposits
deposit_array = rasterize_vector(
geodataframe=deposits, raster_profile=raster_meta, default_value=1.0, fill_value=0.0
)
# Rasterize deposits if vector data
if isinstance(deposits, gpd.GeoDataFrame):
deposit_array = rasterize_vector(
geodataframe=deposits, raster_profile=raster_meta, default_value=1.0, fill_value=0.0
)
else:
deposit_profile = deposits.profile

if check_raster_grids([raster_profile, deposit_profile], same_extent=True):
deposit_array = _read_and_preprocess_raster_data(deposits, raster_nodata)
else:
raise NonMatchingRasterMetadataException("Input rasters should have the same grid properties.")

# Mask NaN out of the array
nodata_mask = np.isnan(evidence_array)
Expand Down Expand Up @@ -482,7 +513,7 @@ def weights_of_evidence_calculate_weights(

@beartype
def weights_of_evidence_calculate_responses(
output_arrays: Sequence[Dict[str, np.ndarray]], nr_of_deposits: int, nr_of_pixels: int
output_arrays: Sequence[Dict[str, np.ndarray]], weights_df: pd.DataFrame
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
"""Calculate the posterior probabilities for the given generalized weight arrays.

Expand All @@ -491,14 +522,17 @@ def weights_of_evidence_calculate_responses(
For each dictionary, generalized weight and generalized standard deviation arrays are used and summed
together pixel-wise to calculate the posterior probabilities. If generalized arrays are not found,
the W+ and S_W+ arrays are used (so if outputs from unique weight calculations are used for this function).
nr_of_deposits: Number of deposit pixels in the input data for weights of evidence calculations.
nr_of_pixels: Number of evidence pixels in the input data for weights of evidence calculations.
weights_df: Output dataframe of WofE calculate weights algorithm. Used for determining number of deposits and
number of pixels.

Returns:
Array of posterior probabilites.
Array of standard deviations in the posterior probability calculations.
Array of confidence of the prospectivity values obtained in the posterior probability array.
"""
array = list(output_arrays[0].values())[0]
nr_of_deposits, nr_of_pixels = _calculate_nr_of_deposit_pixels(array, weights_df)

gen_weights_sum = sum(
[
item[GENERALIZED_WEIGHT_PLUS_COLUMN]
Expand Down Expand Up @@ -531,7 +565,7 @@ def weights_of_evidence_calculate_responses(

@beartype
def agterberg_cheng_CI_test(
posterior_probabilities: np.ndarray, posterior_probabilities_std: np.ndarray, nr_of_deposits: int
posterior_probabilities: np.ndarray, posterior_probabilities_std: np.ndarray, weights_df: pd.DataFrame
) -> Tuple[bool, bool, bool, float, str]:
"""Perform the conditional independence test presented by Agterberg-Cheng (2002).

Expand All @@ -541,7 +575,8 @@ def agterberg_cheng_CI_test(
Args:
posterior_probabilities: Array of posterior probabilites.
posterior_probabilities_std: Array of standard deviations in the posterior probability calculations.
nr_of_deposits: Number of deposit pixels in the input data for weights of evidence calculations.
weights_df: Output dataframe of WofE calculate weights algorithm. Used for determining number of deposits.

Returns:
Whether the conditional hypothesis can be accepted for the evidence layers that the input
posterior probabilities and standard deviations of posterior probabilities are calculated from.
Expand All @@ -550,12 +585,8 @@ def agterberg_cheng_CI_test(
Ratio T/n. Results > 1, may be because of lack of conditional independence of layers.
T should not exceed n by more than 15% (Bonham-Carter 1994, p. 316).
A summary of the conditional independence calculations.

Raises:
InvalidParameterValueException: Value of nr_of_deposits is not at least 1.
"""
if nr_of_deposits < 1:
raise InvalidParameterValueException("Expected input deposits count to be at least 1.")
nr_of_deposits, _ = _calculate_nr_of_deposit_pixels(posterior_probabilities, weights_df)

# One-tailed significance test according to Agterberg-Cheng (2002):
# Conditional independence must satisfy:
Expand Down
14 changes: 10 additions & 4 deletions eis_toolkit/utilities/checks/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import rasterio.transform
from beartype import beartype
from beartype.typing import Iterable, Sequence, Union
from rasterio.crs import CRS

from eis_toolkit.exceptions import InvalidParameterValueException

Expand Down Expand Up @@ -37,21 +38,25 @@ def check_matching_crs(objects: Iterable) -> bool:
Returns:
True if everything matches, False if not.
"""
epsg_list = []
crs_list = []

for object in objects:
if not isinstance(object, (rasterio.profiles.Profile, dict)):
if not object.crs:
return False
epsg = object.crs.to_epsg()
epsg_list.append(epsg)
crs_list.append(epsg)
else:
if "crs" in object:
epsg_list.append(object["crs"])
crs_object = object["crs"]
if type(crs_object) == CRS:
crs_list.append(crs_object.to_epsg())
else:
crs_list.append(crs_object)
else:
return False

if len(set(epsg_list)) != 1:
if len(set(crs_list)) != 1:
return False

return True
Expand Down Expand Up @@ -119,6 +124,7 @@ def check_raster_grids(
Returns:
True if gridding and optionally bounds matches, False if not.
"""

if not check_matching_crs(raster_profiles):
return False
if not check_matching_pixel_alignment(raster_profiles):
Expand Down
Loading