diff --git a/eis_toolkit/cli.py b/eis_toolkit/cli.py index a723a99c..1ad52c24 100644 --- a/eis_toolkit/cli.py +++ b/eis_toolkit/cli.py @@ -3091,9 +3091,10 @@ def gamma_overlay_cli(input_rasters: INPUT_FILES_ARGUMENT, output_raster: OUTPUT # WOFE @app.command() def weights_of_evidence_calculate_weights_cli( - input_raster: INPUT_FILE_OPTION, - input_vector: INPUT_FILE_OPTION, - output_dir: OUTPUT_DIR_OPTION, + evidential_raster: INPUT_FILE_OPTION, + deposits: INPUT_FILE_OPTION, + output_results_table: OUTPUT_FILE_OPTION, + output_raster_dir: OUTPUT_DIR_OPTION, raster_nodata: Optional[float] = None, weights_type: Annotated[WeightsType, typer.Option(case_sensitive=False)] = WeightsType.unique, studentized_contrast_threshold: float = 1, @@ -3102,6 +3103,9 @@ def weights_of_evidence_calculate_weights_cli( """ Calculate weights of spatial associations. + Save path for resulting CSV is set using --output-results-table parameter. Output rasters are saved to directory + set with --output-raster-dir parameter. + Parameter --studentized-contrast-threshold is used with 'categorical', 'ascending' and 'descending' weight types. Parameter --arrays-to-generate controls which columns in the weights dataframe are returned as arrays. 
All column @@ -3115,8 +3119,13 @@ def weights_of_evidence_calculate_weights_cli( typer.echo("Progress: 10%") - evidential_raster = rasterio.open(input_raster) - deposits = gpd.read_file(input_vector) + evidential_raster = rasterio.open(evidential_raster) + + if deposits.suffix in (".tif", ".tiff", ".asc", ".img", ".vrt", ".grd"): + deposits = rasterio.open(deposits) + else: + deposits = gpd.read_file(deposits) + typer.echo("Progress: 25%") if arrays_to_generate == []: @@ -3132,36 +3141,42 @@ def weights_of_evidence_calculate_weights_cli( ) typer.echo("Progress: 75%") - df.to_csv(output_dir.joinpath("wofe_results.csv")) + df.to_csv(output_results_table) out_rasters_dict = {} - file_name = input_raster.name.split(".")[0] + file_name = evidential_raster.name.split("/")[-1].split(".")[0] + raster_meta.pop("dtype") # Remove dtype from metadata to set it individually + for key, array in arrays.items(): + # Set correct dtype for the array + if key in ["Class", "Pixel count", "Deposit count"]: + dtype = np.uint8 + else: + dtype = np.float32 + array = nan_to_nodata(array, raster_meta["nodata"]) output_raster_name = file_name + "_weights_" + weights_type + "_" + key - output_raster_path = output_dir.joinpath(output_raster_name + ".tif") - with rasterio.open(output_raster_path, "w", **raster_meta) as dst: + output_raster_path = output_raster_dir.joinpath(output_raster_name + ".tif") + with rasterio.open(output_raster_path, "w", dtype=dtype, **raster_meta) as dst: dst.write(array, 1) out_rasters_dict[output_raster_name] = str(output_raster_path) json_str = json.dumps(out_rasters_dict) typer.echo("Progress 100%") - typer.echo(f"Number of deposit pixels: {nr_of_deposits}") - typer.echo(f"Number of all evidence pixels: {nr_of_pixels}") typer.echo(f"Output rasters: {json_str}") - typer.echo(f"Weight calculations completed, rasters and CSV saved to {output_dir}.") + typer.echo(f"Weight calculations completed, rasters saved to {output_raster_dir}.") + typer.echo(f"Weights table 
saved to {output_results_table}.") @app.command() def weights_of_evidence_calculate_responses_cli( input_rasters_weights: INPUT_FILES_OPTION, input_rasters_standard_deviations: INPUT_FILES_OPTION, + input_weights_table: INPUT_FILE_OPTION, output_probabilities: OUTPUT_FILE_OPTION, output_probabilities_std: OUTPUT_FILE_OPTION, output_confidence_array: OUTPUT_FILE_OPTION, - nr_of_deposits: Annotated[int, typer.Option()], - nr_of_pixels: Annotated[int, typer.Option()], ): """ Calculate the posterior probabilities for the given generalized weight arrays. @@ -3198,10 +3213,12 @@ def weights_of_evidence_calculate_responses_cli( dict_array.append({"W+": array_W, "S_W+": array_S_W}) + weights_df = pd.read_csv(input_weights_table) + typer.echo("Progress: 25%") posterior_probabilities, posterior_probabilies_std, confidence_array = weights_of_evidence_calculate_responses( - output_arrays=dict_array, nr_of_deposits=nr_of_deposits, nr_of_pixels=nr_of_pixels + output_arrays=dict_array, weights_df=weights_df ) typer.echo("Progress: 75%") @@ -3220,7 +3237,7 @@ def weights_of_evidence_calculate_responses_cli( typer.echo("Progress: 100%") typer.echo( - f"Responses calculations finished, writing output rasters to {output_probabilities}, \ + f"Posterior probability calculations finished, output rasters saved to {output_probabilities}, \ {output_probabilities_std} and {output_confidence_array}" ) @@ -3229,7 +3246,7 @@ def weights_of_evidence_calculate_responses_cli( def agterberg_cheng_CI_test_cli( input_posterior_probabilities: INPUT_FILE_OPTION, input_posterior_probabilities_std: INPUT_FILE_OPTION, - nr_of_deposits: Annotated[int, typer.Option()], + input_weights_table: INPUT_FILE_OPTION, ): """Perform the conditional independence test presented by Agterberg-Cheng (2002).""" from eis_toolkit.prediction.weights_of_evidence import agterberg_cheng_CI_test @@ -3244,12 +3261,14 @@ def agterberg_cheng_CI_test_cli( posterior_probabilities_std = src.read(1) posterior_probabilities_std = 
nodata_to_nan(posterior_probabilities_std, src.nodata) + weights_df = pd.read_csv(input_weights_table) + typer.echo("Progress: 25%") _, _, _, _, summary = agterberg_cheng_CI_test( posterior_probabilities=posterior_probabilities, posterior_probabilities_std=posterior_probabilities_std, - nr_of_deposits=nr_of_deposits, + weights_df=weights_df, ) typer.echo("Progress: 100%") diff --git a/eis_toolkit/prediction/weights_of_evidence.py b/eis_toolkit/prediction/weights_of_evidence.py index e9b84d00..5a21a395 100644 --- a/eis_toolkit/prediction/weights_of_evidence.py +++ b/eis_toolkit/prediction/weights_of_evidence.py @@ -6,9 +6,15 @@ import pandas as pd import rasterio from beartype import beartype -from beartype.typing import Dict, List, Literal, Optional, Sequence, Tuple - -from eis_toolkit.exceptions import ClassificationFailedException, InvalidColumnException, InvalidParameterValueException +from beartype.typing import Dict, List, Literal, Optional, Sequence, Tuple, Union + +from eis_toolkit.exceptions import ( + ClassificationFailedException, + InvalidColumnException, + InvalidParameterValueException, + NonMatchingRasterMetadataException, +) +from eis_toolkit.utilities.checks.raster import check_raster_grids from eis_toolkit.vector_processing.rasterize_vector import rasterize_vector from eis_toolkit.warnings import ClassificationFailedWarning, InvalidColumnWarning @@ -68,7 +74,7 @@ } -def _read_and_preprocess_evidence( +def _read_and_preprocess_raster_data( raster: rasterio.io.DatasetReader, nodata: Optional[Number] = None, band: int = 1 ) -> np.ndarray: """Read raster data and handle NoData values.""" @@ -244,6 +250,22 @@ def _generate_arrays_from_metrics( return array_dict +def _calculate_nr_of_deposit_pixels(array: np.ndarray, df: pd.DataFrame) -> Tuple[int, int]: + masked_array = array[~np.isnan(array)] + nr_of_pixels = int(np.size(masked_array)) + + pixels_column = df["Pixel count"] + + match = pixels_column == nr_of_pixels + if match.any(): + nr_of_deposits = 
df.loc[match, "Deposit count"].iloc[0] + else: + nr_of_pixels = df["Pixel count"].sum() + nr_of_deposits = df["Deposit count"].sum() + + return nr_of_deposits, nr_of_pixels + + @beartype def generalize_weights_cumulative( df: pd.DataFrame, @@ -349,7 +371,7 @@ def generalize_weights_cumulative( @beartype def weights_of_evidence_calculate_weights( evidential_raster: rasterio.io.DatasetReader, - deposits: gpd.GeoDataFrame, + deposits: Union[gpd.GeoDataFrame, rasterio.io.DatasetReader], raster_nodata: Optional[Number] = None, weights_type: Literal["unique", "categorical", "ascending", "descending"] = "unique", studentized_contrast_threshold: Number = 1, @@ -360,7 +382,7 @@ def weights_of_evidence_calculate_weights( Args: evidential_raster: The evidential raster. - deposits: Vector data representing the mineral deposits or occurences point data. + deposits: Vector or raster data representing the mineral deposits or occurrences point data. raster_nodata: If nodata value of raster is wanted to specify manually. Optional parameter, defaults to None (nodata from raster metadata is used). weights_type: Accepted values are 'unique', 'categorical', 'ascending' and 'descending'. @@ -409,13 +431,22 @@ def weights_of_evidence_calculate_weights( metrics_to_arrays = arrays_to_generate.copy() # 1. 
Preprocess data - evidence_array = _read_and_preprocess_evidence(evidential_raster, raster_nodata) + evidence_array = _read_and_preprocess_raster_data(evidential_raster, raster_nodata) raster_meta = evidential_raster.meta + raster_profile = evidential_raster.profile - # Rasterize deposits - deposit_array = rasterize_vector( - geodataframe=deposits, raster_profile=raster_meta, default_value=1.0, fill_value=0.0 - ) + # Rasterize deposits if vector data + if isinstance(deposits, gpd.GeoDataFrame): + deposit_array = rasterize_vector( + geodataframe=deposits, raster_profile=raster_meta, default_value=1.0, fill_value=0.0 + ) + else: + deposit_profile = deposits.profile + + if check_raster_grids([raster_profile, deposit_profile], same_extent=True): + deposit_array = _read_and_preprocess_raster_data(deposits, raster_nodata) + else: + raise NonMatchingRasterMetadataException("Input rasters should have the same grid properties.") # Mask NaN out of the array nodata_mask = np.isnan(evidence_array) @@ -482,7 +513,7 @@ def weights_of_evidence_calculate_weights( @beartype def weights_of_evidence_calculate_responses( - output_arrays: Sequence[Dict[str, np.ndarray]], nr_of_deposits: int, nr_of_pixels: int + output_arrays: Sequence[Dict[str, np.ndarray]], weights_df: pd.DataFrame ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: """Calculate the posterior probabilities for the given generalized weight arrays. @@ -491,14 +522,17 @@ def weights_of_evidence_calculate_responses( For each dictionary, generalized weight and generalized standard deviation arrays are used and summed together pixel-wise to calculate the posterior probabilities. If generalized arrays are not found, the W+ and S_W+ arrays are used (so if outputs from unique weight calculations are used for this function). - nr_of_deposits: Number of deposit pixels in the input data for weights of evidence calculations. - nr_of_pixels: Number of evidence pixels in the input data for weights of evidence calculations. 
+ weights_df: Output dataframe of WofE calculate weights algorithm. Used for determining number of deposits and + number of pixels. Returns: Array of posterior probabilites. Array of standard deviations in the posterior probability calculations. Array of confidence of the prospectivity values obtained in the posterior probability array. """ + array = list(output_arrays[0].values())[0] + nr_of_deposits, nr_of_pixels = _calculate_nr_of_deposit_pixels(array, weights_df) + gen_weights_sum = sum( [ item[GENERALIZED_WEIGHT_PLUS_COLUMN] @@ -531,7 +565,7 @@ def weights_of_evidence_calculate_responses( @beartype def agterberg_cheng_CI_test( - posterior_probabilities: np.ndarray, posterior_probabilities_std: np.ndarray, nr_of_deposits: int + posterior_probabilities: np.ndarray, posterior_probabilities_std: np.ndarray, weights_df: pd.DataFrame ) -> Tuple[bool, bool, bool, float, str]: """Perform the conditional independence test presented by Agterberg-Cheng (2002). @@ -541,7 +575,8 @@ def agterberg_cheng_CI_test( Args: posterior_probabilities: Array of posterior probabilites. posterior_probabilities_std: Array of standard deviations in the posterior probability calculations. - nr_of_deposits: Number of deposit pixels in the input data for weights of evidence calculations. + weights_df: Output dataframe of WofE calculate weights algorithm. Used for determining number of deposits. + Returns: Whether the conditional hypothesis can be accepted for the evidence layers that the input posterior probabilities and standard deviations of posterior probabilities are calculated from. @@ -550,12 +585,8 @@ def agterberg_cheng_CI_test( Ratio T/n. Results > 1, may be because of lack of conditional independence of layers. T should not exceed n by more than 15% (Bonham-Carter 1994, p. 316). A summary of the the conditional independence calculations. - - Raises: - InvalidParameterValueException: Value of nr_of_deposits is not at least 1. 
""" - if nr_of_deposits < 1: - raise InvalidParameterValueException("Expected input deposits count to be at least 1.") + nr_of_deposits, _ = _calculate_nr_of_deposit_pixels(posterior_probabilities, weights_df) # One-tailed significance test according to Agterberg-Cheng (2002): # Conditional independence must satisfy: diff --git a/eis_toolkit/utilities/checks/raster.py b/eis_toolkit/utilities/checks/raster.py index a93a32fc..78b52a66 100644 --- a/eis_toolkit/utilities/checks/raster.py +++ b/eis_toolkit/utilities/checks/raster.py @@ -3,6 +3,7 @@ import rasterio.transform from beartype import beartype from beartype.typing import Iterable, Sequence, Union +from rasterio.crs import CRS from eis_toolkit.exceptions import InvalidParameterValueException @@ -37,21 +38,25 @@ def check_matching_crs(objects: Iterable) -> bool: Returns: True if everything matches, False if not. """ - epsg_list = [] + crs_list = [] for object in objects: if not isinstance(object, (rasterio.profiles.Profile, dict)): if not object.crs: return False epsg = object.crs.to_epsg() - epsg_list.append(epsg) + crs_list.append(epsg) else: if "crs" in object: - epsg_list.append(object["crs"]) + crs_object = object["crs"] + if type(crs_object) == CRS: + crs_list.append(crs_object.to_epsg()) + else: + crs_list.append(crs_object) else: return False - if len(set(epsg_list)) != 1: + if len(set(crs_list)) != 1: return False return True @@ -119,6 +124,7 @@ def check_raster_grids( Returns: True if gridding and optionally bounds matches, False if not. """ + if not check_matching_crs(raster_profiles): return False if not check_matching_pixel_alignment(raster_profiles):