diff --git a/README.md b/README.md index e0ac1ec..6c3c87c 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,9 @@ Aggregate global powerplant capacities into any resolution. +

+ +

## About @@ -18,8 +21,63 @@ and the `snakemake` [documentation](https://snakemake.readthedocs.io/en/stable/s Data processing steps: -1. -2. + +

+ +

+ +1. Stable version-controlled global datasets are downloaded, including: + - Disaggregated powerplant statistics from [GEM](https://globalenergymonitor.org/), [TransitionZero](https://www.transitionzero.org/products/solar-asset-mapper), and [GloHydroRes](https://zenodo.org/records/14526360). + - National-level statistics from the [EIA](https://www.eia.gov/). +2. Individual powerplants are prepared into seven different categories (bioenergy, fossil, geothermal, hydropower, nuclear, solar, wind). + - Fuel-burning powerplants (fossil, bioenergy) are assigned unique fuel classes depending on the combination of fuels they utilise. + - For utility-scale solar projects, satellite-detected [TZ-Solar Asset Mapper](https://www.transitionzero.org/products/solar-asset-mapper) facilities are matched to [GEM-Global Solar Power Tracker](https://globalenergymonitor.org/) data to obtain a highly complete dataset of large-scale solar facilities. +3. Powerplants are selected according to the shapes file provided by the user. Depending on the configuration, their placement may be adjusted per technology and country. + +

+ +

+ +4. Powerplant start and end dates are imputed per category/technology using the configuration. + - `lifetime_years` determines overall technology lifetime. + - `retirement_delay_years` determines the remaining years of powerplants currently operating beyond their expected lifetime. + +

+ +

+ +> [!NOTE] +> Powerplant start/end dates are only imputed if they are not provided in the original dataset. + +5. Optionally, powerplant capacities are adjusted evenly per category and country to match EIA statistics. + + +

+ +

+ +> [!IMPORTANT] +> This stage may significantly inflate/deflate individual powerplants. +> We encourage users to carefully assess if this adjustment is merited by their use-case. + +6. Powerplant capacity is aggregated to the provided shapes, for either adjusted or unadjusted powerplants. + +

+ +

+ +7. Solar is processed as a special case because rooftop PV panels are not covered in GEM or TransitionZero data. + 1. Per country: $solar_{rooftop\_PV} = solar_{national\_statistics} - solar_{large\_scale}$. + 2. A user-provided proxy raster is used to determine how to disaggregate $solar_{rooftop\_PV}$. + 3. This proxy is used to determine the aggregated rooftop PV capacity per shape. + +

+ +

+ +> [!NOTE] +> Due to this assumption, the lifetime of rooftop PV capacity is left undetermined. + ## Configuration @@ -60,7 +118,21 @@ snakemake --use-conda --cores 2 # run the workflow! ## References -This module is based on the following research and datasets: - -* -* +This module is based on the following research and datasets. +For specific versions please consult our [stable dataset repository](https://doi.org/10.5281/zenodo.16779120). + +* **Global Energy Monitor datasets.** . License: CC BY 4.0. + - Global Bioenergy Power Tracker + - Global Coal Plant Tracker + - Global Geothermal Power Tracker + - Global Nuclear Power Tracker + - Global Oil and Gas Plant Tracker + - Global Solar Power Tracker + - Global Wind Power Tracker +* **Global Hydropower powerplants.** +Shah, J., Hu, J., Edelenbosch, O., & van Vliet, M. T. H. (2024). GloHydroRes - a global dataset combining open-source hydropower plant and reservoir data [Data set]. Zenodo. . License: CC BY 4.0. +* **National capacity dataset.** +U.S. Energy Information Administration (Oct 2008). . License: Public domain. +* **Satellite Utility-scale PV dataset.** +TransitionZero Solar Asset Mapper, TransitionZero. . +License: CC BY-NC 4.0. diff --git a/config/config.yaml b/config/config.yaml index 0edf014..8bbfe30 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -95,8 +95,8 @@ imputation: remove_shape_overlaps: false inner_distance: 100 # matches projected CRS unit. 
on_overlap: "raise" # either 'split_capacity', or 'raise' - on_forced_class_error: "drop" # either 'drop', 'ignore', or 'raise' - forced_class: + on_shape_class_error: "drop" # either 'drop', 'ignore', or 'raise' + shape_class: bioenergy turbine: "land" ccgt: "land" coal turbine: "land" diff --git a/docs/europe_example.png b/docs/europe_example.png deleted file mode 100644 index c63e9a3..0000000 Binary files a/docs/europe_example.png and /dev/null differ diff --git a/docs/index.md b/docs/index.md deleted file mode 100644 index 972d37a..0000000 --- a/docs/index.md +++ /dev/null @@ -1,4 +0,0 @@ -# Home - -Welcome to the documentation of the `module_powerplants` data module! -Please consult the [specification guidelines](./specification.md) and the [`clio` documentation](https://clio.readthedocs.io/) for more information. diff --git a/figures/.gitkeep b/figures/.gitkeep deleted file mode 100644 index dcafcbe..0000000 --- a/figures/.gitkeep +++ /dev/null @@ -1 +0,0 @@ -# For module documentation figures diff --git a/figures/fossil_adjustment_MEX.png b/figures/fossil_adjustment_MEX.png new file mode 100644 index 0000000..a780030 Binary files /dev/null and b/figures/fossil_adjustment_MEX.png differ diff --git a/figures/fossil_aggregation_MEX.png b/figures/fossil_aggregation_MEX.png new file mode 100644 index 0000000..66c4bf4 Binary files /dev/null and b/figures/fossil_aggregation_MEX.png differ diff --git a/figures/fossil_histogram_MEX.png b/figures/fossil_histogram_MEX.png new file mode 100644 index 0000000..739bdb2 Binary files /dev/null and b/figures/fossil_histogram_MEX.png differ diff --git a/figures/powerplant_location_algorithm.svg b/figures/powerplant_location_algorithm.svg new file mode 100644 index 0000000..b5d622e --- /dev/null +++ b/figures/powerplant_location_algorithm.svg @@ -0,0 +1,593 @@ + + + + Powerplant placement algorithm + Vertical summary showing how powerplants are removed when outside all shapes, kept when correctly placed, split across 
overlapping country shapes, and moved to the nearest forced-class shape in the same country. + + + + + + + + + + + + + + + + + + + Powerplant placement + + + 1. Powerplants outside are removed + Any powerplant outside the provided shapes is considered out of scope. + + + Shape A + + + + + + 2. Powerplants inside are preserved + Powerplants inside the shapes are kept if:- They intersect with exactly one shape.- Their technology matches the configured shape_class. + + + + Shape A + + Shape B + + + + + 3. Powerplants in overlaps are split + If a powerplant intersects multiple shapes, it will be handled using theon_overlap configuration option.- "split_capacity": powerplants will be split evenly outside overlaps.- "raise": stop with an error message. + + + + + Shape A + + Shape B + + Shape C + + + + + + + + + + 4. Powerplants tied to a shape class are moved + Technologies can be assigned a shape_class: "land" or "maritime".Mismatches are moved to the closest matching shape in the same country.If no valid adjustment is possible, on_shape_class_error sets the behavior.- "drop": remove them.- "ignore": leave unchanged.- "raise": stop with an error. 
+ + + + + Country A: maritime + + Country A: land + + Country B: land + + + + + + + + + Powerplant placement algorithm + + + + diff --git a/figures/rooftop_pv_proxy_MEX.png b/figures/rooftop_pv_proxy_MEX.png new file mode 100644 index 0000000..be908c1 Binary files /dev/null and b/figures/rooftop_pv_proxy_MEX.png differ diff --git a/figures/rulegraph.png b/figures/rulegraph.png new file mode 100644 index 0000000..82467d3 Binary files /dev/null and b/figures/rulegraph.png differ diff --git a/figures/solar_aggregation_europe.png b/figures/solar_aggregation_europe.png new file mode 100644 index 0000000..34f7708 Binary files /dev/null and b/figures/solar_aggregation_europe.png differ diff --git a/workflow/internal/config.schema.yaml b/workflow/internal/config.schema.yaml index 2fce492..4a89192 100644 --- a/workflow/internal/config.schema.yaml +++ b/workflow/internal/config.schema.yaml @@ -364,7 +364,7 @@ properties: - remove_shape_overlaps - inner_distance - on_overlap - - on_forced_class_error + - on_shape_class_error properties: remove_shape_overlaps: title: Remove shape overlaps @@ -390,7 +390,7 @@ properties: enum: - raise - split_capacity - on_forced_class_error: + on_shape_class_error: title: Forced shape class adjustment error handling description: | Controls how to handle powerplants whose technology is configured for a forced `shape_class`, but no matching shape exists in that plant's country. @@ -402,7 +402,7 @@ properties: - raise - ignore - drop - forced_class: + shape_class: title: Forced `shape_class` adjustment. description: | Optional. Maps powerplant technologies to a `shape_class`. 
diff --git a/workflow/scripts/_plots.py b/workflow/scripts/_plots.py index c917883..a95adbf 100644 --- a/workflow/scripts/_plots.py +++ b/workflow/scripts/_plots.py @@ -1,13 +1,10 @@ """Plot functions used in one or more rules.""" -import math - import _schemas import _utils import geopandas as gpd import numpy as np import pandas as pd -from cmap import Colormap from matplotlib import pyplot as plt from matplotlib import ticker as mticker from matplotlib.axes import Axes @@ -29,87 +26,6 @@ def plot_empty(title: str, output_path: str) -> None: fig.savefig(output_path) -def plot_powerplant_capacity_buildup(df: pd.DataFrame, output_path: str, colormap: str): - """Plot stacked bar charts of active powerplant capacity over time per country. - - Input should be a powerplant capacity file of a single category. - """ - suptitle = "Active powerplant capacity by technology per country" - - if df.empty: - plot_empty(suptitle, output_path) - return - - # Year range (x-axis) - start_year = df["start_year"].astype(int).min() - end_year = df["end_year"].astype(int).max() - years = list(range(start_year, end_year + 1)) - - # Layout (per country in alphabetical order) - countries = sorted(df["country_id"].unique()) - n_countries = len(countries) - cols = 2 - rows = math.ceil(n_countries / cols) - - # Tech type color range - tech_types = sorted(df["technology"].dropna().unique()) - cmap = Colormap(colormap).to_mpl() - colors = [cmap(i) for i in np.linspace(0, 1, len(tech_types))] - - # Figure (always 2 columns, flexible rows) - fig, axes = plt.subplots( - rows, - cols, - figsize=(cols * 5, rows * 4), - sharex=False, - sharey=False, - constrained_layout=True, - ) - axes_flat = np.array(axes).ravel() - - # Plot per country - for ax, country in zip(axes_flat, countries): - country_df = df[df["country_id"] == country] - if country_df.empty: - draw_empty(ax, country, f"No data for {country}") - continue - - cap_mw = pd.DataFrame(0.0, index=years, columns=tech_types) - for year in years: 
- active = country_df[ - (country_df["start_year"] <= year) & (year < country_df["end_year"]) - ] - cap_mw.loc[year] = ( - active.groupby("technology")["output_capacity_mw"] - .sum() - .reindex(tech_types, fill_value=0) - ) - - cap_mw.plot(kind="bar", stacked=True, ax=ax, color=colors, legend=False, rot=45) - ax.set_title(country) - ax.set_ylabel("Capacity (MW)") - ax.locator_params(axis="x", nbins=10) - ax.minorticks_off() - - # Hide extra axes - for ax in axes_flat[n_countries:]: - ax.set_visible(False) - - # Add details - handles, labels = axes_flat[0].get_legend_handles_labels() - fig.legend( - handles[::-1], - labels[::-1], - loc="center left", - bbox_to_anchor=(1.0, 0.5), - title="Technology", - frameon=False, - ) - fig.suptitle(suptitle, fontsize=14) - - fig.savefig(output_path, bbox_inches="tight") - - def plot_capacity_adjustment( stats_file: str, unadjusted_file: str, @@ -319,5 +235,6 @@ def plot_capacity_aggregation( legend_kwds={"label": "Capacity ($MW$)"}, missing_kwds={"color": "lightgrey", "alpha": 0.2}, ) + ax.set_axis_off() ax.set_title(title + f" in year {agg.attrs['year']}") fig.savefig(output_file, bbox_inches="tight") diff --git a/workflow/scripts/impute_ages.py b/workflow/scripts/impute_ages.py index cd85627..cda8805 100644 --- a/workflow/scripts/impute_ages.py +++ b/workflow/scripts/impute_ages.py @@ -1,5 +1,6 @@ """Imputation of missing values.""" +import math import sys from typing import TYPE_CHECKING, Any @@ -7,7 +8,10 @@ import _schemas import _utils import geopandas as gpd +import numpy as np import pandas as pd +from cmap import Colormap +from matplotlib import pyplot as plt if TYPE_CHECKING: snakemake: Any @@ -140,6 +144,89 @@ def explore(imputed: gpd.GeoDataFrame, output_path: str, colormap="tab20"): explorer.save(output_path) +def plot_powerplant_capacity_buildup( + df: pd.DataFrame, output_path: str, colormap: str, cat: str = "powerplant" +): + """Plot stacked bar charts of active powerplant capacity over time per country. 
+ + Input should be a powerplant capacity file of a single category. + """ + suptitle = f"Active {cat} capacity by technology per country" + + if df.empty: + _plots.plot_empty(suptitle, output_path) + return + + # Year range (x-axis) + start_year = df["start_year"].astype(int).min() + end_year = df["end_year"].astype(int).max() + years = list(range(start_year, end_year + 1)) + + # Layout (per country in alphabetical order) + countries = sorted(df["country_id"].unique()) + n_countries = len(countries) + cols = 2 if n_countries > 1 else 1 + rows = math.ceil(n_countries / cols) + + # Tech type color range + tech_types = sorted(df["technology"].dropna().unique()) + cmap = Colormap(colormap).to_mpl() + colors = [cmap(i) for i in np.linspace(0, 1, len(tech_types))] + + # Figure (2 columns, or 1 when there is a single country; flexible rows) + fig, axes = plt.subplots( + rows, + cols, + figsize=(cols * 5, rows * 4), + sharex=False, + sharey=False, + constrained_layout=True, + ) + axes_flat = np.array(axes).ravel() + + # Plot per country + for ax, country in zip(axes_flat, countries): + country_df = df[df["country_id"] == country] + if country_df.empty: + _plots.draw_empty(ax, country, f"No data for {country}") + continue + + cap_mw = pd.DataFrame(0.0, index=years, columns=tech_types) + for year in years: + active = country_df[ + (country_df["start_year"] <= year) & (year < country_df["end_year"]) + ] + cap_mw.loc[year] = ( + active.groupby("technology")["output_capacity_mw"] + .sum() + .reindex(tech_types, fill_value=0) + ) + + cap_mw.plot(kind="bar", stacked=True, ax=ax, color=colors, legend=False, rot=45) + ax.set_title(country) + ax.set_ylabel("Capacity (MW)") + ax.locator_params(axis="x", nbins=10) + ax.minorticks_off() + + # Hide extra axes + for ax in axes_flat[n_countries:]: + ax.set_visible(False) + + # Add details (figure-level legend built from the first axes' handles, in reverse order) + handles, labels = axes_flat[0].get_legend_handles_labels() + fig.legend( + handles[::-1], + labels[::-1], + loc="center left", + bbox_to_anchor=(1.0, 0.5), + title="Technology", +
frameon=False, + ) + fig.suptitle(suptitle, fontsize=14) + + fig.savefig(output_path, bbox_inches="tight") + + def main() -> None: """Main snakemake process.""" imputed_gdf = impute( @@ -149,8 +236,11 @@ def main() -> None: ) imputed_gdf.to_parquet(snakemake.output.aged) - _plots.plot_powerplant_capacity_buildup( - imputed_gdf, snakemake.output.histogram, "seaborn:tab20" + plot_powerplant_capacity_buildup( + imputed_gdf, + snakemake.output.histogram, + "seaborn:tab20", + snakemake.wildcards.category, ) explore(imputed_gdf, snakemake.output.explorer) diff --git a/workflow/scripts/impute_location.py b/workflow/scripts/impute_location.py index 02c5508..057d5a4 100644 --- a/workflow/scripts/impute_location.py +++ b/workflow/scripts/impute_location.py @@ -483,10 +483,10 @@ def main() -> None: adjustment = adjust_powerplant_location( powerplants=assigned, exclusive_shapes=exclusive, - forced_shape_class=location_cnf.get("forced_class", {}), + forced_shape_class=location_cnf.get("shape_class", {}), projected_crs=projected_crs, inner_distance=inner_distance, - on_error=location_cnf["on_forced_class_error"], + on_error=location_cnf["on_shape_class_error"], ) adjusted = schema.validate(adjustment.powerplants) adjusted.to_parquet(snakemake.output.relocated) diff --git a/workflow/scripts/proxy.py b/workflow/scripts/proxy.py index 41b62b5..35e7c69 100644 --- a/workflow/scripts/proxy.py +++ b/workflow/scripts/proxy.py @@ -108,19 +108,20 @@ def plot(proxy_file: str, shapes_file: str, output_file: str, pixels: int = 500_ # Coarsen the proxy data coarse = area_potential_da.coarsen(x=factor, y=factor, boundary="trim").mean() - fig, ax = plt.subplots(figsize=(6, 6), dpi=300) - coarse.plot.imshow( + fig, ax = plt.subplots(dpi=300) + im = coarse.plot.imshow( ax=ax, cmap=Colormap("seaborn:rocket").to_matplotlib(), - add_colorbar=True, - cbar_kwargs={"location": "right", "label": "Proxied potential"}, + add_colorbar=False, alpha=1, ) - # project to the raster's CRS for speed 
shapes_gdf.to_crs(area_potential_da.rio.crs).geometry.boundary.plot( - ax=ax, color="lightgrey", linewidth=0.3, alpha=0.5 + ax=ax, color="lightgrey", linewidth=0.5, alpha=0.7 ) + ax.set_axis_off() ax.set_title(f"Aggregation proxy (coarsened ~{pixel_count:.1e} pixels)") + fig.colorbar(im, ax=ax, location="right", label="Proxied capacity") + fig.savefig(output_file, bbox_inches="tight")