diff --git a/examples/xarray_data_run.py b/examples/xarray_data_run.py new file mode 100644 index 0000000..c1361aa --- /dev/null +++ b/examples/xarray_data_run.py @@ -0,0 +1,124 @@ +#!/usr/bin/env -S uv run --script +# +# /// script +# requires-python = ">=3.12" +# dependencies = [ +# "omfiles[fsspec,grids,xarray]>=1.2.0", # x-release-please-version +# "matplotlib", +# ] +# /// + +"""Plot forecast time series for a 3x3 neighbourhood of grid cells.""" + +import datetime as dt + +import fsspec +import matplotlib.dates as mdates +import matplotlib.pyplot as plt +import numpy as np +import xarray as xr +from omfiles.grids import OmGrid + +MODEL_DOMAIN = "dwd_icon" +VARIABLE = "temperature_2m" +TARGET_LAT = 46.8 +TARGET_LON = 10.8 +GRID_RADIUS = 1 # 1 cell in each direction -> 3x3 neighbourhood +OUTPUT_PATH = "xarray_data_run_timeseries.png" + +# Example URI for a data_run file in the `data_run` S3 bucket. +# See data organization details: https://github.com/open-meteo/open-data?tab=readme-ov-file#data-organization +# Note: data_run is retained ~ 3 months. The script uses one file within this period. +run_time = dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=2) +s3_uri = ( + f"s3://openmeteo/data_run/{MODEL_DOMAIN}/{run_time.year}/{run_time.month:02}/{run_time.day:02}/0000Z/{VARIABLE}.om" +) +print(f"Using OM file: {s3_uri}") + +# Use blockcache so repeated remote reads do not have to fetch the same bytes again. +backend = fsspec.open( + f"blockcache::{s3_uri}", + mode="rb", + s3={"anon": True, "default_block_size": 65536}, + blockcache={ + "cache_storage": "cache", + # Data run files do not change, so checking on every access is unnecessary. + "check_files": False, + }, +) + +ds = xr.open_dataset(backend, engine="om") # type: ignore +print(ds.data_vars) +print(ds.attrs) + +# time is a coordinate stored in the data_run files as unix timestamp +print(ds["time"]) + +# In data_run the variable is encoded in the file_name. +# The variable itself is not named. +variable_name = "" +data = ds[variable_name] + +if data.ndim != 3: + raise ValueError(f"Expected a 3D variable with shape (lat, lon, time), got {data.shape}") + +# Reconstruct the latitude/longitude grid from the projection metadata. +grid = OmGrid(ds.attrs["crs_wkt"], data.shape[:2]) +lon2d, lat2d = grid.get_meshgrid() + +# Find the nearest grid cell to the chosen target location and use the +# surrounding 3x3 neighbourhood for plotting. +distance2 = (lat2d - TARGET_LAT) ** 2 + (lon2d - TARGET_LON) ** 2 +center_y, center_x = np.unravel_index(np.argmin(distance2), distance2.shape) + +if ( + center_y - GRID_RADIUS < 0 + or center_y + GRID_RADIUS >= data.shape[0] + or center_x - GRID_RADIUS < 0 + or center_x + GRID_RADIUS >= data.shape[1] +): + raise ValueError("Target location is too close to the domain boundary for a 3x3 neighbourhood") + +# Forecast coordinates may be stored as Unix timestamps. Convert numeric values +# to UTC datetimes so matplotlib renders readable date labels. +forecast_dim = data.dims[2] +forecast_coord = data.coords[forecast_dim] + +raw_forecast_times = np.asarray(forecast_coord.values) +forecast_times = raw_forecast_times.astype("datetime64[s]") +x_label = "Forecast time (UTC)" + +fig, axes = plt.subplots(3, 3, figsize=(14, 10), sharex=True, sharey=True) +axes = np.asarray(axes) + +# Plot one time series for each cell in the 3x3 neighbourhood around the target point. +for row_offset, y_index in enumerate(range(center_y - GRID_RADIUS, center_y + GRID_RADIUS + 1)): + for col_offset, x_index in enumerate(range(center_x - GRID_RADIUS, center_x + GRID_RADIUS + 1)): + ax = axes[row_offset, col_offset] + series = data.isel({data.dims[0]: y_index, data.dims[1]: x_index}).values + cell_lat = lat2d[y_index, x_index] + cell_lon = lon2d[y_index, x_index] + + ax.plot(forecast_times, series, linewidth=1.8) + ax.grid(True, alpha=0.3) + ax.set_title(f"lat={cell_lat:.3f}, lon={cell_lon:.3f}", fontsize=10) + + if np.issubdtype(np.asarray(forecast_times).dtype, np.datetime64): + ax.xaxis.set_major_locator(mdates.AutoDateLocator(maxticks=8)) + ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d\n%H:%M")) + + if row_offset == 2: + ax.set_xlabel(x_label) + if col_offset == 0: + ax.set_ylabel(variable_name) + +fig.suptitle( + f"{MODEL_DOMAIN} {variable_name} time series for a 3x3 grid neighbourhood\n" + f"centered near lat={TARGET_LAT}, lon={TARGET_LON}", + fontsize=13, + fontweight="bold", +) +fig.autofmt_xdate() +plt.tight_layout() +plt.savefig(OUTPUT_PATH, dpi=300, bbox_inches="tight") +print(f"Saved plot to {OUTPUT_PATH}") diff --git a/examples/spatial_xarray.py b/examples/xarray_spatial.py similarity index 100% rename from examples/spatial_xarray.py rename to examples/xarray_spatial.py diff --git a/release-please-config.json b/release-please-config.json index 10d6f66..97066a1 100644 --- a/release-please-config.json +++ b/release-please-config.json @@ -13,6 +13,7 @@ "examples/regrid_ecmwf_ifs_hres_gaussian_O1280_to_0.1_degree.py", "examples/regrid_subset_of_projected_domain.py", "examples/select_by_coordinates.py", - "examples/spatial_xarray.py" + "examples/xarray_data_run.py", + "examples/xarray_spatial.py" ] }