Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 124 additions & 0 deletions examples/xarray_data_run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#!/usr/bin/env -S uv run --script
#
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "omfiles[fsspec,grids,xarray]>=1.2.0", # x-release-please-version
# "matplotlib",
# ]
# ///

"""Plot forecast time series for a 3x3 neighbourhood of grid cells."""

import datetime as dt

import fsspec
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
from omfiles.grids import OmGrid

MODEL_DOMAIN = "dwd_icon"
VARIABLE = "temperature_2m"
TARGET_LAT = 46.8
TARGET_LON = 10.8
GRID_RADIUS = 1 # 1 cell in each direction -> 3x3 neighbourhood
OUTPUT_PATH = "xarray_data_run_timeseries.png"

# Example URI for a data_run file in the `data_run` S3 bucket.
# See data organization details: https://github.com/open-meteo/open-data?tab=readme-ov-file#data-organization
# Note: data_run is retained ~ 3 months. The script uses one file within this period.
run_time = dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=2)
s3_uri = (
f"s3://openmeteo/data_run/{MODEL_DOMAIN}/{run_time.year}/{run_time.month:02}/{run_time.day:02}/0000Z/{VARIABLE}.om"
)
print(f"Using OM file: {s3_uri}")

# Use blockcache so repeated remote reads do not have to fetch the same bytes again.
backend = fsspec.open(
f"blockcache::{s3_uri}",
mode="rb",
s3={"anon": True, "default_block_size": 65536},
blockcache={
"cache_storage": "cache",
# Data run files do not change, so checking on every access is unnecessary.
"check_files": False,
},
)

ds = xr.open_dataset(backend, engine="om") # type: ignore
print(ds.data_vars)
print(ds.attrs)

# time is a coordinate stored in the data_run files as unix timestamp
print(ds["time"])

# In data_run the variable is encoded in the file_name.
# The variable itself is not named.
variable_name = ""
data = ds[variable_name]

if data.ndim != 3:
raise ValueError(f"Expected a 3D variable with shape (lat, lon, time), got {data.shape}")

# Reconstruct the latitude/longitude grid from the projection metadata.
grid = OmGrid(ds.attrs["crs_wkt"], data.shape[:2])
lon2d, lat2d = grid.get_meshgrid()

# Find the nearest grid cell to the chosen target location and use the
# surrounding 3x3 neighbourhood for plotting.
distance2 = (lat2d - TARGET_LAT) ** 2 + (lon2d - TARGET_LON) ** 2
center_y, center_x = np.unravel_index(np.argmin(distance2), distance2.shape)

if (
center_y - GRID_RADIUS < 0
or center_y + GRID_RADIUS >= data.shape[0]
or center_x - GRID_RADIUS < 0
or center_x + GRID_RADIUS >= data.shape[1]
):
raise ValueError("Target location is too close to the domain boundary for a 3x3 neighbourhood")

# Forecast coordinates may be stored as Unix timestamps. Convert numeric values
# to UTC datetimes so matplotlib renders readable date labels.
forecast_dim = data.dims[2]
forecast_coord = data.coords[forecast_dim]

raw_forecast_times = np.asarray(forecast_coord.values)
forecast_times = raw_forecast_times.astype("datetime64[s]")
x_label = "Forecast time (UTC)"

fig, axes = plt.subplots(3, 3, figsize=(14, 10), sharex=True, sharey=True)
axes = np.asarray(axes)

# Plot one time series for each cell in the 3x3 neighbourhood around the target point.
for row_offset, y_index in enumerate(range(center_y - GRID_RADIUS, center_y + GRID_RADIUS + 1)):
for col_offset, x_index in enumerate(range(center_x - GRID_RADIUS, center_x + GRID_RADIUS + 1)):
ax = axes[row_offset, col_offset]
series = data.isel({data.dims[0]: y_index, data.dims[1]: x_index}).values
cell_lat = lat2d[y_index, x_index]
cell_lon = lon2d[y_index, x_index]

ax.plot(forecast_times, series, linewidth=1.8)
ax.grid(True, alpha=0.3)
ax.set_title(f"lat={cell_lat:.3f}, lon={cell_lon:.3f}", fontsize=10)

if np.issubdtype(np.asarray(forecast_times).dtype, np.datetime64):
ax.xaxis.set_major_locator(mdates.AutoDateLocator(maxticks=8))
ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d\n%H:%M"))

if row_offset == 2:
ax.set_xlabel(x_label)
if col_offset == 0:
ax.set_ylabel(variable_name)

fig.suptitle(
f"{MODEL_DOMAIN} {variable_name} time series for a 3x3 grid neighbourhood\n"
f"centered near lat={TARGET_LAT}, lon={TARGET_LON}",
fontsize=13,
fontweight="bold",
)
fig.autofmt_xdate()
plt.tight_layout()
plt.savefig(OUTPUT_PATH, dpi=300, bbox_inches="tight")
print(f"Saved plot to {OUTPUT_PATH}")
File renamed without changes.
3 changes: 2 additions & 1 deletion release-please-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"examples/regrid_ecmwf_ifs_hres_gaussian_O1280_to_0.1_degree.py",
"examples/regrid_subset_of_projected_domain.py",
"examples/select_by_coordinates.py",
"examples/spatial_xarray.py"
"examples/xarray_data_run.py",
"examples/xarray_spatial.py"
]
}
Loading