From d6a427c81861291b83cb2c4633f36bbece38cbca Mon Sep 17 00:00:00 2001 From: anshul23102 Date: Mon, 23 Mar 2026 22:57:38 +0530 Subject: [PATCH 1/9] feat(benchmark): add run_benchmark MVP with alignment, metrics, and test data --- modules/benchmark/R/run_benchmark.R | 72 +++++++++++++++++++ .../benchmark/inst/testdata/sample_model.csv | 5 ++ .../benchmark/inst/testdata/sample_obs.csv | 5 ++ 3 files changed, 82 insertions(+) create mode 100644 modules/benchmark/R/run_benchmark.R create mode 100644 modules/benchmark/inst/testdata/sample_model.csv create mode 100644 modules/benchmark/inst/testdata/sample_obs.csv diff --git a/modules/benchmark/R/run_benchmark.R b/modules/benchmark/R/run_benchmark.R new file mode 100644 index 00000000000..8cc99bf6765 --- /dev/null +++ b/modules/benchmark/R/run_benchmark.R @@ -0,0 +1,72 @@ +##' Run a simple benchmark pipeline +##' +##' Loads model output and observations, aligns by time, +##' computes RMSE and MAE, and returns a results table with a plot. +##' +##' @param model_path path to model output CSV file (must have 'time' and 'value' columns) +##' @param obs_path path to observations CSV file (must have 'time' and 'value' columns) +##' @param metrics character vector of metrics to compute. Options: "RMSE", "MAE" +##' @param tolerance_secs nearest-neighbor time tolerance in seconds (default 1 hour) +##' +##' @return list with: metrics (data.frame), aligned (data.frame), plot (ggplot) +##' @export +##' +##' @author Your Name +run_benchmark <- function(model_path, obs_path, + metrics = c("RMSE", "MAE"), + tolerance_secs = 3600) { + + # --- Load data --- + model_df <- read.csv(model_path, stringsAsFactors = FALSE) + obs_df <- read.csv(obs_path, stringsAsFactors = FALSE) + + # --- Ensure time column is POSIXct --- + model_df$time <- as.POSIXct(model_df$time, tz = "UTC") + obs_df$time <- as.POSIXct(obs_df$time, tz = "UTC") + + # --- Align by nearest time --- + aligned <- align_by_time(model_df, obs_df, tolerance_secs = tolerance_secs) + + # --- Compute metrics --- + results <- list() + for (m in toupper(metrics)) { + results[[m]] <- switch(m, + "RMSE" = sqrt(mean((aligned$model - aligned$obs)^2, na.rm = TRUE)), + "MAE" = mean(abs(aligned$model - aligned$obs), na.rm = TRUE), + stop("Unknown metric: ", m) + ) + } + metrics_df <- data.frame(metric = names(results), + value = unlist(results, use.names = FALSE)) + + # --- Plot --- + plot <- ggplot2::ggplot(aligned, ggplot2::aes(x = time)) + + ggplot2::geom_line(ggplot2::aes(y = model, color = "model")) + + ggplot2::geom_line(ggplot2::aes(y = obs, color = "obs")) + + ggplot2::labs(color = "", y = "value", title = "Model vs Observations") + + list(metrics = metrics_df, aligned = aligned, plot = plot) +} + + +##' Align model and observation data frames by nearest time +##' +##' @param model_df data.frame with columns: time (POSIXct), value +##' @param obs_df data.frame with columns: time (POSIXct), value +##' @param tolerance_secs max allowed time difference in seconds +##' +##' @return data.frame with columns: time, model, obs +align_by_time <- function(model_df, obs_df, tolerance_secs = 3600) { + aligned <- do.call(rbind, lapply(seq_len(nrow(model_df)), function(i) { + diffs <- abs(as.numeric(difftime(obs_df$time, model_df$time[i], units = "secs"))) + nearest <- which.min(diffs) + if (diffs[nearest] <= tolerance_secs) { + data.frame(time = model_df$time[i], + model = model_df$value[i], + obs = obs_df$value[nearest]) + } else { + NULL + } + })) + aligned +} diff --git a/modules/benchmark/inst/testdata/sample_model.csv b/modules/benchmark/inst/testdata/sample_model.csv new file mode 100644 index 00000000000..cf5df29d50d --- /dev/null +++ b/modules/benchmark/inst/testdata/sample_model.csv @@ -0,0 +1,5 @@ +time,value +2020-01-01 00:00:00,1.0 +2020-01-01 01:00:00,2.0 +2020-01-01 02:00:00,3.0 +2020-01-01 03:00:00,4.0 diff --git a/modules/benchmark/inst/testdata/sample_obs.csv b/modules/benchmark/inst/testdata/sample_obs.csv new file mode 100644 index 00000000000..f24d28f7c26 --- /dev/null +++ b/modules/benchmark/inst/testdata/sample_obs.csv @@ -0,0 +1,5 @@ +time,value +2020-01-01 00:00:00,1.1 +2020-01-01 01:00:00,1.9 +2020-01-01 02:00:00,3.2 +2020-01-01 03:00:00,3.9 From f8203ff28728788a2f704ba6417df5cfcc38ce33 Mon Sep 17 00:00:00 2001 From: anshul23102 Date: Mon, 23 Mar 2026 23:16:03 +0530 Subject: [PATCH 2/9] feat(benchmark): add tests and update README with quickstart --- modules/benchmark/README.md | 35 +++++++++++++++++++ .../tests/testthat/test-run_benchmark.R | 21 +++++++++++ 2 files changed, 56 insertions(+) create mode 100644 modules/benchmark/tests/testthat/test-run_benchmark.R diff --git a/modules/benchmark/README.md b/modules/benchmark/README.md index a8fd53648d8..69bdb5e2f3d 100644 --- a/modules/benchmark/README.md +++ b/modules/benchmark/README.md @@ -1,4 +1,39 @@ +## Quickstart: run_benchmark() +`run_benchmark()` is a simple entry point that loads model output and +observations, aligns them by time, computes metrics, and returns a plot. + +### Input format + +Both input files must be CSV with two columns: +- `time` — timestamp (e.g. `2020-01-01 00:00:00`) +- `value` — numeric variable value + +### Usage +```r +library(PEcAn.benchmark) + +res <- run_benchmark( + model_path = "inst/testdata/sample_model.csv", + obs_path = "inst/testdata/sample_obs.csv" +) + +# View metrics +print(res$metrics) +# metric value +# 1 RMSE 0.1322876 +# 2 MAE 0.1250000 + +# View plot +res$plot +``` + +### Parameters + +- `model_path` — path to model output CSV +- `obs_path` — path to observations CSV +- `metrics` — vector of metrics to compute: `"RMSE"`, `"MAE"` (default: both) +- `tolerance_secs` — max time difference for matching (default: 3600 seconds) # PEcAn.benchmark diff --git a/modules/benchmark/tests/testthat/test-run_benchmark.R b/modules/benchmark/tests/testthat/test-run_benchmark.R new file mode 100644 index 00000000000..746e6f370d7 --- /dev/null +++ b/modules/benchmark/tests/testthat/test-run_benchmark.R @@ -0,0 +1,21 @@ +library(testthat) + +test_that("run_benchmark basic works", { + model <- data.frame( + time = as.POSIXct(seq(0, 3600*3, by = 3600), origin = "1970-01-01", tz = "UTC"), + value = c(1, 2, 3, 4) + ) + obs <- data.frame( + time = as.POSIXct(seq(0, 3600*3, by = 3600), origin = "1970-01-01", tz = "UTC"), + value = c(1.1, 1.9, 3.2, 3.9) + ) + tmp1 <- tempfile(fileext = ".csv") + tmp2 <- tempfile(fileext = ".csv") + write.csv(model, tmp1, row.names = FALSE) + write.csv(obs, tmp2, row.names = FALSE) + + res <- run_benchmark(tmp1, tmp2, metrics = c("RMSE", "MAE")) + expect_true("metrics" %in% names(res)) + expect_true("aligned" %in% names(res)) + expect_true(nrow(res$metrics) == 2) +}) From acc2d0d84886b97b23f548eca9eb04a93ccc266e Mon Sep 17 00:00:00 2001 From: anshul23102 Date: Wed, 25 Mar 2026 12:07:39 +0530 Subject: [PATCH 3/9] docs(benchmark): add roxygen man page for run_benchmark --- modules/benchmark/man/run_benchmark.Rd | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 modules/benchmark/man/run_benchmark.Rd diff --git a/modules/benchmark/man/run_benchmark.Rd b/modules/benchmark/man/run_benchmark.Rd new file mode 100644 index 00000000000..cb742bc35e6 --- /dev/null +++ b/modules/benchmark/man/run_benchmark.Rd @@ -0,0 +1,19 @@ +\name{run_benchmark} +\alias{run_benchmark} +\title{Run a simple benchmark pipeline} +\usage{ +run_benchmark(model_path, obs_path, metrics = c("RMSE", "MAE"), tolerance_secs = 3600) +} +\arguments{ +\item{model_path}{path to model output CSV file} +\item{obs_path}{path to observations CSV file} +\item{metrics}{character vector of metrics to compute} +\item{tolerance_secs}{nearest-neighbor time tolerance in seconds} +} +\value{ +list with: metrics (data.frame), aligned (data.frame), plot (ggplot) +} +\description{ +Loads model output and observations, aligns by time, +computes RMSE and MAE, and returns a results table with a plot. +} From 0d272fb09a361b4e8faef6146236f891931fe21e Mon Sep 17 00:00:00 2001 From: anshul23102 Date: Wed, 1 Apr 2026 12:11:23 +0530 Subject: [PATCH 4/9] docs(benchmark): add man pages and update NAMESPACE --- modules/benchmark/NAMESPACE | 1 + modules/benchmark/man/align_by_time.Rd | 21 +++++++++++++++++++++ modules/benchmark/man/run_benchmark.Rd | 16 ++++++++++++---- 3 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 modules/benchmark/man/align_by_time.Rd diff --git a/modules/benchmark/NAMESPACE b/modules/benchmark/NAMESPACE index e89a0005f74..ea1785a2437 100644 --- a/modules/benchmark/NAMESPACE +++ b/modules/benchmark/NAMESPACE @@ -46,3 +46,4 @@ importFrom(ggplot2,ggplot) importFrom(ggplot2,labs) importFrom(magrittr,"%>%") importFrom(rlang,.data) +export(run_benchmark) diff --git a/modules/benchmark/man/align_by_time.Rd b/modules/benchmark/man/align_by_time.Rd new file mode 100644 index 00000000000..33e313bb5b5 --- /dev/null +++ b/modules/benchmark/man/align_by_time.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/run_benchmark.R +\name{align_by_time} +\alias{align_by_time} +\title{Align model and observation data frames by nearest time} +\usage{ +align_by_time(model_df, obs_df, tolerance_secs = 3600) +} +\arguments{ +\item{model_df}{data.frame with columns: time (POSIXct), value} + +\item{obs_df}{data.frame with columns: time (POSIXct), value} + +\item{tolerance_secs}{max allowed time difference in seconds} +} +\value{ +data.frame with columns: time, model, obs +} +\description{ +Align model and observation data frames by nearest time +} diff --git a/modules/benchmark/man/run_benchmark.Rd b/modules/benchmark/man/run_benchmark.Rd index cb742bc35e6..d7d7470694d 100644 --- a/modules/benchmark/man/run_benchmark.Rd +++ b/modules/benchmark/man/run_benchmark.Rd @@ -1,3 +1,5 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/run_benchmark.R \name{run_benchmark} \alias{run_benchmark} \title{Run a simple benchmark pipeline} @@ -5,10 +7,13 @@ run_benchmark(model_path, obs_path, metrics = c("RMSE", "MAE"), tolerance_secs = 3600) } \arguments{ -\item{model_path}{path to model output CSV file} -\item{obs_path}{path to observations CSV file} -\item{metrics}{character vector of metrics to compute} -\item{tolerance_secs}{nearest-neighbor time tolerance in seconds} +\item{model_path}{path to model output CSV file (must have 'time' and 'value' columns)} + +\item{obs_path}{path to observations CSV file (must have 'time' and 'value' columns)} + +\item{metrics}{character vector of metrics to compute. Options: "RMSE", "MAE"} + +\item{tolerance_secs}{nearest-neighbor time tolerance in seconds (default 1 hour)} } \value{ list with: metrics (data.frame), aligned (data.frame), plot (ggplot) @@ -17,3 +22,6 @@ list with: metrics (data.frame), aligned (data.frame), plot (ggplot) Loads model output and observations, aligns by time, computes RMSE and MAE, and returns a results table with a plot. } +\author{ +Your Name +} From 8c0832c5c49aecb8578f6028059f36da1e9062ee Mon Sep 17 00:00:00 2001 From: anshul23102 Date: Wed, 1 Apr 2026 12:25:56 +0530 Subject: [PATCH 5/9] fix(benchmark): fix NAMESPACE export and run_benchmark.Rd usage format --- modules/benchmark/NAMESPACE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/benchmark/NAMESPACE b/modules/benchmark/NAMESPACE index ea1785a2437..b898b6bc13e 100644 --- a/modules/benchmark/NAMESPACE +++ b/modules/benchmark/NAMESPACE @@ -35,6 +35,7 @@ export(metric_run) export(metric_scatter_plot) export(metric_timeseries_plot) export(read_settings_BRR) +export(run_benchmark) importFrom(dplyr,collect) importFrom(dplyr,filter) importFrom(dplyr,rename) @@ -46,4 +47,3 @@ importFrom(ggplot2,ggplot) importFrom(ggplot2,labs) importFrom(magrittr,"%>%") importFrom(rlang,.data) -export(run_benchmark) From 1e90203360ad14b910899364bce197dccf3fb3a5 Mon Sep 17 00:00:00 2001 From: anshul23102 Date: Wed, 1 Apr 2026 12:42:15 +0530 Subject: [PATCH 6/9] fix(benchmark): use multi-line usage format and fix author name --- modules/benchmark/R/run_benchmark.R | 2 +- modules/benchmark/man/run_benchmark.Rd | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/modules/benchmark/R/run_benchmark.R b/modules/benchmark/R/run_benchmark.R index 8cc99bf6765..c4b7004c1e0 100644 --- a/modules/benchmark/R/run_benchmark.R +++ b/modules/benchmark/R/run_benchmark.R @@ -11,7 +11,7 @@ ##' @return list with: metrics (data.frame), aligned (data.frame), plot (ggplot) ##' @export ##' -##' @author Your Name +##' @author Anshul Jain run_benchmark <- function(model_path, obs_path, metrics = c("RMSE", "MAE"), tolerance_secs = 3600) { diff --git a/modules/benchmark/man/run_benchmark.Rd b/modules/benchmark/man/run_benchmark.Rd index d7d7470694d..b1998b4ea5d 100644 --- a/modules/benchmark/man/run_benchmark.Rd +++ b/modules/benchmark/man/run_benchmark.Rd @@ -4,7 +4,12 @@ \alias{run_benchmark} \title{Run a simple benchmark pipeline} \usage{ -run_benchmark(model_path, obs_path, metrics = c("RMSE", "MAE"), tolerance_secs = 3600) +run_benchmark( + model_path, + obs_path, + metrics = c("RMSE", "MAE"), + tolerance_secs = 3600 +) } \arguments{ \item{model_path}{path to model output CSV file (must have 'time' and 'value' columns)} @@ -23,5 +28,5 @@ Loads model output and observations, aligns by time, computes RMSE and MAE, and returns a results table with a plot. } \author{ -Your Name +Anshul Jain } From 01043afa09b7624979dd64775861ffb8d0eb3fc2 Mon Sep 17 00:00:00 2001 From: anshul23102 Date: Wed, 1 Apr 2026 12:46:40 +0530 Subject: [PATCH 7/9] refactor(benchmark): dataframe-first API, add bm_validate/compute_metrics/plot_time_series, update tests --- modules/benchmark/R/run_benchmark.R | 91 ++++++++++++------- .../tests/testthat/test-run_benchmark.R | 49 +++++++--- 2 files changed, 93 insertions(+), 47 deletions(-) diff --git a/modules/benchmark/R/run_benchmark.R b/modules/benchmark/R/run_benchmark.R index c4b7004c1e0..9897b59b015 100644 --- a/modules/benchmark/R/run_benchmark.R +++ b/modules/benchmark/R/run_benchmark.R @@ -1,53 +1,51 @@ ##' Run a simple benchmark pipeline ##' -##' Loads model output and observations, aligns by time, -##' computes RMSE and MAE, and returns a results table with a plot. +##' Takes two validated dataframes, aligns by time, +##' computes metrics, and returns a results table with a plot. ##' -##' @param model_path path to model output CSV file (must have 'time' and 'value' columns) -##' @param obs_path path to observations CSV file (must have 'time' and 'value' columns) +##' @param model_df data.frame with columns: time (POSIXct), value (numeric) +##' @param obs_df data.frame with columns: time (POSIXct), value (numeric) ##' @param metrics character vector of metrics to compute. Options: "RMSE", "MAE" ##' @param tolerance_secs nearest-neighbor time tolerance in seconds (default 1 hour) +##' @param method alignment method: "nearest" or "interpolate" ##' ##' @return list with: metrics (data.frame), aligned (data.frame), plot (ggplot) ##' @export -##' ##' @author Anshul Jain -run_benchmark <- function(model_path, obs_path, +run_benchmark <- function(model_df, obs_df, metrics = c("RMSE", "MAE"), - tolerance_secs = 3600) { - - # --- Load data --- - model_df <- read.csv(model_path, stringsAsFactors = FALSE) - obs_df <- read.csv(obs_path, stringsAsFactors = FALSE) + tolerance_secs = 3600, + method = "nearest") { - # --- Ensure time column is POSIXct --- - model_df$time <- as.POSIXct(model_df$time, tz = "UTC") - obs_df$time <- as.POSIXct(obs_df$time, tz = "UTC") + # Stage 1: Validate schema + bm_validate(model_df, obs_df) - # --- Align by nearest time --- + # Stage 2: Align by time aligned <- align_by_time(model_df, obs_df, tolerance_secs = tolerance_secs) - # --- Compute metrics --- - results <- list() - for (m in toupper(metrics)) { - results[[m]] <- switch(m, - "RMSE" = sqrt(mean((aligned$model - aligned$obs)^2, na.rm = TRUE)), - "MAE" = mean(abs(aligned$model - aligned$obs), na.rm = TRUE), - stop("Unknown metric: ", m) - ) - } - metrics_df <- data.frame(metric = names(results), - value = unlist(results, use.names = FALSE)) + # Stage 3: Compute metrics via registry + results <- compute_metrics(aligned, metrics) - # --- Plot --- - plot <- ggplot2::ggplot(aligned, ggplot2::aes(x = time)) + - ggplot2::geom_line(ggplot2::aes(y = model, color = "model")) + - ggplot2::geom_line(ggplot2::aes(y = obs, color = "obs")) + - ggplot2::labs(color = "", y = "value", title = "Model vs Observations") + # Stage 4: Plot + plot <- plot_time_series(aligned) - list(metrics = metrics_df, aligned = aligned, plot = plot) + list(metrics = results, aligned = aligned, plot = plot) } +##' Validate benchmark input dataframes +##' +##' @param model_df data.frame with columns: time (POSIXct), value (numeric) +##' @param obs_df data.frame with columns: time (POSIXct), value (numeric) +##' @return invisible(TRUE) +bm_validate <- function(model_df, obs_df) { + for (df in list(model_df, obs_df)) { + if (!inherits(df$time, "POSIXct")) + stop("Column 'time' must be POSIXct, got: ", class(df$time)) + if (!is.numeric(df$value)) + stop("Column 'value' must be numeric, got: ", class(df$value)) + } + invisible(TRUE) +} ##' Align model and observation data frames by nearest time ##' @@ -70,3 +68,32 @@ align_by_time <- function(model_df, obs_df, tolerance_secs = 3600) { })) aligned } + +##' Compute benchmark metrics +##' +##' @param aligned data.frame with columns: time, model, obs +##' @param metrics character vector of metric names +##' @return data.frame with columns: metric, value +compute_metrics <- function(aligned, metrics = c("RMSE", "MAE")) { + METRIC_REGISTRY <- list( + RMSE = function(x, y) sqrt(mean((x - y)^2, na.rm = TRUE)), + MAE = function(x, y) mean(abs(x - y), na.rm = TRUE) + ) + results <- lapply(toupper(metrics), function(m) { + if (!m %in% names(METRIC_REGISTRY)) stop("Unknown metric: ", m) + METRIC_REGISTRY[[m]](aligned$model, aligned$obs) + }) + data.frame(metric = toupper(metrics), value = unlist(results, use.names = FALSE)) +} + +##' Plot model vs observations time series +##' +##' @param aligned data.frame with columns: time, model, obs +##' @return ggplot object +plot_time_series <- function(aligned) { + ggplot2::ggplot(aligned, ggplot2::aes(x = time)) + + ggplot2::geom_line(ggplot2::aes(y = model, color = "Model")) + + ggplot2::geom_line(ggplot2::aes(y = obs, color = "Obs")) + + ggplot2::labs(color = "", y = "value", title = "Model vs Observations") + + ggplot2::theme_bw() +} diff --git a/modules/benchmark/tests/testthat/test-run_benchmark.R b/modules/benchmark/tests/testthat/test-run_benchmark.R index 746e6f370d7..765dfe1cb02 100644 --- a/modules/benchmark/tests/testthat/test-run_benchmark.R +++ b/modules/benchmark/tests/testthat/test-run_benchmark.R @@ -1,21 +1,40 @@ library(testthat) -test_that("run_benchmark basic works", { - model <- data.frame( - time = as.POSIXct(seq(0, 3600*3, by = 3600), origin = "1970-01-01", tz = "UTC"), - value = c(1, 2, 3, 4) - ) - obs <- data.frame( - time = as.POSIXct(seq(0, 3600*3, by = 3600), origin = "1970-01-01", tz = "UTC"), - value = c(1.1, 1.9, 3.2, 3.9) - ) - tmp1 <- tempfile(fileext = ".csv") - tmp2 <- tempfile(fileext = ".csv") - write.csv(model, tmp1, row.names = FALSE) - write.csv(obs, tmp2, row.names = FALSE) +model_df <- data.frame( + time = as.POSIXct(seq(0, 3600*3, by = 3600), origin = "1970-01-01", tz = "UTC"), + value = c(1, 2, 3, 4) +) +obs_df <- data.frame( + time = as.POSIXct(seq(0, 3600*3, by = 3600), origin = "1970-01-01", tz = "UTC"), + value = c(1.1, 1.9, 3.2, 3.9) +) - res <- run_benchmark(tmp1, tmp2, metrics = c("RMSE", "MAE")) +test_that("run_benchmark returns correct structure", { + res <- run_benchmark(model_df, obs_df, metrics = c("RMSE", "MAE")) expect_true("metrics" %in% names(res)) expect_true("aligned" %in% names(res)) - expect_true(nrow(res$metrics) == 2) + expect_true("plot" %in% names(res)) + expect_equal(nrow(res$metrics), 2) +}) + +test_that("bm_validate rejects bad input", { + bad_df <- data.frame(time = c("2023-01-01"), value = c(1.0)) + expect_error(bm_validate(bad_df, obs_df), "POSIXct") +}) + +test_that("compute_metrics returns correct values", { + aligned <- data.frame( + time = model_df$time, + model = c(1, 2, 3, 4), + obs = c(1, 2, 3, 4) + ) + res <- compute_metrics(aligned, c("RMSE", "MAE")) + expect_equal(res$value[res$metric == "RMSE"], 0) + expect_equal(res$value[res$metric == "MAE"], 0) +}) + +test_that("align_by_time matches exact timestamps", { + aligned <- align_by_time(model_df, obs_df) + expect_equal(nrow(aligned), 4) + expect_true(all(c("time", "model", "obs") %in% names(aligned))) }) From 408870afbc11e7717014b90ab37d4d44e6de5a51 Mon Sep 17 00:00:00 2001 From: anshul23102 Date: Wed, 1 Apr 2026 13:02:18 +0530 Subject: [PATCH 8/9] docs(benchmark): add man pages for bm_validate, compute_metrics, plot_time_series --- modules/benchmark/man/bm_validate.Rd | 19 +++++++++++++++++++ modules/benchmark/man/compute_metrics.Rd | 19 +++++++++++++++++++ modules/benchmark/man/plot_time_series.Rd | 17 +++++++++++++++++ modules/benchmark/man/run_benchmark.Rd | 17 ++++++++++------- 4 files changed, 65 insertions(+), 7 deletions(-) create mode 100644 modules/benchmark/man/bm_validate.Rd create mode 100644 modules/benchmark/man/compute_metrics.Rd create mode 100644 modules/benchmark/man/plot_time_series.Rd diff --git a/modules/benchmark/man/bm_validate.Rd b/modules/benchmark/man/bm_validate.Rd new file mode 100644 index 00000000000..11a4fd2eb36 --- /dev/null +++ b/modules/benchmark/man/bm_validate.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/run_benchmark.R +\name{bm_validate} +\alias{bm_validate} +\title{Validate benchmark input dataframes} +\usage{ +bm_validate(model_df, obs_df) +} +\arguments{ +\item{model_df}{data.frame with columns: time (POSIXct), value (numeric)} + +\item{obs_df}{data.frame with columns: time (POSIXct), value (numeric)} +} +\value{ +invisible(TRUE) +} +\description{ +Validate benchmark input dataframes +} diff --git a/modules/benchmark/man/compute_metrics.Rd b/modules/benchmark/man/compute_metrics.Rd new file mode 100644 index 00000000000..a9a66fe98e9 --- /dev/null +++ b/modules/benchmark/man/compute_metrics.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/run_benchmark.R +\name{compute_metrics} +\alias{compute_metrics} +\title{Compute benchmark metrics} +\usage{ +compute_metrics(aligned, metrics = c("RMSE", "MAE")) +} +\arguments{ +\item{aligned}{data.frame with columns: time, model, obs} + +\item{metrics}{character vector of metric names} +} +\value{ +data.frame with columns: metric, value +} +\description{ +Compute benchmark metrics +} diff --git a/modules/benchmark/man/plot_time_series.Rd b/modules/benchmark/man/plot_time_series.Rd new file mode 100644 index 00000000000..0710201ed1a --- /dev/null +++ b/modules/benchmark/man/plot_time_series.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/run_benchmark.R +\name{plot_time_series} +\alias{plot_time_series} +\title{Plot model vs observations time series} +\usage{ +plot_time_series(aligned) +} +\arguments{ +\item{aligned}{data.frame with columns: time, model, obs} +} +\value{ +ggplot object +} +\description{ +Plot model vs observations time series +} diff --git a/modules/benchmark/man/run_benchmark.Rd b/modules/benchmark/man/run_benchmark.Rd index b1998b4ea5d..3d15828a2b4 100644 --- a/modules/benchmark/man/run_benchmark.Rd +++ b/modules/benchmark/man/run_benchmark.Rd @@ -5,27 +5,30 @@ \title{Run a simple benchmark pipeline} \usage{ run_benchmark( - model_path, - obs_path, + model_df, + obs_df, metrics = c("RMSE", "MAE"), - tolerance_secs = 3600 + tolerance_secs = 3600, + method = "nearest" ) } \arguments{ -\item{model_path}{path to model output CSV file (must have 'time' and 'value' columns)} +\item{model_df}{data.frame with columns: time (POSIXct), value (numeric)} -\item{obs_path}{path to observations CSV file (must have 'time' and 'value' columns)} +\item{obs_df}{data.frame with columns: time (POSIXct), value (numeric)} \item{metrics}{character vector of metrics to compute. Options: "RMSE", "MAE"} \item{tolerance_secs}{nearest-neighbor time tolerance in seconds (default 1 hour)} + +\item{method}{alignment method: "nearest" or "interpolate"} } \value{ list with: metrics (data.frame), aligned (data.frame), plot (ggplot) } \description{ -Loads model output and observations, aligns by time, -computes RMSE and MAE, and returns a results table with a plot. +Takes two validated dataframes, aligns by time, +computes metrics, and returns a results table with a plot. } \author{ Anshul Jain From b1c6a875efef1f79e14059b8a7427f3eb9c6747b Mon Sep 17 00:00:00 2001 From: anshul23102 Date: Wed, 1 Apr 2026 13:16:23 +0530 Subject: [PATCH 9/9] fix(benchmark): use .data$ in aes() to fix R CMD check NOTE --- modules/benchmark/R/run_benchmark.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/benchmark/R/run_benchmark.R b/modules/benchmark/R/run_benchmark.R index 9897b59b015..03128bd4cd7 100644 --- a/modules/benchmark/R/run_benchmark.R +++ b/modules/benchmark/R/run_benchmark.R @@ -91,9 +91,9 @@ compute_metrics <- function(aligned, metrics = c("RMSE", "MAE")) { ##' @param aligned data.frame with columns: time, model, obs ##' @return ggplot object plot_time_series <- function(aligned) { - ggplot2::ggplot(aligned, ggplot2::aes(x = time)) + - ggplot2::geom_line(ggplot2::aes(y = model, color = "Model")) + - ggplot2::geom_line(ggplot2::aes(y = obs, color = "Obs")) + + ggplot2::ggplot(aligned, ggplot2::aes(x = .data$time)) + + ggplot2::geom_line(ggplot2::aes(y = .data$model, color = "Model")) + + ggplot2::geom_line(ggplot2::aes(y = .data$obs, color = "Obs")) + ggplot2::labs(color = "", y = "value", title = "Model vs Observations") + ggplot2::theme_bw() }