From d6a427c81861291b83cb2c4633f36bbece38cbca Mon Sep 17 00:00:00 2001
From: anshul23102 <anshul23102@iiitd.ac.in>
Date: Mon, 23 Mar 2026 22:57:38 +0530
Subject: [PATCH 1/9] feat(benchmark): add run_benchmark MVP with alignment,
 metrics, and test data

---
 modules/benchmark/R/run_benchmark.R           | 72 +++++++++++++++++++
 .../benchmark/inst/testdata/sample_model.csv  |  5 ++
 .../benchmark/inst/testdata/sample_obs.csv    |  5 ++
 3 files changed, 82 insertions(+)
 create mode 100644 modules/benchmark/R/run_benchmark.R
 create mode 100644 modules/benchmark/inst/testdata/sample_model.csv
 create mode 100644 modules/benchmark/inst/testdata/sample_obs.csv

diff --git a/modules/benchmark/R/run_benchmark.R b/modules/benchmark/R/run_benchmark.R
new file mode 100644
index 00000000000..8cc99bf6765
--- /dev/null
+++ b/modules/benchmark/R/run_benchmark.R
@@ -0,0 +1,72 @@
+##' Run a simple benchmark pipeline
+##'
+##' Loads model output and observations, aligns by time,
+##' computes RMSE and MAE, and returns a results table with a plot.
+##'
+##' @param model_path path to model output CSV file (must have 'time' and 'value' columns)
+##' @param obs_path path to observations CSV file (must have 'time' and 'value' columns)
+##' @param metrics character vector of metrics to compute. Options: "RMSE", "MAE"
+##' @param tolerance_secs nearest-neighbor time tolerance in seconds (default 1 hour)
+##'
+##' @return list with: metrics (data.frame), aligned (data.frame), plot (ggplot)
+##' @export
+##'
+##' @author Your Name
+run_benchmark <- function(model_path, obs_path,
+                          metrics = c("RMSE", "MAE"),
+                          tolerance_secs = 3600) {
+
+  # --- Load data ---
+  model_df <- read.csv(model_path, stringsAsFactors = FALSE)
+  obs_df   <- read.csv(obs_path,   stringsAsFactors = FALSE)
+
+  # --- Ensure time column is POSIXct ---
+  model_df$time <- as.POSIXct(model_df$time, tz = "UTC")
+  obs_df$time   <- as.POSIXct(obs_df$time,   tz = "UTC")
+
+  # --- Align by nearest time ---
+  aligned <- align_by_time(model_df, obs_df, tolerance_secs = tolerance_secs)
+
+  # --- Compute metrics ---
+  results <- list()
+  for (m in toupper(metrics)) {
+    results[[m]] <- switch(m,
+      "RMSE" = sqrt(mean((aligned$model - aligned$obs)^2, na.rm = TRUE)),
+      "MAE"  = mean(abs(aligned$model - aligned$obs),     na.rm = TRUE),
+      stop("Unknown metric: ", m)
+    )
+  }
+  metrics_df <- data.frame(metric = names(results),
+                           value  = unlist(results, use.names = FALSE))
+
+  # --- Plot ---
+  plot <- ggplot2::ggplot(aligned, ggplot2::aes(x = time)) +
+    ggplot2::geom_line(ggplot2::aes(y = model, color = "model")) +
+    ggplot2::geom_line(ggplot2::aes(y = obs,   color = "obs")) +
+    ggplot2::labs(color = "", y = "value", title = "Model vs Observations")
+
+  list(metrics = metrics_df, aligned = aligned, plot = plot)
+}
+
+
+##' Align model and observation data frames by nearest time
+##'
+##' @param model_df data.frame with columns: time (POSIXct), value
+##' @param obs_df   data.frame with columns: time (POSIXct), value
+##' @param tolerance_secs max allowed time difference in seconds
+##'
+##' @return data.frame with columns: time, model, obs
+align_by_time <- function(model_df, obs_df, tolerance_secs = 3600) {
+  aligned <- do.call(rbind, lapply(seq_len(nrow(model_df)), function(i) {
+    diffs <- abs(as.numeric(difftime(obs_df$time, model_df$time[i], units = "secs")))
+    nearest <- which.min(diffs)
+    if (diffs[nearest] <= tolerance_secs) {
+      data.frame(time  = model_df$time[i],
+                 model = model_df$value[i],
+                 obs   = obs_df$value[nearest])
+    } else {
+      NULL
+    }
+  }))
+  aligned
+}
diff --git a/modules/benchmark/inst/testdata/sample_model.csv b/modules/benchmark/inst/testdata/sample_model.csv
new file mode 100644
index 00000000000..cf5df29d50d
--- /dev/null
+++ b/modules/benchmark/inst/testdata/sample_model.csv
@@ -0,0 +1,5 @@
+time,value
+2020-01-01 00:00:00,1.0
+2020-01-01 01:00:00,2.0
+2020-01-01 02:00:00,3.0
+2020-01-01 03:00:00,4.0
diff --git a/modules/benchmark/inst/testdata/sample_obs.csv b/modules/benchmark/inst/testdata/sample_obs.csv
new file mode 100644
index 00000000000..f24d28f7c26
--- /dev/null
+++ b/modules/benchmark/inst/testdata/sample_obs.csv
@@ -0,0 +1,5 @@
+time,value
+2020-01-01 00:00:00,1.1
+2020-01-01 01:00:00,1.9
+2020-01-01 02:00:00,3.2
+2020-01-01 03:00:00,3.9

From f8203ff28728788a2f704ba6417df5cfcc38ce33 Mon Sep 17 00:00:00 2001
From: anshul23102 <anshul23102@iiitd.ac.in>
Date: Mon, 23 Mar 2026 23:16:03 +0530
Subject: [PATCH 2/9] feat(benchmark): add tests and update README with
 quickstart

---
 modules/benchmark/README.md                   | 35 +++++++++++++++++++
 .../tests/testthat/test-run_benchmark.R       | 21 +++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 modules/benchmark/tests/testthat/test-run_benchmark.R

diff --git a/modules/benchmark/README.md b/modules/benchmark/README.md
index a8fd53648d8..69bdb5e2f3d 100644
--- a/modules/benchmark/README.md
+++ b/modules/benchmark/README.md
@@ -1,4 +1,39 @@
+## Quickstart: run_benchmark()
 
+`run_benchmark()` is a simple entry point that loads model output and
+observations, aligns them by time, computes metrics, and returns a list with the metrics table, the aligned data, and a plot.
+
+### Input format
+
+Both input files must be CSV with two columns:
+- `time` — timestamp (e.g. `2020-01-01 00:00:00`)
+- `value` — numeric variable value
+
+### Usage
+```r
+library(PEcAn.benchmark)
+
+res <- run_benchmark(
+  model_path = "inst/testdata/sample_model.csv",
+  obs_path   = "inst/testdata/sample_obs.csv"
+)
+
+# View metrics
+print(res$metrics)
+#   metric     value
+# 1   RMSE 0.1322876
+# 2    MAE 0.1250000
+
+# View plot
+res$plot
+```
+
+### Parameters
+
+- `model_path` — path to model output CSV
+- `obs_path` — path to observations CSV
+- `metrics` — vector of metrics to compute: `"RMSE"`, `"MAE"` (default: both)
+- `tolerance_secs` — max time difference for matching (default: 3600 seconds)
 # PEcAn.benchmark
 
 <!-- badges: start -->
diff --git a/modules/benchmark/tests/testthat/test-run_benchmark.R b/modules/benchmark/tests/testthat/test-run_benchmark.R
new file mode 100644
index 00000000000..746e6f370d7
--- /dev/null
+++ b/modules/benchmark/tests/testthat/test-run_benchmark.R
@@ -0,0 +1,21 @@
+library(testthat)
+
+test_that("run_benchmark basic works", {
+  model <- data.frame(
+    time  = as.POSIXct(seq(0, 3600*3, by = 3600), origin = "1970-01-01", tz = "UTC"),
+    value = c(1, 2, 3, 4)
+  )
+  obs <- data.frame(
+    time  = as.POSIXct(seq(0, 3600*3, by = 3600), origin = "1970-01-01", tz = "UTC"),
+    value = c(1.1, 1.9, 3.2, 3.9)
+  )
+  tmp1 <- tempfile(fileext = ".csv")
+  tmp2 <- tempfile(fileext = ".csv")
+  write.csv(model, tmp1, row.names = FALSE)
+  write.csv(obs,   tmp2, row.names = FALSE)
+
+  res <- run_benchmark(tmp1, tmp2, metrics = c("RMSE", "MAE"))
+  expect_true("metrics" %in% names(res))
+  expect_true("aligned" %in% names(res))
+  expect_true(nrow(res$metrics) == 2)
+})

From acc2d0d84886b97b23f548eca9eb04a93ccc266e Mon Sep 17 00:00:00 2001
From: anshul23102 <anshul23102@iiitd.ac.in>
Date: Wed, 25 Mar 2026 12:07:39 +0530
Subject: [PATCH 3/9] docs(benchmark): add roxygen man page for run_benchmark

---
 modules/benchmark/man/run_benchmark.Rd | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 modules/benchmark/man/run_benchmark.Rd

diff --git a/modules/benchmark/man/run_benchmark.Rd b/modules/benchmark/man/run_benchmark.Rd
new file mode 100644
index 00000000000..cb742bc35e6
--- /dev/null
+++ b/modules/benchmark/man/run_benchmark.Rd
@@ -0,0 +1,19 @@
+\name{run_benchmark}
+\alias{run_benchmark}
+\title{Run a simple benchmark pipeline}
+\usage{
+run_benchmark(model_path, obs_path, metrics = c("RMSE", "MAE"), tolerance_secs = 3600)
+}
+\arguments{
+\item{model_path}{path to model output CSV file}
+\item{obs_path}{path to observations CSV file}
+\item{metrics}{character vector of metrics to compute}
+\item{tolerance_secs}{nearest-neighbor time tolerance in seconds}
+}
+\value{
+list with: metrics (data.frame), aligned (data.frame), plot (ggplot)
+}
+\description{
+Loads model output and observations, aligns by time,
+computes RMSE and MAE, and returns a results table with a plot.
+}

From 0d272fb09a361b4e8faef6146236f891931fe21e Mon Sep 17 00:00:00 2001
From: anshul23102 <anshul23102@iiitd.ac.in>
Date: Wed, 1 Apr 2026 12:11:23 +0530
Subject: [PATCH 4/9] docs(benchmark): add man pages and update NAMESPACE

---
 modules/benchmark/NAMESPACE            |  1 +
 modules/benchmark/man/align_by_time.Rd | 21 +++++++++++++++++++++
 modules/benchmark/man/run_benchmark.Rd | 16 ++++++++++++----
 3 files changed, 34 insertions(+), 4 deletions(-)
 create mode 100644 modules/benchmark/man/align_by_time.Rd

diff --git a/modules/benchmark/NAMESPACE b/modules/benchmark/NAMESPACE
index e89a0005f74..ea1785a2437 100644
--- a/modules/benchmark/NAMESPACE
+++ b/modules/benchmark/NAMESPACE
@@ -46,3 +46,4 @@ importFrom(ggplot2,ggplot)
 importFrom(ggplot2,labs)
 importFrom(magrittr,"%>%")
 importFrom(rlang,.data)
+export(run_benchmark)
diff --git a/modules/benchmark/man/align_by_time.Rd b/modules/benchmark/man/align_by_time.Rd
new file mode 100644
index 00000000000..33e313bb5b5
--- /dev/null
+++ b/modules/benchmark/man/align_by_time.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/run_benchmark.R
+\name{align_by_time}
+\alias{align_by_time}
+\title{Align model and observation data frames by nearest time}
+\usage{
+align_by_time(model_df, obs_df, tolerance_secs = 3600)
+}
+\arguments{
+\item{model_df}{data.frame with columns: time (POSIXct), value}
+
+\item{obs_df}{data.frame with columns: time (POSIXct), value}
+
+\item{tolerance_secs}{max allowed time difference in seconds}
+}
+\value{
+data.frame with columns: time, model, obs
+}
+\description{
+Align model and observation data frames by nearest time
+}
diff --git a/modules/benchmark/man/run_benchmark.Rd b/modules/benchmark/man/run_benchmark.Rd
index cb742bc35e6..d7d7470694d 100644
--- a/modules/benchmark/man/run_benchmark.Rd
+++ b/modules/benchmark/man/run_benchmark.Rd
@@ -1,3 +1,5 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/run_benchmark.R
 \name{run_benchmark}
 \alias{run_benchmark}
 \title{Run a simple benchmark pipeline}
@@ -5,10 +7,13 @@
 run_benchmark(model_path, obs_path, metrics = c("RMSE", "MAE"), tolerance_secs = 3600)
 }
 \arguments{
-\item{model_path}{path to model output CSV file}
-\item{obs_path}{path to observations CSV file}
-\item{metrics}{character vector of metrics to compute}
-\item{tolerance_secs}{nearest-neighbor time tolerance in seconds}
+\item{model_path}{path to model output CSV file (must have 'time' and 'value' columns)}
+
+\item{obs_path}{path to observations CSV file (must have 'time' and 'value' columns)}
+
+\item{metrics}{character vector of metrics to compute. Options: "RMSE", "MAE"}
+
+\item{tolerance_secs}{nearest-neighbor time tolerance in seconds (default 1 hour)}
 }
 \value{
 list with: metrics (data.frame), aligned (data.frame), plot (ggplot)
@@ -17,3 +22,6 @@ list with: metrics (data.frame), aligned (data.frame), plot (ggplot)
 Loads model output and observations, aligns by time,
 computes RMSE and MAE, and returns a results table with a plot.
 }
+\author{
+Your Name
+}

From 8c0832c5c49aecb8578f6028059f36da1e9062ee Mon Sep 17 00:00:00 2001
From: anshul23102 <anshul23102@iiitd.ac.in>
Date: Wed, 1 Apr 2026 12:25:56 +0530
Subject: [PATCH 5/9] fix(benchmark): move run_benchmark export into the
 sorted export block of NAMESPACE

---
 modules/benchmark/NAMESPACE | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/benchmark/NAMESPACE b/modules/benchmark/NAMESPACE
index ea1785a2437..b898b6bc13e 100644
--- a/modules/benchmark/NAMESPACE
+++ b/modules/benchmark/NAMESPACE
@@ -35,6 +35,7 @@ export(metric_run)
 export(metric_scatter_plot)
 export(metric_timeseries_plot)
 export(read_settings_BRR)
+export(run_benchmark)
 importFrom(dplyr,collect)
 importFrom(dplyr,filter)
 importFrom(dplyr,rename)
@@ -46,4 +47,3 @@ importFrom(ggplot2,ggplot)
 importFrom(ggplot2,labs)
 importFrom(magrittr,"%>%")
 importFrom(rlang,.data)
-export(run_benchmark)

From 1e90203360ad14b910899364bce197dccf3fb3a5 Mon Sep 17 00:00:00 2001
From: anshul23102 <anshul23102@iiitd.ac.in>
Date: Wed, 1 Apr 2026 12:42:15 +0530
Subject: [PATCH 6/9] fix(benchmark): use multi-line usage format and fix
 author name

---
 modules/benchmark/R/run_benchmark.R    | 2 +-
 modules/benchmark/man/run_benchmark.Rd | 9 +++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/modules/benchmark/R/run_benchmark.R b/modules/benchmark/R/run_benchmark.R
index 8cc99bf6765..c4b7004c1e0 100644
--- a/modules/benchmark/R/run_benchmark.R
+++ b/modules/benchmark/R/run_benchmark.R
@@ -11,7 +11,7 @@
 ##' @return list with: metrics (data.frame), aligned (data.frame), plot (ggplot)
 ##' @export
 ##'
-##' @author Your Name
+##' @author Anshul Jain
 run_benchmark <- function(model_path, obs_path,
                           metrics = c("RMSE", "MAE"),
                           tolerance_secs = 3600) {
diff --git a/modules/benchmark/man/run_benchmark.Rd b/modules/benchmark/man/run_benchmark.Rd
index d7d7470694d..b1998b4ea5d 100644
--- a/modules/benchmark/man/run_benchmark.Rd
+++ b/modules/benchmark/man/run_benchmark.Rd
@@ -4,7 +4,12 @@
 \alias{run_benchmark}
 \title{Run a simple benchmark pipeline}
 \usage{
-run_benchmark(model_path, obs_path, metrics = c("RMSE", "MAE"), tolerance_secs = 3600)
+run_benchmark(
+  model_path,
+  obs_path,
+  metrics = c("RMSE", "MAE"),
+  tolerance_secs = 3600
+)
 }
 \arguments{
 \item{model_path}{path to model output CSV file (must have 'time' and 'value' columns)}
@@ -23,5 +28,5 @@ Loads model output and observations, aligns by time,
 computes RMSE and MAE, and returns a results table with a plot.
 }
 \author{
-Your Name
+Anshul Jain
 }

From 01043afa09b7624979dd64775861ffb8d0eb3fc2 Mon Sep 17 00:00:00 2001
From: anshul23102 <anshul23102@iiitd.ac.in>
Date: Wed, 1 Apr 2026 12:46:40 +0530
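Subject: [PATCH 7/9] refactor(benchmark): dataframe-first API, add
 bm_validate/compute_metrics/plot_time_series, update tests

BREAKING: run_benchmark() now takes data frames (model_df, obs_df) instead
of CSV paths (model_path, obs_path). The README quickstart added in patch
2/9 still shows the path-based call and needs a follow-up update. The new
`method` argument is accepted but not yet used by the alignment step.
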
---
 modules/benchmark/R/run_benchmark.R           | 91 ++++++++++++-------
 .../tests/testthat/test-run_benchmark.R       | 49 +++++++---
 2 files changed, 93 insertions(+), 47 deletions(-)

diff --git a/modules/benchmark/R/run_benchmark.R b/modules/benchmark/R/run_benchmark.R
index c4b7004c1e0..9897b59b015 100644
--- a/modules/benchmark/R/run_benchmark.R
+++ b/modules/benchmark/R/run_benchmark.R
@@ -1,53 +1,51 @@
 ##' Run a simple benchmark pipeline
 ##'
-##' Loads model output and observations, aligns by time,
-##' computes RMSE and MAE, and returns a results table with a plot.
+##' Takes two validated dataframes, aligns by time,
+##' computes metrics, and returns a results table with a plot.
 ##'
-##' @param model_path path to model output CSV file (must have 'time' and 'value' columns)
-##' @param obs_path path to observations CSV file (must have 'time' and 'value' columns)
+##' @param model_df data.frame with columns: time (POSIXct), value (numeric)
+##' @param obs_df   data.frame with columns: time (POSIXct), value (numeric)
 ##' @param metrics character vector of metrics to compute. Options: "RMSE", "MAE"
 ##' @param tolerance_secs nearest-neighbor time tolerance in seconds (default 1 hour)
+##' @param method alignment method: "nearest" or "interpolate"
 ##'
 ##' @return list with: metrics (data.frame), aligned (data.frame), plot (ggplot)
 ##' @export
-##'
 ##' @author Anshul Jain
-run_benchmark <- function(model_path, obs_path,
+run_benchmark <- function(model_df, obs_df,
                           metrics = c("RMSE", "MAE"),
-                          tolerance_secs = 3600) {
-
-  # --- Load data ---
-  model_df <- read.csv(model_path, stringsAsFactors = FALSE)
-  obs_df   <- read.csv(obs_path,   stringsAsFactors = FALSE)
+                          tolerance_secs = 3600,
+                          method = "nearest") {
 
-  # --- Ensure time column is POSIXct ---
-  model_df$time <- as.POSIXct(model_df$time, tz = "UTC")
-  obs_df$time   <- as.POSIXct(obs_df$time,   tz = "UTC")
+  # Stage 1: Validate schema
+  bm_validate(model_df, obs_df)
 
-  # --- Align by nearest time ---
+  # Stage 2: Align by time
   aligned <- align_by_time(model_df, obs_df, tolerance_secs = tolerance_secs)
 
-  # --- Compute metrics ---
-  results <- list()
-  for (m in toupper(metrics)) {
-    results[[m]] <- switch(m,
-      "RMSE" = sqrt(mean((aligned$model - aligned$obs)^2, na.rm = TRUE)),
-      "MAE"  = mean(abs(aligned$model - aligned$obs),     na.rm = TRUE),
-      stop("Unknown metric: ", m)
-    )
-  }
-  metrics_df <- data.frame(metric = names(results),
-                           value  = unlist(results, use.names = FALSE))
+  # Stage 3: Compute metrics via registry
+  results <- compute_metrics(aligned, metrics)
 
-  # --- Plot ---
-  plot <- ggplot2::ggplot(aligned, ggplot2::aes(x = time)) +
-    ggplot2::geom_line(ggplot2::aes(y = model, color = "model")) +
-    ggplot2::geom_line(ggplot2::aes(y = obs,   color = "obs")) +
-    ggplot2::labs(color = "", y = "value", title = "Model vs Observations")
+  # Stage 4: Plot
+  plot <- plot_time_series(aligned)
 
-  list(metrics = metrics_df, aligned = aligned, plot = plot)
+  list(metrics = results, aligned = aligned, plot = plot)
 }
 
+##' Validate benchmark input dataframes
+##'
+##' @param model_df data.frame with columns: time (POSIXct), value (numeric)
+##' @param obs_df   data.frame with columns: time (POSIXct), value (numeric)
+##' @return invisible(TRUE)
+bm_validate <- function(model_df, obs_df) {
+  for (df in list(model_df, obs_df)) {
+    if (!inherits(df$time, "POSIXct"))
+      stop("Column 'time' must be POSIXct, got: ", class(df$time))
+    if (!is.numeric(df$value))
+      stop("Column 'value' must be numeric, got: ", class(df$value))
+  }
+  invisible(TRUE)
+}
 
 ##' Align model and observation data frames by nearest time
 ##'
@@ -70,3 +68,32 @@ align_by_time <- function(model_df, obs_df, tolerance_secs = 3600) {
   }))
   aligned
 }
+
+##' Compute benchmark metrics
+##'
+##' @param aligned data.frame with columns: time, model, obs
+##' @param metrics character vector of metric names
+##' @return data.frame with columns: metric, value
+compute_metrics <- function(aligned, metrics = c("RMSE", "MAE")) {
+  METRIC_REGISTRY <- list(
+    RMSE = function(x, y) sqrt(mean((x - y)^2, na.rm = TRUE)),
+    MAE  = function(x, y) mean(abs(x - y), na.rm = TRUE)
+  )
+  results <- lapply(toupper(metrics), function(m) {
+    if (!m %in% names(METRIC_REGISTRY)) stop("Unknown metric: ", m)
+    METRIC_REGISTRY[[m]](aligned$model, aligned$obs)
+  })
+  data.frame(metric = toupper(metrics), value = unlist(results, use.names = FALSE))
+}
+
+##' Plot model vs observations time series
+##'
+##' @param aligned data.frame with columns: time, model, obs
+##' @return ggplot object
+plot_time_series <- function(aligned) {
+  ggplot2::ggplot(aligned, ggplot2::aes(x = time)) +
+    ggplot2::geom_line(ggplot2::aes(y = model, color = "Model")) +
+    ggplot2::geom_line(ggplot2::aes(y = obs,   color = "Obs")) +
+    ggplot2::labs(color = "", y = "value", title = "Model vs Observations") +
+    ggplot2::theme_bw()
+}
diff --git a/modules/benchmark/tests/testthat/test-run_benchmark.R b/modules/benchmark/tests/testthat/test-run_benchmark.R
index 746e6f370d7..765dfe1cb02 100644
--- a/modules/benchmark/tests/testthat/test-run_benchmark.R
+++ b/modules/benchmark/tests/testthat/test-run_benchmark.R
@@ -1,21 +1,40 @@
 library(testthat)
 
-test_that("run_benchmark basic works", {
-  model <- data.frame(
-    time  = as.POSIXct(seq(0, 3600*3, by = 3600), origin = "1970-01-01", tz = "UTC"),
-    value = c(1, 2, 3, 4)
-  )
-  obs <- data.frame(
-    time  = as.POSIXct(seq(0, 3600*3, by = 3600), origin = "1970-01-01", tz = "UTC"),
-    value = c(1.1, 1.9, 3.2, 3.9)
-  )
-  tmp1 <- tempfile(fileext = ".csv")
-  tmp2 <- tempfile(fileext = ".csv")
-  write.csv(model, tmp1, row.names = FALSE)
-  write.csv(obs,   tmp2, row.names = FALSE)
+model_df <- data.frame(
+  time  = as.POSIXct(seq(0, 3600*3, by = 3600), origin = "1970-01-01", tz = "UTC"),
+  value = c(1, 2, 3, 4)
+)
+obs_df <- data.frame(
+  time  = as.POSIXct(seq(0, 3600*3, by = 3600), origin = "1970-01-01", tz = "UTC"),
+  value = c(1.1, 1.9, 3.2, 3.9)
+)
 
-  res <- run_benchmark(tmp1, tmp2, metrics = c("RMSE", "MAE"))
+test_that("run_benchmark returns correct structure", {
+  res <- run_benchmark(model_df, obs_df, metrics = c("RMSE", "MAE"))
   expect_true("metrics" %in% names(res))
   expect_true("aligned" %in% names(res))
-  expect_true(nrow(res$metrics) == 2)
+  expect_true("plot"    %in% names(res))
+  expect_equal(nrow(res$metrics), 2)
+})
+
+test_that("bm_validate rejects bad input", {
+  bad_df <- data.frame(time = c("2023-01-01"), value = c(1.0))
+  expect_error(bm_validate(bad_df, obs_df), "POSIXct")
+})
+
+test_that("compute_metrics returns correct values", {
+  aligned <- data.frame(
+    time  = model_df$time,
+    model = c(1, 2, 3, 4),
+    obs   = c(1, 2, 3, 4)
+  )
+  res <- compute_metrics(aligned, c("RMSE", "MAE"))
+  expect_equal(res$value[res$metric == "RMSE"], 0)
+  expect_equal(res$value[res$metric == "MAE"],  0)
+})
+
+test_that("align_by_time matches exact timestamps", {
+  aligned <- align_by_time(model_df, obs_df)
+  expect_equal(nrow(aligned), 4)
+  expect_true(all(c("time", "model", "obs") %in% names(aligned)))
 })

From 408870afbc11e7717014b90ab37d4d44e6de5a51 Mon Sep 17 00:00:00 2001
From: anshul23102 <anshul23102@iiitd.ac.in>
Date: Wed, 1 Apr 2026 13:02:18 +0530
Subject: [PATCH 8/9] docs(benchmark): add man pages for bm_validate,
 compute_metrics, plot_time_series

---
 modules/benchmark/man/bm_validate.Rd      | 19 +++++++++++++++++++
 modules/benchmark/man/compute_metrics.Rd  | 19 +++++++++++++++++++
 modules/benchmark/man/plot_time_series.Rd | 17 +++++++++++++++++
 modules/benchmark/man/run_benchmark.Rd    | 17 ++++++++++-------
 4 files changed, 65 insertions(+), 7 deletions(-)
 create mode 100644 modules/benchmark/man/bm_validate.Rd
 create mode 100644 modules/benchmark/man/compute_metrics.Rd
 create mode 100644 modules/benchmark/man/plot_time_series.Rd

diff --git a/modules/benchmark/man/bm_validate.Rd b/modules/benchmark/man/bm_validate.Rd
new file mode 100644
index 00000000000..11a4fd2eb36
--- /dev/null
+++ b/modules/benchmark/man/bm_validate.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/run_benchmark.R
+\name{bm_validate}
+\alias{bm_validate}
+\title{Validate benchmark input dataframes}
+\usage{
+bm_validate(model_df, obs_df)
+}
+\arguments{
+\item{model_df}{data.frame with columns: time (POSIXct), value (numeric)}
+
+\item{obs_df}{data.frame with columns: time (POSIXct), value (numeric)}
+}
+\value{
+invisible(TRUE)
+}
+\description{
+Validate benchmark input dataframes
+}
diff --git a/modules/benchmark/man/compute_metrics.Rd b/modules/benchmark/man/compute_metrics.Rd
new file mode 100644
index 00000000000..a9a66fe98e9
--- /dev/null
+++ b/modules/benchmark/man/compute_metrics.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/run_benchmark.R
+\name{compute_metrics}
+\alias{compute_metrics}
+\title{Compute benchmark metrics}
+\usage{
+compute_metrics(aligned, metrics = c("RMSE", "MAE"))
+}
+\arguments{
+\item{aligned}{data.frame with columns: time, model, obs}
+
+\item{metrics}{character vector of metric names}
+}
+\value{
+data.frame with columns: metric, value
+}
+\description{
+Compute benchmark metrics
+}
diff --git a/modules/benchmark/man/plot_time_series.Rd b/modules/benchmark/man/plot_time_series.Rd
new file mode 100644
index 00000000000..0710201ed1a
--- /dev/null
+++ b/modules/benchmark/man/plot_time_series.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/run_benchmark.R
+\name{plot_time_series}
+\alias{plot_time_series}
+\title{Plot model vs observations time series}
+\usage{
+plot_time_series(aligned)
+}
+\arguments{
+\item{aligned}{data.frame with columns: time, model, obs}
+}
+\value{
+ggplot object
+}
+\description{
+Plot model vs observations time series
+}
diff --git a/modules/benchmark/man/run_benchmark.Rd b/modules/benchmark/man/run_benchmark.Rd
index b1998b4ea5d..3d15828a2b4 100644
--- a/modules/benchmark/man/run_benchmark.Rd
+++ b/modules/benchmark/man/run_benchmark.Rd
@@ -5,27 +5,30 @@
 \title{Run a simple benchmark pipeline}
 \usage{
 run_benchmark(
-  model_path,
-  obs_path,
+  model_df,
+  obs_df,
   metrics = c("RMSE", "MAE"),
-  tolerance_secs = 3600
+  tolerance_secs = 3600,
+  method = "nearest"
 )
 }
 \arguments{
-\item{model_path}{path to model output CSV file (must have 'time' and 'value' columns)}
+\item{model_df}{data.frame with columns: time (POSIXct), value (numeric)}
 
-\item{obs_path}{path to observations CSV file (must have 'time' and 'value' columns)}
+\item{obs_df}{data.frame with columns: time (POSIXct), value (numeric)}
 
 \item{metrics}{character vector of metrics to compute. Options: "RMSE", "MAE"}
 
 \item{tolerance_secs}{nearest-neighbor time tolerance in seconds (default 1 hour)}
+
+\item{method}{alignment method: "nearest" or "interpolate"}
 }
 \value{
 list with: metrics (data.frame), aligned (data.frame), plot (ggplot)
 }
 \description{
-Loads model output and observations, aligns by time,
-computes RMSE and MAE, and returns a results table with a plot.
+Takes two validated dataframes, aligns by time,
+computes metrics, and returns a results table with a plot.
 }
 \author{
 Anshul Jain

From b1c6a875efef1f79e14059b8a7427f3eb9c6747b Mon Sep 17 00:00:00 2001
From: anshul23102 <anshul23102@iiitd.ac.in>
Date: Wed, 1 Apr 2026 13:16:23 +0530
Subject: [PATCH 9/9] fix(benchmark): use .data$ in aes() to fix R CMD check
 NOTE

---
 modules/benchmark/R/run_benchmark.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/benchmark/R/run_benchmark.R b/modules/benchmark/R/run_benchmark.R
index 9897b59b015..03128bd4cd7 100644
--- a/modules/benchmark/R/run_benchmark.R
+++ b/modules/benchmark/R/run_benchmark.R
@@ -91,9 +91,9 @@ compute_metrics <- function(aligned, metrics = c("RMSE", "MAE")) {
 ##' @param aligned data.frame with columns: time, model, obs
 ##' @return ggplot object
 plot_time_series <- function(aligned) {
-  ggplot2::ggplot(aligned, ggplot2::aes(x = time)) +
-    ggplot2::geom_line(ggplot2::aes(y = model, color = "Model")) +
-    ggplot2::geom_line(ggplot2::aes(y = obs,   color = "Obs")) +
+  ggplot2::ggplot(aligned, ggplot2::aes(x = .data$time)) +
+    ggplot2::geom_line(ggplot2::aes(y = .data$model, color = "Model")) +
+    ggplot2::geom_line(ggplot2::aes(y = .data$obs, color = "Obs")) +
     ggplot2::labs(color = "", y = "value", title = "Model vs Observations") +
     ggplot2::theme_bw()
 }