From 2559a3aca53637bf11959b8ef934dbc132049f4f Mon Sep 17 00:00:00 2001 From: TANMAY <105608756+tanmaydimriGSOC@users.noreply.github.com> Date: Fri, 20 Mar 2026 00:39:10 +0530 Subject: [PATCH 1/2] Add unit tests for benchmarking metrics and document metric_R2 fallback --- modules/benchmark/R/metric_R2.R | 24 +++++++-- .../benchmark/tests/testthat/test-metrics.R | 49 +++++++++++++++++++ 2 files changed, 69 insertions(+), 4 deletions(-) create mode 100644 modules/benchmark/tests/testthat/test-metrics.R diff --git a/modules/benchmark/R/metric_R2.R b/modules/benchmark/R/metric_R2.R index 94e5df7677b..8b4a13e0e6e 100644 --- a/modules/benchmark/R/metric_R2.R +++ b/modules/benchmark/R/metric_R2.R @@ -1,11 +1,23 @@ ##' @name metric_R2 ##' @title Coefficient of Determination (R2) ##' @export -##' @param metric_dat dataframe +##' @param metric_dat dataframe with columns \code{model} and \code{obvs} ##' @param ... ignored -##' +##' +##' @details +##' Computes R-squared using the correlation-based formula: +##' \eqn{R^2 = \left(\frac{\sum(obs - \bar{obs})(mod - \bar{mod})} +##' {\sqrt{\sum(obs - \bar{obs})^2} \cdot \sqrt{\sum(mod - \bar{mod})^2}}\right)^2} +##' +##' If this formula returns \code{NA} (e.g. when model output is constant +##' across all observations), the function silently falls back to an +##' \code{lm()}-based R-squared via \code{summary(lm())$r.squared}. +##' This fallback may produce unreliable results and triggers a warning +##' from \code{stats::summary.lm}: "essentially perfect fit: summary may +##' be unreliable". Consider checking for constant model output before +##' calling this function. +##' ##' @author Betsy Cowdery - metric_R2 <- function(metric_dat, ...) { PEcAn.logger::logger.info("Metric: Coefficient of Determination (R2)") numer <- sum((metric_dat$obvs - mean(metric_dat$obvs)) * (metric_dat$model - mean(metric_dat$model))) @@ -13,6 +25,10 @@ metric_R2 <- function(metric_dat, ...) { out <- (numer / denom) ^ 2 + # If correlation formula returns NA (e.g. constant model output), + # fall back to lm()-based R-squared. Note: this fallback may trigger + # "essentially perfect fit" warning from stats::summary.lm and + # produce unreliable results in edge cases. if(is.na(out)){ fit <- stats::lm(metric_dat$model ~ metric_dat$obvs) out <- summary(fit)$r.squared @@ -20,4 +36,4 @@ metric_R2 <- function(metric_dat, ...) { return(out) -} # metric_R2 +} # metric_R2 \ No newline at end of file diff --git a/modules/benchmark/tests/testthat/test-metrics.R b/modules/benchmark/tests/testthat/test-metrics.R new file mode 100644 index 00000000000..a5266cc2763 --- /dev/null +++ b/modules/benchmark/tests/testthat/test-metrics.R @@ -0,0 +1,49 @@ +# at the top of test-metrics.R, add this: +if (!requireNamespace("PEcAn.logger", quietly = TRUE)) { + PEcAn.logger <- new.env() + PEcAn.logger$logger.info <- function(...) invisible(NULL) +} + +test_that("metric_RMSE returns 0 for perfect predictions", { + dat <- data.frame(model = c(1, 2, 3), obvs = c(1, 2, 3)) + expect_equal(metric_RMSE(dat), 0) +}) + +test_that("metric_RMSE handles NA values", { + dat <- data.frame(model = c(1, NA, 3), obvs = c(1, 2, 3)) + expect_true(is.numeric(metric_RMSE(dat))) +}) + +test_that("metric_RMSE returns numeric", { + dat <- data.frame(model = c(2, 4), obvs = c(1, 3)) + expect_equal(metric_RMSE(dat), 1) +}) + +test_that("metric_MAE returns 0 for perfect predictions", { + dat <- data.frame(model = c(1, 2, 3), obvs = c(1, 2, 3)) + expect_equal(metric_MAE(dat), 0) +}) + +test_that("metric_MAE returns correct value", { + dat <- data.frame(model = c(3, 3), obvs = c(1, 1)) + expect_equal(metric_MAE(dat), 2) +}) + +test_that("metric_cor returns 1 for perfect linear relationship", { + dat <- data.frame(model = c(1, 2, 3), obvs = c(1, 2, 3)) + expect_equal(metric_cor(dat), 1) +}) + +test_that("metric_R2 returns 1 for perfect predictions", { + dat <- data.frame(model = c(1, 2, 3), obvs = c(1, 2, 3)) + expect_equal(metric_R2(dat), 1) +}) + +test_that("metric_R2 silent NA fallback produces valid output", { + dat <- data.frame(model = c(2, 2, 2), obvs = c(1, 2, 3)) + expect_warning( + result <- metric_R2(dat), + "essentially perfect fit" + ) + expect_true(is.numeric(result)) +}) \ No newline at end of file From 08e9e7bda414ea464837bc4f94fdcd46332f8414 Mon Sep 17 00:00:00 2001 From: TANMAY <105608756+tanmaydimriGSOC@users.noreply.github.com> Date: Tue, 31 Mar 2026 13:46:20 +0530 Subject: [PATCH 2/2] Update metric_R2.Rd to match roxygen documentation --- modules/benchmark/man/metric_R2.Rd | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/modules/benchmark/man/metric_R2.Rd b/modules/benchmark/man/metric_R2.Rd index 65b1feb7fea..41f97646194 100644 --- a/modules/benchmark/man/metric_R2.Rd +++ b/modules/benchmark/man/metric_R2.Rd @@ -7,13 +7,26 @@ metric_R2(metric_dat, ...) } \arguments{ -\item{metric_dat}{dataframe} +\item{metric_dat}{dataframe with columns \code{model} and \code{obvs}} \item{...}{ignored} } \description{ Coefficient of Determination (R2) } +\details{ +Computes R-squared using the correlation-based formula: +\eqn{R^2 = \left(\frac{\sum(obs - \bar{obs})(mod - \bar{mod})} +{\sqrt{\sum(obs - \bar{obs})^2} \cdot \sqrt{\sum(mod - \bar{mod})^2}}\right)^2} + +If this formula returns \code{NA} (e.g. when model output is constant +across all observations), the function silently falls back to an +\code{lm()}-based R-squared via \code{summary(lm())$r.squared}. +This fallback may produce unreliable results and triggers a warning +from \code{stats::summary.lm}: "essentially perfect fit: summary may +be unreliable". Consider checking for constant model output before +calling this function. +} \author{ Betsy Cowdery -} +} \ No newline at end of file