diff --git a/DESCRIPTION b/DESCRIPTION index 03cd5cc85..8c77e9321 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -59,7 +59,6 @@ Imports: data.table (>= 1.16.0), ggplot2 (>= 3.4.0), methods, - Metrics, purrr, scoringRules (>= 1.1.3), stats @@ -67,6 +66,7 @@ Suggests: ggdist, kableExtra, knitr, + Metrics, rmarkdown, testthat (>= 3.1.9), vdiffr diff --git a/NAMESPACE b/NAMESPACE index 151aa6149..414b9baf8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -109,9 +109,6 @@ export(transform_forecasts) export(underprediction_quantile) export(underprediction_sample) export(wis) -importFrom(Metrics,ae) -importFrom(Metrics,ape) -importFrom(Metrics,se) importFrom(checkmate,assert) importFrom(checkmate,assert_character) importFrom(checkmate,assert_class) diff --git a/R/class-forecast-point.R b/R/class-forecast-point.R index 8635b25b9..820b8c39c 100644 --- a/R/class-forecast-point.R +++ b/R/class-forecast-point.R @@ -75,7 +75,6 @@ is_forecast_point <- function(x) { } -#' @importFrom Metrics se ae ape #' @importFrom stats na.omit #' @importFrom data.table setattr copy #' @rdname score @@ -100,9 +99,9 @@ score.forecast_point <- function(forecast, metrics = get_metrics(forecast), ...) #' #' @description #' For point forecasts, the default scoring rules are: -#' - "ae_point" = [ae()][Metrics::ae()] -#' - "se_point" = [se()][Metrics::se()] -#' - "ape" = [ape()][Metrics::ape()] +#' - "ae_point" = absolute error +#' - "se_point" = squared error +#' - "ape" = absolute percentage error #' #' A note of caution: Every scoring rule for a point forecast #' is implicitly minimised by a specific aspect of the predictive distribution @@ -145,9 +144,9 @@ score.forecast_point <- function(forecast, metrics = get_metrics(forecast), ...) #' Journal of the American Statistical Association. get_metrics.forecast_point <- function(x, select = NULL, exclude = NULL, ...) 
{ all <- list( - ae_point = Metrics::ae, - se_point = Metrics::se, - ape = Metrics::ape + ae_point = function(actual, predicted) abs(actual - predicted), + se_point = function(actual, predicted) (actual - predicted)^2, + ape = function(actual, predicted) abs(actual - predicted) / abs(actual) ) select_metrics(all, select, exclude) } diff --git a/man/get_metrics.forecast_point.Rd b/man/get_metrics.forecast_point.Rd index 1d34200e5..77b4b0c25 100644 --- a/man/get_metrics.forecast_point.Rd +++ b/man/get_metrics.forecast_point.Rd @@ -21,9 +21,9 @@ If \code{select} is not \code{NULL}, this argument is ignored.} \description{ For point forecasts, the default scoring rules are: \itemize{ -\item "ae_point" = \link[Metrics:ae]{ae()} -\item "se_point" = \link[Metrics:se]{se()} -\item "ape" = \link[Metrics:ape]{ape()} +\item "ae_point" = absolute error +\item "se_point" = squared error +\item "ape" = absolute percentage error } A note of caution: Every scoring rule for a point forecast diff --git a/tests/testthat/test-class-forecast-point.R b/tests/testthat/test-class-forecast-point.R index fb2fd161d..e2a5ea6f4 100644 --- a/tests/testthat/test-class-forecast-point.R +++ b/tests/testthat/test-class-forecast-point.R @@ -107,3 +107,53 @@ test_that("get_metrics.forecast_point() works as expected", { c("ae_point", "se_point", "ape") ) }) + +test_that("internal ae replacement produces identical results", { + observed <- c(1, -15, 22, 0, 5.5) + predicted <- c(5, 6, 7, 0, 5.5) + ae_fn <- get_metrics(example_point, select = "ae_point")[[1]] + expect_identical(ae_fn(observed, predicted), abs(observed - predicted)) + expect_identical(ae_fn(5, 5), 0) + expect_identical(ae_fn(-10, 5), 15) +}) + +test_that("internal se replacement produces identical results", { + observed <- c(1, -15, 22, 0, 5.5) + predicted <- c(5, 6, 7, 0, 5.5) + se_fn <- get_metrics(example_point, select = "se_point")[[1]] + expect_identical(se_fn(observed, predicted), (observed - predicted)^2) + 
expect_identical(se_fn(5, 5), 0) + expect_identical(se_fn(-10, 5), 225) +}) + +test_that("internal ape replacement produces identical results", { + observed <- c(1, -15, 22, 5.5, 100) + predicted <- c(5, 6, 7, 0, 100) + ape_fn <- get_metrics(example_point, select = "ape")[[1]] + expect_equal(ape_fn(observed, predicted), abs(observed - predicted) / abs(observed)) + expect_identical(ape_fn(5, 5), 0) + expect_identical(ape_fn(0, 5), Inf) +}) + +test_that("Metrics package is not in DESCRIPTION Imports", { + # Check the Imports field only: Metrics listed under Suggests must not be a false positive + imports_field <- utils::packageDescription("scoringutils")$Imports + expect_false(any(grepl("\\bMetrics\\b", imports_field))) +}) + +test_that("score() with point forecasts produces correct results after Metrics removal", { + scores <- score(example_point) + input <- na.omit(as.data.table(example_point)) + expect_equal(scores$ae_point, abs(input$observed - input$predicted)) + expect_equal(scores$se_point, (input$observed - input$predicted)^2) + expect_equal(scores$ape, abs(input$observed - input$predicted) / abs(input$observed)) + expect_true(all(c("ae_point", "se_point", "ape") %in% colnames(scores))) +}) + +test_that("get_metrics.forecast_point() returns expected functions", { + metrics <- get_metrics(example_point) + expect_type(metrics, "list") + expect_named(metrics, c("ae_point", "se_point", "ape")) + expect_true(all(vapply(metrics, is.function, logical(1)))) + expect_true(all(vapply(metrics, function(f) length(formals(f)) == 2, logical(1)))) +}) diff --git a/vignettes/scoring-rules.Rmd b/vignettes/scoring-rules.Rmd index f07e32a2f..292782526 100644 --- a/vignettes/scoring-rules.Rmd +++ b/vignettes/scoring-rules.Rmd @@ -58,11 +58,11 @@ observed <- rnorm(n, 5, 4)^2 predicted_mu <- mean(observed) predicted_not_mu <- predicted_mu - rnorm(n, 10, 2) -mean(Metrics::ae(observed, predicted_mu)) -mean(Metrics::ae(observed, predicted_not_mu)) +mean(abs(observed - predicted_mu)) 
+mean(abs(observed - predicted_not_mu)) -mean(Metrics::se(observed, predicted_mu)) -mean(Metrics::se(observed, predicted_not_mu)) +mean((observed - predicted_mu)^2) +mean((observed - predicted_not_mu)^2) ``` @@ -73,7 +73,7 @@ mean(Metrics::se(observed, predicted_not_mu)) **Forecast**: $\hat{y}$, a real number, the median of the forecaster's predictive distribution. -The absolute error is the absolute difference between the predicted and the observed values. See `?Metrics::ae`. +The absolute error is the absolute difference between the predicted and the observed values. $$\text{ae} = |y - \hat{y}|$$ @@ -85,7 +85,7 @@ The absolute error is only an appropriate rule if $\hat{y}$ corresponds to the m **Forecast**: $\hat{y}$, a real number, the mean of the forecaster's predictive distribution. -The squared error is the squared difference between the predicted and the observed values. See `?Metrics::se`. +The squared error is the squared difference between the predicted and the observed values. $$\text{se} = (y - \hat{y})^2$$ The squared error is only an appropriate rule if $\hat{y}$ corresponds to the mean of the forecaster's predictive distribution. Otherwise, results will be misleading (see @gneitingMakingEvaluatingPoint2011). @@ -96,7 +96,7 @@ The squared error is only an appropriate rule if $\hat{y}$ corresponds to the me **Forecast**: $\hat{y}$, a real number -The absolute percentage error is the absolute percent difference between the predicted and the observed values. See `?Metrics::ape`. +The absolute percentage error is the absolute percent difference between the predicted and the observed values. $$\text{ape} = \frac{|y - \hat{y}|}{|y|}$$