From a296a2329a80f37c8f88fd1d680e519cec7d9d9a Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Fri, 13 Feb 2026 05:43:55 +0100 Subject: [PATCH] Fixes #668: Add type argument to get_coverage() Co-Authored-By: Claude Opus 4.6 --- R/get-coverage.R | 85 ++++++++++++++------- man/get_coverage.Rd | 20 +++-- tests/testthat/test-get-coverage.R | 117 +++++++++++++++++++++++++++++ 3 files changed, 188 insertions(+), 34 deletions(-) diff --git a/R/get-coverage.R b/R/get-coverage.R index 66974e41c..71e3a1879 100644 --- a/R/get-coverage.R +++ b/R/get-coverage.R @@ -35,65 +35,94 @@ #' (can be either interval or quantile coverage) and the #' actual coverage. For example, if the desired coverage is 90% and the actual #' coverage is 80%, the coverage deviation is -0.1. -#' @return -#' A data.table with columns as specified in `by` and additional -#' columns for the coverage values described above #' @inheritParams score #' @param by character vector that denotes the level of grouping for which the #' coverage values should be computed. By default (`"model"`), one coverage #' value per model will be returned. +#' @param type character vector indicating which types of coverage to compute. +#' Must be a subset of `c("quantile", "interval")`. By default, both are +#' computed. #' @return -#' a data.table with columns "interval_coverage", +#' a data.table with the columns specified in `by` and additional columns +#' for the requested coverage values. When both types are requested (the +#' default), this includes "interval_coverage", #' "interval_coverage_deviation", "quantile_coverage", -#' "quantile_coverage_deviation" and the columns specified in `by`. +#' "quantile_coverage_deviation", "quantile_level", and "interval_range". #' @importFrom data.table setcolorder #' @importFrom checkmate assert_subset #' @examples #' example_quantile |> #' as_forecast_quantile() |> #' get_coverage(by = "model") +#' +#' # only interval coverage +#' example_quantile |> +#' as_forecast_quantile() |> +#' get_coverage(by = "model", type = "interval") #' @export #' @keywords scoring #' @export -get_coverage <- function(forecast, by = "model") { +get_coverage <- function(forecast, by = "model", + type = c("quantile", "interval")) { # input checks --------------------------------------------------------------- forecast <- clean_forecast(forecast, copy = TRUE, na.omit = TRUE) assert_subset(get_forecast_type(forecast), "quantile") + assert_subset(type, c("quantile", "interval"), empty.ok = FALSE) # remove "quantile_level" and "interval_range" from `by` if present, as these # are included anyway by <- setdiff(by, c("quantile_level", "interval_range")) assert_subset(by, names(forecast)) - # convert to wide interval format and compute interval coverage -------------- - interval_forecast <- quantile_to_interval(forecast, format = "wide") - interval_forecast[, - interval_coverage := (observed <= upper) & (observed >= lower) - ][, c("lower", "upper", "observed") := NULL] - interval_forecast[, interval_coverage_deviation := - interval_coverage - interval_range / 100] - - # merge interval range data with original data ------------------------------- - # preparations - forecast[, interval_range := get_range_from_quantile(quantile_level)] - forecast_cols <- colnames(forecast) # store so we can reset column order later + compute_interval <- "interval" %in% type + compute_quantile <- "quantile" %in% type + forecast_unit <- get_forecast_unit(forecast) - forecast <- merge(forecast, interval_forecast, - by = unique(c(forecast_unit, "interval_range"))) + if (compute_interval) { + # convert to wide interval format and compute interval coverage ------------ + interval_forecast <- quantile_to_interval(forecast, format = "wide") + interval_forecast[, + interval_coverage := (observed <= upper) & (observed >= lower) + ][, c("lower", "upper", "observed") := NULL] + interval_forecast[, interval_coverage_deviation := + interval_coverage - interval_range / 100] - # compute quantile coverage and deviation ------------------------------------ - forecast[, quantile_coverage := observed <= predicted] - forecast[, quantile_coverage_deviation := quantile_coverage - quantile_level] + # merge interval range data with original data ----------------------------- + forecast[, interval_range := get_range_from_quantile(quantile_level)] + forecast <- merge(forecast, interval_forecast, + by = unique(c(forecast_unit, "interval_range"))) + } + + if (compute_quantile) { + # compute quantile coverage and deviation ---------------------------------- + forecast[, quantile_coverage := observed <= predicted] + forecast[, quantile_coverage_deviation := quantile_coverage - quantile_level] + } # summarise coverage values according to `by` and cleanup -------------------- - # reset column order - new_metrics <- c("interval_coverage", "interval_coverage_deviation", - "quantile_coverage", "quantile_coverage_deviation") - setcolorder(forecast, unique(c(forecast_cols, "interval_range", new_metrics))) # remove forecast class and convert to regular data.table forecast <- as.data.table(forecast) - by <- unique(c(by, "quantile_level", "interval_range")) + + new_metrics <- c() + by_extra <- c() + if (compute_interval) { + new_metrics <- c(new_metrics, + "interval_coverage", "interval_coverage_deviation") + by_extra <- c(by_extra, "interval_range") + } + if (compute_quantile) { + new_metrics <- c(new_metrics, + "quantile_coverage", "quantile_coverage_deviation") + by_extra <- c(by_extra, "quantile_level") + } + + by <- unique(c(by, by_extra)) + + # only keep relevant columns before summarising + keep_cols <- unique(c(by, new_metrics)) + forecast <- forecast[, .SD, .SDcols = intersect(names(forecast), keep_cols)] + # summarise forecast <- forecast[, lapply(.SD, mean), by = by, .SDcols = new_metrics] return(forecast[]) diff --git a/man/get_coverage.Rd b/man/get_coverage.Rd index 68b900b93..c0f891b20 100644 --- a/man/get_coverage.Rd +++ b/man/get_coverage.Rd @@ -4,7 +4,7 @@ \alias{get_coverage} \title{Get quantile and interval coverage values for quantile-based forecasts} \usage{ -get_coverage(forecast, by = "model") +get_coverage(forecast, by = "model", type = c("quantile", "interval")) } \arguments{ \item{forecast}{A forecast object (a validated data.table with predicted and @@ -13,14 +13,17 @@ observed values).} \item{by}{character vector that denotes the level of grouping for which the coverage values should be computed. By default (\code{"model"}), one coverage value per model will be returned.} + +\item{type}{character vector indicating which types of coverage to compute. +Must be a subset of \code{c("quantile", "interval")}. By default, both are +computed.} } \value{ -A data.table with columns as specified in \code{by} and additional -columns for the coverage values described above - -a data.table with columns "interval_coverage", +a data.table with the columns specified in \code{by} and additional columns +for the requested coverage values. When both types are requested (the +default), this includes "interval_coverage", "interval_coverage_deviation", "quantile_coverage", -"quantile_coverage_deviation" and the columns specified in \code{by}. +"quantile_coverage_deviation", "quantile_level", and "interval_range". } \description{ For a validated forecast object in a quantile-based format @@ -64,5 +67,10 @@ coverage is 80\%, the coverage deviation is -0.1. example_quantile |> as_forecast_quantile() |> get_coverage(by = "model") + +# only interval coverage +example_quantile |> + as_forecast_quantile() |> + get_coverage(by = "model", type = "interval") } \keyword{scoring} diff --git a/tests/testthat/test-get-coverage.R b/tests/testthat/test-get-coverage.R index 13d992e06..92357ff59 100644 --- a/tests/testthat/test-get-coverage.R +++ b/tests/testthat/test-get-coverage.R @@ -53,6 +53,123 @@ test_that("get_coverage() can deal with non-symmetric prediction intervals", { expect_identical(cov, cov2) }) +test_that("get_coverage() with type = 'interval' returns only interval coverage columns", { + cov <- get_coverage(example_quantile, by = "model", type = "interval") + expect_s3_class(cov, c("data.table", "data.frame"), exact = TRUE) + expect_true("interval_coverage" %in% names(cov)) + expect_true("interval_coverage_deviation" %in% names(cov)) + expect_false("quantile_coverage" %in% names(cov)) + expect_false("quantile_coverage_deviation" %in% names(cov)) + expect_true("interval_range" %in% names(cov)) + expect_true("model" %in% names(cov)) + expect_true(all(cov$interval_coverage >= 0 & cov$interval_coverage <= 1, + na.rm = TRUE)) +}) + +test_that("get_coverage() with type = 'quantile' returns only quantile coverage columns", { + cov <- get_coverage(example_quantile, by = "model", type = "quantile") + expect_s3_class(cov, c("data.table", "data.frame"), exact = TRUE) + expect_true("quantile_coverage" %in% names(cov)) + expect_true("quantile_coverage_deviation" %in% names(cov)) + expect_false("interval_coverage" %in% names(cov)) + expect_false("interval_coverage_deviation" %in% names(cov)) + expect_true("quantile_level" %in% names(cov)) + expect_true("model" %in% names(cov)) + expect_true(all(cov$quantile_coverage >= 0 & cov$quantile_coverage <= 1)) + expect_false("interval_range" %in% names(cov)) +}) + +test_that("get_coverage() with type = c('quantile', 'interval') returns both (default behavior)", { + cov_default <- get_coverage(example_quantile, by = "model") + cov_explicit <- get_coverage(example_quantile, by = "model", + type = c("quantile", "interval")) + expect_identical(cov_default, cov_explicit) + expect_true(all(c("interval_coverage", "interval_coverage_deviation", + "quantile_coverage", "quantile_coverage_deviation") + %in% names(cov_default))) + expect_true(all(c("quantile_level", "interval_range") %in% names(cov_default))) +}) + +test_that("get_coverage() with type = 'interval' produces correct coverage values", { + test_data <- data.table::data.table( + model = rep("model1", 4), + target_type = rep("Cases", 4), + location = rep(c("A", "B"), each = 2), + target_end_date = as.Date(rep("2021-01-01", 4)), + forecast_date = as.Date(rep("2020-12-20", 4)), + quantile_level = rep(c(0.25, 0.75), 2), + predicted = c(10, 20, 10, 20), + observed = c(15, 15, 25, 25) + ) + test_data <- as_forecast_quantile(test_data) + cov <- get_coverage(test_data, by = get_forecast_unit(test_data), + type = "interval") + # location A: observed=15 inside [10,20] -> coverage=1 + expect_equal(cov[grepl("A", location)]$interval_coverage, 1) + # location B: observed=25 outside [10,20] -> coverage=0 + expect_equal(cov[grepl("B", location)]$interval_coverage, 0) + # interval_coverage_deviation = interval_coverage - interval_range/100 + expect_equal(cov$interval_coverage_deviation, + cov$interval_coverage - cov$interval_range / 100) + expect_false("quantile_coverage" %in% names(cov)) +}) + +test_that("get_coverage() with type = 'quantile' produces correct coverage values", { + test_data <- data.table::data.table( + model = rep("model1", 3), + target_type = rep("Cases", 3), + location = rep("A", 3), + target_end_date = as.Date(rep("2021-01-01", 3)), + forecast_date = as.Date(rep("2020-12-20", 3)), + quantile_level = c(0.25, 0.5, 0.75), + predicted = c(10, 15, 20), + observed = rep(12, 3) + ) + test_data <- as_forecast_quantile(test_data) + cov <- get_coverage(test_data, by = get_forecast_unit(test_data), + type = "quantile") + # 12 <= 10 -> FALSE -> 0 + expect_equal(cov[quantile_level == 0.25]$quantile_coverage, 0) + # 12 <= 15 -> TRUE -> 1 + expect_equal(cov[quantile_level == 0.5]$quantile_coverage, 1) + # 12 <= 20 -> TRUE -> 1 + expect_equal(cov[quantile_level == 0.75]$quantile_coverage, 1) + # quantile_coverage_deviation = quantile_coverage - quantile_level + expect_equal(cov$quantile_coverage_deviation, + cov$quantile_coverage - cov$quantile_level) + expect_false("interval_coverage" %in% names(cov)) +}) + +test_that("get_coverage() errors for invalid type argument", { + expect_error(get_coverage(example_quantile, type = "invalid")) + expect_error(get_coverage(example_quantile, type = c("quantile", "invalid"))) + expect_error(get_coverage(example_quantile, type = NULL)) +}) + +test_that("get_coverage() with type = 'interval' works with non-symmetric prediction intervals", { + test <- data.table::copy(example_quantile) + test <- test[!quantile_level %in% c(0.2, 0.3, 0.5)] + cov <- expect_no_condition(get_coverage(test, type = "interval")) + + prediction_intervals <- get_range_from_quantile(c(0.2, 0.3, 0.5)) + missing <- cov[interval_range %in% prediction_intervals] + not_missing <- cov[!interval_range %in% prediction_intervals] + + expect_true(all(is.na(missing$interval_coverage))) + expect_false(anyNA(not_missing)) + expect_false("quantile_coverage" %in% names(cov)) +}) + +test_that("get_coverage() with type = 'quantile' summarises correctly with by argument", { + cov <- get_coverage(example_quantile, by = "model", type = "quantile") + # should have one row per model + quantile_level + models <- unique(na.omit(example_quantile)$model) + quantile_levels <- unique(na.omit(example_quantile)$quantile_level) + expect_equal(nrow(cov), length(models) * length(quantile_levels)) + expect_false("interval_range" %in% names(cov)) + expect_true(all(cov$quantile_coverage >= 0 & cov$quantile_coverage <= 1)) +}) + # ============================================================================== # plot_interval_coverage() # nolint: commented_code_linter