Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 57 additions & 28 deletions R/get-coverage.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,65 +35,94 @@
#' (can be either interval or quantile coverage) and the
#' actual coverage. For example, if the desired coverage is 90% and the actual
#' coverage is 80%, the coverage deviation is -0.1.
#' @return
#' A data.table with columns as specified in `by` and additional
#' columns for the coverage values described above
#' @inheritParams score
#' @param by character vector that denotes the level of grouping for which the
#' coverage values should be computed. By default (`"model"`), one coverage
#' value per model will be returned.
#' @param type character vector indicating which types of coverage to compute.
#' Must be a subset of `c("quantile", "interval")`. By default, both are
#' computed.
#' @return
#' a data.table with columns "interval_coverage",
#' a data.table with the columns specified in `by` and additional columns
#' for the requested coverage values. When both types are requested (the
#' default), this includes "interval_coverage",
#' "interval_coverage_deviation", "quantile_coverage",
#' "quantile_coverage_deviation" and the columns specified in `by`.
#' "quantile_coverage_deviation", "quantile_level", and "interval_range".
#' @importFrom data.table setcolorder
#' @importFrom checkmate assert_subset
#' @examples
#' example_quantile |>
#' as_forecast_quantile() |>
#' get_coverage(by = "model")
#'
#' # only interval coverage
#' example_quantile |>
#' as_forecast_quantile() |>
#' get_coverage(by = "model", type = "interval")
#' @export
#' @keywords scoring
#' @export
get_coverage <- function(forecast, by = "model") {
get_coverage <- function(forecast, by = "model",
type = c("quantile", "interval")) {
# input checks ---------------------------------------------------------------
forecast <- clean_forecast(forecast, copy = TRUE, na.omit = TRUE)
assert_subset(get_forecast_type(forecast), "quantile")
assert_subset(type, c("quantile", "interval"), empty.ok = FALSE)

# remove "quantile_level" and "interval_range" from `by` if present, as these
# are included anyway
by <- setdiff(by, c("quantile_level", "interval_range"))
assert_subset(by, names(forecast))

# convert to wide interval format and compute interval coverage --------------
interval_forecast <- quantile_to_interval(forecast, format = "wide")
interval_forecast[,
interval_coverage := (observed <= upper) & (observed >= lower)
][, c("lower", "upper", "observed") := NULL]
interval_forecast[, interval_coverage_deviation :=
interval_coverage - interval_range / 100]

# merge interval range data with original data -------------------------------
# preparations
forecast[, interval_range := get_range_from_quantile(quantile_level)]
forecast_cols <- colnames(forecast) # store so we can reset column order later
compute_interval <- "interval" %in% type
compute_quantile <- "quantile" %in% type

forecast_unit <- get_forecast_unit(forecast)

forecast <- merge(forecast, interval_forecast,
by = unique(c(forecast_unit, "interval_range")))
if (compute_interval) {
# convert to wide interval format and compute interval coverage ------------
interval_forecast <- quantile_to_interval(forecast, format = "wide")
interval_forecast[,
interval_coverage := (observed <= upper) & (observed >= lower)
][, c("lower", "upper", "observed") := NULL]
interval_forecast[, interval_coverage_deviation :=
interval_coverage - interval_range / 100]

# compute quantile coverage and deviation ------------------------------------
forecast[, quantile_coverage := observed <= predicted]
forecast[, quantile_coverage_deviation := quantile_coverage - quantile_level]
# merge interval range data with original data -----------------------------
forecast[, interval_range := get_range_from_quantile(quantile_level)]
forecast <- merge(forecast, interval_forecast,
by = unique(c(forecast_unit, "interval_range")))
}

if (compute_quantile) {
# compute quantile coverage and deviation ----------------------------------
forecast[, quantile_coverage := observed <= predicted]
forecast[, quantile_coverage_deviation := quantile_coverage - quantile_level]
}

# summarise coverage values according to `by` and cleanup --------------------
# reset column order
new_metrics <- c("interval_coverage", "interval_coverage_deviation",
"quantile_coverage", "quantile_coverage_deviation")
setcolorder(forecast, unique(c(forecast_cols, "interval_range", new_metrics)))
# remove forecast class and convert to regular data.table
forecast <- as.data.table(forecast)
by <- unique(c(by, "quantile_level", "interval_range"))

new_metrics <- c()
by_extra <- c()
if (compute_interval) {
new_metrics <- c(new_metrics,
"interval_coverage", "interval_coverage_deviation")
by_extra <- c(by_extra, "interval_range")
}
if (compute_quantile) {
new_metrics <- c(new_metrics,
"quantile_coverage", "quantile_coverage_deviation")
by_extra <- c(by_extra, "quantile_level")
}

by <- unique(c(by, by_extra))

# only keep relevant columns before summarising
keep_cols <- unique(c(by, new_metrics))
forecast <- forecast[, .SD, .SDcols = intersect(names(forecast), keep_cols)]

# summarise
forecast <- forecast[, lapply(.SD, mean), by = by, .SDcols = new_metrics]
return(forecast[])
Expand Down
20 changes: 14 additions & 6 deletions man/get_coverage.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

117 changes: 117 additions & 0 deletions tests/testthat/test-get-coverage.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,123 @@
expect_identical(cov, cov2)
})

test_that("get_coverage() with type = 'interval' returns only interval coverage columns", {
cov <- get_coverage(example_quantile, by = "model", type = "interval")
expect_s3_class(cov, c("data.table", "data.frame"), exact = TRUE)
expect_true("interval_coverage" %in% names(cov))
expect_true("interval_coverage_deviation" %in% names(cov))
expect_false("quantile_coverage" %in% names(cov))
expect_false("quantile_coverage_deviation" %in% names(cov))
expect_true("interval_range" %in% names(cov))
expect_true("model" %in% names(cov))
expect_true(all(cov$interval_coverage >= 0 & cov$interval_coverage <= 1,
na.rm = TRUE))
})

test_that("get_coverage() with type = 'quantile' returns only quantile coverage columns", {
cov <- get_coverage(example_quantile, by = "model", type = "quantile")
expect_s3_class(cov, c("data.table", "data.frame"), exact = TRUE)
expect_true("quantile_coverage" %in% names(cov))
expect_true("quantile_coverage_deviation" %in% names(cov))
expect_false("interval_coverage" %in% names(cov))
expect_false("interval_coverage_deviation" %in% names(cov))
expect_true("quantile_level" %in% names(cov))
expect_true("model" %in% names(cov))
expect_true(all(cov$quantile_coverage >= 0 & cov$quantile_coverage <= 1))
expect_false("interval_range" %in% names(cov))
})

test_that("get_coverage() with type = c('quantile', 'interval') returns both (default behavior)", {
cov_default <- get_coverage(example_quantile, by = "model")
cov_explicit <- get_coverage(example_quantile, by = "model",
type = c("quantile", "interval"))
expect_identical(cov_default, cov_explicit)
expect_true(all(c("interval_coverage", "interval_coverage_deviation",
"quantile_coverage", "quantile_coverage_deviation")

Check warning on line 88 in tests/testthat/test-get-coverage.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=tests/testthat/test-get-coverage.R,line=88,col=21,[indentation_linter] Hanging indent should be 20 spaces but is 21 spaces.
%in% names(cov_default)))
expect_true(all(c("quantile_level", "interval_range") %in% names(cov_default)))
})

test_that("get_coverage() with type = 'interval' produces correct coverage values", {
test_data <- data.table::data.table(
model = rep("model1", 4),
target_type = rep("Cases", 4),
location = rep(c("A", "B"), each = 2),
target_end_date = as.Date(rep("2021-01-01", 4)),
forecast_date = as.Date(rep("2020-12-20", 4)),
quantile_level = rep(c(0.25, 0.75), 2),
predicted = c(10, 20, 10, 20),
observed = c(15, 15, 25, 25)
)
test_data <- as_forecast_quantile(test_data)
cov <- get_coverage(test_data, by = get_forecast_unit(test_data),
type = "interval")
# location A: observed=15 inside [10,20] -> coverage=1
expect_equal(cov[grepl("A", location)]$interval_coverage, 1)

Check warning on line 108 in tests/testthat/test-get-coverage.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=tests/testthat/test-get-coverage.R,line=108,col=26,[fixed_regex_linter] Use "A" with fixed = TRUE here. This regular expression is static, i.e., its matches can be expressed as a fixed substring expression, which is faster to compute.
# location B: observed=25 outside [10,20] -> coverage=0
expect_equal(cov[grepl("B", location)]$interval_coverage, 0)

Check warning on line 110 in tests/testthat/test-get-coverage.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=tests/testthat/test-get-coverage.R,line=110,col=26,[fixed_regex_linter] Use "B" with fixed = TRUE here. This regular expression is static, i.e., its matches can be expressed as a fixed substring expression, which is faster to compute.
# interval_coverage_deviation = interval_coverage - interval_range/100
expect_equal(cov$interval_coverage_deviation,
cov$interval_coverage - cov$interval_range / 100)
expect_false("quantile_coverage" %in% names(cov))
})

test_that("get_coverage() with type = 'quantile' produces correct coverage values", {
test_data <- data.table::data.table(
model = rep("model1", 3),
target_type = rep("Cases", 3),
location = rep("A", 3),
target_end_date = as.Date(rep("2021-01-01", 3)),
forecast_date = as.Date(rep("2020-12-20", 3)),
quantile_level = c(0.25, 0.5, 0.75),
predicted = c(10, 15, 20),
observed = rep(12, 3)
)
test_data <- as_forecast_quantile(test_data)
cov <- get_coverage(test_data, by = get_forecast_unit(test_data),
type = "quantile")
# 12 <= 10 -> FALSE -> 0
expect_equal(cov[quantile_level == 0.25]$quantile_coverage, 0)
# 12 <= 15 -> TRUE -> 1
expect_equal(cov[quantile_level == 0.5]$quantile_coverage, 1)
# 12 <= 20 -> TRUE -> 1
expect_equal(cov[quantile_level == 0.75]$quantile_coverage, 1)
# quantile_coverage_deviation = quantile_coverage - quantile_level
expect_equal(cov$quantile_coverage_deviation,
cov$quantile_coverage - cov$quantile_level)
expect_false("interval_coverage" %in% names(cov))
})

test_that("get_coverage() errors for invalid type argument", {
expect_error(get_coverage(example_quantile, type = "invalid"))
expect_error(get_coverage(example_quantile, type = c("quantile", "invalid")))
expect_error(get_coverage(example_quantile, type = NULL))
})

test_that("get_coverage() with type = 'interval' works with non-symmetric prediction intervals", {
test <- data.table::copy(example_quantile)
test <- test[!quantile_level %in% c(0.2, 0.3, 0.5)]
cov <- expect_no_condition(get_coverage(test, type = "interval"))

prediction_intervals <- get_range_from_quantile(c(0.2, 0.3, 0.5))
missing <- cov[interval_range %in% prediction_intervals]
not_missing <- cov[!interval_range %in% prediction_intervals]

expect_true(all(is.na(missing$interval_coverage)))
expect_false(anyNA(not_missing))
expect_false("quantile_coverage" %in% names(cov))
})

test_that("get_coverage() with type = 'quantile' summarises correctly with by argument", {
cov <- get_coverage(example_quantile, by = "model", type = "quantile")
# should have one row per model + quantile_level
models <- unique(na.omit(example_quantile)$model)
quantile_levels <- unique(na.omit(example_quantile)$quantile_level)
expect_equal(nrow(cov), length(models) * length(quantile_levels))
expect_false("interval_range" %in% names(cov))
expect_true(all(cov$quantile_coverage >= 0 & cov$quantile_coverage <= 1))
})


# ==============================================================================
# plot_interval_coverage() # nolint: commented_code_linter
Expand Down
Loading