From b7105dd803f708d5a4d5300d7b2ccbbbc4961fd8 Mon Sep 17 00:00:00 2001 From: nikosbosse Date: Fri, 13 Feb 2026 05:21:05 +0100 Subject: [PATCH] Fixes #763: Warn when binary factor levels are in counterintuitive order Co-Authored-By: Claude Opus 4.6 --- R/metrics-binary.R | 26 +++++++++ tests/testthat/test-class-forecast-binary.R | 21 ++++++++ tests/testthat/test-metrics-binary.R | 58 +++++++++++++++++++++ 3 files changed, 105 insertions(+) diff --git a/R/metrics-binary.R b/R/metrics-binary.R index 735a300fd..af3d55dbc 100644 --- a/R/metrics-binary.R +++ b/R/metrics-binary.R @@ -19,6 +19,32 @@ assert_input_binary <- function(observed, predicted) { assert_factor(observed, n.levels = 2, min.len = 1) assert_numeric(predicted, lower = 0, upper = 1) assert_dims_ok_point(observed, predicted) + + # Warn if factor levels appear to be in counterintuitive order. + # Predictions represent P(outcome = highest factor level). If the levels + # are e.g. c("1", "0"), the highest level is "0", meaning predictions are + # interpreted as P(outcome = "0"), which is almost certainly unintended. + lvls <- levels(observed) + counterintuitive <- FALSE + if (setequal(lvls, c("0", "1")) && lvls[1] == "1") { + counterintuitive <- TRUE + } else if (setequal(lvls, c("TRUE", "FALSE")) && lvls[1] == "TRUE") { + counterintuitive <- TRUE + } + if (counterintuitive) { + #nolint start: keyword_quote_linter + cli_warn(c( + "!" = "Factor levels of {.var observed} appear to be in + counterintuitive order: {.val {lvls}}.", + "i" = "Predictions will be interpreted as the probability of + observing {.val {lvls[2]}} (the highest factor level).", + "i" = "If this is not intended, consider reordering the factor levels, + e.g. {.code factor(observed, levels = c({.val {lvls[2]}}, + {.val {lvls[1]}}))}" + )) + #nolint end + } + return(invisible(NULL)) } diff --git a/tests/testthat/test-class-forecast-binary.R b/tests/testthat/test-class-forecast-binary.R index cb95e1d23..f188cd88a 100644 --- a/tests/testthat/test-class-forecast-binary.R +++ b/tests/testthat/test-class-forecast-binary.R @@ -47,6 +47,27 @@ test_that("assert_forecast.forecast_binary works as expected", { +test_that("as_forecast_binary() warns when data has reversed 0/1 factor levels", { + dt <- data.table( + model = "m1", + id = 1:4, + observed = factor(c(0, 1, 1, 0), levels = c("1", "0")), + predicted = c(0.1, 0.9, 0.8, 0.2) + ) + expect_warning( + as_forecast_binary(dt), + "counterintuitive" + ) +}) + +test_that("score() produces correct results with standard 0/1 factor levels", { + # example_binary has standard levels c("0", "1"), should not warn about levels + expect_no_warning( + suppressMessages(score(as_forecast_binary(example_binary))) + ) +}) + + # ============================================================================== # score.forecast_binary() # nolint: commented_code_linter # ============================================================================== diff --git a/tests/testthat/test-metrics-binary.R b/tests/testthat/test-metrics-binary.R index 650afe99f..54086a0f7 100644 --- a/tests/testthat/test-metrics-binary.R +++ b/tests/testthat/test-metrics-binary.R @@ -106,6 +106,64 @@ test_that("function throws an error for wrong input formats", { }) +# ============================================================================== +# Test factor level order warning +# ============================================================================== +test_that("assert_input_binary() warns when 0/1 factor levels are in counterintuitive order", { + observed_rev <- factor(c(0, 1, 1, 0, 1), levels = c("1", "0")) + predicted_rev <- c(0.1, 0.9, 0.8, 0.2, 0.7) + expect_warning( + assert_input_binary(observed_rev, predicted_rev), + "counterintuitive" + ) +}) + +test_that("assert_input_binary() does not warn for standard 0/1 level order", { + observed_std <- factor(c(0, 1, 1, 0, 1), levels = c("0", "1")) + predicted_std <- c(0.1, 0.9, 0.8, 0.2, 0.7) + expect_no_warning(assert_input_binary(observed_std, predicted_std)) +}) + +test_that("assert_input_binary() does not warn for non-numeric factor levels", { + observed_ab <- factor(c("a", "b", "b", "a"), levels = c("a", "b")) + predicted_ab <- c(0.3, 0.7, 0.6, 0.4) + expect_no_warning(assert_input_binary(observed_ab, predicted_ab)) +}) + +test_that("brier_score() produces different results with reversed factor levels", { + observed_correct <- factor(c(0, 1, 1, 0), levels = c("0", "1")) + observed_reversed <- factor(c(0, 1, 1, 0), levels = c("1", "0")) + predicted_bs <- c(0.1, 0.9, 0.8, 0.2) + + scores_correct <- brier_score(observed_correct, predicted_bs) + expect_equal(scores_correct, c(0.01, 0.01, 0.04, 0.04)) # nolint: expect_identical_linter + + expect_warning( + scores_reversed <- brier_score(observed_reversed, predicted_bs), + "counterintuitive" + ) + expect_false(all(scores_correct == scores_reversed)) +}) + +test_that("logs_binary() warns with reversed 0/1 factor levels", { + observed_reversed <- factor(c(0, 1, 1, 0), levels = c("1", "0")) + predicted_lb <- c(0.1, 0.9, 0.8, 0.2) + expect_warning( + logs_binary(observed_reversed, predicted_lb), + "counterintuitive" + ) +}) + +test_that("assert_input_binary() warns for TRUE/FALSE levels in counterintuitive order", { + observed_tf <- factor(c(TRUE, FALSE, TRUE), levels = c("TRUE", "FALSE")) + predicted_tf <- c(0.8, 0.2, 0.9) + expect_warning( + assert_input_binary(observed_tf, predicted_tf), + "counterintuitive" + ) +}) + + # ============================================================================== # Test Binary Metrics # ==============================================================================