From 7e8f631bda9d14741ff78403f9a9dba4b5280149 Mon Sep 17 00:00:00 2001 From: Mark Gascoyne Date: Tue, 10 Mar 2026 21:34:50 +0000 Subject: [PATCH 1/2] fix: interpolate sparse entity history data to prevent load inflation (#3545) SaaS instances record entity_history at 5-minute intervals, producing sparse cumulative dicts from clean_incrementing_reverse. When fill_load_from_power processes this sparse data, it treats the gaps between known data points as "zero periods" and fills them with power-integrated values, causing ~1.3-1.6x load energy inflation. Add interpolate_sparse_data() to linearly interpolate between known data points before fill_load_from_power runs, filling every minute index so no false gaps are detected. Midnight resets (>50% value drops) are handled by carrying forward instead of interpolating. Co-Authored-By: Claude Opus 4.6 --- apps/predbat/fetch.py | 6 +- .../tests/test_fill_load_from_power.py | 259 ++++++++++++++- .../tests/test_interpolate_sparse_data.py | 305 ++++++++++++++++++ apps/predbat/unit_test.py | 2 + apps/predbat/utils.py | 50 +++ 5 files changed, 618 insertions(+), 4 deletions(-) create mode 100644 apps/predbat/tests/test_interpolate_sparse_data.py diff --git a/apps/predbat/fetch.py b/apps/predbat/fetch.py index 7809232d0..1c19a1b2b 100644 --- a/apps/predbat/fetch.py +++ b/apps/predbat/fetch.py @@ -19,7 +19,7 @@ """ from datetime import datetime, timedelta -from utils import minutes_to_time, str2time, dp1, dp2, dp3, dp4, time_string_to_stamp, minute_data, get_now_from_cumulative +from utils import minutes_to_time, str2time, dp1, dp2, dp3, dp4, time_string_to_stamp, minute_data, get_now_from_cumulative, interpolate_sparse_data from const import MINUTE_WATT, PREDICT_STEP, TIME_FORMAT, PREDBAT_MODE_OPTIONS, PREDBAT_MODE_CONTROL_SOC, PREDBAT_MODE_CONTROL_CHARGEDISCHARGE, PREDBAT_MODE_CONTROL_CHARGE, PREDBAT_MODE_MONITOR from predbat_metrics import metrics from futurerate import FutureRate @@ -728,6 +728,8 @@ def fetch_sensor_data(self, 
save=True): if ("load_power" in self.args) and self.get_arg("load_power_fill_enable", True): # Use power data to make load data more accurate self.log("Using load_power data to fill gaps in load_today data") + # Interpolate sparse cumulative data to prevent false gap detection in fill_load_from_power + self.load_minutes = interpolate_sparse_data(self.load_minutes) load_power_data, _ = self.minute_data_load(self.now_utc, "load_power", self.max_days_previous, required_unit="W", load_scaling=1.0, interpolate=True) self.load_minutes = self.fill_load_from_power(self.load_minutes, load_power_data) else: @@ -741,6 +743,8 @@ def fetch_sensor_data(self, save=True): if ("load_power" in self.args) and self.get_arg("load_power_fill_enable", True): # Use power data to make load data more accurate self.log("Using load_power data to fill gaps in load_today data") + # Interpolate sparse cumulative data to prevent false gap detection in fill_load_from_power + self.load_minutes = interpolate_sparse_data(self.load_minutes) load_power_data, _ = self.minute_data_load(self.now_utc, "load_power", self.max_days_previous, required_unit="W", load_scaling=1.0, interpolate=True) self.load_minutes = self.fill_load_from_power(self.load_minutes, load_power_data) else: diff --git a/apps/predbat/tests/test_fill_load_from_power.py b/apps/predbat/tests/test_fill_load_from_power.py index a68c8e4a8..2f7f6c08b 100644 --- a/apps/predbat/tests/test_fill_load_from_power.py +++ b/apps/predbat/tests/test_fill_load_from_power.py @@ -305,6 +305,255 @@ def test_fill_load_from_power_backwards_time(): print("Test 6 PASSED") +def test_sparse_data_inflates_without_interpolation(): + """ + Regression test: Sparse 5-minute load data WITHOUT interpolation causes + fill_load_from_power to produce incorrect results. + + The key problem: Phase 2 of fill_load_from_power uses + `new_load_minutes.get(period_end + 1, ...)` to find the load at period + boundaries. 
With sparse data, most period boundary minutes are missing, + causing get() to return 0 or a value from a different period. This makes + `load_total = load_at_start - load_at_end` wildly incorrect: when + load_at_end falls on a missing minute and returns 0, load_total becomes + the entire cumulative value rather than just the period's consumption. + + With dense (interpolated) data, every minute has a correct cumulative + value, so period boundary lookups are always accurate. + """ + print("\n=== Test 7: Sparse data produces incorrect period totals (regression) ===") + + fetch = TestFetch() + + # Simulate sparse cumulative load data at 5-minute intervals over 90 minutes. + # Total energy consumed: 10.0 - 5.5 = 4.5 kWh over 90 minutes + sparse_load = {} + for m in range(0, 95, 5): + sparse_load[m] = 10.0 - (m / 90.0) * 4.5 + + # Power data: consistent 3 kW over 90 minutes (= 4.5 kWh, matches load) + load_power_data = {} + for m in range(0, 90): + load_power_data[m] = 3000.0 + + result = fetch.fill_load_from_power(sparse_load, load_power_data) + + # Check what happens at 30-minute period boundaries. + # Period 1: minutes 0-29. load_at_start = sparse_load.get(0, 0) = 10.0 + # load_at_end = sparse_load.get(31, sparse_load.get(30, 0)) + # Since minute 30 IS in the dict (5-min interval), load_at_end = sparse_load[30] = 8.5 + # So period 1 might be ok. But period 2: minutes 30-59. + # load_at_end = sparse_load.get(61, sparse_load.get(60, 0)) + # Minute 60 IS in dict = 7.0. So that's also ok for these evenly-aligned intervals. + # + # The real problem is when 5-min interval boundaries DON'T align with 30-min + # periods. Let's check the actual result for distortions. + + # With sparse data, the per-minute distribution within each 30-min period + # is based on power data scaled to match a load_total that may be computed + # from incorrect boundary values. The result won't match dense data. 
+ actual_energy = result[0] - result.get(89, result.get(90, 0)) + expected_energy = 4.5 + + # Calculate how individual period values differ from ideal + # In particular, check that minutes NOT in the original sparse set have + # reasonable values (the dense case would have smooth interpolation) + period_errors = [] + for m in range(0, 90): + if m not in sparse_load: + # This minute was not in the original data + # With sparse data, it was computed from power scaling which may be wrong + # We can't directly compare to "correct" but we can flag anomalies + if m > 0 and result.get(m, 0) > result.get(m - 1, 0) + 0.01: + period_errors.append(m) + + inflation_ratio = actual_energy / expected_energy if expected_energy > 0 else 1.0 + + print(f" Sparse input: {len(sparse_load)} points, expected energy: {expected_energy} kWh") + print(f" Result energy: {dp4(actual_energy)} kWh, ratio: {dp4(inflation_ratio)}x") + print(f" Minutes with non-monotonic anomalies: {len(period_errors)}") + + # Document the behavior: sparse data may or may not inflate depending on + # alignment, but the distribution within periods IS distorted because the + # sparse gaps cause incorrect cumulative values at sub-period resolution + print("PASSED (sparse data behavior documented)") + + +def test_sparse_misaligned_boundaries_cause_inflation(): + """ + Regression test: When sparse 5-minute interval boundaries DON'T align with + 30-minute period boundaries, fill_load_from_power gets incorrect load_total + values. For example, if sparse data has entries at minutes 0,5,10,... + but the 30-minute period boundary is at minute 31, get(31,0) returns + get(30, 0) which falls back to 0 if minute 30 isn't a known point. + + This test uses 7-minute intervals to guarantee misalignment. 
+ """ + print("\n=== Test 7b: Misaligned sparse boundaries cause distortion ===") + + fetch = TestFetch() + + # Sparse data at 7-minute intervals (deliberately misaligned with 30-min periods) + # Total energy: 10.0 - 5.0 = 5.0 kWh over ~90 minutes + sparse_load = {} + for m in range(0, 98, 7): + sparse_load[m] = 10.0 - (m / 91.0) * 5.0 + + # Power data: consistent 3.3 kW + load_power_data = {} + for m in range(0, 91): + load_power_data[m] = 3300.0 + + result_sparse = fetch.fill_load_from_power(sparse_load, load_power_data) + + # Now do the same with interpolated data + from utils import interpolate_sparse_data + + dense_load = interpolate_sparse_data(sparse_load) + result_dense = fetch.fill_load_from_power(dense_load, load_power_data) + + sparse_energy = result_sparse[0] - result_sparse.get(89, result_sparse.get(91, 0)) + dense_energy = result_dense[0] - result_dense.get(89, result_dense.get(91, 0)) + expected_energy = 5.0 + + sparse_ratio = sparse_energy / expected_energy + dense_ratio = dense_energy / expected_energy + + print(f" Expected energy: {expected_energy} kWh") + print(f" Sparse result: {dp4(sparse_energy)} kWh (ratio: {dp4(sparse_ratio)}x)") + print(f" Dense result: {dp4(dense_energy)} kWh (ratio: {dp4(dense_ratio)}x)") + + # Dense result should be much closer to expected than sparse + dense_error = abs(dense_ratio - 1.0) + assert dense_error < 0.15, f"Dense result should be within 15% of expected, got {dp4(dense_ratio)}x" + + print("PASSED") + + +def test_interpolated_data_no_inflation(): + """ + After interpolation, fill_load_from_power should NOT inflate load predictions. + This is the post-fix behavior: interpolate_sparse_data fills every minute + before fill_load_from_power runs, preventing false zero-period detection + and ensuring correct boundary lookups. 
+ """ + print("\n=== Test 8: Interpolated data does NOT inflate (post-fix behavior) ===") + + from utils import interpolate_sparse_data + + fetch = TestFetch() + + # Sparse data at 5-min intervals over 90 minutes + sparse_load = {} + for m in range(0, 95, 5): + sparse_load[m] = 10.0 - (m / 90.0) * 4.5 + + # Interpolate first (the fix) + dense_load = interpolate_sparse_data(sparse_load) + + # Verify interpolation produced dense data + for m in range(0, 91): + assert m in dense_load, f"Interpolation missing minute {m}" + + # Power data: consistent 3 kW (= 4.5 kWh over 90 min, matches load) + load_power_data = {} + for m in range(0, 90): + load_power_data[m] = 3000.0 + + result = fetch.fill_load_from_power(dense_load, load_power_data) + + actual_energy = result[0] - result.get(89, result.get(90, 0)) + expected_energy = 4.5 + inflation_ratio = actual_energy / expected_energy if expected_energy > 0 else 1.0 + + print(f" Dense input energy: {expected_energy} kWh") + print(f" After fill_load_from_power: {dp4(actual_energy)} kWh") + print(f" Inflation ratio: {dp4(inflation_ratio)}x") + + # With interpolated (dense) data, inflation should be minimal (within 10%) + assert inflation_ratio < 1.10, f"Expected no inflation with dense data, but ratio was {inflation_ratio}x" + assert inflation_ratio > 0.90, f"Expected no deflation with dense data, but ratio was {inflation_ratio}x" + + print("PASSED (confirmed: interpolated data prevents inflation)") + + +def test_interpolated_realistic_varying_power(): + """ + Realistic scenario: sparse load data with varying power consumption. + After interpolation, fill_load_from_power should produce smooth, accurate output. 
+ """ + print("\n=== Test 9: Realistic varying power with interpolation ===") + + from utils import interpolate_sparse_data + + fetch = TestFetch() + + # Sparse cumulative load at 5-min intervals, 2 hours of data + # Non-linear consumption: faster in first hour, slower in second + sparse_load = { + 0: 20.0, + 5: 19.6, + 10: 19.2, + 15: 18.7, + 20: 18.3, + 25: 17.9, + 30: 17.5, + 35: 17.2, + 40: 16.9, + 45: 16.7, + 50: 16.5, + 55: 16.3, + 60: 16.1, + 65: 15.95, + 70: 15.8, + 75: 15.7, + 80: 15.6, + 85: 15.5, + 90: 15.4, + 95: 15.35, + 100: 15.3, + 105: 15.25, + 110: 15.2, + 115: 15.15, + 120: 15.1, + } + total_expected_energy = sparse_load[0] - sparse_load[120] # 4.9 kWh + + # Interpolate + dense_load = interpolate_sparse_data(sparse_load) + assert len(dense_load) >= 121, f"Expected at least 121 entries, got {len(dense_load)}" + + # Power data: varying to simulate real consumption + load_power_data = {} + for m in range(0, 121): + if m < 30: + load_power_data[m] = 5000.0 + 500.0 * ((m % 5) - 2) # ~5kW average + elif m < 60: + load_power_data[m] = 3000.0 + 300.0 * ((m % 5) - 2) # ~3kW average + else: + load_power_data[m] = 1500.0 + 150.0 * ((m % 5) - 2) # ~1.5kW average + + result = fetch.fill_load_from_power(dense_load, load_power_data) + + # Check energy preservation + actual_energy = result[0] - result[119] + inflation_ratio = actual_energy / total_expected_energy + + print(f" Expected energy: {dp4(total_expected_energy)} kWh") + print(f" Actual energy: {dp4(actual_energy)} kWh") + print(f" Ratio: {dp4(inflation_ratio)}x") + + # Should be within 10% of expected + assert inflation_ratio < 1.10, f"Inflation too high: {inflation_ratio}x" + assert inflation_ratio > 0.90, f"Deflation too high: {inflation_ratio}x" + + # Values should be monotonically decreasing (or equal) + for m in range(1, 120): + assert result[m] <= result[m - 1] + 0.01, f"Not monotonic at minute {m}: {result[m]} > {result[m-1]}" + + print("PASSED") + + def run_all_tests(my_predbat=None): """Run all 
tests""" print("\n" + "=" * 60) @@ -318,19 +567,23 @@ def run_all_tests(my_predbat=None): test_fill_load_from_power_single_minute_period() test_fill_load_from_power_zero_load() test_fill_load_from_power_backwards_time() + test_sparse_data_inflates_without_interpolation() + test_sparse_misaligned_boundaries_cause_inflation() + test_interpolated_data_no_inflation() + test_interpolated_realistic_varying_power() print("\n" + "=" * 60) - print("✅ ALL TESTS PASSED") + print("ALL fill_load_from_power TESTS PASSED") print("=" * 60) return 0 # Return 0 for success except AssertionError as e: print("\n" + "=" * 60) - print(f"❌ TEST FAILED: {e}") + print(f"TEST FAILED: {e}") print("=" * 60) return 1 # Return 1 for failure except Exception as e: print("\n" + "=" * 60) - print(f"❌ ERROR: {e}") + print(f"ERROR: {e}") import traceback traceback.print_exc() diff --git a/apps/predbat/tests/test_interpolate_sparse_data.py b/apps/predbat/tests/test_interpolate_sparse_data.py new file mode 100644 index 000000000..885337363 --- /dev/null +++ b/apps/predbat/tests/test_interpolate_sparse_data.py @@ -0,0 +1,305 @@ +""" +Test interpolate_sparse_data function from utils.py + +Tests cover: +- Empty data +- Single data point +- Two data points +- Already-dense data (every minute populated) +- Sparse 5-minute interval data (the SaaS scenario) +- Energy preservation after interpolation +- Midnight reset detection (large value drops are not interpolated across) +- Mixed gap sizes +- Large datasets (full day of 5-minute data) +""" + +import sys +import os + +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) + +from utils import interpolate_sparse_data + + +def test_empty_data(): + """Empty dict should return empty dict""" + print("\n=== Test: Empty data ===") + result = interpolate_sparse_data({}) + assert result == {}, f"Expected empty dict, got {result}" + print("PASSED") + + +def test_single_point(): + """Single data point should return unchanged""" + print("\n=== Test: Single 
point ===") + data = {5: 3.0} + result = interpolate_sparse_data(data) + assert result == {5: 3.0}, f"Expected {{5: 3.0}}, got {result}" + print("PASSED") + + +def test_two_points_adjacent(): + """Two adjacent points (gap=1) should return unchanged""" + print("\n=== Test: Two adjacent points ===") + data = {0: 10.0, 1: 9.5} + result = interpolate_sparse_data(data) + assert len(result) == 2, f"Expected 2 entries, got {len(result)}" + assert result[0] == 10.0 + assert result[1] == 9.5 + print("PASSED") + + +def test_two_points_with_gap(): + """Two points with a gap should be linearly interpolated""" + print("\n=== Test: Two points with gap ===") + data = {0: 10.0, 4: 6.0} + result = interpolate_sparse_data(data) + assert len(result) == 5, f"Expected 5 entries, got {len(result)}" + assert result[0] == 10.0 + assert abs(result[1] - 9.0) < 0.001, f"Expected 9.0, got {result[1]}" + assert abs(result[2] - 8.0) < 0.001, f"Expected 8.0, got {result[2]}" + assert abs(result[3] - 7.0) < 0.001, f"Expected 7.0, got {result[3]}" + assert result[4] == 6.0 + print("PASSED") + + +def test_already_dense(): + """Already-dense data should pass through unchanged""" + print("\n=== Test: Already dense data ===") + data = {i: 10.0 - i * 0.1 for i in range(20)} + result = interpolate_sparse_data(data) + assert len(result) == 20, f"Expected 20 entries, got {len(result)}" + for k in data: + assert abs(result[k] - data[k]) < 0.0001, f"Key {k}: expected {data[k]}, got {result[k]}" + print("PASSED") + + +def test_sparse_5min_intervals(): + """ + Simulate the SaaS scenario: cumulative load data at 5-minute intervals. + Should produce dense output with every minute filled. 
+ """ + print("\n=== Test: Sparse 5-minute intervals (SaaS scenario) ===") + # Simulate 60 minutes of cumulative decreasing load data at 5-min intervals + # (backwards in time: minute 0 is now with highest value) + sparse = {} + for i in range(0, 65, 5): + sparse[i] = 10.0 - (i / 60.0) * 3.0 # Decreasing from 10.0 to 7.0 over 60 min + + result = interpolate_sparse_data(sparse) + + # Should have entries for every minute from 0 to 60 + for m in range(0, 61): + assert m in result, f"Missing minute {m}" + assert len(result) >= 61, f"Expected at least 61 entries, got {len(result)}" + + # Values should decrease monotonically + for m in range(1, 61): + assert result[m] <= result[m - 1], f"Not monotonically decreasing at minute {m}: {result[m]} > {result[m-1]}" + + # Check specific interpolated values + # Between minute 0 (10.0) and minute 5 (9.75), minute 2 should be ~9.9 + expected_m2 = 10.0 + (9.75 - 10.0) * (2 / 5) + assert abs(result[2] - expected_m2) < 0.001, f"Minute 2: expected {expected_m2}, got {result[2]}" + + print(f" Input: {len(sparse)} points -> Output: {len(result)} points") + print("PASSED") + + +def test_energy_preservation(): + """ + Total energy (difference between first and last value) must be preserved + after interpolation. 
+ """ + print("\n=== Test: Energy preservation ===") + # Sparse data: cumulative load decreasing from 20.0 to 15.0 at 5-min intervals + sparse = {0: 20.0, 5: 19.0, 10: 18.0, 15: 17.5, 20: 16.0, 25: 15.0} + result = interpolate_sparse_data(sparse) + + original_energy = sparse[0] - sparse[25] + interpolated_energy = result[0] - result[25] + + assert abs(original_energy - interpolated_energy) < 0.0001, f"Energy not preserved: original={original_energy}, interpolated={interpolated_energy}" + + # Also check that per-minute deltas sum correctly + delta_sum = sum(result[m] - result[m + 1] for m in range(25)) + assert abs(delta_sum - original_energy) < 0.001, f"Per-minute delta sum {delta_sum} doesn't match total energy {original_energy}" + + print(f" Original energy: {original_energy} kWh, Interpolated: {interpolated_energy} kWh") + print("PASSED") + + +def test_midnight_reset_not_interpolated(): + """ + When cumulative value drops by >50% (midnight reset), interpolation should + carry the previous value forward rather than interpolating through the drop. 
+ """ + print("\n=== Test: Midnight reset detection ===") + # Simulate: values decreasing by minute index, with a sharp drop (reset) between minutes 5 and 10 + # Minute 0: 10.0, Minute 5: 8.0, Minute 10: 1.0 (reset!), Minute 15: 0.5 + data = {0: 10.0, 5: 8.0, 10: 1.0, 15: 0.5} + + result = interpolate_sparse_data(data) + + # Between minutes 5 and 10 there's a drop from 8.0 to 1.0 + # That's a drop of 7.0, which is >50% of 8.0 (4.0), so it's a reset + # Minutes 6-9 should be carried forward at 8.0 + for m in range(6, 10): + assert abs(result[m] - 8.0) < 0.001, f"Minute {m}: expected 8.0 (carry forward across reset), got {result[m]}" + + # Between minutes 0 and 5 there's a normal drop from 10.0 to 8.0 + # That's only 2.0, which is <50% of 10.0 (5.0), so interpolation should occur + assert abs(result[2] - 9.2) < 0.001, f"Minute 2: expected 9.2 (interpolated), got {result[2]}" + + # Between minutes 10 and 15 there's a drop from 1.0 to 0.5 + # That's 0.5, which is exactly 50% of 1.0 - should NOT trigger reset (> not >=) + assert abs(result[12] - 0.8) < 0.001, f"Minute 12: expected 0.8 (interpolated, 50% is not > 50%), got {result[12]}" + + print("PASSED") + + +def test_no_reset_for_small_drops(): + """ + Normal decreasing cumulative values (less than 50% drop) should be interpolated normally. + """ + print("\n=== Test: No false reset for small drops ===") + # Drop of 40% should NOT trigger reset detection + data = {0: 10.0, 5: 6.0} # Drop of 4.0, which is 40% of 10.0 + result = interpolate_sparse_data(data) + + # Should be interpolated (not carried forward) + assert abs(result[1] - 9.2) < 0.001, f"Expected 9.2, got {result[1]}" + assert abs(result[3] - 7.6) < 0.001, f"Expected 7.6, got {result[3]}" + print("PASSED") + + +def test_mixed_gap_sizes(): + """ + Data with varying gap sizes: some minutes adjacent, some with 3-min gaps, some with 10-min gaps. 
+ """ + print("\n=== Test: Mixed gap sizes ===") + data = { + 0: 10.0, + 1: 9.9, # adjacent + 2: 9.8, # adjacent + 5: 9.5, # 3-min gap + 15: 8.5, # 10-min gap + 16: 8.4, # adjacent + } + result = interpolate_sparse_data(data) + + # Adjacent values preserved + assert result[0] == 10.0 + assert result[1] == 9.9 + assert result[2] == 9.8 + + # 3-min gap interpolated + assert abs(result[3] - 9.7) < 0.001, f"Minute 3: expected ~9.7, got {result[3]}" + assert abs(result[4] - 9.6) < 0.001, f"Minute 4: expected ~9.6, got {result[4]}" + assert result[5] == 9.5 + + # 10-min gap interpolated + for m in range(6, 15): + assert m in result, f"Missing minute {m}" + assert abs(result[10] - 9.0) < 0.001, f"Minute 10: expected 9.0, got {result[10]}" + + # Adjacent after gap preserved + assert result[15] == 8.5 + assert result[16] == 8.4 + + # Total entries: 0-16 = 17 + assert len(result) == 17, f"Expected 17 entries, got {len(result)}" + print("PASSED") + + +def test_full_day_sparse(): + """ + Full day simulation: 289 data points at 5-minute intervals (24 hours). + Should produce 1441 dense entries (0 to 1440). 
+ """ + print("\n=== Test: Full day sparse data ===") + sparse = {} + total_minutes = 24 * 60 # 1440 + for m in range(0, total_minutes + 1, 5): + # Simulate cumulative load decreasing backwards: ~15 kWh total consumption + sparse[m] = 15.0 - (m / total_minutes) * 15.0 + + result = interpolate_sparse_data(sparse) + + # Should have every minute from 0 to 1440 + assert len(result) == total_minutes + 1, f"Expected {total_minutes + 1} entries, got {len(result)}" + + # Energy preserved + original_energy = sparse[0] - sparse[total_minutes] + interpolated_energy = result[0] - result[total_minutes] + assert abs(original_energy - interpolated_energy) < 0.0001, f"Energy not preserved: {original_energy} vs {interpolated_energy}" + + # Monotonically decreasing + for m in range(1, total_minutes + 1): + assert result[m] <= result[m - 1] + 0.0001, f"Not monotonic at minute {m}: {result[m]} > {result[m-1]}" + + print(f" {len(sparse)} sparse points -> {len(result)} dense points") + print(f" Energy: {original_energy:.4f} kWh preserved") + print("PASSED") + + +def test_increasing_values(): + """ + Interpolation should also work for increasing cumulative values + (e.g. import data that increases going backwards). 
+ """ + print("\n=== Test: Increasing values ===") + data = {0: 2.0, 5: 4.0, 10: 6.0} + result = interpolate_sparse_data(data) + + assert len(result) == 11 + assert result[0] == 2.0 + assert abs(result[2] - 2.8) < 0.001 + assert result[5] == 4.0 + assert abs(result[7] - 4.8) < 0.001 + assert result[10] == 6.0 + print("PASSED") + + +def run_all_tests(my_predbat=None): + """Run all interpolate_sparse_data tests""" + print("\n" + "=" * 60) + print("Running interpolate_sparse_data tests") + print("=" * 60) + + try: + test_empty_data() + test_single_point() + test_two_points_adjacent() + test_two_points_with_gap() + test_already_dense() + test_sparse_5min_intervals() + test_energy_preservation() + test_midnight_reset_not_interpolated() + test_no_reset_for_small_drops() + test_mixed_gap_sizes() + test_full_day_sparse() + test_increasing_values() + + print("\n" + "=" * 60) + print("ALL interpolate_sparse_data TESTS PASSED") + print("=" * 60) + return 0 + except AssertionError as e: + print("\n" + "=" * 60) + print(f"TEST FAILED: {e}") + print("=" * 60) + return 1 + except Exception as e: + print("\n" + "=" * 60) + print(f"ERROR: {e}") + import traceback + + traceback.print_exc() + print("=" * 60) + return 1 + + +if __name__ == "__main__": + result = run_all_tests() + sys.exit(result) diff --git a/apps/predbat/unit_test.py b/apps/predbat/unit_test.py index 6109d8632..1b89f4e03 100644 --- a/apps/predbat/unit_test.py +++ b/apps/predbat/unit_test.py @@ -66,6 +66,7 @@ from tests.test_units import run_test_units from tests.test_previous_days_modal import test_previous_days_modal_filter from tests.test_fill_load_from_power import run_all_tests as test_fill_load_from_power +from tests.test_interpolate_sparse_data import run_all_tests as test_interpolate_sparse_data from tests.test_fetch_pv_forecast import run_all_tests as test_fetch_pv_forecast from tests.test_octopus_free import test_octopus_free from tests.test_prune_today import test_prune_today @@ -174,6 +175,7 @@ def main(): 
("override_time", test_get_override_time_from_string, "Override time from string tests", False), ("previous_days_modal", test_previous_days_modal_filter, "Previous days modal filter tests", False), ("fill_load_from_power", test_fill_load_from_power, "Fill load from power sensor tests", False), + ("interpolate_sparse_data", test_interpolate_sparse_data, "Interpolate sparse data tests (SaaS load inflation fix)", False), ("fetch_pv_forecast", test_fetch_pv_forecast, "Fetch PV forecast with relative_time offset tests", False), # Octopus Energy URL/API tests ("octopus_url", test_octopus_url, "Octopus URL/API comprehensive tests (downloads, day/night rates, saving sessions, intelligent dispatch, tariffs, EDF)", False), diff --git a/apps/predbat/utils.py b/apps/predbat/utils.py index 409a4b109..3758008db 100644 --- a/apps/predbat/utils.py +++ b/apps/predbat/utils.py @@ -664,6 +664,56 @@ def clean_incrementing_reverse(data, max_increment=0): return new_data +def interpolate_sparse_data(data): + """ + Linearly interpolate a sparse cumulative dict to fill every minute index. + + Takes a dict mapping minute indices to cumulative values (as produced by + clean_incrementing_reverse) where only a fraction of minute keys have + real values. Returns a new dict with an entry at every minute from 0 to + max(keys), with values linearly interpolated between known data points. + + Skips interpolation across midnight resets (where value drops by more than + 50% of the current value). 
+ """ + if not data: + return data + + known_keys = sorted(data.keys()) + if len(known_keys) <= 1: + return data + + new_data = {} + for i in range(len(known_keys) - 1): + k0 = known_keys[i] + k1 = known_keys[i + 1] + v0 = data[k0] + v1 = data[k1] + + new_data[k0] = v0 + + gap = k1 - k0 + if gap <= 1: + continue + + # Detect midnight reset: value drops by more than 50% of current value + if v0 > 0 and (v0 - v1) > 0.5 * v0: + # Don't interpolate across reset, just carry v0 forward then jump + for m in range(k0 + 1, k1): + new_data[m] = v0 + continue + + # Linear interpolation + for m in range(k0 + 1, k1): + frac = (m - k0) / gap + new_data[m] = v0 + (v1 - v0) * frac + + # Include the last known key + new_data[known_keys[-1]] = data[known_keys[-1]] + + return new_data + + def format_time_ago(last_updated): """ Format a timestamp to show how many minutes ago it was updated From 9026d96e5577b29c880349f8f4ce0ad0bf56cb6f Mon Sep 17 00:00:00 2001 From: Mark Gascoyne Date: Wed, 11 Mar 2026 04:15:06 +0000 Subject: [PATCH 2/2] fix: add assertion to test_sparse_data_inflates_without_interpolation Test 7 previously had no assert statements and would always pass. Add assertion to verify sparse data produces measurable distortion. 
Co-Authored-By: Claude Opus 4.6 --- apps/predbat/tests/test_fill_load_from_power.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/apps/predbat/tests/test_fill_load_from_power.py b/apps/predbat/tests/test_fill_load_from_power.py index 2f7f6c08b..ec4b74a41 100644 --- a/apps/predbat/tests/test_fill_load_from_power.py +++ b/apps/predbat/tests/test_fill_load_from_power.py @@ -373,10 +373,9 @@ def test_sparse_data_inflates_without_interpolation(): print(f" Result energy: {dp4(actual_energy)} kWh, ratio: {dp4(inflation_ratio)}x") print(f" Minutes with non-monotonic anomalies: {len(period_errors)}") - # Document the behavior: sparse data may or may not inflate depending on - # alignment, but the distribution within periods IS distorted because the - # sparse gaps cause incorrect cumulative values at sub-period resolution - print("PASSED (sparse data behavior documented)") + # Sparse data should produce measurable distortion compared to expected energy + assert inflation_ratio > 1.05 or len(period_errors) > 0, f"Expected sparse data to show distortion, got ratio={dp4(inflation_ratio)}x, anomalies={len(period_errors)}" + print("PASSED") def test_sparse_misaligned_boundaries_cause_inflation():