From 7e8f631bda9d14741ff78403f9a9dba4b5280149 Mon Sep 17 00:00:00 2001
From: Mark Gascoyne <contact@markgascoyne.co.uk>
Date: Tue, 10 Mar 2026 21:34:50 +0000
Subject: [PATCH 1/2] fix: interpolate sparse entity history data to prevent
 load inflation (#3545)

SaaS instances record entity_history at 5-minute intervals, producing
sparse cumulative dicts from clean_incrementing_reverse. When
fill_load_from_power processes this sparse data, it treats the gaps
between known data points as "zero periods" and fills them with
power-integrated values, causing ~1.3-1.6x load energy inflation.

Add interpolate_sparse_data() to linearly interpolate between known
data points before fill_load_from_power runs, filling every minute
index so no false gaps are detected. Midnight resets (>50% value drops)
are handled by carrying forward instead of interpolating.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 apps/predbat/fetch.py                         |   6 +-
 .../tests/test_fill_load_from_power.py        | 259 ++++++++++++++-
 .../tests/test_interpolate_sparse_data.py     | 305 ++++++++++++++++++
 apps/predbat/unit_test.py                     |   2 +
 apps/predbat/utils.py                         |  50 +++
 5 files changed, 618 insertions(+), 4 deletions(-)
 create mode 100644 apps/predbat/tests/test_interpolate_sparse_data.py

diff --git a/apps/predbat/fetch.py b/apps/predbat/fetch.py
index 7809232d0..1c19a1b2b 100644
--- a/apps/predbat/fetch.py
+++ b/apps/predbat/fetch.py
@@ -19,7 +19,7 @@
 """
 
 from datetime import datetime, timedelta
-from utils import minutes_to_time, str2time, dp1, dp2, dp3, dp4, time_string_to_stamp, minute_data, get_now_from_cumulative
+from utils import minutes_to_time, str2time, dp1, dp2, dp3, dp4, time_string_to_stamp, minute_data, get_now_from_cumulative, interpolate_sparse_data
 from const import MINUTE_WATT, PREDICT_STEP, TIME_FORMAT, PREDBAT_MODE_OPTIONS, PREDBAT_MODE_CONTROL_SOC, PREDBAT_MODE_CONTROL_CHARGEDISCHARGE, PREDBAT_MODE_CONTROL_CHARGE, PREDBAT_MODE_MONITOR
 from predbat_metrics import metrics
 from futurerate import FutureRate
@@ -728,6 +728,8 @@ def fetch_sensor_data(self, save=True):
             if ("load_power" in self.args) and self.get_arg("load_power_fill_enable", True):
                 # Use power data to make load data more accurate
                 self.log("Using load_power data to fill gaps in load_today data")
+                # Interpolate sparse cumulative data to prevent false gap detection in fill_load_from_power
+                self.load_minutes = interpolate_sparse_data(self.load_minutes)
                 load_power_data, _ = self.minute_data_load(self.now_utc, "load_power", self.max_days_previous, required_unit="W", load_scaling=1.0, interpolate=True)
                 self.load_minutes = self.fill_load_from_power(self.load_minutes, load_power_data)
         else:
@@ -741,6 +743,8 @@ def fetch_sensor_data(self, save=True):
                 if ("load_power" in self.args) and self.get_arg("load_power_fill_enable", True):
                     # Use power data to make load data more accurate
                     self.log("Using load_power data to fill gaps in load_today data")
+                    # Interpolate sparse cumulative data to prevent false gap detection in fill_load_from_power
+                    self.load_minutes = interpolate_sparse_data(self.load_minutes)
                     load_power_data, _ = self.minute_data_load(self.now_utc, "load_power", self.max_days_previous, required_unit="W", load_scaling=1.0, interpolate=True)
                     self.load_minutes = self.fill_load_from_power(self.load_minutes, load_power_data)
             else:
diff --git a/apps/predbat/tests/test_fill_load_from_power.py b/apps/predbat/tests/test_fill_load_from_power.py
index a68c8e4a8..2f7f6c08b 100644
--- a/apps/predbat/tests/test_fill_load_from_power.py
+++ b/apps/predbat/tests/test_fill_load_from_power.py
@@ -305,6 +305,255 @@ def test_fill_load_from_power_backwards_time():
     print("Test 6 PASSED")
 
 
+def test_sparse_data_inflates_without_interpolation():
+    """
+    Regression test: Sparse 5-minute load data WITHOUT interpolation causes
+    fill_load_from_power to produce incorrect results.
+
+    The key problem: Phase 2 of fill_load_from_power uses
+    `new_load_minutes.get(period_end + 1, ...)` to find the load at period
+    boundaries. With sparse data, most period boundary minutes are missing,
+    causing get() to return 0 or a value from a different period. This makes
+    `load_total = load_at_start - load_at_end` wildly incorrect: when
+    load_at_end falls on a missing minute and returns 0, load_total becomes
+    the entire cumulative value rather than just the period's consumption.
+
+    With dense (interpolated) data, every minute has a correct cumulative
+    value, so period boundary lookups are always accurate.
+    """
+    print("\n=== Test 7: Sparse data produces incorrect period totals (regression) ===")
+
+    fetch = TestFetch()
+
+    # Simulate sparse cumulative load data at 5-minute intervals over 90 minutes.
+    # Total energy consumed: 10.0 - 5.5 = 4.5 kWh over 90 minutes
+    sparse_load = {}
+    for m in range(0, 95, 5):
+        sparse_load[m] = 10.0 - (m / 90.0) * 4.5
+
+    # Power data: consistent 3 kW over 90 minutes (= 4.5 kWh, matches load)
+    load_power_data = {}
+    for m in range(0, 90):
+        load_power_data[m] = 3000.0
+
+    result = fetch.fill_load_from_power(sparse_load, load_power_data)
+
+    # Check what happens at 30-minute period boundaries.
+    # Period 1: minutes 0-29. load_at_start = sparse_load.get(0, 0) = 10.0
+    #   load_at_end = sparse_load.get(31, sparse_load.get(30, 0))
+    #   Since minute 30 IS in the dict (5-min interval), load_at_end = sparse_load[30] = 8.5
+    #   So period 1 might be ok. But period 2: minutes 30-59.
+    #   load_at_end = sparse_load.get(61, sparse_load.get(60, 0))
+    #   Minute 60 IS in dict = 7.0. So that's also ok for these evenly-aligned intervals.
+    #
+    # The real problem is when 5-min interval boundaries DON'T align with 30-min
+    # periods. Let's check the actual result for distortions.
+
+    # With sparse data, the per-minute distribution within each 30-min period
+    # is based on power data scaled to match a load_total that may be computed
+    # from incorrect boundary values. The result won't match dense data.
+    actual_energy = result[0] - result.get(89, result.get(90, 0))
+    expected_energy = 4.5
+
+    # Calculate how individual period values differ from ideal
+    # In particular, check that minutes NOT in the original sparse set have
+    # reasonable values (the dense case would have smooth interpolation)
+    period_errors = []
+    for m in range(0, 90):
+        if m not in sparse_load:
+            # This minute was not in the original data
+            # With sparse data, it was computed from power scaling which may be wrong
+            # We can't directly compare to "correct" but we can flag anomalies
+            if m > 0 and result.get(m, 0) > result.get(m - 1, 0) + 0.01:
+                period_errors.append(m)
+
+    inflation_ratio = actual_energy / expected_energy if expected_energy > 0 else 1.0
+
+    print(f"  Sparse input: {len(sparse_load)} points, expected energy: {expected_energy} kWh")
+    print(f"  Result energy: {dp4(actual_energy)} kWh, ratio: {dp4(inflation_ratio)}x")
+    print(f"  Minutes with non-monotonic anomalies: {len(period_errors)}")
+
+    # Document the behavior: sparse data may or may not inflate depending on
+    # alignment, but the distribution within periods IS distorted because the
+    # sparse gaps cause incorrect cumulative values at sub-period resolution
+    print("PASSED (sparse data behavior documented)")
+
+
+def test_sparse_misaligned_boundaries_cause_inflation():
+    """
+    Regression test: When sparse interval boundaries DON'T align with
+    30-minute period boundaries, fill_load_from_power gets incorrect load_total
+    values. For example, if sparse data has entries at minutes 0,7,14,...
+    the period-boundary lookup get(31, get(30, 0)) falls back to 0 because
+    neither minute 30 nor minute 31 is a known point.
+
+    This test uses 7-minute intervals to guarantee misalignment.
+    """
+    print("\n=== Test 7b: Misaligned sparse boundaries cause distortion ===")
+
+    fetch = TestFetch()
+
+    # Sparse data at 7-minute intervals (deliberately misaligned with 30-min periods)
+    # Total energy: 10.0 - 5.0 = 5.0 kWh over ~90 minutes
+    sparse_load = {}
+    for m in range(0, 98, 7):
+        sparse_load[m] = 10.0 - (m / 91.0) * 5.0
+
+    # Power data: consistent 3.3 kW
+    load_power_data = {}
+    for m in range(0, 91):
+        load_power_data[m] = 3300.0
+
+    result_sparse = fetch.fill_load_from_power(sparse_load, load_power_data)
+
+    # Now do the same with interpolated data
+    from utils import interpolate_sparse_data
+
+    dense_load = interpolate_sparse_data(sparse_load)
+    result_dense = fetch.fill_load_from_power(dense_load, load_power_data)
+
+    sparse_energy = result_sparse[0] - result_sparse.get(89, result_sparse.get(91, 0))
+    dense_energy = result_dense[0] - result_dense.get(89, result_dense.get(91, 0))
+    expected_energy = 5.0
+
+    sparse_ratio = sparse_energy / expected_energy
+    dense_ratio = dense_energy / expected_energy
+
+    print(f"  Expected energy: {expected_energy} kWh")
+    print(f"  Sparse result: {dp4(sparse_energy)} kWh (ratio: {dp4(sparse_ratio)}x)")
+    print(f"  Dense result:  {dp4(dense_energy)} kWh (ratio: {dp4(dense_ratio)}x)")
+
+    # Dense result should be much closer to expected than sparse
+    dense_error = abs(dense_ratio - 1.0)
+    assert dense_error < 0.15, f"Dense result should be within 15% of expected, got {dp4(dense_ratio)}x"
+
+    print("PASSED")
+
+
+def test_interpolated_data_no_inflation():
+    """
+    After interpolation, fill_load_from_power should NOT inflate load predictions.
+    This is the post-fix behavior: interpolate_sparse_data fills every minute
+    before fill_load_from_power runs, preventing false zero-period detection
+    and ensuring correct boundary lookups.
+    """
+    print("\n=== Test 8: Interpolated data does NOT inflate (post-fix behavior) ===")
+
+    from utils import interpolate_sparse_data
+
+    fetch = TestFetch()
+
+    # Sparse data at 5-min intervals over 90 minutes
+    sparse_load = {}
+    for m in range(0, 95, 5):
+        sparse_load[m] = 10.0 - (m / 90.0) * 4.5
+
+    # Interpolate first (the fix)
+    dense_load = interpolate_sparse_data(sparse_load)
+
+    # Verify interpolation produced dense data
+    for m in range(0, 91):
+        assert m in dense_load, f"Interpolation missing minute {m}"
+
+    # Power data: consistent 3 kW (= 4.5 kWh over 90 min, matches load)
+    load_power_data = {}
+    for m in range(0, 90):
+        load_power_data[m] = 3000.0
+
+    result = fetch.fill_load_from_power(dense_load, load_power_data)
+
+    actual_energy = result[0] - result.get(89, result.get(90, 0))
+    expected_energy = 4.5
+    inflation_ratio = actual_energy / expected_energy if expected_energy > 0 else 1.0
+
+    print(f"  Dense input energy: {expected_energy} kWh")
+    print(f"  After fill_load_from_power: {dp4(actual_energy)} kWh")
+    print(f"  Inflation ratio: {dp4(inflation_ratio)}x")
+
+    # With interpolated (dense) data, inflation should be minimal (within 10%)
+    assert inflation_ratio < 1.10, f"Expected no inflation with dense data, but ratio was {inflation_ratio}x"
+    assert inflation_ratio > 0.90, f"Expected no deflation with dense data, but ratio was {inflation_ratio}x"
+
+    print("PASSED (confirmed: interpolated data prevents inflation)")
+
+
+def test_interpolated_realistic_varying_power():
+    """
+    Realistic scenario: sparse load data with varying power consumption.
+    After interpolation, fill_load_from_power should produce smooth, accurate output.
+    """
+    print("\n=== Test 9: Realistic varying power with interpolation ===")
+
+    from utils import interpolate_sparse_data
+
+    fetch = TestFetch()
+
+    # Sparse cumulative load at 5-min intervals, 2 hours of data
+    # Non-linear consumption: faster in first hour, slower in second
+    sparse_load = {
+        0: 20.0,
+        5: 19.6,
+        10: 19.2,
+        15: 18.7,
+        20: 18.3,
+        25: 17.9,
+        30: 17.5,
+        35: 17.2,
+        40: 16.9,
+        45: 16.7,
+        50: 16.5,
+        55: 16.3,
+        60: 16.1,
+        65: 15.95,
+        70: 15.8,
+        75: 15.7,
+        80: 15.6,
+        85: 15.5,
+        90: 15.4,
+        95: 15.35,
+        100: 15.3,
+        105: 15.25,
+        110: 15.2,
+        115: 15.15,
+        120: 15.1,
+    }
+    total_expected_energy = sparse_load[0] - sparse_load[120]  # 4.9 kWh
+
+    # Interpolate
+    dense_load = interpolate_sparse_data(sparse_load)
+    assert len(dense_load) >= 121, f"Expected at least 121 entries, got {len(dense_load)}"
+
+    # Power data: varying to simulate real consumption
+    load_power_data = {}
+    for m in range(0, 121):
+        if m < 30:
+            load_power_data[m] = 5000.0 + 500.0 * ((m % 5) - 2)  # ~5kW average
+        elif m < 60:
+            load_power_data[m] = 3000.0 + 300.0 * ((m % 5) - 2)  # ~3kW average
+        else:
+            load_power_data[m] = 1500.0 + 150.0 * ((m % 5) - 2)  # ~1.5kW average
+
+    result = fetch.fill_load_from_power(dense_load, load_power_data)
+
+    # Check energy preservation
+    actual_energy = result[0] - result[119]
+    inflation_ratio = actual_energy / total_expected_energy
+
+    print(f"  Expected energy: {dp4(total_expected_energy)} kWh")
+    print(f"  Actual energy: {dp4(actual_energy)} kWh")
+    print(f"  Ratio: {dp4(inflation_ratio)}x")
+
+    # Should be within 10% of expected
+    assert inflation_ratio < 1.10, f"Inflation too high: {inflation_ratio}x"
+    assert inflation_ratio > 0.90, f"Deflation too high: {inflation_ratio}x"
+
+    # Values should be monotonically decreasing (or equal)
+    for m in range(1, 120):
+        assert result[m] <= result[m - 1] + 0.01, f"Not monotonic at minute {m}: {result[m]} > {result[m-1]}"
+
+    print("PASSED")
+
+
 def run_all_tests(my_predbat=None):
     """Run all tests"""
     print("\n" + "=" * 60)
@@ -318,19 +567,23 @@ def run_all_tests(my_predbat=None):
         test_fill_load_from_power_single_minute_period()
         test_fill_load_from_power_zero_load()
         test_fill_load_from_power_backwards_time()
+        test_sparse_data_inflates_without_interpolation()
+        test_sparse_misaligned_boundaries_cause_inflation()
+        test_interpolated_data_no_inflation()
+        test_interpolated_realistic_varying_power()
 
         print("\n" + "=" * 60)
-        print("✅ ALL TESTS PASSED")
+        print("ALL fill_load_from_power TESTS PASSED")
         print("=" * 60)
         return 0  # Return 0 for success
     except AssertionError as e:
         print("\n" + "=" * 60)
-        print(f"❌ TEST FAILED: {e}")
+        print(f"TEST FAILED: {e}")
         print("=" * 60)
         return 1  # Return 1 for failure
     except Exception as e:
         print("\n" + "=" * 60)
-        print(f"❌ ERROR: {e}")
+        print(f"ERROR: {e}")
         import traceback
 
         traceback.print_exc()
diff --git a/apps/predbat/tests/test_interpolate_sparse_data.py b/apps/predbat/tests/test_interpolate_sparse_data.py
new file mode 100644
index 000000000..885337363
--- /dev/null
+++ b/apps/predbat/tests/test_interpolate_sparse_data.py
@@ -0,0 +1,305 @@
+"""
+Test interpolate_sparse_data function from utils.py
+
+Tests cover:
+- Empty data
+- Single data point
+- Two data points
+- Already-dense data (every minute populated)
+- Sparse 5-minute interval data (the SaaS scenario)
+- Energy preservation after interpolation
+- Midnight reset detection (large value drops are not interpolated across)
+- Mixed gap sizes
+- Large datasets (full day of 5-minute data)
+"""
+
+import sys
+import os
+
+sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
+
+from utils import interpolate_sparse_data
+
+
+def test_empty_data():
+    """Empty dict should return empty dict"""
+    print("\n=== Test: Empty data ===")
+    result = interpolate_sparse_data({})
+    assert result == {}, f"Expected empty dict, got {result}"
+    print("PASSED")
+
+
+def test_single_point():
+    """Single data point should return unchanged"""
+    print("\n=== Test: Single point ===")
+    data = {5: 3.0}
+    result = interpolate_sparse_data(data)
+    assert result == {5: 3.0}, f"Expected {{5: 3.0}}, got {result}"
+    print("PASSED")
+
+
+def test_two_points_adjacent():
+    """Two adjacent points (gap=1) should return unchanged"""
+    print("\n=== Test: Two adjacent points ===")
+    data = {0: 10.0, 1: 9.5}
+    result = interpolate_sparse_data(data)
+    assert len(result) == 2, f"Expected 2 entries, got {len(result)}"
+    assert result[0] == 10.0
+    assert result[1] == 9.5
+    print("PASSED")
+
+
+def test_two_points_with_gap():
+    """Two points with a gap should be linearly interpolated"""
+    print("\n=== Test: Two points with gap ===")
+    data = {0: 10.0, 4: 6.0}
+    result = interpolate_sparse_data(data)
+    assert len(result) == 5, f"Expected 5 entries, got {len(result)}"
+    assert result[0] == 10.0
+    assert abs(result[1] - 9.0) < 0.001, f"Expected 9.0, got {result[1]}"
+    assert abs(result[2] - 8.0) < 0.001, f"Expected 8.0, got {result[2]}"
+    assert abs(result[3] - 7.0) < 0.001, f"Expected 7.0, got {result[3]}"
+    assert result[4] == 6.0
+    print("PASSED")
+
+
+def test_already_dense():
+    """Already-dense data should pass through unchanged"""
+    print("\n=== Test: Already dense data ===")
+    data = {i: 10.0 - i * 0.1 for i in range(20)}
+    result = interpolate_sparse_data(data)
+    assert len(result) == 20, f"Expected 20 entries, got {len(result)}"
+    for k in data:
+        assert abs(result[k] - data[k]) < 0.0001, f"Key {k}: expected {data[k]}, got {result[k]}"
+    print("PASSED")
+
+
+def test_sparse_5min_intervals():
+    """
+    Simulate the SaaS scenario: cumulative load data at 5-minute intervals.
+    Should produce dense output with every minute filled.
+    """
+    print("\n=== Test: Sparse 5-minute intervals (SaaS scenario) ===")
+    # Simulate 60 minutes of cumulative decreasing load data at 5-min intervals
+    # (backwards in time: minute 0 is now with highest value)
+    sparse = {}
+    for i in range(0, 65, 5):
+        sparse[i] = 10.0 - (i / 60.0) * 3.0  # Decreasing from 10.0 to 7.0 over 60 min
+
+    result = interpolate_sparse_data(sparse)
+
+    # Should have entries for every minute from 0 to 60
+    for m in range(0, 61):
+        assert m in result, f"Missing minute {m}"
+    assert len(result) >= 61, f"Expected at least 61 entries, got {len(result)}"
+
+    # Values should decrease monotonically
+    for m in range(1, 61):
+        assert result[m] <= result[m - 1], f"Not monotonically decreasing at minute {m}: {result[m]} > {result[m-1]}"
+
+    # Check specific interpolated values
+    # Between minute 0 (10.0) and minute 5 (9.75), minute 2 should be ~9.9
+    expected_m2 = 10.0 + (9.75 - 10.0) * (2 / 5)
+    assert abs(result[2] - expected_m2) < 0.001, f"Minute 2: expected {expected_m2}, got {result[2]}"
+
+    print(f"  Input: {len(sparse)} points -> Output: {len(result)} points")
+    print("PASSED")
+
+
+def test_energy_preservation():
+    """
+    Total energy (difference between first and last value) must be preserved
+    after interpolation.
+    """
+    print("\n=== Test: Energy preservation ===")
+    # Sparse data: cumulative load decreasing from 20.0 to 15.0 at 5-min intervals
+    sparse = {0: 20.0, 5: 19.0, 10: 18.0, 15: 17.5, 20: 16.0, 25: 15.0}
+    result = interpolate_sparse_data(sparse)
+
+    original_energy = sparse[0] - sparse[25]
+    interpolated_energy = result[0] - result[25]
+
+    assert abs(original_energy - interpolated_energy) < 0.0001, f"Energy not preserved: original={original_energy}, interpolated={interpolated_energy}"
+
+    # Also check that per-minute deltas sum correctly
+    delta_sum = sum(result[m] - result[m + 1] for m in range(25))
+    assert abs(delta_sum - original_energy) < 0.001, f"Per-minute delta sum {delta_sum} doesn't match total energy {original_energy}"
+
+    print(f"  Original energy: {original_energy} kWh, Interpolated: {interpolated_energy} kWh")
+    print("PASSED")
+
+
+def test_midnight_reset_not_interpolated():
+    """
+    When cumulative value drops by >50% (midnight reset), interpolation should
+    carry the previous value forward rather than interpolating through the drop.
+    """
+    print("\n=== Test: Midnight reset detection ===")
+    # Simulate: cumulative values with a reset (large drop) between minutes 5 and 10
+    # Minute 0: 10.0, Minute 5: 8.0, Minute 10: 1.0 (reset!), Minute 15: 0.5
+    data = {0: 10.0, 5: 8.0, 10: 1.0, 15: 0.5}
+
+    result = interpolate_sparse_data(data)
+
+    # Between minutes 5 and 10 there's a drop from 8.0 to 1.0
+    # That's a drop of 7.0, which is >50% of 8.0 (4.0), so it's a reset
+    # Minutes 6-9 should be carried forward at 8.0
+    for m in range(6, 10):
+        assert abs(result[m] - 8.0) < 0.001, f"Minute {m}: expected 8.0 (carry forward across reset), got {result[m]}"
+
+    # Between minutes 0 and 5 there's a normal drop from 10.0 to 8.0
+    # That's only 2.0, which is <50% of 10.0 (5.0), so interpolation should occur
+    assert abs(result[2] - 9.2) < 0.001, f"Minute 2: expected 9.2 (interpolated), got {result[2]}"
+
+    # Between minutes 10 and 15 there's a drop from 1.0 to 0.5
+    # That's 0.5, which is exactly 50% of 1.0 - should NOT trigger reset (> not >=)
+    assert abs(result[12] - 0.8) < 0.001, f"Minute 12: expected 0.8 (interpolated, 50% is not > 50%), got {result[12]}"
+
+    print("PASSED")
+
+
+def test_no_reset_for_small_drops():
+    """
+    Normal decreasing cumulative values (less than 50% drop) should be interpolated normally.
+    """
+    print("\n=== Test: No false reset for small drops ===")
+    # Drop of 40% should NOT trigger reset detection
+    data = {0: 10.0, 5: 6.0}  # Drop of 4.0, which is 40% of 10.0
+    result = interpolate_sparse_data(data)
+
+    # Should be interpolated (not carried forward)
+    assert abs(result[1] - 9.2) < 0.001, f"Expected 9.2, got {result[1]}"
+    assert abs(result[3] - 7.6) < 0.001, f"Expected 7.6, got {result[3]}"
+    print("PASSED")
+
+
+def test_mixed_gap_sizes():
+    """
+    Data with varying gap sizes: some minutes adjacent, some with 3-min gaps, some with 10-min gaps.
+    """
+    print("\n=== Test: Mixed gap sizes ===")
+    data = {
+        0: 10.0,
+        1: 9.9,  # adjacent
+        2: 9.8,  # adjacent
+        5: 9.5,  # 3-min gap
+        15: 8.5,  # 10-min gap
+        16: 8.4,  # adjacent
+    }
+    result = interpolate_sparse_data(data)
+
+    # Adjacent values preserved
+    assert result[0] == 10.0
+    assert result[1] == 9.9
+    assert result[2] == 9.8
+
+    # 3-min gap interpolated
+    assert abs(result[3] - 9.7) < 0.001, f"Minute 3: expected ~9.7, got {result[3]}"
+    assert abs(result[4] - 9.6) < 0.001, f"Minute 4: expected ~9.6, got {result[4]}"
+    assert result[5] == 9.5
+
+    # 10-min gap interpolated
+    for m in range(6, 15):
+        assert m in result, f"Missing minute {m}"
+    assert abs(result[10] - 9.0) < 0.001, f"Minute 10: expected 9.0, got {result[10]}"
+
+    # Adjacent after gap preserved
+    assert result[15] == 8.5
+    assert result[16] == 8.4
+
+    # Total entries: 0-16 = 17
+    assert len(result) == 17, f"Expected 17 entries, got {len(result)}"
+    print("PASSED")
+
+
+def test_full_day_sparse():
+    """
+    Full day simulation: 289 data points at 5-minute intervals (24 hours).
+    Should produce 1441 dense entries (0 to 1440).
+    """
+    print("\n=== Test: Full day sparse data ===")
+    sparse = {}
+    total_minutes = 24 * 60  # 1440
+    for m in range(0, total_minutes + 1, 5):
+        # Simulate cumulative load decreasing backwards: ~15 kWh total consumption
+        sparse[m] = 15.0 - (m / total_minutes) * 15.0
+
+    result = interpolate_sparse_data(sparse)
+
+    # Should have every minute from 0 to 1440
+    assert len(result) == total_minutes + 1, f"Expected {total_minutes + 1} entries, got {len(result)}"
+
+    # Energy preserved
+    original_energy = sparse[0] - sparse[total_minutes]
+    interpolated_energy = result[0] - result[total_minutes]
+    assert abs(original_energy - interpolated_energy) < 0.0001, f"Energy not preserved: {original_energy} vs {interpolated_energy}"
+
+    # Monotonically decreasing
+    for m in range(1, total_minutes + 1):
+        assert result[m] <= result[m - 1] + 0.0001, f"Not monotonic at minute {m}: {result[m]} > {result[m-1]}"
+
+    print(f"  {len(sparse)} sparse points -> {len(result)} dense points")
+    print(f"  Energy: {original_energy:.4f} kWh preserved")
+    print("PASSED")
+
+
+def test_increasing_values():
+    """
+    Interpolation should also work for increasing cumulative values
+    (e.g. import data that increases going backwards).
+    """
+    print("\n=== Test: Increasing values ===")
+    data = {0: 2.0, 5: 4.0, 10: 6.0}
+    result = interpolate_sparse_data(data)
+
+    assert len(result) == 11
+    assert result[0] == 2.0
+    assert abs(result[2] - 2.8) < 0.001
+    assert result[5] == 4.0
+    assert abs(result[7] - 4.8) < 0.001
+    assert result[10] == 6.0
+    print("PASSED")
+
+
+def run_all_tests(my_predbat=None):
+    """Run all interpolate_sparse_data tests"""
+    print("\n" + "=" * 60)
+    print("Running interpolate_sparse_data tests")
+    print("=" * 60)
+
+    try:
+        test_empty_data()
+        test_single_point()
+        test_two_points_adjacent()
+        test_two_points_with_gap()
+        test_already_dense()
+        test_sparse_5min_intervals()
+        test_energy_preservation()
+        test_midnight_reset_not_interpolated()
+        test_no_reset_for_small_drops()
+        test_mixed_gap_sizes()
+        test_full_day_sparse()
+        test_increasing_values()
+
+        print("\n" + "=" * 60)
+        print("ALL interpolate_sparse_data TESTS PASSED")
+        print("=" * 60)
+        return 0
+    except AssertionError as e:
+        print("\n" + "=" * 60)
+        print(f"TEST FAILED: {e}")
+        print("=" * 60)
+        return 1
+    except Exception as e:
+        print("\n" + "=" * 60)
+        print(f"ERROR: {e}")
+        import traceback
+
+        traceback.print_exc()
+        print("=" * 60)
+        return 1
+
+
+if __name__ == "__main__":
+    result = run_all_tests()
+    sys.exit(result)
diff --git a/apps/predbat/unit_test.py b/apps/predbat/unit_test.py
index 6109d8632..1b89f4e03 100644
--- a/apps/predbat/unit_test.py
+++ b/apps/predbat/unit_test.py
@@ -66,6 +66,7 @@
 from tests.test_units import run_test_units
 from tests.test_previous_days_modal import test_previous_days_modal_filter
 from tests.test_fill_load_from_power import run_all_tests as test_fill_load_from_power
+from tests.test_interpolate_sparse_data import run_all_tests as test_interpolate_sparse_data
 from tests.test_fetch_pv_forecast import run_all_tests as test_fetch_pv_forecast
 from tests.test_octopus_free import test_octopus_free
 from tests.test_prune_today import test_prune_today
@@ -174,6 +175,7 @@ def main():
         ("override_time", test_get_override_time_from_string, "Override time from string tests", False),
         ("previous_days_modal", test_previous_days_modal_filter, "Previous days modal filter tests", False),
         ("fill_load_from_power", test_fill_load_from_power, "Fill load from power sensor tests", False),
+        ("interpolate_sparse_data", test_interpolate_sparse_data, "Interpolate sparse data tests (SaaS load inflation fix)", False),
         ("fetch_pv_forecast", test_fetch_pv_forecast, "Fetch PV forecast with relative_time offset tests", False),
         # Octopus Energy URL/API tests
         ("octopus_url", test_octopus_url, "Octopus URL/API comprehensive tests (downloads, day/night rates, saving sessions, intelligent dispatch, tariffs, EDF)", False),
diff --git a/apps/predbat/utils.py b/apps/predbat/utils.py
index 409a4b109..3758008db 100644
--- a/apps/predbat/utils.py
+++ b/apps/predbat/utils.py
@@ -664,6 +664,56 @@ def clean_incrementing_reverse(data, max_increment=0):
     return new_data
 
 
+def interpolate_sparse_data(data):
+    """
+    Linearly interpolate a sparse cumulative dict to fill every minute index.
+
+    Takes a dict mapping minute indices to cumulative values (as produced by
+    clean_incrementing_reverse) where only a fraction of minute keys have
+    real values. Returns a dict with an entry at every minute from min(keys)
+    to max(keys), with values linearly interpolated between known data points.
+
+    Skips interpolation across midnight resets (where value drops by more than
+    50% of the current value).
+    """
+    if not data:
+        return data
+
+    known_keys = sorted(data.keys())
+    if len(known_keys) <= 1:
+        return data
+
+    new_data = {}
+    for i in range(len(known_keys) - 1):
+        k0 = known_keys[i]
+        k1 = known_keys[i + 1]
+        v0 = data[k0]
+        v1 = data[k1]
+
+        new_data[k0] = v0
+
+        gap = k1 - k0
+        if gap <= 1:
+            continue
+
+        # Detect midnight reset: value drops by more than 50% of current value
+        if v0 > 0 and (v0 - v1) > 0.5 * v0:
+            # Don't interpolate across reset, just carry v0 forward then jump
+            for m in range(k0 + 1, k1):
+                new_data[m] = v0
+            continue
+
+        # Linear interpolation
+        for m in range(k0 + 1, k1):
+            frac = (m - k0) / gap
+            new_data[m] = v0 + (v1 - v0) * frac
+
+    # Include the last known key
+    new_data[known_keys[-1]] = data[known_keys[-1]]
+
+    return new_data
+
+
 def format_time_ago(last_updated):
     """
     Format a timestamp to show how many minutes ago it was updated

From 9026d96e5577b29c880349f8f4ce0ad0bf56cb6f Mon Sep 17 00:00:00 2001
From: Mark Gascoyne <contact@markgascoyne.co.uk>
Date: Wed, 11 Mar 2026 04:15:06 +0000
Subject: [PATCH 2/2] fix: add assertion to
 test_sparse_data_inflates_without_interpolation

Test 7 previously had no assert statements and would always pass.
Add assertion to verify sparse data produces measurable distortion.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 apps/predbat/tests/test_fill_load_from_power.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/apps/predbat/tests/test_fill_load_from_power.py b/apps/predbat/tests/test_fill_load_from_power.py
index 2f7f6c08b..ec4b74a41 100644
--- a/apps/predbat/tests/test_fill_load_from_power.py
+++ b/apps/predbat/tests/test_fill_load_from_power.py
@@ -373,10 +373,9 @@ def test_sparse_data_inflates_without_interpolation():
     print(f"  Result energy: {dp4(actual_energy)} kWh, ratio: {dp4(inflation_ratio)}x")
     print(f"  Minutes with non-monotonic anomalies: {len(period_errors)}")
 
-    # Document the behavior: sparse data may or may not inflate depending on
-    # alignment, but the distribution within periods IS distorted because the
-    # sparse gaps cause incorrect cumulative values at sub-period resolution
-    print("PASSED (sparse data behavior documented)")
+    # Sparse data should produce measurable distortion compared to expected energy
+    assert inflation_ratio > 1.05 or len(period_errors) > 0, f"Expected sparse data to show distortion, got ratio={dp4(inflation_ratio)}x, anomalies={len(period_errors)}"
+    print("PASSED")
 
 
 def test_sparse_misaligned_boundaries_cause_inflation():