From 8d609a41f35f1490f1bc4882fa5fdda0ffa4ca4e Mon Sep 17 00:00:00 2001
From: liobrasil <limol.lionel@gmail.com>
Date: Tue, 10 Mar 2026 22:19:08 -0400
Subject: [PATCH 1/3] test: tighten supervisor recovery regressions

---
 cadence/tests/scheduled_supervisor_test.cdc   | 105 ++++++-
 ...duler_mixed_population_regression_test.cdc | 292 ++++++++++++++++++
 2 files changed, 388 insertions(+), 9 deletions(-)
 create mode 100644 cadence/tests/scheduler_mixed_population_regression_test.cdc

diff --git a/cadence/tests/scheduled_supervisor_test.cdc b/cadence/tests/scheduled_supervisor_test.cdc
index 32438057..bb96543a 100644
--- a/cadence/tests/scheduled_supervisor_test.cdc
+++ b/cadence/tests/scheduled_supervisor_test.cdc
@@ -599,6 +599,25 @@ fun testStuckYieldVaultDetectionLogic() {
     log("PASS: Stuck yield vault detection correctly identifies healthy yield vaults")
 }
 
+/// Returns per-yield-vault recovery event counts from YieldVaultRecovered events.
+///
+/// This is used by stress tests to distinguish "all vaults recovered at least once"
+/// from "lots of recovery events happened", which can otherwise hide duplicate
+/// recovery churn for the same vault IDs.
+///
+/// Example: 240 recovery events for 200 vaults can look healthy if the test only
+/// checks `events.length >= 200`, even though 40 of those events may be repeats.
+access(all)
+fun getRecoveredYieldVaultCounts(): {UInt64: Int} {
+    let counts: {UInt64: Int} = {}
+    let recoveredEvents = Test.eventsOfType(Type<FlowYieldVaultsSchedulerV1.YieldVaultRecovered>())
+    for recoveredAny in recoveredEvents {
+        let recovered = recoveredAny as! FlowYieldVaultsSchedulerV1.YieldVaultRecovered
+        counts[recovered.yieldVaultID] = (counts[recovered.yieldVaultID] ?? 0) + 1
+    }
+    return counts
+}
+
 /// COMPREHENSIVE TEST: Insufficient Funds -> Failure -> Recovery
 ///
 /// This test validates the COMPLETE failure and recovery cycle:
@@ -939,9 +958,21 @@ fun testInsufficientFundsAndRecovery() {
 ///
 /// Flow: create 200 yield vaults, run 2 scheduling rounds, drain FLOW so executions fail,
 /// wait for vaults to be marked stuck, refund FLOW, schedule the supervisor, then advance
-/// time for ceil(200/MAX_BATCH_SIZE)+10 supervisor ticks. Asserts all 200 vaults are
-/// recovered (YieldVaultRecovered events), none still stuck, and all have active schedules.
-/// The +10 extra ticks are a buffer so every vault is processed despite scheduler timing.
+/// time for enough supervisor ticks to recover all unique vault IDs, plus a short
+/// stabilization window. This asserts:
+/// - every one of the 200 vault IDs is recovered at least once,
+/// - no recovery failures occur,
+/// - no vault emits more than one recovery event,
+/// - once all vaults are recovered and healthy, extra supervisor ticks do not emit
+///   additional recovery events,
+/// - none remain stuck, and all have active schedules.
+///
+/// Why this was tightened:
+/// the earlier version only checked `YieldVaultRecovered.length >= n` after
+/// `ceil(n / MAX_BATCH_SIZE) + 10` supervisor ticks. That allowed the test to pass
+/// even when some vaults recovered more than once while others had not yet been
+/// uniquely validated. This version keeps the same tick budget, but uses it as a
+/// timeout ceiling instead of treating it as proof that the recovery set is clean.
 access(all)
 fun testSupervisorHandlesManyStuckVaults() {
     let n = 200
@@ -1024,19 +1055,75 @@ fun testSupervisorHandlesManyStuckVaults() {
     )
     Test.expect(schedSupRes, Test.beSucceeded())
 
-    // 7. Advance time for supervisor ticks (ceil(n/MAX_BATCH_SIZE)+10); each tick processes a batch
+    // 7. Advance time until every target vault has emitted at least one recovery event.
+    //
+    // We still compute ceil(n / MAX_BATCH_SIZE) + 10, but it now acts as a maximum
+    // allowed budget for supervisor ticks. The loop stops early once all 200 vault IDs
+    // have been seen at least once. This makes the assertion sensitive to duplicate
+    // recoveries: repeated events for the same vault no longer help the test finish.
+    //
+    // After all unique IDs are observed, run a short stabilization window to check
+    // that a healthy supervisor does not continue to emit recovery events.
     let supervisorRunsNeeded = (UInt(n) + UInt(maxBatchSize) - 1) / UInt(maxBatchSize)
+    let maxSupervisorTicks = supervisorRunsNeeded + 10
     var run = 0 as UInt
-    while run < supervisorRunsNeeded + 10 {
+    var recoveredCounts = getRecoveredYieldVaultCounts()
+    while run < maxSupervisorTicks && recoveredCounts.length < n {
         Test.moveTime(by: 60.0 * 10.0 + 10.0)
         Test.commitBlock()
         run = run + 1
+        recoveredCounts = getRecoveredYieldVaultCounts()
+    }
+    log("testSupervisorHandlesManyStuckVaults: ran \(run.toString()) supervisor ticks to reach \(recoveredCounts.length.toString()) unique recovered vaults")
+
+    let recoveryFailures = Test.eventsOfType(Type<FlowYieldVaultsSchedulerV1.YieldVaultRecoveryFailed>())
+    Test.assertEqual(0, recoveryFailures.length)
+
+    // Split the validation into:
+    // - missing recoveries: some vaults never emitted YieldVaultRecovered at all
+    // - duplicate recoveries: some vaults emitted YieldVaultRecovered more than once
+    //
+    // Both matter. Missing recoveries means the supervisor did not cover the whole set.
+    // Duplicate recoveries means event volume was inflated by churn, which the old
+    // `recoveredEvents.length >= n` assertion could not distinguish from success.
+    var missingRecoveries = 0
+    var duplicatedRecoveries = 0
+    var duplicatedVaults = 0
+    for yieldVaultID in yieldVaultIDs {
+        let recoveryCount = recoveredCounts[yieldVaultID] ?? 0
+        if recoveryCount == 0 {
+            missingRecoveries = missingRecoveries + 1
+        } else if recoveryCount > 1 {
+            duplicatedRecoveries = duplicatedRecoveries + (recoveryCount - 1)
+            duplicatedVaults = duplicatedVaults + 1
+        }
     }
-    log("testSupervisorHandlesManyStuckVaults: ran \((supervisorRunsNeeded + 10).toString()) supervisor ticks")
+    Test.assert(
+        missingRecoveries == 0,
+        message: "expected every vault to recover at least once, but \(missingRecoveries.toString()) vaults emitted no YieldVaultRecovered event"
+    )
+    Test.assert(
+        duplicatedRecoveries == 0,
+        message: "expected exactly one recovery per vault, but saw \(duplicatedRecoveries.toString()) duplicate recoveries across \(duplicatedVaults.toString()) vaults"
+    )
 
-    let recoveredEvents = Test.eventsOfType(Type<FlowYieldVaultsSchedulerV1.YieldVaultRecovered>())
-    Test.assert(recoveredEvents.length >= n, message: "expected at least \(n.toString()) recovered, got \(recoveredEvents.length.toString())")
-    log("testSupervisorHandlesManyStuckVaults: recovered \(recoveredEvents.length.toString()) vaults")
+    // This second guard catches late churn that may start only after the full unique
+    // set has already been recovered. The duplicate-per-vault check above inspects the
+    // state up to this point; the stabilization window verifies the system stays quiet
+    // once recovery should be complete.
+    let recoveredEventsBeforeStabilization = Test.eventsOfType(Type<FlowYieldVaultsSchedulerV1.YieldVaultRecovered>()).length
+    var stabilizationTick = 0
+    while stabilizationTick < 2 {
+        Test.moveTime(by: 60.0 * 10.0 + 10.0)
+        Test.commitBlock()
+        stabilizationTick = stabilizationTick + 1
+    }
+    let recoveredEventsAfterStabilization = Test.eventsOfType(Type<FlowYieldVaultsSchedulerV1.YieldVaultRecovered>()).length
+    Test.assert(
+        recoveredEventsAfterStabilization == recoveredEventsBeforeStabilization,
+        message: "expected no additional recovery churn after all vaults were recovered; before stabilization: \(recoveredEventsBeforeStabilization.toString()), after: \(recoveredEventsAfterStabilization.toString())"
+    )
+    log("testSupervisorHandlesManyStuckVaults: stable recovery set of \(recoveredCounts.length.toString()) unique vaults with \(recoveredEventsAfterStabilization.toString()) total recovery events")
 
     // 8. Health check: none stuck, all have active schedules
     var stillStuck = 0
diff --git a/cadence/tests/scheduler_mixed_population_regression_test.cdc b/cadence/tests/scheduler_mixed_population_regression_test.cdc
new file mode 100644
index 00000000..036758ae
--- /dev/null
+++ b/cadence/tests/scheduler_mixed_population_regression_test.cdc
@@ -0,0 +1,292 @@
+/// Mixed-population regression tests for the scheduler stuck-scan.
+///
+/// WHY THIS FILE EXISTS
+/// --------------------
+/// The Supervisor does not scan the full registry for stuck vaults. Each run only asks the
+/// registry for up to MAX_BATCH_SIZE candidates from the tail of the scan order, then checks
+/// those candidates one by one.
+///
+/// That optimization breaks down when the scan order contains vaults that can never become
+/// stuck. Today, non-recurring vaults are still registered into the same ordering even though
+/// isStuckYieldVault() immediately returns false for them.
+///
+/// This creates a liveness risk:
+/// - more than MAX_BATCH_SIZE non-recurring vaults can occupy the tail,
+/// - the Supervisor can keep rescanning those same ineligible IDs,
+/// - and a real stuck recurring vault further up the list is never detected or recovered.
+///
+/// This file exists to lock that failure mode down as a regression. The main test below
+/// intentionally builds that mixed population and asserts the Supervisor should still find
+/// the real stuck vault. On the current implementation, that assertion fails, which is the
+/// exact bug this test is meant to expose until the scheduler logic is fixed.
+import Test
+import BlockchainHelpers
+
+import "test_helpers.cdc"
+
+import "FlowToken"
+import "MOET"
+import "YieldToken"
+import "MockStrategies"
+import "FlowYieldVaultsAutoBalancers"
+import "FlowYieldVaultsSchedulerV1"
+import "FlowYieldVaultsSchedulerRegistry"
+
+access(all) let protocolAccount = Test.getAccount(0x0000000000000008)
+access(all) let flowYieldVaultsAccount = Test.getAccount(0x0000000000000009)
+access(all) let yieldTokenAccount = Test.getAccount(0x0000000000000010)
+
+access(all) var strategyIdentifier = Type<@MockStrategies.TracerStrategy>().identifier
+access(all) var flowTokenIdentifier = Type<@FlowToken.Vault>().identifier
+access(all) var yieldTokenIdentifier = Type<@YieldToken.Vault>().identifier
+access(all) var moetTokenIdentifier = Type<@MOET.Vault>().identifier
+access(all) var snapshot: UInt64 = 0
+
+access(all)
+fun setup() {
+    log("Setting up mixed-population scheduler regression test...")
+
+    deployContracts()
+    mintFlow(to: flowYieldVaultsAccount, amount: 1000.0)
+
+    setMockOraclePrice(signer: flowYieldVaultsAccount, forTokenIdentifier: yieldTokenIdentifier, price: 1.0)
+    setMockOraclePrice(signer: flowYieldVaultsAccount, forTokenIdentifier: flowTokenIdentifier, price: 1.0)
+
+    let reserveAmount = 100_000_00.0
+    setupMoetVault(protocolAccount, beFailed: false)
+    setupYieldVault(protocolAccount, beFailed: false)
+    mintFlow(to: protocolAccount, amount: reserveAmount)
+    mintMoet(signer: protocolAccount, to: protocolAccount.address, amount: reserveAmount, beFailed: false)
+    mintYield(signer: yieldTokenAccount, to: protocolAccount.address, amount: reserveAmount, beFailed: false)
+    setMockSwapperLiquidityConnector(signer: protocolAccount, vaultStoragePath: MOET.VaultStoragePath)
+    setMockSwapperLiquidityConnector(signer: protocolAccount, vaultStoragePath: YieldToken.VaultStoragePath)
+    setMockSwapperLiquidityConnector(signer: protocolAccount, vaultStoragePath: /storage/flowTokenVault)
+
+    createAndStorePool(signer: protocolAccount, defaultTokenIdentifier: moetTokenIdentifier, beFailed: false)
+    addSupportedTokenFixedRateInterestCurve(
+        signer: protocolAccount,
+        tokenTypeIdentifier: flowTokenIdentifier,
+        collateralFactor: 0.8,
+        borrowFactor: 1.0,
+        yearlyRate: UFix128(0.1),
+        depositRate: 1_000_000.0,
+        depositCapacityCap: 1_000_000.0
+    )
+
+    let openRes = _executeTransaction(
+        "../../lib/FlowALP/cadence/transactions/flow-alp/position/create_position.cdc",
+        [reserveAmount / 2.0, /storage/flowTokenVault, true],
+        protocolAccount
+    )
+    Test.expect(openRes, Test.beSucceeded())
+
+    addStrategyComposer(
+        signer: flowYieldVaultsAccount,
+        strategyIdentifier: strategyIdentifier,
+        composerIdentifier: Type<@MockStrategies.TracerStrategyComposer>().identifier,
+        issuerStoragePath: MockStrategies.IssuerStoragePath,
+        beFailed: false
+    )
+
+    snapshot = getCurrentBlockHeight()
+    log("Setup complete")
+}
+
+access(all)
+fun cancelSchedulesAndRemoveRecurringConfig(yieldVaultID: UInt64) {
+    let storagePath = FlowYieldVaultsAutoBalancers.deriveAutoBalancerPath(
+        id: yieldVaultID,
+        storage: true
+    ) as! StoragePath
+    let res = _executeTransaction(
+        "../transactions/flow-yield-vaults/admin/cancel_all_scheduled_transactions_and_remove_recurring_config.cdc",
+        [storagePath],
+        flowYieldVaultsAccount
+    )
+    Test.expect(res, Test.beSucceeded())
+}
+
+access(all)
+fun isStuckYieldVault(_ yieldVaultID: UInt64): Bool {
+    let res = _executeScript("../scripts/flow-yield-vaults/is_stuck_yield_vault.cdc", [yieldVaultID])
+    Test.expect(res, Test.beSucceeded())
+    return res.returnValue! as! Bool
+}
+
+access(all)
+fun hasActiveSchedule(_ yieldVaultID: UInt64): Bool {
+    let res = _executeScript("../scripts/flow-yield-vaults/has_active_schedule.cdc", [yieldVaultID])
+    Test.expect(res, Test.beSucceeded())
+    return res.returnValue! as! Bool
+}
+
+access(all)
+fun getFlowYieldVaultsFlowBalance(): UFix64 {
+    let res = _executeScript(
+        "../scripts/flow-yield-vaults/get_flow_balance.cdc",
+        [flowYieldVaultsAccount.address]
+    )
+    Test.expect(res, Test.beSucceeded())
+    return res.returnValue! as! UFix64
+}
+
+/// Regression test: more than MAX_BATCH_SIZE non-recurring registry entries must not
+/// permanently starve stuck recurring vault detection.
+///
+/// Setup:
+/// 1. Create MAX_BATCH_SIZE + 1 mock vaults and strip their recurring config/schedules.
+/// 2. Create one real recurring vault after them so it sits behind those tail entries.
+/// 3. Drain FLOW so that recurring vault executes once but fails to reschedule, becoming stuck.
+/// 4. Fund and run the Supervisor for several ticks.
+///
+/// Expected behavior:
+/// - The recurring vault is eventually detected and recovered.
+/// - The non-recurring tail entries do not block recovery forever.
+access(all)
+fun testSupervisorScansPastNonRecurringTailEntries() {
+    if snapshot != getCurrentBlockHeight() {
+        Test.reset(to: snapshot)
+    }
+    log("\n[TEST] Supervisor scans past non-recurring tail entries...")
+
+    let blockerCount = FlowYieldVaultsSchedulerRegistry.MAX_BATCH_SIZE + 1
+    let user = Test.createAccount()
+    mintFlow(to: user, amount: 2000.0)
+    grantBeta(flowYieldVaultsAccount, user)
+
+    // Step 1: create more than one full scan batch of normal recurring mock vaults.
+    // We will convert these into permanently ineligible "blockers" without removing them
+    // from the registry, so they keep occupying the tail of the scan order.
+    var idx = 0
+    while idx < blockerCount {
+        let createRes = _executeTransaction(
+            "../transactions/flow-yield-vaults/create_yield_vault.cdc",
+            [strategyIdentifier, flowTokenIdentifier, 25.0],
+            user
+        )
+        Test.expect(createRes, Test.beSucceeded())
+        idx = idx + 1
+    }
+
+    let blockerIDs = getYieldVaultIDs(address: user.address)!
+    Test.assertEqual(blockerCount, blockerIDs.length)
+
+    // Step 2: strip schedules and recurring config from those vaults. They stay registered,
+    // but they can no longer self-schedule and they can never satisfy isStuckYieldVault().
+    for blockerID in blockerIDs {
+        cancelSchedulesAndRemoveRecurringConfig(yieldVaultID: blockerID)
+        Test.assertEqual(false, hasActiveSchedule(blockerID))
+        Test.assertEqual(false, isStuckYieldVault(blockerID))
+    }
+    log("Prepared \(blockerCount.toString()) non-recurring registry entries at the tail")
+
+    // Step 3: create one real recurring vault after the blockers. This vault is the one we
+    // will intentionally push into a stuck state and expect the Supervisor to recover.
+    let targetRes = _executeTransaction(
+        "../transactions/flow-yield-vaults/create_yield_vault.cdc",
+        [strategyIdentifier, flowTokenIdentifier, 25.0],
+        user
+    )
+    Test.expect(targetRes, Test.beSucceeded())
+
+    let allYieldVaultIDs = getYieldVaultIDs(address: user.address)!
+    var recurringYieldVaultID: UInt64 = 0
+    var foundRecurringTarget = false
+    for yieldVaultID in allYieldVaultIDs {
+        if !blockerIDs.contains(yieldVaultID) {
+            recurringYieldVaultID = yieldVaultID
+            foundRecurringTarget = true
+        }
+    }
+    Test.assert(foundRecurringTarget, message: "Failed to identify the recurring target yield vault")
+    Test.assertEqual(true, hasActiveSchedule(recurringYieldVaultID))
+
+    // Sanity check the test setup: the first scan batch should contain only blocker IDs,
+    // proving the real recurring target starts behind the tail window the Supervisor reads.
+    let initialCandidates = FlowYieldVaultsSchedulerRegistry.getStuckScanCandidates(
+        limit: UInt(FlowYieldVaultsSchedulerRegistry.MAX_BATCH_SIZE)
+    )
+    Test.assert(
+        !initialCandidates.contains(recurringYieldVaultID),
+        message: "Setup failure: target recurring vault should sit behind the first stuck-scan batch"
+    )
+    for candidate in initialCandidates {
+        Test.assert(
+            blockerIDs.contains(candidate),
+            message: "Setup failure: initial tail scan should contain only non-recurring blockers"
+        )
+    }
+
+    setMockOraclePrice(signer: flowYieldVaultsAccount, forTokenIdentifier: flowTokenIdentifier, price: 2.0)
+    setMockOraclePrice(signer: flowYieldVaultsAccount, forTokenIdentifier: yieldTokenIdentifier, price: 1.5)
+
+    // Step 4: drain the shared FLOW fee vault to the minimum possible residual balance.
+    // The target vault already has its first schedule funded, so it should execute once and
+    // then eventually stop self-scheduling, making it a genuine stuck recurring vault.
+    let balanceBeforeDrain = getFlowYieldVaultsFlowBalance()
+    let residualBalance = 0.00000001
+    if balanceBeforeDrain > residualBalance {
+        let drainRes = _executeTransaction(
+            "../transactions/flow-yield-vaults/drain_flow.cdc",
+            [balanceBeforeDrain - residualBalance],
+            flowYieldVaultsAccount
+        )
+        Test.expect(drainRes, Test.beSucceeded())
+    }
+
+    // Give the already-funded first execution time to run, then keep advancing until the
+    // vault becomes overdue with no active schedule. A nearly-empty fee vault can still be
+    // enough for one extra scheduling attempt, so this waits several intervals.
+    idx = 0
+    while idx < 6 && !isStuckYieldVault(recurringYieldVaultID) {
+        Test.moveTime(by: 60.0 * 10.0 + 10.0)
+        Test.commitBlock()
+        idx = idx + 1
+    }
+
+    Test.assertEqual(true, isStuckYieldVault(recurringYieldVaultID))
+    Test.assertEqual(false, hasActiveSchedule(recurringYieldVaultID))
+
+    // Step 5: fund the account again and start the Supervisor. A correct implementation
+    // should eventually scan past the blockers, detect the real stuck recurring vault, and
+    // recover it.
+    mintFlow(to: flowYieldVaultsAccount, amount: 200.0)
+    Test.commitBlock()
+
+    let recoveredEventsBefore = Test.eventsOfType(Type<FlowYieldVaultsSchedulerV1.YieldVaultRecovered>()).length
+    let detectedEventsBefore = Test.eventsOfType(Type<FlowYieldVaultsSchedulerV1.StuckYieldVaultDetected>()).length
+
+    let scheduleSupervisorRes = _executeTransaction(
+        "../transactions/flow-yield-vaults/admin/schedule_supervisor.cdc",
+        [60.0 * 10.0, UInt8(1), UInt64(800), true],
+        flowYieldVaultsAccount
+    )
+    Test.expect(scheduleSupervisorRes, Test.beSucceeded())
+
+    let supervisorTicks = 3
+    idx = 0
+    while idx < supervisorTicks {
+        Test.moveTime(by: 60.0 * 10.0 + 10.0)
+        Test.commitBlock()
+        idx = idx + 1
+    }
+
+    let recoveredEvents = Test.eventsOfType(Type<FlowYieldVaultsSchedulerV1.YieldVaultRecovered>())
+    let detectedEvents = Test.eventsOfType(Type<FlowYieldVaultsSchedulerV1.StuckYieldVaultDetected>())
+    log("Recovered events after supervisor ticks: \(recoveredEvents.length.toString())")
+    log("Detected events after supervisor ticks: \(detectedEvents.length.toString())")
+
+    // These are the core regression assertions. On the current implementation they fail,
+    // because the Supervisor keeps rescanning the same non-recurring tail entries and never
+    // reaches the real stuck recurring vault behind them.
+    Test.assert(
+        detectedEvents.length > detectedEventsBefore,
+        message: "Supervisor should eventually detect the stuck recurring vault instead of rescanning the same non-recurring tail entries forever"
+    )
+    Test.assert(
+        recoveredEvents.length > recoveredEventsBefore,
+        message: "Supervisor should eventually recover the stuck recurring vault even when more than MAX_BATCH_SIZE non-recurring entries occupy the tail"
+    )
+    Test.assertEqual(false, isStuckYieldVault(recurringYieldVaultID))
+    Test.assertEqual(true, hasActiveSchedule(recurringYieldVaultID))
+}

From 8447480f21cdb9926f150b7ec5e59356c47d7451 Mon Sep 17 00:00:00 2001
From: liobrasil <limol.lionel@gmail.com>
Date: Wed, 11 Mar 2026 00:21:09 -0400
Subject: [PATCH 2/3] fix: avoid duplicate supervisor recoveries

---
 .../FlowYieldVaultsAutoBalancers.cdc          | 29 +++++++++++++++----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/cadence/contracts/FlowYieldVaultsAutoBalancers.cdc b/cadence/contracts/FlowYieldVaultsAutoBalancers.cdc
index c9325872..770d4d25 100644
--- a/cadence/contracts/FlowYieldVaultsAutoBalancers.cdc
+++ b/cadence/contracts/FlowYieldVaultsAutoBalancers.cdc
@@ -68,11 +68,20 @@ access(all) contract FlowYieldVaultsAutoBalancers {
         return nil
     }
 
-    /// Checks if an AutoBalancer has at least one active (Scheduled) transaction.
+    /// Checks if an AutoBalancer has at least one active internally-managed transaction.
     /// Used by Supervisor to detect stuck yield vaults that need recovery.
     ///
+    /// A transaction is considered active when it is:
+    /// - still `Scheduled`, or
+    /// - already marked `Executed` by FlowTransactionScheduler, but the AutoBalancer has not
+    ///   yet advanced its last rebalance timestamp past that transaction's scheduled time.
+    ///
+    /// The second case matters because FlowTransactionScheduler flips status to `Executed`
+    /// before the handler actually runs. Without treating that in-flight window as active,
+    /// the Supervisor can falsely classify healthy vaults as stuck and recover them twice.
+    ///
     /// @param id: The yield vault/AutoBalancer ID
-    /// @return Bool: true if there's at least one Scheduled transaction, false otherwise
+    /// @return Bool: true if there's at least one active internally-managed transaction, false otherwise
     ///
     access(all) fun hasActiveSchedule(id: UInt64): Bool {
         let autoBalancer = self.borrowAutoBalancer(id: id)
@@ -80,10 +89,20 @@ access(all) contract FlowYieldVaultsAutoBalancers {
             return false
         }
 
+        let lastRebalanceTimestamp = autoBalancer!.getLastRebalanceTimestamp()
         let txnIDs = autoBalancer!.getScheduledTransactionIDs()
         for txnID in txnIDs {
-            if autoBalancer!.borrowScheduledTransaction(id: txnID)?.status() == FlowTransactionScheduler.Status.Scheduled {
-                return true
+            if let scheduledTxn = autoBalancer!.borrowScheduledTransaction(id: txnID) {
+                if let status = scheduledTxn.status() {
+                    if status == FlowTransactionScheduler.Status.Scheduled {
+                        return true
+                    }
+
+                    if status == FlowTransactionScheduler.Status.Executed
+                        && scheduledTxn.timestamp > lastRebalanceTimestamp {
+                        return true
+                    }
+                }
             }
         }
         return false
@@ -110,7 +129,7 @@ access(all) contract FlowYieldVaultsAutoBalancers {
             return false // Not configured for recurring, can't be "stuck"
         }
 
-        // Check if there's an active schedule
+        // Check if there's an active schedule or an in-flight due execution
         if self.hasActiveSchedule(id: id) {
             return false // Has active schedule, not stuck
         }

From 66ccb02dc8ccab717307af6546374cbc31ce6fa6 Mon Sep 17 00:00:00 2001
From: liobrasil <limol.lionel@gmail.com>
Date: Wed, 11 Mar 2026 00:55:32 -0400
Subject: [PATCH 3/3] fix: restrict supervisor stuck scan to recurring vaults

---
 .../FlowYieldVaultsAutoBalancers.cdc          | 10 +++-
 .../FlowYieldVaultsSchedulerRegistry.cdc      | 53 +++++++++++++------
 .../contracts/FlowYieldVaultsSchedulerV1.cdc  |  5 +-
 docs/IMPLEMENTATION_SUMMARY.md                |  8 +--
 docs/SCHEDULED_REBALANCING_GUIDE.md           |  4 +-
 ...autobalancer-restart-recurring-proposal.md |  2 +-
 docs/rebalancing_architecture.md              | 11 ++--
 7 files changed, 60 insertions(+), 33 deletions(-)

diff --git a/cadence/contracts/FlowYieldVaultsAutoBalancers.cdc b/cadence/contracts/FlowYieldVaultsAutoBalancers.cdc
index 770d4d25..bf857ba2 100644
--- a/cadence/contracts/FlowYieldVaultsAutoBalancers.cdc
+++ b/cadence/contracts/FlowYieldVaultsAutoBalancers.cdc
@@ -232,8 +232,14 @@ access(all) contract FlowYieldVaultsAutoBalancers {
         let scheduleCap = self.account.capabilities.storage
             .issue<auth(DeFiActions.Schedule) &DeFiActions.AutoBalancer>(storagePath)
 
-        // Register yield vault in registry for global mapping of live yield vault IDs
-        FlowYieldVaultsSchedulerRegistry.register(yieldVaultID: uniqueID.id, handlerCap: handlerCap, scheduleCap: scheduleCap)
+        // Register yield vault in registry for global mapping of live yield vault IDs.
+        // Only recurring vaults participate in stuck-scan ordering.
+        FlowYieldVaultsSchedulerRegistry.register(
+            yieldVaultID: uniqueID.id,
+            handlerCap: handlerCap,
+            scheduleCap: scheduleCap,
+            participatesInStuckScan: recurringConfig != nil
+        )
 
         // Start the native AutoBalancer self-scheduling chain if recurringConfig was provided
         // This schedules the first rebalance; subsequent ones are scheduled automatically
diff --git a/cadence/contracts/FlowYieldVaultsSchedulerRegistry.cdc b/cadence/contracts/FlowYieldVaultsSchedulerRegistry.cdc
index a8bdbe97..6c26b201 100644
--- a/cadence/contracts/FlowYieldVaultsSchedulerRegistry.cdc
+++ b/cadence/contracts/FlowYieldVaultsSchedulerRegistry.cdc
@@ -16,6 +16,7 @@ access(all) contract FlowYieldVaultsSchedulerRegistry {
     /* --- TYPES --- */
 
     /// Node in the simulated doubly-linked list used for O(1) stuck-scan ordering.
+    /// Only recurring, scan-eligible vaults participate in this list.
     /// `prev` points toward the head (most recently executed); `next` points toward the tail (oldest/least recently executed).
     access(all) struct ListNode {
         access(all) var prev: UInt64?
@@ -79,10 +80,11 @@ access(all) contract FlowYieldVaultsSchedulerRegistry {
     /// Stored as a dictionary for O(1) add/remove; iteration gives the pending set
     access(self) var pendingQueue: {UInt64: Bool}
 
-    /// Simulated doubly-linked list for O(1) stuck-scan ordering.
-    /// listHead = most recently executed vault ID (or nil if empty).
-    /// listTail = least recently executed vault ID — getStuckScanCandidates walks from here.
-    /// On reportExecution a vault is snipped from its current position and moved to head in O(1).
+    /// Simulated doubly-linked list for O(1) stuck-scan ordering across recurring scan participants.
+    /// listHead = most recently executed recurring vault ID (or nil if empty).
+    /// listTail = least recently executed recurring vault ID — getStuckScanCandidates walks from here.
+    /// On reportExecution a recurring participant is snipped from its current position and moved to head in O(1).
+    /// If a vault later disables recurring config, its stale list entry is pruned lazily during candidate walks.
     access(self) var listNodes: {UInt64: ListNode}
     access(self) var listHead: UInt64?
     access(self) var listTail: UInt64?
@@ -136,10 +138,12 @@ access(all) contract FlowYieldVaultsSchedulerRegistry {
     /* --- ACCOUNT-LEVEL FUNCTIONS --- */
 
     /// Register a YieldVault and store its handler and schedule capabilities (idempotent)
+    /// `participatesInStuckScan` should be true only for vaults that currently have recurring config.
     access(account) fun register(
         yieldVaultID: UInt64,
         handlerCap: Capability<auth(FlowTransactionScheduler.Execute) &{FlowTransactionScheduler.TransactionHandler}>,
-        scheduleCap: Capability<auth(DeFiActions.Schedule) &DeFiActions.AutoBalancer>
+        scheduleCap: Capability<auth(DeFiActions.Schedule) &DeFiActions.AutoBalancer>,
+        participatesInStuckScan: Bool
     ) {
         pre {
             handlerCap.check(): "Invalid handler capability provided for yieldVaultID \(yieldVaultID)"
@@ -148,20 +152,23 @@ access(all) contract FlowYieldVaultsSchedulerRegistry {
         self.yieldVaultRegistry[yieldVaultID] = true
         self.handlerCaps[yieldVaultID] = handlerCap
         self.scheduleCaps[yieldVaultID] = scheduleCap
-        // New vaults go to the head; they haven't executed yet but are freshly registered.
+
+        // Only recurring vaults participate in stuck-scan ordering.
         // If already in the list (idempotent re-register), remove first to avoid duplicates.
         if self.listNodes[yieldVaultID] != nil {
             self._listRemove(id: yieldVaultID)
         }
-        self._listInsertAtHead(id: yieldVaultID)
+        if participatesInStuckScan {
+            self._listInsertAtHead(id: yieldVaultID)
+        }
         emit YieldVaultRegistered(yieldVaultID: yieldVaultID)
     }
 
-    /// Called on every execution. Moves yieldVaultID to the head (most recently executed)
-    /// so the Supervisor scans from the tail (least recently executed) for stuck detection — O(1).
-    /// If the list entry is unexpectedly missing, reinsert it to restore the ordering structure.
+    /// Called on every execution. Moves scan-participating yieldVaultID to the head
+    /// (most recently executed) so the Supervisor scans recurring participants from the tail
+    /// (least recently executed) for stuck detection — O(1).
     access(account) fun reportExecution(yieldVaultID: UInt64) {
-        if !(self.yieldVaultRegistry[yieldVaultID] ?? false) {
+        if !(self.yieldVaultRegistry[yieldVaultID] ?? false) || self.listNodes[yieldVaultID] == nil {
             return
         }
         let _ = self._listRemove(id: yieldVaultID)
@@ -270,18 +277,30 @@ access(all) contract FlowYieldVaultsSchedulerRegistry {
         return self.pendingQueue.length
     }
 
-    /// Returns up to `limit` vault IDs starting from the tail (least recently executed).
+    /// Returns up to `limit` recurring scan participants starting from the tail
+    /// (least recently executed among recurring participants).
+    /// Stale entries whose recurring config has been removed are pruned lazily as the walk proceeds.
     /// Supervisor should only scan these for stuck detection instead of all registered vaults.
     /// @param limit: Maximum number of IDs to return (caller typically passes MAX_BATCH_SIZE)
     access(all) fun getStuckScanCandidates(limit: UInt): [UInt64] {
         var result: [UInt64] = []
         var current = self.listTail
-        var count: UInt = 0
-        while count < limit {
+        while UInt(result.length) < limit {
             if let id = current {
-                result.append(id)
-                current = self.listNodes[id]?.prev
-                count = count + 1
+                let previous = self.listNodes[id]?.prev
+                let scheduleCap = self.scheduleCaps[id]
+                let isRecurringParticipant =
+                    scheduleCap != nil
+                    && scheduleCap!.check()
+                    && scheduleCap!.borrow()?.getRecurringConfig() != nil
+
+                if isRecurringParticipant {
+                    result.append(id)
+                } else {
+                    self.dequeuePending(yieldVaultID: id)
+                    let _ = self._listRemove(id: id)
+                }
+                current = previous
             } else {
                 break
             }
diff --git a/cadence/contracts/FlowYieldVaultsSchedulerV1.cdc b/cadence/contracts/FlowYieldVaultsSchedulerV1.cdc
index 27aceb70..ff17ee04 100644
--- a/cadence/contracts/FlowYieldVaultsSchedulerV1.cdc
+++ b/cadence/contracts/FlowYieldVaultsSchedulerV1.cdc
@@ -160,7 +160,8 @@ access(all) contract FlowYieldVaultsSchedulerV1 {
         /// Detects and recovers stuck yield vaults by directly calling their scheduleNextRebalance().
         ///
         /// Detection methods:
-        /// 1. State-based: Scans for registered yield vaults with no active schedule that are overdue
+        /// 1. State-based: Scans recurring yield vaults in stuck-scan order for candidates with
+        ///    no active schedule that are overdue
         ///
         /// Recovery method:
         /// - Uses Schedule capability to call AutoBalancer.scheduleNextRebalance() directly
@@ -172,7 +173,7 @@ access(all) contract FlowYieldVaultsSchedulerV1 {
         ///   "priority": UInt8 (0=High,1=Medium,2=Low) - for Supervisor self-rescheduling
         ///   "executionEffort": UInt64 - for Supervisor self-rescheduling
         ///   "recurringInterval": UFix64 (for Supervisor self-rescheduling)
-        ///   "scanForStuck": Bool (default true - scan up to MAX_BATCH_SIZE least-recently-executed vaults for stuck ones)
+        ///   "scanForStuck": Bool (default true - scan up to MAX_BATCH_SIZE least-recently-executed recurring scan participants for stuck ones)
         /// }
         access(FlowTransactionScheduler.Execute) fun executeTransaction(id: UInt64, data: AnyStruct?) {
             let cfg = data as? {String: AnyStruct} ?? {}
diff --git a/docs/IMPLEMENTATION_SUMMARY.md b/docs/IMPLEMENTATION_SUMMARY.md
index b6a73b31..c81ab1e7 100644
--- a/docs/IMPLEMENTATION_SUMMARY.md
+++ b/docs/IMPLEMENTATION_SUMMARY.md
@@ -22,7 +22,7 @@ This document reflects the current scheduler architecture in this repository.
 2. Direct AutoBalancer capabilities, with no scheduling wrapper layer.
 3. Native self-scheduling for healthy recurring AutoBalancers.
 4. Recovery-only Supervisor with bounded scanning and bounded pending-queue processing.
-5. LRU stuck-scan ordering, so the longest-idle vaults are checked first.
+5. LRU stuck-scan ordering across recurring scan participants, so the longest-idle recurring vaults are checked first.
 
 ### Main Components
 
@@ -40,7 +40,7 @@ FlowYieldVaults Account
     |       +-- handlerCaps
     |       +-- scheduleCaps
     |       +-- pendingQueue
-    |       +-- listNodes / listHead / listTail (LRU stuck-scan order)
+    |       +-- listNodes / listHead / listTail (LRU stuck-scan order for recurring participants)
     |       +-- supervisorCap
     |
     +-- FlowYieldVaultsSchedulerV1
@@ -75,7 +75,7 @@ FlowYieldVaults Account
 Each Supervisor run has two bounded steps:
 
 1. Stuck detection:
-   - reads up to `MAX_BATCH_SIZE` least-recently-executed vault IDs from `getStuckScanCandidates(...)`
+   - reads up to `MAX_BATCH_SIZE` least-recently-executed recurring-participant vault IDs from `getStuckScanCandidates(...)`
    - checks whether each candidate is overdue and lacks an active schedule
    - enqueues stuck vaults into `pendingQueue`
 
@@ -98,7 +98,7 @@ If the Supervisor itself is configured with a recurring interval, it self-resche
   - registered vault tracking
   - pending queue
   - handler/schedule capability storage
-  - LRU stuck-scan ordering
+  - LRU stuck-scan ordering for recurring participants
 
 - `FlowYieldVaultsSchedulerV1.cdc`
   - Supervisor recovery handler
diff --git a/docs/SCHEDULED_REBALANCING_GUIDE.md b/docs/SCHEDULED_REBALANCING_GUIDE.md
index 687f176b..c47fb5a4 100644
--- a/docs/SCHEDULED_REBALANCING_GUIDE.md
+++ b/docs/SCHEDULED_REBALANCING_GUIDE.md
@@ -162,10 +162,10 @@ flow scripts execute cadence/scripts/flow-yield-vaults/get_pending_count.cdc
 ### What It Does
 
 The Supervisor handles two recovery scenarios per run:
-1. **Stuck detection**: Scans up to `MAX_BATCH_SIZE` vault candidates using `getStuckScanCandidates()`, which returns vaults ordered least-recently-executed first (LRU). Stuck vaults are enqueued in `pendingQueue`.
+1. **Stuck detection**: Scans up to `MAX_BATCH_SIZE` recurring scan participants using `getStuckScanCandidates()`, which returns recurring vaults ordered least-recently-executed first (LRU). Stuck vaults are enqueued in `pendingQueue`.
 2. **Pending processing**: Seeds vaults from `pendingQueue` (up to `MAX_BATCH_SIZE` per run). When scheduled with a recurring interval, the Supervisor keeps self-rescheduling even if a given run finds no work.
 
-Each AutoBalancer reports back to the registry after every execution via `RegistryReportCallback`, which calls `reportExecution()` to move the vault to the most-recently-executed end of the internal list. Because stuck scanning starts from the least-recently-executed tail, the Supervisor still prioritises the longest-idle vaults first.
+Each recurring AutoBalancer reports back to the registry after every execution via `RegistryReportCallback`, which calls `reportExecution()` to move the vault to the most-recently-executed end of the internal list. Because stuck scanning starts from the least-recently-executed recurring tail, the Supervisor prioritises the longest-idle recurring vaults first.
 
 ### When It's Needed
 
diff --git a/docs/autobalancer-restart-recurring-proposal.md b/docs/autobalancer-restart-recurring-proposal.md
index 6aa42dcb..111b5fed 100644
--- a/docs/autobalancer-restart-recurring-proposal.md
+++ b/docs/autobalancer-restart-recurring-proposal.md
@@ -2,7 +2,7 @@
 
 > Historical note: this proposal describes the recovery design that was later implemented.
 > Current code names are `FlowYieldVaultsSchedulerV1` and `FlowYieldVaultsSchedulerRegistry`.
-> Current stuck detection scans up to `MAX_BATCH_SIZE` least-recently-executed vaults from
+> Current stuck detection scans up to `MAX_BATCH_SIZE` least-recently-executed recurring scan participants from
 > the registry's LRU ordering, not the full registered set.
 
 ## Problem Statement
diff --git a/docs/rebalancing_architecture.md b/docs/rebalancing_architecture.md
index 5d2acd8d..81972ed6 100644
--- a/docs/rebalancing_architecture.md
+++ b/docs/rebalancing_architecture.md
@@ -45,7 +45,7 @@
   - `yieldVaultRegistry`: registered yield vault IDs
   - `handlerCaps`: direct capabilities to AutoBalancers (no wrapper)
   - `pendingQueue`: yield vaults needing (re)seeding; processing is bounded by `MAX_BATCH_SIZE = 5` per Supervisor run
-  - `stuckScanOrder`: LRU-ordered list of vault IDs for stuck detection; vaults call `reportExecution()` on each run to move themselves to the most-recently-executed end, so the Supervisor always scans the longest-idle vaults first
+  - `stuckScanOrder`: LRU-ordered list of recurring scan participants for stuck detection; recurring vaults call `reportExecution()` on each run to move themselves to the most-recently-executed end, so the Supervisor scans the longest-idle recurring vaults first
   - `supervisorCap`: capability for Supervisor self-scheduling
 - **FlowYieldVaultsSchedulerV1** provides:
   - `Supervisor`: recovery handler for failed schedules
@@ -125,7 +125,8 @@ When `_initNewAutoBalancer()` is called:
 FlowYieldVaultsSchedulerRegistry.register(
     yieldVaultID: uniqueID.id,
     handlerCap: handlerCap,
-    scheduleCap: scheduleCap
+    scheduleCap: scheduleCap,
+    participatesInStuckScan: recurringConfig != nil
 )
 autoBalancerRef.scheduleNextRebalance(whileExecuting: nil)
 ```
@@ -162,7 +163,7 @@ fun executeTransaction(id: UInt64, data: AnyStruct?) {
 The Supervisor runs two steps per execution:
 
 **Step 1 – Stuck detection** (when `scanForStuck == true`):
-Fetches up to `MAX_BATCH_SIZE` candidates from `getStuckScanCandidates(limit:)`, which returns vault IDs starting from the least-recently-executed tail of `stuckScanOrder`. Vaults that are stuck (recurring config set, no active schedule, overdue) are enqueued into `pendingQueue`.
+Fetches up to `MAX_BATCH_SIZE` candidates from `getStuckScanCandidates(limit:)`, which returns recurring scan participants starting from the least-recently-executed tail of `stuckScanOrder`. Vaults that are stuck (recurring config set, no active schedule, overdue) are enqueued into `pendingQueue`.
 
 **Step 2 – Pending processing**:
 Seeds vaults from `pendingQueue` (up to `MAX_BATCH_SIZE` per run via `getPendingYieldVaultIDsPaginated(page: 0, size: UInt(MAX_BATCH_SIZE))`).
@@ -170,7 +171,7 @@ Seeds vaults from `pendingQueue` (up to `MAX_BATCH_SIZE` per run via `getPending
   ```cadence
 access(FlowTransactionScheduler.Execute)
 fun executeTransaction(id: UInt64, data: AnyStruct?) {
-    // STEP 1: scan least-recently-executed vaults for stuck detection
+    // STEP 1: scan least-recently-executed recurring participants for stuck detection
     let candidates = FlowYieldVaultsSchedulerRegistry.getStuckScanCandidates(
         limit: UInt(FlowYieldVaultsSchedulerRegistry.MAX_BATCH_SIZE))
     for yieldVaultID in candidates {
@@ -195,7 +196,7 @@ fun executeTransaction(id: UInt64, data: AnyStruct?) {
 }
   ```
 
-Each AutoBalancer sets a shared `RegistryReportCallback` capability at creation time. On every execution it calls `FlowYieldVaultsSchedulerRegistry.reportExecution(yieldVaultID:)`, which moves the vault to the head of `stuckScanOrder` so the least-recently-executed tail remains the next stuck-scan priority.
+Each AutoBalancer sets a shared `RegistryReportCallback` capability at creation time. On every execution, recurring scan participants call `FlowYieldVaultsSchedulerRegistry.reportExecution(yieldVaultID:)`, which moves the vault to the head of `stuckScanOrder` so the least-recently-executed recurring tail remains the next stuck-scan priority.
 
 ---