From 8a4c25cd991dd1293a787d348b6bf98eadb16e47 Mon Sep 17 00:00:00 2001
From: Devin Oldenburg <158351052+devinoldenburg@users.noreply.github.com>
Date: Sun, 21 Jun 2026 17:12:03 +0000
Subject: [PATCH 1/2] fix: prevent exhaustion loophole and add lean gate mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two critical fixes:

1. Exhaustion loophole — a strong LLM could exhaust every approach,
   convince all reviewers it 'tried everything', and get PASS verdicts
   even though the main goal was never achieved (violates Ralph loop).

   Fix: completionAllowed() now requires evidence coverage — every
   acceptance criterion must have at least one piece of fresh (post-last-edit)
   recorded verification evidence. This is a programmatic gate no LLM can
   talk its way past. Configurable via requireEvidenceCoverage (default true).

2. Lean gate mode — the full 5+contextual reviewer set consumes significant
   tokens per review cycle. Add a leanGates config option (default false)
   that reduces base gates to 3 (prompt-auditor, reviewer, final-auditor)
   and disables contextual gates, dramatically cutting token consumption.
   The evidence-coverage check keeps the exhaustion loophole closed even
   in lean mode.

Also: continuation messages now detect and report uncovered acceptance
criteria, so an agent stuck in an exhaustion loop gets explicit guidance.
---
 plugins/goal-guard/agents.js       |  13 ++++
 plugins/goal-guard/autocontinue.js |  27 ++++++-
 plugins/goal-guard/completion.js   |  23 +++++-
 plugins/goal-guard/config.js       |  16 +++++
 plugins/goal-guard/gates.js        |  50 +++++++++++--
 plugins/goal-guard/summary.js      |   6 +-
 tests/autocontinue.test.mjs        |   1 +
 tests/config.test.mjs              |  12 ++++
 tests/gates.test.mjs               | 112 ++++++++++++++++++++++++++++-
 tests/plugin.test.mjs              |   5 ++
 tests/programmatic-review.test.mjs |   2 +-
 tests/review-runner.test.mjs       |   7 +-
 tests/summary.test.mjs             |   1 +
 13 files changed, 260 insertions(+), 15 deletions(-)

diff --git a/plugins/goal-guard/agents.js b/plugins/goal-guard/agents.js
index a3068ff..7fdcac4 100644
--- a/plugins/goal-guard/agents.js
+++ b/plugins/goal-guard/agents.js
@@ -93,6 +93,19 @@ export const BASE_GATES = Object.freeze([
   "goal-final-auditor",
 ]);
 
+/**
+ * Minimal base-gate set for lean (token-conscious) mode. It omits the code-only
+ * `goal-diff-reviewer` and `goal-verifier` gates: a full review pass runs one
+ * subagent subtask per required gate, so dropping two gates saves two subtask
+ * runs (and their tokens) per cycle. The prompt auditor, correctness reviewer,
+ * and final auditor remain as the safety floor (alignment + correctness + finality).
+ */
+export const LEAN_BASE_GATES = Object.freeze([
+  "goal-prompt-auditor",
+  "goal-reviewer",
+  "goal-final-auditor",
+]);
+
 /**
  * Gates that only make sense when the goal actually changed code. A research,
  * analysis, explanation, or planning goal produces a text/evidence deliverable
diff --git a/plugins/goal-guard/autocontinue.js b/plugins/goal-guard/autocontinue.js
index 3763445..428a378 100644
--- a/plugins/goal-guard/autocontinue.js
+++ b/plugins/goal-guard/autocontinue.js
@@ -16,7 +16,7 @@
  * `state`, so it is fully unit-testable.
  */
 
-import { completionAllowed, missingGates } from "./gates.js";
+import { completionAllowed, missingGates, evidenceCoverageMet } from "./gates.js";
 
 /** Consecutive no-change idle ticks after which auto-continue pauses for the human. */
 export const NO_PROGRESS_LIMIT = 4;
@@ -54,6 +54,9 @@ export function progressSignature(state, config) {
  * exact reviewer gates via the task tool. */
 export function continuationMessage(state, config) {
   const missing = missingGates(state, config);
+  const criteria = Array.isArray(state?.contract?.acceptanceCriteria) ? state.contract.acceptanceCriteria : [];
+  const hasCriteria = criteria.length > 0;
+  const evidenceOk = evidenceCoverageMet(state);
   const lines = ["The goal is NOT complete — do not stop. Continue working now."];
   if (!state?.contract) {
     lines.push("First, record the Goal Contract with the `goal_contract` tool (title, the original request, and concrete acceptance criteria) so the objective is anchored.");
@@ -61,6 +64,28 @@ export function continuationMessage(state, config) {
   if (state?.dirty) {
     lines.push("There are changes that are not yet reviewed/verified after your latest edits — actually run the code/tests and record it with `goal_evidence`.");
   }
+  if (hasCriteria && !evidenceOk) {
+    const uncovered = criteria.filter((c) => {
+      const full = String(c).trim().toLowerCase();
+      if (!full) return false;
+      const entries = Array.isArray(state?.evidence) ? state.evidence : [];
+      const lastEditSeq = state?.lastEditSeq || 0;
+      return !entries.some((entry) => {
+        const ecriteria = Array.isArray(entry.criteria) ? entry.criteria : [];
+        if (!ecriteria.some((ec) => String(ec).trim().toLowerCase() === full)) return false;
+        if (!entry.seq) return lastEditSeq === 0;
+        return entry.seq > lastEditSeq;
+      });
+    });
+    if (uncovered.length) {
+      lines.push(
+        `EVIDENCE COVERAGE MISSING — every acceptance criterion must have fresh recorded ` +
+        `evidence. Uncovered criteria: ${uncovered.map((c) => `"${c}"`).join(", ")}. ` +
+        `Run verification and record with \`goal_evidence\`, passing each uncovered criterion. ` +
+        `Mere exhaustion of approaches is NOT success — the guard requires proof.`,
+      );
+    }
+  }
   if (missing.length) {
     if (config?.programmaticReview) {
       lines.push(
diff --git a/plugins/goal-guard/completion.js b/plugins/goal-guard/completion.js
index 27b9a7d..4523cfe 100644
--- a/plugins/goal-guard/completion.js
+++ b/plugins/goal-guard/completion.js
@@ -28,7 +28,7 @@
  * adversarial digit-runs cannot trigger polynomial backtracking (issue #367).
  */
 
-import { missingGates, completionAllowed } from "./gates.js";
+import { missingGates, completionAllowed, evidenceCoverageMet } from "./gates.js";
 import { summarizeState } from "./summary.js";
 
 const CYCLES_RE = /Review cycles:\s*(\d+)/gi;
@@ -131,7 +131,26 @@ export function evaluateCompletionClaim(state, config, text) {
     reason = `claimed review cycles (${claimedCycles}) do not match recorded review cycles (${state.reviewCycles})`;
   } else if (!completionAllowed(state, config)) {
     const missing = missingGates(state, config).join(", ");
-    reason = `required review gates are missing or stale (${missing || "goal session not active"})`;
+    if (missing) {
+      reason = `required review gates are missing or stale (${missing})`;
+    } else if (!evidenceCoverageMet(state)) {
+      const criteria = Array.isArray(state?.contract?.acceptanceCriteria) ? state.contract.acceptanceCriteria : [];
+      const uncovered = criteria.filter((c) => {
+        const full = String(c).trim().toLowerCase();
+        if (!full) return false;
+        const entries = Array.isArray(state?.evidence) ? state.evidence : [];
+        const lastEditSeq = state?.lastEditSeq || 0;
+        return !entries.some((entry) => {
+          const ecriteria = Array.isArray(entry.criteria) ? entry.criteria : [];
+          if (!ecriteria.some((ec) => String(ec).trim().toLowerCase() === full)) return false;
+          if (!entry.seq) return lastEditSeq === 0;
+          return entry.seq > lastEditSeq;
+        });
+      });
+      reason = `acceptance criteria lack evidence coverage (${uncovered.map((c) => `"${c}"`).join(", ") || "all criteria"}) — goal not achieved, mere exhaustion is not success`;
+    } else {
+      reason = "goal session not active";
+    }
   }
 
   if (!reason) return { blocked: false, claimedCycles };
diff --git a/plugins/goal-guard/config.js b/plugins/goal-guard/config.js
index f8560df..ed05f4b 100644
--- a/plugins/goal-guard/config.js
+++ b/plugins/goal-guard/config.js
@@ -56,6 +56,18 @@ export const DEFAULT_CONFIG = Object.freeze({
    * explanation goal is gated on evidence instead, never on an empty diff);
    * "always" forces them on; "never" turns them off. */
   requireCodeReview: "auto",
+  /** Require fresh evidence covering every acceptance criterion before completion
+   * is allowed. This is the programmatic exhaustion-prevention gate: a strong LLM
+   * that exhausts every approach and persuades the reviewers it "tried everything"
+   * cannot satisfy this check for criteria it never actually achieved — the guard
+   * demands verified, recorded proof, not statements of effort. Default true. */
+  requireEvidenceCoverage: true,
+  /** Lean mode — reduces the base gate set to the 3 safety-critical reviewers
+   * (prompt-auditor, reviewer, final-auditor) and disables contextual gates, so
+   * each full review round runs fewer subagent subtasks and token consumption per
+   * cycle drops sharply. Combined with the evidence-coverage check this keeps the
+   * exhaustion loophole closed. Default false. */
+  leanGates: false,
   /** Block non-Goal agents from invoking the goal-* subagents via the task tool. */
   restrictSubagents: true,
   /** Maximum tracked sessions before LRU eviction. */
@@ -134,6 +146,8 @@ export const CONFIG_DOCS = Object.freeze({
   // Gates & scope
   contextualGates: { group: "Gates", summary: "Require specialist reviewer gates derived from goal text / changed files." },
   requireCodeReview: { group: "Gates", summary: "When to require the code-only diff/verification gates: 'auto' (only once the goal edits a file), 'always', or 'never'. Lets non-code agentic goals complete on evidence." },
+  requireEvidenceCoverage: { group: "Gates", summary: "Require fresh recorded evidence for every acceptance criterion before completion (exhaustion-prevention gate — the guard demands verified proof, not statements of effort)." },
+  leanGates: { group: "Gates", summary: "Lean mode: reduce the base reviewer set and disable contextual gates for much lower token consumption per review cycle." },
   restrictSubagents: { group: "Gates", summary: "Lock the goal-* subagents to the Goal agent (other agents can't call them)." },
   // State & lifecycle
   injectSystemState: { group: "State", summary: "Inject a live Goal Guard state block into the system prompt." },
@@ -218,6 +232,8 @@ function fromEnv(env) {
     GOAL_GUARD_PERSIST: ["persist", coerceBool],
     GOAL_GUARD_CONTEXTUAL_GATES: ["contextualGates", coerceBool],
     GOAL_GUARD_REQUIRE_CODE_REVIEW: ["requireCodeReview", coerceStr],
+    GOAL_GUARD_REQUIRE_EVIDENCE_COVERAGE: ["requireEvidenceCoverage", coerceBool],
+    GOAL_GUARD_LEAN_GATES: ["leanGates", coerceBool],
     GOAL_GUARD_RESTRICT_SUBAGENTS: ["restrictSubagents", coerceBool],
     GOAL_GUARD_MAX_SESSIONS: ["maxSessions", coerceInt],
     GOAL_GUARD_SESSION_TTL_MS: ["sessionTtlMs", coerceInt],
diff --git a/plugins/goal-guard/gates.js b/plugins/goal-guard/gates.js
index 943c6ce..6a8a1ec 100644
--- a/plugins/goal-guard/gates.js
+++ b/plugins/goal-guard/gates.js
@@ -13,7 +13,7 @@
  *    Re-running verification after a clean review no longer re-opens the gates.
  */
 
-import { BASE_GATES, CODE_GATES, CONTEXTUAL_GATES } from "./agents.js";
+import { BASE_GATES, LEAN_BASE_GATES, CODE_GATES, CONTEXTUAL_GATES } from "./agents.js";
 
 /**
  * Whether this goal has touched code. A goal is code-bearing once it is dirty or has
@@ -79,14 +79,19 @@ export function refreshStickyGates(state) {
 
 /** The reviewers that must PASS for this state, given config. */
 export function requiredGates(state, config) {
+  // Lean mode uses the minimal base-gate set (3 reviewers instead of 5) and
+  // disables contextual gates — drastically reduces token consumption per cycle.
+  const lean = !!(config?.leanGates);
   // Code-only gates (diff review, verification) are required only when the goal
   // actually changed code; a non-code goal is gated on its evidence instead, so it
   // is not blocked forever by an empty `git diff`. `requireCodeReview` overrides the
   // auto-detection: "always" forces them on, "never" off.
   const mode = config?.requireCodeReview || "auto";
   const includeCodeGates = mode === "always" ? true : mode === "never" ? false : isCodeBearing(state);
-  const gates = BASE_GATES.filter((agent) => includeCodeGates || !CODE_GATES.includes(agent));
-  if (!config || config.contextualGates) {
+  const baseSet = lean ? LEAN_BASE_GATES : BASE_GATES;
+  const gates = baseSet.filter((agent) => includeCodeGates || !CODE_GATES.includes(agent));
+  // Contextual gates are disabled in lean mode (token economy).
+  if (!lean && (!config || config.contextualGates)) {
     const contextual = new Set([...(state.stickyGates || []), ...contextualGatesFor(state)]);
     for (const agent of contextual) {
       if (!gates.includes(agent)) gates.push(agent);
@@ -106,6 +111,43 @@ export function missingGates(state, config) {
   return requiredGates(state, config).filter((agent) => !gatePassedFresh(state, agent));
 }
 
+/**
+ * Whether every acceptance criterion in `state.contract` is matched by at least
+ * one fresh evidence entry (`entry.seq > state.lastEditSeq`). Matching is exact
+ * after trim + lower-case; blank criteria are skipped; no criteria = pass. An
+ * entry without a `seq` only counts while `lastEditSeq` is 0 or unset.
+ * NOTE(review): `entry.result` is not inspected, so an entry recording a failed
+ * check still counts as coverage — confirm that is intended.
+ * @param {object} state - goal state; reads `contract`, `evidence`, `lastEditSeq`.
+ * @returns {boolean} true when all non-blank criteria are covered.
+ */
+export function evidenceCoverageMet(state) {
+  const criteria = Array.isArray(state?.contract?.acceptanceCriteria)
+    ? state.contract.acceptanceCriteria
+    : [];
+  if (!criteria.length) return true; // missing contract or empty list: nothing to cover
+  const entries = Array.isArray(state?.evidence) ? state.evidence : [];
+  const lastEditSeq = state?.lastEditSeq || 0;
+  for (const criterion of criteria) {
+    const full = String(criterion).trim().toLowerCase();
+    if (!full) continue;
+    const covered = entries.some((entry) => {
+      const ecriteria = Array.isArray(entry.criteria) ? entry.criteria : [];
+      if (!ecriteria.some((c) => String(c).trim().toLowerCase() === full)) return false;
+      // Fresh = recorded after the last edit; seq-less entries count only before any edit.
+      if (!entry.seq) return lastEditSeq === 0;
+      return entry.seq > lastEditSeq;
+    });
+    if (!covered) return false;
+  }
+  return true;
+}
+
 export function completionAllowed(state, config) {
-  return Boolean(state.active) && missingGates(state, config).length === 0;
+  if (!state.active) return false;
+  if (missingGates(state, config).length !== 0) return false;
+  // Evidence-coverage gate (on unless `requireEvidenceCoverage` is explicitly false):
+  // every acceptance criterion needs fresh recorded evidence, whatever the reviewers say.
+  if (config?.requireEvidenceCoverage !== false && !evidenceCoverageMet(state)) return false;
+  return true;
 }
diff --git a/plugins/goal-guard/summary.js b/plugins/goal-guard/summary.js
index 828f546..cf95243 100644
--- a/plugins/goal-guard/summary.js
+++ b/plugins/goal-guard/summary.js
@@ -3,7 +3,7 @@
  * messages, and the `goal_status` tool. Kept pure and dependency-light.
  */
 
-import { requiredGates, missingGates, gatePassedFresh } from "./gates.js";
+import { requiredGates, missingGates, gatePassedFresh, completionAllowed } from "./gates.js";
 import { prettyAgentName } from "./agents.js";
 
 /**
@@ -99,7 +99,7 @@ export function sidebarView(state, config) {
   const cycles = Number(state.reviewCycles) || 0;
   const gates = `${passing}/${required.length} gates`;
   const todos = sidebarTodos(state, required, missing);
-  const done = required.length > 0 && missing.length === 0 && !state.dirty;
+  const done = completionAllowed(state, config) && !state.dirty;
   if (done) {
     return {
       state: "done",
@@ -190,7 +190,7 @@ export function statusReport(state, config) {
     reviewerMemory: reviewerMemoryReport(state),
     changedFiles: state.changedFiles.slice(-50),
     contract: state.contract,
-    completionAllowed: Boolean(state.active) && missing.length === 0,
+    completionAllowed: completionAllowed(state, config),
   };
 }
 
diff --git a/tests/autocontinue.test.mjs b/tests/autocontinue.test.mjs
index f303bef..4a45d47 100644
--- a/tests/autocontinue.test.mjs
+++ b/tests/autocontinue.test.mjs
@@ -56,6 +56,7 @@ test("a COMPLETE goal does not auto-continue and resets the counters", () => {
     reviewCycles: 1,
     autoContinueCount: 7,
     autoContinueNoProgress: 3,
+    evidence: [{ command: "test", result: "pass", criteria: ["done"], seq: 2 }],
   });
   const d = evaluateAutoContinue(st, DEFAULT_CONFIG);
   assert.equal(d.continue, false);
diff --git a/tests/config.test.mjs b/tests/config.test.mjs
index 8aa70de..ab47b2b 100644
--- a/tests/config.test.mjs
+++ b/tests/config.test.mjs
@@ -128,3 +128,15 @@ test("REGRESSION: a degenerate maxSessions (0/negative) falls back to the defaul
   assert.equal(resolveConfig({ maxSessions: 25 }).maxSessions, 25);
   assert.equal(resolveConfig({}, { GOAL_GUARD_MAX_SESSIONS: "0" }).maxSessions, DEFAULT_CONFIG.maxSessions);
 });
+
+test("requireEvidenceCoverage defaults to true and is configurable", () => {
+  assert.equal(DEFAULT_CONFIG.requireEvidenceCoverage, true);
+  assert.equal(resolveConfig({ requireEvidenceCoverage: false }).requireEvidenceCoverage, false);
+  assert.equal(resolveConfig(undefined, { GOAL_GUARD_REQUIRE_EVIDENCE_COVERAGE: "off" }).requireEvidenceCoverage, false);
+});
+
+test("leanGates defaults to false and is configurable", () => {
+  assert.equal(DEFAULT_CONFIG.leanGates, false);
+  assert.equal(resolveConfig({ leanGates: true }).leanGates, true);
+  assert.equal(resolveConfig(undefined, { GOAL_GUARD_LEAN_GATES: "1" }).leanGates, true);
+});
diff --git a/tests/gates.test.mjs b/tests/gates.test.mjs
index 23d4272..b396e22 100644
--- a/tests/gates.test.mjs
+++ b/tests/gates.test.mjs
@@ -1,8 +1,8 @@
 import test from "node:test";
 import assert from "node:assert/strict";
 import { createState } from "../plugins/goal-guard/state.js";
-import { requiredGates, gatePassedFresh, missingGates, completionAllowed, refreshStickyGates, isCodeBearing } from "../plugins/goal-guard/gates.js";
-import { BASE_GATES, CODE_GATES, CONTEXTUAL_GATES } from "../plugins/goal-guard/agents.js";
+import { requiredGates, gatePassedFresh, missingGates, completionAllowed, refreshStickyGates, isCodeBearing, evidenceCoverageMet } from "../plugins/goal-guard/gates.js";
+import { BASE_GATES, LEAN_BASE_GATES, CODE_GATES, CONTEXTUAL_GATES } from "../plugins/goal-guard/agents.js";
 
 const cfg = { contextualGates: true };
 
@@ -154,3 +154,111 @@ test("refreshStickyGates persists a gate even after the keyword disappears", ()
   st.goalText = "rename a variable";
   assert.ok(requiredGates(st, cfg).includes("goal-data-reviewer"), "sticky gate survives keyword loss");
 });
+
+// ── Evidence coverage (exhaustion-prevention gate) ──────────────────
+
+test("evidenceCoverageMet: true when no acceptance criteria exist", () => {
+  const st = createState();
+  st.contract = { acceptanceCriteria: [] };
+  assert.equal(evidenceCoverageMet(st), true);
+  st.contract = null;
+  assert.equal(evidenceCoverageMet(st), true);
+});
+
+test("evidenceCoverageMet: false when criteria have no evidence", () => {
+  const st = createState();
+  st.contract = { acceptanceCriteria: ["the thing works", "logs are clean"] };
+  assert.equal(evidenceCoverageMet(st), false);
+});
+
+test("evidenceCoverageMet: true when all criteria have fresh evidence", () => {
+  const st = createState();
+  st.lastEditSeq = 3;
+  st.contract = { acceptanceCriteria: ["the thing works", "logs are clean"] };
+  st.evidence = [
+    { command: "test a", result: "pass", criteria: ["the thing works"], seq: 4 },
+    { command: "test b", result: "pass", criteria: ["logs are clean"], seq: 5 },
+  ];
+  assert.equal(evidenceCoverageMet(st), true);
+});
+
+test("evidenceCoverageMet: false when evidence is stale (recorded before last edit)", () => {
+  const st = createState();
+  st.lastEditSeq = 10;
+  st.contract = { acceptanceCriteria: ["the thing works"] };
+  st.evidence = [
+    { command: "old test", result: "pass", criteria: ["the thing works"], seq: 5 },
+  ];
+  assert.equal(evidenceCoverageMet(st), false);
+});
+
+test("evidenceCoverageMet: case-insensitive criterion matching", () => {
+  const st = createState();
+  st.contract = { acceptanceCriteria: ["THE THING WORKS"] };
+  st.evidence = [
+    { command: "test", result: "pass", criteria: ["the thing works"], seq: 1 },
+  ];
+  assert.equal(evidenceCoverageMet(st), true);
+});
+
+test("completionAllowed: blocked when evidence coverage is missing (requireEvidenceCoverage default true)", () => {
+  const st = createState();
+  st.active = true;
+  st.lastEditSeq = 1;
+  st.contract = { acceptanceCriteria: ["unmet criterion"] };
+  for (const g of BASE_GATES) withVerdict(st, g, "PASS", 2);
+  // All gates pass, but no evidence → completion should NOT be allowed.
+  assert.equal(completionAllowed(st, cfg), false);
+});
+
+test("completionAllowed: evidence coverage check can be disabled via config", () => {
+  const st = createState();
+  st.active = true;
+  st.lastEditSeq = 1;
+  st.contract = { acceptanceCriteria: ["unmet criterion"] };
+  for (const g of BASE_GATES) withVerdict(st, g, "PASS", 2);
+  // Disable evidence coverage requirement.
+  assert.equal(completionAllowed(st, { ...cfg, requireEvidenceCoverage: false }), true);
+});
+
+test("completionAllowed: evidence coverage is satisfied with evidence (default config)", () => {
+  const st = createState();
+  st.active = true;
+  st.lastEditSeq = 1;
+  st.contract = { acceptanceCriteria: ["met criterion"] };
+  st.evidence = [{ command: "test", result: "pass", criteria: ["met criterion"], seq: 2 }];
+  for (const g of BASE_GATES) withVerdict(st, g, "PASS", 2);
+  assert.equal(completionAllowed(st, cfg), true);
+});
+
+// ── Lean gates (reduced checker set) ────────────────────────────────
+
+test("leanGates uses LEAN_BASE_GATES instead of BASE_GATES", () => {
+  const st = createState();
+  st.dirty = true; // code-bearing so code gates would normally be included
+  const leanCfg = { ...cfg, leanGates: true };
+  const gates = requiredGates(st, leanCfg);
+  for (const g of LEAN_BASE_GATES) assert.ok(gates.includes(g), `lean must include ${g}`);
+  // The code-only gates (diff-reviewer, verifier) must NOT be in the lean set.
+  assert.equal(gates.includes("goal-diff-reviewer"), false, "lean mode drops diff-reviewer");
+  assert.equal(gates.includes("goal-verifier"), false, "lean mode drops verifier");
+});
+
+test("leanGates disables contextual gates", () => {
+  const st = createState();
+  st.goalText = "add an auth endpoint and a database migration";
+  const leanCfg = { ...cfg, leanGates: true };
+  const gates = requiredGates(st, leanCfg);
+  assert.equal(gates.includes("goal-security-reviewer"), false, "lean mode drops contextual security");
+  assert.equal(gates.includes("goal-api-reviewer"), false, "lean mode drops contextual api");
+  assert.equal(gates.includes("goal-data-reviewer"), false, "lean mode drops contextual data");
+});
+
+test("leanGates only includes LEAN_BASE_GATES for a code-bearing goal", () => {
+  const st = createState();
+  st.dirty = true;
+  const leanCfg = { ...cfg, leanGates: true };
+  const gates = requiredGates(st, leanCfg);
+  assert.deepEqual(gates.length, LEAN_BASE_GATES.length, "lean gates should equal the lean base set");
+  assert.deepEqual(gates.sort(), [...LEAN_BASE_GATES].sort());
+});
diff --git a/tests/plugin.test.mjs b/tests/plugin.test.mjs
index f6200a4..fb0e7e0 100644
--- a/tests/plugin.test.mjs
+++ b/tests/plugin.test.mjs
@@ -744,6 +744,11 @@ test("a NEW goal in the same session does not inherit the previous goal's gates/
     { title: "Polish the welcome banner", original: "make the welcome banner look nicer", acceptanceCriteria: ["the banner greeting reads well"] },
     { sessionID: "switch" },
   );
+  // Record evidence so completionAllowed (which now checks evidence coverage) allows the goal to complete.
+  await tools.goal_evidence.execute(
+    { command: "check banner", result: "pass", criteria: ["the banner greeting reads well"] },
+    { sessionID: "switch" },
+  );
   await passAllBaseGates(guard.hooks, guard.store, "switch");
   let view = sidebarView(guard.store.stateFor("switch"), guard.config);
   assert.equal(view.goal, "Polish the welcome banner");
diff --git a/tests/programmatic-review.test.mjs b/tests/programmatic-review.test.mjs
index 1c36cfc..1af7a47 100644
--- a/tests/programmatic-review.test.mjs
+++ b/tests/programmatic-review.test.mjs
@@ -139,7 +139,7 @@ test("[live-parity] the guard runs programmatic reviews when only goal_evidence
   await guard.hooks["chat.params"]({ sessionID: "g", agent: "goal", model: MODEL }, {});
   await guard.hooks["chat.message"]({ sessionID: "g", agent: "goal", model: MODEL }, { parts: [{ type: "text", text: "verify remote server setup" }] });
   await tools.goal_contract.execute({ title: "Remote verify", original: "verify remote server setup", acceptanceCriteria: ["server reachable"] }, { sessionID: "g" });
-  await tools.goal_evidence.execute({ command: "curl -fsS https://example.com", result: "PASS" }, { sessionID: "g" });
+  await tools.goal_evidence.execute({ command: "curl -fsS https://example.com", result: "PASS", criteria: ["server reachable"] }, { sessionID: "g" });
   await guard.hooks.event({ event: { type: "session.idle", properties: { sessionID: "g" } } });
   // No file edits → a non-code goal, gated on its evidence by the always-on
   // reviewers (prompt-auditor, reviewer, final-auditor); review still runs.
diff --git a/tests/review-runner.test.mjs b/tests/review-runner.test.mjs
index b921c31..dd53b88 100644
--- a/tests/review-runner.test.mjs
+++ b/tests/review-runner.test.mjs
@@ -1,7 +1,7 @@
 import test from "node:test";
 import assert from "node:assert/strict";
 import { createStore } from "../plugins/goal-guard/state.js";
-import { markEdit } from "../plugins/goal-guard/events.js";
+import { markEdit, recordEvidence } from "../plugins/goal-guard/events.js";
 import { DEFAULT_CONFIG } from "../plugins/goal-guard/config.js";
 import { completionAllowed } from "../plugins/goal-guard/gates.js";
 import { runReviewCycle, clientCanReview, ensureReviewClient, reviewerPrompt } from "../plugins/goal-guard/review-runner.js";
@@ -49,6 +49,8 @@ function goalState(store, id) {
   state.active = true;
   state.contract = { title: "Do x", original: "do x", acceptanceCriteria: ["x works"] };
   markEdit(store, state, "edit"); // there is work to review
+  // Record evidence so the exhaustion-prevention gate (evidenceCoverageMet) is satisfied.
+  recordEvidence(store, state, "test command", "PASS", ["x works"]);
   return state;
 }
 
@@ -117,7 +119,8 @@ test("the cycle: FAIL → fix(edit) → re-review PASS counts 2 review cycles",
   assert.equal(r1.completionAllowed, false);
   assert.equal(state.reviewCycles, 1);
   markEdit(store, state, "fix after review");
-  assert.equal(completionAllowed(state, DEFAULT_CONFIG), false);
+  recordEvidence(store, state, "test command", "PASS", ["x works"]); // re-verify after the edit
+  assert.equal(completionAllowed(state, DEFAULT_CONFIG), false); // evidence fresh but gates still stale from edit
   const r2 = await runReviewCycle(mockReviewClient("PASS", sid), store, state, DEFAULT_CONFIG, fastOpts(sid));
   assert.equal(r2.completionAllowed, true);
   assert.equal(state.reviewCycles, 2);
diff --git a/tests/summary.test.mjs b/tests/summary.test.mjs
index e592ab9..6897e15 100644
--- a/tests/summary.test.mjs
+++ b/tests/summary.test.mjs
@@ -165,6 +165,7 @@ test("#291 sidebarView: a fully-passed, clean goal renders as done", () => {
   const st = createState();
   st.active = true;
   st.contract = { title: "Done goal", acceptanceCriteria: ["it works"] };
+  st.evidence = [{ command: "npm test", result: "pass", criteria: ["it works"], seq: 1 }];
   for (const gate of BASE_GATES) recordVerdict(store, st, gate, "PASS");
   st.dirty = false;
   const view = sidebarView(st, CFG);

From e0127ed80690a822b98170b30ed0d48201a422d3 Mon Sep 17 00:00:00 2001
From: Devin Oldenburg <158351052+devinoldenburg@users.noreply.github.com>
Date: Sun, 21 Jun 2026 17:31:38 +0000
Subject: [PATCH 2/2] fix(test): update deep-bughunt allowedState helper to
 record evidence for acceptance criteria

---
 tests/deep-bughunt.test.mjs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/deep-bughunt.test.mjs b/tests/deep-bughunt.test.mjs
index ce0d931..d82ded4 100644
--- a/tests/deep-bughunt.test.mjs
+++ b/tests/deep-bughunt.test.mjs
@@ -11,7 +11,7 @@ import { evaluateCompletionClaim } from "../plugins/goal-guard/completion.js";
 import { analyzeCommand } from "../plugins/goal-guard/shell.js";
 import { goalSimilarity, SAME_GOAL_THRESHOLD, createGoalTools } from "../plugins/goal-guard/tools.js";
 import { createStore, createState } from "../plugins/goal-guard/state.js";
-import { markEdit } from "../plugins/goal-guard/events.js";
+import { markEdit, recordEvidence } from "../plugins/goal-guard/events.js";
 import { runReviewCycle } from "../plugins/goal-guard/review-runner.js";
 import { DEFAULT_CONFIG } from "../plugins/goal-guard/config.js";
 import { completionAllowed } from "../plugins/goal-guard/gates.js";
@@ -59,6 +59,7 @@ async function allowedState(id) {
   state.active = true;
   state.contract = { title: "Do x", original: "do x", acceptanceCriteria: ["x works"] };
   markEdit(store, state, "edit");
+  recordEvidence(store, state, "test", "PASS", ["x works"]);
   await runReviewCycle(mockReviewClient("PASS", id), store, state, DEFAULT_CONFIG, { sessionID: id, sleep: async () => {}, pollMs: 1, timeoutMs: 500 });
   return state;
 }