From 740d1c72952299777681e75856f0dd005c701089 Mon Sep 17 00:00:00 2001
From: Ryan Orban <ryan@orban.dev>
Date: Wed, 15 Apr 2026 13:27:50 -0700
Subject: [PATCH] add test-after-edit eval condition

Adds a fifth experimental condition, test_after_edit, which constrains
the agent (via a prompt preamble) to run the relevant tests after every
source-file edit before making any further edits. This isolates the
effect of a tight test-driven feedback loop from the context-injection
effects compared by the existing none/flat_llm/intent_layer conditions.

The condition is wired through the Condition enum, the prompt builder
(TEST_AFTER_EDIT_PREAMBLE), the reporter display labels, and the
YAML_CONDITIONS list in the run() CLI command. test_condition_enum is
updated to cover the new enum member.
---
 eval-harness/lib/cli.py                | 2 +-
 eval-harness/lib/prompt_builder.py     | 4 ++++
 eval-harness/lib/reporter.py           | 1 +
 eval-harness/lib/task_runner.py        | 3 +++
 eval-harness/tests/test_task_runner.py | 3 ++-
 5 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/eval-harness/lib/cli.py b/eval-harness/lib/cli.py
index accf6e0..6a7d7fe 100644
--- a/eval-harness/lib/cli.py
+++ b/eval-harness/lib/cli.py
@@ -493,7 +493,7 @@ def run(tasks, parallel, category, output, keep_workspaces, dry_run, timeout, ve
         click.echo(f"Cleared index cache at {cache_dir}")
 
     # Determine conditions to run (HUMAN is AGENTbench-only, not used here)
-    YAML_CONDITIONS = [Condition.NONE, Condition.FLAT_LLM, Condition.INTENT_LAYER]
+    YAML_CONDITIONS = [Condition.NONE, Condition.FLAT_LLM, Condition.INTENT_LAYER, Condition.TEST_AFTER_EDIT]
     if condition:
         conditions = [Condition(c) for c in condition]
     else:
diff --git a/eval-harness/lib/prompt_builder.py b/eval-harness/lib/prompt_builder.py
index f6b4959..7aea700 100644
--- a/eval-harness/lib/prompt_builder.py
+++ b/eval-harness/lib/prompt_builder.py
@@ -16,6 +16,10 @@
 
 """
 
+TEST_AFTER_EDIT_PREAMBLE = """CONSTRAINT: Every time you edit a source file, you must immediately run the relevant tests before making any further edits. Read the full test output and use it to decide your next action. Do not batch multiple edits before testing.
+
+"""
+
 
 def build_prompt_from_commit_message(message: str, preamble: str | None = None) -> str:
     """Build a prompt from a git commit message."""
diff --git a/eval-harness/lib/reporter.py b/eval-harness/lib/reporter.py
index d59a1bc..c71c911 100644
--- a/eval-harness/lib/reporter.py
+++ b/eval-harness/lib/reporter.py
@@ -29,6 +29,7 @@ class Reporter:
         "flat_llm": "Flat LLM",
         "intent_layer": "Intent Layer",
         "human": "Human",
+        "test_after_edit": "Test After Edit",
     }
 
     def __init__(self, output_dir: str):
diff --git a/eval-harness/lib/task_runner.py b/eval-harness/lib/task_runner.py
index 500f761..2cddd5f 100644
--- a/eval-harness/lib/task_runner.py
+++ b/eval-harness/lib/task_runner.py
@@ -25,6 +25,7 @@
     build_prompt_from_issue,
     FLAT_PREAMBLE,
     INTENT_LAYER_PREAMBLE,
+    TEST_AFTER_EDIT_PREAMBLE,
 )
 from lib.index_cache import IndexCache
 
@@ -142,6 +143,7 @@ class Condition(Enum):
     FLAT_LLM = "flat_llm"
     INTENT_LAYER = "intent_layer"
     HUMAN = "human"
+    TEST_AFTER_EDIT = "test_after_edit"
 
 
 @dataclass
@@ -1165,6 +1167,7 @@ def _build_prompt(self, task: Task, workspace: str, condition: Condition, cached
             Condition.NONE: None,
             Condition.FLAT_LLM: FLAT_PREAMBLE,
             Condition.INTENT_LAYER: INTENT_LAYER_PREAMBLE,
+            Condition.TEST_AFTER_EDIT: TEST_AFTER_EDIT_PREAMBLE,
         }[condition]
 
         if task.prompt_source == "commit_message":
diff --git a/eval-harness/tests/test_task_runner.py b/eval-harness/tests/test_task_runner.py
index 0f569b2..dd4484d 100644
--- a/eval-harness/tests/test_task_runner.py
+++ b/eval-harness/tests/test_task_runner.py
@@ -145,7 +145,8 @@ def test_condition_enum():
     assert Condition.FLAT_LLM.value == "flat_llm"
     assert Condition.INTENT_LAYER.value == "intent_layer"
     assert Condition.HUMAN.value == "human"
-    assert len(Condition) == 4
+    assert Condition.TEST_AFTER_EDIT.value == "test_after_edit"
+    assert len(Condition) == 5
 
 
 def test_find_agents_files(sample_repo):