orban · orban · Apr 15, 2026
diff --git a/eval-harness/lib/cli.py b/eval-harness/lib/cli.py
@@ -493,7 +493,7 @@ def run(tasks, parallel, category, output, keep_workspaces, dry_run, timeout, ve
         click.echo(f"Cleared index cache at {cache_dir}")
 
     # Determine conditions to run (HUMAN is AGENTbench-only, not used here)
-    YAML_CONDITIONS = [Condition.NONE, Condition.FLAT_LLM, Condition.INTENT_LAYER]
+    YAML_CONDITIONS = [Condition.NONE, Condition.FLAT_LLM, Condition.INTENT_LAYER, Condition.TEST_AFTER_EDIT]
     if condition:
         conditions = [Condition(c) for c in condition]
     else:

diff --git a/eval-harness/lib/prompt_builder.py b/eval-harness/lib/prompt_builder.py
@@ -16,6 +16,10 @@
 
 """
 
+TEST_AFTER_EDIT_PREAMBLE = """CONSTRAINT: Every time you edit a source file, you must immediately run the relevant tests before making any further edits. Read the full test output and use it to decide your next action. Do not batch multiple edits before testing.
+
+"""
+
 
 def build_prompt_from_commit_message(message: str, preamble: str | None = None) -> str:
     """Build a prompt from a git commit message."""

diff --git a/eval-harness/lib/reporter.py b/eval-harness/lib/reporter.py
@@ -29,6 +29,7 @@ class Reporter:
         "flat_llm": "Flat LLM",
         "intent_layer": "Intent Layer",
         "human": "Human",
+        "test_after_edit": "Test After Edit",
     }
 
     def __init__(self, output_dir: str):

diff --git a/eval-harness/lib/task_runner.py b/eval-harness/lib/task_runner.py
@@ -25,6 +25,7 @@
     build_prompt_from_issue,
     FLAT_PREAMBLE,
     INTENT_LAYER_PREAMBLE,
+    TEST_AFTER_EDIT_PREAMBLE,
 )
 from lib.index_cache import IndexCache
 
@@ -142,6 +143,7 @@ class Condition(Enum):
     FLAT_LLM = "flat_llm"
     INTENT_LAYER = "intent_layer"
     HUMAN = "human"
+    TEST_AFTER_EDIT = "test_after_edit"
 
 
 @dataclass
@@ -1165,6 +1167,7 @@ def _build_prompt(self, task: Task, workspace: str, condition: Condition, cached
             Condition.NONE: None,
             Condition.FLAT_LLM: FLAT_PREAMBLE,
             Condition.INTENT_LAYER: INTENT_LAYER_PREAMBLE,
+            Condition.TEST_AFTER_EDIT: TEST_AFTER_EDIT_PREAMBLE,
         }[condition]
 
         if task.prompt_source == "commit_message":

diff --git a/eval-harness/tests/test_task_runner.py b/eval-harness/tests/test_task_runner.py
@@ -145,7 +145,8 @@ def test_condition_enum():
     assert Condition.FLAT_LLM.value == "flat_llm"
     assert Condition.INTENT_LAYER.value == "intent_layer"
     assert Condition.HUMAN.value == "human"
-    assert len(Condition) == 4
+    assert Condition.TEST_AFTER_EDIT.value == "test_after_edit"
+    assert len(Condition) == 5
 
 
 def test_find_agents_files(sample_repo):
Original file line number
Diff line change
@@ -16,6 +16,10 @@
     """
+    TEST_AFTER_EDIT_PREAMBLE = """CONSTRAINT: Every time you edit a source file, you must immediately run the relevant tests before making any further edits. Read the full test output and use it to decide your next action. Do not batch multiple edits before testing.
+    """
     def build_prompt_from_commit_message(message: str, preamble: str | None = None) -> str:
         """Build a prompt from a git commit message."""
@@ Expand Down @@