Wool-xing · Wool-xing · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026
diff --git a/runtime/cli/completer.py b/runtime/cli/completer.py
@@ -1,7 +1,6 @@
 """Tab completion for interactive REPL — slash commands + paths.
 
-Uses prompt_toolkit (same as Hermes Agent CLI).
-Provides: slash command completion, path completion, session command completion.
+Provides slash command completion, path completion, session command completion.
 """
 
 from __future__ import annotations

diff --git a/runtime/cli/interactive.py b/runtime/cli/interactive.py
@@ -1,4 +1,4 @@
-"""Interactive REPL — Claude Code / Hermes Agent / OpenClaw style.
+"""Interactive REPL — terminal-based testing agent.
 
 Bare `tagent` enters interactive session:
   - Natural language → LLM routing → streaming activity feed
@@ -134,7 +134,7 @@ def _print_help() -> None:
 
 
 def _handle_natural_language(text: str) -> None:
-    """Route through LLM with streaming activity output (Claude Code style)."""
+    """Route through LLM with streaming activity output."""
     if not text.strip():
         return
 

diff --git a/runtime/cli/slash_commands.py b/runtime/cli/slash_commands.py
@@ -1,7 +1,6 @@
 """Slash command registry — single source of truth.
 
-Modeled after Hermes Agent's COMMAND_REGISTRY (hermes_cli/commands.py).
-One registry drives: CLI autocomplete, help output, command dispatch.
+Single registry drives CLI autocomplete, help output, command dispatch.
 """
 
 from __future__ import annotations

diff --git a/runtime/orchestrator/workflows/__init__.py b/runtime/orchestrator/workflows/__init__.py
@@ -0,0 +1 @@
+"""Workflow orchestration — fixed-pipeline test coordinator and gate enforcement."""
diff --git a/runtime/orchestrator/workflows/gates.py b/runtime/orchestrator/workflows/gates.py
@@ -0,0 +1,83 @@
+"""Gate enforcement for test-coordinator pipeline.
+
+Pure functions. Each gate inspects test result metrics and returns a GateResult (PASS or BLOCK; no gate here returns WARN).
+Thresholds from skills/test-coordinator.md.
+"""
+
+from __future__ import annotations
+
+from enum import Enum
+
+
+class GateResult(str, Enum):  # str mixin: members compare equal to their string values
+    PASS = "pass"
+    WARN = "warn"  # declared, but no gate function in this module returns it
+    BLOCK = "block"
+
+
+def check_smoke_gate(
+    p0_total: int = 0,
+    p0_passed: int = 0,
+    new_p0_bugs: int = 0,
+    threshold: float = 0.95,
+) -> GateResult:
+    """Smoke gate: PASS only if P0 pass rate >= threshold AND 0 new P0 bugs; else BLOCK.
+
+    Args:
+        p0_total: Total P0 test cases run (0 means nothing was tested -> BLOCK)
+        p0_passed: Number of P0 tests that passed
+        new_p0_bugs: New P0 bugs found during smoke (any value > 0 -> BLOCK)
+        threshold: Minimum pass rate, inclusive (default 0.95 = 95%)
+    """
+    if new_p0_bugs > 0:  # checked first, so bugs block even at a 100% pass rate
+        return GateResult.BLOCK
+    if p0_total == 0:
+        return GateResult.BLOCK  # nothing tested; also guards the division below
+    rate = p0_passed / p0_total
+    if rate >= threshold:
+        return GateResult.PASS
+    return GateResult.BLOCK
+
+
+def check_regression_gate(
+    total: int = 0,
+    passed: int = 0,
+    failed: int = 0,
+    threshold: float = 0.90,
+) -> GateResult:
+    """Regression gate: PASS if overall pass rate (passed / total) >= threshold, else BLOCK.
+
+    Args:
+        total: Total test cases (0 -> BLOCK)
+        passed: Passed test cases
+        failed: Failed test cases (accepted but not read by this function)
+        threshold: Minimum pass rate, inclusive (default 0.90)
+    """
+    if total == 0:  # nothing ran; also guards the division below
+        return GateResult.BLOCK
+    rate = passed / total
+    if rate >= threshold:
+        return GateResult.PASS
+    return GateResult.BLOCK
+
+
+def check_perf_gate(
+    avg_response_ms: float = 0,
+    p95_response_ms: float = 0,
+    mode: str = "ci_quick",
+) -> GateResult:
+    """Performance gate: PASS if both limits hold for the mode (inclusive), else BLOCK.
+
+    ci_quick: avg <= 500ms, p95 <= 1000ms
+    full:     avg <= 2000ms, p95 <= 5000ms
+    """
+    if mode == "full":
+        avg_ok = avg_response_ms <= 2000
+        p95_ok = p95_response_ms <= 5000
+    else:  # any mode other than "full" (not just "ci_quick") uses these limits
+        avg_ok = avg_response_ms <= 500
+        p95_ok = p95_response_ms <= 1000
+
+    if avg_ok and p95_ok:
+        return GateResult.PASS
+    return GateResult.BLOCK
diff --git a/runtime/tests/test_conversation.py b/runtime/tests/test_conversation.py
@@ -0,0 +1,100 @@
+"""TDD: ConversationMemory unit tests — RED phase (tests first)."""
+
+from __future__ import annotations
+
+import json
+import tempfile
+from pathlib import Path
+
+import pytest
+
+from runtime.cli.conversation import ConversationMemory, Message
+
+
+class TestConversationMemory:
+    """Test ConversationMemory: add, truncate, context, dump/load, clear."""
+
+    def test_add_and_retrieve_messages(self):
+        """add() stores messages; messages property returns them."""
+        mem = ConversationMemory()
+        mem.add("user", "test the login page")
+        mem.add("assistant", "routing to requirements-analyst...")
+
+        assert len(mem.messages) == 2
+        assert mem.messages[0].role == "user"
+        assert mem.messages[0].content == "test the login page"
+        assert mem.messages[1].role == "assistant"
+
+    def test_build_context_wraps_history(self):
+        """build_context() formats history + current input for the LLM prompt."""
+        mem = ConversationMemory()
+        mem.add("user", "test login")
+        mem.add("assistant", "done, found 2 bugs")
+
+        ctx = mem.build_context("also test register")
+        assert "test login" in ctx
+        assert "done, found 2 bugs" in ctx
+        assert "also test register" in ctx
+        assert "Previous conversation" in ctx  # literal header text expected in the output
+
+    def test_build_context_empty_memory(self):
+        """build_context() with no history returns just the current input."""
+        mem = ConversationMemory()
+        ctx = mem.build_context("test login")
+        assert ctx == "test login"
+
+    def test_max_turns_truncation(self):
+        """Sliding window: oldest messages dropped when exceeding max_turns."""
+        mem = ConversationMemory(max_turns=4)
+        for i in range(6):
+            mem.add("user", f"msg {i}")
+
+        assert len(mem.messages) == 4  # 6 added, window of 4 keeps msg 2..msg 5
+        assert mem.messages[0].content == "msg 2"
+        assert mem.messages[-1].content == "msg 5"
+
+    def test_max_chars_truncation(self):
+        """Character budget: oldest messages dropped until under limit."""
+        mem = ConversationMemory(max_chars=100)
+        mem.add("user", "A" * 60)
+        mem.add("assistant", "B" * 60)  # 60 + 60 = 120 chars > max_chars=100, so the first is dropped
+
+        assert len(mem.messages) == 1
+        assert mem.messages[0].content == "B" * 60
+
+    def test_dump_and_load_roundtrip(self):
+        """dump() writes JSON; load() restores identical state."""
+        mem = ConversationMemory(session_id="test-123")
+        mem.add("user", "hello")
+        mem.add("assistant", "hi there")
+
+        with tempfile.TemporaryDirectory() as td:  # directory is removed when the block exits
+            path = Path(td) / "session.json"
+            mem.dump(path)
+
+            restored = ConversationMemory.load(path)
+            assert restored.session_id == "test-123"
+            assert len(restored.messages) == 2
+            assert restored.messages[0].content == "hello"
+
+    def test_load_nonexistent_file(self):
+        """load() on missing file returns fresh ConversationMemory."""
+        mem = ConversationMemory.load(Path("/nonexistent/path.json"))
+        assert len(mem.messages) == 0
+        assert mem.session_id != ""  # a fresh instance must still carry a non-empty session id
+
+    def test_clear_resets_memory(self):
+        """clear() removes all messages, keeps session_id."""
+        mem = ConversationMemory(session_id="keep-me")
+        mem.add("user", "something")
+        mem.clear()
+
+        assert len(mem.messages) == 0
+        assert mem.session_id == "keep-me"
+
+    def test_message_dataclass(self):
+        """Message stores role, content, and auto-generates timestamp."""
+        msg = Message(role="user", content="hello")
+        assert msg.role == "user"
+        assert msg.content == "hello"
+        assert msg.ts is not None  # ts is not passed in, so it must be filled by default
diff --git a/runtime/tests/test_test_coordinator_workflow.py b/runtime/tests/test_test_coordinator_workflow.py
@@ -0,0 +1,125 @@
+"""Characterization tests: TestCoordinatorPipeline — 11-step workflow."""
+
+from __future__ import annotations
+
+import pytest
+
+
+class TestPipelineStructure:  # pins the static shape of TestCoordinatorPipeline.SEQUENCE
+    def test_sequence_has_11_steps(self):
+        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline
+        assert len(TestCoordinatorPipeline.SEQUENCE) == 11
+
+    def test_first_step_is_requirements_analyst(self):
+        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline
+        name, kind = TestCoordinatorPipeline.SEQUENCE[0]  # each entry unpacks as a (name, kind) pair
+        assert name == "requirements-analyst"
+        assert kind == "expert"
+
+    def test_last_step_is_test_lead(self):
+        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline
+        name, kind = TestCoordinatorPipeline.SEQUENCE[-1]
+        assert name == "test-lead"
+        assert kind == "expert"
+
+    def test_all_steps_have_valid_kinds(self):
+        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline
+        for name, kind in TestCoordinatorPipeline.SEQUENCE:
+            assert kind in ("expert", "skill"), f"{name}: invalid kind {kind}"
+
+    def test_no_duplicate_step_names(self):
+        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline
+        names = [n for n, _ in TestCoordinatorPipeline.SEQUENCE]
+        assert len(names) == len(set(names)), f"Duplicate steps: {names}"  # set() collapses repeats
+
+
+class TestPreflight:  # both tests make the same isinstance(list) assertion on _preflight()
+    def test_preflight_checks_python_version(self):
+        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline
+        p = TestCoordinatorPipeline()
+        missing = p._preflight()
+        # NOTE: only the return type is asserted; neither emptiness nor the Python version is checked
+        assert isinstance(missing, list)
+
+    def test_preflight_returns_list(self):
+        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline
+        p = TestCoordinatorPipeline()
+        result = p._preflight()
+        assert isinstance(result, list)
+
+
+class TestPipelineResult:  # pins the default field values of PipelineResult and PipelineStep
+    def test_pipeline_result_defaults(self):
+        from runtime.orchestrator.workflows.test_coordinator import PipelineResult
+        r = PipelineResult(ok=True)  # only ok is supplied; the other fields are the defaults under test
+        assert r.ok is True
+        assert r.steps == []
+        assert r.aborted_at is None
+        assert r.summary == ""
+
+    def test_pipeline_step_defaults(self):
+        from runtime.orchestrator.workflows.test_coordinator import PipelineStep
+        s = PipelineStep(name="test-step", kind="expert")
+        assert s.name == "test-step"
+        assert s.kind == "expert"
+        assert s.status == "pending"  # status is not passed in, so this pins its default
+
+
+class TestGateIntegration:  # _check_gates is expected to return None when not blocking
+    def test_check_gates_with_empty_metrics_blocks(self):
+        """Empty metrics dict → gate values are 0 → gates BLOCK."""
+        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline
+        p = TestCoordinatorPipeline()
+
+        # smoke-test with no metrics → 0/0 tests → BLOCK
+        result = p._check_gates("smoke-test", {"metrics": {}})
+        assert result is not None  # a non-None return is treated as "blocked"
+
+        # test-executor with no metrics → 0/0 → BLOCK
+        result2 = p._check_gates("test-executor", {"metrics": {}})
+        assert result2 is not None
+
+    def test_check_gates_passing_metrics(self):
+        """With passing metrics, gates should return None (no block)."""
+        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline
+        p = TestCoordinatorPipeline()
+
+        result = p._check_gates("smoke-test", {
+            "metrics": {"p0_total": 100, "p0_passed": 98, "new_p0_bugs": 0}
+        })
+        assert result is None  # 98% > 95% → pass
+
+    def test_check_gates_failing_smoke(self):  # both failure conditions at once: low rate and new bugs
+        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline
+        p = TestCoordinatorPipeline()
+
+        result = p._check_gates("smoke-test", {
+            "metrics": {"p0_total": 100, "p0_passed": 80, "new_p0_bugs": 2}
+        })
+        assert result is not None  # 80% < 95% + bugs → block
+
+    def test_check_gates_unknown_step_passes(self):
+        """Steps not in gate logic return None."""
+        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline
+        p = TestCoordinatorPipeline()
+        result = p._check_gates("requirements-analyst", {"metrics": {}})
+        assert result is None
+
+
+class TestPipelineRun:
+    def test_run_creates_result(self):
+        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline
+        p = TestCoordinatorPipeline()
+        result = p.run("test target")  # NOTE(review): nothing is patched here — confirm run() is safe in CI
+        assert result is not None
+        assert isinstance(result.summary, str)
+
+    def test_run_aborted_preflight(self):
+        """Simulate preflight failure by patching _preflight to report a missing dependency."""
+        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline
+        from unittest.mock import patch
+        p = TestCoordinatorPipeline()
+        with patch.object(p, '_preflight', return_value=["missing dep"]):
+            result = p.run("test")
+            assert result.ok is False
+            assert result.aborted_at == "preflight"
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		"""Workflow orchestration — fixed-pipeline test coordinator and gate enforcement."""