Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions runtime/cli/completer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Tab completion for interactive REPL — slash commands + paths.

Uses prompt_toolkit (same as Hermes Agent CLI).
Provides: slash command completion, path completion, session command completion.
Provides slash command completion, path completion, session command completion.
"""

from __future__ import annotations
Expand Down
4 changes: 2 additions & 2 deletions runtime/cli/interactive.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Interactive REPL — Claude Code / Hermes Agent / OpenClaw style.
"""Interactive REPL — terminal-based testing agent.

Bare `tagent` enters interactive session:
- Natural language → LLM routing → streaming activity feed
Expand Down Expand Up @@ -134,7 +134,7 @@ def _print_help() -> None:


def _handle_natural_language(text: str) -> None:
"""Route through LLM with streaming activity output (Claude Code style)."""
"""Route through LLM with streaming activity output."""
if not text.strip():
return

Expand Down
3 changes: 1 addition & 2 deletions runtime/cli/slash_commands.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Slash command registry — single source of truth.

Modeled after Hermes Agent's COMMAND_REGISTRY (hermes_cli/commands.py).
One registry drives: CLI autocomplete, help output, command dispatch.
Single registry drives CLI autocomplete, help output, command dispatch.
"""

from __future__ import annotations
Expand Down
1 change: 1 addition & 0 deletions runtime/orchestrator/workflows/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Workflow orchestration — fixed-pipeline test coordinator and gate enforcement."""
83 changes: 83 additions & 0 deletions runtime/orchestrator/workflows/gates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""Gate enforcement for test-coordinator pipeline.

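Pure functions. Each gate inspects test result metrics and returns a GateResult; the gates defined here currently return only PASS or BLOCK (WARN is declared but not produced by any of them).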
Thresholds from skills/test-coordinator.md.
"""

from __future__ import annotations

from enum import Enum


class GateResult(str, Enum):
    """Outcome of a single pipeline gate check.

    The ``str`` mix-in makes every member an actual string, so a member
    compares equal to its lowercase value (``GateResult.PASS == "pass"``).
    """

    # Gate satisfied.
    PASS = "pass"
    # Non-blocking outcome.
    WARN = "warn"
    # Gate not satisfied.
    BLOCK = "block"


def check_smoke_gate(
    p0_total: int = 0,
    p0_passed: int = 0,
    new_p0_bugs: int = 0,
    threshold: float = 0.95,
) -> GateResult:
    """Smoke gate: P0 pass rate >= threshold AND 0 new P0 bugs.

    The gate fails closed: any new P0 bug, or a run in which no P0 test
    was executed, blocks regardless of the other inputs.

    Args:
        p0_total: Total P0 test cases run
        p0_passed: Number of P0 tests that passed
        new_p0_bugs: New P0 bugs found during smoke
        threshold: Minimum pass rate (default 0.95 = 95%)

    Returns:
        GateResult.PASS when there are no new P0 bugs and
        ``p0_passed / p0_total >= threshold``; GateResult.BLOCK otherwise.
    """
    # A single new P0 bug blocks, whatever the pass rate is.
    if new_p0_bugs > 0:
        return GateResult.BLOCK

    # Nothing tested. A non-positive total also covers corrupt (negative)
    # counts, which would otherwise yield a meaningless ratio such as
    # -5 / -5 == 1.0 and pass the gate.
    if p0_total <= 0:
        return GateResult.BLOCK

    pass_rate = p0_passed / p0_total
    return GateResult.PASS if pass_rate >= threshold else GateResult.BLOCK


def check_regression_gate(
    total: int = 0,
    passed: int = 0,
    failed: int = 0,
    threshold: float = 0.90,
) -> GateResult:
    """Regression gate: overall pass rate >= threshold.

    The gate fails closed: a run with no test cases blocks.

    Args:
        total: Total test cases
        passed: Passed test cases
        failed: Failed test cases. Accepted for interface compatibility;
            the pass rate is computed from ``passed`` and ``total`` only.
        threshold: Minimum pass rate (default 0.90)

    Returns:
        GateResult.PASS when ``passed / total >= threshold``;
        GateResult.BLOCK otherwise.
    """
    # Nothing tested. A non-positive total also rejects corrupt (negative)
    # counts instead of dividing by them.
    if total <= 0:
        return GateResult.BLOCK

    pass_rate = passed / total
    return GateResult.PASS if pass_rate >= threshold else GateResult.BLOCK


def check_perf_gate(
    avg_response_ms: float = 0,
    p95_response_ms: float = 0,
    mode: str = "ci_quick",
) -> GateResult:
    """Performance gate: latency limits differ by mode.

    Limits are inclusive (a value exactly on the limit passes):

        ci_quick: avg <= 500ms,  p95 <= 1000ms
        full:     avg <= 2000ms, p95 <= 5000ms

    Any ``mode`` other than ``"full"`` is evaluated against the
    ``ci_quick`` limits.

    Args:
        avg_response_ms: Average response time in milliseconds
        p95_response_ms: 95th-percentile response time in milliseconds
        mode: ``"full"`` or ``"ci_quick"`` (default)

    Returns:
        GateResult.PASS when both values are within their limits;
        GateResult.BLOCK otherwise.
    """
    # NOTE(review): the comparison is inclusive; confirm against
    # skills/test-coordinator.md whether the limit itself should pass.
    if mode == "full":
        avg_limit_ms, p95_limit_ms = 2000, 5000
    else:
        avg_limit_ms, p95_limit_ms = 500, 1000

    within_limits = (
        avg_response_ms <= avg_limit_ms and p95_response_ms <= p95_limit_ms
    )
    return GateResult.PASS if within_limits else GateResult.BLOCK
100 changes: 100 additions & 0 deletions runtime/tests/test_conversation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
"""TDD: ConversationMemory unit tests — RED phase (tests first)."""

from __future__ import annotations

import json
import tempfile
from pathlib import Path

import pytest

from runtime.cli.conversation import ConversationMemory, Message


class TestConversationMemory:
    """Unit tests for ConversationMemory: add, truncation, context, dump/load, clear."""

    def test_add_and_retrieve_messages(self):
        """Messages passed to add() are exposed, in order, by ``messages``."""
        memory = ConversationMemory()
        memory.add("user", "test the login page")
        memory.add("assistant", "routing to requirements-analyst...")

        assert len(memory.messages) == 2
        first = memory.messages[0]
        assert first.role == "user"
        assert first.content == "test the login page"
        assert memory.messages[1].role == "assistant"

    def test_build_context_wraps_history(self):
        """build_context() output contains the history, the new input and a header."""
        memory = ConversationMemory()
        memory.add("user", "test login")
        memory.add("assistant", "done, found 2 bugs")

        context = memory.build_context("also test register")
        expected_fragments = (
            "test login",
            "done, found 2 bugs",
            "also test register",
            "Previous conversation",
        )
        for fragment in expected_fragments:
            assert fragment in context

    def test_build_context_empty_memory(self):
        """With no history, build_context() returns the input unchanged."""
        context = ConversationMemory().build_context("test login")
        assert context == "test login"

    def test_max_turns_truncation(self):
        """Only the newest ``max_turns`` messages are kept."""
        memory = ConversationMemory(max_turns=4)
        for index in range(6):
            memory.add("user", f"msg {index}")

        kept = memory.messages
        assert len(kept) == 4
        assert kept[0].content == "msg 2"
        assert kept[-1].content == "msg 5"

    def test_max_chars_truncation(self):
        """Oldest messages are dropped once the character budget is exceeded."""
        memory = ConversationMemory(max_chars=100)
        memory.add("user", "A" * 60)
        # Second message brings the total to 120 chars, over the 100 budget.
        memory.add("assistant", "B" * 60)

        assert len(memory.messages) == 1
        assert memory.messages[0].content == "B" * 60

    def test_dump_and_load_roundtrip(self):
        """State written by dump() is restored by load()."""
        original = ConversationMemory(session_id="test-123")
        original.add("user", "hello")
        original.add("assistant", "hi there")

        with tempfile.TemporaryDirectory() as tmp_dir:
            session_file = Path(tmp_dir) / "session.json"
            original.dump(session_file)

            # Load while the temporary directory still exists.
            restored = ConversationMemory.load(session_file)
            assert restored.session_id == "test-123"
            assert len(restored.messages) == 2
            assert restored.messages[0].content == "hello"

    def test_load_nonexistent_file(self):
        """load() on a missing path yields an empty memory with a session id."""
        missing = Path("/nonexistent/path.json")
        memory = ConversationMemory.load(missing)
        assert len(memory.messages) == 0
        assert memory.session_id != ""

    def test_clear_resets_memory(self):
        """clear() empties the messages but leaves session_id untouched."""
        memory = ConversationMemory(session_id="keep-me")
        memory.add("user", "something")
        memory.clear()

        assert len(memory.messages) == 0
        assert memory.session_id == "keep-me"

    def test_message_dataclass(self):
        """Message keeps role and content and has a non-None ``ts``."""
        message = Message(role="user", content="hello")
        assert message.role == "user"
        assert message.content == "hello"
        assert message.ts is not None
125 changes: 125 additions & 0 deletions runtime/tests/test_test_coordinator_workflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
"""Characterization tests: TestCoordinatorPipeline — 11-step workflow."""

from __future__ import annotations

import pytest


class TestPipelineStructure:
    """Shape checks on ``TestCoordinatorPipeline.SEQUENCE``.

    The pipeline class is imported inside each test, so an import failure
    surfaces as a failure of that test.
    """

    def test_sequence_has_11_steps(self):
        """SEQUENCE contains exactly eleven entries."""
        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline

        step_count = len(TestCoordinatorPipeline.SEQUENCE)
        assert step_count == 11

    def test_first_step_is_requirements_analyst(self):
        """The first entry is the requirements-analyst expert."""
        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline

        first_name, first_kind = TestCoordinatorPipeline.SEQUENCE[0]
        assert first_name == "requirements-analyst"
        assert first_kind == "expert"

    def test_last_step_is_test_lead(self):
        """The final entry is the test-lead expert."""
        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline

        last_name, last_kind = TestCoordinatorPipeline.SEQUENCE[-1]
        assert last_name == "test-lead"
        assert last_kind == "expert"

    def test_all_steps_have_valid_kinds(self):
        """Every entry's kind is either "expert" or "skill"."""
        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline

        allowed_kinds = ("expert", "skill")
        for name, kind in TestCoordinatorPipeline.SEQUENCE:
            assert kind in allowed_kinds, f"{name}: invalid kind {kind}"

    def test_no_duplicate_step_names(self):
        """No step name appears more than once."""
        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline

        names = [step_name for step_name, _ in TestCoordinatorPipeline.SEQUENCE]
        unique_names = set(names)
        assert len(names) == len(unique_names), f"Duplicate steps: {names}"


class TestPreflight:
    """Checks on the return type of ``TestCoordinatorPipeline._preflight()``."""

    def test_preflight_checks_python_version(self):
        """_preflight() returns a list."""
        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline

        pipeline = TestCoordinatorPipeline()
        missing = pipeline._preflight()
        # NOTE(review): only the return type is asserted here; the list is
        # expected to be empty on a healthy environment, but that is not
        # checked.
        assert isinstance(missing, list)

    def test_preflight_returns_list(self):
        """_preflight() returns a list."""
        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline

        outcome = TestCoordinatorPipeline()._preflight()
        assert isinstance(outcome, list)


class TestPipelineResult:
    """Default field values of PipelineResult and PipelineStep."""

    def test_pipeline_result_defaults(self):
        """A PipelineResult built with only ``ok`` has empty/None defaults."""
        from runtime.orchestrator.workflows.test_coordinator import PipelineResult

        outcome = PipelineResult(ok=True)
        assert outcome.ok is True
        assert outcome.steps == []
        assert outcome.aborted_at is None
        assert outcome.summary == ""

    def test_pipeline_step_defaults(self):
        """A new PipelineStep keeps its name and kind and starts as "pending"."""
        from runtime.orchestrator.workflows.test_coordinator import PipelineStep

        step = PipelineStep(name="test-step", kind="expert")
        assert step.name == "test-step"
        assert step.kind == "expert"
        assert step.status == "pending"


class TestGateIntegration:
    """Behaviour of ``TestCoordinatorPipeline._check_gates()`` per step name."""

    def test_check_gates_with_empty_metrics_blocks(self):
        """Empty metrics dict → gate values are 0 → gates BLOCK."""
        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline

        pipeline = TestCoordinatorPipeline()

        # With no metrics both gated steps see 0/0 tests and should block,
        # which _check_gates reports as a non-None result.
        smoke_outcome = pipeline._check_gates("smoke-test", {"metrics": {}})
        assert smoke_outcome is not None

        executor_outcome = pipeline._check_gates("test-executor", {"metrics": {}})
        assert executor_outcome is not None

    def test_check_gates_passing_metrics(self):
        """With passing metrics, gates should return None (no block)."""
        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline

        pipeline = TestCoordinatorPipeline()
        passing_metrics = {"p0_total": 100, "p0_passed": 98, "new_p0_bugs": 0}

        outcome = pipeline._check_gates("smoke-test", {"metrics": passing_metrics})
        # 98% is above the 95% threshold and there are no new P0 bugs.
        assert outcome is None

    def test_check_gates_failing_smoke(self):
        """A low pass rate together with new P0 bugs yields a block."""
        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline

        pipeline = TestCoordinatorPipeline()
        failing_metrics = {"p0_total": 100, "p0_passed": 80, "new_p0_bugs": 2}

        outcome = pipeline._check_gates("smoke-test", {"metrics": failing_metrics})
        # 80% is below 95% and there are new P0 bugs.
        assert outcome is not None

    def test_check_gates_unknown_step_passes(self):
        """Steps not in gate logic return None."""
        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline

        outcome = TestCoordinatorPipeline()._check_gates(
            "requirements-analyst", {"metrics": {}}
        )
        assert outcome is None


class TestPipelineRun:
    """End-to-end checks on ``TestCoordinatorPipeline.run()``."""

    def test_run_creates_result(self):
        """run() returns a result object whose summary is a string."""
        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline

        outcome = TestCoordinatorPipeline().run("test target")
        assert outcome is not None
        assert isinstance(outcome.summary, str)

    def test_run_aborted_preflight(self):
        """A non-empty preflight result aborts the run at "preflight"."""
        from unittest.mock import patch

        from runtime.orchestrator.workflows.test_coordinator import TestCoordinatorPipeline

        pipeline = TestCoordinatorPipeline()
        # Force _preflight to report one missing dependency.
        with patch.object(pipeline, "_preflight", return_value=["missing dep"]):
            outcome = pipeline.run("test")

        assert outcome.ok is False
        assert outcome.aborted_at == "preflight"
Loading