From e9fa284f2c067ac68efa4d3e65f823a22351d9bf Mon Sep 17 00:00:00 2001 From: Andy Xie Date: Tue, 9 Jun 2026 02:10:37 +0800 Subject: [PATCH 1/2] feat(agent): add AgentRunner runtime dispatcher (Phase 2 foundation) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Route a sub-agent task to the legacy Anthropic loop or the Claude Agent SDK adapter based on agent.runtime (legacy|sdk, default legacy), returning the same structured result dict either way. Additive and dormant: nothing in the request path imports it yet (mirrors how the #24 adapter shipped behind the flag). A follow-up PR wires the Icinga sub-agent to dispatch through it. With the default legacy runtime it is a transparent pass-through to src.agent.agents.run_sub_agent — zero behavior change. The SDK branch surfaces token/cost/cache usage under data.usage, which the Phase-2 cost benchmark compares against legacy. 11 tests: runtime resolution, legacy pass-through, SDK result normalization, and SDK-unavailable error handling. Co-Authored-By: Claude Opus 4.8 --- src/agent/runner.py | 198 +++++++++++++++++++++++++++++++++++ tests/test_agent_runner.py | 204 +++++++++++++++++++++++++++++++++++++ 2 files changed, 402 insertions(+) create mode 100644 src/agent/runner.py create mode 100644 tests/test_agent_runner.py diff --git a/src/agent/runner.py b/src/agent/runner.py new file mode 100644 index 0000000..51785bc --- /dev/null +++ b/src/agent/runner.py @@ -0,0 +1,198 @@ +"""AgentRunner — route a sub-agent task to the legacy loop or the Claude Agent SDK. + +Phase-2 foundation. The orchestrator currently calls :func:`src.agent.agents.run_sub_agent` +directly. This module introduces a single dispatch seam that reads ``agent.runtime`` +(``legacy|sdk``, default ``legacy``) once and routes each sub-agent task to the matching +runtime, returning the **same structured result dict** either way so callers don't care +which runtime answered. + +It is intentionally *additive and dormant*: nothing in the request path imports it yet +(mirroring how the #24 adapter shipped behind the flag). A later PR wires a specific +sub-agent (Icinga) to dispatch through it. With the default ``legacy`` runtime, +:meth:`AgentRunner.run_sub_agent` is a transparent pass-through to the existing loop — +zero behavior change. + +The SDK branch here is deliberately minimal: it runs the agent's system prompt through +:meth:`AgentSdkClient.complete` and normalizes the outcome. Per-agent *skill + tool* +wiring (e.g. the Icinga ``query_icinga`` tool surface and ``icinga-triage`` SKILL.md) +lands in the follow-up PR; this module only owns the routing. +""" + +from __future__ import annotations + +import logging +import time +from typing import Any + +from src.llm import RUNTIME_SDK, RuntimeName, get_runtime + +logger = logging.getLogger(__name__) + + +class AgentRunner: + """Dispatches a sub-agent task to the configured runtime. + + Resolve the runtime once (at construction) so a single request doesn't + re-read config per sub-agent call. Stateless apart from the resolved + runtime + config handle, so it's safe to build per request or reuse. + """ + + def __init__(self, config: Any, runtime: RuntimeName | None = None) -> None: + """Args: + config: Dynaconf-style config (or plain dict in tests). + runtime: Force a runtime, bypassing ``agent.runtime``. Mainly for + tests and benchmark harnesses that want to drive both paths from + one config; ``None`` resolves the flag via :func:`get_runtime`. + """ + self._config = config + self._runtime: RuntimeName = runtime or get_runtime(config) + logger.debug("AgentRunner initialized with runtime=%s", self._runtime) + + @property + def runtime(self) -> RuntimeName: + """The resolved runtime (``legacy`` or ``sdk``).""" + return self._runtime + + async def run_sub_agent( + self, + agent_type: str, + task: str, + context: dict | None = None, + client: Any = None, + event_queue: Any = None, + conversation_history: list | None = None, + ) -> dict: + """Run one sub-agent task on the active runtime. + + Signature mirrors :func:`src.agent.agents.run_sub_agent` so the runner + is a drop-in seam. The ``client``/``event_queue``/``conversation_history`` + arguments are only meaningful for the legacy loop and are ignored by the + SDK path (the SDK runs its own loop and streams internally). + + Returns: + The legacy result dict (``agent``/``status``/``summary``/``findings``/ + ``data``/``tool_calls``/``duration_seconds`` …). The SDK path produces + the same shape, with token/cost/cache usage surfaced under ``data``. + """ + if self._runtime == RUNTIME_SDK: + return await self._run_via_sdk(agent_type=agent_type, task=task, context=context) + return await self._run_via_legacy( + agent_type=agent_type, + task=task, + context=context, + client=client, + event_queue=event_queue, + conversation_history=conversation_history, + ) + + # ----------------------------------------------------------------- legacy + + async def _run_via_legacy( + self, + *, + agent_type: str, + task: str, + context: dict | None, + client: Any, + event_queue: Any, + conversation_history: list | None, + ) -> dict: + """Pass-through to the existing Anthropic tool-use loop (unchanged).""" + # Imported lazily: agents.py pulls in the full tool/orchestrator graph, + # which we don't want to import just to construct an SDK-runtime runner. + from src.agent.agents import run_sub_agent as legacy_run_sub_agent + + return await legacy_run_sub_agent( + agent_type, + task, + context=context, + client=client, + event_queue=event_queue, + conversation_history=conversation_history, + ) + + # -------------------------------------------------------------------- sdk + + async def _run_via_sdk(self, *, agent_type: str, task: str, context: dict | None) -> dict: + """Run the task through the Claude Agent SDK adapter and normalize it. + + Minimal by design (see module docstring): no per-agent skill/tool wiring + yet. The agent's system prompt is loaded the same way the legacy loop + loads it, so the two paths share prompt content for a fair benchmark. + """ + from src.agent.system_prompt import get_agent_prompt + from src.llm import AgentSdkClient, AgentSdkUnavailableError + + start = time.monotonic() + system = get_agent_prompt(agent_type) or None + prompt = _with_context(task, context) + + try: + sdk_client = AgentSdkClient.from_config(self._config) + result = await sdk_client.complete(prompt=prompt, system=system) + except AgentSdkUnavailableError as exc: + logger.warning("SDK runtime requested but unavailable: %s", exc) + return _error_result(agent_type, str(exc), round(time.monotonic() - start, 1)) + + return _sdk_result_to_dict(agent_type, result, round(time.monotonic() - start, 1)) + + +# --------------------------------------------------------------------- helpers + + +def _with_context(task: str, context: dict | None) -> str: + """Append orchestrator context to the task, matching the legacy framing.""" + if not context: + return task + import json + + return f"{task}\n\n**Context from orchestrator:**\n```json\n{json.dumps(context, default=str)}\n```" + + +def _sdk_result_to_dict(agent_type: str, result: Any, duration_seconds: float) -> dict: + """Map an :class:`SdkResult` onto the legacy ``run_sub_agent`` result dict. + + Token/cost/cache usage is surfaced under ``data.usage`` — exactly the + fields the Phase-2 cost benchmark compares against the legacy path. + """ + usage = result.usage + return { + "agent": agent_type, + "status": "success" if result.succeeded else "error", + "summary": result.text or (result.error_message or ""), + "findings": [result.text] if result.text else [], + "data": { + "runtime": "sdk", + "model": result.model, + "session_id": result.session_id, + "usage": { + "input_tokens": usage.input_tokens, + "output_tokens": usage.output_tokens, + "cache_creation_input_tokens": usage.cache_creation_input_tokens, + "cache_read_input_tokens": usage.cache_read_input_tokens, + "total_cost_usd": usage.total_cost_usd, + "num_turns": usage.num_turns, + }, + }, + "tool_calls": len(result.tool_invocations), + "tool_errors": sum(1 for inv in result.tool_invocations if inv.get("is_error")), + "rounds_used": result.usage.num_turns, + "duration_seconds": duration_seconds, + "error": result.error_message, + } + + +def _error_result(agent_type: str, message: str, duration_seconds: float = 0.0) -> dict: + """A legacy-shaped error result for failures before/around the SDK call.""" + return { + "agent": agent_type, + "status": "error", + "summary": message, + "findings": [], + "data": {"runtime": "sdk"}, + "tool_calls": 0, + "tool_errors": 0, + "rounds_used": 0, + "duration_seconds": duration_seconds, + "error": message, + } diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py new file mode 100644 index 0000000..76320b1 --- /dev/null +++ b/tests/test_agent_runner.py @@ -0,0 +1,204 @@ +"""Tests for src.agent.runner.AgentRunner. + +The runner is a routing seam between the legacy Anthropic loop +(``src.agent.agents.run_sub_agent``) and the Claude Agent SDK adapter +(``src.llm.AgentSdkClient``). These tests inject fakes for both heavy +dependencies via ``monkeypatch`` so the routing/normalization logic can be +exercised in isolation (no anthropic SDK, no claude_agent_sdk, no config files). +""" + +from __future__ import annotations + +import sys +import types +from typing import Any + +import pytest + +from src.agent.runner import ( + AgentRunner, + _error_result, + _sdk_result_to_dict, + _with_context, +) +from src.llm import RUNTIME_LEGACY, RUNTIME_SDK, AgentSdkUnavailableError, SdkResult, SdkUsage + +# --------------------------------------------------------------- runtime wiring + + +def test_runtime_defaults_to_legacy() -> None: + assert AgentRunner({}).runtime == RUNTIME_LEGACY + + +def test_runtime_reads_flag_from_config() -> None: + assert AgentRunner({"agent": {"runtime": "sdk"}}).runtime == RUNTIME_SDK + + +def test_runtime_explicit_override_wins() -> None: + # Even with legacy in config, an explicit override forces the runtime + # (used by the benchmark harness to drive both paths from one config). + runner = AgentRunner({"agent": {"runtime": "legacy"}}, runtime=RUNTIME_SDK) + assert runner.runtime == RUNTIME_SDK + + +def test_unknown_runtime_falls_back_to_legacy() -> None: + assert AgentRunner({"agent": {"runtime": "bogus"}}).runtime == RUNTIME_LEGACY + + +# ------------------------------------------------------------- legacy dispatch + + +async def test_legacy_dispatch_passes_through(monkeypatch: pytest.MonkeyPatch) -> None: + captured: dict[str, Any] = {} + + async def _fake_run_sub_agent(agent_type: str, task: str, **kwargs: Any) -> dict: + captured["agent_type"] = agent_type + captured["task"] = task + captured["kwargs"] = kwargs + return {"agent": agent_type, "status": "success", "summary": "legacy answer"} + + fake_agents = types.ModuleType("src.agent.agents") + fake_agents.run_sub_agent = _fake_run_sub_agent # type: ignore[attr-defined] + monkeypatch.setitem(sys.modules, "src.agent.agents", fake_agents) + + runner = AgentRunner({"agent": {"runtime": "legacy"}}) + out = await runner.run_sub_agent( + "icinga", "triage alert X", context={"host": "h1"}, conversation_history=[{"x": 1}] + ) + + assert out["summary"] == "legacy answer" + assert captured["agent_type"] == "icinga" + assert captured["task"] == "triage alert X" + # context/conversation_history forwarded verbatim to the legacy loop + assert captured["kwargs"]["context"] == {"host": "h1"} + assert captured["kwargs"]["conversation_history"] == [{"x": 1}] + + +# ---------------------------------------------------------------- sdk dispatch + + +class _FakeSdkClient: + """Stand-in for AgentSdkClient: records the call and returns a real SdkResult.""" + + calls: list[dict[str, Any]] = [] + + def __init__(self, result: SdkResult) -> None: + self._result = result + + @classmethod + def from_config(cls, config: Any) -> _FakeSdkClient: + return cls( + SdkResult( + text="sdk answer", + tool_invocations=( + {"name": "query_icinga", "input": {}, "is_error": False}, + {"name": "fetch_github_file", "input": {}, "is_error": True}, + ), + model="claude-sonnet-4-5", + session_id="sess-1", + usage=SdkUsage( + input_tokens=1000, + output_tokens=200, + cache_creation_input_tokens=50, + cache_read_input_tokens=800, + total_cost_usd=0.0123, + num_turns=3, + ), + ) + ) + + async def complete(self, *, prompt: str, system: str | None = None, **kwargs: Any) -> SdkResult: + _FakeSdkClient.calls.append({"prompt": prompt, "system": system}) + return self._result + + +def _inject_prompt_loader( + monkeypatch: pytest.MonkeyPatch, prompt: str = "ICINGA SYSTEM PROMPT" +) -> None: + fake_sp = types.ModuleType("src.agent.system_prompt") + fake_sp.get_agent_prompt = lambda agent_type: prompt # type: ignore[attr-defined] + monkeypatch.setitem(sys.modules, "src.agent.system_prompt", fake_sp) + + +async def test_sdk_dispatch_maps_result(monkeypatch: pytest.MonkeyPatch) -> None: + _FakeSdkClient.calls = [] + _inject_prompt_loader(monkeypatch) + monkeypatch.setattr("src.llm.AgentSdkClient", _FakeSdkClient) + + runner = AgentRunner({"agent": {"runtime": "sdk"}}) + out = await runner.run_sub_agent("icinga", "triage alert X", context={"host": "h1"}) + + # routing reached the SDK and forwarded the agent's system prompt + task(+context) + assert _FakeSdkClient.calls[0]["system"] == "ICINGA SYSTEM PROMPT" + assert _FakeSdkClient.calls[0]["prompt"].startswith("triage alert X") + assert "Context from orchestrator" in _FakeSdkClient.calls[0]["prompt"] + + # result normalized onto the legacy dict shape + assert out["agent"] == "icinga" + assert out["status"] == "success" + assert out["summary"] == "sdk answer" + assert out["tool_calls"] == 2 + assert out["tool_errors"] == 1 + assert out["rounds_used"] == 3 + # cost/cache usage surfaced for the benchmark + assert out["data"]["runtime"] == "sdk" + assert out["data"]["usage"]["cache_read_input_tokens"] == 800 + assert out["data"]["usage"]["total_cost_usd"] == pytest.approx(0.0123) + + +async def test_sdk_unavailable_returns_error_not_raises(monkeypatch: pytest.MonkeyPatch) -> None: + _inject_prompt_loader(monkeypatch) + + class _Unavailable: + @classmethod + def from_config(cls, config: Any) -> Any: + raise AgentSdkUnavailableError("claude_agent_sdk is not installed") + + monkeypatch.setattr("src.llm.AgentSdkClient", _Unavailable) + + runner = AgentRunner({"agent": {"runtime": "sdk"}}) + out = await runner.run_sub_agent("icinga", "triage") + + assert out["status"] == "error" + assert "not installed" in out["summary"] + assert out["data"]["runtime"] == "sdk" + + +# ------------------------------------------------------------------- helpers + + +def test_with_context_noop_when_empty() -> None: + assert _with_context("do thing", None) == "do thing" + assert _with_context("do thing", {}) == "do thing" + + +def test_with_context_appends_json() -> None: + out = _with_context("do thing", {"account": "123"}) + assert out.startswith("do thing") + assert "Context from orchestrator" in out + assert '"account": "123"' in out + + +def test_error_result_shape() -> None: + out = _error_result("icinga", "boom", 1.5) + assert out == { + "agent": "icinga", + "status": "error", + "summary": "boom", + "findings": [], + "data": {"runtime": "sdk"}, + "tool_calls": 0, + "tool_errors": 0, + "rounds_used": 0, + "duration_seconds": 1.5, + "error": "boom", + } + + +def test_sdk_result_to_dict_error_status() -> None: + result = SdkResult(text="", is_error=True, error_message="timeout") + out = _sdk_result_to_dict("icinga", result, 2.0) + assert out["status"] == "error" + assert out["summary"] == "timeout" + assert out["findings"] == [] + assert out["duration_seconds"] == 2.0 From 7475b4a77dd23852b9693e6ea917990551e08cda Mon Sep 17 00:00:00 2001 From: Andy Xie Date: Tue, 9 Jun 2026 02:21:56 +0800 Subject: [PATCH 2/2] feat(agent): port Icinga sub-agent to the Agent SDK (phase 2 pilot) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Splits the Icinga sub-agent into a first-class skill + a thin SDK profile, the Phase-2 pilot for running a sub-agent on the Claude Agent SDK. - skills/icinga-triage/SKILL.md: the Icinga triage workflow as an Agent Skill (state model, the two monitoring GitHub repos, the Step-0->diagnose procedure, gated write ops). Loads strict with zero warnings; parsec-native, domain=icinga. This is the "skill = reusable capability" half. - src/agent/icinga_sdk.py: build_icinga_sdk_profile() returns the skill + the monitoring-mcp (SSE) and GitHub (HTTP) MCP servers the legacy query_icinga / github tools already use, so the SDK consumes the same backends directly. Config-only and SDK-import-light (unit-testable). - runner.py: the SDK branch now applies the per-agent profile (sdk_profile_for) — Icinga loads its skill + servers; other agents get an empty profile. The "agent = running instance that loads the skill" half. Still gated by agent.runtime: sdk (default legacy) -> zero behavior change. Skill discovery in-cluster depends on the image baking skills/ (#27); the runner seam depends on #30. End-to-end Icinga-on-SDK run is verified in the personal NERC cluster (results to be commented on the PR). 7 Icinga tests + runner suite green; full suite passes. Co-Authored-By: Claude Opus 4.8 --- skills/icinga-triage/SKILL.md | 120 ++++++++++++++++++++++++++++++++++ src/agent/icinga_sdk.py | 85 ++++++++++++++++++++++++ src/agent/runner.py | 12 ++-- tests/test_icinga_sdk.py | 78 ++++++++++++++++++++++ 4 files changed, 291 insertions(+), 4 deletions(-) create mode 100644 skills/icinga-triage/SKILL.md create mode 100644 src/agent/icinga_sdk.py create mode 100644 tests/test_icinga_sdk.py diff --git a/skills/icinga-triage/SKILL.md b/skills/icinga-triage/SKILL.md new file mode 100644 index 0000000..dfada61 --- /dev/null +++ b/skills/icinga-triage/SKILL.md @@ -0,0 +1,120 @@ +--- +name: icinga-triage +description: > + Triage and diagnose an Icinga2 monitoring alert by correlating live host/service + state with the check-script source and Icinga GitOps config from GitHub, then + produce a root cause and an action plan. Use when someone reports a monitoring + alert, a host or service is DOWN / CRITICAL / WARNING / UNKNOWN, or asks why an + Icinga check is failing. +license: MIT +allowed-tools: + - query_icinga + - fetch_github_file + - search_github_repo +metadata: + author: parsec-team + maturity: sample +parsec: + version: "1.0.0" + domain: icinga + requires_mcp: + - icinga + - github + cost_estimate_per_call_usd: 1.38 +--- + +# Icinga Alert Triage + +You are an expert Icinga SRE. Diagnose Icinga monitoring alerts by combining **live +Icinga state** with **check-script source** and **Icinga GitOps config** from GitHub. + +## When to use + +- A monitoring alert fired (host DOWN / service CRITICAL, WARNING, or UNKNOWN). +- Someone asks "why is this Icinga check failing / red?" or pastes a dashboard alert. +- You need to correlate a monitoring problem with the script or config that produced it. + +## Tools + +1. **query_icinga** — Icinga2 hosts, services, problems, downtimes, comments. Can also + acknowledge, schedule downtime, force a recheck (see Write Operations). +2. **fetch_github_file** — fetch monitoring scripts and Icinga config from GitHub. +3. **search_github_repo** — find paths in a repo by substring. + +## Reference repositories + +| Repo | Purpose | Key paths | +|------|---------|-----------| +| `rhpds/monitoring-scripts` | Custom check scripts (`.sh`/`.py`/`.pl`) | `monitoring/