From 1fcb17ab8e964eb5514a7b9759c5f9acc64bf089 Mon Sep 17 00:00:00 2001 From: Dima Vremekno Date: Sun, 22 Feb 2026 20:53:42 -0500 Subject: [PATCH 01/13] Add context generation feature for BlueBoxAgent session replay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow the agent to save a structured context file (JSON + Markdown) capturing the successful path through a session — routines used, parameters, post-processing code, and output description — so a new agent instance can replay it without trial and error. Co-Authored-By: Claude Opus 4.6 --- bluebox/agents/bluebox_agent.py | 152 ++++++- bluebox/data_models/agents/__init__.py | 3 + bluebox/data_models/agents/context.py | 245 +++++++++++ bluebox/scripts/run_bluebox_agent.py | 49 ++- .../unit/agents/test_bluebox_agent_context.py | 393 ++++++++++++++++++ 5 files changed, 839 insertions(+), 3 deletions(-) create mode 100644 bluebox/data_models/agents/__init__.py create mode 100644 bluebox/data_models/agents/context.py create mode 100644 tests/unit/agents/test_bluebox_agent_context.py diff --git a/bluebox/agents/bluebox_agent.py b/bluebox/agents/bluebox_agent.py index 82fce628..a2d5a856 100644 --- a/bluebox/agents/bluebox_agent.py +++ b/bluebox/agents/bluebox_agent.py @@ -13,6 +13,7 @@ import json from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime +from pathlib import Path from textwrap import dedent from typing import Any, Callable @@ -21,6 +22,7 @@ from bluebox.agents.abstract_agent import AbstractAgent, AgentCard, agent_tool from bluebox.agents.workspace import AgentWorkspace, LocalWorkspace from bluebox.config import Config +from bluebox.data_models.agents.context import BlueBoxAgentContext, RoutineUsed from bluebox.data_models.browser_agent import ( BrowserAgentDoneEvent, BrowserAgentErrorEvent, @@ -79,6 +81,7 @@ class BlueBoxAgent(AbstractAgent): Your workspace has the following structure: - `raw/` — routine result JSON 
files, saved automatically when routines execute - `outputs/` — write all your generated output files here (CSV, JSON, JSONL, etc.) + - `context/` — context files (JSON + Markdown) saved by `generate_context`, used for session replay **Pre-loaded variables in `run_python_code`:** - `routine_results` — list of dicts, one per JSON file in raw/ @@ -128,6 +131,7 @@ class BlueBoxAgent(AbstractAgent): - When using `execute_browser_task`, write a specific, step-by-step task description so the browser agent knows exactly what to do. - If your first search returns no results, try rephrasing the task description before giving up. - Be concise in responses. + - After successfully completing a task (output verified and correct), call `generate_context` to save a reusable recipe. Fill in all fields accurately — especially `routines_used` with the exact routine_ids and parameters that worked, and `python_code` with the final working snippet. """).strip() ## Magic methods @@ -144,6 +148,7 @@ def __init__( workspace: AgentWorkspace | None = None, auth_headers_provider: Callable[[], dict[str, str]] | None = None, on_llm_response: Callable[[LLMChatResponse], None] | None = None, + context_file: str | None = None, ) -> None: """ Initialize the BlueBox Agent. @@ -160,6 +165,9 @@ def __init__( auth_headers_provider: Optional callback that returns auth headers for downstream API calls. If not provided, falls back to Config.VECTORLY_SERVICE_TOKEN. on_llm_response: Optional callback invoked after each LLM call with the response (for token tracking). + context_file: Optional path to a context file (.json or .md) from a previous + session. If not provided, auto-discovers the most recent context file from + the workspace's context/ directory. 
""" # Validate required config self._auth_headers_provider = auth_headers_provider @@ -169,6 +177,9 @@ def __init__( self._workspace = workspace or LocalWorkspace() self._routine_cache: dict[str, RoutineInfo] = {} + # Load context from explicit path or auto-discover from workspace + self._agent_context: BlueBoxAgentContext | None = self._load_context(context_file) + super().__init__( emit_message_callable=emit_message_callable, persist_chat_callable=persist_chat_callable, @@ -186,10 +197,11 @@ def __init__( self._is_blocklist_mode = self._sandbox_mode == "blocklist" logger.debug( - "BlueBoxAgent initialized with model: %s, chat_thread_id: %s, sandbox_mode: %s", + "BlueBoxAgent initialized with model: %s, chat_thread_id: %s, sandbox_mode: %s, has_context: %s", llm_model, self._thread.id, self._sandbox_mode, + self._agent_context is not None, ) ## Auth @@ -210,6 +222,8 @@ def _get_system_prompt(self) -> str: prompt = self.SYSTEM_PROMPT + time_info if self._is_blocklist_mode: prompt += self._get_blocklist_sandbox_prompt_section() + if self._agent_context: + prompt += self._get_context_prompt_section() return prompt def _get_blocklist_sandbox_prompt_section(self) -> str: @@ -281,6 +295,96 @@ def _validate_routine_params(self, routine_id: str, params: dict[str, Any]) -> s ) return None + ## Context loading + + _CONTEXT_PROMPT_MAX_CHARS: int = 20_000 + + def _load_context(self, context_file: str | None) -> BlueBoxAgentContext | None: + """Load context from an explicit path or auto-discover from workspace context/ dir. + + Resolution order for context_file: + 1. Absolute path + 2. Relative to workspace root + + If context_file is None, auto-discovers the most recent .json file in context/. 
+ """ + if context_file: + return self._load_context_from_path(context_file) + return self._auto_discover_context() + + def _load_context_from_path(self, context_file: str) -> BlueBoxAgentContext | None: + """Load a context file from an explicit path (absolute or workspace-relative).""" + path = Path(context_file) + if not path.is_absolute(): + path = self._workspace.root_path / context_file + if not path.is_file(): + logger.warning("Context file not found: %s", path) + return None + try: + raw = path.read_text(encoding="utf-8") + if path.suffix == ".md": + ctx = BlueBoxAgentContext.from_markdown(raw) + else: + ctx = BlueBoxAgentContext.model_validate_json(raw) + logger.info("Loaded agent context from %s", path) + return ctx + except Exception as e: + logger.warning("Failed to load context file %s: %s", path, e) + return None + + def _auto_discover_context(self) -> BlueBoxAgentContext | None: + """Find and load the most recent .json context file from workspace context/ dir.""" + context_dir = self._workspace.root_path / "context" + if not context_dir.is_dir(): + return None + json_files = sorted(context_dir.glob("*.json"), key=lambda p: p.stat().st_mtime, reverse=True) + if not json_files: + return None + return self._load_context_from_path(str(json_files[0])) + + def _get_context_prompt_section(self) -> str: + """Build a system prompt section from a loaded BlueBoxAgentContext.""" + ctx = self._agent_context + if not ctx: + return "" + + lines: list[str] = [ + "\n\n## Prior Context", + "A previous session already solved a similar task. 
Use this as a starting point.", + f"\n**Goal:** {ctx.goal}", + f"\n**Summary:** {ctx.summary}", + ] + + if ctx.routines_used: + lines.append("\n**Routines that worked:**") + for r in ctx.routines_used: + param_str = json.dumps(r.parameters, default=str) if r.parameters else "{}" + lines.append(f"- `{r.routine_id}` ({r.routine_name}): {param_str}") + + if ctx.python_code: + lines.append(f"\n**Post-processing code that worked:**\n```python\n{ctx.python_code}\n```") + + if ctx.output_files: + lines.append(f"\n**Output files produced:** {', '.join(ctx.output_files)}") + + lines.append(f"\n**Output description:** {ctx.output_description}") + lines.append( + "\n> Replicate this path if the user's goal matches. " + "Adjust parameters for the new request. Skip trial and error." + ) + + section = "\n".join(lines) + + if len(section) > self._CONTEXT_PROMPT_MAX_CHARS: + truncated = section[:self._CONTEXT_PROMPT_MAX_CHARS] + truncated += ( + "\n\n... (context truncated — use `read_workspace_file` to read " + "the full context files in `context/` for more detail)" + ) + return truncated + + return section + ## Tool handlers @agent_tool() @@ -661,3 +765,49 @@ def _read_workspace_file( end_line: Optional 1-based end line number (inclusive). Omit to read to the end. """ return self._workspace.read_file(path, start_line=start_line, end_line=end_line) + + @agent_tool() + def _generate_context(self, context: BlueBoxAgentContext) -> dict[str, Any]: + """ + Save a context file capturing what worked in this session. + + Call this after successfully completing the user's task. The context + file lets another BlueBoxAgent instance replicate the successful path + without trial and error. Both a JSON file (canonical) and a Markdown + file (human-readable) are saved to the context/ directory. + + Args: + context: The full context object describing what was accomplished. + Must include goal, summary, output_description, and routines_used + with exact routine_ids and parameters that worked. 
Include python_code + if post-processing was used, and output_files listing what was produced. + """ + # Save canonical JSON + json_content = context.model_dump_json(indent=2) + try: + json_save = self._workspace.save_file("context", "agent_context", json_content) + except Exception as e: + logger.exception("Failed to save context JSON: %s", e) + return {"error": f"Failed to save context file: {e}"} + + # Save companion Markdown + md_content = context.to_markdown() + try: + md_save = self._workspace.save_file( + "context", "agent_context", md_content, extension=".md", + ) + except Exception as e: + logger.warning("Failed to save context Markdown: %s", e) + md_save = {"output_file": None} + + logger.info("Context files saved: %s, %s", json_save["output_file"], md_save["output_file"]) + return { + "success": True, + "context_json": json_save["output_file"], + "context_md": md_save["output_file"], + "message": ( + f"Context saved to {json_save['output_file']}. " + "A new BlueBoxAgent using this workspace will automatically " + "load this context and replicate the successful path." + ), + } diff --git a/bluebox/data_models/agents/__init__.py b/bluebox/data_models/agents/__init__.py new file mode 100644 index 00000000..94c56aed --- /dev/null +++ b/bluebox/data_models/agents/__init__.py @@ -0,0 +1,3 @@ +from bluebox.data_models.agents.context import BlueBoxAgentContext, RoutineUsed + +__all__ = ["BlueBoxAgentContext", "RoutineUsed"] diff --git a/bluebox/data_models/agents/context.py b/bluebox/data_models/agents/context.py new file mode 100644 index 00000000..ede66f49 --- /dev/null +++ b/bluebox/data_models/agents/context.py @@ -0,0 +1,245 @@ +""" +bluebox/data_models/agents/context.py + +Data model for BlueBoxAgent context files. + +A context file captures the successful path through a BlueBoxAgent +conversation so a new agent instance can replay it without trial and error. 
+ +Supports dual format: canonical JSON (Pydantic) and human-readable Markdown, +with round-trip parsing between both. +""" + +from __future__ import annotations + +import json +import re +from datetime import datetime, timezone +from typing import Any + +from pydantic import BaseModel, Field + + +class RoutineUsed(BaseModel): + """One routine that was successfully executed during the session.""" + + routine_id: str = Field(..., description="Routine ID from search_routines results") + routine_name: str = Field(..., description="Human-readable routine name") + parameters: dict[str, Any] = Field( + default_factory=dict, + description="Parameter name-to-value mapping that produced correct results", + ) + + +class BlueBoxAgentContext(BaseModel): + """ + Structured snapshot of a successful BlueBoxAgent session. + + Serialized to JSON and saved to context/. Consumed by a new + BlueBoxAgent instance via system prompt injection. + """ + + version: int = Field(default=1, description="Schema version for forward compatibility") + goal: str = Field(..., description="The user's original request, in their own words") + routines_used: list[RoutineUsed] = Field( + default_factory=list, + description="Routines that produced useful results, in execution order", + ) + python_code: str | None = Field( + default=None, + description="The final working Python post-processing snippet", + ) + output_files: list[str] = Field( + default_factory=list, + description="Relative paths of output files written to outputs/", + ) + output_description: str = Field( + ..., + description="Prose description of the output: format, key fields, row count if known", + ) + summary: str = Field( + ..., + description="1-2 sentence human-readable summary of what was accomplished", + ) + generated_at: datetime = Field( + default_factory=lambda: datetime.now(tz=timezone.utc), + description="When this context was generated", + ) + + # ── Markdown serialization ─────────────────────────────────────────── + + def 
to_markdown(self) -> str: + """Render as structured Markdown with fenced sections for round-tripping.""" + lines: list[str] = [] + lines.append("# BlueBox Agent Context") + lines.append("") + lines.append(f"**Version:** {self.version}") + lines.append(f"**Generated:** {self.generated_at.isoformat()}") + lines.append("") + + lines.append("## Goal") + lines.append("") + lines.append(self.goal) + lines.append("") + + lines.append("## Summary") + lines.append("") + lines.append(self.summary) + lines.append("") + + if self.routines_used: + lines.append("## Routines Used") + lines.append("") + for r in self.routines_used: + lines.append(f"### {r.routine_name} (`{r.routine_id}`)") + lines.append("") + if r.parameters: + lines.append("**Parameters:**") + lines.append("```json") + lines.append(json.dumps(r.parameters, indent=2, default=str)) + lines.append("```") + else: + lines.append("No parameters.") + lines.append("") + + if self.python_code: + lines.append("## Python Code") + lines.append("") + lines.append("```python") + lines.append(self.python_code) + lines.append("```") + lines.append("") + + if self.output_files: + lines.append("## Output Files") + lines.append("") + for f in self.output_files: + lines.append(f"- `{f}`") + lines.append("") + + lines.append("## Output Description") + lines.append("") + lines.append(self.output_description) + lines.append("") + + return "\n".join(lines) + + @classmethod + def from_markdown(cls, text: str) -> BlueBoxAgentContext: + """Parse structured Markdown back into BlueBoxAgentContext.""" + sections = _split_markdown_sections(text) + + # Version and generated_at from header + version = 1 + generated_at = datetime.now(tz=timezone.utc) + header = sections.get("BlueBox Agent Context", "") + version_match = re.search(r"\*\*Version:\*\*\s*(\d+)", header) + if version_match: + version = int(version_match.group(1)) + generated_match = re.search(r"\*\*Generated:\*\*\s*(.+)", header) + if generated_match: + try: + generated_at = 
datetime.fromisoformat(generated_match.group(1).strip()) + except ValueError: + pass + + goal = sections.get("Goal", "").strip() + summary = sections.get("Summary", "").strip() + output_description = sections.get("Output Description", "").strip() + + # Parse routines from subsections + routines_used = _parse_routines_section(sections.get("Routines Used", "")) + + # Parse python code from fenced block + python_code = _extract_fenced_block(sections.get("Python Code", ""), "python") + + # Parse output files + output_files: list[str] = [] + for line in sections.get("Output Files", "").splitlines(): + match = re.match(r"^-\s*`(.+)`", line.strip()) + if match: + output_files.append(match.group(1)) + + return cls( + version=version, + goal=goal, + summary=summary, + output_description=output_description, + routines_used=routines_used, + python_code=python_code, + output_files=output_files, + generated_at=generated_at, + ) + + +# ── Markdown parsing helpers ───────────────────────────────────────────── + + +def _split_markdown_sections(text: str) -> dict[str, str]: + """Split Markdown into {heading: body} pairs. 
Handles H1 and H2 levels.""" + sections: dict[str, str] = {} + current_heading: str | None = None + current_lines: list[str] = [] + + for line in text.splitlines(): + heading_match = re.match(r"^#{1,2}\s+(.+)$", line) + if heading_match: + if current_heading is not None: + sections[current_heading] = "\n".join(current_lines) + current_heading = heading_match.group(1).strip() + current_lines = [] + else: + current_lines.append(line) + + if current_heading is not None: + sections[current_heading] = "\n".join(current_lines) + + return sections + + +def _extract_fenced_block(text: str, language: str | None = None) -> str | None: + """Extract the first fenced code block from text, optionally matching language.""" + if language: + pattern = rf"```{re.escape(language)}\n(.*?)```" + else: + pattern = r"```\w*\n(.*?)```" + match = re.search(pattern, text, re.DOTALL) + if match: + return match.group(1).rstrip("\n") + return None + + +def _parse_routines_section(text: str) -> list[RoutineUsed]: + """Parse the Routines Used section into RoutineUsed objects.""" + routines: list[RoutineUsed] = [] + if not text.strip(): + return routines + + # Split on H3 headers: ### RoutineName (`routine_id`) + parts = re.split(r"^###\s+", text, flags=re.MULTILINE) + for part in parts: + if not part.strip(): + continue + # Parse header: "RoutineName (`routine_id`)" + header_match = re.match(r"^(.+?)\s*\(`([^`]+)`\)", part) + if not header_match: + continue + routine_name = header_match.group(1).strip() + routine_id = header_match.group(2).strip() + + # Parse parameters from JSON code block + parameters: dict[str, Any] = {} + params_json = _extract_fenced_block(part, "json") + if params_json: + try: + parameters = json.loads(params_json) + except json.JSONDecodeError: + pass + + routines.append(RoutineUsed( + routine_id=routine_id, + routine_name=routine_name, + parameters=parameters, + )) + + return routines diff --git a/bluebox/scripts/run_bluebox_agent.py 
b/bluebox/scripts/run_bluebox_agent.py index 015b9a1e..30daa002 100644 --- a/bluebox/scripts/run_bluebox_agent.py +++ b/bluebox/scripts/run_bluebox_agent.py @@ -50,17 +50,24 @@ class BlueBoxAgentTUI(AbstractAgentTUI): """Multi-pane TUI for the BlueBox Agent.""" TITLE = "BlueBox Agent" - SLASH_COMMANDS = BASE_SLASH_COMMANDS - HELP_TEXT = BASE_HELP_TEXT + SLASH_COMMANDS = { + **BASE_SLASH_COMMANDS, + "/generate_context": "Save a reusable context file from this session", + } + HELP_TEXT = BASE_HELP_TEXT + ( + "\n [cyan]/generate_context[/cyan] Save a reusable context file from this session\n" + ) SHOW_SAVED_FILES_PANE = True def __init__( self, llm_model: LLMModel, workspace_dir: str = "./bluebox_workspace", + context_file: str | None = None, ) -> None: super().__init__(llm_model, working_dir=workspace_dir) self._workspace_dir = workspace_dir + self._context_file = context_file # ── Abstract implementations ───────────────────────────────────────── @@ -70,6 +77,7 @@ def _create_agent(self) -> AbstractAgent: stream_chunk_callable=self._handle_stream_chunk, llm_model=self._llm_model, workspace=LocalWorkspace(self._workspace_dir), + context_file=self._context_file, ) def _print_welcome(self) -> None: @@ -129,6 +137,36 @@ def _add(p: str) -> None: _add(r.get("output_file", "")) return paths + # ── Custom slash commands ───────────────────────────────────────── + + _GENERATE_CONTEXT_PROMPT: str = ( + "Review everything we accomplished in this session and call the `generate_context` tool " + "to save a reusable context file. 
Include:\n" + "- The original goal (what I asked for)\n" + "- All routines that produced useful results (with exact routine_ids and parameter values)\n" + "- The final working Python post-processing code (if any)\n" + "- The output files that were created\n" + "- A clear description of what the output looks like\n" + "- A concise summary of what was accomplished\n\n" + "Be thorough and accurate — another agent will use this context to replicate our work." + ) + + def _handle_custom_command(self, cmd: str, raw_input: str) -> bool: + if cmd == "/generate_context": + chat = self.query_one("#chat-log", RichLog) + if not self._agent: + chat.write(Text.from_markup("[red]Agent not initialized.[/red]")) + return True + chat.write(Text.from_markup( + "[yellow]Generating context from this session...[/yellow]" + )) + self._processing = True + self._assistant_header_printed = False + self._status_update_printed = False + self._send_to_agent(self._GENERATE_CONTEXT_PROMPT) + return True + return False + # ─── Entry point ───────────────────────────────────────────────────────────── @@ -142,6 +180,12 @@ def main() -> None: default="./bluebox_workspace", help="Workspace directory. 
Raw results in raw/, output files in outputs/ (default: ./bluebox_workspace)", ) + parser.add_argument( + "--context-file", + type=str, + default=None, + help="Path to a context file (.json or .md) from a previous session to guide the agent", + ) parser.add_argument("-q", "--quiet", action="store_true", help="Suppress logs") parser.add_argument("--log-file", type=str, default=None, help="Log to file") args = parser.parse_args() @@ -186,6 +230,7 @@ def main() -> None: app = BlueBoxAgentTUI( llm_model=llm_model, workspace_dir=args.workspace_dir, + context_file=args.context_file, ) app.run() diff --git a/tests/unit/agents/test_bluebox_agent_context.py b/tests/unit/agents/test_bluebox_agent_context.py new file mode 100644 index 00000000..7783cbb8 --- /dev/null +++ b/tests/unit/agents/test_bluebox_agent_context.py @@ -0,0 +1,393 @@ +""" +tests/unit/agents/test_bluebox_agent_context.py + +Unit tests for BlueBoxAgentContext data model and context generation/loading +in BlueBoxAgent. +""" + +import json +from datetime import datetime, timezone +from pathlib import Path +from typing import Any +from unittest.mock import MagicMock + +import pytest + +from bluebox.data_models.agents.context import BlueBoxAgentContext, RoutineUsed + + +# ============================================================================= +# Fixtures +# ============================================================================= + + +@pytest.fixture +def sample_context() -> BlueBoxAgentContext: + """A fully populated context for testing.""" + return BlueBoxAgentContext( + version=1, + goal="Find one-way train tickets from NYC to Boston on March 15, 2026", + routines_used=[ + RoutineUsed( + routine_id="Routine_abc123", + routine_name="AmtrakOneWaySearch", + parameters={"origin": "New York", "destination": "Boston", "date": "2026-03-15"}, + ), + RoutineUsed( + routine_id="Routine_def456", + routine_name="AmtrakPriceFilter", + parameters={"max_price": 100}, + ), + ], + python_code=( + 'import csv\n' + 
'with open("outputs/trains.csv", "w") as f:\n' + ' writer = csv.DictWriter(f, fieldnames=["departure", "price"])\n' + ' writer.writeheader()\n' + ' for rr in routine_results:\n' + ' for train in rr["result"]["data"]["trains"]:\n' + ' writer.writerow(train)\n' + 'print("Done")' + ), + output_files=["outputs/trains.csv"], + output_description="CSV with columns: departure, price. 12 rows of Amtrak trains under $100.", + summary="Searched Amtrak for NYC-Boston trains on March 15, filtered by price, and exported to CSV.", + generated_at=datetime(2026, 2, 22, 10, 30, 0, tzinfo=timezone.utc), + ) + + +@pytest.fixture +def minimal_context() -> BlueBoxAgentContext: + """A context with only required fields.""" + return BlueBoxAgentContext( + goal="Search for flights", + output_description="JSON with flight data", + summary="Found flights.", + ) + + +# ============================================================================= +# BlueBoxAgentContext model tests +# ============================================================================= + + +class TestBlueBoxAgentContextModel: + """Tests for the Pydantic model itself.""" + + def test_json_roundtrip(self, sample_context: BlueBoxAgentContext) -> None: + """Serialize to JSON and back, verify equality.""" + json_str = sample_context.model_dump_json(indent=2) + restored = BlueBoxAgentContext.model_validate_json(json_str) + assert restored.version == sample_context.version + assert restored.goal == sample_context.goal + assert restored.summary == sample_context.summary + assert restored.output_description == sample_context.output_description + assert restored.python_code == sample_context.python_code + assert restored.output_files == sample_context.output_files + assert len(restored.routines_used) == 2 + assert restored.routines_used[0].routine_id == "Routine_abc123" + assert restored.routines_used[1].parameters == {"max_price": 100} + assert isinstance(restored.generated_at, datetime) + + def test_version_defaults_to_1(self, 
minimal_context: BlueBoxAgentContext) -> None: + assert minimal_context.version == 1 + + def test_generated_at_defaults_to_now(self, minimal_context: BlueBoxAgentContext) -> None: + assert isinstance(minimal_context.generated_at, datetime) + # Should be recent (within last 10 seconds) + delta = datetime.now(tz=timezone.utc) - minimal_context.generated_at + assert delta.total_seconds() < 10 + + def test_optional_fields_default(self, minimal_context: BlueBoxAgentContext) -> None: + assert minimal_context.routines_used == [] + assert minimal_context.python_code is None + assert minimal_context.output_files == [] + + +# ============================================================================= +# Markdown round-trip tests +# ============================================================================= + + +class TestMarkdownRoundTrip: + """Tests for to_markdown() and from_markdown().""" + + def test_to_markdown_has_expected_sections(self, sample_context: BlueBoxAgentContext) -> None: + md = sample_context.to_markdown() + assert "# BlueBox Agent Context" in md + assert "## Goal" in md + assert "## Summary" in md + assert "## Routines Used" in md + assert "## Python Code" in md + assert "## Output Files" in md + assert "## Output Description" in md + assert "**Version:** 1" in md + assert "**Generated:**" in md + + def test_to_markdown_contains_routine_details(self, sample_context: BlueBoxAgentContext) -> None: + md = sample_context.to_markdown() + assert "AmtrakOneWaySearch" in md + assert "Routine_abc123" in md + assert '"origin": "New York"' in md + + def test_to_markdown_contains_python_code(self, sample_context: BlueBoxAgentContext) -> None: + md = sample_context.to_markdown() + assert "```python" in md + assert "csv.DictWriter" in md + + def test_from_markdown_roundtrip(self, sample_context: BlueBoxAgentContext) -> None: + """from_markdown(to_markdown(ctx)) should produce an equivalent model.""" + md = sample_context.to_markdown() + restored = 
BlueBoxAgentContext.from_markdown(md) + assert restored.version == sample_context.version + assert restored.goal == sample_context.goal + assert restored.summary == sample_context.summary + assert restored.output_description == sample_context.output_description + assert restored.python_code == sample_context.python_code + assert restored.output_files == sample_context.output_files + assert len(restored.routines_used) == len(sample_context.routines_used) + for orig, rest in zip(sample_context.routines_used, restored.routines_used): + assert rest.routine_id == orig.routine_id + assert rest.routine_name == orig.routine_name + assert rest.parameters == orig.parameters + + def test_from_markdown_no_python_code(self, minimal_context: BlueBoxAgentContext) -> None: + """Markdown with no Python Code section should parse python_code as None.""" + md = minimal_context.to_markdown() + assert "## Python Code" not in md + restored = BlueBoxAgentContext.from_markdown(md) + assert restored.python_code is None + + def test_from_markdown_no_routines(self, minimal_context: BlueBoxAgentContext) -> None: + md = minimal_context.to_markdown() + assert "## Routines Used" not in md + restored = BlueBoxAgentContext.from_markdown(md) + assert restored.routines_used == [] + + def test_from_markdown_no_output_files(self, minimal_context: BlueBoxAgentContext) -> None: + md = minimal_context.to_markdown() + restored = BlueBoxAgentContext.from_markdown(md) + assert restored.output_files == [] + + +# ============================================================================= +# Context loading tests (BlueBoxAgent integration) +# ============================================================================= + + +class TestContextLoading: + """Tests for context file loading in BlueBoxAgent.""" + + def _make_agent( + self, + workspace_dir: Path, + context_file: str | None = None, + ) -> Any: + """Create a BlueBoxAgent with mocked dependencies.""" + from bluebox.agents.bluebox_agent import 
BlueBoxAgent + from bluebox.agents.workspace import LocalWorkspace + + return BlueBoxAgent( + emit_message_callable=MagicMock(), + workspace=LocalWorkspace(str(workspace_dir)), + auth_headers_provider=lambda: {"X-Service-Token": "test"}, + context_file=context_file, + ) + + def test_loads_json_context_file(self, tmp_path: Path, sample_context: BlueBoxAgentContext) -> None: + ctx_file = tmp_path / "my_context.json" + ctx_file.write_text(sample_context.model_dump_json(indent=2)) + + agent = self._make_agent(tmp_path, context_file=str(ctx_file)) + assert agent._agent_context is not None + assert agent._agent_context.goal == sample_context.goal + + def test_loads_markdown_context_file(self, tmp_path: Path, sample_context: BlueBoxAgentContext) -> None: + ctx_file = tmp_path / "my_context.md" + ctx_file.write_text(sample_context.to_markdown()) + + agent = self._make_agent(tmp_path, context_file=str(ctx_file)) + assert agent._agent_context is not None + assert agent._agent_context.goal == sample_context.goal + + def test_workspace_relative_path(self, tmp_path: Path, sample_context: BlueBoxAgentContext) -> None: + context_dir = tmp_path / "context" + context_dir.mkdir() + ctx_file = context_dir / "my_context.json" + ctx_file.write_text(sample_context.model_dump_json(indent=2)) + + agent = self._make_agent(tmp_path, context_file="context/my_context.json") + assert agent._agent_context is not None + assert agent._agent_context.goal == sample_context.goal + + def test_auto_discovers_from_workspace(self, tmp_path: Path, sample_context: BlueBoxAgentContext) -> None: + context_dir = tmp_path / "context" + context_dir.mkdir() + ctx_file = context_dir / "agent_context.json" + ctx_file.write_text(sample_context.model_dump_json(indent=2)) + + agent = self._make_agent(tmp_path) + assert agent._agent_context is not None + assert agent._agent_context.goal == sample_context.goal + + def test_auto_discovers_most_recent(self, tmp_path: Path) -> None: + """When multiple context files 
exist, loads the most recently modified.""" + import time + + context_dir = tmp_path / "context" + context_dir.mkdir() + + old = BlueBoxAgentContext(goal="old goal", output_description="old", summary="old") + (context_dir / "old.json").write_text(old.model_dump_json()) + time.sleep(0.05) # ensure mtime differs + + new = BlueBoxAgentContext(goal="new goal", output_description="new", summary="new") + (context_dir / "new.json").write_text(new.model_dump_json()) + + agent = self._make_agent(tmp_path) + assert agent._agent_context is not None + assert agent._agent_context.goal == "new goal" + + def test_explicit_context_file_overrides_auto_discovery( + self, tmp_path: Path, sample_context: BlueBoxAgentContext, + ) -> None: + # Put one context in workspace + context_dir = tmp_path / "context" + context_dir.mkdir() + auto_ctx = BlueBoxAgentContext(goal="auto goal", output_description="auto", summary="auto") + (context_dir / "auto.json").write_text(auto_ctx.model_dump_json()) + + # Put explicit context elsewhere + explicit_file = tmp_path / "explicit.json" + explicit_file.write_text(sample_context.model_dump_json(indent=2)) + + agent = self._make_agent(tmp_path, context_file=str(explicit_file)) + assert agent._agent_context is not None + assert agent._agent_context.goal == sample_context.goal + + def test_invalid_context_file_ignored(self, tmp_path: Path) -> None: + agent = self._make_agent(tmp_path, context_file="/nonexistent/path.json") + assert agent._agent_context is None + + def test_malformed_json_ignored(self, tmp_path: Path) -> None: + bad_file = tmp_path / "bad.json" + bad_file.write_text("not valid json!!!") + agent = self._make_agent(tmp_path, context_file=str(bad_file)) + assert agent._agent_context is None + + def test_no_context_dir_no_error(self, tmp_path: Path) -> None: + agent = self._make_agent(tmp_path) + assert agent._agent_context is None + + +# ============================================================================= +# System prompt injection 
tests +# ============================================================================= + + +class TestContextPromptInjection: + """Tests for _get_context_prompt_section and system prompt integration.""" + + def _make_agent(self, tmp_path: Path, context: BlueBoxAgentContext) -> Any: + from bluebox.agents.bluebox_agent import BlueBoxAgent + from bluebox.agents.workspace import LocalWorkspace + + ctx_file = tmp_path / "context.json" + ctx_file.write_text(context.model_dump_json(indent=2)) + + return BlueBoxAgent( + emit_message_callable=MagicMock(), + workspace=LocalWorkspace(str(tmp_path)), + auth_headers_provider=lambda: {"X-Service-Token": "test"}, + context_file=str(ctx_file), + ) + + def test_context_section_in_system_prompt(self, tmp_path: Path, sample_context: BlueBoxAgentContext) -> None: + agent = self._make_agent(tmp_path, sample_context) + prompt = agent._get_system_prompt() + assert "## Prior Context" in prompt + assert sample_context.goal in prompt + assert sample_context.summary in prompt + assert "Routine_abc123" in prompt + assert "AmtrakOneWaySearch" in prompt + + def test_context_section_includes_python_code(self, tmp_path: Path, sample_context: BlueBoxAgentContext) -> None: + agent = self._make_agent(tmp_path, sample_context) + prompt = agent._get_system_prompt() + assert "```python" in prompt + assert "csv.DictWriter" in prompt + + def test_context_section_truncation(self, tmp_path: Path) -> None: + """Context over 20K chars gets truncated with a hint.""" + big_context = BlueBoxAgentContext( + goal="x" * 25_000, + output_description="desc", + summary="summary", + ) + agent = self._make_agent(tmp_path, big_context) + section = agent._get_context_prompt_section() + assert len(section) < 25_000 + assert "context truncated" in section + assert "read_workspace_file" in section + + def test_no_context_no_section(self, tmp_path: Path) -> None: + from bluebox.agents.bluebox_agent import BlueBoxAgent + from bluebox.agents.workspace import LocalWorkspace + + 
agent = BlueBoxAgent( + emit_message_callable=MagicMock(), + workspace=LocalWorkspace(str(tmp_path)), + auth_headers_provider=lambda: {"X-Service-Token": "test"}, + ) + prompt = agent._get_system_prompt() + assert "## Prior Context" not in prompt + + +# ============================================================================= +# generate_context tool tests +# ============================================================================= + + +class TestGenerateContextTool: + """Tests for the _generate_context agent tool.""" + + def _make_agent(self, tmp_path: Path) -> Any: + from bluebox.agents.bluebox_agent import BlueBoxAgent + from bluebox.agents.workspace import LocalWorkspace + + return BlueBoxAgent( + emit_message_callable=MagicMock(), + workspace=LocalWorkspace(str(tmp_path)), + auth_headers_provider=lambda: {"X-Service-Token": "test"}, + ) + + def test_tool_is_registered(self) -> None: + from bluebox.agents.bluebox_agent import BlueBoxAgent + tools = BlueBoxAgent._collect_tools() + tool_names = [meta.name for meta, _ in tools] + assert "generate_context" in tool_names + + def test_saves_both_files(self, tmp_path: Path, sample_context: BlueBoxAgentContext) -> None: + agent = self._make_agent(tmp_path) + result = agent._generate_context(context=sample_context) + + assert result["success"] is True + assert result["context_json"] is not None + assert result["context_md"] is not None + + # Verify JSON file exists and is valid + json_path = tmp_path / result["context_json"] + assert json_path.is_file() + loaded = BlueBoxAgentContext.model_validate_json(json_path.read_text()) + assert loaded.goal == sample_context.goal + + # Verify MD file exists + md_path = tmp_path / result["context_md"] + assert md_path.is_file() + assert "## Goal" in md_path.read_text() + + def test_saves_to_context_subdirectory(self, tmp_path: Path, minimal_context: BlueBoxAgentContext) -> None: + agent = self._make_agent(tmp_path) + result = agent._generate_context(context=minimal_context) 
+ assert "context/" in result["context_json"] + assert "context/" in result["context_md"] From 08b7ebab03bf98b925a04f2e1160d5ef330331a9 Mon Sep 17 00:00:00 2001 From: Dima Vremekno Date: Sun, 22 Feb 2026 21:08:34 -0500 Subject: [PATCH 02/13] Allow /generate_context to accept an optional focus prompt Users can now type `/generate_context focus on the flight search part` to guide the agent toward a specific aspect of the session when generating the context file. Co-Authored-By: Claude Opus 4.6 --- bluebox/scripts/run_bluebox_agent.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/bluebox/scripts/run_bluebox_agent.py b/bluebox/scripts/run_bluebox_agent.py index 30daa002..87bdc74a 100644 --- a/bluebox/scripts/run_bluebox_agent.py +++ b/bluebox/scripts/run_bluebox_agent.py @@ -52,10 +52,11 @@ class BlueBoxAgentTUI(AbstractAgentTUI): TITLE = "BlueBox Agent" SLASH_COMMANDS = { **BASE_SLASH_COMMANDS, - "/generate_context": "Save a reusable context file from this session", + "/generate_context": "Save a reusable context file (optionally with a focus prompt)", } HELP_TEXT = BASE_HELP_TEXT + ( - "\n [cyan]/generate_context[/cyan] Save a reusable context file from this session\n" + "\n [cyan]/generate_context[/cyan] Save a reusable context file from this session" + "\n Optionally add a focus: [cyan]/generate_context focus on the flight search part[/cyan]\n" ) SHOW_SAVED_FILES_PANE = True @@ -152,18 +153,24 @@ def _add(p: str) -> None: ) def _handle_custom_command(self, cmd: str, raw_input: str) -> bool: - if cmd == "/generate_context": + if raw_input.lower().startswith("/generate_context"): chat = self.query_one("#chat-log", RichLog) if not self._agent: chat.write(Text.from_markup("[red]Agent not initialized.[/red]")) return True + + user_focus = raw_input[len("/generate_context"):].strip() + prompt = self._GENERATE_CONTEXT_PROMPT + if user_focus: + prompt += f"\n\n**User focus:** {user_focus}" + chat.write(Text.from_markup( 
"[yellow]Generating context from this session...[/yellow]" )) self._processing = True self._assistant_header_printed = False self._status_update_printed = False - self._send_to_agent(self._GENERATE_CONTEXT_PROMPT) + self._send_to_agent(prompt) return True return False From 24a3d2005338fa0b928e2ea2906208014e2add8f Mon Sep 17 00:00:00 2001 From: Dima Vremekno Date: Sun, 22 Feb 2026 21:11:18 -0500 Subject: [PATCH 03/13] Use to_markdown() for context prompt injection instead of hand-built formatting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Single source of truth — the markdown rendering logic lives on the model, not duplicated in the agent. Co-Authored-By: Claude Opus 4.6 --- bluebox/agents/bluebox_agent.py | 35 +++++++-------------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/bluebox/agents/bluebox_agent.py b/bluebox/agents/bluebox_agent.py index a2d5a856..6f9aae4e 100644 --- a/bluebox/agents/bluebox_agent.py +++ b/bluebox/agents/bluebox_agent.py @@ -348,40 +348,19 @@ def _get_context_prompt_section(self) -> str: if not ctx: return "" - lines: list[str] = [ - "\n\n## Prior Context", - "A previous session already solved a similar task. Use this as a starting point.", - f"\n**Goal:** {ctx.goal}", - f"\n**Summary:** {ctx.summary}", - ] - - if ctx.routines_used: - lines.append("\n**Routines that worked:**") - for r in ctx.routines_used: - param_str = json.dumps(r.parameters, default=str) if r.parameters else "{}" - lines.append(f"- `{r.routine_id}` ({r.routine_name}): {param_str}") - - if ctx.python_code: - lines.append(f"\n**Post-processing code that worked:**\n```python\n{ctx.python_code}\n```") - - if ctx.output_files: - lines.append(f"\n**Output files produced:** {', '.join(ctx.output_files)}") - - lines.append(f"\n**Output description:** {ctx.output_description}") - lines.append( - "\n> Replicate this path if the user's goal matches. " - "Adjust parameters for the new request. 
Skip trial and error." + section = ( + "\n\n## Prior Context\n" + "A previous session already solved a similar task. Use this as a starting point.\n" + "Replicate this path if the user's goal matches. " + "Adjust parameters for the new request. Skip trial and error.\n\n" + + ctx.to_markdown() ) - section = "\n".join(lines) - if len(section) > self._CONTEXT_PROMPT_MAX_CHARS: - truncated = section[:self._CONTEXT_PROMPT_MAX_CHARS] - truncated += ( + section = section[:self._CONTEXT_PROMPT_MAX_CHARS] + ( "\n\n... (context truncated — use `read_workspace_file` to read " "the full context files in `context/` for more detail)" ) - return truncated return section From ebf444606bf7036e141ecb0bc792b81b42b22bcd Mon Sep 17 00:00:00 2001 From: Dima Vremekno Date: Sun, 22 Feb 2026 21:16:09 -0500 Subject: [PATCH 04/13] Flatten generate_context tool params for LLM compatibility The nested Pydantic model produced a JSON schema with $defs/$ref that LLMs couldn't reliably fill. Switch to flat parameters (goal, summary, routines_used, etc.) and construct the model internally. Co-Authored-By: Claude Opus 4.6 --- bluebox/agents/bluebox_agent.py | 40 ++++++++++++++++--- .../unit/agents/test_bluebox_agent_context.py | 25 +++++++++++- 2 files changed, 58 insertions(+), 7 deletions(-) diff --git a/bluebox/agents/bluebox_agent.py b/bluebox/agents/bluebox_agent.py index 6f9aae4e..337068d5 100644 --- a/bluebox/agents/bluebox_agent.py +++ b/bluebox/agents/bluebox_agent.py @@ -746,7 +746,15 @@ def _read_workspace_file( return self._workspace.read_file(path, start_line=start_line, end_line=end_line) @agent_tool() - def _generate_context(self, context: BlueBoxAgentContext) -> dict[str, Any]: + def _generate_context( + self, + goal: str, + summary: str, + output_description: str, + routines_used: list[dict[str, Any]] | None = None, + python_code: str | None = None, + output_files: list[str] | None = None, + ) -> dict[str, Any]: """ Save a context file capturing what worked in this session. 
@@ -756,11 +764,33 @@ def _generate_context(self, context: BlueBoxAgentContext) -> dict[str, Any]: file (human-readable) are saved to the context/ directory. Args: - context: The full context object describing what was accomplished. - Must include goal, summary, output_description, and routines_used - with exact routine_ids and parameters that worked. Include python_code - if post-processing was used, and output_files listing what was produced. + goal: The user's original request, in their own words. + summary: 1-2 sentence summary of what was accomplished. + output_description: Description of the output: format, key fields, + row count if known (e.g. "CSV with 47 rows, columns: name, price, url"). + routines_used: List of routines that worked. Each dict must have keys: + routine_id (str), routine_name (str), and parameters (dict with + the parameter values that produced correct results). + python_code: The final working Python snippet passed to run_python_code. + Omit if no post-processing was needed. + output_files: Relative paths of files written to outputs/ + (e.g. ["outputs/results.csv"]). 
""" + try: + validated_routines = [ + RoutineUsed.model_validate(r) for r in (routines_used or []) + ] + context = BlueBoxAgentContext( + goal=goal, + summary=summary, + output_description=output_description, + routines_used=validated_routines, + python_code=python_code, + output_files=output_files or [], + ) + except Exception as e: + return {"error": f"Failed to build context: {e}"} + # Save canonical JSON json_content = context.model_dump_json(indent=2) try: diff --git a/tests/unit/agents/test_bluebox_agent_context.py b/tests/unit/agents/test_bluebox_agent_context.py index 7783cbb8..ceabf28b 100644 --- a/tests/unit/agents/test_bluebox_agent_context.py +++ b/tests/unit/agents/test_bluebox_agent_context.py @@ -369,7 +369,14 @@ def test_tool_is_registered(self) -> None: def test_saves_both_files(self, tmp_path: Path, sample_context: BlueBoxAgentContext) -> None: agent = self._make_agent(tmp_path) - result = agent._generate_context(context=sample_context) + result = agent._generate_context( + goal=sample_context.goal, + summary=sample_context.summary, + output_description=sample_context.output_description, + routines_used=[r.model_dump() for r in sample_context.routines_used], + python_code=sample_context.python_code, + output_files=sample_context.output_files, + ) assert result["success"] is True assert result["context_json"] is not None @@ -388,6 +395,20 @@ def test_saves_both_files(self, tmp_path: Path, sample_context: BlueBoxAgentCont def test_saves_to_context_subdirectory(self, tmp_path: Path, minimal_context: BlueBoxAgentContext) -> None: agent = self._make_agent(tmp_path) - result = agent._generate_context(context=minimal_context) + result = agent._generate_context( + goal=minimal_context.goal, + summary=minimal_context.summary, + output_description=minimal_context.output_description, + ) assert "context/" in result["context_json"] assert "context/" in result["context_md"] + + def test_validates_bad_routines_used(self, tmp_path: Path) -> None: + agent = 
self._make_agent(tmp_path) + result = agent._generate_context( + goal="test", + summary="test", + output_description="test", + routines_used=[{"bad_key": "missing routine_id"}], + ) + assert "error" in result From 2cee95d0db0a6e878e15618192795c7f44e54191 Mon Sep 17 00:00:00 2001 From: Dima Vremekno Date: Sun, 22 Feb 2026 21:26:08 -0500 Subject: [PATCH 05/13] Fix context file path resolution doubling the workspace prefix _auto_discover_context returns paths like 'workspace/context/file.json' (already relative to cwd). _load_context_from_path was prepending workspace root_path again to every relative path, producing 'workspace/workspace/...'. Fix: try the path as-is first, and only prepend the workspace root if the path is relative and does not exist as given. Co-Authored-By: Claude Opus 4.6 --- bluebox/agents/bluebox_agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bluebox/agents/bluebox_agent.py b/bluebox/agents/bluebox_agent.py index 337068d5..96ec7f55 100644 --- a/bluebox/agents/bluebox_agent.py +++ b/bluebox/agents/bluebox_agent.py @@ -315,7 +315,7 @@ def _load_context(self, context_file: str | None) -> BlueBoxAgentContext | None: def _load_context_from_path(self, context_file: str) -> BlueBoxAgentContext | None: """Load a context file from an explicit path (absolute or workspace-relative).""" path = Path(context_file) - if not path.is_absolute(): + if not path.is_file() and not path.is_absolute(): path = self._workspace.root_path / context_file if not path.is_file(): logger.warning("Context file not found: %s", path) From 13a75b92c39faa3bb6824c9c43dcefe4c8d5c171 Mon Sep 17 00:00:00 2001 From: Dima Vremekno Date: Sun, 22 Feb 2026 21:30:14 -0500 Subject: [PATCH 06/13] Auto-populate routines_used from raw/ when agent leaves it empty MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The agent was generating context files with routines_used=[] despite having executed routines. Two fixes: 1.
_generate_context now falls back to _extract_routines_from_raw() which reads the raw/ execution result files and extracts routine_id, routine_name, and parameters from completed executions. This is a safety net — if the agent provides routines, those are used instead. 2. Strengthened the /generate_context prompt and system prompt rule to emphasize that routines_used must never be empty. Co-Authored-By: Claude Opus 4.6 --- bluebox/agents/bluebox_agent.py | 37 ++++++++- bluebox/scripts/run_bluebox_agent.py | 10 ++- .../unit/agents/test_bluebox_agent_context.py | 80 +++++++++++++++++++ 3 files changed, 123 insertions(+), 4 deletions(-) diff --git a/bluebox/agents/bluebox_agent.py b/bluebox/agents/bluebox_agent.py index 96ec7f55..8c6d5d76 100644 --- a/bluebox/agents/bluebox_agent.py +++ b/bluebox/agents/bluebox_agent.py @@ -131,7 +131,7 @@ class BlueBoxAgent(AbstractAgent): - When using `execute_browser_task`, write a specific, step-by-step task description so the browser agent knows exactly what to do. - If your first search returns no results, try rephrasing the task description before giving up. - Be concise in responses. - - After successfully completing a task (output verified and correct), call `generate_context` to save a reusable recipe. Fill in all fields accurately — especially `routines_used` with the exact routine_ids and parameters that worked, and `python_code` with the final working snippet. + - After successfully completing a task (output verified and correct), call `generate_context` to save a reusable recipe. **NEVER leave `routines_used` empty** — include every routine that was executed, with exact routine_id, routine_name, and parameter values. Also include `python_code` with the final working snippet. """).strip() ## Magic methods @@ -364,6 +364,31 @@ def _get_context_prompt_section(self) -> str: return section + def _extract_routines_from_raw(self) -> list[RoutineUsed]: + """Extract routine info from raw/ execution result files. 
+ + Each raw JSON file contains routine_id, routine_name, parameters, + and status from a previous execution. Returns deduplicated list + of successfully executed routines. + """ + raw_results = self._workspace.load_raw_json() + seen: set[str] = set() + routines: list[RoutineUsed] = [] + for rr in raw_results: + rid = rr.get("routine_id") + if not rid or rid in seen: + continue + # Only include completed executions + if rr.get("status") not in ("completed", None): + continue + seen.add(rid) + routines.append(RoutineUsed( + routine_id=rid, + routine_name=rr.get("routine_name", rid), + parameters=rr.get("parameters", {}), + )) + return routines + ## Tool handlers @agent_tool() @@ -780,6 +805,16 @@ def _generate_context( validated_routines = [ RoutineUsed.model_validate(r) for r in (routines_used or []) ] + + # Auto-populate from raw/ execution results if agent didn't provide routines + if not validated_routines: + validated_routines = self._extract_routines_from_raw() + if validated_routines: + logger.info( + "Auto-populated %d routine(s) from raw/ execution results", + len(validated_routines), + ) + context = BlueBoxAgentContext( goal=goal, summary=summary, diff --git a/bluebox/scripts/run_bluebox_agent.py b/bluebox/scripts/run_bluebox_agent.py index 87bdc74a..48358f90 100644 --- a/bluebox/scripts/run_bluebox_agent.py +++ b/bluebox/scripts/run_bluebox_agent.py @@ -142,10 +142,14 @@ def _add(p: str) -> None: _GENERATE_CONTEXT_PROMPT: str = ( "Review everything we accomplished in this session and call the `generate_context` tool " - "to save a reusable context file. Include:\n" + "to save a reusable context file.\n\n" + "**CRITICAL — you MUST include `routines_used`**. For every routine that was executed, " + "provide the exact routine_id, routine_name, and the parameter values that were used. " + "Look at the execute_routines_in_parallel calls you made earlier in this conversation. 
" + "Do NOT leave routines_used empty — this is the most important field for replay.\n\n" + "Also include:\n" "- The original goal (what I asked for)\n" - "- All routines that produced useful results (with exact routine_ids and parameter values)\n" - "- The final working Python post-processing code (if any)\n" + "- The final working Python post-processing code (the last successful run_python_code call)\n" "- The output files that were created\n" "- A clear description of what the output looks like\n" "- A concise summary of what was accomplished\n\n" diff --git a/tests/unit/agents/test_bluebox_agent_context.py b/tests/unit/agents/test_bluebox_agent_context.py index ceabf28b..1c3d15d0 100644 --- a/tests/unit/agents/test_bluebox_agent_context.py +++ b/tests/unit/agents/test_bluebox_agent_context.py @@ -412,3 +412,83 @@ def test_validates_bad_routines_used(self, tmp_path: Path) -> None: routines_used=[{"bad_key": "missing routine_id"}], ) assert "error" in result + + def test_auto_populates_routines_from_raw(self, tmp_path: Path) -> None: + """When routines_used is empty, auto-populate from raw/ execution results.""" + agent = self._make_agent(tmp_path) + + # Write a fake routine result to raw/ + raw_dir = tmp_path / "raw" + raw_dir.mkdir() + (raw_dir / "result_1.json").write_text(json.dumps({ + "routine_id": "Routine_abc", + "routine_name": "TestRoutine", + "status": "completed", + "parameters": {"city": "NYC"}, + "result": {"ok": True, "data": {}}, + })) + + result = agent._generate_context( + goal="test goal", + summary="test summary", + output_description="test output", + # routines_used intentionally omitted + ) + + assert result["success"] is True + # Verify the saved context has the routine from raw/ + json_path = tmp_path / result["context_json"] + loaded = BlueBoxAgentContext.model_validate_json(json_path.read_text()) + assert len(loaded.routines_used) == 1 + assert loaded.routines_used[0].routine_id == "Routine_abc" + assert 
loaded.routines_used[0].routine_name == "TestRoutine" + assert loaded.routines_used[0].parameters == {"city": "NYC"} + + def test_auto_populate_deduplicates_routines(self, tmp_path: Path) -> None: + """Same routine_id executed multiple times should appear once.""" + agent = self._make_agent(tmp_path) + + raw_dir = tmp_path / "raw" + raw_dir.mkdir() + for i in range(3): + (raw_dir / f"result_{i}.json").write_text(json.dumps({ + "routine_id": "Routine_same", + "routine_name": "SameRoutine", + "status": "completed", + "parameters": {"q": f"query_{i}"}, + "result": {"ok": True, "data": {}}, + })) + + result = agent._generate_context( + goal="test", summary="test", output_description="test", + ) + json_path = tmp_path / result["context_json"] + loaded = BlueBoxAgentContext.model_validate_json(json_path.read_text()) + assert len(loaded.routines_used) == 1 + + def test_agent_provided_routines_not_overridden(self, tmp_path: Path) -> None: + """When agent provides routines_used, don't auto-populate.""" + agent = self._make_agent(tmp_path) + + raw_dir = tmp_path / "raw" + raw_dir.mkdir() + (raw_dir / "result_1.json").write_text(json.dumps({ + "routine_id": "Routine_from_raw", + "routine_name": "RawRoutine", + "status": "completed", + "parameters": {}, + "result": {"ok": True, "data": {}}, + })) + + result = agent._generate_context( + goal="test", summary="test", output_description="test", + routines_used=[{ + "routine_id": "Routine_agent_provided", + "routine_name": "AgentRoutine", + "parameters": {"x": 1}, + }], + ) + json_path = tmp_path / result["context_json"] + loaded = BlueBoxAgentContext.model_validate_json(json_path.read_text()) + assert len(loaded.routines_used) == 1 + assert loaded.routines_used[0].routine_id == "Routine_agent_provided" From 47a5578ff26d93e5dcdffacc8ca5ed1b95c1b01c Mon Sep 17 00:00:00 2001 From: Dima Vremekno Date: Sun, 22 Feb 2026 22:15:03 -0500 Subject: [PATCH 07/13] Replace generate_context tool with structured output call MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the @agent_tool generate_context (LLM kept failing to fill the schema) and replace it with a direct LLMClient.call_sync using response_model=BlueBoxAgentContext via OpenAI structured output. The /generate_context slash command now calls agent.generate_context() directly instead of prompting the agent loop, using previous_response_id to preserve conversation context without polluting the chat history. Also: - Rename RoutineUsed → UsedRoutine, add UsedRoutineParameter with concrete types (str|bool|int|float) to satisfy OpenAI strict schema - Create all workspace subdirs (raw/, outputs/, context/) in LocalWorkspace.__init__ Co-Authored-By: Claude Opus 4.6 --- bluebox/agents/bluebox_agent.py | 142 +++++++------- bluebox/agents/workspace.py | 8 +- bluebox/data_models/agents/__init__.py | 4 +- bluebox/data_models/agents/context.py | 51 +++-- bluebox/scripts/run_bluebox_agent.py | 56 +++--- .../unit/agents/test_bluebox_agent_context.py | 177 ++++++++++-------- tests/unit/test_read_workspace_file.py | 2 - tests/unit/test_workspace.py | 11 +- 8 files changed, 245 insertions(+), 206 deletions(-) diff --git a/bluebox/agents/bluebox_agent.py b/bluebox/agents/bluebox_agent.py index 8c6d5d76..fda99365 100644 --- a/bluebox/agents/bluebox_agent.py +++ b/bluebox/agents/bluebox_agent.py @@ -22,7 +22,7 @@ from bluebox.agents.abstract_agent import AbstractAgent, AgentCard, agent_tool from bluebox.agents.workspace import AgentWorkspace, LocalWorkspace from bluebox.config import Config -from bluebox.data_models.agents.context import BlueBoxAgentContext, RoutineUsed +from bluebox.data_models.agents.context import BlueBoxAgentContext, UsedRoutine from bluebox.data_models.browser_agent import ( BrowserAgentDoneEvent, BrowserAgentErrorEvent, @@ -131,7 +131,7 @@ class BlueBoxAgent(AbstractAgent): - When using `execute_browser_task`, write a specific, step-by-step task description so the browser agent knows exactly 
what to do. - If your first search returns no results, try rephrasing the task description before giving up. - Be concise in responses. - - After successfully completing a task (output verified and correct), call `generate_context` to save a reusable recipe. **NEVER leave `routines_used` empty** — include every routine that was executed, with exact routine_id, routine_name, and parameter values. Also include `python_code` with the final working snippet. + - Be thorough and persistent — keep iterating until the output is correct. """).strip() ## Magic methods @@ -364,7 +364,7 @@ def _get_context_prompt_section(self) -> str: return section - def _extract_routines_from_raw(self) -> list[RoutineUsed]: + def _extract_routines_from_raw(self) -> list[UsedRoutine]: """Extract routine info from raw/ execution result files. Each raw JSON file contains routine_id, routine_name, parameters, @@ -373,7 +373,7 @@ def _extract_routines_from_raw(self) -> list[RoutineUsed]: """ raw_results = self._workspace.load_raw_json() seen: set[str] = set() - routines: list[RoutineUsed] = [] + routines: list[UsedRoutine] = [] for rr in raw_results: rid = rr.get("routine_id") if not rid or rid in seen: @@ -382,7 +382,7 @@ def _extract_routines_from_raw(self) -> list[RoutineUsed]: if rr.get("status") not in ("completed", None): continue seen.add(rid) - routines.append(RoutineUsed( + routines.append(UsedRoutine.from_dict_params( routine_id=rid, routine_name=rr.get("routine_name", rid), parameters=rr.get("parameters", {}), @@ -770,88 +770,72 @@ def _read_workspace_file( """ return self._workspace.read_file(path, start_line=start_line, end_line=end_line) - @agent_tool() - def _generate_context( - self, - goal: str, - summary: str, - output_description: str, - routines_used: list[dict[str, Any]] | None = None, - python_code: str | None = None, - output_files: list[str] | None = None, - ) -> dict[str, Any]: - """ - Save a context file capturing what worked in this session. 
+ ## Context generation (structured output, called by TUI slash command) + + def generate_context(self, focus: str | None = None) -> BlueBoxAgentContext: + """Generate a context file from the current session using structured output. - Call this after successfully completing the user's task. The context - file lets another BlueBoxAgent instance replicate the successful path - without trial and error. Both a JSON file (canonical) and a Markdown - file (human-readable) are saved to the context/ directory. + Makes a direct LLM call with response_model=BlueBoxAgentContext to get + a validated Pydantic model back. Saves both JSON and Markdown files to + the workspace context/ directory. Args: - goal: The user's original request, in their own words. - summary: 1-2 sentence summary of what was accomplished. - output_description: Description of the output: format, key fields, - row count if known (e.g. "CSV with 47 rows, columns: name, price, url"). - routines_used: List of routines that worked. Each dict must have keys: - routine_id (str), routine_name (str), and parameters (dict with - the parameter values that produced correct results). - python_code: The final working Python snippet passed to run_python_code. - Omit if no post-processing was needed. - output_files: Relative paths of files written to outputs/ - (e.g. ["outputs/results.csv"]). - """ - try: - validated_routines = [ - RoutineUsed.model_validate(r) for r in (routines_used or []) - ] + focus: Optional user-provided focus prompt to guide context generation. + + Returns: + The generated BlueBoxAgentContext. 
- # Auto-populate from raw/ execution results if agent didn't provide routines - if not validated_routines: - validated_routines = self._extract_routines_from_raw() - if validated_routines: - logger.info( - "Auto-populated %d routine(s) from raw/ execution results", - len(validated_routines), - ) - - context = BlueBoxAgentContext( - goal=goal, - summary=summary, - output_description=output_description, - routines_used=validated_routines, - python_code=python_code, - output_files=output_files or [], + Raises: + ValueError: If the LLM fails to produce a valid context. + """ + raw_routines = self._extract_routines_from_raw() + + system_prompt = ( + "You are analyzing a BlueBox Agent conversation to extract a reusable context file. " + "Fill in every field of the BlueBoxAgentContext schema based on the conversation.\n\n" + "CRITICAL: routines_used must include every routine that was executed with exact " + "routine_id, routine_name, and parameter values.\n" + "Include the final working python_code snippet if post-processing was done.\n" + "Include output_files with relative paths of files written to outputs/.\n" + ) + if raw_routines: + system_prompt += "\nRoutines found in execution results:\n" + for r in raw_routines: + system_prompt += f"- {r.routine_name} ({r.routine_id}): {json.dumps(r.parameters_as_dict(), default=str)}\n" + if focus: + system_prompt += f"\nUser focus: {focus}\n" + + # One-off structured output call that sees the full conversation via + # OpenAI's response chaining (previous_response_id reconstructs the + # thread server-side). We don't update self._previous_response_id + # afterward so this call doesn't affect the agent loop. 
+ response = self.llm_client.call_sync( + input="Generate a reusable context file from this conversation.", + system_prompt=system_prompt, + response_model=BlueBoxAgentContext, + previous_response_id=self._previous_response_id, + ) + context = response.parsed + if context is None: + raise ValueError("LLM failed to produce a valid BlueBoxAgentContext") + + # Safety net: merge raw routines if LLM left routines_used empty + if not context.routines_used and raw_routines: + context.routines_used = raw_routines + logger.info( + "Auto-populated %d routine(s) from raw/ execution results", + len(raw_routines), ) - except Exception as e: - return {"error": f"Failed to build context: {e}"} # Save canonical JSON - json_content = context.model_dump_json(indent=2) - try: - json_save = self._workspace.save_file("context", "agent_context", json_content) - except Exception as e: - logger.exception("Failed to save context JSON: %s", e) - return {"error": f"Failed to save context file: {e}"} + json_save = self._workspace.save_file( + "context", "agent_context", context.model_dump_json(indent=2), + ) # Save companion Markdown - md_content = context.to_markdown() - try: - md_save = self._workspace.save_file( - "context", "agent_context", md_content, extension=".md", - ) - except Exception as e: - logger.warning("Failed to save context Markdown: %s", e) - md_save = {"output_file": None} + md_save = self._workspace.save_file( + "context", "agent_context", context.to_markdown(), extension=".md", + ) logger.info("Context files saved: %s, %s", json_save["output_file"], md_save["output_file"]) - return { - "success": True, - "context_json": json_save["output_file"], - "context_md": md_save["output_file"], - "message": ( - f"Context saved to {json_save['output_file']}. " - "A new BlueBoxAgent using this workspace will automatically " - "load this context and replicate the successful path." 
- ), - } + return context diff --git a/bluebox/agents/workspace.py b/bluebox/agents/workspace.py index 999c0db8..37e61fe0 100644 --- a/bluebox/agents/workspace.py +++ b/bluebox/agents/workspace.py @@ -27,9 +27,10 @@ class AgentWorkspace(ABC): """ Abstract workspace that agents use for file I/O. - A workspace has two logical subdirectories: + A workspace has three logical subdirectories: - raw/ : input data (e.g., routine results saved automatically) - outputs/: agent-generated output files (e.g., CSVs, processed JSON) + - context/: reusable context files from successful sessions """ @property @@ -113,7 +114,7 @@ def diff_outputs(self, before: dict[str, float]) -> list[str]: @abstractmethod def ensure_dirs(self) -> None: - """Ensure the workspace directory structure exists (raw/, outputs/).""" + """Ensure the workspace directory structure exists (raw/, outputs/, context/).""" class LocalWorkspace(AgentWorkspace): @@ -123,8 +124,10 @@ def __init__(self, workspace_dir: str = "./bluebox_workspace") -> None: self._workspace_dir = Path(workspace_dir) self._raw_dir = self._workspace_dir / "raw" self._outputs_dir = self._workspace_dir / "outputs" + self._context_dir = self._workspace_dir / "context" self._execution_counter: int = 0 self._counter_lock = threading.Lock() + self.ensure_dirs() @property def root_path(self) -> Path: @@ -230,3 +233,4 @@ def diff_outputs(self, before: dict[str, float]) -> list[str]: def ensure_dirs(self) -> None: self._raw_dir.mkdir(parents=True, exist_ok=True) self._outputs_dir.mkdir(parents=True, exist_ok=True) + self._context_dir.mkdir(parents=True, exist_ok=True) diff --git a/bluebox/data_models/agents/__init__.py b/bluebox/data_models/agents/__init__.py index 94c56aed..663a502b 100644 --- a/bluebox/data_models/agents/__init__.py +++ b/bluebox/data_models/agents/__init__.py @@ -1,3 +1,3 @@ -from bluebox.data_models.agents.context import BlueBoxAgentContext, RoutineUsed +from bluebox.data_models.agents.context import BlueBoxAgentContext, 
UsedRoutine, UsedRoutineParameter -__all__ = ["BlueBoxAgentContext", "RoutineUsed"] +__all__ = ["BlueBoxAgentContext", "UsedRoutine", "UsedRoutineParameter"] diff --git a/bluebox/data_models/agents/context.py b/bluebox/data_models/agents/context.py index ede66f49..f5f2f329 100644 --- a/bluebox/data_models/agents/context.py +++ b/bluebox/data_models/agents/context.py @@ -20,16 +20,38 @@ from pydantic import BaseModel, Field -class RoutineUsed(BaseModel): +class UsedRoutineParameter(BaseModel): + """A single parameter key-value pair used in a routine execution.""" + + key: str = Field(..., description="Parameter name") + value: str | bool | int | float = Field(..., description="Parameter value") + + +class UsedRoutine(BaseModel): """One routine that was successfully executed during the session.""" routine_id: str = Field(..., description="Routine ID from search_routines results") routine_name: str = Field(..., description="Human-readable routine name") - parameters: dict[str, Any] = Field( - default_factory=dict, - description="Parameter name-to-value mapping that produced correct results", + parameters: list[UsedRoutineParameter] = Field( + default_factory=list, + description="Parameter key-value pairs that produced correct results", ) + def parameters_as_dict(self) -> dict[str, str | bool | int | float]: + """Convert parameters list to a dict for convenience.""" + return {p.key: p.value for p in self.parameters} + + @classmethod + def from_dict_params( + cls, routine_id: str, routine_name: str, parameters: dict[str, Any], + ) -> UsedRoutine: + """Convenience constructor that accepts a dict of parameters.""" + return cls( + routine_id=routine_id, + routine_name=routine_name, + parameters=[UsedRoutineParameter(key=k, value=v) for k, v in parameters.items()], + ) + class BlueBoxAgentContext(BaseModel): """ @@ -41,7 +63,7 @@ class BlueBoxAgentContext(BaseModel): version: int = Field(default=1, description="Schema version for forward compatibility") goal: str = 
Field(..., description="The user's original request, in their own words") - routines_used: list[RoutineUsed] = Field( + routines_used: list[UsedRoutine] = Field( default_factory=list, description="Routines that produced useful results, in execution order", ) @@ -96,7 +118,7 @@ def to_markdown(self) -> str: if r.parameters: lines.append("**Parameters:**") lines.append("```json") - lines.append(json.dumps(r.parameters, indent=2, default=str)) + lines.append(json.dumps(r.parameters_as_dict(), indent=2, default=str)) lines.append("```") else: lines.append("No parameters.") @@ -209,9 +231,9 @@ def _extract_fenced_block(text: str, language: str | None = None) -> str | None: return None -def _parse_routines_section(text: str) -> list[RoutineUsed]: - """Parse the Routines Used section into RoutineUsed objects.""" - routines: list[RoutineUsed] = [] +def _parse_routines_section(text: str) -> list[UsedRoutine]: + """Parse the Routines Used section into UsedRoutine objects.""" + routines: list[UsedRoutine] = [] if not text.strip(): return routines @@ -228,18 +250,19 @@ def _parse_routines_section(text: str) -> list[RoutineUsed]: routine_id = header_match.group(2).strip() # Parse parameters from JSON code block - parameters: dict[str, Any] = {} + param_list: list[UsedRoutineParameter] = [] params_json = _extract_fenced_block(part, "json") if params_json: try: - parameters = json.loads(params_json) - except json.JSONDecodeError: + params_dict = json.loads(params_json) + param_list = [UsedRoutineParameter(key=k, value=v) for k, v in params_dict.items()] + except (json.JSONDecodeError, TypeError): pass - routines.append(RoutineUsed( + routines.append(UsedRoutine( routine_id=routine_id, routine_name=routine_name, - parameters=parameters, + parameters=param_list, )) return routines diff --git a/bluebox/scripts/run_bluebox_agent.py b/bluebox/scripts/run_bluebox_agent.py index 48358f90..abacd7a7 100644 --- a/bluebox/scripts/run_bluebox_agent.py +++ 
b/bluebox/scripts/run_bluebox_agent.py @@ -32,6 +32,7 @@ from rich.console import Console from rich.text import Text +from textual import work from textual.widgets import RichLog from bluebox.agents.bluebox_agent import BlueBoxAgent @@ -140,22 +141,6 @@ def _add(p: str) -> None: # ── Custom slash commands ───────────────────────────────────────── - _GENERATE_CONTEXT_PROMPT: str = ( - "Review everything we accomplished in this session and call the `generate_context` tool " - "to save a reusable context file.\n\n" - "**CRITICAL — you MUST include `routines_used`**. For every routine that was executed, " - "provide the exact routine_id, routine_name, and the parameter values that were used. " - "Look at the execute_routines_in_parallel calls you made earlier in this conversation. " - "Do NOT leave routines_used empty — this is the most important field for replay.\n\n" - "Also include:\n" - "- The original goal (what I asked for)\n" - "- The final working Python post-processing code (the last successful run_python_code call)\n" - "- The output files that were created\n" - "- A clear description of what the output looks like\n" - "- A concise summary of what was accomplished\n\n" - "Be thorough and accurate — another agent will use this context to replicate our work." 
- ) - def _handle_custom_command(self, cmd: str, raw_input: str) -> bool: if raw_input.lower().startswith("/generate_context"): chat = self.query_one("#chat-log", RichLog) @@ -163,21 +148,44 @@ def _handle_custom_command(self, cmd: str, raw_input: str) -> bool: chat.write(Text.from_markup("[red]Agent not initialized.[/red]")) return True - user_focus = raw_input[len("/generate_context"):].strip() - prompt = self._GENERATE_CONTEXT_PROMPT - if user_focus: - prompt += f"\n\n**User focus:** {user_focus}" - + user_focus = raw_input[len("/generate_context"):].strip() or None chat.write(Text.from_markup( "[yellow]Generating context from this session...[/yellow]" )) self._processing = True - self._assistant_header_printed = False - self._status_update_printed = False - self._send_to_agent(prompt) + self._generate_context_async(user_focus) return True return False + @work(thread=True) + def _generate_context_async(self, focus: str | None) -> None: + """Run generate_context in a background thread via structured output.""" + try: + assert isinstance(self._agent, BlueBoxAgent) + context = self._agent.generate_context(focus=focus) + self.call_from_thread(self._show_context_success, context) + except Exception as e: + self.call_from_thread(self._show_context_error, str(e)) + + def _show_context_success(self, context: Any) -> None: + """Display context generation success in the chat pane.""" + chat = self.query_one("#chat-log", RichLog) + chat.write(Text.from_markup( + f"[bold green]Context saved![/bold green]\n" + f"[dim]Goal:[/dim] {context.goal}\n" + f"[dim]Summary:[/dim] {context.summary}\n" + f"[dim]Routines:[/dim] {len(context.routines_used)}" + )) + self._processing = False + self._update_status() + + def _show_context_error(self, error: str) -> None: + """Display context generation error in the chat pane.""" + chat = self.query_one("#chat-log", RichLog) + chat.write(Text.from_markup(f"[bold red]Context generation failed:[/bold red] {error}")) + self._processing = False + 
self._update_status() + # ─── Entry point ───────────────────────────────────────────────────────────── diff --git a/tests/unit/agents/test_bluebox_agent_context.py b/tests/unit/agents/test_bluebox_agent_context.py index 1c3d15d0..e2403ee4 100644 --- a/tests/unit/agents/test_bluebox_agent_context.py +++ b/tests/unit/agents/test_bluebox_agent_context.py @@ -13,7 +13,7 @@ import pytest -from bluebox.data_models.agents.context import BlueBoxAgentContext, RoutineUsed +from bluebox.data_models.agents.context import BlueBoxAgentContext, UsedRoutine, UsedRoutineParameter # ============================================================================= @@ -28,12 +28,12 @@ def sample_context() -> BlueBoxAgentContext: version=1, goal="Find one-way train tickets from NYC to Boston on March 15, 2026", routines_used=[ - RoutineUsed( + UsedRoutine.from_dict_params( routine_id="Routine_abc123", routine_name="AmtrakOneWaySearch", parameters={"origin": "New York", "destination": "Boston", "date": "2026-03-15"}, ), - RoutineUsed( + UsedRoutine.from_dict_params( routine_id="Routine_def456", routine_name="AmtrakPriceFilter", parameters={"max_price": 100}, @@ -86,7 +86,7 @@ def test_json_roundtrip(self, sample_context: BlueBoxAgentContext) -> None: assert restored.output_files == sample_context.output_files assert len(restored.routines_used) == 2 assert restored.routines_used[0].routine_id == "Routine_abc123" - assert restored.routines_used[1].parameters == {"max_price": 100} + assert restored.routines_used[1].parameters_as_dict() == {"max_price": 100} assert isinstance(restored.generated_at, datetime) def test_version_defaults_to_1(self, minimal_context: BlueBoxAgentContext) -> None: @@ -149,7 +149,7 @@ def test_from_markdown_roundtrip(self, sample_context: BlueBoxAgentContext) -> N for orig, rest in zip(sample_context.routines_used, restored.routines_used): assert rest.routine_id == orig.routine_id assert rest.routine_name == orig.routine_name - assert rest.parameters == 
orig.parameters + assert rest.parameters_as_dict() == orig.parameters_as_dict() def test_from_markdown_no_python_code(self, minimal_context: BlueBoxAgentContext) -> None: """Markdown with no Python Code section should parse python_code as None.""" @@ -344,12 +344,12 @@ def test_no_context_no_section(self, tmp_path: Path) -> None: # ============================================================================= -# generate_context tool tests +# generate_context (structured output) tests # ============================================================================= -class TestGenerateContextTool: - """Tests for the _generate_context agent tool.""" +class TestGenerateContext: + """Tests for the generate_context public method (structured output).""" def _make_agent(self, tmp_path: Path) -> Any: from bluebox.agents.bluebox_agent import BlueBoxAgent @@ -361,65 +361,70 @@ def _make_agent(self, tmp_path: Path) -> Any: auth_headers_provider=lambda: {"X-Service-Token": "test"}, ) - def test_tool_is_registered(self) -> None: + def _mock_llm_response(self, context: BlueBoxAgentContext) -> MagicMock: + """Create a mock LLMChatResponse with parsed context.""" + response = MagicMock() + response.parsed = context + return response + + def test_tool_is_not_registered(self) -> None: + """generate_context should NOT be an agent tool anymore.""" from bluebox.agents.bluebox_agent import BlueBoxAgent tools = BlueBoxAgent._collect_tools() tool_names = [meta.name for meta, _ in tools] - assert "generate_context" in tool_names + assert "generate_context" not in tool_names def test_saves_both_files(self, tmp_path: Path, sample_context: BlueBoxAgentContext) -> None: agent = self._make_agent(tmp_path) - result = agent._generate_context( - goal=sample_context.goal, - summary=sample_context.summary, - output_description=sample_context.output_description, - routines_used=[r.model_dump() for r in sample_context.routines_used], - python_code=sample_context.python_code, - 
output_files=sample_context.output_files, - ) + agent.llm_client.call_sync = MagicMock(return_value=self._mock_llm_response(sample_context)) + + result = agent.generate_context() - assert result["success"] is True - assert result["context_json"] is not None - assert result["context_md"] is not None + assert result.goal == sample_context.goal + assert result.summary == sample_context.summary - # Verify JSON file exists and is valid - json_path = tmp_path / result["context_json"] - assert json_path.is_file() - loaded = BlueBoxAgentContext.model_validate_json(json_path.read_text()) + # Verify both JSON and MD files were saved + context_dir = tmp_path / "context" + json_files = list(context_dir.glob("*.json")) + md_files = list(context_dir.glob("*.md")) + assert len(json_files) == 1 + assert len(md_files) == 1 + + # Verify JSON is valid + loaded = BlueBoxAgentContext.model_validate_json(json_files[0].read_text()) assert loaded.goal == sample_context.goal - # Verify MD file exists - md_path = tmp_path / result["context_md"] - assert md_path.is_file() - assert "## Goal" in md_path.read_text() + # Verify MD has expected sections + assert "## Goal" in md_files[0].read_text() def test_saves_to_context_subdirectory(self, tmp_path: Path, minimal_context: BlueBoxAgentContext) -> None: agent = self._make_agent(tmp_path) - result = agent._generate_context( - goal=minimal_context.goal, - summary=minimal_context.summary, - output_description=minimal_context.output_description, - ) - assert "context/" in result["context_json"] - assert "context/" in result["context_md"] + agent.llm_client.call_sync = MagicMock(return_value=self._mock_llm_response(minimal_context)) - def test_validates_bad_routines_used(self, tmp_path: Path) -> None: + agent.generate_context() + + context_dir = tmp_path / "context" + assert context_dir.is_dir() + assert len(list(context_dir.glob("*.json"))) == 1 + assert len(list(context_dir.glob("*.md"))) == 1 + + def test_raises_on_none_parsed(self, tmp_path: 
Path) -> None: + """Should raise ValueError when LLM returns None parsed result.""" agent = self._make_agent(tmp_path) - result = agent._generate_context( - goal="test", - summary="test", - output_description="test", - routines_used=[{"bad_key": "missing routine_id"}], - ) - assert "error" in result + response = MagicMock() + response.parsed = None + agent.llm_client.call_sync = MagicMock(return_value=response) + + with pytest.raises(ValueError, match="failed to produce"): + agent.generate_context() def test_auto_populates_routines_from_raw(self, tmp_path: Path) -> None: - """When routines_used is empty, auto-populate from raw/ execution results.""" + """When LLM returns empty routines_used, auto-populate from raw/.""" agent = self._make_agent(tmp_path) # Write a fake routine result to raw/ raw_dir = tmp_path / "raw" - raw_dir.mkdir() + raw_dir.mkdir(exist_ok=True) (raw_dir / "result_1.json").write_text(json.dumps({ "routine_id": "Routine_abc", "routine_name": "TestRoutine", @@ -428,28 +433,28 @@ def test_auto_populates_routines_from_raw(self, tmp_path: Path) -> None: "result": {"ok": True, "data": {}}, })) - result = agent._generate_context( + # LLM returns context with empty routines_used + context_from_llm = BlueBoxAgentContext( goal="test goal", summary="test summary", output_description="test output", - # routines_used intentionally omitted + routines_used=[], ) + agent.llm_client.call_sync = MagicMock(return_value=self._mock_llm_response(context_from_llm)) - assert result["success"] is True - # Verify the saved context has the routine from raw/ - json_path = tmp_path / result["context_json"] - loaded = BlueBoxAgentContext.model_validate_json(json_path.read_text()) - assert len(loaded.routines_used) == 1 - assert loaded.routines_used[0].routine_id == "Routine_abc" - assert loaded.routines_used[0].routine_name == "TestRoutine" - assert loaded.routines_used[0].parameters == {"city": "NYC"} + result = agent.generate_context() + + assert len(result.routines_used) 
== 1 + assert result.routines_used[0].routine_id == "Routine_abc" + assert result.routines_used[0].routine_name == "TestRoutine" + assert result.routines_used[0].parameters_as_dict() == {"city": "NYC"} def test_auto_populate_deduplicates_routines(self, tmp_path: Path) -> None: """Same routine_id executed multiple times should appear once.""" agent = self._make_agent(tmp_path) raw_dir = tmp_path / "raw" - raw_dir.mkdir() + raw_dir.mkdir(exist_ok=True) for i in range(3): (raw_dir / f"result_{i}.json").write_text(json.dumps({ "routine_id": "Routine_same", @@ -459,19 +464,21 @@ def test_auto_populate_deduplicates_routines(self, tmp_path: Path) -> None: "result": {"ok": True, "data": {}}, })) - result = agent._generate_context( + context_from_llm = BlueBoxAgentContext( goal="test", summary="test", output_description="test", + routines_used=[], ) - json_path = tmp_path / result["context_json"] - loaded = BlueBoxAgentContext.model_validate_json(json_path.read_text()) - assert len(loaded.routines_used) == 1 + agent.llm_client.call_sync = MagicMock(return_value=self._mock_llm_response(context_from_llm)) + + result = agent.generate_context() + assert len(result.routines_used) == 1 - def test_agent_provided_routines_not_overridden(self, tmp_path: Path) -> None: - """When agent provides routines_used, don't auto-populate.""" + def test_llm_provided_routines_not_overridden(self, tmp_path: Path) -> None: + """When LLM provides routines_used, don't auto-populate from raw/.""" agent = self._make_agent(tmp_path) raw_dir = tmp_path / "raw" - raw_dir.mkdir() + raw_dir.mkdir(exist_ok=True) (raw_dir / "result_1.json").write_text(json.dumps({ "routine_id": "Routine_from_raw", "routine_name": "RawRoutine", @@ -480,15 +487,37 @@ def test_agent_provided_routines_not_overridden(self, tmp_path: Path) -> None: "result": {"ok": True, "data": {}}, })) - result = agent._generate_context( + context_from_llm = BlueBoxAgentContext( goal="test", summary="test", output_description="test", - 
routines_used=[{ - "routine_id": "Routine_agent_provided", - "routine_name": "AgentRoutine", - "parameters": {"x": 1}, - }], + routines_used=[UsedRoutine.from_dict_params( + routine_id="Routine_llm_provided", + routine_name="LLMRoutine", + parameters={"x": 1}, + )], ) - json_path = tmp_path / result["context_json"] - loaded = BlueBoxAgentContext.model_validate_json(json_path.read_text()) - assert len(loaded.routines_used) == 1 - assert loaded.routines_used[0].routine_id == "Routine_agent_provided" + agent.llm_client.call_sync = MagicMock(return_value=self._mock_llm_response(context_from_llm)) + + result = agent.generate_context() + assert len(result.routines_used) == 1 + assert result.routines_used[0].routine_id == "Routine_llm_provided" + + def test_passes_focus_to_system_prompt(self, tmp_path: Path, minimal_context: BlueBoxAgentContext) -> None: + """Focus text should be included in the system prompt sent to LLM.""" + agent = self._make_agent(tmp_path) + agent.llm_client.call_sync = MagicMock(return_value=self._mock_llm_response(minimal_context)) + + agent.generate_context(focus="focus on the flight search part") + + call_kwargs = agent.llm_client.call_sync.call_args + system_prompt = call_kwargs.kwargs.get("system_prompt") or call_kwargs[1].get("system_prompt", "") + assert "focus on the flight search part" in system_prompt + + def test_passes_response_model(self, tmp_path: Path, minimal_context: BlueBoxAgentContext) -> None: + """Should call llm_client.call_sync with response_model=BlueBoxAgentContext.""" + agent = self._make_agent(tmp_path) + agent.llm_client.call_sync = MagicMock(return_value=self._mock_llm_response(minimal_context)) + + agent.generate_context() + + call_kwargs = agent.llm_client.call_sync.call_args + assert call_kwargs.kwargs.get("response_model") is BlueBoxAgentContext diff --git a/tests/unit/test_read_workspace_file.py b/tests/unit/test_read_workspace_file.py index a10bee9b..a38a237f 100644 --- a/tests/unit/test_read_workspace_file.py +++ 
b/tests/unit/test_read_workspace_file.py @@ -28,7 +28,6 @@ class TestPathTraversalPrevention: def test_parent_traversal_blocked(self, tmp_path: Path) -> None: """../ should be denied.""" ws = _make_workspace(tmp_path / "workspace") - ws.root_path.mkdir() result = _call(ws, "../../../etc/passwd") assert "error" in result assert "Access denied" in result["error"] @@ -36,7 +35,6 @@ def test_parent_traversal_blocked(self, tmp_path: Path) -> None: def test_absolute_path_outside_blocked(self, tmp_path: Path) -> None: """/etc/passwd should be denied.""" ws = _make_workspace(tmp_path / "workspace") - ws.root_path.mkdir() result = _call(ws, "/etc/passwd") assert "error" in result assert "Access denied" in result["error"] diff --git a/tests/unit/test_workspace.py b/tests/unit/test_workspace.py index 7aa0b38c..d6db96e9 100644 --- a/tests/unit/test_workspace.py +++ b/tests/unit/test_workspace.py @@ -90,9 +90,7 @@ def test_empty_workspace(self, tmp_path: Path) -> None: def test_lists_files_in_subdirs(self, tmp_path: Path) -> None: ws = LocalWorkspace(str(tmp_path)) - (tmp_path / "raw").mkdir() (tmp_path / "raw" / "result.json").write_text("{}") - (tmp_path / "outputs").mkdir() (tmp_path / "outputs" / "out.csv").write_text("a,b") result = ws.list_files() assert result["total_files"] == 2 @@ -106,7 +104,6 @@ class TestLoadRawJson: def test_loads_json_files(self, tmp_path: Path) -> None: ws = LocalWorkspace(str(tmp_path)) raw = tmp_path / "raw" - raw.mkdir() (raw / "a.json").write_text('{"key": "a"}') (raw / "b.json").write_text('{"key": "b"}') results = ws.load_raw_json() @@ -117,7 +114,6 @@ def test_loads_json_files(self, tmp_path: Path) -> None: def test_skips_invalid_json(self, tmp_path: Path) -> None: ws = LocalWorkspace(str(tmp_path)) raw = tmp_path / "raw" - raw.mkdir() (raw / "good.json").write_text('{"ok": true}') (raw / "bad.json").write_text("not json") results = ws.load_raw_json() @@ -134,7 +130,6 @@ class TestSnapshotAndDiffOutputs: def test_detects_new_file(self, 
tmp_path: Path) -> None: ws = LocalWorkspace(str(tmp_path)) - (tmp_path / "outputs").mkdir() before = ws.snapshot_outputs() (tmp_path / "outputs" / "new.csv").write_text("data") changed = ws.diff_outputs(before) @@ -144,7 +139,6 @@ def test_detects_new_file(self, tmp_path: Path) -> None: def test_detects_modified_file(self, tmp_path: Path) -> None: ws = LocalWorkspace(str(tmp_path)) outputs = tmp_path / "outputs" - outputs.mkdir() f = outputs / "existing.csv" f.write_text("old") before = ws.snapshot_outputs() @@ -156,7 +150,6 @@ def test_detects_modified_file(self, tmp_path: Path) -> None: def test_no_changes(self, tmp_path: Path) -> None: ws = LocalWorkspace(str(tmp_path)) outputs = tmp_path / "outputs" - outputs.mkdir() (outputs / "stable.csv").write_text("data") before = ws.snapshot_outputs() changed = ws.diff_outputs(before) @@ -166,11 +159,11 @@ def test_no_changes(self, tmp_path: Path) -> None: class TestEnsureDirs: """Tests for LocalWorkspace.ensure_dirs.""" - def test_creates_raw_and_outputs(self, tmp_path: Path) -> None: + def test_creates_raw_outputs_and_context(self, tmp_path: Path) -> None: ws = LocalWorkspace(str(tmp_path / "new_workspace")) - ws.ensure_dirs() assert (tmp_path / "new_workspace" / "raw").is_dir() assert (tmp_path / "new_workspace" / "outputs").is_dir() + assert (tmp_path / "new_workspace" / "context").is_dir() def test_idempotent(self, tmp_path: Path) -> None: ws = LocalWorkspace(str(tmp_path)) From 52d4ee5b139cd06637e8eb307a0ce33619dbc01a Mon Sep 17 00:00:00 2001 From: Dima Vremekno Date: Sun, 22 Feb 2026 22:23:40 -0500 Subject: [PATCH 08/13] Clean up review feedback from context generation PR - Remove unused re-exports from data_models/agents/__init__.py - Fix import sys grouping in run_bluebox_agent.py - Auto-discover .md context files (prefer .json, fall back to .md) - Tighten _extract_routines_from_raw to only accept status=="completed" - Type _show_context_success param as BlueBoxAgentContext instead of Any - Replace bare assert 
isinstance with explicit TypeError - Replace flaky time.sleep with os.utime in test_auto_discovers_most_recent Co-Authored-By: Claude Opus 4.6 --- bluebox/agents/bluebox_agent.py | 18 ++++++++++++------ bluebox/data_models/agents/__init__.py | 3 --- bluebox/scripts/run_bluebox_agent.py | 8 +++++--- .../unit/agents/test_bluebox_agent_context.py | 10 ++++++---- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/bluebox/agents/bluebox_agent.py b/bluebox/agents/bluebox_agent.py index fda99365..042ee036 100644 --- a/bluebox/agents/bluebox_agent.py +++ b/bluebox/agents/bluebox_agent.py @@ -333,14 +333,20 @@ def _load_context_from_path(self, context_file: str) -> BlueBoxAgentContext | No return None def _auto_discover_context(self) -> BlueBoxAgentContext | None: - """Find and load the most recent .json context file from workspace context/ dir.""" + """Find and load the most recent context file from workspace context/ dir. + + Prefers .json files over .md when both exist. Falls back to .md if no + JSON context files are present. 
+ """ context_dir = self._workspace.root_path / "context" if not context_dir.is_dir(): return None - json_files = sorted(context_dir.glob("*.json"), key=lambda p: p.stat().st_mtime, reverse=True) - if not json_files: - return None - return self._load_context_from_path(str(json_files[0])) + # Prefer JSON, fall back to Markdown + for ext in ("*.json", "*.md"): + files = sorted(context_dir.glob(ext), key=lambda p: p.stat().st_mtime, reverse=True) + if files: + return self._load_context_from_path(str(files[0])) + return None def _get_context_prompt_section(self) -> str: """Build a system prompt section from a loaded BlueBoxAgentContext.""" @@ -379,7 +385,7 @@ def _extract_routines_from_raw(self) -> list[UsedRoutine]: if not rid or rid in seen: continue # Only include completed executions - if rr.get("status") not in ("completed", None): + if rr.get("status") != "completed": continue seen.add(rid) routines.append(UsedRoutine.from_dict_params( diff --git a/bluebox/data_models/agents/__init__.py b/bluebox/data_models/agents/__init__.py index 663a502b..e69de29b 100644 --- a/bluebox/data_models/agents/__init__.py +++ b/bluebox/data_models/agents/__init__.py @@ -1,3 +0,0 @@ -from bluebox.data_models.agents.context import BlueBoxAgentContext, UsedRoutine, UsedRoutineParameter - -__all__ = ["BlueBoxAgentContext", "UsedRoutine", "UsedRoutineParameter"] diff --git a/bluebox/scripts/run_bluebox_agent.py b/bluebox/scripts/run_bluebox_agent.py index abacd7a7..4b316bdf 100644 --- a/bluebox/scripts/run_bluebox_agent.py +++ b/bluebox/scripts/run_bluebox_agent.py @@ -22,10 +22,10 @@ import argparse import shutil +import sys from datetime import datetime from pathlib import Path from typing import TYPE_CHECKING, Any -import sys from bluebox.utils.code_execution_sandbox import is_docker_available from bluebox.utils.terminal_utils import ask_yes_no, print_colored, YELLOW @@ -38,6 +38,7 @@ from bluebox.agents.bluebox_agent import BlueBoxAgent from bluebox.agents.workspace import 
LocalWorkspace from bluebox.config import Config +from bluebox.data_models.agents.context import BlueBoxAgentContext from bluebox.data_models.llms.vendors import LLMModel from bluebox.utils.cli_utils import add_model_argument, resolve_model from bluebox.utils.logger import enable_tui_logging @@ -161,13 +162,14 @@ def _handle_custom_command(self, cmd: str, raw_input: str) -> bool: def _generate_context_async(self, focus: str | None) -> None: """Run generate_context in a background thread via structured output.""" try: - assert isinstance(self._agent, BlueBoxAgent) + if not isinstance(self._agent, BlueBoxAgent): + raise TypeError(f"Expected BlueBoxAgent, got {type(self._agent).__name__}") context = self._agent.generate_context(focus=focus) self.call_from_thread(self._show_context_success, context) except Exception as e: self.call_from_thread(self._show_context_error, str(e)) - def _show_context_success(self, context: Any) -> None: + def _show_context_success(self, context: BlueBoxAgentContext) -> None: """Display context generation success in the chat pane.""" chat = self.query_one("#chat-log", RichLog) chat.write(Text.from_markup( diff --git a/tests/unit/agents/test_bluebox_agent_context.py b/tests/unit/agents/test_bluebox_agent_context.py index e2403ee4..a044108c 100644 --- a/tests/unit/agents/test_bluebox_agent_context.py +++ b/tests/unit/agents/test_bluebox_agent_context.py @@ -6,6 +6,8 @@ """ import json +import os +import time from datetime import datetime, timezone from pathlib import Path from typing import Any @@ -232,14 +234,14 @@ def test_auto_discovers_from_workspace(self, tmp_path: Path, sample_context: Blu def test_auto_discovers_most_recent(self, tmp_path: Path) -> None: """When multiple context files exist, loads the most recently modified.""" - import time - context_dir = tmp_path / "context" context_dir.mkdir() old = BlueBoxAgentContext(goal="old goal", output_description="old", summary="old") - (context_dir / 
"old.json").write_text(old.model_dump_json()) - time.sleep(0.05) # ensure mtime differs + old_file = context_dir / "old.json" + old_file.write_text(old.model_dump_json()) + past = time.time() - 10 + os.utime(old_file, (past, past)) # force mtime 10s in the past new = BlueBoxAgentContext(goal="new goal", output_description="new", summary="new") (context_dir / "new.json").write_text(new.model_dump_json()) From 5a267795cecf659696fe042758e906957c568177 Mon Sep 17 00:00:00 2001 From: Dima Vremekno Date: Sun, 22 Feb 2026 22:36:33 -0500 Subject: [PATCH 09/13] Move lazy imports to top-level, fix return types, add roundtrip timestamp assertion - Move BlueBoxAgent/LocalWorkspace imports to file top (CLAUDE.md: no lazy imports) - Fix _make_agent return types from Any to BlueBoxAgent - Remove unused Any import - Add generated_at assertion in test_from_markdown_roundtrip - Update save_file docstring to include "context" subdirectory Co-Authored-By: Claude Opus 4.6 --- bluebox/agents/workspace.py | 2 +- .../unit/agents/test_bluebox_agent_context.py | 23 +++++-------------- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/bluebox/agents/workspace.py b/bluebox/agents/workspace.py index 37e61fe0..a17863ff 100644 --- a/bluebox/agents/workspace.py +++ b/bluebox/agents/workspace.py @@ -49,7 +49,7 @@ def save_file( """Save content with a unique timestamped filename. Args: - subdirectory: Logical subdirectory ("raw" or "outputs"). + subdirectory: Logical subdirectory ("raw", "outputs", or "context"). filename_prefix: Prefix for the generated filename. content: File content to write. extension: File extension including the dot. 
diff --git a/tests/unit/agents/test_bluebox_agent_context.py b/tests/unit/agents/test_bluebox_agent_context.py index a044108c..af2dfb2a 100644 --- a/tests/unit/agents/test_bluebox_agent_context.py +++ b/tests/unit/agents/test_bluebox_agent_context.py @@ -10,11 +10,12 @@ import time from datetime import datetime, timezone from pathlib import Path -from typing import Any from unittest.mock import MagicMock import pytest +from bluebox.agents.bluebox_agent import BlueBoxAgent +from bluebox.agents.workspace import LocalWorkspace from bluebox.data_models.agents.context import BlueBoxAgentContext, UsedRoutine, UsedRoutineParameter @@ -152,6 +153,7 @@ def test_from_markdown_roundtrip(self, sample_context: BlueBoxAgentContext) -> N assert rest.routine_id == orig.routine_id assert rest.routine_name == orig.routine_name assert rest.parameters_as_dict() == orig.parameters_as_dict() + assert restored.generated_at == sample_context.generated_at def test_from_markdown_no_python_code(self, minimal_context: BlueBoxAgentContext) -> None: """Markdown with no Python Code section should parse python_code as None.""" @@ -184,11 +186,8 @@ def _make_agent( self, workspace_dir: Path, context_file: str | None = None, - ) -> Any: + ) -> BlueBoxAgent: """Create a BlueBoxAgent with mocked dependencies.""" - from bluebox.agents.bluebox_agent import BlueBoxAgent - from bluebox.agents.workspace import LocalWorkspace - return BlueBoxAgent( emit_message_callable=MagicMock(), workspace=LocalWorkspace(str(workspace_dir)), @@ -290,10 +289,7 @@ def test_no_context_dir_no_error(self, tmp_path: Path) -> None: class TestContextPromptInjection: """Tests for _get_context_prompt_section and system prompt integration.""" - def _make_agent(self, tmp_path: Path, context: BlueBoxAgentContext) -> Any: - from bluebox.agents.bluebox_agent import BlueBoxAgent - from bluebox.agents.workspace import LocalWorkspace - + def _make_agent(self, tmp_path: Path, context: BlueBoxAgentContext) -> BlueBoxAgent: ctx_file = 
tmp_path / "context.json" ctx_file.write_text(context.model_dump_json(indent=2)) @@ -333,9 +329,6 @@ def test_context_section_truncation(self, tmp_path: Path) -> None: assert "read_workspace_file" in section def test_no_context_no_section(self, tmp_path: Path) -> None: - from bluebox.agents.bluebox_agent import BlueBoxAgent - from bluebox.agents.workspace import LocalWorkspace - agent = BlueBoxAgent( emit_message_callable=MagicMock(), workspace=LocalWorkspace(str(tmp_path)), @@ -353,10 +346,7 @@ def test_no_context_no_section(self, tmp_path: Path) -> None: class TestGenerateContext: """Tests for the generate_context public method (structured output).""" - def _make_agent(self, tmp_path: Path) -> Any: - from bluebox.agents.bluebox_agent import BlueBoxAgent - from bluebox.agents.workspace import LocalWorkspace - + def _make_agent(self, tmp_path: Path) -> BlueBoxAgent: return BlueBoxAgent( emit_message_callable=MagicMock(), workspace=LocalWorkspace(str(tmp_path)), @@ -371,7 +361,6 @@ def _mock_llm_response(self, context: BlueBoxAgentContext) -> MagicMock: def test_tool_is_not_registered(self) -> None: """generate_context should NOT be an agent tool anymore.""" - from bluebox.agents.bluebox_agent import BlueBoxAgent tools = BlueBoxAgent._collect_tools() tool_names = [meta.name for meta, _ in tools] assert "generate_context" not in tool_names From 835eb9304c31d503c8be4960a68174848b9c44b6 Mon Sep 17 00:00:00 2001 From: Dima Vremekno Date: Sun, 22 Feb 2026 23:32:15 -0500 Subject: [PATCH 10/13] Show context file paths on save, show loaded context in TUI welcome - generate_context now returns GenerateContextResult (NamedTuple) with context, json_path, and md_path so callers can display file locations - TUI _show_context_success displays the saved JSON and Markdown paths - TUI _print_welcome shows loaded context goal and routine count when a context file was auto-discovered or explicitly loaded - Add loaded_context property on BlueBoxAgent to expose context state 
Co-Authored-By: Claude Opus 4.6 --- bluebox/agents/bluebox_agent.py | 27 ++++++++++-- bluebox/scripts/run_bluebox_agent.py | 21 ++++++---- .../unit/agents/test_bluebox_agent_context.py | 41 +++++++++---------- 3 files changed, 56 insertions(+), 33 deletions(-) diff --git a/bluebox/agents/bluebox_agent.py b/bluebox/agents/bluebox_agent.py index 042ee036..2e7ade15 100644 --- a/bluebox/agents/bluebox_agent.py +++ b/bluebox/agents/bluebox_agent.py @@ -15,7 +15,7 @@ from datetime import datetime from pathlib import Path from textwrap import dedent -from typing import Any, Callable +from typing import Any, Callable, NamedTuple import requests @@ -52,6 +52,14 @@ logger = get_logger(name=__name__) +class GenerateContextResult(NamedTuple): + """Return value from BlueBoxAgent.generate_context.""" + + context: BlueBoxAgentContext + json_path: str + md_path: str + + class BlueBoxAgent(AbstractAgent): """ BlueBoxAgent that searches and executes web automation routines. @@ -204,6 +212,13 @@ def __init__( self._agent_context is not None, ) + ## Properties + + @property + def loaded_context(self) -> BlueBoxAgentContext | None: + """The context loaded on init, if any.""" + return self._agent_context + ## Auth def _get_auth_headers(self) -> dict[str, str]: @@ -778,7 +793,7 @@ def _read_workspace_file( ## Context generation (structured output, called by TUI slash command) - def generate_context(self, focus: str | None = None) -> BlueBoxAgentContext: + def generate_context(self, focus: str | None = None) -> GenerateContextResult: """Generate a context file from the current session using structured output. Makes a direct LLM call with response_model=BlueBoxAgentContext to get @@ -789,7 +804,7 @@ def generate_context(self, focus: str | None = None) -> BlueBoxAgentContext: focus: Optional user-provided focus prompt to guide context generation. Returns: - The generated BlueBoxAgentContext. + GenerateContextResult with the context and saved file paths. 
Raises: ValueError: If the LLM fails to produce a valid context. @@ -844,4 +859,8 @@ def generate_context(self, focus: str | None = None) -> BlueBoxAgentContext: ) logger.info("Context files saved: %s, %s", json_save["output_file"], md_save["output_file"]) - return context + return GenerateContextResult( + context=context, + json_path=json_save["output_file"], + md_path=md_save["output_file"], + ) diff --git a/bluebox/scripts/run_bluebox_agent.py b/bluebox/scripts/run_bluebox_agent.py index 4b316bdf..88931697 100644 --- a/bluebox/scripts/run_bluebox_agent.py +++ b/bluebox/scripts/run_bluebox_agent.py @@ -35,10 +35,9 @@ from textual import work from textual.widgets import RichLog -from bluebox.agents.bluebox_agent import BlueBoxAgent +from bluebox.agents.bluebox_agent import BlueBoxAgent, GenerateContextResult from bluebox.agents.workspace import LocalWorkspace from bluebox.config import Config -from bluebox.data_models.agents.context import BlueBoxAgentContext from bluebox.data_models.llms.vendors import LLMModel from bluebox.utils.cli_utils import add_model_argument, resolve_model from bluebox.utils.logger import enable_tui_logging @@ -94,6 +93,10 @@ def _print_welcome(self) -> None: lines = [ f"[dim]Model:[/dim] {self._llm_model.value}", ] + if isinstance(self._agent, BlueBoxAgent) and self._agent.loaded_context: + ctx = self._agent.loaded_context + lines.append(f"[dim]Context:[/dim] [green]loaded[/green] — {ctx.goal[:60]}") + lines.append(f"[dim] {len(ctx.routines_used)} routine(s), {len(ctx.output_files)} output file(s)[/dim]") chat.write(Text.from_markup("\n".join(lines))) chat.write("") @@ -164,19 +167,21 @@ def _generate_context_async(self, focus: str | None) -> None: try: if not isinstance(self._agent, BlueBoxAgent): raise TypeError(f"Expected BlueBoxAgent, got {type(self._agent).__name__}") - context = self._agent.generate_context(focus=focus) - self.call_from_thread(self._show_context_success, context) + result = self._agent.generate_context(focus=focus) 
+ self.call_from_thread(self._show_context_success, result) except Exception as e: self.call_from_thread(self._show_context_error, str(e)) - def _show_context_success(self, context: BlueBoxAgentContext) -> None: + def _show_context_success(self, result: GenerateContextResult) -> None: """Display context generation success in the chat pane.""" chat = self.query_one("#chat-log", RichLog) chat.write(Text.from_markup( f"[bold green]Context saved![/bold green]\n" - f"[dim]Goal:[/dim] {context.goal}\n" - f"[dim]Summary:[/dim] {context.summary}\n" - f"[dim]Routines:[/dim] {len(context.routines_used)}" + f"[dim]Goal:[/dim] {result.context.goal}\n" + f"[dim]Summary:[/dim] {result.context.summary}\n" + f"[dim]Routines:[/dim] {len(result.context.routines_used)}\n" + f"[dim]JSON:[/dim] {result.json_path}\n" + f"[dim]Markdown:[/dim] {result.md_path}" )) self._processing = False self._update_status() diff --git a/tests/unit/agents/test_bluebox_agent_context.py b/tests/unit/agents/test_bluebox_agent_context.py index af2dfb2a..c651af8f 100644 --- a/tests/unit/agents/test_bluebox_agent_context.py +++ b/tests/unit/agents/test_bluebox_agent_context.py @@ -371,22 +371,21 @@ def test_saves_both_files(self, tmp_path: Path, sample_context: BlueBoxAgentCont result = agent.generate_context() - assert result.goal == sample_context.goal - assert result.summary == sample_context.summary - - # Verify both JSON and MD files were saved - context_dir = tmp_path / "context" - json_files = list(context_dir.glob("*.json")) - md_files = list(context_dir.glob("*.md")) - assert len(json_files) == 1 - assert len(md_files) == 1 - - # Verify JSON is valid - loaded = BlueBoxAgentContext.model_validate_json(json_files[0].read_text()) + assert result.context.goal == sample_context.goal + assert result.context.summary == sample_context.summary + assert "context/" in result.json_path + assert "context/" in result.md_path + + # Verify JSON file exists and is valid + json_path = Path(result.json_path) + assert 
json_path.is_file() + loaded = BlueBoxAgentContext.model_validate_json(json_path.read_text()) assert loaded.goal == sample_context.goal - # Verify MD has expected sections - assert "## Goal" in md_files[0].read_text() + # Verify MD file exists with expected sections + md_path = Path(result.md_path) + assert md_path.is_file() + assert "## Goal" in md_path.read_text() def test_saves_to_context_subdirectory(self, tmp_path: Path, minimal_context: BlueBoxAgentContext) -> None: agent = self._make_agent(tmp_path) @@ -435,10 +434,10 @@ def test_auto_populates_routines_from_raw(self, tmp_path: Path) -> None: result = agent.generate_context() - assert len(result.routines_used) == 1 - assert result.routines_used[0].routine_id == "Routine_abc" - assert result.routines_used[0].routine_name == "TestRoutine" - assert result.routines_used[0].parameters_as_dict() == {"city": "NYC"} + assert len(result.context.routines_used) == 1 + assert result.context.routines_used[0].routine_id == "Routine_abc" + assert result.context.routines_used[0].routine_name == "TestRoutine" + assert result.context.routines_used[0].parameters_as_dict() == {"city": "NYC"} def test_auto_populate_deduplicates_routines(self, tmp_path: Path) -> None: """Same routine_id executed multiple times should appear once.""" @@ -462,7 +461,7 @@ def test_auto_populate_deduplicates_routines(self, tmp_path: Path) -> None: agent.llm_client.call_sync = MagicMock(return_value=self._mock_llm_response(context_from_llm)) result = agent.generate_context() - assert len(result.routines_used) == 1 + assert len(result.context.routines_used) == 1 def test_llm_provided_routines_not_overridden(self, tmp_path: Path) -> None: """When LLM provides routines_used, don't auto-populate from raw/.""" @@ -489,8 +488,8 @@ def test_llm_provided_routines_not_overridden(self, tmp_path: Path) -> None: agent.llm_client.call_sync = MagicMock(return_value=self._mock_llm_response(context_from_llm)) result = agent.generate_context() - assert 
len(result.routines_used) == 1 - assert result.routines_used[0].routine_id == "Routine_llm_provided" + assert len(result.context.routines_used) == 1 + assert result.context.routines_used[0].routine_id == "Routine_llm_provided" def test_passes_focus_to_system_prompt(self, tmp_path: Path, minimal_context: BlueBoxAgentContext) -> None: """Focus text should be included in the system prompt sent to LLM.""" From 7f5f9df16e65991f6cdd0b3128bad5191846c772 Mon Sep 17 00:00:00 2001 From: Dima Vremekno Date: Mon, 23 Feb 2026 01:48:44 -0500 Subject: [PATCH 11/13] Simplify save_file to take direct filename, dispatch context to Saved Files panel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Callers now build their own filenames — routine results use timestamps, context files use fixed names (agent_context.json/md). Context files are dispatched to the TUI Saved Files pane via _add_saved_file(). Co-Authored-By: Claude Opus 4.6 --- bluebox/agents/bluebox_agent.py | 10 ++++++---- bluebox/agents/workspace.py | 20 ++++---------------- bluebox/scripts/run_bluebox_agent.py | 2 ++ tests/unit/test_workspace.py | 21 +++++++++++---------- 4 files changed, 23 insertions(+), 30 deletions(-) diff --git a/bluebox/agents/bluebox_agent.py b/bluebox/agents/bluebox_agent.py index 2e7ade15..3c677d94 100644 --- a/bluebox/agents/bluebox_agent.py +++ b/bluebox/agents/bluebox_agent.py @@ -469,8 +469,9 @@ def _execute_routines_in_parallel( def save_result(result: dict[str, Any]) -> dict[str, Any]: """Save a single routine result to a JSON file in raw/.""" try: + ts = datetime.now().strftime("%y-%m-%d-%H%M%S") save_info = self._workspace.save_file( - "raw", "routine_result", + "raw", f"{ts}-routine_result.json", json.dumps(result, indent=2, default=str), ) result.update(save_info) @@ -610,8 +611,9 @@ def _execute_browser_task( final_result = result.get("final_result") if final_result: try: + ts = datetime.now().strftime("%y-%m-%d-%H%M%S") save_info = 
self._workspace.save_file( - "outputs", "browser_agent", final_result, extension=".md", + "outputs", f"{ts}-browser_agent.md", final_result, ) result.update(save_info) except Exception as e: @@ -850,12 +852,12 @@ def generate_context(self, focus: str | None = None) -> GenerateContextResult: # Save canonical JSON json_save = self._workspace.save_file( - "context", "agent_context", context.model_dump_json(indent=2), + "context", "agent_context.json", context.model_dump_json(indent=2), ) # Save companion Markdown md_save = self._workspace.save_file( - "context", "agent_context", context.to_markdown(), extension=".md", + "context", "agent_context.md", context.to_markdown(), ) logger.info("Context files saved: %s, %s", json_save["output_file"], md_save["output_file"]) diff --git a/bluebox/agents/workspace.py b/bluebox/agents/workspace.py index a17863ff..869d3bdb 100644 --- a/bluebox/agents/workspace.py +++ b/bluebox/agents/workspace.py @@ -11,9 +11,7 @@ from __future__ import annotations import json -import threading from abc import ABC, abstractmethod -from datetime import datetime from pathlib import Path from typing import Any @@ -42,17 +40,15 @@ def root_path(self) -> Path: def save_file( self, subdirectory: str, - filename_prefix: str, + filename: str, content: str, - extension: str = ".json", ) -> dict[str, str]: - """Save content with a unique timestamped filename. + """Save content to a file in the workspace. Args: subdirectory: Logical subdirectory ("raw", "outputs", or "context"). - filename_prefix: Prefix for the generated filename. + filename: The filename to use (e.g. "routine_result_1.json"). content: File content to write. - extension: File extension including the dot. Returns: Dict with at least "output_file" key (the saved path). 
@@ -125,8 +121,6 @@ def __init__(self, workspace_dir: str = "./bluebox_workspace") -> None: self._raw_dir = self._workspace_dir / "raw" self._outputs_dir = self._workspace_dir / "outputs" self._context_dir = self._workspace_dir / "context" - self._execution_counter: int = 0 - self._counter_lock = threading.Lock() self.ensure_dirs() @property @@ -136,17 +130,11 @@ def root_path(self) -> Path: def save_file( self, subdirectory: str, - filename_prefix: str, + filename: str, content: str, - extension: str = ".json", ) -> dict[str, str]: directory = self._workspace_dir / subdirectory directory.mkdir(parents=True, exist_ok=True) - with self._counter_lock: - self._execution_counter += 1 - idx = self._execution_counter - timestamp = datetime.now().strftime("%y-%m-%d-%H%M%S") - filename = f"{timestamp}-{filename_prefix}_{idx}{extension}" output_path = directory / filename output_path.write_text(content) logger.info("Result saved to %s", output_path) diff --git a/bluebox/scripts/run_bluebox_agent.py b/bluebox/scripts/run_bluebox_agent.py index 88931697..6d69216c 100644 --- a/bluebox/scripts/run_bluebox_agent.py +++ b/bluebox/scripts/run_bluebox_agent.py @@ -183,6 +183,8 @@ def _show_context_success(self, result: GenerateContextResult) -> None: f"[dim]JSON:[/dim] {result.json_path}\n" f"[dim]Markdown:[/dim] {result.md_path}" )) + self._add_saved_file(result.json_path) + self._add_saved_file(result.md_path) self._processing = False self._update_status() diff --git a/tests/unit/test_workspace.py b/tests/unit/test_workspace.py index d6db96e9..49df8612 100644 --- a/tests/unit/test_workspace.py +++ b/tests/unit/test_workspace.py @@ -20,31 +20,32 @@ class TestSaveFile: def test_saves_file_with_content(self, tmp_path: Path) -> None: ws = LocalWorkspace(str(tmp_path)) - result = ws.save_file("raw", "routine_result", '{"data": 1}') + result = ws.save_file("raw", "routine_result.json", '{"data": 1}') assert "output_file" in result saved = Path(result["output_file"]) assert 
saved.exists() assert saved.read_text() == '{"data": 1}' + assert saved.name == "routine_result.json" def test_creates_subdirectory(self, tmp_path: Path) -> None: ws = LocalWorkspace(str(tmp_path)) - result = ws.save_file("raw", "test", "content") - assert (tmp_path / "raw").is_dir() + ws.save_file("custom_subdir", "test.json", "content") + assert (tmp_path / "custom_subdir").is_dir() - def test_unique_filenames(self, tmp_path: Path) -> None: + def test_overwrites_existing_file(self, tmp_path: Path) -> None: ws = LocalWorkspace(str(tmp_path)) - r1 = ws.save_file("raw", "test", "a") - r2 = ws.save_file("raw", "test", "b") - assert r1["output_file"] != r2["output_file"] + ws.save_file("raw", "test.json", "old") + ws.save_file("raw", "test.json", "new") + assert (tmp_path / "raw" / "test.json").read_text() == "new" - def test_custom_extension(self, tmp_path: Path) -> None: + def test_different_extensions(self, tmp_path: Path) -> None: ws = LocalWorkspace(str(tmp_path)) - result = ws.save_file("outputs", "browser_agent", "# Result", extension=".md") + result = ws.save_file("outputs", "result.md", "# Result") assert result["output_file"].endswith(".md") def test_no_s3_key_in_result(self, tmp_path: Path) -> None: ws = LocalWorkspace(str(tmp_path)) - result = ws.save_file("raw", "test", "data") + result = ws.save_file("raw", "test.json", "data") assert "output_file_s3_key" not in result From 2b8617107ba45b85aba56898de43420c0d4e5984 Mon Sep 17 00:00:00 2001 From: Dima Vremekno Date: Mon, 23 Feb 2026 01:52:19 -0500 Subject: [PATCH 12/13] Remove GenerateContextResult NamedTuple, return BlueBoxAgentContext directly Co-Authored-By: Claude Opus 4.6 --- bluebox/agents/bluebox_agent.py | 17 ++++--------- bluebox/scripts/run_bluebox_agent.py | 22 ++++++++++------- .../unit/agents/test_bluebox_agent_context.py | 24 +++++++++---------- 3 files changed, 28 insertions(+), 35 deletions(-) diff --git a/bluebox/agents/bluebox_agent.py b/bluebox/agents/bluebox_agent.py index 
3c677d94..1fb3ab71 100644 --- a/bluebox/agents/bluebox_agent.py +++ b/bluebox/agents/bluebox_agent.py @@ -15,7 +15,7 @@ from datetime import datetime from pathlib import Path from textwrap import dedent -from typing import Any, Callable, NamedTuple +from typing import Any, Callable import requests @@ -52,12 +52,7 @@ logger = get_logger(name=__name__) -class GenerateContextResult(NamedTuple): - """Return value from BlueBoxAgent.generate_context.""" - context: BlueBoxAgentContext - json_path: str - md_path: str class BlueBoxAgent(AbstractAgent): @@ -795,7 +790,7 @@ def _read_workspace_file( ## Context generation (structured output, called by TUI slash command) - def generate_context(self, focus: str | None = None) -> GenerateContextResult: + def generate_context(self, focus: str | None = None) -> BlueBoxAgentContext: """Generate a context file from the current session using structured output. Makes a direct LLM call with response_model=BlueBoxAgentContext to get @@ -806,7 +801,7 @@ def generate_context(self, focus: str | None = None) -> GenerateContextResult: focus: Optional user-provided focus prompt to guide context generation. Returns: - GenerateContextResult with the context and saved file paths. + The validated BlueBoxAgentContext. Raises: ValueError: If the LLM fails to produce a valid context. 
@@ -861,8 +856,4 @@ def generate_context(self, focus: str | None = None) -> GenerateContextResult: ) logger.info("Context files saved: %s, %s", json_save["output_file"], md_save["output_file"]) - return GenerateContextResult( - context=context, - json_path=json_save["output_file"], - md_path=md_save["output_file"], - ) + return context diff --git a/bluebox/scripts/run_bluebox_agent.py b/bluebox/scripts/run_bluebox_agent.py index 6d69216c..093a4667 100644 --- a/bluebox/scripts/run_bluebox_agent.py +++ b/bluebox/scripts/run_bluebox_agent.py @@ -35,8 +35,9 @@ from textual import work from textual.widgets import RichLog -from bluebox.agents.bluebox_agent import BlueBoxAgent, GenerateContextResult +from bluebox.agents.bluebox_agent import BlueBoxAgent from bluebox.agents.workspace import LocalWorkspace +from bluebox.data_models.agents.context import BlueBoxAgentContext from bluebox.config import Config from bluebox.data_models.llms.vendors import LLMModel from bluebox.utils.cli_utils import add_model_argument, resolve_model @@ -172,19 +173,22 @@ def _generate_context_async(self, focus: str | None) -> None: except Exception as e: self.call_from_thread(self._show_context_error, str(e)) - def _show_context_success(self, result: GenerateContextResult) -> None: + def _show_context_success(self, context: BlueBoxAgentContext) -> None: """Display context generation success in the chat pane.""" + context_dir = Path(self._workspace_dir) / "context" + json_path = str(context_dir / "agent_context.json") + md_path = str(context_dir / "agent_context.md") chat = self.query_one("#chat-log", RichLog) chat.write(Text.from_markup( f"[bold green]Context saved![/bold green]\n" - f"[dim]Goal:[/dim] {result.context.goal}\n" - f"[dim]Summary:[/dim] {result.context.summary}\n" - f"[dim]Routines:[/dim] {len(result.context.routines_used)}\n" - f"[dim]JSON:[/dim] {result.json_path}\n" - f"[dim]Markdown:[/dim] {result.md_path}" + f"[dim]Goal:[/dim] {context.goal}\n" + f"[dim]Summary:[/dim] 
{context.summary}\n" + f"[dim]Routines:[/dim] {len(context.routines_used)}\n" + f"[dim]JSON:[/dim] {json_path}\n" + f"[dim]Markdown:[/dim] {md_path}" )) - self._add_saved_file(result.json_path) - self._add_saved_file(result.md_path) + self._add_saved_file(json_path) + self._add_saved_file(md_path) self._processing = False self._update_status() diff --git a/tests/unit/agents/test_bluebox_agent_context.py b/tests/unit/agents/test_bluebox_agent_context.py index c651af8f..95db05b8 100644 --- a/tests/unit/agents/test_bluebox_agent_context.py +++ b/tests/unit/agents/test_bluebox_agent_context.py @@ -371,19 +371,17 @@ def test_saves_both_files(self, tmp_path: Path, sample_context: BlueBoxAgentCont result = agent.generate_context() - assert result.context.goal == sample_context.goal - assert result.context.summary == sample_context.summary - assert "context/" in result.json_path - assert "context/" in result.md_path + assert result.goal == sample_context.goal + assert result.summary == sample_context.summary # Verify JSON file exists and is valid - json_path = Path(result.json_path) + json_path = tmp_path / "context" / "agent_context.json" assert json_path.is_file() loaded = BlueBoxAgentContext.model_validate_json(json_path.read_text()) assert loaded.goal == sample_context.goal # Verify MD file exists with expected sections - md_path = Path(result.md_path) + md_path = tmp_path / "context" / "agent_context.md" assert md_path.is_file() assert "## Goal" in md_path.read_text() @@ -434,10 +432,10 @@ def test_auto_populates_routines_from_raw(self, tmp_path: Path) -> None: result = agent.generate_context() - assert len(result.context.routines_used) == 1 - assert result.context.routines_used[0].routine_id == "Routine_abc" - assert result.context.routines_used[0].routine_name == "TestRoutine" - assert result.context.routines_used[0].parameters_as_dict() == {"city": "NYC"} + assert len(result.routines_used) == 1 + assert result.routines_used[0].routine_id == "Routine_abc" + assert 
result.routines_used[0].routine_name == "TestRoutine" + assert result.routines_used[0].parameters_as_dict() == {"city": "NYC"} def test_auto_populate_deduplicates_routines(self, tmp_path: Path) -> None: """Same routine_id executed multiple times should appear once.""" @@ -461,7 +459,7 @@ def test_auto_populate_deduplicates_routines(self, tmp_path: Path) -> None: agent.llm_client.call_sync = MagicMock(return_value=self._mock_llm_response(context_from_llm)) result = agent.generate_context() - assert len(result.context.routines_used) == 1 + assert len(result.routines_used) == 1 def test_llm_provided_routines_not_overridden(self, tmp_path: Path) -> None: """When LLM provides routines_used, don't auto-populate from raw/.""" @@ -488,8 +486,8 @@ def test_llm_provided_routines_not_overridden(self, tmp_path: Path) -> None: agent.llm_client.call_sync = MagicMock(return_value=self._mock_llm_response(context_from_llm)) result = agent.generate_context() - assert len(result.context.routines_used) == 1 - assert result.context.routines_used[0].routine_id == "Routine_llm_provided" + assert len(result.routines_used) == 1 + assert result.routines_used[0].routine_id == "Routine_llm_provided" def test_passes_focus_to_system_prompt(self, tmp_path: Path, minimal_context: BlueBoxAgentContext) -> None: """Focus text should be included in the system prompt sent to LLM.""" From 4d67767cf2dd9faa95badf4f76ab953777a8e577 Mon Sep 17 00:00:00 2001 From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com> Date: Mon, 23 Feb 2026 08:53:40 -0500 Subject: [PATCH 13/13] update readme --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index 74157aa7..11581f79 100644 --- a/README.md +++ b/README.md @@ -85,9 +85,22 @@ bluebox-agent --model claude-opus-4-5 - Falls back to an AI browser agent for tasks without predefined routines - Post-processes outputs using Python (CSV, JSON, etc.) 
- Saves generated files to a local workspace +- Generates reusable **context files** to replay successful sessions instantly Ask it anything: *"Run a price analysis on Rolex Sea Dweller 16600"* — the agent automatically selects the right routine, runs it, and delivers structured results. +### Context (session replay) + +After a successful session, run `/generate_context` to save a snapshot of what worked — the goal, routines called (with exact parameters), any Python post-processing code, and output descriptions. Context files are saved to the workspace `context/` directory in both JSON and Markdown formats. + +When the agent starts a new session, it automatically loads the most recent context file and injects it into the system prompt. This lets the agent **skip trial and error** and directly replay the known-good path, adjusting parameters as needed for the new request. + +You can also load a specific context file explicitly: + +```bash +bluebox-agent --context-file path/to/agent_context.json +``` + ## Create your own routines To learn about the core technology powering BlueBox, see [routine_discovery.md](routine_discovery.md).