From 1fcb17ab8e964eb5514a7b9759c5f9acc64bf089 Mon Sep 17 00:00:00 2001
From: Dima Vremekno <dimavrem22@gmail.com>
Date: Sun, 22 Feb 2026 20:53:42 -0500
Subject: [PATCH 01/13] Add context generation feature for BlueBoxAgent session
 replay
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allow the agent to save a structured context file (JSON + Markdown) capturing
the successful path through a session — routines used, parameters, post-processing
code, and output description — so a new agent instance can replay it without
trial and error.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 bluebox/agents/bluebox_agent.py               | 152 ++++++-
 bluebox/data_models/agents/__init__.py        |   3 +
 bluebox/data_models/agents/context.py         | 245 +++++++++++
 bluebox/scripts/run_bluebox_agent.py          |  49 ++-
 .../unit/agents/test_bluebox_agent_context.py | 393 ++++++++++++++++++
 5 files changed, 839 insertions(+), 3 deletions(-)
 create mode 100644 bluebox/data_models/agents/__init__.py
 create mode 100644 bluebox/data_models/agents/context.py
 create mode 100644 tests/unit/agents/test_bluebox_agent_context.py

diff --git a/bluebox/agents/bluebox_agent.py b/bluebox/agents/bluebox_agent.py
index 82fce628..a2d5a856 100644
--- a/bluebox/agents/bluebox_agent.py
+++ b/bluebox/agents/bluebox_agent.py
@@ -13,6 +13,7 @@
 import json
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
+from pathlib import Path
 from textwrap import dedent
 from typing import Any, Callable
 
@@ -21,6 +22,7 @@
 from bluebox.agents.abstract_agent import AbstractAgent, AgentCard, agent_tool
 from bluebox.agents.workspace import AgentWorkspace, LocalWorkspace
 from bluebox.config import Config
+from bluebox.data_models.agents.context import BlueBoxAgentContext, RoutineUsed
 from bluebox.data_models.browser_agent import (
     BrowserAgentDoneEvent,
     BrowserAgentErrorEvent,
@@ -79,6 +81,7 @@ class BlueBoxAgent(AbstractAgent):
         Your workspace has the following structure:
         - `raw/` — routine result JSON files, saved automatically when routines execute
         - `outputs/` — write all your generated output files here (CSV, JSON, JSONL, etc.)
+        - `context/` — context files (JSON + Markdown) saved by `generate_context`, used for session replay
 
         **Pre-loaded variables in `run_python_code`:**
         - `routine_results` — list of dicts, one per JSON file in raw/
@@ -128,6 +131,7 @@ class BlueBoxAgent(AbstractAgent):
         - When using `execute_browser_task`, write a specific, step-by-step task description so the browser agent knows exactly what to do.
         - If your first search returns no results, try rephrasing the task description before giving up.
         - Be concise in responses.
+        - After successfully completing a task (output verified and correct), call `generate_context` to save a reusable recipe. Fill in all fields accurately — especially `routines_used` with the exact routine_ids and parameters that worked, and `python_code` with the final working snippet.
     """).strip()
 
     ## Magic methods
@@ -144,6 +148,7 @@ def __init__(
         workspace: AgentWorkspace | None = None,
         auth_headers_provider: Callable[[], dict[str, str]] | None = None,
         on_llm_response: Callable[[LLMChatResponse], None] | None = None,
+        context_file: str | None = None,
     ) -> None:
         """
         Initialize the BlueBox Agent.
@@ -160,6 +165,9 @@ def __init__(
             auth_headers_provider: Optional callback that returns auth headers for
                 downstream API calls. If not provided, falls back to Config.VECTORLY_SERVICE_TOKEN.
             on_llm_response: Optional callback invoked after each LLM call with the response (for token tracking).
+            context_file: Optional path to a context file (.json or .md) from a previous
+                session. If not provided, auto-discovers the most recent context file from
+                the workspace's context/ directory.
         """
         # Validate required config
         self._auth_headers_provider = auth_headers_provider
@@ -169,6 +177,9 @@ def __init__(
         self._workspace = workspace or LocalWorkspace()
         self._routine_cache: dict[str, RoutineInfo] = {}
 
+        # Load context from explicit path or auto-discover from workspace
+        self._agent_context: BlueBoxAgentContext | None = self._load_context(context_file)
+
         super().__init__(
             emit_message_callable=emit_message_callable,
             persist_chat_callable=persist_chat_callable,
@@ -186,10 +197,11 @@ def __init__(
         self._is_blocklist_mode = self._sandbox_mode == "blocklist"
 
         logger.debug(
-            "BlueBoxAgent initialized with model: %s, chat_thread_id: %s, sandbox_mode: %s",
+            "BlueBoxAgent initialized with model: %s, chat_thread_id: %s, sandbox_mode: %s, has_context: %s",
             llm_model,
             self._thread.id,
             self._sandbox_mode,
+            self._agent_context is not None,
         )
 
     ## Auth
@@ -210,6 +222,8 @@ def _get_system_prompt(self) -> str:
         prompt = self.SYSTEM_PROMPT + time_info
         if self._is_blocklist_mode:
             prompt += self._get_blocklist_sandbox_prompt_section()
+        if self._agent_context:
+            prompt += self._get_context_prompt_section()
         return prompt
 
     def _get_blocklist_sandbox_prompt_section(self) -> str:
@@ -281,6 +295,96 @@ def _validate_routine_params(self, routine_id: str, params: dict[str, Any]) -> s
             )
         return None
 
+    ## Context loading
+
+    _CONTEXT_PROMPT_MAX_CHARS: int = 20_000
+
+    def _load_context(self, context_file: str | None) -> BlueBoxAgentContext | None:
+        """Resolve the prior-session context for this agent, if there is one.
+
+        A non-empty ``context_file`` is handed to ``_load_context_from_path``,
+        which accepts an absolute path or one relative to the workspace root.
+        When ``context_file`` is ``None`` or empty, falls back to
+        ``_auto_discover_context`` (most recent ``.json`` file in the
+        workspace's ``context/`` directory).
+        """
+        if not context_file:
+            return self._auto_discover_context()
+        return self._load_context_from_path(context_file)
+
+    def _load_context_from_path(self, context_file: str) -> BlueBoxAgentContext | None:
+        """Load a context file from an absolute or workspace-relative path.
+
+        Best-effort: a missing or unparsable file is logged and None is returned.
+        """
+        path = Path(context_file)
+        path = path if path.is_absolute() else self._workspace.root_path / context_file
+        if not path.is_file():
+            logger.warning("Context file not found: %s", path)
+            return None
+        try:
+            is_md = path.suffix.lower() == ".md"  # any letter case, so "CONTEXT.MD" is not parsed as JSON
+            parse = BlueBoxAgentContext.from_markdown if is_md else BlueBoxAgentContext.model_validate_json
+            ctx = parse(path.read_text(encoding="utf-8"))
+            logger.info("Loaded agent context from %s", path)
+            return ctx
+        except Exception as e:  # deliberate catch-all: a bad context file must not abort start-up
+            logger.warning("Failed to load context file %s: %s", path, e)
+            return None
+
+    def _auto_discover_context(self) -> BlueBoxAgentContext | None:
+        """Load the newest ``*.json`` file (by mtime) from the workspace ``context/`` dir.
+
+        Returns None if the directory is missing or has no ``.json`` files.
+        """
+        context_dir = self._workspace.root_path / "context"
+        candidates = context_dir.glob("*.json") if context_dir.is_dir() else ()
+        newest = max(candidates, key=lambda entry: entry.stat().st_mtime, default=None)
+        return None if newest is None else self._load_context_from_path(str(newest))
+
+    def _get_context_prompt_section(self) -> str:
+        """Build the "Prior Context" system-prompt section from the loaded context.
+
+        Returns "" when no context is loaded. A section longer than
+        ``_CONTEXT_PROMPT_MAX_CHARS`` is cut at that length and a pointer to the
+        full files in ``context/`` is appended; if the cut lands inside a code
+        fence, the fence is closed first so the pointer is not read as code.
+        """
+        ctx = self._agent_context
+        if not ctx:
+            return ""
+        lines: list[str] = [
+            "\n\n## Prior Context",
+            "A previous session already solved a similar task. Use this as a starting point.",
+            f"\n**Goal:** {ctx.goal}",
+            f"\n**Summary:** {ctx.summary}",
+        ]
+        if ctx.routines_used:
+            lines.append("\n**Routines that worked:**")
+            for r in ctx.routines_used:
+                param_str = json.dumps(r.parameters, default=str) if r.parameters else "{}"
+                lines.append(f"- `{r.routine_id}` ({r.routine_name}): {param_str}")
+        if ctx.python_code:
+            lines.append(f"\n**Post-processing code that worked:**\n```python\n{ctx.python_code}\n```")
+        if ctx.output_files:
+            lines.append(f"\n**Output files produced:** {', '.join(ctx.output_files)}")
+        lines.append(f"\n**Output description:** {ctx.output_description}")
+        lines.append(
+            "\n> Replicate this path if the user's goal matches. "
+            "Adjust parameters for the new request. Skip trial and error."
+        )
+        section = "\n".join(lines)
+        if len(section) <= self._CONTEXT_PROMPT_MAX_CHARS:
+            return section
+
+        truncated = section[:self._CONTEXT_PROMPT_MAX_CHARS]
+        if truncated.count("```") % 2:
+            truncated += "\n```"  # odd fence count: the cut fell inside a code block
+        return truncated + (
+            "\n\n... (context truncated — use `read_workspace_file` to read "
+            "the full context files in `context/` for more detail)"
+        )
+
     ## Tool handlers
 
     @agent_tool()
@@ -661,3 +765,49 @@ def _read_workspace_file(
             end_line: Optional 1-based end line number (inclusive). Omit to read to the end.
         """
         return self._workspace.read_file(path, start_line=start_line, end_line=end_line)
+
+    @agent_tool()
+    def _generate_context(self, context: BlueBoxAgentContext) -> dict[str, Any]:
+        """
+        Save a context file capturing what worked in this session.
+
+        Call this after successfully completing the user's task. The context
+        file lets another BlueBoxAgent instance replicate the successful path
+        without trial and error. Both a JSON file (canonical) and a Markdown
+        file (human-readable) are saved to the context/ directory.
+
+        Args:
+            context: The full context object describing what was accomplished.
+                Must include goal, summary, output_description, and routines_used
+                with exact routine_ids and parameters that worked. Include python_code
+                if post-processing was used, and output_files listing what was produced.
+        """
+        # The JSON file is canonical: failing to write it fails the whole tool call.
+        serialized = context.model_dump_json(indent=2)
+        try:
+            json_save = self._workspace.save_file("context", "agent_context", serialized)
+        except Exception as e:
+            logger.exception("Failed to save context JSON: %s", e)
+            return {"error": f"Failed to save context file: {e}"}
+
+        # The Markdown companion is best-effort: a failure only nulls its path in the result.
+        rendered = context.to_markdown()
+        try:
+            md_save = self._workspace.save_file("context", "agent_context", rendered, extension=".md")
+        except Exception as e:
+            logger.warning("Failed to save context Markdown: %s", e)
+            md_save = {"output_file": None}
+        json_path = json_save["output_file"]
+        md_path = md_save["output_file"]
+        logger.info("Context files saved: %s, %s", json_path, md_path)
+        notice = (
+            f"Context saved to {json_path}. "
+            "A new BlueBoxAgent using this workspace will automatically "
+            "load this context and replicate the successful path."
+        )
+        return {
+            "success": True,
+            "context_json": json_path,
+            "context_md": md_path,
+            "message": notice,
+        }
diff --git a/bluebox/data_models/agents/__init__.py b/bluebox/data_models/agents/__init__.py
new file mode 100644
index 00000000..94c56aed
--- /dev/null
+++ b/bluebox/data_models/agents/__init__.py
@@ -0,0 +1,3 @@
+from bluebox.data_models.agents.context import BlueBoxAgentContext, RoutineUsed
+
+__all__ = ["BlueBoxAgentContext", "RoutineUsed"]
diff --git a/bluebox/data_models/agents/context.py b/bluebox/data_models/agents/context.py
new file mode 100644
index 00000000..ede66f49
--- /dev/null
+++ b/bluebox/data_models/agents/context.py
@@ -0,0 +1,245 @@
+"""
+bluebox/data_models/agents/context.py
+
+Data model for BlueBoxAgent context files.
+
+A context file captures the successful path through a BlueBoxAgent
+conversation so a new agent instance can replay it without trial and error.
+
+Supports dual format: canonical JSON (Pydantic) and human-readable Markdown,
+with round-trip parsing between both.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+from datetime import datetime, timezone
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+
+class RoutineUsed(BaseModel):
+    """One routine that was successfully executed during the session."""
+    # NOTE(review): Field descriptions presumably surface in the generate_context tool schema — confirm before rewording.
+    routine_id: str = Field(..., description="Routine ID from search_routines results")  # required
+    routine_name: str = Field(..., description="Human-readable routine name")  # required
+    parameters: dict[str, Any] = Field(
+        default_factory=dict,  # each instance gets its own empty dict when omitted
+        description="Parameter name-to-value mapping that produced correct results",
+    )
+
+
+class BlueBoxAgentContext(BaseModel):
+    """
+    Structured snapshot of a successful BlueBoxAgent session.
+
+    Serialized to JSON and saved to context/. Consumed by a new
+    BlueBoxAgent instance via system prompt injection.
+    """
+
+    version: int = Field(default=1, description="Schema version for forward compatibility")
+    goal: str = Field(..., description="The user's original request, in their own words")
+    routines_used: list[RoutineUsed] = Field(
+        default_factory=list,
+        description="Routines that produced useful results, in execution order",
+    )
+    python_code: str | None = Field(
+        default=None,
+        description="The final working Python post-processing snippet",
+    )
+    output_files: list[str] = Field(
+        default_factory=list,
+        description="Relative paths of output files written to outputs/",
+    )
+    output_description: str = Field(
+        ...,
+        description="Prose description of the output: format, key fields, row count if known",
+    )
+    summary: str = Field(
+        ...,
+        description="1-2 sentence human-readable summary of what was accomplished",
+    )
+    generated_at: datetime = Field(
+        default_factory=lambda: datetime.now(tz=timezone.utc),
+        description="When this context was generated",
+    )
+
+    # ── Markdown serialization ───────────────────────────────────────────
+
+    def to_markdown(self) -> str:
+        """Render this context as Markdown that ``from_markdown`` can parse back.
+
+        Layout: an H1 title with version and timestamp, then H2 sections. The
+        Routines Used, Python Code and Output Files sections are emitted only
+        when non-empty; parameters and code are wrapped in fenced blocks.
+        """
+        out: list[str] = [
+            "# BlueBox Agent Context",
+            "",
+            f"**Version:** {self.version}",
+            f"**Generated:** {self.generated_at.isoformat()}",
+            "",
+            "## Goal",
+            "",
+            self.goal,
+            "",
+            "## Summary",
+            "",
+            self.summary,
+            "",
+        ]
+
+        if self.routines_used:
+            out += ["## Routines Used", ""]
+            for routine in self.routines_used:
+                out += [f"### {routine.routine_name} (`{routine.routine_id}`)", ""]
+                if not routine.parameters:
+                    out.append("No parameters.")
+                else:
+                    params_json = json.dumps(routine.parameters, indent=2, default=str)
+                    out += ["**Parameters:**", "```json", params_json, "```"]
+                out.append("")
+
+        if self.python_code:
+            out += [
+                "## Python Code",
+                "",
+                "```python",
+                self.python_code,
+                "```",
+                "",
+            ]
+
+        if self.output_files:
+            out += ["## Output Files", "", *(f"- `{name}`" for name in self.output_files), ""]
+
+        out += [
+            "## Output Description",
+            "",
+            self.output_description,
+            "",
+        ]
+
+        return "\n".join(out)
+
+    @classmethod
+    def from_markdown(cls, text: str) -> BlueBoxAgentContext:
+        """Rebuild a context from Markdown in the layout ``to_markdown`` emits.
+
+        Parsing is lenient: a missing section yields an empty string, list or
+        None; a missing version falls back to 1; an absent or unparsable
+        timestamp falls back to the current UTC time.
+
+        Args:
+            text: Markdown produced by ``to_markdown`` or written in that layout.
+
+        Returns:
+            The parsed ``BlueBoxAgentContext``.
+        """
+        sections = _split_markdown_sections(text)
+        header = sections.get("BlueBox Agent Context", "")
+
+        # Header metadata, with defaults for anything that is missing.
+        version_match = re.search(r"\*\*Version:\*\*\s*(\d+)", header)
+        version = int(version_match.group(1)) if version_match else 1
+
+        generated_at = datetime.now(tz=timezone.utc)
+        generated_match = re.search(r"\*\*Generated:\*\*\s*(.+)", header)
+        if generated_match:
+            try:
+                generated_at = datetime.fromisoformat(generated_match.group(1).strip())
+            except ValueError:
+                pass  # keep the "now" fallback
+
+        # Each entry in Output Files is a bullet of the form "- `relative/path`".
+        bullet_matches = (
+            re.match(r"^-\s*`(.+)`", row.strip())
+            for row in sections.get("Output Files", "").splitlines()
+        )
+        output_files = [m.group(1) for m in bullet_matches if m]
+
+        return cls(
+            version=version,
+            goal=sections.get("Goal", "").strip(),
+            summary=sections.get("Summary", "").strip(),
+            output_description=sections.get("Output Description", "").strip(),
+            routines_used=_parse_routines_section(sections.get("Routines Used", "")),
+            python_code=_extract_fenced_block(sections.get("Python Code", ""), "python"),
+            output_files=output_files,
+            generated_at=generated_at,
+        )
+
+
+# ── Markdown parsing helpers ─────────────────────────────────────────────
+
+
+def _split_markdown_sections(text: str) -> dict[str, str]:
+    """Split Markdown into {heading: body} pairs on H1/H2 headings.
+
+    Lines inside ``` fenced code blocks are never treated as headings, so a
+    Python snippet with column-0 ``# comment`` lines stays in its section.
+    Text before the first heading is discarded.
+    """
+    sections: dict[str, list[str]] = {}
+    body: list[str] | None = None
+    in_fence = False
+    for line in text.splitlines():
+        if line.startswith("```"):
+            in_fence = not in_fence  # fence delimiters toggle code-block state
+        heading = None if in_fence else re.match(r"^#{1,2}\s+(.+)$", line)
+        if heading:
+            # A repeated heading replaces the earlier section (last one wins).
+            body = sections[heading.group(1).strip()] = []
+        elif body is not None:
+            body.append(line)
+    return {name: "\n".join(lines) for name, lines in sections.items()}
+
+
+def _extract_fenced_block(text: str, language: str | None = None) -> str | None:
+    """Return the body of the first fenced code block in text, or None if absent.
+
+    With language set, only a fence opened as ```<language> matches. The closing
+    fence must begin a line (indentation allowed), so backticks inside a code line are kept.
+    """
+    info = re.escape(language) if language else r"\w*"
+    # MULTILINE anchors the closing fence at a line start; DOTALL lets the body span lines.
+    match = re.search(rf"```{info}\n(.*?)^[ \t]*```", text, re.DOTALL | re.MULTILINE)
+    return match.group(1).rstrip("\n") if match else None
+
+
+def _parse_routines_section(text: str) -> list[RoutineUsed]:
+    """Parse the body of the "Routines Used" section into RoutineUsed objects.
+
+    Each routine is an H3 block: a ``### Name (`routine_id`)`` header optionally
+    followed by a fenced JSON object of parameters. Blocks whose header does not
+    match are skipped. Parameters that are invalid JSON, or valid JSON that is
+    not an object, become ``{}`` so one bad block cannot fail the whole parse.
+    """
+    routines: list[RoutineUsed] = []
+    # Splitting on "### " leaves each chunk starting with its header text.
+    for chunk in re.split(r"^###\s+", text, flags=re.MULTILINE):
+        header_match = re.match(r"^(.+?)\s*\(`([^`]+)`\)", chunk)
+        if not header_match:
+            continue  # blank chunk, or text that is not a routine header
+
+        parameters: dict[str, Any] = {}
+        params_json = _extract_fenced_block(chunk, "json")
+        if params_json:
+            try:
+                parsed = json.loads(params_json)
+            except json.JSONDecodeError:
+                parsed = None
+            # RoutineUsed.parameters is a dict; a JSON list/scalar would fail validation.
+            if isinstance(parsed, dict):
+                parameters = parsed
+
+        routines.append(
+            RoutineUsed(
+                routine_id=header_match.group(2).strip(),
+                routine_name=header_match.group(1).strip(),
+                parameters=parameters,
+            )
+        )
+    return routines
diff --git a/bluebox/scripts/run_bluebox_agent.py b/bluebox/scripts/run_bluebox_agent.py
index 015b9a1e..30daa002 100644
--- a/bluebox/scripts/run_bluebox_agent.py
+++ b/bluebox/scripts/run_bluebox_agent.py
@@ -50,17 +50,24 @@ class BlueBoxAgentTUI(AbstractAgentTUI):
     """Multi-pane TUI for the BlueBox Agent."""
 
     TITLE = "BlueBox Agent"
-    SLASH_COMMANDS = BASE_SLASH_COMMANDS
-    HELP_TEXT = BASE_HELP_TEXT
+    SLASH_COMMANDS = {
+        **BASE_SLASH_COMMANDS,
+        "/generate_context": "Save a reusable context file from this session",
+    }
+    HELP_TEXT = BASE_HELP_TEXT + (
+        "\n    [cyan]/generate_context[/cyan]  Save a reusable context file from this session\n"
+    )
     SHOW_SAVED_FILES_PANE = True
 
     def __init__(
         self,
         llm_model: LLMModel,
         workspace_dir: str = "./bluebox_workspace",
+        context_file: str | None = None,
     ) -> None:
         super().__init__(llm_model, working_dir=workspace_dir)
         self._workspace_dir = workspace_dir
+        self._context_file = context_file
 
     # ── Abstract implementations ─────────────────────────────────────────
 
@@ -70,6 +77,7 @@ def _create_agent(self) -> AbstractAgent:
             stream_chunk_callable=self._handle_stream_chunk,
             llm_model=self._llm_model,
             workspace=LocalWorkspace(self._workspace_dir),
+            context_file=self._context_file,
         )
 
     def _print_welcome(self) -> None:
@@ -129,6 +137,36 @@ def _add(p: str) -> None:
                 _add(r.get("output_file", ""))
         return paths
 
+    # ── Custom slash commands ─────────────────────────────────────────
+
+    _GENERATE_CONTEXT_PROMPT: str = (
+        "Review everything we accomplished in this session and call the `generate_context` tool "
+        "to save a reusable context file. Include:\n"
+        "- The original goal (what I asked for)\n"
+        "- All routines that produced useful results (with exact routine_ids and parameter values)\n"
+        "- The final working Python post-processing code (if any)\n"
+        "- The output files that were created\n"
+        "- A clear description of what the output looks like\n"
+        "- A concise summary of what was accomplished\n\n"
+        "Be thorough and accurate — another agent will use this context to replicate our work."
+    )
+
+    def _handle_custom_command(self, cmd: str, raw_input: str) -> bool:
+        """Handle ``/generate_context``; return True if ``cmd`` was consumed, else False."""
+        if cmd != "/generate_context":
+            return False
+        chat_log = self.query_one("#chat-log", RichLog)
+        if not self._agent:
+            chat_log.write(Text.from_markup("[red]Agent not initialized.[/red]"))
+            return True
+        chat_log.write(Text.from_markup("[yellow]Generating context from this session...[/yellow]"))
+        # Set the processing flag and clear the header/status flags before sending the canned prompt.
+        self._processing = True
+        self._assistant_header_printed = False
+        self._status_update_printed = False
+        self._send_to_agent(self._GENERATE_CONTEXT_PROMPT)
+        return True
+
 
 # ─── Entry point ─────────────────────────────────────────────────────────────
 
@@ -142,6 +180,12 @@ def main() -> None:
         default="./bluebox_workspace",
         help="Workspace directory. Raw results in raw/, output files in outputs/ (default: ./bluebox_workspace)",
     )
+    parser.add_argument(
+        "--context-file",
+        type=str,
+        default=None,
+        help="Path to a context file (.json or .md) from a previous session to guide the agent",
+    )
     parser.add_argument("-q", "--quiet", action="store_true", help="Suppress logs")
     parser.add_argument("--log-file", type=str, default=None, help="Log to file")
     args = parser.parse_args()
@@ -186,6 +230,7 @@ def main() -> None:
     app = BlueBoxAgentTUI(
         llm_model=llm_model,
         workspace_dir=args.workspace_dir,
+        context_file=args.context_file,
     )
     app.run()
 
diff --git a/tests/unit/agents/test_bluebox_agent_context.py b/tests/unit/agents/test_bluebox_agent_context.py
new file mode 100644
index 00000000..7783cbb8
--- /dev/null
+++ b/tests/unit/agents/test_bluebox_agent_context.py
@@ -0,0 +1,393 @@
+"""
+tests/unit/agents/test_bluebox_agent_context.py
+
+Unit tests for BlueBoxAgentContext data model and context generation/loading
+in BlueBoxAgent.
+"""
+
+import json
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+from unittest.mock import MagicMock
+
+import pytest
+
+from bluebox.data_models.agents.context import BlueBoxAgentContext, RoutineUsed
+
+
+# =============================================================================
+# Fixtures
+# =============================================================================
+
+
+@pytest.fixture
+def sample_context() -> BlueBoxAgentContext:
+    """Context with every field populated: two routines, code, one output file, fixed timestamp."""
+    search = RoutineUsed(
+        routine_id="Routine_abc123",
+        routine_name="AmtrakOneWaySearch",
+        parameters={"origin": "New York", "destination": "Boston", "date": "2026-03-15"},
+    )
+    price_filter = RoutineUsed(
+        routine_id="Routine_def456",
+        routine_name="AmtrakPriceFilter",
+        parameters={"max_price": 100},
+    )
+    code_lines = [
+        'import csv',
+        'with open("outputs/trains.csv", "w") as f:',
+        '    writer = csv.DictWriter(f, fieldnames=["departure", "price"])',
+        '    writer.writeheader()',
+        '    for rr in routine_results:',
+        '        for train in rr["result"]["data"]["trains"]:',
+        '            writer.writerow(train)',
+        'print("Done")',
+    ]
+    return BlueBoxAgentContext(
+        version=1,
+        goal="Find one-way train tickets from NYC to Boston on March 15, 2026",
+        routines_used=[search, price_filter],
+        python_code="\n".join(code_lines),
+        output_files=["outputs/trains.csv"],
+        output_description="CSV with columns: departure, price. 12 rows of Amtrak trains under $100.",
+        summary="Searched Amtrak for NYC-Boston trains on March 15, filtered by price, and exported to CSV.",
+        generated_at=datetime(2026, 2, 22, 10, 30, 0, tzinfo=timezone.utc),
+    )
+
+
+@pytest.fixture
+def minimal_context() -> BlueBoxAgentContext:
+    """Context built from the three required fields only; the rest take their defaults."""
+    required_only = {
+        "goal": "Search for flights", "summary": "Found flights.",
+        "output_description": "JSON with flight data",
+    }
+    return BlueBoxAgentContext(**required_only)
+
+
+# =============================================================================
+# BlueBoxAgentContext model tests
+# =============================================================================
+
+
+class TestBlueBoxAgentContextModel:
+    """Field defaults and JSON (de)serialization of the Pydantic model."""
+
+    def test_json_roundtrip(self, sample_context: BlueBoxAgentContext) -> None:
+        """Dumping to JSON and validating it back preserves the checked fields."""
+        restored = BlueBoxAgentContext.model_validate_json(sample_context.model_dump_json(indent=2))
+        scalar_fields = (
+            "version", "goal", "summary", "output_description", "python_code", "output_files",
+        )
+        for name in scalar_fields:
+            assert getattr(restored, name) == getattr(sample_context, name)
+        assert len(restored.routines_used) == 2
+        first, second = restored.routines_used
+        assert first.routine_id == "Routine_abc123"
+        assert second.parameters == {"max_price": 100}
+        assert isinstance(restored.generated_at, datetime)
+
+    def test_version_defaults_to_1(self, minimal_context: BlueBoxAgentContext) -> None:
+        assert minimal_context.version == 1
+
+    def test_generated_at_defaults_to_now(self, minimal_context: BlueBoxAgentContext) -> None:
+        stamp = minimal_context.generated_at
+        assert isinstance(stamp, datetime)
+        # The fixture was built moments ago, so allow a generous 10 s window.
+        assert (datetime.now(tz=timezone.utc) - stamp).total_seconds() < 10
+
+    def test_optional_fields_default(self, minimal_context: BlueBoxAgentContext) -> None:
+        """Omitted optional fields fall back to an empty list or None."""
+        assert minimal_context.routines_used == []
+        assert minimal_context.python_code is None
+        assert minimal_context.output_files == []
+
+
+# =============================================================================
+# Markdown round-trip tests
+# =============================================================================
+
+
+class TestMarkdownRoundTrip:
+    """Markdown rendering (``to_markdown``) and parsing (``from_markdown``)."""
+
+    def test_to_markdown_has_expected_sections(self, sample_context: BlueBoxAgentContext) -> None:
+        """A fully populated context renders the title, metadata and every section heading."""
+        md = sample_context.to_markdown()
+        expected_fragments = (
+            "# BlueBox Agent Context",
+            "## Goal",
+            "## Summary",
+            "## Routines Used",
+            "## Python Code",
+            "## Output Files",
+            "## Output Description",
+            "**Version:** 1",
+            "**Generated:**",
+        )
+        for fragment in expected_fragments:
+            assert fragment in md
+
+    def test_to_markdown_contains_routine_details(self, sample_context: BlueBoxAgentContext) -> None:
+        """Routine name, id and JSON parameters all appear in the rendered text."""
+        md = sample_context.to_markdown()
+        for fragment in ("AmtrakOneWaySearch", "Routine_abc123", '"origin": "New York"'):
+            assert fragment in md
+
+    def test_to_markdown_contains_python_code(self, sample_context: BlueBoxAgentContext) -> None:
+        """The snippet is rendered inside a python-tagged fence."""
+        md = sample_context.to_markdown()
+        for fragment in ("```python", "csv.DictWriter"):
+            assert fragment in md
+
+    def test_from_markdown_roundtrip(self, sample_context: BlueBoxAgentContext) -> None:
+        """Parsing the rendered Markdown yields a model equal on every compared field."""
+        restored = BlueBoxAgentContext.from_markdown(sample_context.to_markdown())
+        for name in ("version", "goal", "summary", "output_description", "python_code", "output_files"):
+            assert getattr(restored, name) == getattr(sample_context, name)
+        assert len(restored.routines_used) == len(sample_context.routines_used)
+        for original, parsed in zip(sample_context.routines_used, restored.routines_used):
+            assert parsed.routine_id == original.routine_id
+            assert parsed.routine_name == original.routine_name
+            assert parsed.parameters == original.parameters
+
+    def test_from_markdown_no_python_code(self, minimal_context: BlueBoxAgentContext) -> None:
+        """Without a Python Code section, python_code parses as None."""
+        md = minimal_context.to_markdown()
+        assert "## Python Code" not in md
+        assert BlueBoxAgentContext.from_markdown(md).python_code is None
+
+    def test_from_markdown_no_routines(self, minimal_context: BlueBoxAgentContext) -> None:
+        """Without a Routines Used section, routines_used parses as an empty list."""
+        md = minimal_context.to_markdown()
+        assert "## Routines Used" not in md
+        assert BlueBoxAgentContext.from_markdown(md).routines_used == []
+
+    def test_from_markdown_no_output_files(self, minimal_context: BlueBoxAgentContext) -> None:
+        """A context with no output files round-trips to an empty list."""
+        restored = BlueBoxAgentContext.from_markdown(minimal_context.to_markdown())
+        assert restored.output_files == []
+
+
+# =============================================================================
+# Context loading tests (BlueBoxAgent integration)
+# =============================================================================
+
+
+class TestContextLoading:
+    """Tests for context file loading in BlueBoxAgent."""
+
+    def _make_agent(
+        self,
+        workspace_dir: Path,
+        context_file: str | None = None,
+    ) -> Any:
+        """Create a BlueBoxAgent with mocked dependencies."""
+        from bluebox.agents.bluebox_agent import BlueBoxAgent
+        from bluebox.agents.workspace import LocalWorkspace
+
+        return BlueBoxAgent(
+            emit_message_callable=MagicMock(),
+            workspace=LocalWorkspace(str(workspace_dir)),
+            auth_headers_provider=lambda: {"X-Service-Token": "test"},
+            context_file=context_file,
+        )
+
+    def test_loads_json_context_file(self, tmp_path: Path, sample_context: BlueBoxAgentContext) -> None:
+        ctx_file = tmp_path / "my_context.json"
+        ctx_file.write_text(sample_context.model_dump_json(indent=2))
+
+        agent = self._make_agent(tmp_path, context_file=str(ctx_file))
+        assert agent._agent_context is not None
+        assert agent._agent_context.goal == sample_context.goal
+
+    def test_loads_markdown_context_file(self, tmp_path: Path, sample_context: BlueBoxAgentContext) -> None:
+        ctx_file = tmp_path / "my_context.md"
+        ctx_file.write_text(sample_context.to_markdown())
+
+        agent = self._make_agent(tmp_path, context_file=str(ctx_file))
+        assert agent._agent_context is not None
+        assert agent._agent_context.goal == sample_context.goal
+
+    def test_workspace_relative_path(self, tmp_path: Path, sample_context: BlueBoxAgentContext) -> None:
+        context_dir = tmp_path / "context"
+        context_dir.mkdir()
+        ctx_file = context_dir / "my_context.json"
+        ctx_file.write_text(sample_context.model_dump_json(indent=2))
+
+        agent = self._make_agent(tmp_path, context_file="context/my_context.json")
+        assert agent._agent_context is not None
+        assert agent._agent_context.goal == sample_context.goal
+
+    def test_auto_discovers_from_workspace(self, tmp_path: Path, sample_context: BlueBoxAgentContext) -> None:
+        context_dir = tmp_path / "context"
+        context_dir.mkdir()
+        ctx_file = context_dir / "agent_context.json"
+        ctx_file.write_text(sample_context.model_dump_json(indent=2))
+
+        agent = self._make_agent(tmp_path)
+        assert agent._agent_context is not None
+        assert agent._agent_context.goal == sample_context.goal
+
+    def test_auto_discovers_most_recent(self, tmp_path: Path) -> None:
+        """When multiple context files exist, loads the most recently modified."""
+        import time
+
+        context_dir = tmp_path / "context"
+        context_dir.mkdir()
+
+        old = BlueBoxAgentContext(goal="old goal", output_description="old", summary="old")
+        (context_dir / "old.json").write_text(old.model_dump_json())
+        time.sleep(0.05)  # ensure mtime differs
+
+        new = BlueBoxAgentContext(goal="new goal", output_description="new", summary="new")
+        (context_dir / "new.json").write_text(new.model_dump_json())
+
+        agent = self._make_agent(tmp_path)
+        assert agent._agent_context is not None
+        assert agent._agent_context.goal == "new goal"
+
+    def test_explicit_context_file_overrides_auto_discovery(
+        self, tmp_path: Path, sample_context: BlueBoxAgentContext,
+    ) -> None:
+        # Put one context in workspace
+        context_dir = tmp_path / "context"
+        context_dir.mkdir()
+        auto_ctx = BlueBoxAgentContext(goal="auto goal", output_description="auto", summary="auto")
+        (context_dir / "auto.json").write_text(auto_ctx.model_dump_json())
+
+        # Put explicit context elsewhere
+        explicit_file = tmp_path / "explicit.json"
+        explicit_file.write_text(sample_context.model_dump_json(indent=2))
+
+        agent = self._make_agent(tmp_path, context_file=str(explicit_file))
+        assert agent._agent_context is not None
+        assert agent._agent_context.goal == sample_context.goal
+
+    def test_invalid_context_file_ignored(self, tmp_path: Path) -> None:
+        agent = self._make_agent(tmp_path, context_file="/nonexistent/path.json")
+        assert agent._agent_context is None
+
+    def test_malformed_json_ignored(self, tmp_path: Path) -> None:
+        bad_file = tmp_path / "bad.json"
+        bad_file.write_text("not valid json!!!")
+        agent = self._make_agent(tmp_path, context_file=str(bad_file))
+        assert agent._agent_context is None
+
+    def test_no_context_dir_no_error(self, tmp_path: Path) -> None:
+        agent = self._make_agent(tmp_path)
+        assert agent._agent_context is None
+
+
+# =============================================================================
+# System prompt injection tests
+# =============================================================================
+
+
+class TestContextPromptInjection:
+    """Tests for _get_context_prompt_section and system prompt integration."""
+
+    def _make_agent(self, tmp_path: Path, context: BlueBoxAgentContext) -> Any:
+        from bluebox.agents.bluebox_agent import BlueBoxAgent
+        from bluebox.agents.workspace import LocalWorkspace
+
+        ctx_file = tmp_path / "context.json"
+        ctx_file.write_text(context.model_dump_json(indent=2))
+
+        return BlueBoxAgent(
+            emit_message_callable=MagicMock(),
+            workspace=LocalWorkspace(str(tmp_path)),
+            auth_headers_provider=lambda: {"X-Service-Token": "test"},
+            context_file=str(ctx_file),
+        )
+
+    def test_context_section_in_system_prompt(self, tmp_path: Path, sample_context: BlueBoxAgentContext) -> None:
+        agent = self._make_agent(tmp_path, sample_context)
+        prompt = agent._get_system_prompt()
+        assert "## Prior Context" in prompt
+        assert sample_context.goal in prompt
+        assert sample_context.summary in prompt
+        assert "Routine_abc123" in prompt
+        assert "AmtrakOneWaySearch" in prompt
+
+    def test_context_section_includes_python_code(self, tmp_path: Path, sample_context: BlueBoxAgentContext) -> None:
+        agent = self._make_agent(tmp_path, sample_context)
+        prompt = agent._get_system_prompt()
+        assert "```python" in prompt
+        assert "csv.DictWriter" in prompt
+
+    def test_context_section_truncation(self, tmp_path: Path) -> None:
+        """Context over 20K chars gets truncated with a hint."""
+        big_context = BlueBoxAgentContext(
+            goal="x" * 25_000,
+            output_description="desc",
+            summary="summary",
+        )
+        agent = self._make_agent(tmp_path, big_context)
+        section = agent._get_context_prompt_section()
+        assert len(section) < 25_000
+        assert "context truncated" in section
+        assert "read_workspace_file" in section
+
+    def test_no_context_no_section(self, tmp_path: Path) -> None:
+        from bluebox.agents.bluebox_agent import BlueBoxAgent
+        from bluebox.agents.workspace import LocalWorkspace
+
+        agent = BlueBoxAgent(
+            emit_message_callable=MagicMock(),
+            workspace=LocalWorkspace(str(tmp_path)),
+            auth_headers_provider=lambda: {"X-Service-Token": "test"},
+        )
+        prompt = agent._get_system_prompt()
+        assert "## Prior Context" not in prompt
+
+
+# =============================================================================
+# generate_context tool tests
+# =============================================================================
+
+
+class TestGenerateContextTool:
+    """Tests for the _generate_context agent tool."""
+
+    def _make_agent(self, tmp_path: Path) -> Any:
+        from bluebox.agents.bluebox_agent import BlueBoxAgent
+        from bluebox.agents.workspace import LocalWorkspace
+
+        return BlueBoxAgent(
+            emit_message_callable=MagicMock(),
+            workspace=LocalWorkspace(str(tmp_path)),
+            auth_headers_provider=lambda: {"X-Service-Token": "test"},
+        )
+
+    def test_tool_is_registered(self) -> None:
+        from bluebox.agents.bluebox_agent import BlueBoxAgent
+        tools = BlueBoxAgent._collect_tools()
+        tool_names = [meta.name for meta, _ in tools]
+        assert "generate_context" in tool_names
+
+    def test_saves_both_files(self, tmp_path: Path, sample_context: BlueBoxAgentContext) -> None:
+        agent = self._make_agent(tmp_path)
+        result = agent._generate_context(context=sample_context)
+
+        assert result["success"] is True
+        assert result["context_json"] is not None
+        assert result["context_md"] is not None
+
+        # Verify JSON file exists and is valid
+        json_path = tmp_path / result["context_json"]
+        assert json_path.is_file()
+        loaded = BlueBoxAgentContext.model_validate_json(json_path.read_text())
+        assert loaded.goal == sample_context.goal
+
+        # Verify MD file exists
+        md_path = tmp_path / result["context_md"]
+        assert md_path.is_file()
+        assert "## Goal" in md_path.read_text()
+
+    def test_saves_to_context_subdirectory(self, tmp_path: Path, minimal_context: BlueBoxAgentContext) -> None:
+        agent = self._make_agent(tmp_path)
+        result = agent._generate_context(context=minimal_context)
+        assert "context/" in result["context_json"]
+        assert "context/" in result["context_md"]

From 08b7ebab03bf98b925a04f2e1160d5ef330331a9 Mon Sep 17 00:00:00 2001
From: Dima Vremekno <dimavrem22@gmail.com>
Date: Sun, 22 Feb 2026 21:08:34 -0500
Subject: [PATCH 02/13] Allow /generate_context to accept an optional focus
 prompt

Users can now type `/generate_context focus on the flight search part` to
guide the agent toward a specific aspect of the session when generating
the context file.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 bluebox/scripts/run_bluebox_agent.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/bluebox/scripts/run_bluebox_agent.py b/bluebox/scripts/run_bluebox_agent.py
index 30daa002..87bdc74a 100644
--- a/bluebox/scripts/run_bluebox_agent.py
+++ b/bluebox/scripts/run_bluebox_agent.py
@@ -52,10 +52,11 @@ class BlueBoxAgentTUI(AbstractAgentTUI):
     TITLE = "BlueBox Agent"
     SLASH_COMMANDS = {
         **BASE_SLASH_COMMANDS,
-        "/generate_context": "Save a reusable context file from this session",
+        "/generate_context": "Save a reusable context file (optionally with a focus prompt)",
     }
     HELP_TEXT = BASE_HELP_TEXT + (
-        "\n    [cyan]/generate_context[/cyan]  Save a reusable context file from this session\n"
+        "\n    [cyan]/generate_context[/cyan]  Save a reusable context file from this session"
+        "\n                       Optionally add a focus: [cyan]/generate_context focus on the flight search part[/cyan]\n"
     )
     SHOW_SAVED_FILES_PANE = True
 
@@ -152,18 +153,24 @@ def _add(p: str) -> None:
     )
 
     def _handle_custom_command(self, cmd: str, raw_input: str) -> bool:
-        if cmd == "/generate_context":
+        if raw_input.lower().startswith("/generate_context"):
             chat = self.query_one("#chat-log", RichLog)
             if not self._agent:
                 chat.write(Text.from_markup("[red]Agent not initialized.[/red]"))
                 return True
+
+            user_focus = raw_input[len("/generate_context"):].strip()
+            prompt = self._GENERATE_CONTEXT_PROMPT
+            if user_focus:
+                prompt += f"\n\n**User focus:** {user_focus}"
+
             chat.write(Text.from_markup(
                 "[yellow]Generating context from this session...[/yellow]"
             ))
             self._processing = True
             self._assistant_header_printed = False
             self._status_update_printed = False
-            self._send_to_agent(self._GENERATE_CONTEXT_PROMPT)
+            self._send_to_agent(prompt)
             return True
         return False
 

From 24a3d2005338fa0b928e2ea2906208014e2add8f Mon Sep 17 00:00:00 2001
From: Dima Vremekno <dimavrem22@gmail.com>
Date: Sun, 22 Feb 2026 21:11:18 -0500
Subject: [PATCH 03/13] Use to_markdown() for context prompt injection instead
 of hand-built formatting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Single source of truth — the markdown rendering logic lives on the model,
not duplicated in the agent.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 bluebox/agents/bluebox_agent.py | 35 +++++++--------------------------
 1 file changed, 7 insertions(+), 28 deletions(-)

diff --git a/bluebox/agents/bluebox_agent.py b/bluebox/agents/bluebox_agent.py
index a2d5a856..6f9aae4e 100644
--- a/bluebox/agents/bluebox_agent.py
+++ b/bluebox/agents/bluebox_agent.py
@@ -348,40 +348,19 @@ def _get_context_prompt_section(self) -> str:
         if not ctx:
             return ""
 
-        lines: list[str] = [
-            "\n\n## Prior Context",
-            "A previous session already solved a similar task. Use this as a starting point.",
-            f"\n**Goal:** {ctx.goal}",
-            f"\n**Summary:** {ctx.summary}",
-        ]
-
-        if ctx.routines_used:
-            lines.append("\n**Routines that worked:**")
-            for r in ctx.routines_used:
-                param_str = json.dumps(r.parameters, default=str) if r.parameters else "{}"
-                lines.append(f"- `{r.routine_id}` ({r.routine_name}): {param_str}")
-
-        if ctx.python_code:
-            lines.append(f"\n**Post-processing code that worked:**\n```python\n{ctx.python_code}\n```")
-
-        if ctx.output_files:
-            lines.append(f"\n**Output files produced:** {', '.join(ctx.output_files)}")
-
-        lines.append(f"\n**Output description:** {ctx.output_description}")
-        lines.append(
-            "\n> Replicate this path if the user's goal matches. "
-            "Adjust parameters for the new request. Skip trial and error."
+        section = (
+            "\n\n## Prior Context\n"
+            "A previous session already solved a similar task. Use this as a starting point.\n"
+            "Replicate this path if the user's goal matches. "
+            "Adjust parameters for the new request. Skip trial and error.\n\n"
+            + ctx.to_markdown()
         )
 
-        section = "\n".join(lines)
-
         if len(section) > self._CONTEXT_PROMPT_MAX_CHARS:
-            truncated = section[:self._CONTEXT_PROMPT_MAX_CHARS]
-            truncated += (
+            section = section[:self._CONTEXT_PROMPT_MAX_CHARS] + (
                 "\n\n... (context truncated — use `read_workspace_file` to read "
                 "the full context files in `context/` for more detail)"
             )
-            return truncated
 
         return section
 

From ebf444606bf7036e141ecb0bc792b81b42b22bcd Mon Sep 17 00:00:00 2001
From: Dima Vremekno <dimavrem22@gmail.com>
Date: Sun, 22 Feb 2026 21:16:09 -0500
Subject: [PATCH 04/13] Flatten generate_context tool params for LLM
 compatibility

The nested Pydantic model produced a JSON schema with $defs/$ref that
LLMs couldn't reliably fill. Switch to flat parameters (goal, summary,
routines_used, etc.) and construct the model internally.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 bluebox/agents/bluebox_agent.py               | 40 ++++++++++++++++---
 .../unit/agents/test_bluebox_agent_context.py | 25 +++++++++++-
 2 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/bluebox/agents/bluebox_agent.py b/bluebox/agents/bluebox_agent.py
index 6f9aae4e..337068d5 100644
--- a/bluebox/agents/bluebox_agent.py
+++ b/bluebox/agents/bluebox_agent.py
@@ -746,7 +746,15 @@ def _read_workspace_file(
         return self._workspace.read_file(path, start_line=start_line, end_line=end_line)
 
     @agent_tool()
-    def _generate_context(self, context: BlueBoxAgentContext) -> dict[str, Any]:
+    def _generate_context(
+        self,
+        goal: str,
+        summary: str,
+        output_description: str,
+        routines_used: list[dict[str, Any]] | None = None,
+        python_code: str | None = None,
+        output_files: list[str] | None = None,
+    ) -> dict[str, Any]:
         """
         Save a context file capturing what worked in this session.
 
@@ -756,11 +764,33 @@ def _generate_context(self, context: BlueBoxAgentContext) -> dict[str, Any]:
         file (human-readable) are saved to the context/ directory.
 
         Args:
-            context: The full context object describing what was accomplished.
-                Must include goal, summary, output_description, and routines_used
-                with exact routine_ids and parameters that worked. Include python_code
-                if post-processing was used, and output_files listing what was produced.
+            goal: The user's original request, in their own words.
+            summary: 1-2 sentence summary of what was accomplished.
+            output_description: Description of the output: format, key fields,
+                row count if known (e.g. "CSV with 47 rows, columns: name, price, url").
+            routines_used: List of routines that worked. Each dict must have keys:
+                routine_id (str), routine_name (str), and parameters (dict with
+                the parameter values that produced correct results).
+            python_code: The final working Python snippet passed to run_python_code.
+                Omit if no post-processing was needed.
+            output_files: Relative paths of files written to outputs/
+                (e.g. ["outputs/results.csv"]).
         """
+        try:
+            validated_routines = [
+                RoutineUsed.model_validate(r) for r in (routines_used or [])
+            ]
+            context = BlueBoxAgentContext(
+                goal=goal,
+                summary=summary,
+                output_description=output_description,
+                routines_used=validated_routines,
+                python_code=python_code,
+                output_files=output_files or [],
+            )
+        except Exception as e:
+            return {"error": f"Failed to build context: {e}"}
+
         # Save canonical JSON
         json_content = context.model_dump_json(indent=2)
         try:
diff --git a/tests/unit/agents/test_bluebox_agent_context.py b/tests/unit/agents/test_bluebox_agent_context.py
index 7783cbb8..ceabf28b 100644
--- a/tests/unit/agents/test_bluebox_agent_context.py
+++ b/tests/unit/agents/test_bluebox_agent_context.py
@@ -369,7 +369,14 @@ def test_tool_is_registered(self) -> None:
 
     def test_saves_both_files(self, tmp_path: Path, sample_context: BlueBoxAgentContext) -> None:
         agent = self._make_agent(tmp_path)
-        result = agent._generate_context(context=sample_context)
+        result = agent._generate_context(
+            goal=sample_context.goal,
+            summary=sample_context.summary,
+            output_description=sample_context.output_description,
+            routines_used=[r.model_dump() for r in sample_context.routines_used],
+            python_code=sample_context.python_code,
+            output_files=sample_context.output_files,
+        )
 
         assert result["success"] is True
         assert result["context_json"] is not None
@@ -388,6 +395,20 @@ def test_saves_both_files(self, tmp_path: Path, sample_context: BlueBoxAgentCont
 
     def test_saves_to_context_subdirectory(self, tmp_path: Path, minimal_context: BlueBoxAgentContext) -> None:
         agent = self._make_agent(tmp_path)
-        result = agent._generate_context(context=minimal_context)
+        result = agent._generate_context(
+            goal=minimal_context.goal,
+            summary=minimal_context.summary,
+            output_description=minimal_context.output_description,
+        )
         assert "context/" in result["context_json"]
         assert "context/" in result["context_md"]
+
+    def test_validates_bad_routines_used(self, tmp_path: Path) -> None:
+        agent = self._make_agent(tmp_path)
+        result = agent._generate_context(
+            goal="test",
+            summary="test",
+            output_description="test",
+            routines_used=[{"bad_key": "missing routine_id"}],
+        )
+        assert "error" in result

From 2cee95d0db0a6e878e15618192795c7f44e54191 Mon Sep 17 00:00:00 2001
From: Dima Vremekno <dimavrem22@gmail.com>
Date: Sun, 22 Feb 2026 21:26:08 -0500
Subject: [PATCH 05/13] Fix context file path resolution doubling the workspace
 prefix

_auto_discover_context returns paths like 'workspace/context/file.json'
(already relative to cwd). _load_context_from_path was prepending the
workspace root_path to every non-absolute path, producing
'workspace/workspace/...'.

Fix: try the path as-is first; only prepend the workspace root when the
path is relative and no file exists at it.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 bluebox/agents/bluebox_agent.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bluebox/agents/bluebox_agent.py b/bluebox/agents/bluebox_agent.py
index 337068d5..96ec7f55 100644
--- a/bluebox/agents/bluebox_agent.py
+++ b/bluebox/agents/bluebox_agent.py
@@ -315,7 +315,7 @@ def _load_context(self, context_file: str | None) -> BlueBoxAgentContext | None:
     def _load_context_from_path(self, context_file: str) -> BlueBoxAgentContext | None:
         """Load a context file from an explicit path (absolute or workspace-relative)."""
         path = Path(context_file)
-        if not path.is_absolute():
+        if not path.is_file() and not path.is_absolute():
             path = self._workspace.root_path / context_file
         if not path.is_file():
             logger.warning("Context file not found: %s", path)

From 13a75b92c39faa3bb6824c9c43dcefe4c8d5c171 Mon Sep 17 00:00:00 2001
From: Dima Vremekno <dimavrem22@gmail.com>
Date: Sun, 22 Feb 2026 21:30:14 -0500
Subject: [PATCH 06/13] Auto-populate routines_used from raw/ when agent leaves
 it empty
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The agent was generating context files with routines_used=[] despite having
executed routines. Two fixes:

1. _generate_context now falls back to _extract_routines_from_raw(), which
   reads the raw/ execution result files and extracts routine_id, routine_name,
   and parameters from executions whose status is "completed" or missing,
   deduplicated by routine_id. This is a safety net — if the agent provides
   routines, those are used instead.

2. Strengthened the /generate_context prompt and system prompt rule to
   emphasize that routines_used must never be empty.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 bluebox/agents/bluebox_agent.py               | 37 ++++++++-
 bluebox/scripts/run_bluebox_agent.py          | 10 ++-
 .../unit/agents/test_bluebox_agent_context.py | 80 +++++++++++++++++++
 3 files changed, 123 insertions(+), 4 deletions(-)

diff --git a/bluebox/agents/bluebox_agent.py b/bluebox/agents/bluebox_agent.py
index 96ec7f55..8c6d5d76 100644
--- a/bluebox/agents/bluebox_agent.py
+++ b/bluebox/agents/bluebox_agent.py
@@ -131,7 +131,7 @@ class BlueBoxAgent(AbstractAgent):
         - When using `execute_browser_task`, write a specific, step-by-step task description so the browser agent knows exactly what to do.
         - If your first search returns no results, try rephrasing the task description before giving up.
         - Be concise in responses.
-        - After successfully completing a task (output verified and correct), call `generate_context` to save a reusable recipe. Fill in all fields accurately — especially `routines_used` with the exact routine_ids and parameters that worked, and `python_code` with the final working snippet.
+        - After successfully completing a task (output verified and correct), call `generate_context` to save a reusable recipe. **NEVER leave `routines_used` empty** — include every routine that was executed, with exact routine_id, routine_name, and parameter values. Also include `python_code` with the final working snippet.
     """).strip()
 
     ## Magic methods
@@ -364,6 +364,31 @@ def _get_context_prompt_section(self) -> str:
 
         return section
 
+    def _extract_routines_from_raw(self) -> list[RoutineUsed]:
+        """Extract routine info from raw/ execution result files.
+
+        Each raw JSON file contains routine_id, routine_name, parameters,
+        and status from a previous execution. Returns deduplicated list
+        of successfully executed routines.
+        """
+        raw_results = self._workspace.load_raw_json()
+        seen: set[str] = set()
+        routines: list[RoutineUsed] = []
+        for rr in raw_results:
+            rid = rr.get("routine_id")
+            if not rid or rid in seen:
+                continue
+            # Only include completed executions
+            if rr.get("status") not in ("completed", None):
+                continue
+            seen.add(rid)
+            routines.append(RoutineUsed(
+                routine_id=rid,
+                routine_name=rr.get("routine_name", rid),
+                parameters=rr.get("parameters", {}),
+            ))
+        return routines
+
     ## Tool handlers
 
     @agent_tool()
@@ -780,6 +805,16 @@ def _generate_context(
             validated_routines = [
                 RoutineUsed.model_validate(r) for r in (routines_used or [])
             ]
+
+            # Auto-populate from raw/ execution results if agent didn't provide routines
+            if not validated_routines:
+                validated_routines = self._extract_routines_from_raw()
+                if validated_routines:
+                    logger.info(
+                        "Auto-populated %d routine(s) from raw/ execution results",
+                        len(validated_routines),
+                    )
+
             context = BlueBoxAgentContext(
                 goal=goal,
                 summary=summary,
diff --git a/bluebox/scripts/run_bluebox_agent.py b/bluebox/scripts/run_bluebox_agent.py
index 87bdc74a..48358f90 100644
--- a/bluebox/scripts/run_bluebox_agent.py
+++ b/bluebox/scripts/run_bluebox_agent.py
@@ -142,10 +142,14 @@ def _add(p: str) -> None:
 
     _GENERATE_CONTEXT_PROMPT: str = (
         "Review everything we accomplished in this session and call the `generate_context` tool "
-        "to save a reusable context file. Include:\n"
+        "to save a reusable context file.\n\n"
+        "**CRITICAL — you MUST include `routines_used`**. For every routine that was executed, "
+        "provide the exact routine_id, routine_name, and the parameter values that were used. "
+        "Look at the execute_routines_in_parallel calls you made earlier in this conversation. "
+        "Do NOT leave routines_used empty — this is the most important field for replay.\n\n"
+        "Also include:\n"
         "- The original goal (what I asked for)\n"
-        "- All routines that produced useful results (with exact routine_ids and parameter values)\n"
-        "- The final working Python post-processing code (if any)\n"
+        "- The final working Python post-processing code (the last successful run_python_code call)\n"
         "- The output files that were created\n"
         "- A clear description of what the output looks like\n"
         "- A concise summary of what was accomplished\n\n"
diff --git a/tests/unit/agents/test_bluebox_agent_context.py b/tests/unit/agents/test_bluebox_agent_context.py
index ceabf28b..1c3d15d0 100644
--- a/tests/unit/agents/test_bluebox_agent_context.py
+++ b/tests/unit/agents/test_bluebox_agent_context.py
@@ -412,3 +412,83 @@ def test_validates_bad_routines_used(self, tmp_path: Path) -> None:
             routines_used=[{"bad_key": "missing routine_id"}],
         )
         assert "error" in result
+
+    def test_auto_populates_routines_from_raw(self, tmp_path: Path) -> None:
+        """When routines_used is empty, auto-populate from raw/ execution results."""
+        agent = self._make_agent(tmp_path)
+
+        # Write a fake routine result to raw/
+        raw_dir = tmp_path / "raw"
+        raw_dir.mkdir()
+        (raw_dir / "result_1.json").write_text(json.dumps({
+            "routine_id": "Routine_abc",
+            "routine_name": "TestRoutine",
+            "status": "completed",
+            "parameters": {"city": "NYC"},
+            "result": {"ok": True, "data": {}},
+        }))
+
+        result = agent._generate_context(
+            goal="test goal",
+            summary="test summary",
+            output_description="test output",
+            # routines_used intentionally omitted
+        )
+
+        assert result["success"] is True
+        # Verify the saved context has the routine from raw/
+        json_path = tmp_path / result["context_json"]
+        loaded = BlueBoxAgentContext.model_validate_json(json_path.read_text())
+        assert len(loaded.routines_used) == 1
+        assert loaded.routines_used[0].routine_id == "Routine_abc"
+        assert loaded.routines_used[0].routine_name == "TestRoutine"
+        assert loaded.routines_used[0].parameters == {"city": "NYC"}
+
+    def test_auto_populate_deduplicates_routines(self, tmp_path: Path) -> None:
+        """Same routine_id executed multiple times should appear once."""
+        agent = self._make_agent(tmp_path)
+
+        raw_dir = tmp_path / "raw"
+        raw_dir.mkdir()
+        for i in range(3):
+            (raw_dir / f"result_{i}.json").write_text(json.dumps({
+                "routine_id": "Routine_same",
+                "routine_name": "SameRoutine",
+                "status": "completed",
+                "parameters": {"q": f"query_{i}"},
+                "result": {"ok": True, "data": {}},
+            }))
+
+        result = agent._generate_context(
+            goal="test", summary="test", output_description="test",
+        )
+        json_path = tmp_path / result["context_json"]
+        loaded = BlueBoxAgentContext.model_validate_json(json_path.read_text())
+        assert len(loaded.routines_used) == 1
+
+    def test_agent_provided_routines_not_overridden(self, tmp_path: Path) -> None:
+        """When agent provides routines_used, don't auto-populate."""
+        agent = self._make_agent(tmp_path)
+
+        raw_dir = tmp_path / "raw"
+        raw_dir.mkdir()
+        (raw_dir / "result_1.json").write_text(json.dumps({
+            "routine_id": "Routine_from_raw",
+            "routine_name": "RawRoutine",
+            "status": "completed",
+            "parameters": {},
+            "result": {"ok": True, "data": {}},
+        }))
+
+        result = agent._generate_context(
+            goal="test", summary="test", output_description="test",
+            routines_used=[{
+                "routine_id": "Routine_agent_provided",
+                "routine_name": "AgentRoutine",
+                "parameters": {"x": 1},
+            }],
+        )
+        json_path = tmp_path / result["context_json"]
+        loaded = BlueBoxAgentContext.model_validate_json(json_path.read_text())
+        assert len(loaded.routines_used) == 1
+        assert loaded.routines_used[0].routine_id == "Routine_agent_provided"

From 47a5578ff26d93e5dcdffacc8ca5ed1b95c1b01c Mon Sep 17 00:00:00 2001
From: Dima Vremekno <dimavrem22@gmail.com>
Date: Sun, 22 Feb 2026 22:15:03 -0500
Subject: [PATCH 07/13] Replace generate_context tool with structured output
 call
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove the @agent_tool generate_context (LLM kept failing to fill
the schema) and replace it with a direct LLMClient.call_sync using
response_model=BlueBoxAgentContext via OpenAI structured output.

The /generate_context slash command now calls agent.generate_context()
directly instead of prompting the agent loop, using
previous_response_id to preserve conversation context without
polluting the chat history.

Also:
- Rename RoutineUsed → UsedRoutine, add UsedRoutineParameter with
  concrete types (str|bool|int|float) to satisfy OpenAI strict schema
- Create all workspace subdirs (raw/, outputs/, context/) in
  LocalWorkspace.__init__

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 bluebox/agents/bluebox_agent.py               | 142 +++++++-------
 bluebox/agents/workspace.py                   |   8 +-
 bluebox/data_models/agents/__init__.py        |   4 +-
 bluebox/data_models/agents/context.py         |  51 +++--
 bluebox/scripts/run_bluebox_agent.py          |  56 +++---
 .../unit/agents/test_bluebox_agent_context.py | 177 ++++++++++--------
 tests/unit/test_read_workspace_file.py        |   2 -
 tests/unit/test_workspace.py                  |  11 +-
 8 files changed, 245 insertions(+), 206 deletions(-)

diff --git a/bluebox/agents/bluebox_agent.py b/bluebox/agents/bluebox_agent.py
index 8c6d5d76..fda99365 100644
--- a/bluebox/agents/bluebox_agent.py
+++ b/bluebox/agents/bluebox_agent.py
@@ -22,7 +22,7 @@
 from bluebox.agents.abstract_agent import AbstractAgent, AgentCard, agent_tool
 from bluebox.agents.workspace import AgentWorkspace, LocalWorkspace
 from bluebox.config import Config
-from bluebox.data_models.agents.context import BlueBoxAgentContext, RoutineUsed
+from bluebox.data_models.agents.context import BlueBoxAgentContext, UsedRoutine
 from bluebox.data_models.browser_agent import (
     BrowserAgentDoneEvent,
     BrowserAgentErrorEvent,
@@ -131,7 +131,7 @@ class BlueBoxAgent(AbstractAgent):
         - When using `execute_browser_task`, write a specific, step-by-step task description so the browser agent knows exactly what to do.
         - If your first search returns no results, try rephrasing the task description before giving up.
         - Be concise in responses.
-        - After successfully completing a task (output verified and correct), call `generate_context` to save a reusable recipe. **NEVER leave `routines_used` empty** — include every routine that was executed, with exact routine_id, routine_name, and parameter values. Also include `python_code` with the final working snippet.
+        - Be thorough and persistent — keep iterating until the output is correct.
     """).strip()
 
     ## Magic methods
@@ -364,7 +364,7 @@ def _get_context_prompt_section(self) -> str:
 
         return section
 
-    def _extract_routines_from_raw(self) -> list[RoutineUsed]:
+    def _extract_routines_from_raw(self) -> list[UsedRoutine]:
         """Extract routine info from raw/ execution result files.
 
         Each raw JSON file contains routine_id, routine_name, parameters,
@@ -373,7 +373,7 @@ def _extract_routines_from_raw(self) -> list[RoutineUsed]:
         """
         raw_results = self._workspace.load_raw_json()
         seen: set[str] = set()
-        routines: list[RoutineUsed] = []
+        routines: list[UsedRoutine] = []
         for rr in raw_results:
             rid = rr.get("routine_id")
             if not rid or rid in seen:
@@ -382,7 +382,7 @@ def _extract_routines_from_raw(self) -> list[RoutineUsed]:
             if rr.get("status") not in ("completed", None):
                 continue
             seen.add(rid)
-            routines.append(RoutineUsed(
+            routines.append(UsedRoutine.from_dict_params(
                 routine_id=rid,
                 routine_name=rr.get("routine_name", rid),
                 parameters=rr.get("parameters", {}),
@@ -770,88 +770,72 @@ def _read_workspace_file(
         """
         return self._workspace.read_file(path, start_line=start_line, end_line=end_line)
 
-    @agent_tool()
-    def _generate_context(
-        self,
-        goal: str,
-        summary: str,
-        output_description: str,
-        routines_used: list[dict[str, Any]] | None = None,
-        python_code: str | None = None,
-        output_files: list[str] | None = None,
-    ) -> dict[str, Any]:
-        """
-        Save a context file capturing what worked in this session.
+    ## Context generation (structured output, called by TUI slash command)
+
+    def generate_context(self, focus: str | None = None) -> BlueBoxAgentContext:
+        """Generate a context file from the current session using structured output.
 
-        Call this after successfully completing the user's task. The context
-        file lets another BlueBoxAgent instance replicate the successful path
-        without trial and error. Both a JSON file (canonical) and a Markdown
-        file (human-readable) are saved to the context/ directory.
+        Makes a direct LLM call with response_model=BlueBoxAgentContext to get
+        a validated Pydantic model back. Saves both JSON and Markdown files to
+        the workspace context/ directory.
 
         Args:
-            goal: The user's original request, in their own words.
-            summary: 1-2 sentence summary of what was accomplished.
-            output_description: Description of the output: format, key fields,
-                row count if known (e.g. "CSV with 47 rows, columns: name, price, url").
-            routines_used: List of routines that worked. Each dict must have keys:
-                routine_id (str), routine_name (str), and parameters (dict with
-                the parameter values that produced correct results).
-            python_code: The final working Python snippet passed to run_python_code.
-                Omit if no post-processing was needed.
-            output_files: Relative paths of files written to outputs/
-                (e.g. ["outputs/results.csv"]).
-        """
-        try:
-            validated_routines = [
-                RoutineUsed.model_validate(r) for r in (routines_used or [])
-            ]
+            focus: Optional user-provided focus prompt to guide context generation.
+
+        Returns:
+            The generated BlueBoxAgentContext.
 
-            # Auto-populate from raw/ execution results if agent didn't provide routines
-            if not validated_routines:
-                validated_routines = self._extract_routines_from_raw()
-                if validated_routines:
-                    logger.info(
-                        "Auto-populated %d routine(s) from raw/ execution results",
-                        len(validated_routines),
-                    )
-
-            context = BlueBoxAgentContext(
-                goal=goal,
-                summary=summary,
-                output_description=output_description,
-                routines_used=validated_routines,
-                python_code=python_code,
-                output_files=output_files or [],
+        Raises:
+            ValueError: If the LLM fails to produce a valid context.
+        """
+        raw_routines = self._extract_routines_from_raw()
+
+        system_prompt = (
+            "You are analyzing a BlueBox Agent conversation to extract a reusable context file. "
+            "Fill in every field of the BlueBoxAgentContext schema based on the conversation.\n\n"
+            "CRITICAL: routines_used must include every routine that was executed with exact "
+            "routine_id, routine_name, and parameter values.\n"
+            "Include the final working python_code snippet if post-processing was done.\n"
+            "Include output_files with relative paths of files written to outputs/.\n"
+        )
+        if raw_routines:
+            system_prompt += "\nRoutines found in execution results:\n"
+            for r in raw_routines:
+                system_prompt += f"- {r.routine_name} ({r.routine_id}): {json.dumps(r.parameters_as_dict(), default=str)}\n"
+        if focus:
+            system_prompt += f"\nUser focus: {focus}\n"
+
+        # One-off structured output call that sees the full conversation via
+        # OpenAI's response chaining (previous_response_id reconstructs the
+        # thread server-side). We don't update self._previous_response_id
+        # afterward so this call doesn't affect the agent loop.
+        response = self.llm_client.call_sync(
+            input="Generate a reusable context file from this conversation.",
+            system_prompt=system_prompt,
+            response_model=BlueBoxAgentContext,
+            previous_response_id=self._previous_response_id,
+        )
+        context = response.parsed
+        if context is None:
+            raise ValueError("LLM failed to produce a valid BlueBoxAgentContext")
+
+        # Safety net: merge raw routines if LLM left routines_used empty
+        if not context.routines_used and raw_routines:
+            context.routines_used = raw_routines
+            logger.info(
+                "Auto-populated %d routine(s) from raw/ execution results",
+                len(raw_routines),
             )
-        except Exception as e:
-            return {"error": f"Failed to build context: {e}"}
 
         # Save canonical JSON
-        json_content = context.model_dump_json(indent=2)
-        try:
-            json_save = self._workspace.save_file("context", "agent_context", json_content)
-        except Exception as e:
-            logger.exception("Failed to save context JSON: %s", e)
-            return {"error": f"Failed to save context file: {e}"}
+        json_save = self._workspace.save_file(
+            "context", "agent_context", context.model_dump_json(indent=2),
+        )
 
         # Save companion Markdown
-        md_content = context.to_markdown()
-        try:
-            md_save = self._workspace.save_file(
-                "context", "agent_context", md_content, extension=".md",
-            )
-        except Exception as e:
-            logger.warning("Failed to save context Markdown: %s", e)
-            md_save = {"output_file": None}
+        md_save = self._workspace.save_file(
+            "context", "agent_context", context.to_markdown(), extension=".md",
+        )
 
         logger.info("Context files saved: %s, %s", json_save["output_file"], md_save["output_file"])
-        return {
-            "success": True,
-            "context_json": json_save["output_file"],
-            "context_md": md_save["output_file"],
-            "message": (
-                f"Context saved to {json_save['output_file']}. "
-                "A new BlueBoxAgent using this workspace will automatically "
-                "load this context and replicate the successful path."
-            ),
-        }
+        return context
diff --git a/bluebox/agents/workspace.py b/bluebox/agents/workspace.py
index 999c0db8..37e61fe0 100644
--- a/bluebox/agents/workspace.py
+++ b/bluebox/agents/workspace.py
@@ -27,9 +27,10 @@ class AgentWorkspace(ABC):
     """
     Abstract workspace that agents use for file I/O.
 
-    A workspace has two logical subdirectories:
+    A workspace has three logical subdirectories:
     - raw/    : input data (e.g., routine results saved automatically)
     - outputs/: agent-generated output files (e.g., CSVs, processed JSON)
+    - context/: reusable context files from successful sessions
     """
 
     @property
@@ -113,7 +114,7 @@ def diff_outputs(self, before: dict[str, float]) -> list[str]:
 
     @abstractmethod
     def ensure_dirs(self) -> None:
-        """Ensure the workspace directory structure exists (raw/, outputs/)."""
+        """Ensure the workspace directory structure exists (raw/, outputs/, context/)."""
 
 
 class LocalWorkspace(AgentWorkspace):
@@ -123,8 +124,10 @@ def __init__(self, workspace_dir: str = "./bluebox_workspace") -> None:
         self._workspace_dir = Path(workspace_dir)
         self._raw_dir = self._workspace_dir / "raw"
         self._outputs_dir = self._workspace_dir / "outputs"
+        self._context_dir = self._workspace_dir / "context"
         self._execution_counter: int = 0
         self._counter_lock = threading.Lock()
+        self.ensure_dirs()
 
     @property
     def root_path(self) -> Path:
@@ -230,3 +233,4 @@ def diff_outputs(self, before: dict[str, float]) -> list[str]:
     def ensure_dirs(self) -> None:
         self._raw_dir.mkdir(parents=True, exist_ok=True)
         self._outputs_dir.mkdir(parents=True, exist_ok=True)
+        self._context_dir.mkdir(parents=True, exist_ok=True)
diff --git a/bluebox/data_models/agents/__init__.py b/bluebox/data_models/agents/__init__.py
index 94c56aed..663a502b 100644
--- a/bluebox/data_models/agents/__init__.py
+++ b/bluebox/data_models/agents/__init__.py
@@ -1,3 +1,3 @@
-from bluebox.data_models.agents.context import BlueBoxAgentContext, RoutineUsed
+from bluebox.data_models.agents.context import BlueBoxAgentContext, UsedRoutine, UsedRoutineParameter
 
-__all__ = ["BlueBoxAgentContext", "RoutineUsed"]
+__all__ = ["BlueBoxAgentContext", "UsedRoutine", "UsedRoutineParameter"]
diff --git a/bluebox/data_models/agents/context.py b/bluebox/data_models/agents/context.py
index ede66f49..f5f2f329 100644
--- a/bluebox/data_models/agents/context.py
+++ b/bluebox/data_models/agents/context.py
@@ -20,16 +20,38 @@
 from pydantic import BaseModel, Field
 
 
-class RoutineUsed(BaseModel):
+class UsedRoutineParameter(BaseModel):
+    """A single parameter key-value pair used in a routine execution."""
+
+    key: str = Field(..., description="Parameter name")
+    value: str | bool | int | float = Field(..., description="Parameter value")
+
+
+class UsedRoutine(BaseModel):
     """One routine that was successfully executed during the session."""
 
     routine_id: str = Field(..., description="Routine ID from search_routines results")
     routine_name: str = Field(..., description="Human-readable routine name")
-    parameters: dict[str, Any] = Field(
-        default_factory=dict,
-        description="Parameter name-to-value mapping that produced correct results",
+    parameters: list[UsedRoutineParameter] = Field(
+        default_factory=list,
+        description="Parameter key-value pairs that produced correct results",
     )
 
+    def parameters_as_dict(self) -> dict[str, str | bool | int | float]:
+        """Convert parameters list to a dict for convenience."""
+        return {p.key: p.value for p in self.parameters}
+
+    @classmethod
+    def from_dict_params(
+        cls, routine_id: str, routine_name: str, parameters: dict[str, Any],
+    ) -> UsedRoutine:
+        """Convenience constructor that accepts a dict of parameters."""
+        return cls(
+            routine_id=routine_id,
+            routine_name=routine_name,
+            parameters=[UsedRoutineParameter(key=k, value=v) for k, v in parameters.items()],
+        )
+
 
 class BlueBoxAgentContext(BaseModel):
     """
@@ -41,7 +63,7 @@ class BlueBoxAgentContext(BaseModel):
 
     version: int = Field(default=1, description="Schema version for forward compatibility")
     goal: str = Field(..., description="The user's original request, in their own words")
-    routines_used: list[RoutineUsed] = Field(
+    routines_used: list[UsedRoutine] = Field(
         default_factory=list,
         description="Routines that produced useful results, in execution order",
     )
@@ -96,7 +118,7 @@ def to_markdown(self) -> str:
                 if r.parameters:
                     lines.append("**Parameters:**")
                     lines.append("```json")
-                    lines.append(json.dumps(r.parameters, indent=2, default=str))
+                    lines.append(json.dumps(r.parameters_as_dict(), indent=2, default=str))
                     lines.append("```")
                 else:
                     lines.append("No parameters.")
@@ -209,9 +231,9 @@ def _extract_fenced_block(text: str, language: str | None = None) -> str | None:
     return None
 
 
-def _parse_routines_section(text: str) -> list[RoutineUsed]:
-    """Parse the Routines Used section into RoutineUsed objects."""
-    routines: list[RoutineUsed] = []
+def _parse_routines_section(text: str) -> list[UsedRoutine]:
+    """Parse the Routines Used section into UsedRoutine objects."""
+    routines: list[UsedRoutine] = []
     if not text.strip():
         return routines
 
@@ -228,18 +250,19 @@ def _parse_routines_section(text: str) -> list[RoutineUsed]:
         routine_id = header_match.group(2).strip()
 
         # Parse parameters from JSON code block
-        parameters: dict[str, Any] = {}
+        param_list: list[UsedRoutineParameter] = []
         params_json = _extract_fenced_block(part, "json")
         if params_json:
             try:
-                parameters = json.loads(params_json)
-            except json.JSONDecodeError:
+                params_dict = json.loads(params_json)
+                param_list = [UsedRoutineParameter(key=k, value=v) for k, v in params_dict.items()]
+            except (json.JSONDecodeError, TypeError):
                 pass
 
-        routines.append(RoutineUsed(
+        routines.append(UsedRoutine(
             routine_id=routine_id,
             routine_name=routine_name,
-            parameters=parameters,
+            parameters=param_list,
         ))
 
     return routines
diff --git a/bluebox/scripts/run_bluebox_agent.py b/bluebox/scripts/run_bluebox_agent.py
index 48358f90..abacd7a7 100644
--- a/bluebox/scripts/run_bluebox_agent.py
+++ b/bluebox/scripts/run_bluebox_agent.py
@@ -32,6 +32,7 @@
 
 from rich.console import Console
 from rich.text import Text
+from textual import work
 from textual.widgets import RichLog
 
 from bluebox.agents.bluebox_agent import BlueBoxAgent
@@ -140,22 +141,6 @@ def _add(p: str) -> None:
 
     # ── Custom slash commands ─────────────────────────────────────────
 
-    _GENERATE_CONTEXT_PROMPT: str = (
-        "Review everything we accomplished in this session and call the `generate_context` tool "
-        "to save a reusable context file.\n\n"
-        "**CRITICAL — you MUST include `routines_used`**. For every routine that was executed, "
-        "provide the exact routine_id, routine_name, and the parameter values that were used. "
-        "Look at the execute_routines_in_parallel calls you made earlier in this conversation. "
-        "Do NOT leave routines_used empty — this is the most important field for replay.\n\n"
-        "Also include:\n"
-        "- The original goal (what I asked for)\n"
-        "- The final working Python post-processing code (the last successful run_python_code call)\n"
-        "- The output files that were created\n"
-        "- A clear description of what the output looks like\n"
-        "- A concise summary of what was accomplished\n\n"
-        "Be thorough and accurate — another agent will use this context to replicate our work."
-    )
-
     def _handle_custom_command(self, cmd: str, raw_input: str) -> bool:
         if raw_input.lower().startswith("/generate_context"):
             chat = self.query_one("#chat-log", RichLog)
@@ -163,21 +148,44 @@ def _handle_custom_command(self, cmd: str, raw_input: str) -> bool:
                 chat.write(Text.from_markup("[red]Agent not initialized.[/red]"))
                 return True
 
-            user_focus = raw_input[len("/generate_context"):].strip()
-            prompt = self._GENERATE_CONTEXT_PROMPT
-            if user_focus:
-                prompt += f"\n\n**User focus:** {user_focus}"
-
+            user_focus = raw_input[len("/generate_context"):].strip() or None
             chat.write(Text.from_markup(
                 "[yellow]Generating context from this session...[/yellow]"
             ))
             self._processing = True
-            self._assistant_header_printed = False
-            self._status_update_printed = False
-            self._send_to_agent(prompt)
+            self._generate_context_async(user_focus)
             return True
         return False
 
+    @work(thread=True)
+    def _generate_context_async(self, focus: str | None) -> None:
+        """Run generate_context in a background thread via structured output."""
+        try:
+            assert isinstance(self._agent, BlueBoxAgent)
+            context = self._agent.generate_context(focus=focus)
+            self.call_from_thread(self._show_context_success, context)
+        except Exception as e:
+            self.call_from_thread(self._show_context_error, str(e))
+
+    def _show_context_success(self, context: Any) -> None:
+        """Display context generation success in the chat pane."""
+        chat = self.query_one("#chat-log", RichLog)
+        chat.write(Text.from_markup(
+            f"[bold green]Context saved![/bold green]\n"
+            f"[dim]Goal:[/dim] {context.goal}\n"
+            f"[dim]Summary:[/dim] {context.summary}\n"
+            f"[dim]Routines:[/dim] {len(context.routines_used)}"
+        ))
+        self._processing = False
+        self._update_status()
+
+    def _show_context_error(self, error: str) -> None:
+        """Display context generation error in the chat pane."""
+        chat = self.query_one("#chat-log", RichLog)
+        chat.write(Text.from_markup(f"[bold red]Context generation failed:[/bold red] {error}"))
+        self._processing = False
+        self._update_status()
+
 
 # ─── Entry point ─────────────────────────────────────────────────────────────
 
diff --git a/tests/unit/agents/test_bluebox_agent_context.py b/tests/unit/agents/test_bluebox_agent_context.py
index 1c3d15d0..e2403ee4 100644
--- a/tests/unit/agents/test_bluebox_agent_context.py
+++ b/tests/unit/agents/test_bluebox_agent_context.py
@@ -13,7 +13,7 @@
 
 import pytest
 
-from bluebox.data_models.agents.context import BlueBoxAgentContext, RoutineUsed
+from bluebox.data_models.agents.context import BlueBoxAgentContext, UsedRoutine, UsedRoutineParameter
 
 
 # =============================================================================
@@ -28,12 +28,12 @@ def sample_context() -> BlueBoxAgentContext:
         version=1,
         goal="Find one-way train tickets from NYC to Boston on March 15, 2026",
         routines_used=[
-            RoutineUsed(
+            UsedRoutine.from_dict_params(
                 routine_id="Routine_abc123",
                 routine_name="AmtrakOneWaySearch",
                 parameters={"origin": "New York", "destination": "Boston", "date": "2026-03-15"},
             ),
-            RoutineUsed(
+            UsedRoutine.from_dict_params(
                 routine_id="Routine_def456",
                 routine_name="AmtrakPriceFilter",
                 parameters={"max_price": 100},
@@ -86,7 +86,7 @@ def test_json_roundtrip(self, sample_context: BlueBoxAgentContext) -> None:
         assert restored.output_files == sample_context.output_files
         assert len(restored.routines_used) == 2
         assert restored.routines_used[0].routine_id == "Routine_abc123"
-        assert restored.routines_used[1].parameters == {"max_price": 100}
+        assert restored.routines_used[1].parameters_as_dict() == {"max_price": 100}
         assert isinstance(restored.generated_at, datetime)
 
     def test_version_defaults_to_1(self, minimal_context: BlueBoxAgentContext) -> None:
@@ -149,7 +149,7 @@ def test_from_markdown_roundtrip(self, sample_context: BlueBoxAgentContext) -> N
         for orig, rest in zip(sample_context.routines_used, restored.routines_used):
             assert rest.routine_id == orig.routine_id
             assert rest.routine_name == orig.routine_name
-            assert rest.parameters == orig.parameters
+            assert rest.parameters_as_dict() == orig.parameters_as_dict()
 
     def test_from_markdown_no_python_code(self, minimal_context: BlueBoxAgentContext) -> None:
         """Markdown with no Python Code section should parse python_code as None."""
@@ -344,12 +344,12 @@ def test_no_context_no_section(self, tmp_path: Path) -> None:
 
 
 # =============================================================================
-# generate_context tool tests
+# generate_context (structured output) tests
 # =============================================================================
 
 
-class TestGenerateContextTool:
-    """Tests for the _generate_context agent tool."""
+class TestGenerateContext:
+    """Tests for the generate_context public method (structured output)."""
 
     def _make_agent(self, tmp_path: Path) -> Any:
         from bluebox.agents.bluebox_agent import BlueBoxAgent
@@ -361,65 +361,70 @@ def _make_agent(self, tmp_path: Path) -> Any:
             auth_headers_provider=lambda: {"X-Service-Token": "test"},
         )
 
-    def test_tool_is_registered(self) -> None:
+    def _mock_llm_response(self, context: BlueBoxAgentContext) -> MagicMock:
+        """Create a mock LLMChatResponse with parsed context."""
+        response = MagicMock()
+        response.parsed = context
+        return response
+
+    def test_tool_is_not_registered(self) -> None:
+        """generate_context should NOT be an agent tool anymore."""
         from bluebox.agents.bluebox_agent import BlueBoxAgent
         tools = BlueBoxAgent._collect_tools()
         tool_names = [meta.name for meta, _ in tools]
-        assert "generate_context" in tool_names
+        assert "generate_context" not in tool_names
 
     def test_saves_both_files(self, tmp_path: Path, sample_context: BlueBoxAgentContext) -> None:
         agent = self._make_agent(tmp_path)
-        result = agent._generate_context(
-            goal=sample_context.goal,
-            summary=sample_context.summary,
-            output_description=sample_context.output_description,
-            routines_used=[r.model_dump() for r in sample_context.routines_used],
-            python_code=sample_context.python_code,
-            output_files=sample_context.output_files,
-        )
+        agent.llm_client.call_sync = MagicMock(return_value=self._mock_llm_response(sample_context))
+
+        result = agent.generate_context()
 
-        assert result["success"] is True
-        assert result["context_json"] is not None
-        assert result["context_md"] is not None
+        assert result.goal == sample_context.goal
+        assert result.summary == sample_context.summary
 
-        # Verify JSON file exists and is valid
-        json_path = tmp_path / result["context_json"]
-        assert json_path.is_file()
-        loaded = BlueBoxAgentContext.model_validate_json(json_path.read_text())
+        # Verify both JSON and MD files were saved
+        context_dir = tmp_path / "context"
+        json_files = list(context_dir.glob("*.json"))
+        md_files = list(context_dir.glob("*.md"))
+        assert len(json_files) == 1
+        assert len(md_files) == 1
+
+        # Verify JSON is valid
+        loaded = BlueBoxAgentContext.model_validate_json(json_files[0].read_text())
         assert loaded.goal == sample_context.goal
 
-        # Verify MD file exists
-        md_path = tmp_path / result["context_md"]
-        assert md_path.is_file()
-        assert "## Goal" in md_path.read_text()
+        # Verify MD has expected sections
+        assert "## Goal" in md_files[0].read_text()
 
     def test_saves_to_context_subdirectory(self, tmp_path: Path, minimal_context: BlueBoxAgentContext) -> None:
         agent = self._make_agent(tmp_path)
-        result = agent._generate_context(
-            goal=minimal_context.goal,
-            summary=minimal_context.summary,
-            output_description=minimal_context.output_description,
-        )
-        assert "context/" in result["context_json"]
-        assert "context/" in result["context_md"]
+        agent.llm_client.call_sync = MagicMock(return_value=self._mock_llm_response(minimal_context))
 
-    def test_validates_bad_routines_used(self, tmp_path: Path) -> None:
+        agent.generate_context()
+
+        context_dir = tmp_path / "context"
+        assert context_dir.is_dir()
+        assert len(list(context_dir.glob("*.json"))) == 1
+        assert len(list(context_dir.glob("*.md"))) == 1
+
+    def test_raises_on_none_parsed(self, tmp_path: Path) -> None:
+        """Should raise ValueError when LLM returns None parsed result."""
         agent = self._make_agent(tmp_path)
-        result = agent._generate_context(
-            goal="test",
-            summary="test",
-            output_description="test",
-            routines_used=[{"bad_key": "missing routine_id"}],
-        )
-        assert "error" in result
+        response = MagicMock()
+        response.parsed = None
+        agent.llm_client.call_sync = MagicMock(return_value=response)
+
+        with pytest.raises(ValueError, match="failed to produce"):
+            agent.generate_context()
 
     def test_auto_populates_routines_from_raw(self, tmp_path: Path) -> None:
-        """When routines_used is empty, auto-populate from raw/ execution results."""
+        """When LLM returns empty routines_used, auto-populate from raw/."""
         agent = self._make_agent(tmp_path)
 
         # Write a fake routine result to raw/
         raw_dir = tmp_path / "raw"
-        raw_dir.mkdir()
+        raw_dir.mkdir(exist_ok=True)
         (raw_dir / "result_1.json").write_text(json.dumps({
             "routine_id": "Routine_abc",
             "routine_name": "TestRoutine",
@@ -428,28 +433,28 @@ def test_auto_populates_routines_from_raw(self, tmp_path: Path) -> None:
             "result": {"ok": True, "data": {}},
         }))
 
-        result = agent._generate_context(
+        # LLM returns context with empty routines_used
+        context_from_llm = BlueBoxAgentContext(
             goal="test goal",
             summary="test summary",
             output_description="test output",
-            # routines_used intentionally omitted
+            routines_used=[],
         )
+        agent.llm_client.call_sync = MagicMock(return_value=self._mock_llm_response(context_from_llm))
 
-        assert result["success"] is True
-        # Verify the saved context has the routine from raw/
-        json_path = tmp_path / result["context_json"]
-        loaded = BlueBoxAgentContext.model_validate_json(json_path.read_text())
-        assert len(loaded.routines_used) == 1
-        assert loaded.routines_used[0].routine_id == "Routine_abc"
-        assert loaded.routines_used[0].routine_name == "TestRoutine"
-        assert loaded.routines_used[0].parameters == {"city": "NYC"}
+        result = agent.generate_context()
+
+        assert len(result.routines_used) == 1
+        assert result.routines_used[0].routine_id == "Routine_abc"
+        assert result.routines_used[0].routine_name == "TestRoutine"
+        assert result.routines_used[0].parameters_as_dict() == {"city": "NYC"}
 
     def test_auto_populate_deduplicates_routines(self, tmp_path: Path) -> None:
         """Same routine_id executed multiple times should appear once."""
         agent = self._make_agent(tmp_path)
 
         raw_dir = tmp_path / "raw"
-        raw_dir.mkdir()
+        raw_dir.mkdir(exist_ok=True)
         for i in range(3):
             (raw_dir / f"result_{i}.json").write_text(json.dumps({
                 "routine_id": "Routine_same",
@@ -459,19 +464,21 @@ def test_auto_populate_deduplicates_routines(self, tmp_path: Path) -> None:
                 "result": {"ok": True, "data": {}},
             }))
 
-        result = agent._generate_context(
+        context_from_llm = BlueBoxAgentContext(
             goal="test", summary="test", output_description="test",
+            routines_used=[],
         )
-        json_path = tmp_path / result["context_json"]
-        loaded = BlueBoxAgentContext.model_validate_json(json_path.read_text())
-        assert len(loaded.routines_used) == 1
+        agent.llm_client.call_sync = MagicMock(return_value=self._mock_llm_response(context_from_llm))
+
+        result = agent.generate_context()
+        assert len(result.routines_used) == 1
 
-    def test_agent_provided_routines_not_overridden(self, tmp_path: Path) -> None:
-        """When agent provides routines_used, don't auto-populate."""
+    def test_llm_provided_routines_not_overridden(self, tmp_path: Path) -> None:
+        """When LLM provides routines_used, don't auto-populate from raw/."""
         agent = self._make_agent(tmp_path)
 
         raw_dir = tmp_path / "raw"
-        raw_dir.mkdir()
+        raw_dir.mkdir(exist_ok=True)
         (raw_dir / "result_1.json").write_text(json.dumps({
             "routine_id": "Routine_from_raw",
             "routine_name": "RawRoutine",
@@ -480,15 +487,37 @@ def test_agent_provided_routines_not_overridden(self, tmp_path: Path) -> None:
             "result": {"ok": True, "data": {}},
         }))
 
-        result = agent._generate_context(
+        context_from_llm = BlueBoxAgentContext(
             goal="test", summary="test", output_description="test",
-            routines_used=[{
-                "routine_id": "Routine_agent_provided",
-                "routine_name": "AgentRoutine",
-                "parameters": {"x": 1},
-            }],
+            routines_used=[UsedRoutine.from_dict_params(
+                routine_id="Routine_llm_provided",
+                routine_name="LLMRoutine",
+                parameters={"x": 1},
+            )],
         )
-        json_path = tmp_path / result["context_json"]
-        loaded = BlueBoxAgentContext.model_validate_json(json_path.read_text())
-        assert len(loaded.routines_used) == 1
-        assert loaded.routines_used[0].routine_id == "Routine_agent_provided"
+        agent.llm_client.call_sync = MagicMock(return_value=self._mock_llm_response(context_from_llm))
+
+        result = agent.generate_context()
+        assert len(result.routines_used) == 1
+        assert result.routines_used[0].routine_id == "Routine_llm_provided"
+
+    def test_passes_focus_to_system_prompt(self, tmp_path: Path, minimal_context: BlueBoxAgentContext) -> None:
+        """Focus text should be included in the system prompt sent to LLM."""
+        agent = self._make_agent(tmp_path)
+        agent.llm_client.call_sync = MagicMock(return_value=self._mock_llm_response(minimal_context))
+
+        agent.generate_context(focus="focus on the flight search part")
+
+        call_kwargs = agent.llm_client.call_sync.call_args
+        system_prompt = call_kwargs.kwargs.get("system_prompt") or call_kwargs[1].get("system_prompt", "")
+        assert "focus on the flight search part" in system_prompt
+
+    def test_passes_response_model(self, tmp_path: Path, minimal_context: BlueBoxAgentContext) -> None:
+        """Should call llm_client.call_sync with response_model=BlueBoxAgentContext."""
+        agent = self._make_agent(tmp_path)
+        agent.llm_client.call_sync = MagicMock(return_value=self._mock_llm_response(minimal_context))
+
+        agent.generate_context()
+
+        call_kwargs = agent.llm_client.call_sync.call_args
+        assert call_kwargs.kwargs.get("response_model") is BlueBoxAgentContext
diff --git a/tests/unit/test_read_workspace_file.py b/tests/unit/test_read_workspace_file.py
index a10bee9b..a38a237f 100644
--- a/tests/unit/test_read_workspace_file.py
+++ b/tests/unit/test_read_workspace_file.py
@@ -28,7 +28,6 @@ class TestPathTraversalPrevention:
     def test_parent_traversal_blocked(self, tmp_path: Path) -> None:
         """../  should be denied."""
         ws = _make_workspace(tmp_path / "workspace")
-        ws.root_path.mkdir()
         result = _call(ws, "../../../etc/passwd")
         assert "error" in result
         assert "Access denied" in result["error"]
@@ -36,7 +35,6 @@ def test_parent_traversal_blocked(self, tmp_path: Path) -> None:
     def test_absolute_path_outside_blocked(self, tmp_path: Path) -> None:
         """/etc/passwd should be denied."""
         ws = _make_workspace(tmp_path / "workspace")
-        ws.root_path.mkdir()
         result = _call(ws, "/etc/passwd")
         assert "error" in result
         assert "Access denied" in result["error"]
diff --git a/tests/unit/test_workspace.py b/tests/unit/test_workspace.py
index 7aa0b38c..d6db96e9 100644
--- a/tests/unit/test_workspace.py
+++ b/tests/unit/test_workspace.py
@@ -90,9 +90,7 @@ def test_empty_workspace(self, tmp_path: Path) -> None:
 
     def test_lists_files_in_subdirs(self, tmp_path: Path) -> None:
         ws = LocalWorkspace(str(tmp_path))
-        (tmp_path / "raw").mkdir()
         (tmp_path / "raw" / "result.json").write_text("{}")
-        (tmp_path / "outputs").mkdir()
         (tmp_path / "outputs" / "out.csv").write_text("a,b")
         result = ws.list_files()
         assert result["total_files"] == 2
@@ -106,7 +104,6 @@ class TestLoadRawJson:
     def test_loads_json_files(self, tmp_path: Path) -> None:
         ws = LocalWorkspace(str(tmp_path))
         raw = tmp_path / "raw"
-        raw.mkdir()
         (raw / "a.json").write_text('{"key": "a"}')
         (raw / "b.json").write_text('{"key": "b"}')
         results = ws.load_raw_json()
@@ -117,7 +114,6 @@ def test_loads_json_files(self, tmp_path: Path) -> None:
     def test_skips_invalid_json(self, tmp_path: Path) -> None:
         ws = LocalWorkspace(str(tmp_path))
         raw = tmp_path / "raw"
-        raw.mkdir()
         (raw / "good.json").write_text('{"ok": true}')
         (raw / "bad.json").write_text("not json")
         results = ws.load_raw_json()
@@ -134,7 +130,6 @@ class TestSnapshotAndDiffOutputs:
 
     def test_detects_new_file(self, tmp_path: Path) -> None:
         ws = LocalWorkspace(str(tmp_path))
-        (tmp_path / "outputs").mkdir()
         before = ws.snapshot_outputs()
         (tmp_path / "outputs" / "new.csv").write_text("data")
         changed = ws.diff_outputs(before)
@@ -144,7 +139,6 @@ def test_detects_new_file(self, tmp_path: Path) -> None:
     def test_detects_modified_file(self, tmp_path: Path) -> None:
         ws = LocalWorkspace(str(tmp_path))
         outputs = tmp_path / "outputs"
-        outputs.mkdir()
         f = outputs / "existing.csv"
         f.write_text("old")
         before = ws.snapshot_outputs()
@@ -156,7 +150,6 @@ def test_detects_modified_file(self, tmp_path: Path) -> None:
     def test_no_changes(self, tmp_path: Path) -> None:
         ws = LocalWorkspace(str(tmp_path))
         outputs = tmp_path / "outputs"
-        outputs.mkdir()
         (outputs / "stable.csv").write_text("data")
         before = ws.snapshot_outputs()
         changed = ws.diff_outputs(before)
@@ -166,11 +159,11 @@ def test_no_changes(self, tmp_path: Path) -> None:
 class TestEnsureDirs:
     """Tests for LocalWorkspace.ensure_dirs."""
 
-    def test_creates_raw_and_outputs(self, tmp_path: Path) -> None:
+    def test_creates_raw_outputs_and_context(self, tmp_path: Path) -> None:
         ws = LocalWorkspace(str(tmp_path / "new_workspace"))
-        ws.ensure_dirs()
         assert (tmp_path / "new_workspace" / "raw").is_dir()
         assert (tmp_path / "new_workspace" / "outputs").is_dir()
+        assert (tmp_path / "new_workspace" / "context").is_dir()
 
     def test_idempotent(self, tmp_path: Path) -> None:
         ws = LocalWorkspace(str(tmp_path))

From 52d4ee5b139cd06637e8eb307a0ce33619dbc01a Mon Sep 17 00:00:00 2001
From: Dima Vremekno <dimavrem22@gmail.com>
Date: Sun, 22 Feb 2026 22:23:40 -0500
Subject: [PATCH 08/13] Clean up review feedback from context generation PR

- Remove unused re-exports from data_models/agents/__init__.py
- Fix import sys grouping in run_bluebox_agent.py
- Auto-discover .md context files (prefer .json, fall back to .md)
- Tighten _extract_routines_from_raw to only accept status=="completed"
- Type _show_context_success param as BlueBoxAgentContext instead of Any
- Replace bare assert isinstance with explicit TypeError
- Replace flaky time.sleep with os.utime in test_auto_discovers_most_recent

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 bluebox/agents/bluebox_agent.py                | 18 ++++++++++++------
 bluebox/data_models/agents/__init__.py         |  3 ---
 bluebox/scripts/run_bluebox_agent.py           |  8 +++++---
 .../unit/agents/test_bluebox_agent_context.py  | 10 ++++++----
 4 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/bluebox/agents/bluebox_agent.py b/bluebox/agents/bluebox_agent.py
index fda99365..042ee036 100644
--- a/bluebox/agents/bluebox_agent.py
+++ b/bluebox/agents/bluebox_agent.py
@@ -333,14 +333,20 @@ def _load_context_from_path(self, context_file: str) -> BlueBoxAgentContext | No
             return None
 
     def _auto_discover_context(self) -> BlueBoxAgentContext | None:
-        """Find and load the most recent .json context file from workspace context/ dir."""
+        """Find and load the most recent context file from workspace context/ dir.
+
+        Prefers .json files over .md when both exist. Falls back to .md if no
+        JSON context files are present.
+        """
         context_dir = self._workspace.root_path / "context"
         if not context_dir.is_dir():
             return None
-        json_files = sorted(context_dir.glob("*.json"), key=lambda p: p.stat().st_mtime, reverse=True)
-        if not json_files:
-            return None
-        return self._load_context_from_path(str(json_files[0]))
+        # Prefer JSON, fall back to Markdown
+        for ext in ("*.json", "*.md"):
+            files = sorted(context_dir.glob(ext), key=lambda p: p.stat().st_mtime, reverse=True)
+            if files:
+                return self._load_context_from_path(str(files[0]))
+        return None
 
     def _get_context_prompt_section(self) -> str:
         """Build a system prompt section from a loaded BlueBoxAgentContext."""
@@ -379,7 +385,7 @@ def _extract_routines_from_raw(self) -> list[UsedRoutine]:
             if not rid or rid in seen:
                 continue
             # Only include completed executions
-            if rr.get("status") not in ("completed", None):
+            if rr.get("status") != "completed":
                 continue
             seen.add(rid)
             routines.append(UsedRoutine.from_dict_params(
diff --git a/bluebox/data_models/agents/__init__.py b/bluebox/data_models/agents/__init__.py
index 663a502b..e69de29b 100644
--- a/bluebox/data_models/agents/__init__.py
+++ b/bluebox/data_models/agents/__init__.py
@@ -1,3 +0,0 @@
-from bluebox.data_models.agents.context import BlueBoxAgentContext, UsedRoutine, UsedRoutineParameter
-
-__all__ = ["BlueBoxAgentContext", "UsedRoutine", "UsedRoutineParameter"]
diff --git a/bluebox/scripts/run_bluebox_agent.py b/bluebox/scripts/run_bluebox_agent.py
index abacd7a7..4b316bdf 100644
--- a/bluebox/scripts/run_bluebox_agent.py
+++ b/bluebox/scripts/run_bluebox_agent.py
@@ -22,10 +22,10 @@
 
 import argparse
 import shutil
+import sys
 from datetime import datetime
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
-import sys
 
 from bluebox.utils.code_execution_sandbox import is_docker_available
 from bluebox.utils.terminal_utils import ask_yes_no, print_colored, YELLOW
@@ -38,6 +38,7 @@
 from bluebox.agents.bluebox_agent import BlueBoxAgent
 from bluebox.agents.workspace import LocalWorkspace
 from bluebox.config import Config
+from bluebox.data_models.agents.context import BlueBoxAgentContext
 from bluebox.data_models.llms.vendors import LLMModel
 from bluebox.utils.cli_utils import add_model_argument, resolve_model
 from bluebox.utils.logger import enable_tui_logging
@@ -161,13 +162,14 @@ def _handle_custom_command(self, cmd: str, raw_input: str) -> bool:
     def _generate_context_async(self, focus: str | None) -> None:
         """Run generate_context in a background thread via structured output."""
         try:
-            assert isinstance(self._agent, BlueBoxAgent)
+            if not isinstance(self._agent, BlueBoxAgent):
+                raise TypeError(f"Expected BlueBoxAgent, got {type(self._agent).__name__}")
             context = self._agent.generate_context(focus=focus)
             self.call_from_thread(self._show_context_success, context)
         except Exception as e:
             self.call_from_thread(self._show_context_error, str(e))
 
-    def _show_context_success(self, context: Any) -> None:
+    def _show_context_success(self, context: BlueBoxAgentContext) -> None:
         """Display context generation success in the chat pane."""
         chat = self.query_one("#chat-log", RichLog)
         chat.write(Text.from_markup(
diff --git a/tests/unit/agents/test_bluebox_agent_context.py b/tests/unit/agents/test_bluebox_agent_context.py
index e2403ee4..a044108c 100644
--- a/tests/unit/agents/test_bluebox_agent_context.py
+++ b/tests/unit/agents/test_bluebox_agent_context.py
@@ -6,6 +6,8 @@
 """
 
 import json
+import os
+import time
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any
@@ -232,14 +234,14 @@ def test_auto_discovers_from_workspace(self, tmp_path: Path, sample_context: Blu
 
     def test_auto_discovers_most_recent(self, tmp_path: Path) -> None:
         """When multiple context files exist, loads the most recently modified."""
-        import time
-
         context_dir = tmp_path / "context"
         context_dir.mkdir()
 
         old = BlueBoxAgentContext(goal="old goal", output_description="old", summary="old")
-        (context_dir / "old.json").write_text(old.model_dump_json())
-        time.sleep(0.05)  # ensure mtime differs
+        old_file = context_dir / "old.json"
+        old_file.write_text(old.model_dump_json())
+        past = time.time() - 10
+        os.utime(old_file, (past, past))  # force mtime 10s in the past
 
         new = BlueBoxAgentContext(goal="new goal", output_description="new", summary="new")
         (context_dir / "new.json").write_text(new.model_dump_json())

From 5a267795cecf659696fe042758e906957c568177 Mon Sep 17 00:00:00 2001
From: Dima Vremekno <dimavrem22@gmail.com>
Date: Sun, 22 Feb 2026 22:36:33 -0500
Subject: [PATCH 09/13] Move lazy imports to top-level, fix return types, add
 roundtrip timestamp assertion

- Move BlueBoxAgent/LocalWorkspace imports to file top (CLAUDE.md: no lazy imports)
- Fix _make_agent return types from Any to BlueBoxAgent
- Remove unused Any import
- Add generated_at assertion in test_from_markdown_roundtrip
- Update save_file docstring to include "context" subdirectory

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 bluebox/agents/workspace.py                   |  2 +-
 .../unit/agents/test_bluebox_agent_context.py | 23 +++++--------------
 2 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/bluebox/agents/workspace.py b/bluebox/agents/workspace.py
index 37e61fe0..a17863ff 100644
--- a/bluebox/agents/workspace.py
+++ b/bluebox/agents/workspace.py
@@ -49,7 +49,7 @@ def save_file(
         """Save content with a unique timestamped filename.
 
         Args:
-            subdirectory: Logical subdirectory ("raw" or "outputs").
+            subdirectory: Logical subdirectory ("raw", "outputs", or "context").
             filename_prefix: Prefix for the generated filename.
             content: File content to write.
             extension: File extension including the dot.
diff --git a/tests/unit/agents/test_bluebox_agent_context.py b/tests/unit/agents/test_bluebox_agent_context.py
index a044108c..af2dfb2a 100644
--- a/tests/unit/agents/test_bluebox_agent_context.py
+++ b/tests/unit/agents/test_bluebox_agent_context.py
@@ -10,11 +10,12 @@
 import time
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any
 from unittest.mock import MagicMock
 
 import pytest
 
+from bluebox.agents.bluebox_agent import BlueBoxAgent
+from bluebox.agents.workspace import LocalWorkspace
 from bluebox.data_models.agents.context import BlueBoxAgentContext, UsedRoutine, UsedRoutineParameter
 
 
@@ -152,6 +153,7 @@ def test_from_markdown_roundtrip(self, sample_context: BlueBoxAgentContext) -> N
             assert rest.routine_id == orig.routine_id
             assert rest.routine_name == orig.routine_name
             assert rest.parameters_as_dict() == orig.parameters_as_dict()
+        assert restored.generated_at == sample_context.generated_at
 
     def test_from_markdown_no_python_code(self, minimal_context: BlueBoxAgentContext) -> None:
         """Markdown with no Python Code section should parse python_code as None."""
@@ -184,11 +186,8 @@ def _make_agent(
         self,
         workspace_dir: Path,
         context_file: str | None = None,
-    ) -> Any:
+    ) -> BlueBoxAgent:
         """Create a BlueBoxAgent with mocked dependencies."""
-        from bluebox.agents.bluebox_agent import BlueBoxAgent
-        from bluebox.agents.workspace import LocalWorkspace
-
         return BlueBoxAgent(
             emit_message_callable=MagicMock(),
             workspace=LocalWorkspace(str(workspace_dir)),
@@ -290,10 +289,7 @@ def test_no_context_dir_no_error(self, tmp_path: Path) -> None:
 class TestContextPromptInjection:
     """Tests for _get_context_prompt_section and system prompt integration."""
 
-    def _make_agent(self, tmp_path: Path, context: BlueBoxAgentContext) -> Any:
-        from bluebox.agents.bluebox_agent import BlueBoxAgent
-        from bluebox.agents.workspace import LocalWorkspace
-
+    def _make_agent(self, tmp_path: Path, context: BlueBoxAgentContext) -> BlueBoxAgent:
         ctx_file = tmp_path / "context.json"
         ctx_file.write_text(context.model_dump_json(indent=2))
 
@@ -333,9 +329,6 @@ def test_context_section_truncation(self, tmp_path: Path) -> None:
         assert "read_workspace_file" in section
 
     def test_no_context_no_section(self, tmp_path: Path) -> None:
-        from bluebox.agents.bluebox_agent import BlueBoxAgent
-        from bluebox.agents.workspace import LocalWorkspace
-
         agent = BlueBoxAgent(
             emit_message_callable=MagicMock(),
             workspace=LocalWorkspace(str(tmp_path)),
@@ -353,10 +346,7 @@ def test_no_context_no_section(self, tmp_path: Path) -> None:
 class TestGenerateContext:
     """Tests for the generate_context public method (structured output)."""
 
-    def _make_agent(self, tmp_path: Path) -> Any:
-        from bluebox.agents.bluebox_agent import BlueBoxAgent
-        from bluebox.agents.workspace import LocalWorkspace
-
+    def _make_agent(self, tmp_path: Path) -> BlueBoxAgent:
         return BlueBoxAgent(
             emit_message_callable=MagicMock(),
             workspace=LocalWorkspace(str(tmp_path)),
@@ -371,7 +361,6 @@ def _mock_llm_response(self, context: BlueBoxAgentContext) -> MagicMock:
 
     def test_tool_is_not_registered(self) -> None:
         """generate_context should NOT be an agent tool anymore."""
-        from bluebox.agents.bluebox_agent import BlueBoxAgent
         tools = BlueBoxAgent._collect_tools()
         tool_names = [meta.name for meta, _ in tools]
         assert "generate_context" not in tool_names

From 835eb9304c31d503c8be4960a68174848b9c44b6 Mon Sep 17 00:00:00 2001
From: Dima Vremekno <dimavrem22@gmail.com>
Date: Sun, 22 Feb 2026 23:32:15 -0500
Subject: [PATCH 10/13] Show context file paths on save, show loaded context in
 TUI welcome

- generate_context now returns GenerateContextResult (NamedTuple) with
  context, json_path, and md_path so callers can display file locations
- TUI _show_context_success displays the saved JSON and Markdown paths
- TUI _print_welcome shows loaded context goal and routine count when
  a context file was auto-discovered or explicitly loaded
- Add loaded_context property on BlueBoxAgent to expose context state

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 bluebox/agents/bluebox_agent.py               | 27 ++++++++++--
 bluebox/scripts/run_bluebox_agent.py          | 21 ++++++----
 .../unit/agents/test_bluebox_agent_context.py | 41 +++++++++----------
 3 files changed, 56 insertions(+), 33 deletions(-)

diff --git a/bluebox/agents/bluebox_agent.py b/bluebox/agents/bluebox_agent.py
index 042ee036..2e7ade15 100644
--- a/bluebox/agents/bluebox_agent.py
+++ b/bluebox/agents/bluebox_agent.py
@@ -15,7 +15,7 @@
 from datetime import datetime
 from pathlib import Path
 from textwrap import dedent
-from typing import Any, Callable
+from typing import Any, Callable, NamedTuple
 
 import requests
 
@@ -52,6 +52,14 @@
 logger = get_logger(name=__name__)
 
 
+class GenerateContextResult(NamedTuple):
+    """Return value from BlueBoxAgent.generate_context."""
+
+    context: BlueBoxAgentContext
+    json_path: str
+    md_path: str
+
+
 class BlueBoxAgent(AbstractAgent):
     """
     BlueBoxAgent that searches and executes web automation routines.
@@ -204,6 +212,13 @@ def __init__(
             self._agent_context is not None,
         )
 
+    ## Properties
+
+    @property
+    def loaded_context(self) -> BlueBoxAgentContext | None:
+        """The context loaded on init, if any."""
+        return self._agent_context
+
     ## Auth
 
     def _get_auth_headers(self) -> dict[str, str]:
@@ -778,7 +793,7 @@ def _read_workspace_file(
 
     ## Context generation (structured output, called by TUI slash command)
 
-    def generate_context(self, focus: str | None = None) -> BlueBoxAgentContext:
+    def generate_context(self, focus: str | None = None) -> GenerateContextResult:
         """Generate a context file from the current session using structured output.
 
         Makes a direct LLM call with response_model=BlueBoxAgentContext to get
@@ -789,7 +804,7 @@ def generate_context(self, focus: str | None = None) -> BlueBoxAgentContext:
             focus: Optional user-provided focus prompt to guide context generation.
 
         Returns:
-            The generated BlueBoxAgentContext.
+            GenerateContextResult with the context and saved file paths.
 
         Raises:
             ValueError: If the LLM fails to produce a valid context.
@@ -844,4 +859,8 @@ def generate_context(self, focus: str | None = None) -> BlueBoxAgentContext:
         )
 
         logger.info("Context files saved: %s, %s", json_save["output_file"], md_save["output_file"])
-        return context
+        return GenerateContextResult(
+            context=context,
+            json_path=json_save["output_file"],
+            md_path=md_save["output_file"],
+        )
diff --git a/bluebox/scripts/run_bluebox_agent.py b/bluebox/scripts/run_bluebox_agent.py
index 4b316bdf..88931697 100644
--- a/bluebox/scripts/run_bluebox_agent.py
+++ b/bluebox/scripts/run_bluebox_agent.py
@@ -35,10 +35,9 @@
 from textual import work
 from textual.widgets import RichLog
 
-from bluebox.agents.bluebox_agent import BlueBoxAgent
+from bluebox.agents.bluebox_agent import BlueBoxAgent, GenerateContextResult
 from bluebox.agents.workspace import LocalWorkspace
 from bluebox.config import Config
-from bluebox.data_models.agents.context import BlueBoxAgentContext
 from bluebox.data_models.llms.vendors import LLMModel
 from bluebox.utils.cli_utils import add_model_argument, resolve_model
 from bluebox.utils.logger import enable_tui_logging
@@ -94,6 +93,10 @@ def _print_welcome(self) -> None:
         lines = [
             f"[dim]Model:[/dim]       {self._llm_model.value}",
         ]
+        if isinstance(self._agent, BlueBoxAgent) and self._agent.loaded_context:
+            ctx = self._agent.loaded_context
+            lines.append(f"[dim]Context:[/dim]     [green]loaded[/green] — {ctx.goal[:60]}")
+            lines.append(f"[dim]             {len(ctx.routines_used)} routine(s), {len(ctx.output_files)} output file(s)[/dim]")
         chat.write(Text.from_markup("\n".join(lines)))
         chat.write("")
 
@@ -164,19 +167,21 @@ def _generate_context_async(self, focus: str | None) -> None:
         try:
             if not isinstance(self._agent, BlueBoxAgent):
                 raise TypeError(f"Expected BlueBoxAgent, got {type(self._agent).__name__}")
-            context = self._agent.generate_context(focus=focus)
-            self.call_from_thread(self._show_context_success, context)
+            result = self._agent.generate_context(focus=focus)
+            self.call_from_thread(self._show_context_success, result)
         except Exception as e:
             self.call_from_thread(self._show_context_error, str(e))
 
-    def _show_context_success(self, context: BlueBoxAgentContext) -> None:
+    def _show_context_success(self, result: GenerateContextResult) -> None:
         """Display context generation success in the chat pane."""
         chat = self.query_one("#chat-log", RichLog)
         chat.write(Text.from_markup(
             f"[bold green]Context saved![/bold green]\n"
-            f"[dim]Goal:[/dim] {context.goal}\n"
-            f"[dim]Summary:[/dim] {context.summary}\n"
-            f"[dim]Routines:[/dim] {len(context.routines_used)}"
+            f"[dim]Goal:[/dim]      {result.context.goal}\n"
+            f"[dim]Summary:[/dim]   {result.context.summary}\n"
+            f"[dim]Routines:[/dim]  {len(result.context.routines_used)}\n"
+            f"[dim]JSON:[/dim]      {result.json_path}\n"
+            f"[dim]Markdown:[/dim]  {result.md_path}"
         ))
         self._processing = False
         self._update_status()
diff --git a/tests/unit/agents/test_bluebox_agent_context.py b/tests/unit/agents/test_bluebox_agent_context.py
index af2dfb2a..c651af8f 100644
--- a/tests/unit/agents/test_bluebox_agent_context.py
+++ b/tests/unit/agents/test_bluebox_agent_context.py
@@ -371,22 +371,21 @@ def test_saves_both_files(self, tmp_path: Path, sample_context: BlueBoxAgentCont
 
         result = agent.generate_context()
 
-        assert result.goal == sample_context.goal
-        assert result.summary == sample_context.summary
-
-        # Verify both JSON and MD files were saved
-        context_dir = tmp_path / "context"
-        json_files = list(context_dir.glob("*.json"))
-        md_files = list(context_dir.glob("*.md"))
-        assert len(json_files) == 1
-        assert len(md_files) == 1
-
-        # Verify JSON is valid
-        loaded = BlueBoxAgentContext.model_validate_json(json_files[0].read_text())
+        assert result.context.goal == sample_context.goal
+        assert result.context.summary == sample_context.summary
+        assert "context/" in result.json_path
+        assert "context/" in result.md_path
+
+        # Verify JSON file exists and is valid
+        json_path = Path(result.json_path)
+        assert json_path.is_file()
+        loaded = BlueBoxAgentContext.model_validate_json(json_path.read_text())
         assert loaded.goal == sample_context.goal
 
-        # Verify MD has expected sections
-        assert "## Goal" in md_files[0].read_text()
+        # Verify MD file exists with expected sections
+        md_path = Path(result.md_path)
+        assert md_path.is_file()
+        assert "## Goal" in md_path.read_text()
 
     def test_saves_to_context_subdirectory(self, tmp_path: Path, minimal_context: BlueBoxAgentContext) -> None:
         agent = self._make_agent(tmp_path)
@@ -435,10 +434,10 @@ def test_auto_populates_routines_from_raw(self, tmp_path: Path) -> None:
 
         result = agent.generate_context()
 
-        assert len(result.routines_used) == 1
-        assert result.routines_used[0].routine_id == "Routine_abc"
-        assert result.routines_used[0].routine_name == "TestRoutine"
-        assert result.routines_used[0].parameters_as_dict() == {"city": "NYC"}
+        assert len(result.context.routines_used) == 1
+        assert result.context.routines_used[0].routine_id == "Routine_abc"
+        assert result.context.routines_used[0].routine_name == "TestRoutine"
+        assert result.context.routines_used[0].parameters_as_dict() == {"city": "NYC"}
 
     def test_auto_populate_deduplicates_routines(self, tmp_path: Path) -> None:
         """Same routine_id executed multiple times should appear once."""
@@ -462,7 +461,7 @@ def test_auto_populate_deduplicates_routines(self, tmp_path: Path) -> None:
         agent.llm_client.call_sync = MagicMock(return_value=self._mock_llm_response(context_from_llm))
 
         result = agent.generate_context()
-        assert len(result.routines_used) == 1
+        assert len(result.context.routines_used) == 1
 
     def test_llm_provided_routines_not_overridden(self, tmp_path: Path) -> None:
         """When LLM provides routines_used, don't auto-populate from raw/."""
@@ -489,8 +488,8 @@ def test_llm_provided_routines_not_overridden(self, tmp_path: Path) -> None:
         agent.llm_client.call_sync = MagicMock(return_value=self._mock_llm_response(context_from_llm))
 
         result = agent.generate_context()
-        assert len(result.routines_used) == 1
-        assert result.routines_used[0].routine_id == "Routine_llm_provided"
+        assert len(result.context.routines_used) == 1
+        assert result.context.routines_used[0].routine_id == "Routine_llm_provided"
 
     def test_passes_focus_to_system_prompt(self, tmp_path: Path, minimal_context: BlueBoxAgentContext) -> None:
         """Focus text should be included in the system prompt sent to LLM."""

From 7f5f9df16e65991f6cdd0b3128bad5191846c772 Mon Sep 17 00:00:00 2001
From: Dima Vremekno <dimavrem22@gmail.com>
Date: Mon, 23 Feb 2026 01:48:44 -0500
Subject: [PATCH 11/13] Simplify save_file to take direct filename, dispatch
 context to Saved Files panel
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

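save_file() now takes the complete filename (including extension) instead of
a prefix plus extension, and no longer adds a timestamp or counter itself.
Callers build their own filenames: routine results and browser-agent outputs
are prefixed with a second-resolution timestamp, while context files use the
fixed names agent_context.json and agent_context.md, so each
generate_context() call overwrites the previous pair. Because the
per-workspace counter is gone, two results saved within the same second now
map to the same filename.

Context files are dispatched to the TUI Saved Files pane via
_add_saved_file().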

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 bluebox/agents/bluebox_agent.py      | 10 ++++++----
 bluebox/agents/workspace.py          | 20 ++++----------------
 bluebox/scripts/run_bluebox_agent.py |  2 ++
 tests/unit/test_workspace.py         | 21 +++++++++++----------
 4 files changed, 23 insertions(+), 30 deletions(-)

diff --git a/bluebox/agents/bluebox_agent.py b/bluebox/agents/bluebox_agent.py
index 2e7ade15..3c677d94 100644
--- a/bluebox/agents/bluebox_agent.py
+++ b/bluebox/agents/bluebox_agent.py
@@ -469,8 +469,9 @@ def _execute_routines_in_parallel(
         def save_result(result: dict[str, Any]) -> dict[str, Any]:
             """Save a single routine result to a JSON file in raw/."""
             try:
+                ts = datetime.now().strftime("%y-%m-%d-%H%M%S")
                 save_info = self._workspace.save_file(
-                    "raw", "routine_result",
+                    "raw", f"{ts}-routine_result.json",
                     json.dumps(result, indent=2, default=str),
                 )
                 result.update(save_info)
@@ -610,8 +611,9 @@ def _execute_browser_task(
         final_result = result.get("final_result")
         if final_result:
             try:
+                ts = datetime.now().strftime("%y-%m-%d-%H%M%S")
                 save_info = self._workspace.save_file(
-                    "outputs", "browser_agent", final_result, extension=".md",
+                    "outputs", f"{ts}-browser_agent.md", final_result,
                 )
                 result.update(save_info)
             except Exception as e:
@@ -850,12 +852,12 @@ def generate_context(self, focus: str | None = None) -> GenerateContextResult:
 
         # Save canonical JSON
         json_save = self._workspace.save_file(
-            "context", "agent_context", context.model_dump_json(indent=2),
+            "context", "agent_context.json", context.model_dump_json(indent=2),
         )
 
         # Save companion Markdown
         md_save = self._workspace.save_file(
-            "context", "agent_context", context.to_markdown(), extension=".md",
+            "context", "agent_context.md", context.to_markdown(),
         )
 
         logger.info("Context files saved: %s, %s", json_save["output_file"], md_save["output_file"])
diff --git a/bluebox/agents/workspace.py b/bluebox/agents/workspace.py
index a17863ff..869d3bdb 100644
--- a/bluebox/agents/workspace.py
+++ b/bluebox/agents/workspace.py
@@ -11,9 +11,7 @@
 from __future__ import annotations
 
 import json
-import threading
 from abc import ABC, abstractmethod
-from datetime import datetime
 from pathlib import Path
 from typing import Any
 
@@ -42,17 +40,15 @@ def root_path(self) -> Path:
     def save_file(
         self,
         subdirectory: str,
-        filename_prefix: str,
+        filename: str,
         content: str,
-        extension: str = ".json",
     ) -> dict[str, str]:
-        """Save content with a unique timestamped filename.
+        """Save content to a file in the workspace.
 
         Args:
             subdirectory: Logical subdirectory ("raw", "outputs", or "context").
-            filename_prefix: Prefix for the generated filename.
+            filename: The filename to use (e.g. "routine_result_1.json").
             content: File content to write.
-            extension: File extension including the dot.
 
         Returns:
             Dict with at least "output_file" key (the saved path).
@@ -125,8 +121,6 @@ def __init__(self, workspace_dir: str = "./bluebox_workspace") -> None:
         self._raw_dir = self._workspace_dir / "raw"
         self._outputs_dir = self._workspace_dir / "outputs"
         self._context_dir = self._workspace_dir / "context"
-        self._execution_counter: int = 0
-        self._counter_lock = threading.Lock()
         self.ensure_dirs()
 
     @property
@@ -136,17 +130,11 @@ def root_path(self) -> Path:
     def save_file(
         self,
         subdirectory: str,
-        filename_prefix: str,
+        filename: str,
         content: str,
-        extension: str = ".json",
     ) -> dict[str, str]:
         directory = self._workspace_dir / subdirectory
         directory.mkdir(parents=True, exist_ok=True)
-        with self._counter_lock:
-            self._execution_counter += 1
-            idx = self._execution_counter
-        timestamp = datetime.now().strftime("%y-%m-%d-%H%M%S")
-        filename = f"{timestamp}-{filename_prefix}_{idx}{extension}"
         output_path = directory / filename
         output_path.write_text(content)
         logger.info("Result saved to %s", output_path)
diff --git a/bluebox/scripts/run_bluebox_agent.py b/bluebox/scripts/run_bluebox_agent.py
index 88931697..6d69216c 100644
--- a/bluebox/scripts/run_bluebox_agent.py
+++ b/bluebox/scripts/run_bluebox_agent.py
@@ -183,6 +183,8 @@ def _show_context_success(self, result: GenerateContextResult) -> None:
             f"[dim]JSON:[/dim]      {result.json_path}\n"
             f"[dim]Markdown:[/dim]  {result.md_path}"
         ))
+        self._add_saved_file(result.json_path)
+        self._add_saved_file(result.md_path)
         self._processing = False
         self._update_status()
 
diff --git a/tests/unit/test_workspace.py b/tests/unit/test_workspace.py
index d6db96e9..49df8612 100644
--- a/tests/unit/test_workspace.py
+++ b/tests/unit/test_workspace.py
@@ -20,31 +20,32 @@ class TestSaveFile:
 
     def test_saves_file_with_content(self, tmp_path: Path) -> None:
         ws = LocalWorkspace(str(tmp_path))
-        result = ws.save_file("raw", "routine_result", '{"data": 1}')
+        result = ws.save_file("raw", "routine_result.json", '{"data": 1}')
         assert "output_file" in result
         saved = Path(result["output_file"])
         assert saved.exists()
         assert saved.read_text() == '{"data": 1}'
+        assert saved.name == "routine_result.json"
 
     def test_creates_subdirectory(self, tmp_path: Path) -> None:
         ws = LocalWorkspace(str(tmp_path))
-        result = ws.save_file("raw", "test", "content")
-        assert (tmp_path / "raw").is_dir()
+        ws.save_file("custom_subdir", "test.json", "content")
+        assert (tmp_path / "custom_subdir").is_dir()
 
-    def test_unique_filenames(self, tmp_path: Path) -> None:
+    def test_overwrites_existing_file(self, tmp_path: Path) -> None:
         ws = LocalWorkspace(str(tmp_path))
-        r1 = ws.save_file("raw", "test", "a")
-        r2 = ws.save_file("raw", "test", "b")
-        assert r1["output_file"] != r2["output_file"]
+        ws.save_file("raw", "test.json", "old")
+        ws.save_file("raw", "test.json", "new")
+        assert (tmp_path / "raw" / "test.json").read_text() == "new"
 
-    def test_custom_extension(self, tmp_path: Path) -> None:
+    def test_different_extensions(self, tmp_path: Path) -> None:
         ws = LocalWorkspace(str(tmp_path))
-        result = ws.save_file("outputs", "browser_agent", "# Result", extension=".md")
+        result = ws.save_file("outputs", "result.md", "# Result")
         assert result["output_file"].endswith(".md")
 
     def test_no_s3_key_in_result(self, tmp_path: Path) -> None:
         ws = LocalWorkspace(str(tmp_path))
-        result = ws.save_file("raw", "test", "data")
+        result = ws.save_file("raw", "test.json", "data")
         assert "output_file_s3_key" not in result
 
 

From 2b8617107ba45b85aba56898de43420c0d4e5984 Mon Sep 17 00:00:00 2001
From: Dima Vremekno <dimavrem22@gmail.com>
Date: Mon, 23 Feb 2026 01:52:19 -0500
Subject: [PATCH 12/13] Remove GenerateContextResult NamedTuple, return
 BlueBoxAgentContext directly

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 bluebox/agents/bluebox_agent.py               | 17 ++++---------
 bluebox/scripts/run_bluebox_agent.py          | 22 ++++++++++-------
 .../unit/agents/test_bluebox_agent_context.py | 24 +++++++++----------
 3 files changed, 28 insertions(+), 35 deletions(-)

diff --git a/bluebox/agents/bluebox_agent.py b/bluebox/agents/bluebox_agent.py
index 3c677d94..1fb3ab71 100644
--- a/bluebox/agents/bluebox_agent.py
+++ b/bluebox/agents/bluebox_agent.py
@@ -15,7 +15,7 @@
 from datetime import datetime
 from pathlib import Path
 from textwrap import dedent
-from typing import Any, Callable, NamedTuple
+from typing import Any, Callable
 
 import requests
 
@@ -52,12 +52,7 @@
 logger = get_logger(name=__name__)
 
 
-class GenerateContextResult(NamedTuple):
-    """Return value from BlueBoxAgent.generate_context."""
 
-    context: BlueBoxAgentContext
-    json_path: str
-    md_path: str
 
 
 class BlueBoxAgent(AbstractAgent):
@@ -795,7 +790,7 @@ def _read_workspace_file(
 
     ## Context generation (structured output, called by TUI slash command)
 
-    def generate_context(self, focus: str | None = None) -> GenerateContextResult:
+    def generate_context(self, focus: str | None = None) -> BlueBoxAgentContext:
         """Generate a context file from the current session using structured output.
 
         Makes a direct LLM call with response_model=BlueBoxAgentContext to get
@@ -806,7 +801,7 @@ def generate_context(self, focus: str | None = None) -> GenerateContextResult:
             focus: Optional user-provided focus prompt to guide context generation.
 
         Returns:
-            GenerateContextResult with the context and saved file paths.
+            The validated BlueBoxAgentContext.
 
         Raises:
             ValueError: If the LLM fails to produce a valid context.
@@ -861,8 +856,4 @@ def generate_context(self, focus: str | None = None) -> GenerateContextResult:
         )
 
         logger.info("Context files saved: %s, %s", json_save["output_file"], md_save["output_file"])
-        return GenerateContextResult(
-            context=context,
-            json_path=json_save["output_file"],
-            md_path=md_save["output_file"],
-        )
+        return context
diff --git a/bluebox/scripts/run_bluebox_agent.py b/bluebox/scripts/run_bluebox_agent.py
index 6d69216c..093a4667 100644
--- a/bluebox/scripts/run_bluebox_agent.py
+++ b/bluebox/scripts/run_bluebox_agent.py
@@ -35,8 +35,9 @@
 from textual import work
 from textual.widgets import RichLog
 
-from bluebox.agents.bluebox_agent import BlueBoxAgent, GenerateContextResult
+from bluebox.agents.bluebox_agent import BlueBoxAgent
 from bluebox.agents.workspace import LocalWorkspace
+from bluebox.data_models.agents.context import BlueBoxAgentContext
 from bluebox.config import Config
 from bluebox.data_models.llms.vendors import LLMModel
 from bluebox.utils.cli_utils import add_model_argument, resolve_model
@@ -172,19 +173,22 @@ def _generate_context_async(self, focus: str | None) -> None:
         except Exception as e:
             self.call_from_thread(self._show_context_error, str(e))
 
-    def _show_context_success(self, result: GenerateContextResult) -> None:
+    def _show_context_success(self, context: BlueBoxAgentContext) -> None:
         """Display context generation success in the chat pane."""
+        context_dir = Path(self._workspace_dir) / "context"
+        json_path = str(context_dir / "agent_context.json")
+        md_path = str(context_dir / "agent_context.md")
         chat = self.query_one("#chat-log", RichLog)
         chat.write(Text.from_markup(
             f"[bold green]Context saved![/bold green]\n"
-            f"[dim]Goal:[/dim]      {result.context.goal}\n"
-            f"[dim]Summary:[/dim]   {result.context.summary}\n"
-            f"[dim]Routines:[/dim]  {len(result.context.routines_used)}\n"
-            f"[dim]JSON:[/dim]      {result.json_path}\n"
-            f"[dim]Markdown:[/dim]  {result.md_path}"
+            f"[dim]Goal:[/dim]      {context.goal}\n"
+            f"[dim]Summary:[/dim]   {context.summary}\n"
+            f"[dim]Routines:[/dim]  {len(context.routines_used)}\n"
+            f"[dim]JSON:[/dim]      {json_path}\n"
+            f"[dim]Markdown:[/dim]  {md_path}"
         ))
-        self._add_saved_file(result.json_path)
-        self._add_saved_file(result.md_path)
+        self._add_saved_file(json_path)
+        self._add_saved_file(md_path)
         self._processing = False
         self._update_status()
 
diff --git a/tests/unit/agents/test_bluebox_agent_context.py b/tests/unit/agents/test_bluebox_agent_context.py
index c651af8f..95db05b8 100644
--- a/tests/unit/agents/test_bluebox_agent_context.py
+++ b/tests/unit/agents/test_bluebox_agent_context.py
@@ -371,19 +371,17 @@ def test_saves_both_files(self, tmp_path: Path, sample_context: BlueBoxAgentCont
 
         result = agent.generate_context()
 
-        assert result.context.goal == sample_context.goal
-        assert result.context.summary == sample_context.summary
-        assert "context/" in result.json_path
-        assert "context/" in result.md_path
+        assert result.goal == sample_context.goal
+        assert result.summary == sample_context.summary
 
         # Verify JSON file exists and is valid
-        json_path = Path(result.json_path)
+        json_path = tmp_path / "context" / "agent_context.json"
         assert json_path.is_file()
         loaded = BlueBoxAgentContext.model_validate_json(json_path.read_text())
         assert loaded.goal == sample_context.goal
 
         # Verify MD file exists with expected sections
-        md_path = Path(result.md_path)
+        md_path = tmp_path / "context" / "agent_context.md"
         assert md_path.is_file()
         assert "## Goal" in md_path.read_text()
 
@@ -434,10 +432,10 @@ def test_auto_populates_routines_from_raw(self, tmp_path: Path) -> None:
 
         result = agent.generate_context()
 
-        assert len(result.context.routines_used) == 1
-        assert result.context.routines_used[0].routine_id == "Routine_abc"
-        assert result.context.routines_used[0].routine_name == "TestRoutine"
-        assert result.context.routines_used[0].parameters_as_dict() == {"city": "NYC"}
+        assert len(result.routines_used) == 1
+        assert result.routines_used[0].routine_id == "Routine_abc"
+        assert result.routines_used[0].routine_name == "TestRoutine"
+        assert result.routines_used[0].parameters_as_dict() == {"city": "NYC"}
 
     def test_auto_populate_deduplicates_routines(self, tmp_path: Path) -> None:
         """Same routine_id executed multiple times should appear once."""
@@ -461,7 +459,7 @@ def test_auto_populate_deduplicates_routines(self, tmp_path: Path) -> None:
         agent.llm_client.call_sync = MagicMock(return_value=self._mock_llm_response(context_from_llm))
 
         result = agent.generate_context()
-        assert len(result.context.routines_used) == 1
+        assert len(result.routines_used) == 1
 
     def test_llm_provided_routines_not_overridden(self, tmp_path: Path) -> None:
         """When LLM provides routines_used, don't auto-populate from raw/."""
@@ -488,8 +486,8 @@ def test_llm_provided_routines_not_overridden(self, tmp_path: Path) -> None:
         agent.llm_client.call_sync = MagicMock(return_value=self._mock_llm_response(context_from_llm))
 
         result = agent.generate_context()
-        assert len(result.context.routines_used) == 1
-        assert result.context.routines_used[0].routine_id == "Routine_llm_provided"
+        assert len(result.routines_used) == 1
+        assert result.routines_used[0].routine_id == "Routine_llm_provided"
 
     def test_passes_focus_to_system_prompt(self, tmp_path: Path, minimal_context: BlueBoxAgentContext) -> None:
         """Focus text should be included in the system prompt sent to LLM."""

From 4d67767cf2dd9faa95badf4f76ab953777a8e577 Mon Sep 17 00:00:00 2001
From: Ray Liao <17989965+rayruizhiliao@users.noreply.github.com>
Date: Mon, 23 Feb 2026 08:53:40 -0500
Subject: [PATCH 13/13] Update README: document context generation and session replay

---
 README.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/README.md b/README.md
index 74157aa7..11581f79 100644
--- a/README.md
+++ b/README.md
@@ -85,9 +85,22 @@ bluebox-agent --model claude-opus-4-5
 - Falls back to an AI browser agent for tasks without predefined routines
 - Post-processes outputs using Python (CSV, JSON, etc.)
 - Saves generated files to a local workspace
+- Generates reusable **context files** to replay successful sessions instantly
 
 Ask it anything: *"Run a price analysis on Rolex Sea Dweller 16600"* — the agent automatically selects the right routine, runs it, and delivers structured results.
 
+### Context (session replay)
+
+After a successful session, run `/generate_context` to save a snapshot of what worked — the goal, routines called (with exact parameters), any Python post-processing code, and output descriptions. Context files are saved to the workspace `context/` directory in both JSON (`agent_context.json`) and Markdown (`agent_context.md`) formats. Running the command again overwrites the previous files.
+
+When the agent starts a new session, it automatically loads the most recent context file and injects it into the system prompt. This lets the agent **skip trial and error** and directly replay the known-good path, adjusting parameters as needed for the new request.
+
+You can also load a specific context file explicitly:
+
+```bash
+bluebox-agent --context-file path/to/agent_context.json
+```
+
 ## Create your own routines
 
 To learn about the core technology powering BlueBox, see [routine_discovery.md](routine_discovery.md).