From 5cea54086fa5a3fdb45434dd9d53e3a696d32456 Mon Sep 17 00:00:00 2001 From: Archith Date: Thu, 26 Mar 2026 09:58:35 -0700 Subject: [PATCH 1/3] Token efficiency Phase 3: compress tool descriptions, adaptive max_tokens, conditional reflection Three optimizations that reduce token waste without affecting capability: 1. Tool description compression: Cut verbose multi-sentence descriptions to concise single-line summaries across all 50+ tool definitions. Saves ~40% of per-tool token overhead (e.g. file_read from 230 chars to 90 chars, shell_exec from 280 chars to 100 chars, heartbeat_add from 250 chars to 100). 2. Adaptive max_tokens by model tier: Cheap tier gets 1024 max_tokens (simple greetings), mid tier gets 2048, expensive tier keeps 4096. Ollama stays at 16384 for reasoning models. Only applies when the user has not set an explicit max_tokens in their config. 3. Conditional reflection: Skip the post-bout reflection LLM call for simple exchanges (fewer than 3 loop iterations AND no tool calls). A hello/goodbye bout no longer wastes a reflection call. Substantive bouts (3+ turns or any tool use) still get full reflection. Co-Authored-By: Claude Opus 4.6 --- crates/punch-runtime/src/fighter_loop.rs | 31 +++++-- crates/punch-runtime/src/tools.rs | 107 +++++++++-------------- 2 files changed, 63 insertions(+), 75 deletions(-) diff --git a/crates/punch-runtime/src/fighter_loop.rs b/crates/punch-runtime/src/fighter_loop.rs index c5c6b7b..8e57efa 100644 --- a/crates/punch-runtime/src/fighter_loop.rs +++ b/crates/punch-runtime/src/fighter_loop.rs @@ -311,15 +311,22 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult 1024, + Some("mid") => 2048, + _ => 4096, // expensive or no routing + }; // Reasoning models (Qwen, DeepSeek) use thinking tokens internally, // so they need a much higher default to leave room for visible output. - // The thinking budget can easily consume 2000-4000 tokens alone. match params.manifest.model.provider { punch_types::Provider::Ollama => 16384, - _ => 4096, - }, - ), + _ => tier_default, + } + }), temperature: params.manifest.model.temperature, system_prompt: Some(system_prompt.clone()), }; @@ -443,9 +450,11 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult= 3 || tool_calls_made > 0; + if is_substantive_bout { let driver = Arc::clone(¶ms.driver); let memory = Arc::clone(¶ms.memory); let model = params.manifest.model.model.clone(); @@ -455,6 +464,12 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult ToolDefinition { ToolDefinition { name: "file_read".into(), - description: "Read the contents of a file on the user's machine. Use this to analyze, inspect, or review any file the user mentions — text, code, config, logs, CSV, JSON, etc. Accepts absolute paths (e.g. /Users/name/file.txt) or relative paths.".into(), + description: "Read a file by path (absolute or relative). Supports text, code, config, logs, CSV, JSON, etc.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -821,9 +821,7 @@ fn file_read() -> ToolDefinition { fn file_write() -> ToolDefinition { ToolDefinition { name: "file_write".into(), - description: - "Write or create a file on the user's machine. Creates parent directories if needed. Use for saving results, generating files, or modifying content." - .into(), + description: "Write or create a file. Creates parent directories if needed.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -845,7 +843,7 @@ fn file_write() -> ToolDefinition { fn file_list() -> ToolDefinition { ToolDefinition { name: "file_list".into(), - description: "List files and directories in a folder on the user's machine. Use to explore directory contents, find files, or verify paths exist.".into(), + description: "List files and directories in a folder.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -862,7 +860,7 @@ fn file_list() -> ToolDefinition { fn shell_exec() -> ToolDefinition { ToolDefinition { name: "shell_exec".into(), - description: "Execute a shell command on the user's machine and return stdout, stderr, and exit code. This is your most versatile tool — use it for anything: launching apps, running scripts, system automation, package management, database queries, API calls via curl, and any task that can be done from a terminal.".into(), + description: "Execute a shell command and return stdout, stderr, and exit code. Universal fallback for any task doable from a terminal.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -898,9 +896,7 @@ fn web_fetch() -> ToolDefinition { fn web_search() -> ToolDefinition { ToolDefinition { name: "web_search".into(), - description: - "Search the web using DuckDuckGo and return the top results with titles and URLs." - .into(), + description: "Search the web and return top results with titles and URLs.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -918,7 +914,7 @@ fn web_search() -> ToolDefinition { fn memory_store() -> ToolDefinition { ToolDefinition { name: "memory_store".into(), - description: "Store a key-value pair in your persistent memory. Use this to remember important facts, user preferences, or context across conversations.".into(), + description: "Store a key-value pair in persistent memory across conversations.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -944,7 +940,7 @@ fn memory_store() -> ToolDefinition { fn memory_recall() -> ToolDefinition { ToolDefinition { name: "memory_recall".into(), - description: "Search your persistent memory for previously stored information.".into(), + description: "Search persistent memory for previously stored information.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -1044,7 +1040,7 @@ fn knowledge_query() -> ToolDefinition { fn agent_spawn() -> ToolDefinition { ToolDefinition { name: "agent_spawn".into(), - description: "Spawn a new fighter (AI agent). Returns the new fighter's ID. Use this to create subordinate agents that can handle specialized tasks.".into(), + description: "Spawn a new fighter (AI agent) and return its ID.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -1077,7 +1073,7 @@ fn agent_spawn() -> ToolDefinition { fn agent_message() -> ToolDefinition { ToolDefinition { name: "agent_message".into(), - description: "Send a message to another fighter by ID or name and get its response. Use this for inter-agent coordination and delegation.".into(), + description: "Send a message to another fighter by ID or name and get its response.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -1120,9 +1116,7 @@ fn agent_list() -> ToolDefinition { fn patch_apply() -> ToolDefinition { ToolDefinition { name: "patch_apply".into(), - description: "Apply a unified diff patch to a file. Reads the file, validates the patch, \ - applies it, and writes the result back. Supports standard unified diff format." - .into(), + description: "Apply a unified diff patch to a file.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -1225,9 +1219,8 @@ fn browser_type() -> ToolDefinition { fn browser_content() -> ToolDefinition { ToolDefinition { name: "browser_content".into(), - description: - "Get the text content of the page or a specific element. Useful for extracting readable text from a web page." - .into(), + description: "Get the text content of the page or a specific element by CSS selector." + .into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -1544,7 +1537,7 @@ fn json_query() -> ToolDefinition { fn json_transform() -> ToolDefinition { ToolDefinition { name: "json_transform".into(), - description: "Transform JSON data: extract specific keys, rename keys, or filter an array of objects.".into(), + description: "Transform JSON: extract keys, rename keys, or filter arrays.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -1622,7 +1615,8 @@ fn regex_match() -> ToolDefinition { fn regex_replace() -> ToolDefinition { ToolDefinition { name: "regex_replace".into(), - description: "Find and replace text using a regex pattern. Supports capture group references ($1, $2, etc.) in the replacement.".into(), + description: "Find and replace text using a regex pattern with capture group support." + .into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -1759,7 +1753,7 @@ fn schedule_cancel() -> ToolDefinition { fn code_search() -> ToolDefinition { ToolDefinition { name: "code_search".into(), - description: "Search for text or a regex pattern in files recursively under a directory. Returns matching lines with file paths and line numbers.".into(), + description: "Search for a regex pattern in files recursively. Returns matching lines with paths and line numbers.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -1789,7 +1783,8 @@ fn code_search() -> ToolDefinition { fn code_symbols() -> ToolDefinition { ToolDefinition { name: "code_symbols".into(), - description: "Extract function, struct, class, and method definitions from a source file using regex-based heuristics.".into(), + description: "Extract function, struct, class, and method definitions from a source file." + .into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -2053,8 +2048,7 @@ fn text_count() -> ToolDefinition { fn file_search() -> ToolDefinition { ToolDefinition { name: "file_search".into(), - description: "Search for files by name pattern (glob) recursively under a directory." - .into(), + description: "Search for files by glob pattern recursively.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -2098,9 +2092,7 @@ fn file_info() -> ToolDefinition { fn a2a_delegate() -> ToolDefinition { ToolDefinition { name: "a2a_delegate".into(), - description: "Delegate a task to a remote A2A agent. Discovers the agent, sends the task, \ - polls for completion, and returns the result." - .into(), + description: "Delegate a task to a remote A2A agent and return the result.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -2130,10 +2122,7 @@ fn a2a_delegate() -> ToolDefinition { fn wasm_invoke() -> ToolDefinition { ToolDefinition { name: "wasm_invoke".into(), - description: "Invoke a function on a loaded WASM plugin (imported technique). \ - Executes the named function within the plugin's sandboxed WASM runtime \ - and returns the result." - .into(), + description: "Invoke a function on a loaded WASM plugin and return the result.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -2159,9 +2148,7 @@ fn wasm_invoke() -> ToolDefinition { fn channel_notify() -> ToolDefinition { ToolDefinition { name: "channel_notify".into(), - description: "Send a proactive message to an external channel (Telegram, Slack, Discord, \ - etc.). Use this to push notifications, briefings, alerts, and heartbeat \ - results to connected messaging platforms." + description: "Send a message to an external channel (Telegram, Slack, Discord, etc.)." .into(), input_schema: serde_json::json!({ "type": "object", @@ -2192,12 +2179,7 @@ fn channel_notify() -> ToolDefinition { fn heartbeat_add() -> ToolDefinition { ToolDefinition { name: "heartbeat_add".into(), - description: "Add a proactive heartbeat task to your creed. Heartbeat tasks fire on a \ - cadence and remind you to perform recurring actions like checking email, \ - monitoring endpoints, or daily briefings. Timed cadences (hourly, daily, \ - weekly, 'every 30m', cron) are executed by the background scheduler — \ - no user message needed." - .into(), + description: "Add a recurring heartbeat task to your creed. Cadences: every_bout, on_wake, hourly, daily, weekly, cron.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -2219,8 +2201,7 @@ fn heartbeat_add() -> ToolDefinition { fn heartbeat_list() -> ToolDefinition { ToolDefinition { name: "heartbeat_list".into(), - description: "List all heartbeat tasks in your creed. Shows task description, cadence, \ - active status, execution count, and last checked time." + description: "List all heartbeat tasks in your creed with cadence and execution counts." .into(), input_schema: serde_json::json!({ "type": "object", @@ -2234,9 +2215,7 @@ fn heartbeat_list() -> ToolDefinition { fn heartbeat_remove() -> ToolDefinition { ToolDefinition { name: "heartbeat_remove".into(), - description: "Remove a heartbeat task from your creed by its index (0-based). Use \ - heartbeat_list first to see the indices." - .into(), + description: "Remove a heartbeat task from your creed by its 0-based index.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -2254,10 +2233,7 @@ fn heartbeat_remove() -> ToolDefinition { fn creed_view() -> ToolDefinition { ToolDefinition { name: "creed_view".into(), - description: "View your current creed — identity, personality traits, directives, \ - learned behaviors, relationships, heartbeat tasks, and stats. Use this \ - to understand who you are and what you're configured to do." - .into(), + description: "View your current creed: identity, traits, directives, behaviors, relationships, and stats.".into(), input_schema: serde_json::json!({ "type": "object", "properties": {}, @@ -2270,10 +2246,7 @@ fn creed_view() -> ToolDefinition { fn skill_list() -> ToolDefinition { ToolDefinition { name: "skill_list".into(), - description: "List available skill packs that can be installed. Skill packs bundle MCP \ - server configurations with prompts and tools. Available packs: productivity \ - (calendar/email), developer (GitHub), research (web tools), files (filesystem)." - .into(), + description: "List available skill packs: productivity, developer, research, files.".into(), input_schema: serde_json::json!({ "type": "object", "properties": {}, @@ -2286,11 +2259,7 @@ fn skill_list() -> ToolDefinition { fn skill_recommend() -> ToolDefinition { ToolDefinition { name: "skill_recommend".into(), - description: "Recommend a skill pack to the user based on what they need. Looks up the \ - pack details (what it provides, required setup, install command) and returns \ - a recommendation the user can act on. Use this when the user asks for \ - capabilities you don't currently have (e.g., calendar, email, GitHub)." - .into(), + description: "Recommend a skill pack and show the user the install command.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -2312,7 +2281,9 @@ fn skill_recommend() -> ToolDefinition { fn sys_screenshot() -> ToolDefinition { ToolDefinition { name: "sys_screenshot".into(), - description: "Capture a screenshot of the full screen or a specific window. Returns a base64-encoded PNG image that the vision model can read. Use this to see what's currently on screen.".into(), + description: + "Capture a screenshot of the full screen or a specific window. Returns base64 PNG." + .into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -2329,7 +2300,7 @@ fn sys_screenshot() -> ToolDefinition { fn ui_screenshot() -> ToolDefinition { ToolDefinition { name: "ui_screenshot".into(), - description: "Capture a screenshot of a specific UI region by element ID or bounds. More targeted than sys_screenshot for inspecting specific parts of the screen.".into(), + description: "Capture a screenshot of a specific UI region by element ID or bounds.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -2357,7 +2328,7 @@ fn ui_screenshot() -> ToolDefinition { fn app_ocr() -> ToolDefinition { ToolDefinition { name: "app_ocr".into(), - description: "Extract text from an app window using OCR (optical character recognition). Returns plain text — cheaper than a screenshot + vision model for text-heavy content. Use this first for reading text, fall back to sys_screenshot for visual/spatial understanding.".into(), + description: "Extract text from an app window using OCR. Returns plain text.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -2375,7 +2346,7 @@ fn app_ocr() -> ToolDefinition { fn ui_find_elements() -> ToolDefinition { ToolDefinition { name: "ui_find_elements".into(), - description: "Query the accessibility tree of an app to find UI elements (buttons, text fields, rows, etc.). Returns structured element IDs that can be used with ui_click, ui_type_text, and ui_read_attribute. Re-query if the app state changes, as element IDs are session-ephemeral.".into(), + description: "Query the accessibility tree of an app to find UI elements. Returns element IDs for ui_click/ui_type_text.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -2405,7 +2376,7 @@ fn ui_find_elements() -> ToolDefinition { fn ui_click() -> ToolDefinition { ToolDefinition { name: "ui_click".into(), - description: "Click a UI element by its element ID (from ui_find_elements). Safe, validated accessibility click — not a raw coordinate click.".into(), + description: "Click a UI element by its element ID from ui_find_elements.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -2423,7 +2394,7 @@ fn ui_click() -> ToolDefinition { fn ui_type_text() -> ToolDefinition { ToolDefinition { name: "ui_type_text".into(), - description: "Type text into a UI element by its element ID (from ui_find_elements). Sets the value of a text field or input element.".into(), + description: "Type text into a UI element by its element ID from ui_find_elements.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -2445,7 +2416,7 @@ fn ui_type_text() -> ToolDefinition { fn ui_list_windows() -> ToolDefinition { ToolDefinition { name: "ui_list_windows".into(), - description: "List all visible windows with their titles and owning apps. Use this to discover what's on screen before taking a screenshot or interacting with specific apps.".into(), + description: "List all visible windows with their titles and owning apps.".into(), input_schema: serde_json::json!({ "type": "object", "properties": {} @@ -2457,7 +2428,9 @@ fn ui_list_windows() -> ToolDefinition { fn ui_read_attribute() -> ToolDefinition { ToolDefinition { name: "ui_read_attribute".into(), - description: "Read an accessibility attribute (value, enabled, focused, etc.) from a UI element. Useful for checking element state without a screenshot.".into(), + description: + "Read an accessibility attribute (value, enabled, focused, etc.) from a UI element." + .into(), input_schema: serde_json::json!({ "type": "object", "properties": { From f5a4e7fb316d024b2715e75203f12e3fec597271 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 26 Mar 2026 17:16:00 +0000 Subject: [PATCH 2/3] Fix review issues: restore behavioral hints, fix reflection metric, add tests - Restore LLM routing hints in tool descriptions for app_ocr, ui_find_elements, skill_recommend, and memory_store that were over-compressed - Fix conditional reflection to use messages.len() instead of guard.iterations() which only counts tool-use rounds, not conversation turns - Extract magic max_tokens numbers into named constants - Add tests for adaptive max_tokens (Ollama, non-Ollama, explicit override) - Add tests for conditional reflection (simple bout skips, tool bout triggers) https://claude.ai/code/session_0135FeAb3vUvn6Yw29JdiSna --- crates/punch-runtime/src/fighter_loop.rs | 31 +- crates/punch-runtime/src/tools.rs | 8 +- .../punch-runtime/tests/fighter_loop_tests.rs | 284 ++++++++++++++++++ 3 files changed, 312 insertions(+), 11 deletions(-) diff --git a/crates/punch-runtime/src/fighter_loop.rs b/crates/punch-runtime/src/fighter_loop.rs index 8e57efa..f48621b 100644 --- a/crates/punch-runtime/src/fighter_loop.rs +++ b/crates/punch-runtime/src/fighter_loop.rs @@ -45,6 +45,18 @@ const MAX_CONTINUATION_LOOPS: usize = 5; /// Default per-tool timeout in seconds. const DEFAULT_TOOL_TIMEOUT_SECS: u64 = 120; +/// Default max output tokens for the cheap model tier. +const DEFAULT_MAX_TOKENS_CHEAP: u32 = 1024; +/// Default max output tokens for the mid model tier. +const DEFAULT_MAX_TOKENS_MID: u32 = 2048; +/// Default max output tokens for the expensive tier (or when no routing is configured). +const DEFAULT_MAX_TOKENS_EXPENSIVE: u32 = 4096; +/// Default max output tokens for Ollama models (reasoning models need extra headroom). +const DEFAULT_MAX_TOKENS_OLLAMA: u32 = 16384; +/// Minimum message count (including history) for a bout to be considered substantive +/// enough to warrant a post-bout reflection LLM call. +const REFLECTION_MIN_MESSAGES: usize = 6; + /// Parameters for the fighter loop. pub struct FighterLoopParams { /// The fighter's manifest (identity, model config, system prompt, capabilities). @@ -316,14 +328,14 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult 1024, - Some("mid") => 2048, - _ => 4096, // expensive or no routing + Some("cheap") => DEFAULT_MAX_TOKENS_CHEAP, + Some("mid") => DEFAULT_MAX_TOKENS_MID, + _ => DEFAULT_MAX_TOKENS_EXPENSIVE, // expensive or no routing }; // Reasoning models (Qwen, DeepSeek) use thinking tokens internally, // so they need a much higher default to leave room for visible output. match params.manifest.model.provider { - punch_types::Provider::Ollama => 16384, + punch_types::Provider::Ollama => DEFAULT_MAX_TOKENS_OLLAMA, _ => tier_default, } }), @@ -451,9 +463,14 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult= 3 || tool_calls_made > 0; + // We use messages.len() (which counts all messages in the bout including + // history) rather than guard.iterations() because iterations only tracks + // tool-use rounds and retries — a substantive single-turn text exchange + // would have 0 iterations but still deserve reflection. + let is_substantive_bout = + messages.len() >= REFLECTION_MIN_MESSAGES || tool_calls_made > 0; if is_substantive_bout { let driver = Arc::clone(¶ms.driver); let memory = Arc::clone(¶ms.memory); @@ -466,7 +483,7 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult ToolDefinition { fn memory_store() -> ToolDefinition { ToolDefinition { name: "memory_store".into(), - description: "Store a key-value pair in persistent memory across conversations.".into(), + description: "Store a key-value pair in persistent memory. Use to remember facts, preferences, or context across conversations.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -2259,7 +2259,7 @@ fn skill_list() -> ToolDefinition { fn skill_recommend() -> ToolDefinition { ToolDefinition { name: "skill_recommend".into(), - description: "Recommend a skill pack and show the user the install command.".into(), + description: "Recommend a skill pack when the user needs capabilities you don't have (calendar, email, GitHub). Shows install command.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -2328,7 +2328,7 @@ fn ui_screenshot() -> ToolDefinition { fn app_ocr() -> ToolDefinition { ToolDefinition { name: "app_ocr".into(), - description: "Extract text from an app window using OCR. Returns plain text.".into(), + description: "Extract text from an app window using OCR. Returns plain text. Prefer over sys_screenshot for text extraction.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { @@ -2346,7 +2346,7 @@ fn app_ocr() -> ToolDefinition { fn ui_find_elements() -> ToolDefinition { ToolDefinition { name: "ui_find_elements".into(), - description: "Query the accessibility tree of an app to find UI elements. Returns element IDs for ui_click/ui_type_text.".into(), + description: "Query the accessibility tree of an app to find UI elements. Returns element IDs for ui_click/ui_type_text. Re-query after state changes — IDs are ephemeral.".into(), input_schema: serde_json::json!({ "type": "object", "properties": { diff --git a/crates/punch-runtime/tests/fighter_loop_tests.rs b/crates/punch-runtime/tests/fighter_loop_tests.rs index 8eabbcb..21fcb44 100644 --- a/crates/punch-runtime/tests/fighter_loop_tests.rs +++ b/crates/punch-runtime/tests/fighter_loop_tests.rs @@ -685,3 +685,287 @@ async fn test_llm_completion_error_propagates() { err_str ); } + +// --------------------------------------------------------------------------- +// Adaptive max_tokens tests +// --------------------------------------------------------------------------- + +/// A mock driver that captures the max_tokens from the CompletionRequest. +struct CapturingMockDriver { + captured_max_tokens: Mutex>, +} + +impl CapturingMockDriver { + fn new() -> Self { + Self { + captured_max_tokens: Mutex::new(Vec::new()), + } + } +} + +#[async_trait] +impl LlmDriver for CapturingMockDriver { + async fn complete(&self, request: CompletionRequest) -> PunchResult { + self.captured_max_tokens + .lock() + .unwrap() + .push(request.max_tokens); + + Ok(CompletionResponse { + message: Message { + role: Role::Assistant, + content: "OK".to_string(), + content_parts: Vec::new(), + tool_calls: Vec::new(), + tool_results: Vec::new(), + timestamp: chrono::Utc::now(), + }, + usage: TokenUsage { + input_tokens: 100, + output_tokens: 50, + }, + stop_reason: StopReason::EndTurn, + }) + } +} + +/// Adaptive max_tokens: when no routing is configured and provider is Ollama, +/// max_tokens defaults to 16384 (the Ollama override). +#[tokio::test] +async fn test_adaptive_max_tokens_ollama_default() { + let driver = Arc::new(CapturingMockDriver::new()); + let memory = test_memory(); + + let mut manifest = test_manifest(); + manifest.model.provider = Provider::Ollama; + manifest.model.max_tokens = None; // No explicit max_tokens + + let params = FighterLoopParams { + manifest, + user_message: "Hello".to_string(), + bout_id: punch_memory::BoutId::new(), + fighter_id: FighterId::new(), + memory, + driver: driver.clone(), + available_tools: Vec::new(), + mcp_tools: Vec::new(), + max_iterations: Some(10), + context_window: Some(200_000), + tool_timeout_secs: Some(30), + coordinator: None, + approval_engine: None, + sandbox: None, + mcp_clients: None, + model_routing: None, + channel_notifier: None, + user_content_parts: vec![], + }; + + let _result = run_fighter_loop(params).await.expect("loop should succeed"); + + let captured = driver.captured_max_tokens.lock().unwrap(); + assert_eq!(captured[0], 16384, "Ollama should default to 16384"); +} + +/// Adaptive max_tokens: when no routing is configured and provider is not Ollama, +/// max_tokens defaults to 4096 (the expensive tier default). +#[tokio::test] +async fn test_adaptive_max_tokens_no_routing_non_ollama() { + let driver = Arc::new(CapturingMockDriver::new()); + let memory = test_memory(); + + let mut manifest = test_manifest(); + manifest.model.provider = Provider::Anthropic; + manifest.model.max_tokens = None; // No explicit max_tokens + + let params = FighterLoopParams { + manifest, + user_message: "Hello".to_string(), + bout_id: punch_memory::BoutId::new(), + fighter_id: FighterId::new(), + memory, + driver: driver.clone(), + available_tools: Vec::new(), + mcp_tools: Vec::new(), + max_iterations: Some(10), + context_window: Some(200_000), + tool_timeout_secs: Some(30), + coordinator: None, + approval_engine: None, + sandbox: None, + mcp_clients: None, + model_routing: None, + channel_notifier: None, + user_content_parts: vec![], + }; + + let _result = run_fighter_loop(params).await.expect("loop should succeed"); + + let captured = driver.captured_max_tokens.lock().unwrap(); + assert_eq!( + captured[0], 4096, + "non-Ollama without routing should default to 4096" + ); +} + +/// Adaptive max_tokens: explicit max_tokens in config overrides tier defaults. +#[tokio::test] +async fn test_adaptive_max_tokens_explicit_override() { + let driver = Arc::new(CapturingMockDriver::new()); + let memory = test_memory(); + + let mut manifest = test_manifest(); + manifest.model.provider = Provider::Anthropic; + manifest.model.max_tokens = Some(8192); // Explicit override + + let params = FighterLoopParams { + manifest, + user_message: "Hello".to_string(), + bout_id: punch_memory::BoutId::new(), + fighter_id: FighterId::new(), + memory, + driver: driver.clone(), + available_tools: Vec::new(), + mcp_tools: Vec::new(), + max_iterations: Some(10), + context_window: Some(200_000), + tool_timeout_secs: Some(30), + coordinator: None, + approval_engine: None, + sandbox: None, + mcp_clients: None, + model_routing: None, + channel_notifier: None, + user_content_parts: vec![], + }; + + let _result = run_fighter_loop(params).await.expect("loop should succeed"); + + let captured = driver.captured_max_tokens.lock().unwrap(); + assert_eq!( + captured[0], 8192, + "explicit max_tokens should override tier defaults" + ); +} + +// --------------------------------------------------------------------------- +// Conditional reflection tests +// --------------------------------------------------------------------------- + +/// Conditional reflection: a simple single-turn "hello" bout should skip +/// reflection (iterations=0, tool_calls=0, few messages). +#[tokio::test] +async fn test_simple_bout_skips_reflection() { + let driver = Arc::new(ScriptedMockDriver::text_only("Hi there!")); + let memory = test_memory(); + let fighter_name = "reflection-skip-fighter"; + + // Create a creed so reflection would have something to update + let creed = punch_types::Creed::new(fighter_name); + memory.save_creed(&creed).await.expect("save creed"); + + let mut manifest = test_manifest(); + manifest.name = fighter_name.to_string(); + + let params = FighterLoopParams { + manifest, + user_message: "hello".to_string(), + bout_id: punch_memory::BoutId::new(), + fighter_id: FighterId::new(), + memory: memory.clone(), + driver: driver.clone(), + available_tools: Vec::new(), + mcp_tools: Vec::new(), + max_iterations: Some(10), + context_window: Some(200_000), + tool_timeout_secs: Some(30), + coordinator: None, + approval_engine: None, + sandbox: None, + mcp_clients: None, + model_routing: None, + channel_notifier: None, + user_content_parts: vec![], + }; + + let result = run_fighter_loop(params).await.expect("loop should succeed"); + + assert_eq!(result.response, "Hi there!"); + assert_eq!(result.tool_calls_made, 0); + + // The driver should only have been called once (the main completion). + // If reflection were triggered, the driver would be called a second time. + assert_eq!( + driver.call_count.load(Ordering::SeqCst), + 1, + "simple bout should make only 1 LLM call (no reflection)" + ); +} + +/// Conditional reflection: a bout with tool use should trigger reflection. +#[tokio::test] +async fn test_tool_use_bout_triggers_reflection() { + let tool_call = ToolCall { + id: "tc_reflect".to_string(), + name: "memory_store".to_string(), + input: serde_json::json!({"key": "k", "value": "v"}), + }; + + let driver = Arc::new(ScriptedMockDriver::new(vec![ + ScriptedResponse { + content: "Storing.".to_string(), + tool_calls: vec![tool_call], + stop_reason: StopReason::ToolUse, + }, + ScriptedResponse { + content: "Done storing.".to_string(), + tool_calls: Vec::new(), + stop_reason: StopReason::EndTurn, + }, + ])); + + let memory = test_memory(); + let fighter_name = "reflection-trigger-fighter"; + + let creed = punch_types::Creed::new(fighter_name); + memory.save_creed(&creed).await.expect("save creed"); + + let mut manifest = test_manifest(); + manifest.name = fighter_name.to_string(); + + let mut params = FighterLoopParams { + manifest, + user_message: "Store something for me".to_string(), + bout_id: punch_memory::BoutId::new(), + fighter_id: FighterId::new(), + memory: memory.clone(), + driver: driver.clone(), + available_tools: Vec::new(), + mcp_tools: Vec::new(), + max_iterations: Some(10), + context_window: Some(200_000), + tool_timeout_secs: Some(30), + coordinator: None, + approval_engine: None, + sandbox: None, + mcp_clients: None, + model_routing: None, + channel_notifier: None, + user_content_parts: vec![], + }; + params.available_tools = vec![memory_store_tool_def()]; + + let result = run_fighter_loop(params).await.expect("loop should succeed"); + + assert_eq!(result.tool_calls_made, 1); + + // Give the async reflection task a moment to fire + tokio::time::sleep(std::time::Duration::from_millis(300)).await; + + // Driver should have been called 3 times: 2 for the bout + 1 for reflection + assert_eq!( + driver.call_count.load(Ordering::SeqCst), + 3, + "tool-use bout should trigger reflection (3 LLM calls total)" + ); +} From 7e18b64a5d36f4a9aeab0440c009b692219088c5 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 26 Mar 2026 17:21:51 +0000 Subject: [PATCH 3/3] Fix P1: use routed provider for Ollama max_tokens override When model routing selects a tier that uses Ollama as its provider, the adaptive max_tokens logic now correctly checks the routed provider instead of the base manifest provider, preventing reasoning models from being starved of output tokens. https://claude.ai/code/session_0135FeAb3vUvn6Yw29JdiSna --- crates/punch-runtime/src/fighter_loop.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/crates/punch-runtime/src/fighter_loop.rs b/crates/punch-runtime/src/fighter_loop.rs index f48621b..ccb926a 100644 --- a/crates/punch-runtime/src/fighter_loop.rs +++ b/crates/punch-runtime/src/fighter_loop.rs @@ -180,6 +180,7 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult = None; + let mut routed_provider: Option = None; let routed_driver: Option> = params .model_routing .as_ref() @@ -196,6 +197,7 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult { @@ -334,7 +336,12 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult DEFAULT_MAX_TOKENS_OLLAMA, _ => tier_default, }