From 5cea54086fa5a3fdb45434dd9d53e3a696d32456 Mon Sep 17 00:00:00 2001
From: Archith <archith.rapaka@atomtickets.com>
Date: Thu, 26 Mar 2026 09:58:35 -0700
Subject: [PATCH 1/3] Token efficiency Phase 3: compress tool descriptions,
 adaptive max_tokens, conditional reflection

Three optimizations that reduce token waste without affecting capability:

1. Tool description compression: Cut verbose multi-sentence descriptions to
   concise single-line summaries across all 50+ tool definitions. Saves ~40%
   of per-tool token overhead (e.g. file_read from 230 chars to 90 chars,
   shell_exec from 280 chars to 100 chars, heartbeat_add from 250 chars to 100).

2. Adaptive max_tokens by model tier: Cheap tier gets 1024 max_tokens (simple
   greetings), mid tier gets 2048, expensive tier keeps 4096. Ollama stays at
   16384 for reasoning models. Only applies when the user has not set an
   explicit max_tokens in their config.

3. Conditional reflection: Skip the post-bout reflection LLM call for simple
   exchanges (fewer than 3 loop iterations AND no tool calls). A hello/goodbye
   bout no longer wastes a reflection call. Substantive bouts (3+ turns or any
   tool use) still get full reflection.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 crates/punch-runtime/src/fighter_loop.rs |  31 +++++--
 crates/punch-runtime/src/tools.rs        | 107 +++++++++--------------
 2 files changed, 63 insertions(+), 75 deletions(-)

diff --git a/crates/punch-runtime/src/fighter_loop.rs b/crates/punch-runtime/src/fighter_loop.rs
index c5c6b7b..8e57efa 100644
--- a/crates/punch-runtime/src/fighter_loop.rs
+++ b/crates/punch-runtime/src/fighter_loop.rs
@@ -311,15 +311,22 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult<FighterL
             model: params.manifest.model.model.clone(),
             messages: messages.clone(),
             tools: turn_tools,
-            max_tokens: params.manifest.model.max_tokens.unwrap_or(
+            max_tokens: params.manifest.model.max_tokens.unwrap_or_else(|| {
+                // Adaptive max_tokens: scale output budget by model tier.
+                // Cheap tier gets less headroom since greetings/simple answers
+                // don't need 4K output tokens. Expensive tier gets full budget.
+                let tier_default = match routed_tier.as_deref() {
+                    Some("cheap") => 1024,
+                    Some("mid") => 2048,
+                    _ => 4096, // expensive or no routing
+                };
                 // Reasoning models (Qwen, DeepSeek) use thinking tokens internally,
                 // so they need a much higher default to leave room for visible output.
-                // The thinking budget can easily consume 2000-4000 tokens alone.
                 match params.manifest.model.provider {
                     punch_types::Provider::Ollama => 16384,
-                    _ => 4096,
-                },
-            ),
+                    _ => tier_default,
+                }
+            }),
             temperature: params.manifest.model.temperature,
             system_prompt: Some(system_prompt.clone()),
         };
@@ -443,9 +450,11 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult<FighterL
                     }
                 }
 
-                // Spawn async reflection to extract learned behaviors from the bout.
-                // This runs in the background and does not block the response.
-                {
+                // Conditional reflection: only reflect on substantive bouts.
+                // Skip reflection for simple exchanges (< 3 turns and no tool use)
+                // to avoid wasting an LLM call on "hello" / "how are you?" bouts.
+                let is_substantive_bout = guard.iterations() >= 3 || tool_calls_made > 0;
+                if is_substantive_bout {
                     let driver = Arc::clone(&params.driver);
                     let memory = Arc::clone(&params.memory);
                     let model = params.manifest.model.model.clone();
@@ -455,6 +464,12 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult<FighterL
                         reflect_on_bout(driver, memory, model, fighter_name, reflection_messages)
                             .await;
                     });
+                } else {
+                    debug!(
+                        iterations = guard.iterations(),
+                        tool_calls = tool_calls_made,
+                        "skipping post-bout reflection (simple exchange)"
+                    );
                 }
 
                 return Ok(FighterLoopResult {
diff --git a/crates/punch-runtime/src/tools.rs b/crates/punch-runtime/src/tools.rs
index fc260fd..23e2879 100644
--- a/crates/punch-runtime/src/tools.rs
+++ b/crates/punch-runtime/src/tools.rs
@@ -803,7 +803,7 @@ impl ToolSelector {
 fn file_read() -> ToolDefinition {
     ToolDefinition {
         name: "file_read".into(),
-        description: "Read the contents of a file on the user's machine. Use this to analyze, inspect, or review any file the user mentions — text, code, config, logs, CSV, JSON, etc. Accepts absolute paths (e.g. /Users/name/file.txt) or relative paths.".into(),
+        description: "Read a file by path (absolute or relative). Supports text, code, config, logs, CSV, JSON, etc.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -821,9 +821,7 @@ fn file_read() -> ToolDefinition {
 fn file_write() -> ToolDefinition {
     ToolDefinition {
         name: "file_write".into(),
-        description:
-            "Write or create a file on the user's machine. Creates parent directories if needed. Use for saving results, generating files, or modifying content."
-                .into(),
+        description: "Write or create a file. Creates parent directories if needed.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -845,7 +843,7 @@ fn file_write() -> ToolDefinition {
 fn file_list() -> ToolDefinition {
     ToolDefinition {
         name: "file_list".into(),
-        description: "List files and directories in a folder on the user's machine. Use to explore directory contents, find files, or verify paths exist.".into(),
+        description: "List files and directories in a folder.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -862,7 +860,7 @@ fn file_list() -> ToolDefinition {
 fn shell_exec() -> ToolDefinition {
     ToolDefinition {
         name: "shell_exec".into(),
-        description: "Execute a shell command on the user's machine and return stdout, stderr, and exit code. This is your most versatile tool — use it for anything: launching apps, running scripts, system automation, package management, database queries, API calls via curl, and any task that can be done from a terminal.".into(),
+        description: "Execute a shell command and return stdout, stderr, and exit code. Universal fallback for any task doable from a terminal.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -898,9 +896,7 @@ fn web_fetch() -> ToolDefinition {
 fn web_search() -> ToolDefinition {
     ToolDefinition {
         name: "web_search".into(),
-        description:
-            "Search the web using DuckDuckGo and return the top results with titles and URLs."
-                .into(),
+        description: "Search the web and return top results with titles and URLs.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -918,7 +914,7 @@ fn web_search() -> ToolDefinition {
 fn memory_store() -> ToolDefinition {
     ToolDefinition {
         name: "memory_store".into(),
-        description: "Store a key-value pair in your persistent memory. Use this to remember important facts, user preferences, or context across conversations.".into(),
+        description: "Store a key-value pair in persistent memory across conversations.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -944,7 +940,7 @@ fn memory_store() -> ToolDefinition {
 fn memory_recall() -> ToolDefinition {
     ToolDefinition {
         name: "memory_recall".into(),
-        description: "Search your persistent memory for previously stored information.".into(),
+        description: "Search persistent memory for previously stored information.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -1044,7 +1040,7 @@ fn knowledge_query() -> ToolDefinition {
 fn agent_spawn() -> ToolDefinition {
     ToolDefinition {
         name: "agent_spawn".into(),
-        description: "Spawn a new fighter (AI agent). Returns the new fighter's ID. Use this to create subordinate agents that can handle specialized tasks.".into(),
+        description: "Spawn a new fighter (AI agent) and return its ID.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -1077,7 +1073,7 @@ fn agent_spawn() -> ToolDefinition {
 fn agent_message() -> ToolDefinition {
     ToolDefinition {
         name: "agent_message".into(),
-        description: "Send a message to another fighter by ID or name and get its response. Use this for inter-agent coordination and delegation.".into(),
+        description: "Send a message to another fighter by ID or name and get its response.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -1120,9 +1116,7 @@ fn agent_list() -> ToolDefinition {
 fn patch_apply() -> ToolDefinition {
     ToolDefinition {
         name: "patch_apply".into(),
-        description: "Apply a unified diff patch to a file. Reads the file, validates the patch, \
-                       applies it, and writes the result back. Supports standard unified diff format."
-            .into(),
+        description: "Apply a unified diff patch to a file.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -1225,9 +1219,8 @@ fn browser_type() -> ToolDefinition {
 fn browser_content() -> ToolDefinition {
     ToolDefinition {
         name: "browser_content".into(),
-        description:
-            "Get the text content of the page or a specific element. Useful for extracting readable text from a web page."
-                .into(),
+        description: "Get the text content of the page or a specific element by CSS selector."
+            .into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -1544,7 +1537,7 @@ fn json_query() -> ToolDefinition {
 fn json_transform() -> ToolDefinition {
     ToolDefinition {
         name: "json_transform".into(),
-        description: "Transform JSON data: extract specific keys, rename keys, or filter an array of objects.".into(),
+        description: "Transform JSON: extract keys, rename keys, or filter arrays.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -1622,7 +1615,8 @@ fn regex_match() -> ToolDefinition {
 fn regex_replace() -> ToolDefinition {
     ToolDefinition {
         name: "regex_replace".into(),
-        description: "Find and replace text using a regex pattern. Supports capture group references ($1, $2, etc.) in the replacement.".into(),
+        description: "Find and replace text using a regex pattern with capture group support."
+            .into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -1759,7 +1753,7 @@ fn schedule_cancel() -> ToolDefinition {
 fn code_search() -> ToolDefinition {
     ToolDefinition {
         name: "code_search".into(),
-        description: "Search for text or a regex pattern in files recursively under a directory. Returns matching lines with file paths and line numbers.".into(),
+        description: "Search for a regex pattern in files recursively. Returns matching lines with paths and line numbers.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -1789,7 +1783,8 @@ fn code_search() -> ToolDefinition {
 fn code_symbols() -> ToolDefinition {
     ToolDefinition {
         name: "code_symbols".into(),
-        description: "Extract function, struct, class, and method definitions from a source file using regex-based heuristics.".into(),
+        description: "Extract function, struct, class, and method definitions from a source file."
+            .into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -2053,8 +2048,7 @@ fn text_count() -> ToolDefinition {
 fn file_search() -> ToolDefinition {
     ToolDefinition {
         name: "file_search".into(),
-        description: "Search for files by name pattern (glob) recursively under a directory."
-            .into(),
+        description: "Search for files by glob pattern recursively.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -2098,9 +2092,7 @@ fn file_info() -> ToolDefinition {
 fn a2a_delegate() -> ToolDefinition {
     ToolDefinition {
         name: "a2a_delegate".into(),
-        description: "Delegate a task to a remote A2A agent. Discovers the agent, sends the task, \
-                      polls for completion, and returns the result."
-            .into(),
+        description: "Delegate a task to a remote A2A agent and return the result.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -2130,10 +2122,7 @@ fn a2a_delegate() -> ToolDefinition {
 fn wasm_invoke() -> ToolDefinition {
     ToolDefinition {
         name: "wasm_invoke".into(),
-        description: "Invoke a function on a loaded WASM plugin (imported technique). \
-                      Executes the named function within the plugin's sandboxed WASM runtime \
-                      and returns the result."
-            .into(),
+        description: "Invoke a function on a loaded WASM plugin and return the result.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -2159,9 +2148,7 @@ fn wasm_invoke() -> ToolDefinition {
 fn channel_notify() -> ToolDefinition {
     ToolDefinition {
         name: "channel_notify".into(),
-        description: "Send a proactive message to an external channel (Telegram, Slack, Discord, \
-                      etc.). Use this to push notifications, briefings, alerts, and heartbeat \
-                      results to connected messaging platforms."
+        description: "Send a message to an external channel (Telegram, Slack, Discord, etc.)."
             .into(),
         input_schema: serde_json::json!({
             "type": "object",
@@ -2192,12 +2179,7 @@ fn channel_notify() -> ToolDefinition {
 fn heartbeat_add() -> ToolDefinition {
     ToolDefinition {
         name: "heartbeat_add".into(),
-        description: "Add a proactive heartbeat task to your creed. Heartbeat tasks fire on a \
-                      cadence and remind you to perform recurring actions like checking email, \
-                      monitoring endpoints, or daily briefings. Timed cadences (hourly, daily, \
-                      weekly, 'every 30m', cron) are executed by the background scheduler — \
-                      no user message needed."
-            .into(),
+        description: "Add a recurring heartbeat task to your creed. Cadences: every_bout, on_wake, hourly, daily, weekly, cron.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -2219,8 +2201,7 @@ fn heartbeat_add() -> ToolDefinition {
 fn heartbeat_list() -> ToolDefinition {
     ToolDefinition {
         name: "heartbeat_list".into(),
-        description: "List all heartbeat tasks in your creed. Shows task description, cadence, \
-                      active status, execution count, and last checked time."
+        description: "List all heartbeat tasks in your creed with cadence and execution counts."
             .into(),
         input_schema: serde_json::json!({
             "type": "object",
@@ -2234,9 +2215,7 @@ fn heartbeat_list() -> ToolDefinition {
 fn heartbeat_remove() -> ToolDefinition {
     ToolDefinition {
         name: "heartbeat_remove".into(),
-        description: "Remove a heartbeat task from your creed by its index (0-based). Use \
-                      heartbeat_list first to see the indices."
-            .into(),
+        description: "Remove a heartbeat task from your creed by its 0-based index.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -2254,10 +2233,7 @@ fn heartbeat_remove() -> ToolDefinition {
 fn creed_view() -> ToolDefinition {
     ToolDefinition {
         name: "creed_view".into(),
-        description: "View your current creed — identity, personality traits, directives, \
-                      learned behaviors, relationships, heartbeat tasks, and stats. Use this \
-                      to understand who you are and what you're configured to do."
-            .into(),
+        description: "View your current creed: identity, traits, directives, behaviors, relationships, and stats.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {},
@@ -2270,10 +2246,7 @@ fn creed_view() -> ToolDefinition {
 fn skill_list() -> ToolDefinition {
     ToolDefinition {
         name: "skill_list".into(),
-        description: "List available skill packs that can be installed. Skill packs bundle MCP \
-                      server configurations with prompts and tools. Available packs: productivity \
-                      (calendar/email), developer (GitHub), research (web tools), files (filesystem)."
-            .into(),
+        description: "List available skill packs: productivity, developer, research, files.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {},
@@ -2286,11 +2259,7 @@ fn skill_list() -> ToolDefinition {
 fn skill_recommend() -> ToolDefinition {
     ToolDefinition {
         name: "skill_recommend".into(),
-        description: "Recommend a skill pack to the user based on what they need. Looks up the \
-                      pack details (what it provides, required setup, install command) and returns \
-                      a recommendation the user can act on. Use this when the user asks for \
-                      capabilities you don't currently have (e.g., calendar, email, GitHub)."
-            .into(),
+        description: "Recommend a skill pack and show the user the install command.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -2312,7 +2281,9 @@ fn skill_recommend() -> ToolDefinition {
 fn sys_screenshot() -> ToolDefinition {
     ToolDefinition {
         name: "sys_screenshot".into(),
-        description: "Capture a screenshot of the full screen or a specific window. Returns a base64-encoded PNG image that the vision model can read. Use this to see what's currently on screen.".into(),
+        description:
+            "Capture a screenshot of the full screen or a specific window. Returns base64 PNG."
+                .into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -2329,7 +2300,7 @@ fn sys_screenshot() -> ToolDefinition {
 fn ui_screenshot() -> ToolDefinition {
     ToolDefinition {
         name: "ui_screenshot".into(),
-        description: "Capture a screenshot of a specific UI region by element ID or bounds. More targeted than sys_screenshot for inspecting specific parts of the screen.".into(),
+        description: "Capture a screenshot of a specific UI region by element ID or bounds.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -2357,7 +2328,7 @@ fn ui_screenshot() -> ToolDefinition {
 fn app_ocr() -> ToolDefinition {
     ToolDefinition {
         name: "app_ocr".into(),
-        description: "Extract text from an app window using OCR (optical character recognition). Returns plain text — cheaper than a screenshot + vision model for text-heavy content. Use this first for reading text, fall back to sys_screenshot for visual/spatial understanding.".into(),
+        description: "Extract text from an app window using OCR. Returns plain text.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -2375,7 +2346,7 @@ fn app_ocr() -> ToolDefinition {
 fn ui_find_elements() -> ToolDefinition {
     ToolDefinition {
         name: "ui_find_elements".into(),
-        description: "Query the accessibility tree of an app to find UI elements (buttons, text fields, rows, etc.). Returns structured element IDs that can be used with ui_click, ui_type_text, and ui_read_attribute. Re-query if the app state changes, as element IDs are session-ephemeral.".into(),
+        description: "Query the accessibility tree of an app to find UI elements. Returns element IDs for ui_click/ui_type_text.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -2405,7 +2376,7 @@ fn ui_find_elements() -> ToolDefinition {
 fn ui_click() -> ToolDefinition {
     ToolDefinition {
         name: "ui_click".into(),
-        description: "Click a UI element by its element ID (from ui_find_elements). Safe, validated accessibility click — not a raw coordinate click.".into(),
+        description: "Click a UI element by its element ID from ui_find_elements.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -2423,7 +2394,7 @@ fn ui_click() -> ToolDefinition {
 fn ui_type_text() -> ToolDefinition {
     ToolDefinition {
         name: "ui_type_text".into(),
-        description: "Type text into a UI element by its element ID (from ui_find_elements). Sets the value of a text field or input element.".into(),
+        description: "Type text into a UI element by its element ID from ui_find_elements.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -2445,7 +2416,7 @@ fn ui_type_text() -> ToolDefinition {
 fn ui_list_windows() -> ToolDefinition {
     ToolDefinition {
         name: "ui_list_windows".into(),
-        description: "List all visible windows with their titles and owning apps. Use this to discover what's on screen before taking a screenshot or interacting with specific apps.".into(),
+        description: "List all visible windows with their titles and owning apps.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {}
@@ -2457,7 +2428,9 @@ fn ui_list_windows() -> ToolDefinition {
 fn ui_read_attribute() -> ToolDefinition {
     ToolDefinition {
         name: "ui_read_attribute".into(),
-        description: "Read an accessibility attribute (value, enabled, focused, etc.) from a UI element. Useful for checking element state without a screenshot.".into(),
+        description:
+            "Read an accessibility attribute (value, enabled, focused, etc.) from a UI element."
+                .into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {

From f5a4e7fb316d024b2715e75203f12e3fec597271 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 26 Mar 2026 17:16:00 +0000
Subject: [PATCH 2/3] Fix review issues: restore behavioral hints, fix
 reflection metric, add tests

- Restore LLM routing hints in tool descriptions for app_ocr, ui_find_elements,
  skill_recommend, and memory_store that were over-compressed
- Fix conditional reflection to use messages.len() instead of guard.iterations()
  which only counts tool-use rounds, not conversation turns
- Extract magic max_tokens numbers into named constants
- Add tests for adaptive max_tokens (Ollama, non-Ollama, explicit override)
- Add tests for conditional reflection (simple bout skips, tool bout triggers)

https://claude.ai/code/session_0135FeAb3vUvn6Yw29JdiSna
---
 crates/punch-runtime/src/fighter_loop.rs      |  31 +-
 crates/punch-runtime/src/tools.rs             |   8 +-
 .../punch-runtime/tests/fighter_loop_tests.rs | 284 ++++++++++++++++++
 3 files changed, 312 insertions(+), 11 deletions(-)

diff --git a/crates/punch-runtime/src/fighter_loop.rs b/crates/punch-runtime/src/fighter_loop.rs
index 8e57efa..f48621b 100644
--- a/crates/punch-runtime/src/fighter_loop.rs
+++ b/crates/punch-runtime/src/fighter_loop.rs
@@ -45,6 +45,18 @@ const MAX_CONTINUATION_LOOPS: usize = 5;
 /// Default per-tool timeout in seconds.
 const DEFAULT_TOOL_TIMEOUT_SECS: u64 = 120;
 
+/// Default max output tokens for the cheap model tier.
+const DEFAULT_MAX_TOKENS_CHEAP: u32 = 1024;
+/// Default max output tokens for the mid model tier.
+const DEFAULT_MAX_TOKENS_MID: u32 = 2048;
+/// Default max output tokens for the expensive tier (or when no routing is configured).
+const DEFAULT_MAX_TOKENS_EXPENSIVE: u32 = 4096;
+/// Default max output tokens for Ollama models (reasoning models need extra headroom).
+const DEFAULT_MAX_TOKENS_OLLAMA: u32 = 16384;
+/// Minimum message count (including history) for a bout to be considered substantive
+/// enough to warrant a post-bout reflection LLM call.
+const REFLECTION_MIN_MESSAGES: usize = 6;
+
 /// Parameters for the fighter loop.
 pub struct FighterLoopParams {
     /// The fighter's manifest (identity, model config, system prompt, capabilities).
@@ -316,14 +328,14 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult<FighterL
                 // Cheap tier gets less headroom since greetings/simple answers
                 // don't need 4K output tokens. Expensive tier gets full budget.
                 let tier_default = match routed_tier.as_deref() {
-                    Some("cheap") => 1024,
-                    Some("mid") => 2048,
-                    _ => 4096, // expensive or no routing
+                    Some("cheap") => DEFAULT_MAX_TOKENS_CHEAP,
+                    Some("mid") => DEFAULT_MAX_TOKENS_MID,
+                    _ => DEFAULT_MAX_TOKENS_EXPENSIVE, // expensive or no routing
                 };
                 // Reasoning models (Qwen, DeepSeek) use thinking tokens internally,
                 // so they need a much higher default to leave room for visible output.
                 match params.manifest.model.provider {
-                    punch_types::Provider::Ollama => 16384,
+                    punch_types::Provider::Ollama => DEFAULT_MAX_TOKENS_OLLAMA,
                     _ => tier_default,
                 }
             }),
@@ -451,9 +463,14 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult<FighterL
                 }
 
                 // Conditional reflection: only reflect on substantive bouts.
-                // Skip reflection for simple exchanges (< 3 turns and no tool use)
+                // Skip reflection for simple exchanges (few messages and no tool use)
                 // to avoid wasting an LLM call on "hello" / "how are you?" bouts.
-                let is_substantive_bout = guard.iterations() >= 3 || tool_calls_made > 0;
+                // We use messages.len() (which counts all messages in the bout including
+                // history) rather than guard.iterations() because iterations only tracks
+                // tool-use rounds and retries — a substantive single-turn text exchange
+                // would have 0 iterations but still deserve reflection.
+                let is_substantive_bout =
+                    messages.len() >= REFLECTION_MIN_MESSAGES || tool_calls_made > 0;
                 if is_substantive_bout {
                     let driver = Arc::clone(&params.driver);
                     let memory = Arc::clone(&params.memory);
@@ -466,7 +483,7 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult<FighterL
                     });
                 } else {
                     debug!(
-                        iterations = guard.iterations(),
+                        message_count = messages.len(),
                         tool_calls = tool_calls_made,
                         "skipping post-bout reflection (simple exchange)"
                     );
diff --git a/crates/punch-runtime/src/tools.rs b/crates/punch-runtime/src/tools.rs
index 23e2879..e9bfe84 100644
--- a/crates/punch-runtime/src/tools.rs
+++ b/crates/punch-runtime/src/tools.rs
@@ -914,7 +914,7 @@ fn web_search() -> ToolDefinition {
 fn memory_store() -> ToolDefinition {
     ToolDefinition {
         name: "memory_store".into(),
-        description: "Store a key-value pair in persistent memory across conversations.".into(),
+        description: "Store a key-value pair in persistent memory. Use to remember facts, preferences, or context across conversations.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -2259,7 +2259,7 @@ fn skill_list() -> ToolDefinition {
 fn skill_recommend() -> ToolDefinition {
     ToolDefinition {
         name: "skill_recommend".into(),
-        description: "Recommend a skill pack and show the user the install command.".into(),
+        description: "Recommend a skill pack when the user needs capabilities you don't have (calendar, email, GitHub). Shows install command.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -2328,7 +2328,7 @@ fn ui_screenshot() -> ToolDefinition {
 fn app_ocr() -> ToolDefinition {
     ToolDefinition {
         name: "app_ocr".into(),
-        description: "Extract text from an app window using OCR. Returns plain text.".into(),
+        description: "Extract text from an app window using OCR. Returns plain text. Prefer over sys_screenshot for text extraction.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
@@ -2346,7 +2346,7 @@ fn app_ocr() -> ToolDefinition {
 fn ui_find_elements() -> ToolDefinition {
     ToolDefinition {
         name: "ui_find_elements".into(),
-        description: "Query the accessibility tree of an app to find UI elements. Returns element IDs for ui_click/ui_type_text.".into(),
+        description: "Query the accessibility tree of an app to find UI elements. Returns element IDs for ui_click/ui_type_text. Re-query after state changes — IDs are ephemeral.".into(),
         input_schema: serde_json::json!({
             "type": "object",
             "properties": {
diff --git a/crates/punch-runtime/tests/fighter_loop_tests.rs b/crates/punch-runtime/tests/fighter_loop_tests.rs
index 8eabbcb..21fcb44 100644
--- a/crates/punch-runtime/tests/fighter_loop_tests.rs
+++ b/crates/punch-runtime/tests/fighter_loop_tests.rs
@@ -685,3 +685,287 @@ async fn test_llm_completion_error_propagates() {
         err_str
     );
 }
+
+// ---------------------------------------------------------------------------
+// Adaptive max_tokens tests
+// ---------------------------------------------------------------------------
+
+/// A mock driver that captures the max_tokens from the CompletionRequest.
+struct CapturingMockDriver {
+    captured_max_tokens: Mutex<Vec<u32>>,
+}
+
+impl CapturingMockDriver {
+    fn new() -> Self {
+        Self {
+            captured_max_tokens: Mutex::new(Vec::new()),
+        }
+    }
+}
+
+#[async_trait]
+impl LlmDriver for CapturingMockDriver {
+    async fn complete(&self, request: CompletionRequest) -> PunchResult<CompletionResponse> {
+        self.captured_max_tokens
+            .lock()
+            .unwrap()
+            .push(request.max_tokens);
+
+        Ok(CompletionResponse {
+            message: Message {
+                role: Role::Assistant,
+                content: "OK".to_string(),
+                content_parts: Vec::new(),
+                tool_calls: Vec::new(),
+                tool_results: Vec::new(),
+                timestamp: chrono::Utc::now(),
+            },
+            usage: TokenUsage {
+                input_tokens: 100,
+                output_tokens: 50,
+            },
+            stop_reason: StopReason::EndTurn,
+        })
+    }
+}
+
+/// Adaptive max_tokens: when no routing is configured and provider is Ollama,
+/// max_tokens defaults to 16384 (the Ollama override).
+#[tokio::test]
+async fn test_adaptive_max_tokens_ollama_default() {
+    let driver = Arc::new(CapturingMockDriver::new());
+    let memory = test_memory();
+
+    let mut manifest = test_manifest();
+    manifest.model.provider = Provider::Ollama;
+    manifest.model.max_tokens = None; // No explicit max_tokens
+
+    let params = FighterLoopParams {
+        manifest,
+        user_message: "Hello".to_string(),
+        bout_id: punch_memory::BoutId::new(),
+        fighter_id: FighterId::new(),
+        memory,
+        driver: driver.clone(),
+        available_tools: Vec::new(),
+        mcp_tools: Vec::new(),
+        max_iterations: Some(10),
+        context_window: Some(200_000),
+        tool_timeout_secs: Some(30),
+        coordinator: None,
+        approval_engine: None,
+        sandbox: None,
+        mcp_clients: None,
+        model_routing: None,
+        channel_notifier: None,
+        user_content_parts: vec![],
+    };
+
+    let _result = run_fighter_loop(params).await.expect("loop should succeed");
+
+    let captured = driver.captured_max_tokens.lock().unwrap();
+    assert_eq!(captured[0], 16384, "Ollama should default to 16384");
+}
+
+/// Adaptive max_tokens: when no routing is configured and provider is not Ollama,
+/// max_tokens defaults to 4096 (the expensive tier default).
+#[tokio::test]
+async fn test_adaptive_max_tokens_no_routing_non_ollama() {
+    let driver = Arc::new(CapturingMockDriver::new());
+    let memory = test_memory();
+
+    let mut manifest = test_manifest();
+    manifest.model.provider = Provider::Anthropic;
+    manifest.model.max_tokens = None; // No explicit max_tokens
+
+    let params = FighterLoopParams {
+        manifest,
+        user_message: "Hello".to_string(),
+        bout_id: punch_memory::BoutId::new(),
+        fighter_id: FighterId::new(),
+        memory,
+        driver: driver.clone(),
+        available_tools: Vec::new(),
+        mcp_tools: Vec::new(),
+        max_iterations: Some(10),
+        context_window: Some(200_000),
+        tool_timeout_secs: Some(30),
+        coordinator: None,
+        approval_engine: None,
+        sandbox: None,
+        mcp_clients: None,
+        model_routing: None,
+        channel_notifier: None,
+        user_content_parts: vec![],
+    };
+
+    let _result = run_fighter_loop(params).await.expect("loop should succeed");
+
+    let captured = driver.captured_max_tokens.lock().unwrap();
+    assert_eq!(
+        captured[0], 4096,
+        "non-Ollama without routing should default to 4096"
+    );
+}
+
+/// Adaptive max_tokens: explicit max_tokens in config overrides tier defaults.
+#[tokio::test]
+async fn test_adaptive_max_tokens_explicit_override() {
+    let driver = Arc::new(CapturingMockDriver::new());
+    let memory = test_memory();
+
+    let mut manifest = test_manifest();
+    manifest.model.provider = Provider::Anthropic;
+    manifest.model.max_tokens = Some(8192); // Explicit override
+
+    let params = FighterLoopParams {
+        manifest,
+        user_message: "Hello".to_string(),
+        bout_id: punch_memory::BoutId::new(),
+        fighter_id: FighterId::new(),
+        memory,
+        driver: driver.clone(),
+        available_tools: Vec::new(),
+        mcp_tools: Vec::new(),
+        max_iterations: Some(10),
+        context_window: Some(200_000),
+        tool_timeout_secs: Some(30),
+        coordinator: None,
+        approval_engine: None,
+        sandbox: None,
+        mcp_clients: None,
+        model_routing: None,
+        channel_notifier: None,
+        user_content_parts: vec![],
+    };
+
+    let _result = run_fighter_loop(params).await.expect("loop should succeed");
+
+    let captured = driver.captured_max_tokens.lock().unwrap();
+    assert_eq!(
+        captured[0], 8192,
+        "explicit max_tokens should override tier defaults"
+    );
+}
+
+// ---------------------------------------------------------------------------
+// Conditional reflection tests
+// ---------------------------------------------------------------------------
+
+/// Conditional reflection: a simple single-turn "hello" bout should skip
+/// reflection (iterations=0, tool_calls=0, few messages).
+#[tokio::test]
+async fn test_simple_bout_skips_reflection() {
+    let driver = Arc::new(ScriptedMockDriver::text_only("Hi there!"));
+    let memory = test_memory();
+    let fighter_name = "reflection-skip-fighter";
+
+    // Create a creed so reflection would have something to update
+    let creed = punch_types::Creed::new(fighter_name);
+    memory.save_creed(&creed).await.expect("save creed");
+
+    let mut manifest = test_manifest();
+    manifest.name = fighter_name.to_string();
+
+    let params = FighterLoopParams {
+        manifest,
+        user_message: "hello".to_string(),
+        bout_id: punch_memory::BoutId::new(),
+        fighter_id: FighterId::new(),
+        memory: memory.clone(),
+        driver: driver.clone(),
+        available_tools: Vec::new(),
+        mcp_tools: Vec::new(),
+        max_iterations: Some(10),
+        context_window: Some(200_000),
+        tool_timeout_secs: Some(30),
+        coordinator: None,
+        approval_engine: None,
+        sandbox: None,
+        mcp_clients: None,
+        model_routing: None,
+        channel_notifier: None,
+        user_content_parts: vec![],
+    };
+
+    let result = run_fighter_loop(params).await.expect("loop should succeed");
+
+    assert_eq!(result.response, "Hi there!");
+    assert_eq!(result.tool_calls_made, 0);
+
+    // The driver should only have been called once (the main completion).
+    // If reflection were triggered, the driver would be called a second time.
+    assert_eq!(
+        driver.call_count.load(Ordering::SeqCst),
+        1,
+        "simple bout should make only 1 LLM call (no reflection)"
+    );
+}
+
+/// Conditional reflection: a bout with tool use should trigger reflection.
+#[tokio::test]
+async fn test_tool_use_bout_triggers_reflection() {
+    let tool_call = ToolCall {
+        id: "tc_reflect".to_string(),
+        name: "memory_store".to_string(),
+        input: serde_json::json!({"key": "k", "value": "v"}),
+    };
+
+    let driver = Arc::new(ScriptedMockDriver::new(vec![
+        ScriptedResponse {
+            content: "Storing.".to_string(),
+            tool_calls: vec![tool_call],
+            stop_reason: StopReason::ToolUse,
+        },
+        ScriptedResponse {
+            content: "Done storing.".to_string(),
+            tool_calls: Vec::new(),
+            stop_reason: StopReason::EndTurn,
+        },
+    ]));
+
+    let memory = test_memory();
+    let fighter_name = "reflection-trigger-fighter";
+
+    let creed = punch_types::Creed::new(fighter_name);
+    memory.save_creed(&creed).await.expect("save creed");
+
+    let mut manifest = test_manifest();
+    manifest.name = fighter_name.to_string();
+
+    let mut params = FighterLoopParams {
+        manifest,
+        user_message: "Store something for me".to_string(),
+        bout_id: punch_memory::BoutId::new(),
+        fighter_id: FighterId::new(),
+        memory: memory.clone(),
+        driver: driver.clone(),
+        available_tools: Vec::new(),
+        mcp_tools: Vec::new(),
+        max_iterations: Some(10),
+        context_window: Some(200_000),
+        tool_timeout_secs: Some(30),
+        coordinator: None,
+        approval_engine: None,
+        sandbox: None,
+        mcp_clients: None,
+        model_routing: None,
+        channel_notifier: None,
+        user_content_parts: vec![],
+    };
+    params.available_tools = vec![memory_store_tool_def()];
+
+    let result = run_fighter_loop(params).await.expect("loop should succeed");
+
+    assert_eq!(result.tool_calls_made, 1);
+
+    // Give the async reflection task a moment to fire
+    tokio::time::sleep(std::time::Duration::from_millis(300)).await;
+
+    // Driver should have been called 3 times: 2 for the bout + 1 for reflection
+    assert_eq!(
+        driver.call_count.load(Ordering::SeqCst),
+        3,
+        "tool-use bout should trigger reflection (3 LLM calls total)"
+    );
+}

From 7e18b64a5d36f4a9aeab0440c009b692219088c5 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 26 Mar 2026 17:21:51 +0000
Subject: [PATCH 3/3] Fix P1: use routed provider for Ollama max_tokens
 override

When model routing selects a tier that uses Ollama as its provider,
the adaptive max_tokens logic now correctly checks the routed provider
instead of the base manifest provider, preventing reasoning models
from being starved of output tokens.

https://claude.ai/code/session_0135FeAb3vUvn6Yw29JdiSna
---
 crates/punch-runtime/src/fighter_loop.rs | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/crates/punch-runtime/src/fighter_loop.rs b/crates/punch-runtime/src/fighter_loop.rs
index f48621b..ccb926a 100644
--- a/crates/punch-runtime/src/fighter_loop.rs
+++ b/crates/punch-runtime/src/fighter_loop.rs
@@ -180,6 +180,7 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult<FighterL
 
     // 2b. Model routing: check if we should use a tier-specific driver.
     let mut routed_tier: Option<String> = None;
+    let mut routed_provider: Option<punch_types::Provider> = None;
     let routed_driver: Option<Arc<dyn LlmDriver>> = params
         .model_routing
         .as_ref()
@@ -196,6 +197,7 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult<FighterL
                         "model router: using tier-specific driver"
                     );
                     routed_tier = Some(tier.to_string());
+                    routed_provider = Some(model_config.provider);
                     Some(driver)
                 }
                 Err(e) => {
@@ -334,7 +336,12 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult<FighterL
                 };
                 // Reasoning models (Qwen, DeepSeek) use thinking tokens internally,
                 // so they need a much higher default to leave room for visible output.
-                match params.manifest.model.provider {
+                // Use the routed provider if routing selected a tier, otherwise fall
+                // back to the base manifest provider.
+                let active_provider = routed_provider
+                    .clone()
+                    .unwrap_or_else(|| params.manifest.model.provider.clone());
+                match active_provider {
                     punch_types::Provider::Ollama => DEFAULT_MAX_TOKENS_OLLAMA,
                     _ => tier_default,
                 }