From 156aef31efdf8a553950e79bdddc9da33279baeb Mon Sep 17 00:00:00 2001
From: Archith <archith.rapaka@atomtickets.com>
Date: Thu, 26 Mar 2026 00:09:17 -0700
Subject: [PATCH 1/4] Add dynamic tool selection: context-aware per-turn tool
 loading
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of sending all 82 tools on every LLM call (~14K tokens overhead),
the ToolSelector now loads a core set (~18 tools) and activates contextual
tool groups when keywords appear in the conversation.

Architecture:
- ToolGroup enum: 17 contextual groups (git, docker, json, browser, etc.)
  each with keyword triggers and capability gates
- ToolSelector: stateful per-bout selector with monotonic growth — once a
  group activates it stays active to maximize prompt cache hits
- Core tools always loaded: file_read, file_write, shell_exec, web_fetch,
  web_search, memory, screenshot, channel_notify, self-config
- shell_exec is always in core as the universal fallback — the model never
  loses capability, it just gets a leaner tool menu

Token savings:
- Core only (typical): ~18 tools, ~3K tokens (was 82 tools, ~14K tokens)
- Core + 1 group: ~23 tools, ~3.8K tokens
- Core + 2 groups: ~28 tools, ~4.7K tokens
- 66-80% reduction in tool token overhead per request

The tool executor still runs ANY tool regardless of what was sent to the
LLM — capabilities are checked at execution time, not selection time.

Includes 10 unit tests covering: core presence, keyword activation,
capability gating, monotonic growth, change detection, case insensitivity,
scan window limits, no duplicates, and count bounds.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 crates/punch-kernel/src/background.rs         |   1 +
 .../punch-kernel/src/heartbeat_scheduler.rs   |   1 +
 crates/punch-kernel/src/ring.rs               |  18 +-
 crates/punch-kernel/src/workflow.rs           |   1 +
 crates/punch-runtime/src/fighter_loop.rs      |  34 +-
 crates/punch-runtime/src/lib.rs               |   2 +-
 crates/punch-runtime/src/tools.rs             | 659 +++++++++++++++++-
 .../punch-runtime/tests/fighter_loop_tests.rs |   4 +
 8 files changed, 706 insertions(+), 14 deletions(-)

diff --git a/crates/punch-kernel/src/background.rs b/crates/punch-kernel/src/background.rs
index 5350204..58bafc1 100644
--- a/crates/punch-kernel/src/background.rs
+++ b/crates/punch-kernel/src/background.rs
@@ -115,6 +115,7 @@ pub async fn run_gorilla_tick(
         memory: Arc::clone(memory),
         driver: Arc::clone(driver),
         available_tools,
+        mcp_tools: Vec::new(),
         max_iterations: Some(10),
         context_window: None,
         tool_timeout_secs: None,
diff --git a/crates/punch-kernel/src/heartbeat_scheduler.rs b/crates/punch-kernel/src/heartbeat_scheduler.rs
index f8dc17f..aa3d481 100644
--- a/crates/punch-kernel/src/heartbeat_scheduler.rs
+++ b/crates/punch-kernel/src/heartbeat_scheduler.rs
@@ -273,6 +273,7 @@ async fn heartbeat_loop(mut cfg: HeartbeatLoopConfig) {
                 memory: Arc::clone(memory),
                 driver: Arc::clone(driver),
                 available_tools: tools_for_capabilities(&manifest.capabilities),
+                mcp_tools: Vec::new(),
                 max_iterations: Some(10),
                 context_window: None,
                 tool_timeout_secs: Some(60),
diff --git a/crates/punch-kernel/src/ring.rs b/crates/punch-kernel/src/ring.rs
index 5f41937..17e6446 100644
--- a/crates/punch-kernel/src/ring.rs
+++ b/crates/punch-kernel/src/ring.rs
@@ -18,10 +18,7 @@ use tokio::task::JoinHandle;
 use tracing::{info, instrument, warn};
 
 use punch_memory::{BoutId, MemorySubstrate};
-use punch_runtime::{
-    FighterLoopParams, FighterLoopResult, LlmDriver, McpClient, run_fighter_loop,
-    tools_for_capabilities,
-};
+use punch_runtime::{FighterLoopParams, FighterLoopResult, LlmDriver, McpClient, run_fighter_loop};
 use punch_types::{
     AgentCoordinator, AgentInfo, AgentMessageResult, CoordinationStrategy, FighterId,
     FighterManifest, FighterStatus, GorillaId, GorillaManifest, GorillaMetrics, GorillaStatus,
@@ -763,9 +760,9 @@ impl Ring {
         let fighter_name = manifest.name.clone();
         drop(entry); // Release the DashMap guard before any async calls.
 
-        let mut available_tools = tools_for_capabilities(&manifest.capabilities);
-
-        // Merge MCP tools if the fighter has McpAccess capability.
+        // Collect MCP tools separately (the fighter loop handles dynamic
+        // built-in tool selection via ToolSelector when available_tools is empty).
+        let mut mcp_tools = Vec::new();
         let has_mcp_access = manifest
             .capabilities
             .iter()
@@ -782,7 +779,7 @@ impl Ring {
                 });
                 if can_access {
                     match mcp_entry.value().list_tools().await {
-                        Ok(tools) => available_tools.extend(tools),
+                        Ok(tools) => mcp_tools.extend(tools),
                         Err(e) => {
                             warn!(
                                 server = %server_name,
@@ -804,6 +801,8 @@ impl Ring {
         });
 
         // Run the fighter loop.
+        // available_tools is empty — the fighter loop uses ToolSelector for
+        // context-aware dynamic tool loading per turn.
         let params = FighterLoopParams {
             manifest: manifest.clone(),
             user_message: message,
@@ -811,7 +810,8 @@ impl Ring {
             fighter_id: *fighter_id,
             memory: Arc::clone(&self.memory),
             driver: Arc::clone(&self.driver),
-            available_tools,
+            available_tools: Vec::new(),
+            mcp_tools,
             max_iterations: None,
             context_window: None,
             tool_timeout_secs: None,
diff --git a/crates/punch-kernel/src/workflow.rs b/crates/punch-kernel/src/workflow.rs
index fd420d7..991d466 100644
--- a/crates/punch-kernel/src/workflow.rs
+++ b/crates/punch-kernel/src/workflow.rs
@@ -1391,6 +1391,7 @@ impl WorkflowEngine {
             memory: Arc::clone(memory),
             driver: Arc::clone(driver),
             available_tools,
+            mcp_tools: Vec::new(),
             max_iterations: Some(20),
             context_window: None,
             tool_timeout_secs: Some(timeout_secs),
diff --git a/crates/punch-runtime/src/fighter_loop.rs b/crates/punch-runtime/src/fighter_loop.rs
index 68c2aa5..c87c4e3 100644
--- a/crates/punch-runtime/src/fighter_loop.rs
+++ b/crates/punch-runtime/src/fighter_loop.rs
@@ -60,7 +60,11 @@ pub struct FighterLoopParams {
     /// The LLM driver to use for completions.
     pub driver: Arc<dyn LlmDriver>,
     /// Tools available for this fighter to use.
+    /// When provided, bypasses dynamic tool selection (used by workflows, gorillas, tests).
+    /// When empty, the fighter loop uses `ToolSelector` for context-aware tool loading.
     pub available_tools: Vec<ToolDefinition>,
+    /// Pre-fetched MCP tools for this fighter (merged into tool list each turn).
+    pub mcp_tools: Vec<ToolDefinition>,
     /// Maximum loop iterations before forced termination (default: 50).
     pub max_iterations: Option<usize>,
     /// Context window size in tokens (default: 200K).
@@ -243,8 +247,22 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult<FighterL
         }
     }
 
+    // --- Dynamic tool selection ---
+    // If available_tools is pre-populated (workflows, gorillas, tests), use it as-is.
+    // Otherwise, use ToolSelector for context-aware per-turn tool loading.
+    let use_dynamic_tools = params.available_tools.is_empty();
+    let mut tool_selector = if use_dynamic_tools {
+        Some(crate::tools::ToolSelector::new(
+            &params.manifest.capabilities,
+        ))
+    } else {
+        None
+    };
+
     info!(
-        tool_count = params.available_tools.len(),
+        dynamic_tools = use_dynamic_tools,
+        static_tool_count = params.available_tools.len(),
+        mcp_tool_count = params.mcp_tools.len(),
         fighter = %params.manifest.name,
         model = %params.manifest.model.model,
         "fighter loop starting"
@@ -252,8 +270,18 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult<FighterL
 
     // 4. Main loop.
     loop {
+        // --- Dynamic tool selection: pick tools for this turn ---
+        let turn_tools = if let Some(ref mut selector) = tool_selector {
+            let (mut selected, _changed) = selector.select_tools(&messages);
+            // Merge MCP tools (already capability-filtered in ring.rs).
+            selected.extend(params.mcp_tools.iter().cloned());
+            selected
+        } else {
+            params.available_tools.clone()
+        };
+
         // --- Context Budget: check and trim before LLM call ---
-        if let Some(trim_action) = budget.check_trim_needed(&messages, &params.available_tools) {
+        if let Some(trim_action) = budget.check_trim_needed(&messages, &turn_tools) {
             budget.apply_trim(&mut messages, trim_action);
 
             // Re-run session repair after trimming (may create orphans).
@@ -270,7 +298,7 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult<FighterL
         let request = CompletionRequest {
             model: params.manifest.model.model.clone(),
             messages: messages.clone(),
-            tools: params.available_tools.clone(),
+            tools: turn_tools,
             max_tokens: params.manifest.model.max_tokens.unwrap_or(
                 // Reasoning models (Qwen, DeepSeek) use thinking tokens internally,
                 // so they need a much higher default to leave room for visible output.
diff --git a/crates/punch-runtime/src/lib.rs b/crates/punch-runtime/src/lib.rs
index 359621f..22364f7 100644
--- a/crates/punch-runtime/src/lib.rs
+++ b/crates/punch-runtime/src/lib.rs
@@ -41,4 +41,4 @@ pub use mcp::McpClient;
 pub use model_router::{ModelRouter, ModelTier};
 pub use session_repair::{RepairStats, repair_session};
 pub use tool_executor::{ToolExecutionContext, execute_tool};
-pub use tools::{all_tools, tools_for_capabilities};
+pub use tools::{ToolGroup, ToolSelector, all_tools, tools_for_capabilities};
diff --git a/crates/punch-runtime/src/tools.rs b/crates/punch-runtime/src/tools.rs
index 2ba472e..0fce997 100644
--- a/crates/punch-runtime/src/tools.rs
+++ b/crates/punch-runtime/src/tools.rs
@@ -2,8 +2,21 @@
 //!
 //! This module defines the tool schemas that get sent to the LLM so it knows
 //! what tools are available. The actual execution logic lives in `tool_executor`.
+//!
+//! ## Dynamic Tool Selection
+//!
+//! The [`ToolSelector`] provides context-aware tool loading. Instead of sending
+//! all 80+ tools on every LLM call, it loads a core set (~16 tools) and
+//! activates additional tool groups when conversation context mentions relevant
+//! keywords. Once a group is activated in a bout, it stays active (monotonic
+//! growth) to maximize prompt cache hits.
+
+use std::collections::BTreeSet;
+use std::collections::hash_map::DefaultHasher;
+use std::hash::{Hash, Hasher};
 
-use punch_types::{Capability, ToolCategory, ToolDefinition};
+use punch_types::{Capability, Message, ToolCategory, ToolDefinition};
+use tracing::info;
 
 /// Return all built-in tool definitions that match the given capabilities.
 ///
@@ -243,6 +256,442 @@ fn push_unique(tools: &mut Vec<ToolDefinition>, tool: ToolDefinition) {
     }
 }
 
+// ---------------------------------------------------------------------------
+// Dynamic tool selection
+// ---------------------------------------------------------------------------
+
+/// Groups of contextual tools that activate when conversation context is relevant.
+///
+/// Each group maps to a set of related tools and a list of activation keywords.
+/// Groups are ordered deterministically (via BTreeSet) to keep the tool list
+/// stable across turns for prompt cache efficiency.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub enum ToolGroup {
+    SourceControl,
+    Container,
+    DataManipulation,
+    Schedule,
+    BrowserControl,
+    AgentCoordination,
+    Archive,
+    Template,
+    Crypto,
+    UiAutomation,
+    AppIntegration,
+    PluginInvoke,
+    A2ADelegate,
+    KnowledgeGraph,
+    CodeAnalysis,
+    ProcessManagement,
+    HttpAdvanced,
+}
+
+impl ToolGroup {
+    /// All tool groups.
+    const ALL: &[ToolGroup] = &[
+        ToolGroup::SourceControl,
+        ToolGroup::Container,
+        ToolGroup::DataManipulation,
+        ToolGroup::Schedule,
+        ToolGroup::BrowserControl,
+        ToolGroup::AgentCoordination,
+        ToolGroup::Archive,
+        ToolGroup::Template,
+        ToolGroup::Crypto,
+        ToolGroup::UiAutomation,
+        ToolGroup::AppIntegration,
+        ToolGroup::PluginInvoke,
+        ToolGroup::A2ADelegate,
+        ToolGroup::KnowledgeGraph,
+        ToolGroup::CodeAnalysis,
+        ToolGroup::ProcessManagement,
+        ToolGroup::HttpAdvanced,
+    ];
+
+    /// Keywords that trigger this tool group. Matched case-insensitively
+    /// against conversation messages.
+    fn keywords(&self) -> &'static [&'static str] {
+        match self {
+            ToolGroup::SourceControl => &[
+                "git",
+                "commit",
+                "branch",
+                "diff",
+                "merge",
+                "rebase",
+                "push",
+                "pull",
+                "clone",
+                "stash",
+                "checkout",
+                "repo",
+                "repository",
+            ],
+            ToolGroup::Container => &[
+                "docker",
+                "container",
+                "image",
+                "dockerfile",
+                "compose",
+                "pod",
+                "kubernetes",
+                "k8s",
+                "helm",
+            ],
+            ToolGroup::DataManipulation => &[
+                "json",
+                "yaml",
+                "toml",
+                "regex",
+                "parse",
+                "transform",
+                "csv",
+                "xml",
+                "jq",
+            ],
+            ToolGroup::Schedule => &[
+                "schedule",
+                "cron",
+                "timer",
+                "periodic",
+                "interval",
+                "recurring",
+                "every hour",
+                "every day",
+            ],
+            ToolGroup::BrowserControl => &[
+                "browser",
+                "webpage",
+                "click",
+                "navigate",
+                "selenium",
+                "puppeteer",
+                "scrape",
+                "website",
+            ],
+            ToolGroup::AgentCoordination => &[
+                "agent",
+                "spawn",
+                "fighter",
+                "worker",
+                "troop",
+                "multi-agent",
+            ],
+            ToolGroup::Archive => &[
+                "archive",
+                "zip",
+                "tar",
+                "gzip",
+                "extract",
+                "compress",
+                "unzip",
+                "decompress",
+            ],
+            ToolGroup::Template => &["template", "render", "handlebars", "mustache", "jinja"],
+            ToolGroup::Crypto => &[
+                "hash", "crypto", "sha", "md5", "hmac", "checksum", "digest", "encrypt",
+            ],
+            ToolGroup::UiAutomation => &[
+                "ui automation",
+                "accessibility",
+                "ui element",
+                "click button",
+                "menu bar",
+                "window list",
+                "applescript",
+                "osascript",
+            ],
+            ToolGroup::AppIntegration => &[
+                "ocr",
+                "text recognition",
+                "screen read",
+                "extract text from image",
+            ],
+            ToolGroup::PluginInvoke => &["plugin", "wasm", "extension", "webassembly"],
+            ToolGroup::A2ADelegate => &["a2a", "remote agent", "delegate task", "external agent"],
+            ToolGroup::KnowledgeGraph => &[
+                "knowledge",
+                "entity",
+                "relation",
+                "graph",
+                "ontology",
+                "triple",
+            ],
+            ToolGroup::CodeAnalysis => &[
+                "symbols",
+                "code search",
+                "definition",
+                "references",
+                "ast",
+                "code analysis",
+            ],
+            ToolGroup::ProcessManagement => &[
+                "process",
+                "pid",
+                "kill process",
+                "signal",
+                "background process",
+            ],
+            ToolGroup::HttpAdvanced => &[
+                "http post",
+                "api call",
+                "rest api",
+                "endpoint",
+                "webhook",
+                "curl",
+                "http request",
+            ],
+        }
+    }
+
+    /// Check whether the fighter's capabilities include the one required for this group.
+    fn is_permitted(&self, capabilities: &[Capability]) -> bool {
+        capabilities.iter().any(|c| match self {
+            ToolGroup::SourceControl => matches!(c, Capability::SourceControl),
+            ToolGroup::Container => matches!(c, Capability::Container),
+            ToolGroup::DataManipulation => matches!(c, Capability::DataManipulation),
+            ToolGroup::Schedule => matches!(c, Capability::Schedule),
+            ToolGroup::BrowserControl => matches!(c, Capability::BrowserControl),
+            ToolGroup::AgentCoordination => {
+                matches!(c, Capability::AgentSpawn | Capability::AgentMessage)
+            }
+            ToolGroup::Archive => matches!(c, Capability::Archive),
+            ToolGroup::Template => matches!(c, Capability::Template),
+            ToolGroup::Crypto => matches!(c, Capability::Crypto),
+            ToolGroup::UiAutomation => matches!(c, Capability::UiAutomation(_)),
+            ToolGroup::AppIntegration => matches!(c, Capability::AppIntegration(_)),
+            ToolGroup::PluginInvoke => matches!(c, Capability::PluginInvoke),
+            ToolGroup::A2ADelegate => matches!(c, Capability::A2ADelegate),
+            ToolGroup::KnowledgeGraph => matches!(c, Capability::KnowledgeGraph),
+            ToolGroup::CodeAnalysis => matches!(c, Capability::CodeAnalysis),
+            ToolGroup::ProcessManagement => matches!(c, Capability::ShellExec(_)),
+            ToolGroup::HttpAdvanced => matches!(c, Capability::Network(_)),
+        })
+    }
+
+    /// Return the tool definitions for this group.
+    fn tools(&self) -> Vec<ToolDefinition> {
+        match self {
+            ToolGroup::SourceControl => {
+                vec![
+                    git_status(),
+                    git_diff(),
+                    git_log(),
+                    git_commit(),
+                    git_branch(),
+                ]
+            }
+            ToolGroup::Container => {
+                vec![docker_ps(), docker_run(), docker_build(), docker_logs()]
+            }
+            ToolGroup::DataManipulation => vec![
+                json_query(),
+                json_transform(),
+                yaml_parse(),
+                regex_match(),
+                regex_replace(),
+                text_diff(),
+                text_count(),
+            ],
+            ToolGroup::Schedule => {
+                vec![schedule_task(), schedule_list(), schedule_cancel()]
+            }
+            ToolGroup::BrowserControl => vec![
+                browser_navigate(),
+                browser_screenshot(),
+                browser_click(),
+                browser_type(),
+                browser_content(),
+            ],
+            ToolGroup::AgentCoordination => {
+                vec![agent_spawn(), agent_message(), agent_list()]
+            }
+            ToolGroup::Archive => {
+                vec![archive_create(), archive_extract(), archive_list()]
+            }
+            ToolGroup::Template => vec![template_render()],
+            ToolGroup::Crypto => vec![hash_compute(), hash_verify()],
+            ToolGroup::UiAutomation => vec![
+                ui_screenshot(),
+                ui_find_elements(),
+                ui_click(),
+                ui_type_text(),
+                ui_list_windows(),
+                ui_read_attribute(),
+            ],
+            ToolGroup::AppIntegration => vec![app_ocr()],
+            ToolGroup::PluginInvoke => vec![wasm_invoke()],
+            ToolGroup::A2ADelegate => vec![a2a_delegate()],
+            ToolGroup::KnowledgeGraph => vec![
+                knowledge_add_entity(),
+                knowledge_add_relation(),
+                knowledge_query(),
+            ],
+            ToolGroup::CodeAnalysis => vec![code_search(), code_symbols()],
+            ToolGroup::ProcessManagement => vec![process_list(), process_kill()],
+            ToolGroup::HttpAdvanced => vec![http_request(), http_post()],
+        }
+    }
+}
+
+/// Dynamic tool selector that loads tools based on conversation context.
+///
+/// Core tools (~18) are always available. Contextual tool groups activate
+/// when relevant keywords appear in recent messages and stay active for the
+/// remainder of the bout (monotonic growth) to maximize prompt cache hits.
+///
+/// The tool executor can still run ANY tool regardless of what the LLM sees —
+/// capabilities are checked at execution time, not selection time. The model
+/// never loses capability: `shell_exec` is always in the core set and can do
+/// anything the contextual tools can via command-line equivalents.
+pub struct ToolSelector {
+    /// Fighter's granted capabilities.
+    capabilities: Vec<Capability>,
+    /// Tool groups activated so far in this bout (only grows, never shrinks).
+    active_groups: BTreeSet<ToolGroup>,
+    /// Hash of the last tool list returned, for change detection.
+    last_tool_hash: u64,
+}
+
+impl ToolSelector {
+    /// Create a new selector for a fighter with the given capabilities.
+    pub fn new(capabilities: &[Capability]) -> Self {
+        Self {
+            capabilities: capabilities.to_vec(),
+            active_groups: BTreeSet::new(),
+            last_tool_hash: 0,
+        }
+    }
+
+    /// Select tools based on conversation context.
+    ///
+    /// Returns `(tools, tools_changed)` where `tools_changed` is true if the
+    /// tool list differs from the previous call (useful for cache invalidation
+    /// decisions).
+    ///
+    /// Scans the last 4 messages (current + 3 prior) for keywords. Once a
+    /// group activates, it stays active for the rest of the bout.
+    pub fn select_tools(&mut self, messages: &[Message]) -> (Vec<ToolDefinition>, bool) {
+        let scan_text = Self::build_scan_text(messages);
+
+        let mut newly_activated = Vec::new();
+        for group in ToolGroup::ALL {
+            if self.active_groups.contains(group) {
+                continue;
+            }
+            if !group.is_permitted(&self.capabilities) {
+                continue;
+            }
+            if Self::keywords_match(&scan_text, group.keywords()) {
+                self.active_groups.insert(*group);
+                newly_activated.push(*group);
+            }
+        }
+
+        if !newly_activated.is_empty() {
+            info!(
+                groups = ?newly_activated,
+                total_active = self.active_groups.len(),
+                "tool selector: activated new groups"
+            );
+        }
+
+        let mut tools = self.core_tools();
+        for group in &self.active_groups {
+            for tool in group.tools() {
+                push_unique(&mut tools, tool);
+            }
+        }
+
+        let hash = Self::compute_hash(&tools);
+        let changed = hash != self.last_tool_hash;
+        self.last_tool_hash = hash;
+
+        (tools, changed)
+    }
+
+    /// Return the number of currently active contextual tool groups.
+    pub fn active_group_count(&self) -> usize {
+        self.active_groups.len()
+    }
+
+    /// Core tools that are always loaded (when capability permits).
+    fn core_tools(&self) -> Vec<ToolDefinition> {
+        let mut tools = Vec::with_capacity(20);
+
+        for cap in &self.capabilities {
+            match cap {
+                Capability::FileRead(_) => {
+                    push_unique(&mut tools, file_read());
+                    push_unique(&mut tools, file_list());
+                    push_unique(&mut tools, file_search());
+                    push_unique(&mut tools, file_info());
+                }
+                Capability::FileWrite(_) => {
+                    push_unique(&mut tools, file_write());
+                    push_unique(&mut tools, patch_apply());
+                }
+                Capability::ShellExec(_) => {
+                    push_unique(&mut tools, shell_exec());
+                    push_unique(&mut tools, env_get());
+                    push_unique(&mut tools, env_list());
+                }
+                Capability::Network(_) => {
+                    push_unique(&mut tools, web_fetch());
+                    push_unique(&mut tools, web_search());
+                }
+                Capability::Memory => {
+                    push_unique(&mut tools, memory_store());
+                    push_unique(&mut tools, memory_recall());
+                }
+                Capability::SystemAutomation => {
+                    push_unique(&mut tools, sys_screenshot());
+                }
+                Capability::ChannelNotify => {
+                    push_unique(&mut tools, channel_notify());
+                }
+                Capability::SelfConfig => {
+                    push_unique(&mut tools, heartbeat_add());
+                    push_unique(&mut tools, heartbeat_list());
+                    push_unique(&mut tools, heartbeat_remove());
+                    push_unique(&mut tools, creed_view());
+                    push_unique(&mut tools, skill_list());
+                    push_unique(&mut tools, skill_recommend());
+                }
+                _ => {}
+            }
+        }
+
+        tools
+    }
+
+    /// Build the keyword scan text from recent messages.
+    fn build_scan_text(messages: &[Message]) -> String {
+        let window_size = 4.min(messages.len());
+        let start = messages.len().saturating_sub(window_size);
+        let mut text = String::with_capacity(2000);
+        for msg in &messages[start..] {
+            text.push_str(&msg.content);
+            text.push(' ');
+        }
+        text.to_lowercase()
+    }
+
+    /// Check if any keyword appears in the scan text.
+    fn keywords_match(scan_text: &str, keywords: &[&str]) -> bool {
+        keywords.iter().any(|kw| scan_text.contains(kw))
+    }
+
+    /// Compute a stable hash of the tool list for change detection.
+    fn compute_hash(tools: &[ToolDefinition]) -> u64 {
+        let mut hasher = DefaultHasher::new();
+        for tool in tools {
+            tool.name.hash(&mut hasher);
+        }
+        hasher.finish()
+    }
+}
+
 // ---------------------------------------------------------------------------
 // Tool definitions
 // ---------------------------------------------------------------------------
@@ -2895,4 +3344,212 @@ mod tests {
         names.dedup();
         assert_eq!(before, names.len(), "duplicate tools found");
     }
+
+    // -----------------------------------------------------------------------
+    // ToolSelector tests
+    // -----------------------------------------------------------------------
+
+    use super::{ToolGroup, ToolSelector};
+    use punch_types::{Message, Role};
+
+    fn full_caps() -> Vec<Capability> {
+        Capability::full_access()
+    }
+
+    fn msg(role: Role, text: &str) -> Message {
+        Message::new(role, text)
+    }
+
+    #[test]
+    fn test_selector_core_tools_always_present() {
+        let mut sel = ToolSelector::new(&full_caps());
+        let messages = vec![msg(Role::User, "hello, how are you?")];
+        let (tools, _) = sel.select_tools(&messages);
+
+        let names: Vec<&str> = tools.iter().map(|t| t.name.as_str()).collect();
+        // Core tools must be present.
+        assert!(names.contains(&"file_read"), "file_read missing");
+        assert!(names.contains(&"shell_exec"), "shell_exec missing");
+        assert!(names.contains(&"web_fetch"), "web_fetch missing");
+        assert!(names.contains(&"memory_store"), "memory_store missing");
+        // Contextual tools should NOT be present.
+        assert!(
+            !names.contains(&"git_status"),
+            "git tools should not be in core"
+        );
+        assert!(
+            !names.contains(&"docker_ps"),
+            "docker tools should not be in core"
+        );
+    }
+
+    #[test]
+    fn test_selector_keyword_activates_group() {
+        let mut sel = ToolSelector::new(&full_caps());
+        let messages = vec![msg(Role::User, "please commit my changes to git")];
+        let (tools, _) = sel.select_tools(&messages);
+
+        let names: Vec<&str> = tools.iter().map(|t| t.name.as_str()).collect();
+        assert!(
+            names.contains(&"git_status"),
+            "git_status should be activated"
+        );
+        assert!(
+            names.contains(&"git_commit"),
+            "git_commit should be activated"
+        );
+        assert!(names.contains(&"git_diff"), "git_diff should be activated");
+    }
+
+    #[test]
+    fn test_selector_multiple_groups_activate() {
+        let mut sel = ToolSelector::new(&full_caps());
+        let messages = vec![msg(
+            Role::User,
+            "build the docker image and then commit it to git",
+        )];
+        let (tools, _) = sel.select_tools(&messages);
+
+        let names: Vec<&str> = tools.iter().map(|t| t.name.as_str()).collect();
+        assert!(names.contains(&"git_status"), "git should activate");
+        assert!(names.contains(&"docker_build"), "docker should activate");
+    }
+
+    #[test]
+    fn test_selector_capability_gating() {
+        // No capabilities granted — no contextual tools should activate.
+        let mut sel = ToolSelector::new(&[]);
+        let messages = vec![msg(Role::User, "git commit and docker build")];
+        let (tools, _) = sel.select_tools(&messages);
+
+        assert!(tools.is_empty(), "no tools without capabilities");
+    }
+
+    #[test]
+    fn test_selector_monotonic_growth() {
+        let mut sel = ToolSelector::new(&full_caps());
+
+        // Turn 1: activate git tools.
+        let msgs1 = vec![msg(Role::User, "commit my changes")];
+        let (tools1, _) = sel.select_tools(&msgs1);
+        let count1 = tools1.len();
+
+        // Turn 2: no git keywords, but git tools should persist.
+        let msgs2 = vec![
+            msg(Role::User, "commit my changes"),
+            msg(Role::Assistant, "done"),
+            msg(Role::User, "now tell me a joke"),
+        ];
+        let (tools2, _) = sel.select_tools(&msgs2);
+
+        let names2: Vec<&str> = tools2.iter().map(|t| t.name.as_str()).collect();
+        assert!(
+            names2.contains(&"git_status"),
+            "git tools should persist across turns"
+        );
+        assert!(tools2.len() >= count1, "tool count should not decrease");
+    }
+
+    #[test]
+    fn test_selector_tools_changed_detection() {
+        let mut sel = ToolSelector::new(&full_caps());
+
+        let msgs1 = vec![msg(Role::User, "hello")];
+        let (_, changed1) = sel.select_tools(&msgs1);
+        assert!(changed1, "first call should report changed");
+
+        // Same message again — no new groups, no change.
+        let (_, changed2) = sel.select_tools(&msgs1);
+        assert!(!changed2, "no new groups means no change");
+
+        // New keyword activates a group — should report changed.
+        let msgs3 = vec![msg(Role::User, "git commit")];
+        let (_, changed3) = sel.select_tools(&msgs3);
+        assert!(changed3, "new group activation should report changed");
+    }
+
+    #[test]
+    fn test_selector_case_insensitive() {
+        let mut sel = ToolSelector::new(&full_caps());
+        let messages = vec![msg(Role::User, "GIT COMMIT DOCKER BUILD")];
+        let (tools, _) = sel.select_tools(&messages);
+
+        let names: Vec<&str> = tools.iter().map(|t| t.name.as_str()).collect();
+        assert!(names.contains(&"git_status"), "case insensitive match");
+        assert!(names.contains(&"docker_ps"), "case insensitive match");
+    }
+
+    #[test]
+    fn test_selector_scan_window_limited() {
+        let mut sel = ToolSelector::new(&full_caps());
+        // Old message mentions git, but it's outside the 4-message window.
+        let messages = vec![
+            msg(Role::User, "git commit"),     // 5th from end — outside window
+            msg(Role::Assistant, "done"),      // 4th
+            msg(Role::User, "ok"),             // 3rd
+            msg(Role::Assistant, "ok"),        // 2nd
+            msg(Role::User, "tell me a joke"), // 1st (current)
+        ];
+        let (tools, _) = sel.select_tools(&messages);
+
+        let names: Vec<&str> = tools.iter().map(|t| t.name.as_str()).collect();
+        assert!(
+            !names.contains(&"git_status"),
+            "old messages outside window should not activate groups"
+        );
+    }
+
+    #[test]
+    fn test_selector_no_duplicates() {
+        let mut sel = ToolSelector::new(&full_caps());
+        // Activate many groups at once.
+        let messages = vec![msg(
+            Role::User,
+            "git docker json archive schedule browser crypto plugin a2a knowledge code",
+        )];
+        let (tools, _) = sel.select_tools(&messages);
+
+        let mut names: Vec<&str> = tools.iter().map(|t| t.name.as_str()).collect();
+        let before = names.len();
+        names.sort();
+        names.dedup();
+        assert_eq!(before, names.len(), "duplicate tools found in selection");
+    }
+
+    #[test]
+    fn test_selector_core_count_reasonable() {
+        let mut sel = ToolSelector::new(&full_caps());
+        let messages = vec![msg(Role::User, "hello")];
+        let (tools, _) = sel.select_tools(&messages);
+
+        // Core should be roughly 18-25 tools (not 80+).
+        assert!(
+            tools.len() < 30,
+            "core tools should be under 30, got {}",
+            tools.len()
+        );
+        assert!(
+            tools.len() >= 15,
+            "core tools should be at least 15, got {}",
+            tools.len()
+        );
+    }
+
+    #[test]
+    fn test_tool_group_all_covers_every_variant() {
+        // Ensure ALL array matches the enum variants.
+        assert_eq!(
+            ToolGroup::ALL.len(),
+            17,
+            "ToolGroup::ALL must match the number of variants"
+        );
+    }
+
+    #[test]
+    fn test_tool_group_tools_not_empty() {
+        for group in ToolGroup::ALL {
+            let tools = group.tools();
+            assert!(!tools.is_empty(), "ToolGroup::{:?} has no tools", group);
+        }
+    }
 }
diff --git a/crates/punch-runtime/tests/fighter_loop_tests.rs b/crates/punch-runtime/tests/fighter_loop_tests.rs
index 87af2bd..8eabbcb 100644
--- a/crates/punch-runtime/tests/fighter_loop_tests.rs
+++ b/crates/punch-runtime/tests/fighter_loop_tests.rs
@@ -137,6 +137,7 @@ fn make_params(
         memory,
         driver,
         available_tools: Vec::new(),
+        mcp_tools: Vec::new(),
         max_iterations: Some(10),
         context_window: Some(200_000),
         tool_timeout_secs: Some(30),
@@ -466,6 +467,7 @@ async fn test_messages_persisted_to_memory() {
         memory: memory.clone(),
         driver,
         available_tools: Vec::new(),
+        mcp_tools: Vec::new(),
         max_iterations: Some(10),
         context_window: Some(200_000),
         tool_timeout_secs: Some(30),
@@ -527,6 +529,7 @@ async fn test_creed_bout_count_increments() {
         memory: memory.clone(),
         driver,
         available_tools: Vec::new(),
+        mcp_tools: Vec::new(),
         max_iterations: Some(10),
         context_window: Some(200_000),
         tool_timeout_secs: Some(30),
@@ -586,6 +589,7 @@ async fn test_heartbeat_tasks_marked_checked() {
         memory: memory.clone(),
         driver,
         available_tools: Vec::new(),
+        mcp_tools: Vec::new(),
         max_iterations: Some(10),
         context_window: Some(200_000),
         tool_timeout_secs: Some(30),

From 2d81056cbdd90713781c569c85220ff37271d7fb Mon Sep 17 00:00:00 2001
From: Archith <archith.rapaka@atomtickets.com>
Date: Thu, 26 Mar 2026 08:46:24 -0700
Subject: [PATCH 2/4] Fix PR #18 review: tighten keywords, scan tool context,
 stable hash, auto-activate

Address all 5 review issues on the dynamic tool selection PR:

1. HIGH - Tighten overly broad keywords: Replace generic words like push,
   image, process, agent, hash, render, graph, definition, extract with
   specific multi-word phrases (e.g. git push, docker image, kill process,
   spawn fighter, sha256, render template, knowledge graph, find definition).

2. MEDIUM - Scan tool context: build_scan_text now includes tool call names,
   short argument values, and tool result snippets (first 200 chars).

3. MEDIUM - Avoid static path clone overhead: Move params.available_tools
   into static_tools Option before the loop.

4. MEDIUM - Auto-activate capability groups: Groups with no core-tool overlap
   auto-activate on construction when the fighter has the required capability.

5. NIT - Stable hash: Replace DefaultHasher with FNV-1a-inspired hash that
   produces identical results across Rust versions.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 crates/punch-runtime/src/fighter_loop.rs |  16 +-
 crates/punch-runtime/src/tools.rs        | 290 +++++++++++++++++------
 2 files changed, 238 insertions(+), 68 deletions(-)

diff --git a/crates/punch-runtime/src/fighter_loop.rs b/crates/punch-runtime/src/fighter_loop.rs
index c87c4e3..c5c6b7b 100644
--- a/crates/punch-runtime/src/fighter_loop.rs
+++ b/crates/punch-runtime/src/fighter_loop.rs
@@ -259,9 +259,17 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult<FighterL
         None
     };
 
+    // Pre-build static tool list (avoids cloning per loop iteration for static path).
+    let static_tools: Option<Vec<ToolDefinition>> = if !use_dynamic_tools {
+        Some(params.available_tools)
+    } else {
+        None
+    };
+
+    let static_tool_count = static_tools.as_ref().map_or(0, |t| t.len());
     info!(
         dynamic_tools = use_dynamic_tools,
-        static_tool_count = params.available_tools.len(),
+        static_tool_count,
         mcp_tool_count = params.mcp_tools.len(),
         fighter = %params.manifest.name,
         model = %params.manifest.model.model,
@@ -277,7 +285,11 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult<FighterL
             selected.extend(params.mcp_tools.iter().cloned());
             selected
         } else {
-            params.available_tools.clone()
+            // Static path: clone from pre-built list (CompletionRequest takes ownership).
+            static_tools
+                .as_ref()
+                .expect("static_tools set when not using dynamic selection")
+                .clone()
         };
 
         // --- Context Budget: check and trim before LLM call ---
diff --git a/crates/punch-runtime/src/tools.rs b/crates/punch-runtime/src/tools.rs
index 0fce997..ea3f173 100644
--- a/crates/punch-runtime/src/tools.rs
+++ b/crates/punch-runtime/src/tools.rs
@@ -12,8 +12,6 @@
 //! growth) to maximize prompt cache hits.
 
 use std::collections::BTreeSet;
-use std::collections::hash_map::DefaultHasher;
-use std::hash::{Hash, Hasher};
 
 use punch_types::{Capability, Message, ToolCategory, ToolDefinition};
 use tracing::info;
@@ -313,25 +311,28 @@ impl ToolGroup {
     fn keywords(&self) -> &'static [&'static str] {
         match self {
             ToolGroup::SourceControl => &[
-                "git",
-                "commit",
-                "branch",
-                "diff",
-                "merge",
-                "rebase",
-                "push",
-                "pull",
-                "clone",
-                "stash",
-                "checkout",
+                "git ",
+                "git commit",
+                "git branch",
+                "git diff",
+                "git merge",
+                "git rebase",
+                "git push",
+                "git pull",
+                "git clone",
+                "git stash",
+                "git checkout",
+                "git log",
+                "commit changes",
+                "commit my",
                 "repo",
                 "repository",
             ],
             ToolGroup::Container => &[
                 "docker",
                 "container",
-                "image",
                 "dockerfile",
+                "docker image",
                 "compose",
                 "pod",
                 "kubernetes",
@@ -343,8 +344,9 @@ impl ToolGroup {
                 "yaml",
                 "toml",
                 "regex",
-                "parse",
-                "transform",
+                "parse json",
+                "parse yaml",
+                "transform json",
                 "csv",
                 "xml",
                 "jq",
@@ -362,34 +364,55 @@ impl ToolGroup {
             ToolGroup::BrowserControl => &[
                 "browser",
                 "webpage",
-                "click",
-                "navigate",
+                "click button",
+                "click element",
+                "click link",
+                "navigate to",
                 "selenium",
                 "puppeteer",
                 "scrape",
-                "website",
+                "open website",
+                "web page",
             ],
             ToolGroup::AgentCoordination => &[
-                "agent",
-                "spawn",
-                "fighter",
-                "worker",
+                "spawn fighter",
+                "spawn agent",
+                "spawn worker",
+                "new fighter",
                 "troop",
                 "multi-agent",
+                "delegate to",
+                "coordinate agents",
             ],
             ToolGroup::Archive => &[
                 "archive",
-                "zip",
-                "tar",
+                "zip file",
+                "tar file",
                 "gzip",
-                "extract",
-                "compress",
+                "extract archive",
+                "compress file",
                 "unzip",
                 "decompress",
             ],
-            ToolGroup::Template => &["template", "render", "handlebars", "mustache", "jinja"],
+            ToolGroup::Template => &[
+                "template",
+                "render template",
+                "handlebars",
+                "mustache",
+                "jinja",
+            ],
             ToolGroup::Crypto => &[
-                "hash", "crypto", "sha", "md5", "hmac", "checksum", "digest", "encrypt",
+                "sha256",
+                "sha512",
+                "crypto",
+                "md5",
+                "hmac",
+                "checksum",
+                "digest",
+                "encrypt",
+                "decrypt",
+                "hash file",
+                "hash password",
             ],
             ToolGroup::UiAutomation => &[
                 "ui automation",
@@ -410,36 +433,40 @@ impl ToolGroup {
             ToolGroup::PluginInvoke => &["plugin", "wasm", "extension", "webassembly"],
             ToolGroup::A2ADelegate => &["a2a", "remote agent", "delegate task", "external agent"],
             ToolGroup::KnowledgeGraph => &[
-                "knowledge",
-                "entity",
-                "relation",
-                "graph",
+                "knowledge graph",
+                "add entity",
+                "add relation",
+                "knowledge base",
                 "ontology",
-                "triple",
+                "triple store",
             ],
             ToolGroup::CodeAnalysis => &[
-                "symbols",
+                "code symbols",
                 "code search",
-                "definition",
-                "references",
+                "find definition",
+                "find references",
                 "ast",
                 "code analysis",
+                "go to definition",
             ],
             ToolGroup::ProcessManagement => &[
-                "process",
-                "pid",
+                "list processes",
                 "kill process",
-                "signal",
+                "pid",
+                "send signal",
                 "background process",
+                "process list",
             ],
             ToolGroup::HttpAdvanced => &[
                 "http post",
                 "api call",
                 "rest api",
-                "endpoint",
+                "api endpoint",
                 "webhook",
                 "curl",
                 "http request",
+                "http put",
+                "http delete",
             ],
         }
     }
@@ -555,14 +582,52 @@ pub struct ToolSelector {
 
 impl ToolSelector {
     /// Create a new selector for a fighter with the given capabilities.
+    ///
+    /// Groups whose capabilities are granted but have no representation in the
+    /// core tool set are auto-activated on construction. This prevents a
+    /// capability regression where, e.g., a fighter with `SourceControl` wouldn't
+    /// get git tools on turn 1 unless the user explicitly said "git".
     pub fn new(capabilities: &[Capability]) -> Self {
+        let mut active_groups = BTreeSet::new();
+
+        // Auto-activate groups that are permitted AND have no core-tool overlap.
+        // These are "capability-only" groups whose tools would be invisible without
+        // keyword triggers, creating a silent regression for restricted fighters.
+        for group in ToolGroup::ALL {
+            if group.is_permitted(capabilities) && Self::group_needs_auto_activate(group) {
+                active_groups.insert(*group);
+            }
+        }
+
+        if !active_groups.is_empty() {
+            info!(
+                auto_activated = ?active_groups,
+                "tool selector: auto-activated groups for granted capabilities"
+            );
+        }
+
         Self {
             capabilities: capabilities.to_vec(),
-            active_groups: BTreeSet::new(),
+            active_groups,
             last_tool_hash: 0,
         }
     }
 
+    /// Returns true if a group's tools have zero overlap with core tools,
+    /// meaning the group would be invisible without keyword activation.
+    fn group_needs_auto_activate(group: &ToolGroup) -> bool {
+        matches!(
+            group,
+            ToolGroup::SourceControl
+                | ToolGroup::Container
+                | ToolGroup::DataManipulation
+                | ToolGroup::Schedule
+                | ToolGroup::BrowserControl
+                | ToolGroup::AgentCoordination
+                | ToolGroup::ProcessManagement
+        )
+    }
+
     /// Select tools based on conversation context.
     ///
     /// Returns `(tools, tools_changed)` where `tools_changed` is true if the
@@ -666,13 +731,42 @@ impl ToolSelector {
     }
 
     /// Build the keyword scan text from recent messages.
+    ///
+    /// Scans message content, tool call names/arguments, and tool result content
+    /// to catch context from tool usage (e.g. a git_status call signals SourceControl).
     fn build_scan_text(messages: &[Message]) -> String {
         let window_size = 4.min(messages.len());
         let start = messages.len().saturating_sub(window_size);
-        let mut text = String::with_capacity(2000);
+        let mut text = String::with_capacity(4000);
         for msg in &messages[start..] {
             text.push_str(&msg.content);
             text.push(' ');
+            for tc in &msg.tool_calls {
+                text.push_str(&tc.name);
+                text.push(' ');
+                // Include a compact form of arguments (tool names are most useful).
+                if let Some(obj) = tc.input.as_object() {
+                    for val in obj.values() {
+                        if let Some(s) = val.as_str() {
+                            // Only include short string values to avoid bloating scan text.
+                            if s.len() <= 200 {
+                                text.push_str(s);
+                                text.push(' ');
+                            }
+                        }
+                    }
+                }
+            }
+            for tr in &msg.tool_results {
+                // Include first 200 chars of tool result content for keyword detection.
+                let snippet = if tr.content.len() > 200 {
+                    &tr.content[..200]
+                } else {
+                    &tr.content
+                };
+                text.push_str(snippet);
+                text.push(' ');
+            }
         }
         text.to_lowercase()
     }
@@ -683,12 +777,21 @@ impl ToolSelector {
     }
 
     /// Compute a stable hash of the tool list for change detection.
+    ///
+    /// Uses a simple FNV-1a-inspired hash that is stable across Rust versions
+    /// (unlike `DefaultHasher` which uses SipHash with randomized keys).
     fn compute_hash(tools: &[ToolDefinition]) -> u64 {
-        let mut hasher = DefaultHasher::new();
+        let mut hash: u64 = 0xcbf2_9ce4_8422_2325; // FNV offset basis
         for tool in tools {
-            tool.name.hash(&mut hasher);
+            for byte in tool.name.as_bytes() {
+                hash ^= u64::from(*byte);
+                hash = hash.wrapping_mul(0x0100_0000_01b3); // FNV prime
+            }
+            // Separator to distinguish "ab"+"cd" from "abc"+"d".
+            hash ^= 0xff;
+            hash = hash.wrapping_mul(0x0100_0000_01b3);
         }
-        hasher.finish()
+        hash
     }
 }
 
@@ -3372,14 +3475,36 @@ mod tests {
         assert!(names.contains(&"shell_exec"), "shell_exec missing");
         assert!(names.contains(&"web_fetch"), "web_fetch missing");
         assert!(names.contains(&"memory_store"), "memory_store missing");
-        // Contextual tools should NOT be present.
+        // Auto-activated groups (granted capability, no core overlap) should also be present.
+        assert!(
+            names.contains(&"git_status"),
+            "git tools should auto-activate for SourceControl capability"
+        );
+        assert!(
+            names.contains(&"docker_ps"),
+            "docker tools should auto-activate for Container capability"
+        );
+    }
+
+    #[test]
+    fn test_selector_no_auto_activate_without_capability() {
+        // A fighter with only FileRead + ShellExec should NOT get git or docker tools.
+        let caps = vec![
+            Capability::FileRead("**".to_string()),
+            Capability::ShellExec("*".to_string()),
+        ];
+        let mut sel = ToolSelector::new(&caps);
+        let messages = vec![msg(Role::User, "hello")];
+        let (tools, _) = sel.select_tools(&messages);
+
+        let names: Vec<&str> = tools.iter().map(|t| t.name.as_str()).collect();
         assert!(
             !names.contains(&"git_status"),
-            "git tools should not be in core"
+            "git should not auto-activate without SourceControl"
         );
         assert!(
             !names.contains(&"docker_ps"),
-            "docker tools should not be in core"
+            "docker should not auto-activate without Container"
         );
     }
 
@@ -3452,10 +3577,13 @@ mod tests {
 
     #[test]
     fn test_selector_tools_changed_detection() {
-        let mut sel = ToolSelector::new(&full_caps());
+        // Use Archive + Crypto (not auto-activated) to test change detection.
+        let caps = vec![Capability::Archive, Capability::Crypto];
+        let mut sel = ToolSelector::new(&caps);
 
         let msgs1 = vec![msg(Role::User, "hello")];
         let (_, changed1) = sel.select_tools(&msgs1);
+        // First call always reports changed (hash was 0).
         assert!(changed1, "first call should report changed");
 
         // Same message again — no new groups, no change.
@@ -3463,7 +3591,7 @@ mod tests {
         assert!(!changed2, "no new groups means no change");
 
         // New keyword activates a group — should report changed.
-        let msgs3 = vec![msg(Role::User, "git commit")];
+        let msgs3 = vec![msg(Role::User, "compute sha256 checksum")];
         let (_, changed3) = sel.select_tools(&msgs3);
         assert!(changed3, "new group activation should report changed");
     }
@@ -3481,20 +3609,22 @@ mod tests {
 
     #[test]
     fn test_selector_scan_window_limited() {
-        let mut sel = ToolSelector::new(&full_caps());
-        // Old message mentions git, but it's outside the 4-message window.
+        // Use Crypto capability (not auto-activated) to test window limits.
+        let caps = vec![Capability::Crypto];
+        let mut sel = ToolSelector::new(&caps);
+        // Old message mentions crypto keyword, but it's outside the 4-message window.
         let messages = vec![
-            msg(Role::User, "git commit"),     // 5th from end — outside window
-            msg(Role::Assistant, "done"),      // 4th
-            msg(Role::User, "ok"),             // 3rd
-            msg(Role::Assistant, "ok"),        // 2nd
-            msg(Role::User, "tell me a joke"), // 1st (current)
+            msg(Role::User, "compute sha256 checksum"), // 5th from end — outside window
+            msg(Role::Assistant, "done"),               // 4th
+            msg(Role::User, "ok"),                      // 3rd
+            msg(Role::Assistant, "ok"),                 // 2nd
+            msg(Role::User, "tell me a joke"),          // 1st (current)
         ];
         let (tools, _) = sel.select_tools(&messages);
 
         let names: Vec<&str> = tools.iter().map(|t| t.name.as_str()).collect();
         assert!(
-            !names.contains(&"git_status"),
+            !names.contains(&"hash_compute"),
             "old messages outside window should not activate groups"
         );
     }
@@ -3502,10 +3632,10 @@ mod tests {
     #[test]
     fn test_selector_no_duplicates() {
         let mut sel = ToolSelector::new(&full_caps());
-        // Activate many groups at once.
+        // Use precise keywords that match the tightened patterns.
         let messages = vec![msg(
             Role::User,
-            "git docker json archive schedule browser crypto plugin a2a knowledge code",
+            "git commit docker json unzip schedule cron browser sha256 plugin a2a knowledge graph code analysis",
         )];
         let (tools, _) = sel.select_tools(&messages);
 
@@ -3518,19 +3648,47 @@ mod tests {
 
     #[test]
     fn test_selector_core_count_reasonable() {
+        // Use a fighter with only basic capabilities (no auto-activated groups).
+        let caps = vec![
+            Capability::FileRead("**".to_string()),
+            Capability::FileWrite("**".to_string()),
+            Capability::ShellExec("*".to_string()),
+            Capability::Network("*".to_string()),
+            Capability::Memory,
+        ];
+        let mut sel = ToolSelector::new(&caps);
+        let messages = vec![msg(Role::User, "hello")];
+        let (tools, _) = sel.select_tools(&messages);
+
+        // Core should be roughly 12-20 tools (not 80+).
+        assert!(
+            tools.len() < 25,
+            "core tools should be under 25, got {}",
+            tools.len()
+        );
+        assert!(
+            tools.len() >= 10,
+            "core tools should be at least 10, got {}",
+            tools.len()
+        );
+    }
+
+    #[test]
+    fn test_selector_full_caps_with_auto_activation() {
+        // Full capabilities: auto-activated groups should be present from turn 1.
         let mut sel = ToolSelector::new(&full_caps());
         let messages = vec![msg(Role::User, "hello")];
         let (tools, _) = sel.select_tools(&messages);
 
-        // Core should be roughly 18-25 tools (not 80+).
+        // Should have core + auto-activated groups, but still well under the full 80+.
         assert!(
-            tools.len() < 30,
-            "core tools should be under 30, got {}",
+            tools.len() < 60,
+            "full caps + auto-activation should be under 60, got {}",
             tools.len()
         );
         assert!(
-            tools.len() >= 15,
-            "core tools should be at least 15, got {}",
+            tools.len() >= 30,
+            "full caps + auto-activation should be at least 30, got {}",
             tools.len()
         );
     }

From 1a72d544c5117a0c239e911880ed4237487f9759 Mon Sep 17 00:00:00 2001
From: Archith <archith.rapaka@atomtickets.com>
Date: Thu, 26 Mar 2026 09:29:15 -0700
Subject: [PATCH 3/4] Fix UTF-8 panic in build_scan_text tool result truncation

Use floor_char_boundary(200) instead of byte slice [..200] to avoid
panicking when byte 200 lands mid-character in multi-byte UTF-8 text.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 crates/punch-runtime/src/tools.rs | 35 ++++++++++++++-----------------
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/crates/punch-runtime/src/tools.rs b/crates/punch-runtime/src/tools.rs
index ea3f173..3f43fdc 100644
--- a/crates/punch-runtime/src/tools.rs
+++ b/crates/punch-runtime/src/tools.rs
@@ -758,13 +758,14 @@ impl ToolSelector {
                 }
             }
             for tr in &msg.tool_results {
-                // Include first 200 chars of tool result content for keyword detection.
-                let snippet = if tr.content.len() > 200 {
-                    &tr.content[..200]
+                // Include first ~200 chars of tool result content for keyword detection.
+                // Use floor_char_boundary to avoid panicking on multi-byte UTF-8.
+                let end = if tr.content.len() > 200 {
+                    tr.content.floor_char_boundary(200)
                 } else {
-                    &tr.content
+                    tr.content.len()
                 };
-                text.push_str(snippet);
+                text.push_str(&tr.content[..end]);
                 text.push(' ');
             }
         }
@@ -2488,13 +2489,11 @@ mod tests {
         let nav = browser_navigate();
         assert_eq!(nav.name, "browser_navigate");
         assert_eq!(nav.category, ToolCategory::Browser);
-        assert!(
-            nav.input_schema["required"]
-                .as_array()
-                .expect("required should be array")
-                .iter()
-                .any(|v| v == "url")
-        );
+        assert!(nav.input_schema["required"]
+            .as_array()
+            .expect("required should be array")
+            .iter()
+            .any(|v| v == "url"));
 
         let ss = browser_screenshot();
         assert_eq!(ss.name, "browser_screenshot");
@@ -2502,13 +2501,11 @@ mod tests {
 
         let click = browser_click();
         assert_eq!(click.name, "browser_click");
-        assert!(
-            click.input_schema["required"]
-                .as_array()
-                .expect("required should be array")
-                .iter()
-                .any(|v| v == "selector")
-        );
+        assert!(click.input_schema["required"]
+            .as_array()
+            .expect("required should be array")
+            .iter()
+            .any(|v| v == "selector"));
 
         let typ = browser_type();
         assert_eq!(typ.name, "browser_type");

From 2d88ed92becd85656bde4dd21ac0fedf868ca142 Mon Sep 17 00:00:00 2001
From: Archith <archith.rapaka@atomtickets.com>
Date: Thu, 26 Mar 2026 09:31:55 -0700
Subject: [PATCH 4/4] Fix formatting in browser tool tests (cargo fmt)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 crates/punch-runtime/src/tools.rs | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/crates/punch-runtime/src/tools.rs b/crates/punch-runtime/src/tools.rs
index 3f43fdc..fc260fd 100644
--- a/crates/punch-runtime/src/tools.rs
+++ b/crates/punch-runtime/src/tools.rs
@@ -2489,11 +2489,13 @@ mod tests {
         let nav = browser_navigate();
         assert_eq!(nav.name, "browser_navigate");
         assert_eq!(nav.category, ToolCategory::Browser);
-        assert!(nav.input_schema["required"]
-            .as_array()
-            .expect("required should be array")
-            .iter()
-            .any(|v| v == "url"));
+        assert!(
+            nav.input_schema["required"]
+                .as_array()
+                .expect("required should be array")
+                .iter()
+                .any(|v| v == "url")
+        );
 
         let ss = browser_screenshot();
         assert_eq!(ss.name, "browser_screenshot");
@@ -2501,11 +2503,13 @@ mod tests {
 
         let click = browser_click();
         assert_eq!(click.name, "browser_click");
-        assert!(click.input_schema["required"]
-            .as_array()
-            .expect("required should be array")
-            .iter()
-            .any(|v| v == "selector"));
+        assert!(
+            click.input_schema["required"]
+                .as_array()
+                .expect("required should be array")
+                .iter()
+                .any(|v| v == "selector")
+        );
 
         let typ = browser_type();
         assert_eq!(typ.name, "browser_type");