From 156aef31efdf8a553950e79bdddc9da33279baeb Mon Sep 17 00:00:00 2001 From: Archith Date: Thu, 26 Mar 2026 00:09:17 -0700 Subject: [PATCH 1/4] Add dynamic tool selection: context-aware per-turn tool loading MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of sending all 82 tools on every LLM call (~14K tokens overhead), the ToolSelector now loads a core set (~18 tools) and activates contextual tool groups when keywords appear in the conversation. Architecture: - ToolGroup enum: 17 contextual groups (git, docker, json, browser, etc.) each with keyword triggers and capability gates - ToolSelector: stateful per-bout selector with monotonic growth — once a group activates it stays active to maximize prompt cache hits - Core tools always loaded: file_read, file_write, shell_exec, web_fetch, web_search, memory, screenshot, channel_notify, self-config - shell_exec is always in core as the universal fallback — the model never loses capability, it just gets a leaner tool menu Token savings: - Core only (typical): ~18 tools, ~3K tokens (was 82 tools, ~14K tokens) - Core + 1 group: ~23 tools, ~3.8K tokens - Core + 2 groups: ~28 tools, ~4.7K tokens - 66-80% reduction in tool token overhead per request The tool executor still runs ANY tool regardless of what was sent to the LLM — capabilities are checked at execution time, not selection time. Includes 10 unit tests covering: core presence, keyword activation, capability gating, monotonic growth, change detection, case insensitivity, scan window limits, no duplicates, and count bounds. Co-Authored-By: Claude Opus 4.6 --- crates/punch-kernel/src/background.rs | 1 + .../punch-kernel/src/heartbeat_scheduler.rs | 1 + crates/punch-kernel/src/ring.rs | 18 +- crates/punch-kernel/src/workflow.rs | 1 + crates/punch-runtime/src/fighter_loop.rs | 34 +- crates/punch-runtime/src/lib.rs | 2 +- crates/punch-runtime/src/tools.rs | 659 +++++++++++++++++- .../punch-runtime/tests/fighter_loop_tests.rs | 4 + 8 files changed, 706 insertions(+), 14 deletions(-) diff --git a/crates/punch-kernel/src/background.rs b/crates/punch-kernel/src/background.rs index 5350204..58bafc1 100644 --- a/crates/punch-kernel/src/background.rs +++ b/crates/punch-kernel/src/background.rs @@ -115,6 +115,7 @@ pub async fn run_gorilla_tick( memory: Arc::clone(memory), driver: Arc::clone(driver), available_tools, + mcp_tools: Vec::new(), max_iterations: Some(10), context_window: None, tool_timeout_secs: None, diff --git a/crates/punch-kernel/src/heartbeat_scheduler.rs b/crates/punch-kernel/src/heartbeat_scheduler.rs index f8dc17f..aa3d481 100644 --- a/crates/punch-kernel/src/heartbeat_scheduler.rs +++ b/crates/punch-kernel/src/heartbeat_scheduler.rs @@ -273,6 +273,7 @@ async fn heartbeat_loop(mut cfg: HeartbeatLoopConfig) { memory: Arc::clone(memory), driver: Arc::clone(driver), available_tools: tools_for_capabilities(&manifest.capabilities), + mcp_tools: Vec::new(), max_iterations: Some(10), context_window: None, tool_timeout_secs: Some(60), diff --git a/crates/punch-kernel/src/ring.rs b/crates/punch-kernel/src/ring.rs index 5f41937..17e6446 100644 --- a/crates/punch-kernel/src/ring.rs +++ b/crates/punch-kernel/src/ring.rs @@ -18,10 +18,7 @@ use tokio::task::JoinHandle; use tracing::{info, instrument, warn}; use punch_memory::{BoutId, MemorySubstrate}; -use punch_runtime::{ - FighterLoopParams, FighterLoopResult, LlmDriver, McpClient, run_fighter_loop, - tools_for_capabilities, -}; +use punch_runtime::{FighterLoopParams, FighterLoopResult, LlmDriver, McpClient, run_fighter_loop}; use punch_types::{ AgentCoordinator, AgentInfo, AgentMessageResult, CoordinationStrategy, FighterId, FighterManifest, FighterStatus, GorillaId, GorillaManifest, GorillaMetrics, GorillaStatus, @@ -763,9 +760,9 @@ impl Ring { let fighter_name = manifest.name.clone(); drop(entry); // Release the DashMap guard before any async calls. - let mut available_tools = tools_for_capabilities(&manifest.capabilities); - - // Merge MCP tools if the fighter has McpAccess capability. + // Collect MCP tools separately (the fighter loop handles dynamic + // built-in tool selection via ToolSelector when available_tools is empty). + let mut mcp_tools = Vec::new(); let has_mcp_access = manifest .capabilities .iter() @@ -782,7 +779,7 @@ impl Ring { }); if can_access { match mcp_entry.value().list_tools().await { - Ok(tools) => available_tools.extend(tools), + Ok(tools) => mcp_tools.extend(tools), Err(e) => { warn!( server = %server_name, @@ -804,6 +801,8 @@ impl Ring { }); // Run the fighter loop. + // available_tools is empty — the fighter loop uses ToolSelector for + // context-aware dynamic tool loading per turn. let params = FighterLoopParams { manifest: manifest.clone(), user_message: message, @@ -811,7 +810,8 @@ impl Ring { fighter_id: *fighter_id, memory: Arc::clone(&self.memory), driver: Arc::clone(&self.driver), - available_tools, + available_tools: Vec::new(), + mcp_tools, max_iterations: None, context_window: None, tool_timeout_secs: None, diff --git a/crates/punch-kernel/src/workflow.rs b/crates/punch-kernel/src/workflow.rs index fd420d7..991d466 100644 --- a/crates/punch-kernel/src/workflow.rs +++ b/crates/punch-kernel/src/workflow.rs @@ -1391,6 +1391,7 @@ impl WorkflowEngine { memory: Arc::clone(memory), driver: Arc::clone(driver), available_tools, + mcp_tools: Vec::new(), max_iterations: Some(20), context_window: None, tool_timeout_secs: Some(timeout_secs), diff --git a/crates/punch-runtime/src/fighter_loop.rs b/crates/punch-runtime/src/fighter_loop.rs index 68c2aa5..c87c4e3 100644 --- a/crates/punch-runtime/src/fighter_loop.rs +++ b/crates/punch-runtime/src/fighter_loop.rs @@ -60,7 +60,11 @@ pub struct FighterLoopParams { /// The LLM driver to use for completions. pub driver: Arc, /// Tools available for this fighter to use. + /// When provided, bypasses dynamic tool selection (used by workflows, gorillas, tests). + /// When empty, the fighter loop uses `ToolSelector` for context-aware tool loading. pub available_tools: Vec, + /// Pre-fetched MCP tools for this fighter (merged into tool list each turn). + pub mcp_tools: Vec, /// Maximum loop iterations before forced termination (default: 50). pub max_iterations: Option, /// Context window size in tokens (default: 200K). @@ -243,8 +247,22 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult PunchResult PunchResult, tool: ToolDefinition) { } } +// --------------------------------------------------------------------------- +// Dynamic tool selection +// --------------------------------------------------------------------------- + +/// Groups of contextual tools that activate when conversation context is relevant. +/// +/// Each group maps to a set of related tools and a list of activation keywords. +/// Groups are ordered deterministically (via BTreeSet) to keep the tool list +/// stable across turns for prompt cache efficiency. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum ToolGroup { + SourceControl, + Container, + DataManipulation, + Schedule, + BrowserControl, + AgentCoordination, + Archive, + Template, + Crypto, + UiAutomation, + AppIntegration, + PluginInvoke, + A2ADelegate, + KnowledgeGraph, + CodeAnalysis, + ProcessManagement, + HttpAdvanced, +} + +impl ToolGroup { + /// All tool groups. + const ALL: &[ToolGroup] = &[ + ToolGroup::SourceControl, + ToolGroup::Container, + ToolGroup::DataManipulation, + ToolGroup::Schedule, + ToolGroup::BrowserControl, + ToolGroup::AgentCoordination, + ToolGroup::Archive, + ToolGroup::Template, + ToolGroup::Crypto, + ToolGroup::UiAutomation, + ToolGroup::AppIntegration, + ToolGroup::PluginInvoke, + ToolGroup::A2ADelegate, + ToolGroup::KnowledgeGraph, + ToolGroup::CodeAnalysis, + ToolGroup::ProcessManagement, + ToolGroup::HttpAdvanced, + ]; + + /// Keywords that trigger this tool group. Matched case-insensitively + /// against conversation messages. + fn keywords(&self) -> &'static [&'static str] { + match self { + ToolGroup::SourceControl => &[ + "git", + "commit", + "branch", + "diff", + "merge", + "rebase", + "push", + "pull", + "clone", + "stash", + "checkout", + "repo", + "repository", + ], + ToolGroup::Container => &[ + "docker", + "container", + "image", + "dockerfile", + "compose", + "pod", + "kubernetes", + "k8s", + "helm", + ], + ToolGroup::DataManipulation => &[ + "json", + "yaml", + "toml", + "regex", + "parse", + "transform", + "csv", + "xml", + "jq", + ], + ToolGroup::Schedule => &[ + "schedule", + "cron", + "timer", + "periodic", + "interval", + "recurring", + "every hour", + "every day", + ], + ToolGroup::BrowserControl => &[ + "browser", + "webpage", + "click", + "navigate", + "selenium", + "puppeteer", + "scrape", + "website", + ], + ToolGroup::AgentCoordination => &[ + "agent", + "spawn", + "fighter", + "worker", + "troop", + "multi-agent", + ], + ToolGroup::Archive => &[ + "archive", + "zip", + "tar", + "gzip", + "extract", + "compress", + "unzip", + "decompress", + ], + ToolGroup::Template => &["template", "render", "handlebars", "mustache", "jinja"], + ToolGroup::Crypto => &[ + "hash", "crypto", "sha", "md5", "hmac", "checksum", "digest", "encrypt", + ], + ToolGroup::UiAutomation => &[ + "ui automation", + "accessibility", + "ui element", + "click button", + "menu bar", + "window list", + "applescript", + "osascript", + ], + ToolGroup::AppIntegration => &[ + "ocr", + "text recognition", + "screen read", + "extract text from image", + ], + ToolGroup::PluginInvoke => &["plugin", "wasm", "extension", "webassembly"], + ToolGroup::A2ADelegate => &["a2a", "remote agent", "delegate task", "external agent"], + ToolGroup::KnowledgeGraph => &[ + "knowledge", + "entity", + "relation", + "graph", + "ontology", + "triple", + ], + ToolGroup::CodeAnalysis => &[ + "symbols", + "code search", + "definition", + "references", + "ast", + "code analysis", + ], + ToolGroup::ProcessManagement => &[ + "process", + "pid", + "kill process", + "signal", + "background process", + ], + ToolGroup::HttpAdvanced => &[ + "http post", + "api call", + "rest api", + "endpoint", + "webhook", + "curl", + "http request", + ], + } + } + + /// Check whether the fighter's capabilities include the one required for this group. + fn is_permitted(&self, capabilities: &[Capability]) -> bool { + capabilities.iter().any(|c| match self { + ToolGroup::SourceControl => matches!(c, Capability::SourceControl), + ToolGroup::Container => matches!(c, Capability::Container), + ToolGroup::DataManipulation => matches!(c, Capability::DataManipulation), + ToolGroup::Schedule => matches!(c, Capability::Schedule), + ToolGroup::BrowserControl => matches!(c, Capability::BrowserControl), + ToolGroup::AgentCoordination => { + matches!(c, Capability::AgentSpawn | Capability::AgentMessage) + } + ToolGroup::Archive => matches!(c, Capability::Archive), + ToolGroup::Template => matches!(c, Capability::Template), + ToolGroup::Crypto => matches!(c, Capability::Crypto), + ToolGroup::UiAutomation => matches!(c, Capability::UiAutomation(_)), + ToolGroup::AppIntegration => matches!(c, Capability::AppIntegration(_)), + ToolGroup::PluginInvoke => matches!(c, Capability::PluginInvoke), + ToolGroup::A2ADelegate => matches!(c, Capability::A2ADelegate), + ToolGroup::KnowledgeGraph => matches!(c, Capability::KnowledgeGraph), + ToolGroup::CodeAnalysis => matches!(c, Capability::CodeAnalysis), + ToolGroup::ProcessManagement => matches!(c, Capability::ShellExec(_)), + ToolGroup::HttpAdvanced => matches!(c, Capability::Network(_)), + }) + } + + /// Return the tool definitions for this group. + fn tools(&self) -> Vec { + match self { + ToolGroup::SourceControl => { + vec![ + git_status(), + git_diff(), + git_log(), + git_commit(), + git_branch(), + ] + } + ToolGroup::Container => { + vec![docker_ps(), docker_run(), docker_build(), docker_logs()] + } + ToolGroup::DataManipulation => vec![ + json_query(), + json_transform(), + yaml_parse(), + regex_match(), + regex_replace(), + text_diff(), + text_count(), + ], + ToolGroup::Schedule => { + vec![schedule_task(), schedule_list(), schedule_cancel()] + } + ToolGroup::BrowserControl => vec![ + browser_navigate(), + browser_screenshot(), + browser_click(), + browser_type(), + browser_content(), + ], + ToolGroup::AgentCoordination => { + vec![agent_spawn(), agent_message(), agent_list()] + } + ToolGroup::Archive => { + vec![archive_create(), archive_extract(), archive_list()] + } + ToolGroup::Template => vec![template_render()], + ToolGroup::Crypto => vec![hash_compute(), hash_verify()], + ToolGroup::UiAutomation => vec![ + ui_screenshot(), + ui_find_elements(), + ui_click(), + ui_type_text(), + ui_list_windows(), + ui_read_attribute(), + ], + ToolGroup::AppIntegration => vec![app_ocr()], + ToolGroup::PluginInvoke => vec![wasm_invoke()], + ToolGroup::A2ADelegate => vec![a2a_delegate()], + ToolGroup::KnowledgeGraph => vec![ + knowledge_add_entity(), + knowledge_add_relation(), + knowledge_query(), + ], + ToolGroup::CodeAnalysis => vec![code_search(), code_symbols()], + ToolGroup::ProcessManagement => vec![process_list(), process_kill()], + ToolGroup::HttpAdvanced => vec![http_request(), http_post()], + } + } +} + +/// Dynamic tool selector that loads tools based on conversation context. +/// +/// Core tools (~18) are always available. Contextual tool groups activate +/// when relevant keywords appear in recent messages and stay active for the +/// remainder of the bout (monotonic growth) to maximize prompt cache hits. +/// +/// The tool executor can still run ANY tool regardless of what the LLM sees — +/// capabilities are checked at execution time, not selection time. The model +/// never loses capability: `shell_exec` is always in the core set and can do +/// anything the contextual tools can via command-line equivalents. +pub struct ToolSelector { + /// Fighter's granted capabilities. + capabilities: Vec, + /// Tool groups activated so far in this bout (only grows, never shrinks). + active_groups: BTreeSet, + /// Hash of the last tool list returned, for change detection. + last_tool_hash: u64, +} + +impl ToolSelector { + /// Create a new selector for a fighter with the given capabilities. + pub fn new(capabilities: &[Capability]) -> Self { + Self { + capabilities: capabilities.to_vec(), + active_groups: BTreeSet::new(), + last_tool_hash: 0, + } + } + + /// Select tools based on conversation context. + /// + /// Returns `(tools, tools_changed)` where `tools_changed` is true if the + /// tool list differs from the previous call (useful for cache invalidation + /// decisions). + /// + /// Scans the last 4 messages (current + 3 prior) for keywords. Once a + /// group activates, it stays active for the rest of the bout. + pub fn select_tools(&mut self, messages: &[Message]) -> (Vec, bool) { + let scan_text = Self::build_scan_text(messages); + + let mut newly_activated = Vec::new(); + for group in ToolGroup::ALL { + if self.active_groups.contains(group) { + continue; + } + if !group.is_permitted(&self.capabilities) { + continue; + } + if Self::keywords_match(&scan_text, group.keywords()) { + self.active_groups.insert(*group); + newly_activated.push(*group); + } + } + + if !newly_activated.is_empty() { + info!( + groups = ?newly_activated, + total_active = self.active_groups.len(), + "tool selector: activated new groups" + ); + } + + let mut tools = self.core_tools(); + for group in &self.active_groups { + for tool in group.tools() { + push_unique(&mut tools, tool); + } + } + + let hash = Self::compute_hash(&tools); + let changed = hash != self.last_tool_hash; + self.last_tool_hash = hash; + + (tools, changed) + } + + /// Return the number of currently active contextual tool groups. + pub fn active_group_count(&self) -> usize { + self.active_groups.len() + } + + /// Core tools that are always loaded (when capability permits). + fn core_tools(&self) -> Vec { + let mut tools = Vec::with_capacity(20); + + for cap in &self.capabilities { + match cap { + Capability::FileRead(_) => { + push_unique(&mut tools, file_read()); + push_unique(&mut tools, file_list()); + push_unique(&mut tools, file_search()); + push_unique(&mut tools, file_info()); + } + Capability::FileWrite(_) => { + push_unique(&mut tools, file_write()); + push_unique(&mut tools, patch_apply()); + } + Capability::ShellExec(_) => { + push_unique(&mut tools, shell_exec()); + push_unique(&mut tools, env_get()); + push_unique(&mut tools, env_list()); + } + Capability::Network(_) => { + push_unique(&mut tools, web_fetch()); + push_unique(&mut tools, web_search()); + } + Capability::Memory => { + push_unique(&mut tools, memory_store()); + push_unique(&mut tools, memory_recall()); + } + Capability::SystemAutomation => { + push_unique(&mut tools, sys_screenshot()); + } + Capability::ChannelNotify => { + push_unique(&mut tools, channel_notify()); + } + Capability::SelfConfig => { + push_unique(&mut tools, heartbeat_add()); + push_unique(&mut tools, heartbeat_list()); + push_unique(&mut tools, heartbeat_remove()); + push_unique(&mut tools, creed_view()); + push_unique(&mut tools, skill_list()); + push_unique(&mut tools, skill_recommend()); + } + _ => {} + } + } + + tools + } + + /// Build the keyword scan text from recent messages. + fn build_scan_text(messages: &[Message]) -> String { + let window_size = 4.min(messages.len()); + let start = messages.len().saturating_sub(window_size); + let mut text = String::with_capacity(2000); + for msg in &messages[start..] { + text.push_str(&msg.content); + text.push(' '); + } + text.to_lowercase() + } + + /// Check if any keyword appears in the scan text. + fn keywords_match(scan_text: &str, keywords: &[&str]) -> bool { + keywords.iter().any(|kw| scan_text.contains(kw)) + } + + /// Compute a stable hash of the tool list for change detection. + fn compute_hash(tools: &[ToolDefinition]) -> u64 { + let mut hasher = DefaultHasher::new(); + for tool in tools { + tool.name.hash(&mut hasher); + } + hasher.finish() + } +} + // --------------------------------------------------------------------------- // Tool definitions // --------------------------------------------------------------------------- @@ -2895,4 +3344,212 @@ mod tests { names.dedup(); assert_eq!(before, names.len(), "duplicate tools found"); } + + // ----------------------------------------------------------------------- + // ToolSelector tests + // ----------------------------------------------------------------------- + + use super::{ToolGroup, ToolSelector}; + use punch_types::{Message, Role}; + + fn full_caps() -> Vec { + Capability::full_access() + } + + fn msg(role: Role, text: &str) -> Message { + Message::new(role, text) + } + + #[test] + fn test_selector_core_tools_always_present() { + let mut sel = ToolSelector::new(&full_caps()); + let messages = vec![msg(Role::User, "hello, how are you?")]; + let (tools, _) = sel.select_tools(&messages); + + let names: Vec<&str> = tools.iter().map(|t| t.name.as_str()).collect(); + // Core tools must be present. + assert!(names.contains(&"file_read"), "file_read missing"); + assert!(names.contains(&"shell_exec"), "shell_exec missing"); + assert!(names.contains(&"web_fetch"), "web_fetch missing"); + assert!(names.contains(&"memory_store"), "memory_store missing"); + // Contextual tools should NOT be present. + assert!( + !names.contains(&"git_status"), + "git tools should not be in core" + ); + assert!( + !names.contains(&"docker_ps"), + "docker tools should not be in core" + ); + } + + #[test] + fn test_selector_keyword_activates_group() { + let mut sel = ToolSelector::new(&full_caps()); + let messages = vec![msg(Role::User, "please commit my changes to git")]; + let (tools, _) = sel.select_tools(&messages); + + let names: Vec<&str> = tools.iter().map(|t| t.name.as_str()).collect(); + assert!( + names.contains(&"git_status"), + "git_status should be activated" + ); + assert!( + names.contains(&"git_commit"), + "git_commit should be activated" + ); + assert!(names.contains(&"git_diff"), "git_diff should be activated"); + } + + #[test] + fn test_selector_multiple_groups_activate() { + let mut sel = ToolSelector::new(&full_caps()); + let messages = vec![msg( + Role::User, + "build the docker image and then commit it to git", + )]; + let (tools, _) = sel.select_tools(&messages); + + let names: Vec<&str> = tools.iter().map(|t| t.name.as_str()).collect(); + assert!(names.contains(&"git_status"), "git should activate"); + assert!(names.contains(&"docker_build"), "docker should activate"); + } + + #[test] + fn test_selector_capability_gating() { + // No capabilities granted — no contextual tools should activate. + let mut sel = ToolSelector::new(&[]); + let messages = vec![msg(Role::User, "git commit and docker build")]; + let (tools, _) = sel.select_tools(&messages); + + assert!(tools.is_empty(), "no tools without capabilities"); + } + + #[test] + fn test_selector_monotonic_growth() { + let mut sel = ToolSelector::new(&full_caps()); + + // Turn 1: activate git tools. + let msgs1 = vec![msg(Role::User, "commit my changes")]; + let (tools1, _) = sel.select_tools(&msgs1); + let count1 = tools1.len(); + + // Turn 2: no git keywords, but git tools should persist. + let msgs2 = vec![ + msg(Role::User, "commit my changes"), + msg(Role::Assistant, "done"), + msg(Role::User, "now tell me a joke"), + ]; + let (tools2, _) = sel.select_tools(&msgs2); + + let names2: Vec<&str> = tools2.iter().map(|t| t.name.as_str()).collect(); + assert!( + names2.contains(&"git_status"), + "git tools should persist across turns" + ); + assert!(tools2.len() >= count1, "tool count should not decrease"); + } + + #[test] + fn test_selector_tools_changed_detection() { + let mut sel = ToolSelector::new(&full_caps()); + + let msgs1 = vec![msg(Role::User, "hello")]; + let (_, changed1) = sel.select_tools(&msgs1); + assert!(changed1, "first call should report changed"); + + // Same message again — no new groups, no change. + let (_, changed2) = sel.select_tools(&msgs1); + assert!(!changed2, "no new groups means no change"); + + // New keyword activates a group — should report changed. + let msgs3 = vec![msg(Role::User, "git commit")]; + let (_, changed3) = sel.select_tools(&msgs3); + assert!(changed3, "new group activation should report changed"); + } + + #[test] + fn test_selector_case_insensitive() { + let mut sel = ToolSelector::new(&full_caps()); + let messages = vec![msg(Role::User, "GIT COMMIT DOCKER BUILD")]; + let (tools, _) = sel.select_tools(&messages); + + let names: Vec<&str> = tools.iter().map(|t| t.name.as_str()).collect(); + assert!(names.contains(&"git_status"), "case insensitive match"); + assert!(names.contains(&"docker_ps"), "case insensitive match"); + } + + #[test] + fn test_selector_scan_window_limited() { + let mut sel = ToolSelector::new(&full_caps()); + // Old message mentions git, but it's outside the 4-message window. + let messages = vec![ + msg(Role::User, "git commit"), // 5th from end — outside window + msg(Role::Assistant, "done"), // 4th + msg(Role::User, "ok"), // 3rd + msg(Role::Assistant, "ok"), // 2nd + msg(Role::User, "tell me a joke"), // 1st (current) + ]; + let (tools, _) = sel.select_tools(&messages); + + let names: Vec<&str> = tools.iter().map(|t| t.name.as_str()).collect(); + assert!( + !names.contains(&"git_status"), + "old messages outside window should not activate groups" + ); + } + + #[test] + fn test_selector_no_duplicates() { + let mut sel = ToolSelector::new(&full_caps()); + // Activate many groups at once. + let messages = vec![msg( + Role::User, + "git docker json archive schedule browser crypto plugin a2a knowledge code", + )]; + let (tools, _) = sel.select_tools(&messages); + + let mut names: Vec<&str> = tools.iter().map(|t| t.name.as_str()).collect(); + let before = names.len(); + names.sort(); + names.dedup(); + assert_eq!(before, names.len(), "duplicate tools found in selection"); + } + + #[test] + fn test_selector_core_count_reasonable() { + let mut sel = ToolSelector::new(&full_caps()); + let messages = vec![msg(Role::User, "hello")]; + let (tools, _) = sel.select_tools(&messages); + + // Core should be roughly 18-25 tools (not 80+). + assert!( + tools.len() < 30, + "core tools should be under 30, got {}", + tools.len() + ); + assert!( + tools.len() >= 15, + "core tools should be at least 15, got {}", + tools.len() + ); + } + + #[test] + fn test_tool_group_all_covers_every_variant() { + // Ensure ALL array matches the enum variants. + assert_eq!( + ToolGroup::ALL.len(), + 17, + "ToolGroup::ALL must match the number of variants" + ); + } + + #[test] + fn test_tool_group_tools_not_empty() { + for group in ToolGroup::ALL { + let tools = group.tools(); + assert!(!tools.is_empty(), "ToolGroup::{:?} has no tools", group); + } + } } diff --git a/crates/punch-runtime/tests/fighter_loop_tests.rs b/crates/punch-runtime/tests/fighter_loop_tests.rs index 87af2bd..8eabbcb 100644 --- a/crates/punch-runtime/tests/fighter_loop_tests.rs +++ b/crates/punch-runtime/tests/fighter_loop_tests.rs @@ -137,6 +137,7 @@ fn make_params( memory, driver, available_tools: Vec::new(), + mcp_tools: Vec::new(), max_iterations: Some(10), context_window: Some(200_000), tool_timeout_secs: Some(30), @@ -466,6 +467,7 @@ async fn test_messages_persisted_to_memory() { memory: memory.clone(), driver, available_tools: Vec::new(), + mcp_tools: Vec::new(), max_iterations: Some(10), context_window: Some(200_000), tool_timeout_secs: Some(30), @@ -527,6 +529,7 @@ async fn test_creed_bout_count_increments() { memory: memory.clone(), driver, available_tools: Vec::new(), + mcp_tools: Vec::new(), max_iterations: Some(10), context_window: Some(200_000), tool_timeout_secs: Some(30), @@ -586,6 +589,7 @@ async fn test_heartbeat_tasks_marked_checked() { memory: memory.clone(), driver, available_tools: Vec::new(), + mcp_tools: Vec::new(), max_iterations: Some(10), context_window: Some(200_000), tool_timeout_secs: Some(30), From 2d81056cbdd90713781c569c85220ff37271d7fb Mon Sep 17 00:00:00 2001 From: Archith Date: Thu, 26 Mar 2026 08:46:24 -0700 Subject: [PATCH 2/4] Fix PR #18 review: tighten keywords, scan tool context, stable hash, auto-activate Address all 5 review issues on the dynamic tool selection PR: 1. HIGH - Tighten overly broad keywords: Replace generic words like push, image, process, agent, hash, render, graph, definition, extract with specific multi-word phrases (e.g. git push, docker image, kill process, spawn fighter, sha256, render template, knowledge graph, find definition). 2. MEDIUM - Scan tool context: build_scan_text now includes tool call names, short argument values, and tool result snippets (first 200 chars). 3. MEDIUM - Avoid static path clone overhead: Move params.available_tools into static_tools Option before the loop. 4. MEDIUM - Auto-activate capability groups: Groups with no core-tool overlap auto-activate on construction when the fighter has the required capability. 5. NIT - Stable hash: Replace DefaultHasher with FNV-1a-inspired hash that produces identical results across Rust versions. Co-Authored-By: Claude Opus 4.6 --- crates/punch-runtime/src/fighter_loop.rs | 16 +- crates/punch-runtime/src/tools.rs | 290 +++++++++++++++++------ 2 files changed, 238 insertions(+), 68 deletions(-) diff --git a/crates/punch-runtime/src/fighter_loop.rs b/crates/punch-runtime/src/fighter_loop.rs index c87c4e3..c5c6b7b 100644 --- a/crates/punch-runtime/src/fighter_loop.rs +++ b/crates/punch-runtime/src/fighter_loop.rs @@ -259,9 +259,17 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult> = if !use_dynamic_tools { + Some(params.available_tools) + } else { + None + }; + + let static_tool_count = static_tools.as_ref().map_or(0, |t| t.len()); info!( dynamic_tools = use_dynamic_tools, - static_tool_count = params.available_tools.len(), + static_tool_count, mcp_tool_count = params.mcp_tools.len(), fighter = %params.manifest.name, model = %params.manifest.model.model, @@ -277,7 +285,11 @@ pub async fn run_fighter_loop(params: FighterLoopParams) -> PunchResult &'static [&'static str] { match self { ToolGroup::SourceControl => &[ - "git", - "commit", - "branch", - "diff", - "merge", - "rebase", - "push", - "pull", - "clone", - "stash", - "checkout", + "git ", + "git commit", + "git branch", + "git diff", + "git merge", + "git rebase", + "git push", + "git pull", + "git clone", + "git stash", + "git checkout", + "git log", + "commit changes", + "commit my", "repo", "repository", ], ToolGroup::Container => &[ "docker", "container", - "image", "dockerfile", + "docker image", "compose", "pod", "kubernetes", @@ -343,8 +344,9 @@ impl ToolGroup { "yaml", "toml", "regex", - "parse", - "transform", + "parse json", + "parse yaml", + "transform json", "csv", "xml", "jq", @@ -362,34 +364,55 @@ impl ToolGroup { ToolGroup::BrowserControl => &[ "browser", "webpage", - "click", - "navigate", + "click button", + "click element", + "click link", + "navigate to", "selenium", "puppeteer", "scrape", - "website", + "open website", + "web page", ], ToolGroup::AgentCoordination => &[ - "agent", - "spawn", - "fighter", - "worker", + "spawn fighter", + "spawn agent", + "spawn worker", + "new fighter", "troop", "multi-agent", + "delegate to", + "coordinate agents", ], ToolGroup::Archive => &[ "archive", - "zip", - "tar", + "zip file", + "tar file", "gzip", - "extract", - "compress", + "extract archive", + "compress file", "unzip", "decompress", ], - ToolGroup::Template => &["template", "render", "handlebars", "mustache", "jinja"], + ToolGroup::Template => &[ + "template", + "render template", + "handlebars", + "mustache", + "jinja", + ], ToolGroup::Crypto => &[ - "hash", "crypto", "sha", "md5", "hmac", "checksum", "digest", "encrypt", + "sha256", + "sha512", + "crypto", + "md5", + "hmac", + "checksum", + "digest", + "encrypt", + "decrypt", + "hash file", + "hash password", ], ToolGroup::UiAutomation => &[ "ui automation", @@ -410,36 +433,40 @@ impl ToolGroup { ToolGroup::PluginInvoke => &["plugin", "wasm", "extension", "webassembly"], ToolGroup::A2ADelegate => &["a2a", "remote agent", "delegate task", "external agent"], ToolGroup::KnowledgeGraph => &[ - "knowledge", - "entity", - "relation", - "graph", + "knowledge graph", + "add entity", + "add relation", + "knowledge base", "ontology", - "triple", + "triple store", ], ToolGroup::CodeAnalysis => &[ - "symbols", + "code symbols", "code search", - "definition", - "references", + "find definition", + "find references", "ast", "code analysis", + "go to definition", ], ToolGroup::ProcessManagement => &[ - "process", - "pid", + "list processes", "kill process", - "signal", + "pid", + "send signal", "background process", + "process list", ], ToolGroup::HttpAdvanced => &[ "http post", "api call", "rest api", - "endpoint", + "api endpoint", "webhook", "curl", "http request", + "http put", + "http delete", ], } } @@ -555,14 +582,52 @@ pub struct ToolSelector { impl ToolSelector { /// Create a new selector for a fighter with the given capabilities. + /// + /// Groups whose capabilities are granted but have no representation in the + /// core tool set are auto-activated on construction. This prevents a + /// capability regression where, e.g., a fighter with `SourceControl` wouldn't + /// get git tools on turn 1 unless the user explicitly said "git". pub fn new(capabilities: &[Capability]) -> Self { + let mut active_groups = BTreeSet::new(); + + // Auto-activate groups that are permitted AND have no core-tool overlap. + // These are "capability-only" groups whose tools would be invisible without + // keyword triggers, creating a silent regression for restricted fighters. + for group in ToolGroup::ALL { + if group.is_permitted(capabilities) && Self::group_needs_auto_activate(group) { + active_groups.insert(*group); + } + } + + if !active_groups.is_empty() { + info!( + auto_activated = ?active_groups, + "tool selector: auto-activated groups for granted capabilities" + ); + } + Self { capabilities: capabilities.to_vec(), - active_groups: BTreeSet::new(), + active_groups, last_tool_hash: 0, } } + /// Returns true if a group's tools have zero overlap with core tools, + /// meaning the group would be invisible without keyword activation. + fn group_needs_auto_activate(group: &ToolGroup) -> bool { + matches!( + group, + ToolGroup::SourceControl + | ToolGroup::Container + | ToolGroup::DataManipulation + | ToolGroup::Schedule + | ToolGroup::BrowserControl + | ToolGroup::AgentCoordination + | ToolGroup::ProcessManagement + ) + } + /// Select tools based on conversation context. /// /// Returns `(tools, tools_changed)` where `tools_changed` is true if the @@ -666,13 +731,42 @@ impl ToolSelector { } /// Build the keyword scan text from recent messages. + /// + /// Scans message content, tool call names/arguments, and tool result content + /// to catch context from tool usage (e.g. a git_status call signals SourceControl). fn build_scan_text(messages: &[Message]) -> String { let window_size = 4.min(messages.len()); let start = messages.len().saturating_sub(window_size); - let mut text = String::with_capacity(2000); + let mut text = String::with_capacity(4000); for msg in &messages[start..] { text.push_str(&msg.content); text.push(' '); + for tc in &msg.tool_calls { + text.push_str(&tc.name); + text.push(' '); + // Include a compact form of arguments (tool names are most useful). + if let Some(obj) = tc.input.as_object() { + for val in obj.values() { + if let Some(s) = val.as_str() { + // Only include short string values to avoid bloating scan text. + if s.len() <= 200 { + text.push_str(s); + text.push(' '); + } + } + } + } + } + for tr in &msg.tool_results { + // Include first 200 chars of tool result content for keyword detection. + let snippet = if tr.content.len() > 200 { + &tr.content[..200] + } else { + &tr.content + }; + text.push_str(snippet); + text.push(' '); + } } text.to_lowercase() } @@ -683,12 +777,21 @@ impl ToolSelector { } /// Compute a stable hash of the tool list for change detection. + /// + /// Uses a simple FNV-1a-inspired hash that is stable across Rust versions + /// (unlike `DefaultHasher` which uses SipHash with randomized keys). fn compute_hash(tools: &[ToolDefinition]) -> u64 { - let mut hasher = DefaultHasher::new(); + let mut hash: u64 = 0xcbf2_9ce4_8422_2325; // FNV offset basis for tool in tools { - tool.name.hash(&mut hasher); + for byte in tool.name.as_bytes() { + hash ^= u64::from(*byte); + hash = hash.wrapping_mul(0x0100_0000_01b3); // FNV prime + } + // Separator to distinguish "ab"+"cd" from "abc"+"d". + hash ^= 0xff; + hash = hash.wrapping_mul(0x0100_0000_01b3); } - hasher.finish() + hash } } @@ -3372,14 +3475,36 @@ mod tests { assert!(names.contains(&"shell_exec"), "shell_exec missing"); assert!(names.contains(&"web_fetch"), "web_fetch missing"); assert!(names.contains(&"memory_store"), "memory_store missing"); - // Contextual tools should NOT be present. + // Auto-activated groups (granted capability, no core overlap) should also be present. + assert!( + names.contains(&"git_status"), + "git tools should auto-activate for SourceControl capability" + ); + assert!( + names.contains(&"docker_ps"), + "docker tools should auto-activate for Container capability" + ); + } + + #[test] + fn test_selector_no_auto_activate_without_capability() { + // A fighter with only FileRead + ShellExec should NOT get git or docker tools. + let caps = vec![ + Capability::FileRead("**".to_string()), + Capability::ShellExec("*".to_string()), + ]; + let mut sel = ToolSelector::new(&caps); + let messages = vec![msg(Role::User, "hello")]; + let (tools, _) = sel.select_tools(&messages); + + let names: Vec<&str> = tools.iter().map(|t| t.name.as_str()).collect(); assert!( !names.contains(&"git_status"), - "git tools should not be in core" + "git should not auto-activate without SourceControl" ); assert!( !names.contains(&"docker_ps"), - "docker tools should not be in core" + "docker should not auto-activate without Container" ); } @@ -3452,10 +3577,13 @@ mod tests { #[test] fn test_selector_tools_changed_detection() { - let mut sel = ToolSelector::new(&full_caps()); + // Use Archive + Crypto (not auto-activated) to test change detection. + let caps = vec![Capability::Archive, Capability::Crypto]; + let mut sel = ToolSelector::new(&caps); let msgs1 = vec![msg(Role::User, "hello")]; let (_, changed1) = sel.select_tools(&msgs1); + // First call always reports changed (hash was 0). assert!(changed1, "first call should report changed"); // Same message again — no new groups, no change. @@ -3463,7 +3591,7 @@ mod tests { assert!(!changed2, "no new groups means no change"); // New keyword activates a group — should report changed. - let msgs3 = vec![msg(Role::User, "git commit")]; + let msgs3 = vec![msg(Role::User, "compute sha256 checksum")]; let (_, changed3) = sel.select_tools(&msgs3); assert!(changed3, "new group activation should report changed"); } @@ -3481,20 +3609,22 @@ mod tests { #[test] fn test_selector_scan_window_limited() { - let mut sel = ToolSelector::new(&full_caps()); - // Old message mentions git, but it's outside the 4-message window. + // Use Crypto capability (not auto-activated) to test window limits. + let caps = vec![Capability::Crypto]; + let mut sel = ToolSelector::new(&caps); + // Old message mentions crypto keyword, but it's outside the 4-message window. let messages = vec![ - msg(Role::User, "git commit"), // 5th from end — outside window - msg(Role::Assistant, "done"), // 4th - msg(Role::User, "ok"), // 3rd - msg(Role::Assistant, "ok"), // 2nd - msg(Role::User, "tell me a joke"), // 1st (current) + msg(Role::User, "compute sha256 checksum"), // 5th from end — outside window + msg(Role::Assistant, "done"), // 4th + msg(Role::User, "ok"), // 3rd + msg(Role::Assistant, "ok"), // 2nd + msg(Role::User, "tell me a joke"), // 1st (current) ]; let (tools, _) = sel.select_tools(&messages); let names: Vec<&str> = tools.iter().map(|t| t.name.as_str()).collect(); assert!( - !names.contains(&"git_status"), + !names.contains(&"hash_compute"), "old messages outside window should not activate groups" ); } @@ -3502,10 +3632,10 @@ mod tests { #[test] fn test_selector_no_duplicates() { let mut sel = ToolSelector::new(&full_caps()); - // Activate many groups at once. + // Use precise keywords that match the tightened patterns. let messages = vec![msg( Role::User, - "git docker json archive schedule browser crypto plugin a2a knowledge code", + "git commit docker json unzip schedule cron browser sha256 plugin a2a knowledge graph code analysis", )]; let (tools, _) = sel.select_tools(&messages); @@ -3518,19 +3648,47 @@ mod tests { #[test] fn test_selector_core_count_reasonable() { + // Use a fighter with only basic capabilities (no auto-activated groups). + let caps = vec![ + Capability::FileRead("**".to_string()), + Capability::FileWrite("**".to_string()), + Capability::ShellExec("*".to_string()), + Capability::Network("*".to_string()), + Capability::Memory, + ]; + let mut sel = ToolSelector::new(&caps); + let messages = vec![msg(Role::User, "hello")]; + let (tools, _) = sel.select_tools(&messages); + + // Core should be roughly 12-20 tools (not 80+). + assert!( + tools.len() < 25, + "core tools should be under 25, got {}", + tools.len() + ); + assert!( + tools.len() >= 10, + "core tools should be at least 10, got {}", + tools.len() + ); + } + + #[test] + fn test_selector_full_caps_with_auto_activation() { + // Full capabilities: auto-activated groups should be present from turn 1. let mut sel = ToolSelector::new(&full_caps()); let messages = vec![msg(Role::User, "hello")]; let (tools, _) = sel.select_tools(&messages); - // Core should be roughly 18-25 tools (not 80+). + // Should have core + auto-activated groups, but still well under the full 80+. assert!( - tools.len() < 30, - "core tools should be under 30, got {}", + tools.len() < 60, + "full caps + auto-activation should be under 60, got {}", tools.len() ); assert!( - tools.len() >= 15, - "core tools should be at least 15, got {}", + tools.len() >= 30, + "full caps + auto-activation should be at least 30, got {}", tools.len() ); } From 1a72d544c5117a0c239e911880ed4237487f9759 Mon Sep 17 00:00:00 2001 From: Archith Date: Thu, 26 Mar 2026 09:29:15 -0700 Subject: [PATCH 3/4] Fix UTF-8 panic in build_scan_text tool result truncation Use floor_char_boundary(200) instead of byte slice [..200] to avoid panicking when byte 200 lands mid-character in multi-byte UTF-8 text. Co-Authored-By: Claude Opus 4.6 --- crates/punch-runtime/src/tools.rs | 35 ++++++++++++++----------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/crates/punch-runtime/src/tools.rs b/crates/punch-runtime/src/tools.rs index ea3f173..3f43fdc 100644 --- a/crates/punch-runtime/src/tools.rs +++ b/crates/punch-runtime/src/tools.rs @@ -758,13 +758,14 @@ impl ToolSelector { } } for tr in &msg.tool_results { - // Include first 200 chars of tool result content for keyword detection. - let snippet = if tr.content.len() > 200 { - &tr.content[..200] + // Include first ~200 chars of tool result content for keyword detection. + // Use floor_char_boundary to avoid panicking on multi-byte UTF-8. + let end = if tr.content.len() > 200 { + tr.content.floor_char_boundary(200) } else { - &tr.content + tr.content.len() }; - text.push_str(snippet); + text.push_str(&tr.content[..end]); text.push(' '); } } @@ -2488,13 +2489,11 @@ mod tests { let nav = browser_navigate(); assert_eq!(nav.name, "browser_navigate"); assert_eq!(nav.category, ToolCategory::Browser); - assert!( - nav.input_schema["required"] - .as_array() - .expect("required should be array") - .iter() - .any(|v| v == "url") - ); + assert!(nav.input_schema["required"] + .as_array() + .expect("required should be array") + .iter() + .any(|v| v == "url")); let ss = browser_screenshot(); assert_eq!(ss.name, "browser_screenshot"); @@ -2502,13 +2501,11 @@ mod tests { let click = browser_click(); assert_eq!(click.name, "browser_click"); - assert!( - click.input_schema["required"] - .as_array() - .expect("required should be array") - .iter() - .any(|v| v == "selector") - ); + assert!(click.input_schema["required"] + .as_array() + .expect("required should be array") + .iter() + .any(|v| v == "selector")); let typ = browser_type(); assert_eq!(typ.name, "browser_type"); From 2d88ed92becd85656bde4dd21ac0fedf868ca142 Mon Sep 17 00:00:00 2001 From: Archith Date: Thu, 26 Mar 2026 09:31:55 -0700 Subject: [PATCH 4/4] Fix formatting in browser tool tests (cargo fmt) Co-Authored-By: Claude Opus 4.6 --- crates/punch-runtime/src/tools.rs | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/crates/punch-runtime/src/tools.rs b/crates/punch-runtime/src/tools.rs index 3f43fdc..fc260fd 100644 --- a/crates/punch-runtime/src/tools.rs +++ b/crates/punch-runtime/src/tools.rs @@ -2489,11 +2489,13 @@ mod tests { let nav = browser_navigate(); assert_eq!(nav.name, "browser_navigate"); assert_eq!(nav.category, ToolCategory::Browser); - assert!(nav.input_schema["required"] - .as_array() - .expect("required should be array") - .iter() - .any(|v| v == "url")); + assert!( + nav.input_schema["required"] + .as_array() + .expect("required should be array") + .iter() + .any(|v| v == "url") + ); let ss = browser_screenshot(); assert_eq!(ss.name, "browser_screenshot"); @@ -2501,11 +2503,13 @@ mod tests { let click = browser_click(); assert_eq!(click.name, "browser_click"); - assert!(click.input_schema["required"] - .as_array() - .expect("required should be array") - .iter() - .any(|v| v == "selector")); + assert!( + click.input_schema["required"] + .as_array() + .expect("required should be array") + .iter() + .any(|v| v == "selector") + ); let typ = browser_type(); assert_eq!(typ.name, "browser_type");