diff --git a/claude-code/hooks/test_read_equivalent_tools.py b/claude-code/hooks/test_read_equivalent_tools.py new file mode 100644 index 00000000..5c35e3d4 --- /dev/null +++ b/claude-code/hooks/test_read_equivalent_tools.py @@ -0,0 +1,104 @@ +""" +Tests for read-equivalent tool (Grep/Glob/LS) path forwarding in +claude-code/hooks/unbound.py (WEB-4871). + +Covers: + - _derive_read_equivalent_path (file_path the gateway evaluates) + - extract_command_for_pretool (LS command / approval-key, regression on others) + +These pin the W1/W2/W3 review fixes: the no-`path` (whole-cwd-scan) case must +forward the cwd rather than nothing, LS must be path-specific, and Grep's regex +pattern must never be forwarded as a path. +""" + +import unittest + +import unbound + + +def _event(tool_name, tool_input, cwd=None): + e = {"tool_name": tool_name, "tool_input": tool_input} + if cwd is not None: + e["cwd"] = cwd + return e + + +class TestDeriveReadEquivalentPath(unittest.TestCase): + def d(self, tool_name, tool_input, cwd=None): + return unbound._derive_read_equivalent_path( + tool_name, tool_input, {"cwd": cwd} if cwd is not None else {} + ) + + # Explicit path always wins + def test_grep_explicit_path(self): + self.assertEqual(self.d("Grep", {"pattern": "X", "path": "/a/b"}), "/a/b") + + def test_ls_explicit_path(self): + self.assertEqual(self.d("LS", {"path": "/etc"}), "/etc") + + def test_glob_explicit_path_beats_pattern(self): + self.assertEqual(self.d("Glob", {"pattern": "**/*.env", "path": "/p"}), "/p") + + # W2: no path -> fall back to cwd (whole-tree scan must not be path-less) + def test_grep_no_path_falls_back_to_cwd(self): + self.assertEqual(self.d("Grep", {"pattern": "AWS_SECRET"}, cwd="/repo"), "/repo") + + def test_ls_no_path_falls_back_to_cwd(self): + self.assertEqual(self.d("LS", {}, cwd="/repo"), "/repo") + + # W3: Glob keeps the path-like pattern as a fallback (filename/glob policies) + def test_glob_pattern_used_when_no_path(self): + self.assertEqual(self.d("Glob", {"pattern": "**/*.pem"}, cwd="/repo"), "**/*.pem") + + def test_glob_no_path_no_pattern_falls_back_to_cwd(self): + self.assertEqual(self.d("Glob", {}, cwd="/repo"), "/repo") + + # Grep's regex pattern is NEVER forwarded as a path + def test_grep_pattern_never_used_as_path(self): + self.assertEqual(self.d("Grep", {"pattern": "SECRET_KEY.*="}, cwd="/repo"), "/repo") + + # Non-breaking edge: nothing available -> None (caller forwards no file_path) + def test_no_path_no_cwd_returns_none(self): + self.assertIsNone(self.d("Grep", {"pattern": "X"})) + self.assertIsNone(self.d("LS", {})) + self.assertIsNone(self.d("Glob", {})) + + +class TestExtractCommandForPretool(unittest.TestCase): + # W1: LS command is path-specific (no "LS:LS" approval-key collapse) + def test_ls_with_path(self): + self.assertEqual( + unbound.extract_command_for_pretool(_event("LS", {"path": "/srv"})), "/srv" + ) + + def test_ls_no_path_uses_cwd(self): + self.assertEqual( + unbound.extract_command_for_pretool(_event("LS", {}, cwd="/repo")), "/repo" + ) + + def test_ls_no_path_no_cwd_falls_back_to_tool_name(self): + self.assertEqual(unbound.extract_command_for_pretool(_event("LS", {})), "LS") + + # Regression: unchanged behavior for the other tools + def test_grep_returns_pattern(self): + self.assertEqual( + unbound.extract_command_for_pretool(_event("Grep", {"pattern": "X"})), "X" + ) + + def test_glob_returns_pattern(self): + self.assertEqual( + unbound.extract_command_for_pretool(_event("Glob", {"pattern": "**/*.py"})), + "**/*.py", + ) + + def test_read_returns_file_path(self): + self.assertEqual( + unbound.extract_command_for_pretool( + _event("Read", {"file_path": "/x/y"}) + ), + "/x/y", + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/claude-code/hooks/unbound.py b/claude-code/hooks/unbound.py index 331abbf3..7a165d2e 100644 --- a/claude-code/hooks/unbound.py +++ b/claude-code/hooks/unbound.py @@ -20,8 +20,12 @@ AUDIT_LOG = Path.home() / ".claude" / "hooks" / "agent-audit.log" ERROR_LOG = Path.home() / ".claude" / "hooks" / "error.log" LAST_REPORT_FILE = Path.home() / ".claude" / "hooks" / ".last_error_report" -ALLOWED_NON_MCP_HOOK_NAMES = ['Bash', 'Read', 'Write', 'Edit'] # MCP tools (mcp__*) are always checked separately -NATIVE_FILE_TOOLS = {'Read', 'Write', 'Edit'} +# Grep/Glob/LS are read-equivalent: they expose file CONTENTS (Grep) or +# enumerate paths (Glob/LS), so they could leak/discover a secret file that the +# equivalent `cat`/`ls` is blocked from. The gateway maps them to `read_file`. +READ_EQUIVALENT_FILE_TOOLS = {'Grep', 'Glob', 'LS'} +ALLOWED_NON_MCP_HOOK_NAMES = ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob', 'LS'] # MCP tools (mcp__*) are always checked separately +NATIVE_FILE_TOOLS = {'Read', 'Write', 'Edit'} | READ_EQUIVALENT_FILE_TOOLS MCP_TOOL_PREFIX = 'mcp__' CLAUDE_MCP_CONFIG_PATH = Path.home() / ".claude.json" POLICY_CACHE_FILE = Path.home() / ".claude" / "hooks" / ".policy_cache.json" @@ -498,6 +502,11 @@ def extract_command_for_pretool(event: Dict) -> str: # Glob: pattern if tool_name == 'Glob' and 'pattern' in tool_input: return tool_input['pattern'] + # LS: path (fall back to cwd) so the command — and therefore the approval + # key — stays path-specific instead of collapsing to the literal "LS" for + # every directory listing. + if tool_name == 'LS': + return tool_input.get('path') or event.get('cwd') or tool_name # WebFetch: url if tool_name == 'WebFetch' and 'url' in tool_input: return tool_input['url'] @@ -511,6 +520,32 @@ def extract_command_for_pretool(event: Dict) -> str: return tool_name +def _derive_read_equivalent_path( + tool_name: str, tool_input: Dict, event: Dict +) -> Optional[str]: + """Resolve a concrete path for a read-equivalent tool (Grep/Glob/LS). + + Path-scoped policies need a path to match against: + - an explicit ``path`` argument always wins; + - Glob's ``pattern`` (itself a path-like glob, e.g. ``**/*.env``) is kept + as a fallback so a filename/glob policy still matches; + - otherwise fall back to the session ``cwd``, because Grep/Glob/LS with no + ``path`` scan the WHOLE working tree — the broadest (and most dangerous) + case must not forward an empty path and silently bypass policy. + + Returns ``None`` only when no path, no Glob pattern, and no cwd are present; + the caller then forwards no ``file_path`` (the gateway fails open). Uses + ``.get()`` throughout, so a malformed event never raises. + """ + if tool_name == 'Glob': + return ( + tool_input.get('path') + or tool_input.get('pattern') + or event.get('cwd') + ) + return tool_input.get('path') or event.get('cwd') + + def send_to_hook_api(request_body: Dict, api_key: str) -> Dict: """Send request to /v1/hooks/pretool endpoint.""" if not api_key: @@ -863,6 +898,13 @@ def process_pre_tool_use(event: Dict, api_key: str) -> Dict: tool_input = event.get('tool_input') or {} if 'file_path' in tool_input: metadata['file_path'] = tool_input['file_path'] + elif tool_name in READ_EQUIVALENT_FILE_TOOLS: + # Grep/Glob/LS expose file contents or enumerate paths; resolve a + # concrete path (incl. a cwd fallback for the no-`path` whole-tree scan) + # so path-scoped policies can evaluate them. See helper for the rules. + derived_path = _derive_read_equivalent_path(tool_name, tool_input, event) + if derived_path: + metadata['file_path'] = derived_path if is_mcp: # Parse mcp____ to extract server and tool for gateway matching