websentry-ai · vigneshsubbiah16 · Jun 17, 2026 · Jun 17, 2026 · Jun 17, 2026
diff --git a/claude-code/hooks/test_read_equivalent_tools.py b/claude-code/hooks/test_read_equivalent_tools.py
@@ -0,0 +1,104 @@
+"""
+Tests for read-equivalent tool (Grep/Glob/LS) path forwarding in
+claude-code/hooks/unbound.py (WEB-4871).
+
+Covers:
+  - _derive_read_equivalent_path  (file_path the gateway evaluates)
+  - extract_command_for_pretool   (LS command / approval-key, regression on others)
+
+These pin the W1/W2/W3 review fixes: the no-`path` (whole-cwd-scan) case must
+forward the cwd rather than nothing, LS must be path-specific, and Grep's regex
+pattern must never be forwarded as a path.
+"""
+
+import unittest
+
+import unbound
+
+
+def _event(tool_name, tool_input, cwd=None):
+    e = {"tool_name": tool_name, "tool_input": tool_input}
+    if cwd is not None:
+        e["cwd"] = cwd
+    return e
+
+
+class TestDeriveReadEquivalentPath(unittest.TestCase):
+    def d(self, tool_name, tool_input, cwd=None):
+        return unbound._derive_read_equivalent_path(
+            tool_name, tool_input, {"cwd": cwd} if cwd is not None else {}
+        )
+
+    # Explicit path always wins
+    def test_grep_explicit_path(self):
+        self.assertEqual(self.d("Grep", {"pattern": "X", "path": "/a/b"}), "/a/b")
+
+    def test_ls_explicit_path(self):
+        self.assertEqual(self.d("LS", {"path": "/etc"}), "/etc")
+
+    def test_glob_explicit_path_beats_pattern(self):
+        self.assertEqual(self.d("Glob", {"pattern": "**/*.env", "path": "/p"}), "/p")
+
+    # W2: no path -> fall back to cwd (whole-tree scan must not be path-less)
+    def test_grep_no_path_falls_back_to_cwd(self):
+        self.assertEqual(self.d("Grep", {"pattern": "AWS_SECRET"}, cwd="/repo"), "/repo")
+
+    def test_ls_no_path_falls_back_to_cwd(self):
+        self.assertEqual(self.d("LS", {}, cwd="/repo"), "/repo")
+
+    # W3: Glob keeps the path-like pattern as a fallback (filename/glob policies)
+    def test_glob_pattern_used_when_no_path(self):
+        self.assertEqual(self.d("Glob", {"pattern": "**/*.pem"}, cwd="/repo"), "**/*.pem")
+
+    def test_glob_no_path_no_pattern_falls_back_to_cwd(self):
+        self.assertEqual(self.d("Glob", {}, cwd="/repo"), "/repo")
+
+    # Grep's regex pattern is NEVER forwarded as a path
+    def test_grep_pattern_never_used_as_path(self):
+        self.assertEqual(self.d("Grep", {"pattern": "SECRET_KEY.*="}, cwd="/repo"), "/repo")
+
+    # Non-breaking edge: nothing available -> None (caller forwards no file_path)
+    def test_no_path_no_cwd_returns_none(self):
+        self.assertIsNone(self.d("Grep", {"pattern": "X"}))
+        self.assertIsNone(self.d("LS", {}))
+        self.assertIsNone(self.d("Glob", {}))
+
+
+class TestExtractCommandForPretool(unittest.TestCase):
+    # W1: LS command is path-specific (no "LS:LS" approval-key collapse)
+    def test_ls_with_path(self):
+        self.assertEqual(
+            unbound.extract_command_for_pretool(_event("LS", {"path": "/srv"})), "/srv"
+        )
+
+    def test_ls_no_path_uses_cwd(self):
+        self.assertEqual(
+            unbound.extract_command_for_pretool(_event("LS", {}, cwd="/repo")), "/repo"
+        )
+
+    def test_ls_no_path_no_cwd_falls_back_to_tool_name(self):
+        self.assertEqual(unbound.extract_command_for_pretool(_event("LS", {})), "LS")
+
+    # Regression: unchanged behavior for the other tools
+    def test_grep_returns_pattern(self):
+        self.assertEqual(
+            unbound.extract_command_for_pretool(_event("Grep", {"pattern": "X"})), "X"
+        )
+
+    def test_glob_returns_pattern(self):
+        self.assertEqual(
+            unbound.extract_command_for_pretool(_event("Glob", {"pattern": "**/*.py"})),
+            "**/*.py",
+        )
+
+    def test_read_returns_file_path(self):
+        self.assertEqual(
+            unbound.extract_command_for_pretool(
+                _event("Read", {"file_path": "/x/y"})
+            ),
+            "/x/y",
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/claude-code/hooks/unbound.py b/claude-code/hooks/unbound.py
@@ -20,8 +20,12 @@
 AUDIT_LOG = Path.home() / ".claude" / "hooks" / "agent-audit.log"
 ERROR_LOG = Path.home() / ".claude" / "hooks" / "error.log"
 LAST_REPORT_FILE = Path.home() / ".claude" / "hooks" / ".last_error_report"
-ALLOWED_NON_MCP_HOOK_NAMES = ['Bash', 'Read', 'Write', 'Edit']  # MCP tools (mcp__*) are always checked separately
-NATIVE_FILE_TOOLS = {'Read', 'Write', 'Edit'}
+# Grep/Glob/LS are read-equivalent: they expose file CONTENTS (Grep) or
+# enumerate paths (Glob/LS), so they could leak/discover a secret file that the
+# equivalent `cat`/`ls` is blocked from. The gateway maps them to `read_file`.
+READ_EQUIVALENT_FILE_TOOLS = {'Grep', 'Glob', 'LS'}
+ALLOWED_NON_MCP_HOOK_NAMES = ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob', 'LS']  # MCP tools (mcp__*) are always checked separately
+NATIVE_FILE_TOOLS = {'Read', 'Write', 'Edit'} | READ_EQUIVALENT_FILE_TOOLS
 MCP_TOOL_PREFIX = 'mcp__'
 CLAUDE_MCP_CONFIG_PATH = Path.home() / ".claude.json"
 POLICY_CACHE_FILE = Path.home() / ".claude" / "hooks" / ".policy_cache.json"
@@ -498,6 +502,11 @@ def extract_command_for_pretool(event: Dict) -> str:
     # Glob: pattern
     if tool_name == 'Glob' and 'pattern' in tool_input:
         return tool_input['pattern']
+    # LS: path (fall back to cwd) so the command — and therefore the approval
+    # key — stays path-specific instead of collapsing to the literal "LS" for
+    # every directory listing.
+    if tool_name == 'LS':
+        return tool_input.get('path') or event.get('cwd') or tool_name
     # WebFetch: url
     if tool_name == 'WebFetch' and 'url' in tool_input:
         return tool_input['url']
@@ -511,6 +520,32 @@ def extract_command_for_pretool(event: Dict) -> str:
     return tool_name
 
 
+def _derive_read_equivalent_path(
+    tool_name: str, tool_input: Dict, event: Dict
+) -> Optional[str]:
+    """Resolve a concrete path for a read-equivalent tool (Grep/Glob/LS).
+
+    Path-scoped policies need a path to match against:
+      - an explicit ``path`` argument always wins;
+      - Glob's ``pattern`` (itself a path-like glob, e.g. ``**/*.env``) is kept
+        as a fallback so a filename/glob policy still matches;
+      - otherwise fall back to the session ``cwd``, because Grep/Glob/LS with no
+        ``path`` scan the WHOLE working tree — the broadest (and most dangerous)
+        case must not forward an empty path and silently bypass policy.
+
+    Returns ``None`` only when no path, no Glob pattern, and no cwd are present;
+    the caller then forwards no ``file_path`` (the gateway fails open). Uses
+    ``.get()`` throughout, so a malformed event never raises.
+    """
+    if tool_name == 'Glob':
+        return (
+            tool_input.get('path')
+            or tool_input.get('pattern')
+            or event.get('cwd')
+        )
+    return tool_input.get('path') or event.get('cwd')
+
+
 def send_to_hook_api(request_body: Dict, api_key: str) -> Dict:
     """Send request to /v1/hooks/pretool endpoint."""
     if not api_key:
@@ -863,6 +898,13 @@ def process_pre_tool_use(event: Dict, api_key: str) -> Dict:
     tool_input = event.get('tool_input') or {}
     if 'file_path' in tool_input:
         metadata['file_path'] = tool_input['file_path']
+    elif tool_name in READ_EQUIVALENT_FILE_TOOLS:
+        # Grep/Glob/LS expose file contents or enumerate paths; resolve a
+        # concrete path (incl. a cwd fallback for the no-`path` whole-tree scan)
+        # so path-scoped policies can evaluate them. See helper for the rules.
+        derived_path = _derive_read_equivalent_path(tool_name, tool_input, event)
+        if derived_path:
+            metadata['file_path'] = derived_path
 
     if is_mcp:
         # Parse mcp__<server>__<tool> to extract server and tool for gateway matching