From 9b3f6f150080e3e208a70916793bb9001b85ec09 Mon Sep 17 00:00:00 2001
From: Nanda Pranesh <nandapranesh27@gmail.com>
Date: Wed, 3 Jun 2026 18:39:09 +0530
Subject: [PATCH 1/3] WEB-4507: capture Claude Code skill & agent invocations
 from hooks

Detect skill and agent/subagent invocations during agentic operations and
send them to the backend (analytics will consume this data later).

- parse_transcript_file: capture Skill + Task/Agent tool_use blocks from the
  session transcript, recording tool_use_id, cwd and gitBranch for each.
  PostToolUse is unreliable for these, so the transcript is the source of
  truth.
- collect_subagent_skill_tool_uses: sweep <session>/subagents/*.jsonl so
  skills/agents invoked inside subagents (separate transcripts the parent Stop
  never reads) are captured too, flagged is_subagent.
- build_llm_exchange: forward transcript tool_uses, skip the duplicate
  PostToolUse copies, and tag trigger (namespace-aware user-typed /skill vs
  agent auto-invoke) + is_subagent. tool_use_id enables downstream dedup.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 claude-code/hooks/unbound.py | 121 +++++++++++++++++++++++++++++++++--
 1 file changed, 116 insertions(+), 5 deletions(-)
diff --git a/claude-code/hooks/unbound.py b/claude-code/hooks/unbound.py
index ac8b79fd..094308e8 100644
--- a/claude-code/hooks/unbound.py
+++ b/claude-code/hooks/unbound.py
@@ -3,6 +3,7 @@
 import sys
 import json
 import os
+import glob
 import subprocess
 from pathlib import Path
 from datetime import datetime, timezone
@@ -401,6 +402,22 @@ def parse_transcript_file(transcript_path: str, user_prompt_timestamp: Optional[
                                             'content': text_content,
                                             'timestamp': entry_timestamp
                                         })
+                                # Skill invocations and agent/subagent spawns (Task/Agent)
+                                # appear in the transcript as tool_use blocks. PostToolUse
+                                # is unreliable for them, so the transcript is the source.
+                                elif (isinstance(content_item, dict)
+                                        and content_item.get('type') == 'tool_use'
+                                        and content_item.get('name') in ('Skill', 'Task', 'Agent')):
+                                    conversation_data['tool_uses'].append({
+                                        'type': 'PostToolUse',
+                                        'tool_name': content_item.get('name'),
+                                        'tool_input': content_item.get('input', {}),
+                                        'tool_response': {},
+                                        'timestamp': entry_timestamp,
+                                        'tool_use_id': content_item.get('id'),
+                                        'cwd': entry.get('cwd'),
+                                        'git_branch': entry.get('gitBranch'),
+                                    })
 
                             # Model is captured unconditionally so it survives even on usage-less assistant entries.
                             turn_model = turn_model or message.get('model')
@@ -846,7 +863,7 @@ def process_user_prompt_submit(event: Dict, api_key: str) -> Dict:
     return transform_response_for_claude_prompt(api_response)
 
 
-def build_llm_exchange(events: List[Dict], stop_assistant_message: Optional[str] = None, transcript_assistant_messages: Optional[List[str]] = None, model: Optional[str] = None, usage: Optional[Dict] = None) -> Optional[Dict]:
+def build_llm_exchange(events: List[Dict], stop_assistant_message: Optional[str] = None, transcript_assistant_messages: Optional[List[str]] = None, model: Optional[str] = None, usage: Optional[Dict] = None, transcript_tool_uses: Optional[List[Dict]] = None) -> Optional[Dict]:
     messages = []
     assistant_tool_uses = []
 
@@ -868,23 +885,55 @@ def build_llm_exchange(events: List[Dict], stop_assistant_message: Optional[str]
             prompt = event.get('prompt')
             if prompt:
                 user_prompt = prompt
-        
+
         elif hook_event_name == 'PostToolUse':
             tool_name = event.get('tool_name')
+            # Skills and agent/subagent spawns are captured from the transcript
+            # (with a stable tool_use_id). The PostToolUse copy lacks a reliable
+            # id and would create duplicate rows — skip it here.
+            if tool_name in ('Skill', 'Task', 'Agent'):
+                continue
             tool_input = event.get('tool_input', {})
             tool_response = event.get('tool_response', {})
-            
+
             if 'content' in tool_response and 'content' in tool_input:
                 if tool_response['content'] == tool_input['content']:
                     tool_response = {k: v for k, v in tool_response.items() if k != 'content'}
-            
+
             assistant_tool_uses.append({
                 'type': 'PostToolUse',
                 'tool_name': tool_name,
                 'tool_input': tool_input,
                 'tool_response': tool_response
             })
-    
+
+    # Skill / agent-spawn tool_uses captured from the transcript (main thread +
+    # subagents). Tagged with trigger (user-typed "/skill" vs agent) and
+    # is_subagent; deduped downstream on tool_use_id.
+    for tu in (transcript_tool_uses or []):
+        if tu.get('is_subagent'):
+            trigger = 'agent'
+        else:
+            skill_name = (tu.get('tool_input') or {}).get('skill') or ''
+            up = (user_prompt or '').strip()
+            trigger = 'agent'
+            if skill_name and up.startswith('/'):
+                typed_cmd = up[1:].split(None, 1)[0]
+                if typed_cmd and typed_cmd.split(':')[-1] == skill_name.split(':')[-1]:
+                    trigger = 'user'
+        assistant_tool_uses.append({
+            'type': tu.get('type', 'PostToolUse'),
+            'tool_name': tu.get('tool_name'),
+            'tool_input': tu.get('tool_input', {}),
+            'tool_response': tu.get('tool_response', {}),
+            'tool_use_id': tu.get('tool_use_id'),
+            'timestamp': tu.get('timestamp'),
+            'cwd': tu.get('cwd'),
+            'git_branch': tu.get('git_branch'),
+            'trigger': trigger,
+            'is_subagent': bool(tu.get('is_subagent')),
+        })
+
     if user_prompt:
         messages.append({'role': 'user', 'content': user_prompt})
     
@@ -985,6 +1034,60 @@ def cleanup_old_logs():
         save_logs(logs[-AUDIT_LOG_TOTAL_LIMIT:])
 
 
+def collect_subagent_skill_tool_uses(transcript_path: str, user_prompt_timestamp: Optional[str] = None) -> List[Dict]:
+    """Collect Skill / agent-spawn tool_use blocks from subagent transcripts.
+
+    Subagents (Task tool) write to <session_dir>/subagents/agent-*.jsonl — separate
+    files from the parent transcript — so skills/agents invoked inside a subagent
+    are never in the parent Stop transcript. Derive that directory from the parent
+    transcript path and extract any Skill/Task/Agent tool_use (flagged is_subagent).
+    """
+    tool_uses: List[Dict] = []
+    try:
+        if not transcript_path or not transcript_path.endswith('.jsonl'):
+            return tool_uses
+        subagents_dir = os.path.join(transcript_path[:-len('.jsonl')], 'subagents')
+        if not os.path.isdir(subagents_dir):
+            return tool_uses
+        for sub_file in glob.glob(os.path.join(subagents_dir, '*.jsonl')):
+            try:
+                with open(sub_file, 'r', encoding='utf-8') as f:
+                    for line in f:
+                        line = line.strip()
+                        if not line or ('"Skill"' not in line and '"Task"' not in line and '"Agent"' not in line):
+                            continue
+                        try:
+                            entry = json.loads(line)
+                        except json.JSONDecodeError:
+                            continue
+                        if entry.get('type') != 'assistant':
+                            continue
+                        entry_timestamp = entry.get('timestamp')
+                        if user_prompt_timestamp and entry_timestamp and entry_timestamp <= user_prompt_timestamp:
+                            continue
+                        message = entry.get('message', {})
+                        for content_item in message.get('content', []) or []:
+                            if (isinstance(content_item, dict)
+                                    and content_item.get('type') == 'tool_use'
+                                    and content_item.get('name') in ('Skill', 'Task', 'Agent')):
+                                tool_uses.append({
+                                    'type': 'PostToolUse',
+                                    'tool_name': content_item.get('name'),
+                                    'tool_input': content_item.get('input', {}),
+                                    'tool_response': {},
+                                    'timestamp': entry_timestamp,
+                                    'tool_use_id': content_item.get('id'),
+                                    'cwd': entry.get('cwd'),
+                                    'git_branch': entry.get('gitBranch'),
+                                    'is_subagent': True,
+                                })
+            except Exception:
+                continue
+    except Exception:
+        pass
+    return tool_uses
+
+
 def process_stop_event(event: Dict, api_key: str):
     session_id = event.get('session_id')
     transcript_path = event.get('transcript_path')
@@ -1012,6 +1115,7 @@ def process_stop_event(event: Dict, api_key: str):
     transcript_assistant_messages = []
     transcript_usage = None
     transcript_model = None
+    transcript_tool_uses = []
     if transcript_path and transcript_path != 'undefined' and user_prompt_timestamp:
         transcript_data = parse_transcript_file(transcript_path, user_prompt_timestamp)
         transcript_assistant_messages = [
@@ -1020,6 +1124,12 @@ def process_stop_event(event: Dict, api_key: str):
         ]
         transcript_usage = transcript_data.get('usage')
         transcript_model = transcript_data.get('model')
+        transcript_tool_uses = list(transcript_data.get('tool_uses', []))
+        # Skills/agents invoked inside subagents live in separate transcripts the
+        # parent Stop never reads — sweep them too (deduped downstream on id).
+        transcript_tool_uses.extend(
+            collect_subagent_skill_tool_uses(transcript_path, user_prompt_timestamp)
+        )
 
     # Prefer the dominant model from the transcript (covers sub-agent turns where
     # the cached session model is wrong). Fall back to the audit log otherwise.
@@ -1031,6 +1141,7 @@ def process_stop_event(event: Dict, api_key: str):
         transcript_assistant_messages=transcript_assistant_messages,
         model=session_model,
         usage=transcript_usage,
+        transcript_tool_uses=transcript_tool_uses,
     )
 
     if exchange:

From f335091319cf2f42f4c181c62c4f68d9b822423e Mon Sep 17 00:00:00 2001
From: Nanda Pranesh <nandapranesh27@gmail.com>
Date: Wed, 3 Jun 2026 18:58:09 +0530
Subject: [PATCH 2/3] WEB-4507: address Greptile review (trigger namespacing +
 dedup-able id)

- build_llm_exchange trigger: replace the pure-suffix match with exact
  comparisons (full name, or bare vs. namespaced in either direction), so two
  skills sharing a base name across namespaces (ns1:deploy vs ns2:deploy) no
  longer mislabel an agent invocation as user-typed.
- parse_transcript_file + collect_subagent_skill_tool_uses: only capture
  tool_use blocks that carry an id, so a row can always dedup on tool_use_id
  (avoids None ids that the partial unique index would skip).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 claude-code/hooks/unbound.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/claude-code/hooks/unbound.py b/claude-code/hooks/unbound.py
index 094308e8..af850e22 100644
--- a/claude-code/hooks/unbound.py
+++ b/claude-code/hooks/unbound.py
@@ -407,7 +407,8 @@ def parse_transcript_file(transcript_path: str, user_prompt_timestamp: Optional[
                                 # is unreliable for them, so the transcript is the source.
                                 elif (isinstance(content_item, dict)
                                         and content_item.get('type') == 'tool_use'
-                                        and content_item.get('name') in ('Skill', 'Task', 'Agent')):
+                                        and content_item.get('name') in ('Skill', 'Task', 'Agent')
+                                        and content_item.get('id')):  # require an id so the row can dedup
                                     conversation_data['tool_uses'].append({
                                         'type': 'PostToolUse',
                                         'tool_name': content_item.get('name'),
@@ -919,7 +920,16 @@ def build_llm_exchange(events: List[Dict], stop_assistant_message: Optional[str]
             trigger = 'agent'
             if skill_name and up.startswith('/'):
                 typed_cmd = up[1:].split(None, 1)[0]
-                if typed_cmd and typed_cmd.split(':')[-1] == skill_name.split(':')[-1]:
+                # Match full name, or bare-vs-namespaced in either direction
+                # (/stripe:test-cards typed vs bare "test-cards" skill, or vice
+                # versa). Deliberately NOT a pure suffix match, so two skills that
+                # merely share a base name across namespaces (ns1:deploy vs
+                # ns2:deploy) don't mislabel an agent invocation as user-typed.
+                if typed_cmd and (
+                    typed_cmd == skill_name
+                    or typed_cmd.split(':')[-1] == skill_name
+                    or typed_cmd == skill_name.split(':')[-1]
+                ):
                     trigger = 'user'
         assistant_tool_uses.append({
             'type': tu.get('type', 'PostToolUse'),
@@ -1069,7 +1079,8 @@ def collect_subagent_skill_tool_uses(transcript_path: str, user_prompt_timestamp
                         for content_item in message.get('content', []) or []:
                             if (isinstance(content_item, dict)
                                     and content_item.get('type') == 'tool_use'
-                                    and content_item.get('name') in ('Skill', 'Task', 'Agent')):
+                                    and content_item.get('name') in ('Skill', 'Task', 'Agent')
+                                    and content_item.get('id')):  # require an id so the row can dedup
                                 tool_uses.append({
                                     'type': 'PostToolUse',
                                     'tool_name': content_item.get('name'),

From 6467cff4afcc77dc0b78ca6ddc45ad0e34967886 Mon Sep 17 00:00:00 2001
From: Nanda Pranesh <nandapranesh27@gmail.com>
Date: Thu, 4 Jun 2026 12:04:04 +0530
Subject: [PATCH 3/3] WEB-4507: trim verbose comments (no functional change)

Condensed the multi-line explanatory comments added for skill/agent capture
to concise one-liners. No behavior change.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 claude-code/hooks/unbound.py | 34 +++++++++++-----------------------
 1 file changed, 11 insertions(+), 23 deletions(-)

diff --git a/claude-code/hooks/unbound.py b/claude-code/hooks/unbound.py
index af850e22..7248cac6 100644
--- a/claude-code/hooks/unbound.py
+++ b/claude-code/hooks/unbound.py
@@ -402,13 +402,11 @@ def parse_transcript_file(transcript_path: str, user_prompt_timestamp: Optional[
                                             'content': text_content,
                                             'timestamp': entry_timestamp
                                         })
-                                # Skill invocations and agent/subagent spawns (Task/Agent)
-                                # appear in the transcript as tool_use blocks. PostToolUse
-                                # is unreliable for them, so the transcript is the source.
+                                # Skill/agent invocations (transcript is the reliable source, not PostToolUse).
                                 elif (isinstance(content_item, dict)
                                         and content_item.get('type') == 'tool_use'
                                         and content_item.get('name') in ('Skill', 'Task', 'Agent')
-                                        and content_item.get('id')):  # require an id so the row can dedup
+                                        and content_item.get('id')):  # need an id to dedup
                                     conversation_data['tool_uses'].append({
                                         'type': 'PostToolUse',
                                         'tool_name': content_item.get('name'),
@@ -889,9 +887,7 @@ def build_llm_exchange(events: List[Dict], stop_assistant_message: Optional[str]
 
         elif hook_event_name == 'PostToolUse':
             tool_name = event.get('tool_name')
-            # Skills and agent/subagent spawns are captured from the transcript
-            # (with a stable tool_use_id). The PostToolUse copy lacks a reliable
-            # id and would create duplicate rows — skip it here.
+            # Skills/agents come from the transcript (stable id); skip the id-less PostToolUse duplicate.
             if tool_name in ('Skill', 'Task', 'Agent'):
                 continue
             tool_input = event.get('tool_input', {})
@@ -908,9 +904,7 @@ def build_llm_exchange(events: List[Dict], stop_assistant_message: Optional[str]
                 'tool_response': tool_response
             })
 
-    # Skill / agent-spawn tool_uses captured from the transcript (main thread +
-    # subagents). Tagged with trigger (user-typed "/skill" vs agent) and
-    # is_subagent; deduped downstream on tool_use_id.
+    # Transcript-sourced skill/agent invocations: tag trigger (user vs agent) + is_subagent.
     for tu in (transcript_tool_uses or []):
         if tu.get('is_subagent'):
             trigger = 'agent'
@@ -920,11 +914,8 @@ def build_llm_exchange(events: List[Dict], stop_assistant_message: Optional[str]
             trigger = 'agent'
             if skill_name and up.startswith('/'):
                 typed_cmd = up[1:].split(None, 1)[0]
-                # Match full name, or bare-vs-namespaced in either direction
-                # (/stripe:test-cards typed vs bare "test-cards" skill, or vice
-                # versa). Deliberately NOT a pure suffix match, so two skills that
-                # merely share a base name across namespaces (ns1:deploy vs
-                # ns2:deploy) don't mislabel an agent invocation as user-typed.
+                # Match full or bare-vs-namespaced name (not a pure suffix match,
+                # so ns1:deploy vs ns2:deploy isn't mislabelled user-typed).
                 if typed_cmd and (
                     typed_cmd == skill_name
                     or typed_cmd.split(':')[-1] == skill_name
@@ -1045,12 +1036,10 @@ def cleanup_old_logs():
 
 
 def collect_subagent_skill_tool_uses(transcript_path: str, user_prompt_timestamp: Optional[str] = None) -> List[Dict]:
-    """Collect Skill / agent-spawn tool_use blocks from subagent transcripts.
+    """Collect Skill/agent tool_use blocks from subagent transcripts.
 
-    Subagents (Task tool) write to <session_dir>/subagents/agent-*.jsonl — separate
-    files from the parent transcript — so skills/agents invoked inside a subagent
-    are never in the parent Stop transcript. Derive that directory from the parent
-    transcript path and extract any Skill/Task/Agent tool_use (flagged is_subagent).
+    Subagents write to <session>/subagents/agent-*.jsonl (separate from the parent
+    transcript), so their skills/agents are missed otherwise. Flagged is_subagent.
     """
     tool_uses: List[Dict] = []
     try:
@@ -1080,7 +1069,7 @@ def collect_subagent_skill_tool_uses(transcript_path: str, user_prompt_timestamp
                             if (isinstance(content_item, dict)
                                     and content_item.get('type') == 'tool_use'
                                     and content_item.get('name') in ('Skill', 'Task', 'Agent')
-                                    and content_item.get('id')):  # require an id so the row can dedup
+                                    and content_item.get('id')):  # need an id to dedup
                                 tool_uses.append({
                                     'type': 'PostToolUse',
                                     'tool_name': content_item.get('name'),
@@ -1136,8 +1125,7 @@ def process_stop_event(event: Dict, api_key: str):
         transcript_usage = transcript_data.get('usage')
         transcript_model = transcript_data.get('model')
         transcript_tool_uses = list(transcript_data.get('tool_uses', []))
-        # Skills/agents invoked inside subagents live in separate transcripts the
-        # parent Stop never reads — sweep them too (deduped downstream on id).
+        # Subagents write to separate transcripts the parent Stop never reads — sweep them too.
         transcript_tool_uses.extend(
             collect_subagent_skill_tool_uses(transcript_path, user_prompt_timestamp)
         )