From d4904100f7185ca3b27c74ee28babaf40bbee61d Mon Sep 17 00:00:00 2001
From: Vignesh Subbiah <vis@unboundsecurity.ai>
Date: Mon, 15 Jun 2026 13:51:29 -0700
Subject: [PATCH] hooks: ship each tool_use once per turn (fix Stop-event
 duplicate counts)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Claude Code (and Codex) fire multiple Stop events per user turn — one per
agent turn. The Stop handler rebuilds the exchange window from the persisted
audit log, resetting on each UserPromptSubmit and appending every event after.

Commit 16d1ffe removed the post-send prune of already-shipped events (it had
broken the get_recent_user_prompts_for_session lookback by wiping prior
prompts). With the prune gone, every Stop re-shipped ALL PostToolUse events
accumulated since the last UserPromptSubmit, so a single file edit was counted
1 + 2 + ... + N times across N Stops — the root cause of inflated
policy-match counts (one edit seen 14-35x).

Fix — restore the prune safely (mark, don't delete):

- claude-code: skip PostToolUse audit entries already flagged SHIPPED_FLAG when
  building the window; after a successful send, stamp the shipped tool entries
  (and the turn's UserPromptSubmit anchor) and persist. Entries stay in the log
  so the user-prompt lookback keeps working. A tool-free conversation turn
  still ships exactly once via the prompt anchor.
- codex: ships tool_uses from the transcript per turn; mark the turn's
  UserPromptSubmit anchor shipped after a successful send and skip resend.
- cursor: groups by generation_id; mark the generation's stop entry shipped and
  skip if a stop fires twice for the same generation.

_persist_*_flags re-read the log before stamping to avoid clobbering entries a
concurrent hook appended.

Tests: new test_stop_dedup.py in each hook dir reproduces the bug (multiple
Stops in one turn) and asserts each tool_use ships exactly once, plus that the
get_recent_user_prompts_for_session lookback still returns prior prompts (what
16d1ffe protected). Verified red without the fix, green with it.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 claude-code/hooks/test_stop_dedup.py | 232 +++++++++++++++++++++++++++
 claude-code/hooks/unbound.py         |  92 ++++++++++-
 codex/hooks/test_stop_dedup.py       | 115 +++++++++++++
 codex/hooks/unbound.py               |  53 +++++-
 cursor/test_stop_dedup.py            | 103 ++++++++++++
 cursor/unbound.py                    |  53 ++++--
 6 files changed, 628 insertions(+), 20 deletions(-)
 create mode 100644 claude-code/hooks/test_stop_dedup.py
 create mode 100644 codex/hooks/test_stop_dedup.py
 create mode 100644 cursor/test_stop_dedup.py

diff --git a/claude-code/hooks/test_stop_dedup.py b/claude-code/hooks/test_stop_dedup.py
new file mode 100644
index 00000000..f90f8f45
--- /dev/null
+++ b/claude-code/hooks/test_stop_dedup.py
@@ -0,0 +1,232 @@
+"""
+Regression tests for the Stop-event duplicate-count bug in the claude-code hook.
+
+Background
+----------
+Claude Code fires multiple `Stop` events per user turn (one per agent turn).
+`process_stop_event` rebuilds the exchange window from the persisted audit log:
+it resets the window on each `UserPromptSubmit` and appends every event after.
+Commit 16d1ffe removed the post-send prune of already-shipped events (to fix a
+`get_recent_user_prompts_for_session` lookback regression). With the prune gone,
+every Stop re-shipped ALL PostToolUse events accumulated since the last
+UserPromptSubmit, so one file edit got counted 1 + 2 + ... + N times across N
+Stops.
+
+These tests pin both halves of the contract:
+  - each PostToolUse tool_use is shipped exactly once across multiple Stops
+    (idempotent), and
+  - the user-prompt lookback still returns prior prompts (what 16d1ffe protected).
+"""
+
+import json
+import tempfile
+import unittest
+from pathlib import Path
+from unittest.mock import patch
+
+import unbound
+
+
+def _entry(session_id, hook_event_name, timestamp, **event_fields):
+    event = {'session_id': session_id, 'hook_event_name': hook_event_name}
+    event.update(event_fields)
+    return {'timestamp': timestamp, 'session_id': session_id, 'event': event}
+
+
+class StopDedupTestBase(unittest.TestCase):
+    def setUp(self):
+        self.tmp = Path(tempfile.mkdtemp())
+        self.audit_log = self.tmp / "agent-audit.log"
+        self._audit_patcher = patch.object(unbound, "AUDIT_LOG", self.audit_log)
+        self._audit_patcher.start()
+        self.addCleanup(self._audit_patcher.stop)
+
+        # Capture every exchange handed to send_to_api; pretend the send
+        # succeeded so the shipped-marker path runs.
+        self.sent_exchanges = []
+
+        def _fake_send(exchange, api_key):
+            self.sent_exchanges.append(exchange)
+            return True
+
+        self._send_patcher = patch.object(unbound, "send_to_api", _fake_send)
+        self._send_patcher.start()
+        self.addCleanup(self._send_patcher.stop)
+
+        # Identity probe shells out / reads config; stub it.
+        self._identity_patcher = patch.object(
+            unbound, "build_account_identity", lambda probe=False: {}
+        )
+        self._identity_patcher.start()
+        self.addCleanup(self._identity_patcher.stop)
+
+    def _write_log(self, entries):
+        unbound.save_logs(entries)
+
+    def _all_shipped_tool_paths(self):
+        """Flatten every tool_use file_path shipped across all sends."""
+        paths = []
+        for exchange in self.sent_exchanges:
+            for msg in exchange.get('messages', []):
+                for tu in msg.get('tool_use', []) or []:
+                    paths.append(tu.get('tool_input', {}).get('file_path'))
+        return paths
+
+
+class TestStopEventIdempotent(StopDedupTestBase):
+    """Multiple Stops in one turn must ship each tool_use exactly once."""
+
+    def _turn_with_two_edits(self):
+        sid = "sess-1"
+        return [
+            _entry(sid, 'UserPromptSubmit', '2026-06-15T00:00:00Z',
+                   prompt='please edit two files'),
+            _entry(sid, 'PostToolUse', '2026-06-15T00:00:01Z',
+                   tool_name='Edit', tool_input={'file_path': '/a.py'},
+                   tool_response={'ok': True}),
+            _entry(sid, 'PostToolUse', '2026-06-15T00:00:02Z',
+                   tool_name='Edit', tool_input={'file_path': '/b.py'},
+                   tool_response={'ok': True}),
+        ]
+
+    def _stop_event(self, sid="sess-1", msg="done"):
+        return {
+            'session_id': sid,
+            'hook_event_name': 'Stop',
+            'transcript_path': 'undefined',
+            'last_assistant_message': msg,
+        }
+
+    def test_single_stop_ships_each_tool_once(self):
+        self._write_log(self._turn_with_two_edits())
+
+        unbound.process_stop_event(self._stop_event(), api_key='k')
+
+        paths = self._all_shipped_tool_paths()
+        self.assertEqual(sorted(paths), ['/a.py', '/b.py'])
+
+    def test_repeated_stops_do_not_reship(self):
+        # The bug: 2nd/3rd Stop in the same turn re-ship the same tool_uses.
+        self._write_log(self._turn_with_two_edits())
+
+        for _ in range(3):
+            unbound.process_stop_event(self._stop_event(), api_key='k')
+
+        paths = self._all_shipped_tool_paths()
+        # Exactly one tool_use per edited file across ALL Stops — no inflation.
+        self.assertEqual(sorted(paths), ['/a.py', '/b.py'])
+
+    def test_new_tool_after_first_stop_is_shipped_once(self):
+        # First Stop ships /a.py. Then a new edit lands and a second Stop fires:
+        # /b.py must ship, /a.py must NOT ship again.
+        sid = "sess-1"
+        self._write_log([
+            _entry(sid, 'UserPromptSubmit', '2026-06-15T00:00:00Z', prompt='go'),
+            _entry(sid, 'PostToolUse', '2026-06-15T00:00:01Z',
+                   tool_name='Edit', tool_input={'file_path': '/a.py'},
+                   tool_response={'ok': True}),
+        ])
+        unbound.process_stop_event(self._stop_event(), api_key='k')
+
+        # A new PostToolUse arrives (as main() would append it), then Stop again.
+        unbound.append_to_audit_log(
+            _entry(sid, 'PostToolUse', '2026-06-15T00:00:03Z',
+                   tool_name='Edit', tool_input={'file_path': '/b.py'},
+                   tool_response={'ok': True})
+        )
+        unbound.process_stop_event(self._stop_event(), api_key='k')
+
+        paths = self._all_shipped_tool_paths()
+        self.assertEqual(sorted(paths), ['/a.py', '/b.py'])
+
+    def test_stop_with_nothing_new_does_not_send(self):
+        # A turn with only a prompt and an already-shipped edit: a redundant
+        # Stop must not produce any send at all.
+        self._write_log(self._turn_with_two_edits())
+        unbound.process_stop_event(self._stop_event(), api_key='k')
+        sends_after_first = len(self.sent_exchanges)
+
+        unbound.process_stop_event(self._stop_event(), api_key='k')
+        self.assertEqual(len(self.sent_exchanges), sends_after_first)
+
+    def test_tool_free_turn_ships_exactly_once(self):
+        # A pure-conversation turn (no PostToolUse) must still ship once — and
+        # only once — across multiple Stops. Guards against over-narrowing the
+        # send to tool-bearing turns only.
+        sid = "sess-1"
+        self._write_log([
+            _entry(sid, 'UserPromptSubmit', '2026-06-15T00:00:00Z',
+                   prompt='just answer, no tools'),
+        ])
+
+        for _ in range(3):
+            unbound.process_stop_event(self._stop_event(msg='here is the answer'),
+                                       api_key='k')
+
+        self.assertEqual(len(self.sent_exchanges), 1)
+
+
+class TestLookbackStillWorks(StopDedupTestBase):
+    """The thing 16d1ffe protected: prior user prompts remain in the log and are
+    returned by get_recent_user_prompts_for_session after Stops are processed."""
+
+    def test_prior_prompts_survive_stop_processing(self):
+        sid = "sess-1"
+        self._write_log([
+            _entry(sid, 'UserPromptSubmit', '2026-06-15T00:00:00Z', prompt='first prompt'),
+            _entry(sid, 'PostToolUse', '2026-06-15T00:00:01Z',
+                   tool_name='Edit', tool_input={'file_path': '/a.py'},
+                   tool_response={'ok': True}),
+            _entry(sid, 'UserPromptSubmit', '2026-06-15T00:01:00Z', prompt='second prompt'),
+            _entry(sid, 'PostToolUse', '2026-06-15T00:01:01Z',
+                   tool_name='Edit', tool_input={'file_path': '/b.py'},
+                   tool_response={'ok': True}),
+        ])
+
+        stop = {
+            'session_id': sid,
+            'hook_event_name': 'Stop',
+            'transcript_path': 'undefined',
+            'last_assistant_message': 'done',
+        }
+        # Process a couple of Stops, which is when the old code wiped the log.
+        unbound.process_stop_event(stop, api_key='k')
+        unbound.process_stop_event(stop, api_key='k')
+
+        prompts = unbound.get_recent_user_prompts_for_session(sid, n=5)
+        self.assertEqual(prompts, ['first prompt', 'second prompt'])
+
+    def test_shipped_marker_does_not_drop_postooluse_entries(self):
+        # Marking entries shipped must keep them in the log (size unchanged),
+        # only annotate them — otherwise rotation/lookback assumptions break.
+        sid = "sess-1"
+        self._write_log([
+            _entry(sid, 'UserPromptSubmit', '2026-06-15T00:00:00Z', prompt='go'),
+            _entry(sid, 'PostToolUse', '2026-06-15T00:00:01Z',
+                   tool_name='Edit', tool_input={'file_path': '/a.py'},
+                   tool_response={'ok': True}),
+        ])
+        before = len(unbound.load_existing_logs())
+
+        stop = {
+            'session_id': sid,
+            'hook_event_name': 'Stop',
+            'transcript_path': 'undefined',
+            'last_assistant_message': 'done',
+        }
+        unbound.process_stop_event(stop, api_key='k')
+
+        after_logs = unbound.load_existing_logs()
+        # Nothing is dropped — entries are annotated in place, not pruned.
+        self.assertEqual(len(after_logs), before)
+        # The PostToolUse entry is now flagged shipped.
+        flagged_tools = [
+            l for l in after_logs
+            if l.get(unbound.SHIPPED_FLAG)
+            and l.get('event', {}).get('hook_event_name') == 'PostToolUse'
+        ]
+        self.assertEqual(len(flagged_tools), 1)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/claude-code/hooks/unbound.py b/claude-code/hooks/unbound.py
index 331abbf3..8f5ae9ce 100644
--- a/claude-code/hooks/unbound.py
+++ b/claude-code/hooks/unbound.py
@@ -31,6 +31,13 @@
 PRETOOL_USER_MESSAGES_LIMIT = 5
 AUDIT_LOG_TOTAL_LIMIT = 100
 
+# Per-entry flag set on PostToolUse audit-log entries once their tool_use has
+# been shipped to the proxy on a Stop event. Claude Code fires multiple Stop
+# events per turn; this flag lets a later Stop skip already-shipped tool_uses
+# instead of re-sending them, while the entries stay in the log so the
+# user-prompt lookback (get_recent_user_prompts_for_session) keeps working.
+SHIPPED_FLAG = 'unbound_shipped'
+
 APPROVAL_TIMEOUT = 4 * 60 * 60
 
 DISCOVERY_DEBOUNCE_SECONDS = 24 * 3600
@@ -1126,29 +1133,85 @@ def cleanup_old_logs():
         save_logs(logs[-AUDIT_LOG_TOTAL_LIMIT:])
 
 
+def _event_name_of(log: Dict) -> Optional[str]:
+    if 'event' in log:
+        return log.get('event', {}).get('hook_event_name')
+    return log.get('hook_event_name')
+
+
+def _log_identity(log: Dict) -> tuple:
+    """Stable identity for an audit-log entry, used to re-find the entries this
+    Stop shipped after a fresh re-read (a concurrent Stop may have appended new
+    lines since we loaded the log)."""
+    event = log.get('event', {}) if 'event' in log else log
+    return (
+        log.get('timestamp'),
+        log.get('session_id') or event.get('session_id'),
+        event.get('hook_event_name'),
+        event.get('tool_name'),
+    )
+
+
+def _persist_shipped_flags(session_id: Optional[str], shipped_logs: List[Dict]) -> None:
+    """Re-read the audit log and stamp SHIPPED_FLAG on the entries that were
+    just shipped, then rewrite it. Re-reading (instead of saving our stale
+    in-memory copy) avoids clobbering entries a concurrent hook appended after
+    we loaded the log."""
+    if not shipped_logs:
+        return
+    shipped_ids = {_log_identity(log) for log in shipped_logs}
+    fresh_logs = load_existing_logs()
+    changed = False
+    for log in fresh_logs:
+        if log.get(SHIPPED_FLAG):
+            continue
+        if _log_identity(log) in shipped_ids:
+            log[SHIPPED_FLAG] = True
+            changed = True
+    if changed:
+        save_logs(fresh_logs)
+
+
 def process_stop_event(event: Dict, api_key: str):
     session_id = event.get('session_id')
     transcript_path = event.get('transcript_path')
     last_assistant_message = event.get('last_assistant_message')
 
     logs = load_existing_logs()
-    
+
     session_events = []
+    # PostToolUse entries (the same dict objects held in `logs`) that this Stop
+    # is about to ship. Marked shipped only after send_to_api succeeds.
+    pending_tool_logs = []
+    # The current turn's UserPromptSubmit entry. Doubles as the per-turn shipped
+    # anchor so a no-tool conversation turn ships exactly once, not once per Stop.
+    prompt_log = None
     current_conversation_started = False
     user_prompt_timestamp = None
-    
+
     for log in logs:
         log_session_id = log.get('session_id') or log.get('event', {}).get('session_id')
-        
+
         if log_session_id == session_id:
-            event_name = log.get('event', {}).get('hook_event_name') if 'event' in log else log.get('hook_event_name')
-            
+            event_name = _event_name_of(log)
+
             if event_name == 'UserPromptSubmit':
+                # Reset the window on each new prompt. The anchor entries
+                # (UserPromptSubmit, SessionStart) stay in the log untouched so
+                # the user-prompt lookback keeps finding prior prompts.
                 session_events = [log]
+                pending_tool_logs = []
+                prompt_log = log
                 current_conversation_started = True
                 user_prompt_timestamp = log.get('timestamp')
             elif current_conversation_started:
+                # Skip PostToolUse entries already shipped by an earlier Stop in
+                # this same turn — re-shipping them is the duplicate-count bug.
+                if event_name == 'PostToolUse' and log.get(SHIPPED_FLAG):
+                    continue
                 session_events.append(log)
+                if event_name == 'PostToolUse':
+                    pending_tool_logs.append(log)
 
     transcript_assistant_messages = []
     transcript_usage = None
@@ -1166,6 +1229,15 @@ def process_stop_event(event: Dict, api_key: str):
     # the cached session model is wrong). Fall back to the audit log otherwise.
     session_model = transcript_model or _extract_session_model(logs, session_id) or 'auto'
 
+    # Idempotency guard: Claude Code fires a Stop per agent turn, so a single
+    # user turn produces several Stop events. Re-sending already-shipped
+    # tool_uses is the root cause of inflated policy-match counts. Send only
+    # when this Stop has something new: an unshipped PostToolUse, or — for a
+    # tool-free conversation turn — a turn whose prompt anchor hasn't shipped yet.
+    turn_already_shipped = prompt_log is not None and prompt_log.get(SHIPPED_FLAG)
+    if not pending_tool_logs and turn_already_shipped:
+        return
+
     exchange = build_llm_exchange(
         session_events,
         stop_assistant_message=last_assistant_message,
@@ -1174,8 +1246,14 @@ def process_stop_event(event: Dict, api_key: str):
         usage=transcript_usage,
     )
 
-    if exchange:
-        send_to_api(exchange, api_key)
+    if exchange and send_to_api(exchange, api_key):
+        # Persist the shipped markers so later Stops in this turn skip what was
+        # already sent. The entries stay in the log for the user-prompt lookback.
+        for log in pending_tool_logs:
+            log[SHIPPED_FLAG] = True
+        if prompt_log is not None:
+            prompt_log[SHIPPED_FLAG] = True
+        _persist_shipped_flags(session_id, pending_tool_logs + ([prompt_log] if prompt_log else []))
 
 
 def get_api_key():
diff --git a/codex/hooks/test_stop_dedup.py b/codex/hooks/test_stop_dedup.py
new file mode 100644
index 00000000..8ab1439d
--- /dev/null
+++ b/codex/hooks/test_stop_dedup.py
@@ -0,0 +1,115 @@
+"""
+Regression tests for the Stop-event duplicate-count bug in the codex hook.
+
+Like the claude-code hook, codex fires multiple `Stop` events per user turn and
+`process_stop_event` rebuilds the exchange each time. Codex sources its
+tool_uses from the transcript (scoped by the turn's user-prompt timestamp), so a
+second Stop in the same turn re-ships the same tool_uses. Commit 16d1ffe removed
+the post-send prune, so nothing stopped the resend.
+
+These tests pin the contract:
+  - a turn is shipped exactly once across multiple Stops (idempotent), and
+  - prior user prompts remain in the audit log for the lookback.
+"""
+
+import tempfile
+import unittest
+from pathlib import Path
+from unittest.mock import patch
+
+import unbound
+
+
+def _entry(session_id, hook_event_name, timestamp, **event_fields):
+    event = {'session_id': session_id, 'hook_event_name': hook_event_name}
+    event.update(event_fields)
+    return {'timestamp': timestamp, 'session_id': session_id, 'event': event}
+
+
+class CodexStopDedupBase(unittest.TestCase):
+    def setUp(self):
+        self.tmp = Path(tempfile.mkdtemp())
+        self.audit_log = self.tmp / "agent-audit.log"
+        self._audit_patcher = patch.object(unbound, "AUDIT_LOG", self.audit_log)
+        self._audit_patcher.start()
+        self.addCleanup(self._audit_patcher.stop)
+
+        self.sent_exchanges = []
+        self._send_patcher = patch.object(
+            unbound, "send_to_api",
+            lambda exchange, api_key: self.sent_exchanges.append(exchange) or True,
+        )
+        self._send_patcher.start()
+        self.addCleanup(self._send_patcher.stop)
+
+        # Codex sources tool_uses from the transcript — stub it to a fixed set.
+        self._tools_patcher = patch.object(
+            unbound, "parse_codex_transcript_for_tools",
+            lambda path, ts=None: [
+                {'type': 'PostToolUse', 'tool_name': 'apply_patch',
+                 'tool_input': {'file_path': '/a.py'}, 'tool_response': {}},
+            ],
+        )
+        self._tools_patcher.start()
+        self.addCleanup(self._tools_patcher.stop)
+
+        self._usage_patcher = patch.object(
+            unbound, "parse_codex_transcript_for_usage", lambda path, ts=None: None
+        )
+        self._usage_patcher.start()
+        self.addCleanup(self._usage_patcher.stop)
+
+    def _stop(self, sid="sess-1"):
+        return {
+            'session_id': sid,
+            'hook_event_name': 'Stop',
+            'transcript_path': '/transcript.jsonl',
+            'last_assistant_message': 'done',
+        }
+
+
+class TestCodexStopIdempotent(CodexStopDedupBase):
+    def test_repeated_stops_ship_turn_once(self):
+        sid = "sess-1"
+        unbound.save_logs([
+            _entry(sid, 'UserPromptSubmit', '2026-06-15T00:00:00Z', prompt='go'),
+        ])
+
+        for _ in range(3):
+            unbound.process_stop_event(self._stop(), api_key='k')
+
+        # The turn is shipped exactly once across all three Stops.
+        self.assertEqual(len(self.sent_exchanges), 1)
+
+    def test_new_turn_ships_again(self):
+        sid = "sess-1"
+        unbound.save_logs([
+            _entry(sid, 'UserPromptSubmit', '2026-06-15T00:00:00Z', prompt='first'),
+        ])
+        unbound.process_stop_event(self._stop(), api_key='k')
+
+        # A new prompt opens a new turn — it must ship.
+        unbound.append_to_audit_log(
+            _entry(sid, 'UserPromptSubmit', '2026-06-15T00:01:00Z', prompt='second')
+        )
+        unbound.process_stop_event(self._stop(), api_key='k')
+
+        self.assertEqual(len(self.sent_exchanges), 2)
+
+
+class TestCodexLookback(CodexStopDedupBase):
+    def test_prior_prompts_survive(self):
+        sid = "sess-1"
+        unbound.save_logs([
+            _entry(sid, 'UserPromptSubmit', '2026-06-15T00:00:00Z', prompt='first'),
+            _entry(sid, 'UserPromptSubmit', '2026-06-15T00:01:00Z', prompt='second'),
+        ])
+        unbound.process_stop_event(self._stop(), api_key='k')
+        unbound.process_stop_event(self._stop(), api_key='k')
+
+        prompts = unbound.get_recent_user_prompts_for_session(sid, n=5)
+        self.assertEqual(prompts, ['first', 'second'])
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/codex/hooks/unbound.py b/codex/hooks/unbound.py
index a1c99b71..89be9ebb 100644
--- a/codex/hooks/unbound.py
+++ b/codex/hooks/unbound.py
@@ -32,6 +32,14 @@
 PRETOOL_USER_MESSAGES_LIMIT = 5
 AUDIT_LOG_TOTAL_LIMIT = 100
 
+# Per-entry flag set on the turn's UserPromptSubmit audit-log entry once that
+# turn's exchange has been shipped to the proxy on a Stop event. Codex fires
+# multiple Stop events per turn and rebuilds the tool_uses from the transcript
+# each time; this flag lets a later Stop skip an already-shipped turn instead
+# of re-sending the same tool_uses, while the entry stays in the log so the
+# user-prompt lookback keeps working.
+SHIPPED_FLAG = 'unbound_shipped'
+
 APPROVAL_TIMEOUT = 4 * 60 * 60
 
 DISCOVERY_DEBOUNCE_SECONDS = 24 * 3600
@@ -1067,6 +1075,34 @@ def parse_codex_transcript_for_usage(transcript_path: str, user_prompt_timestamp
     }
 
 
+def _log_identity(log: Dict) -> tuple:
+    """Stable identity for an audit-log entry, used to re-find the entry this
+    Stop shipped after a fresh re-read (a concurrent Stop may have appended new
+    lines since we loaded the log)."""
+    event = log.get('event', {}) if 'event' in log else log
+    return (
+        log.get('timestamp'),
+        log.get('session_id') or event.get('session_id'),
+        event.get('hook_event_name'),
+    )
+
+
+def _persist_shipped_flag(prompt_log: Dict) -> None:
+    """Re-read the audit log and stamp SHIPPED_FLAG on the turn's
+    UserPromptSubmit entry, then rewrite it. Re-reading (instead of saving a
+    stale in-memory copy) avoids clobbering entries a concurrent hook appended
+    after we loaded the log."""
+    target_id = _log_identity(prompt_log)
+    fresh_logs = load_existing_logs()
+    changed = False
+    for log in fresh_logs:
+        if not log.get(SHIPPED_FLAG) and _log_identity(log) == target_id:
+            log[SHIPPED_FLAG] = True
+            changed = True
+    if changed:
+        save_logs(fresh_logs)
+
+
 def process_stop_event(event: Dict, api_key: str):
     session_id = event.get('session_id')
     transcript_path = event.get('transcript_path')
@@ -1074,10 +1110,11 @@ def process_stop_event(event: Dict, api_key: str):
 
     logs = load_existing_logs()
 
-    # Find the UserPromptSubmit for this session
+    # Find the UserPromptSubmit for this session (latest wins)
     user_prompt = None
     user_prompt_timestamp = None
     permission_mode = None
+    prompt_log = None
 
     for log in logs:
         log_session_id = log.get('session_id') or log.get('event', {}).get('session_id')
@@ -1090,10 +1127,18 @@ def process_stop_event(event: Dict, api_key: str):
                 user_prompt = log_event.get('prompt')
                 user_prompt_timestamp = log.get('timestamp')
                 permission_mode = log_event.get('permission_mode', 'default')
+                prompt_log = log
 
     if not user_prompt:
         return
 
+    # Idempotency guard: Codex fires a Stop per agent turn, so a single user
+    # turn produces several Stop events. Once this turn's exchange has shipped,
+    # re-sending would re-count the same tool_uses (root cause of inflated
+    # policy-match counts), so skip it.
+    if prompt_log is not None and prompt_log.get(SHIPPED_FLAG):
+        return
+
     messages = [{'role': 'user', 'content': user_prompt}]
 
     # Parse tool uses from Codex transcript (function_call/function_call_output pairs)
@@ -1118,7 +1163,11 @@ def process_stop_event(event: Dict, api_key: str):
     if usage:
         exchange['usage'] = usage
 
-    send_to_api(exchange, api_key)
+    if send_to_api(exchange, api_key) and prompt_log is not None:
+        # Persist the shipped marker so later Stops in this turn skip the send.
+        # The entry stays in the log for the user-prompt lookback.
+        prompt_log[SHIPPED_FLAG] = True
+        _persist_shipped_flag(prompt_log)
 
 
 _GATEWAY_URL_RE = re.compile(r'^https?://[A-Za-z0-9._\-]+(:\d+)?(/[A-Za-z0-9._/\-]*)?$')
diff --git a/cursor/test_stop_dedup.py b/cursor/test_stop_dedup.py
new file mode 100644
index 00000000..2500f7b4
--- /dev/null
+++ b/cursor/test_stop_dedup.py
@@ -0,0 +1,103 @@
+"""
+Regression tests for the Stop-event duplicate-count guard in the cursor hook.
+
+Cursor groups audit events by generation_id and process_stop_event ships only
+the generation matching the incoming stop. That isolates turns better than the
+claude-code hook, but if a generation_id ever receives more than one `stop`
+event the exchange (and its tool_uses) would be re-shipped — the same
+duplicate-count class fixed in 16d1ffe's wake.
+
+These tests pin the contract:
+  - a generation is shipped exactly once even if `stop` fires twice for it, and
+  - prior user prompts remain in the audit log for the lookback.
+"""
+
+import tempfile
+import unittest
+from pathlib import Path
+from unittest.mock import patch
+
+import unbound
+
+
+def _entry(conversation_id, generation_id, hook_event_name, timestamp, **fields):
+    event = {
+        'conversation_id': conversation_id,
+        'generation_id': generation_id,
+        'hook_event_name': hook_event_name,
+    }
+    event.update(fields)
+    return {'timestamp': timestamp, 'event': event}
+
+
+class CursorStopDedupBase(unittest.TestCase):
+    def setUp(self):
+        self.tmp = Path(tempfile.mkdtemp())
+        self.audit_log = self.tmp / "agent-audit.log"
+        self._audit_patcher = patch.object(unbound, "AUDIT_LOG", self.audit_log)
+        self._audit_patcher.start()
+        self.addCleanup(self._audit_patcher.stop)
+
+        self.sent_exchanges = []
+        self._send_patcher = patch.object(
+            unbound, "send_to_api",
+            lambda exchange, api_key=None: self.sent_exchanges.append(exchange) or True,
+        )
+        self._send_patcher.start()
+        self.addCleanup(self._send_patcher.stop)
+
+        self._serial_patcher = patch.object(
+            unbound, "_device_serial", lambda probe=False: None
+        )
+        self._serial_patcher.start()
+        self.addCleanup(self._serial_patcher.stop)
+
+    def _generation(self, conv="conv-1", gen="gen-1"):
+        return [
+            _entry(conv, gen, 'beforeSubmitPrompt', '2026-06-15T00:00:00Z', prompt='go'),
+            _entry(conv, gen, 'afterFileEdit', '2026-06-15T00:00:01Z',
+                   file_path='/a.py', edits=[{'old': 'x', 'new': 'y'}]),
+            _entry(conv, gen, 'afterAgentResponse', '2026-06-15T00:00:02Z', text='done'),
+            _entry(conv, gen, 'stop', '2026-06-15T00:00:03Z'),
+        ]
+
+
+class TestCursorStopIdempotent(CursorStopDedupBase):
+    def test_repeated_stops_for_same_generation_ship_once(self):
+        unbound.save_logs(self._generation())
+
+        unbound.process_stop_event('gen-1', api_key='k')
+        unbound.process_stop_event('gen-1', api_key='k')
+
+        self.assertEqual(len(self.sent_exchanges), 1)
+
+    def test_new_generation_ships_again(self):
+        unbound.save_logs(self._generation(gen='gen-1'))
+        unbound.process_stop_event('gen-1', api_key='k')
+
+        for entry in self._generation(gen='gen-2'):
+            unbound.append_to_audit_log(entry)
+        unbound.process_stop_event('gen-2', api_key='k')
+
+        self.assertEqual(len(self.sent_exchanges), 2)
+
+
+class TestCursorLookback(CursorStopDedupBase):
+    def test_prior_prompts_survive(self):
+        logs = self._generation(gen='gen-1')
+        logs += [
+            _entry('conv-1', 'gen-2', 'beforeSubmitPrompt', '2026-06-15T00:01:00Z',
+                   prompt='second'),
+            _entry('conv-1', 'gen-2', 'stop', '2026-06-15T00:01:01Z'),
+        ]
+        unbound.save_logs(logs)
+
+        unbound.process_stop_event('gen-1', api_key='k')
+        unbound.process_stop_event('gen-1', api_key='k')
+
+        prompts = unbound.get_recent_user_prompts_for_session('conv-1', 5)
+        self.assertEqual(prompts, ['go', 'second'])
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/cursor/unbound.py b/cursor/unbound.py
index 6a455186..2e42bf04 100644
--- a/cursor/unbound.py
+++ b/cursor/unbound.py
@@ -79,6 +79,13 @@
 PRETOOL_USER_MESSAGES_LIMIT = 5
 AUDIT_LOG_TOTAL_LIMIT = 100
 
+# Per-entry flag set on a generation's stop audit-log entry once that
+# generation's exchange has been shipped to the proxy. If a generation_id ever
+# receives more than one stop event, this flag lets the later stop skip the
+# resend instead of re-counting the same tool_uses, while the entries stay in
+# the log so the user-prompt lookback keeps working.
+SHIPPED_FLAG = 'unbound_shipped'
+
 # Ensure log directory exists
 try:
     LOG_DIR.mkdir(parents=True, exist_ok=True)
@@ -1154,28 +1161,52 @@ def cleanup_old_logs():
         save_logs(logs[-AUDIT_LOG_TOTAL_LIMIT:])
 
 
+def _persist_generation_shipped(generation_id):
+    """Re-read the audit log and stamp SHIPPED_FLAG on this generation's stop
+    entries, then rewrite it. Re-reading (instead of saving a stale in-memory
+    copy) avoids clobbering entries a concurrent hook appended after we loaded
+    the log."""
+    fresh_logs = load_existing_logs()
+    changed = False
+    for log in fresh_logs:
+        event = log.get('event', {})
+        if (event.get('generation_id') == generation_id
+                and event.get('hook_event_name') == 'stop'
+                and not log.get(SHIPPED_FLAG)):
+            log[SHIPPED_FLAG] = True
+            changed = True
+    if changed:
+        save_logs(fresh_logs)
+
+
 def process_stop_event(generation_id, api_key=None):
     """Process stop event: convert to LLM format and send to API."""
     logs = load_existing_logs()
-    
+
     # Group events
     grouped = group_events_by_generation(logs)
-    
+
     # Find and process the generation with stop event
     for conversation_id, generations in grouped.items():
         if generation_id in generations:
             events = generations[generation_id]
-            
+
             # Check if this generation has a stop event
-            has_stop = any(
-                log.get('event', {}).get('hook_event_name') == 'stop'
-                for log in events
-            )
-            
-            if has_stop:
+            stop_logs = [
+                log for log in events
+                if log.get('event', {}).get('hook_event_name') == 'stop'
+            ]
+
+            if stop_logs:
+                # Idempotency guard: if this generation was already shipped by an
+                # earlier stop, re-sending would re-count the same tool_uses
+                # (root cause of inflated policy-match counts). Skip it.
+                if any(log.get(SHIPPED_FLAG) for log in stop_logs):
+                    break
+
                 exchange = build_llm_exchange(events, api_key)
-                if exchange:
-                    send_to_api(exchange, api_key)
+                if exchange and send_to_api(exchange, api_key):
+                    _persist_generation_shipped(generation_id)
                 break