From 020c1fc704bf33a4b8692d4d22848e5fa73089f8 Mon Sep 17 00:00:00 2001
From: adamhenson <adamhenson1979@gmail.com>
Date: Wed, 18 Mar 2026 20:07:39 -0400
Subject: [PATCH 01/13] chore: trigger CI

Made-with: Cursor

From 418cdc34178553ed99f3ab65cee7e5d5d4106ebe Mon Sep 17 00:00:00 2001
From: adamhenson <adamhenson1979@gmail.com>
Date: Wed, 18 Mar 2026 20:10:07 -0400
Subject: [PATCH 02/13] debug(codex): query SQLite db for token usage instead
 of missing JSONL

Made-with: Cursor
---
 .github/workflows/agent-review-codex.yml | 36 +++++++++---------------
 1 file changed, 14 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/agent-review-codex.yml b/.github/workflows/agent-review-codex.yml
index a85436a..ad7a297 100644
--- a/.github/workflows/agent-review-codex.yml
+++ b/.github/workflows/agent-review-codex.yml
@@ -62,33 +62,25 @@ jobs:
         env:
           CODEX_HOME: /home/runner/.codex
         run: |
-          echo "=== CODEX_HOME contents ==="
-          find "$CODEX_HOME" -type f 2>/dev/null || echo "(empty or missing)"
-          echo "=== end ==="
+          DB="$CODEX_HOME/logs_1.sqlite"
+          INPUT=0; OUTPUT=0; CACHE_READ=0
 
-          # Try session files in CODEX_HOME/sessions/ first, then ~/.codex/sessions/
-          SESSION_FILE=$(ls -t "$CODEX_HOME"/sessions/*.jsonl 2>/dev/null | head -1 || \
-                         ls -t ~/.codex/sessions/*.jsonl 2>/dev/null | head -1 || true)
-
-          echo "SESSION_FILE=$SESSION_FILE"
+          if [ -f "$DB" ]; then
+            echo "=== SQLite schema ==="
+            sqlite3 "$DB" ".schema" 2>/dev/null || echo "(schema unavailable)"
+            echo "=== end ==="
 
-          if [ -n "$SESSION_FILE" ] && [ -f "$SESSION_FILE" ]; then
-            echo "=== last 5 lines of session file ==="
-            tail -5 "$SESSION_FILE"
+            # Try to extract token usage — query will be refined once schema is known
+            USAGE_JSON=$(sqlite3 "$DB" \
+              "SELECT json FROM events WHERE json LIKE '%token%' ORDER BY rowid DESC LIMIT 5;" \
+              2>/dev/null || true)
+            echo "=== token rows ==="
+            echo "$USAGE_JSON"
             echo "=== end ==="
-            TOKEN_LINE=$(grep '"token_count"' "$SESSION_FILE" 2>/dev/null | tail -1 || true)
-            if [ -n "$TOKEN_LINE" ]; then
-              INPUT=$(echo "$TOKEN_LINE" | jq -r '.payload.info.total_token_usage.input_tokens // 0')
-              OUTPUT=$(echo "$TOKEN_LINE" | jq -r '.payload.info.total_token_usage.output_tokens // 0')
-              CACHE_READ=$(echo "$TOKEN_LINE" | jq -r '.payload.info.total_token_usage.cached_input_tokens // 0')
-            else
-              echo "No token_count event found in session file"
-              INPUT=0; OUTPUT=0; CACHE_READ=0
-            fi
           else
-            echo "No session file found"
-            INPUT=0; OUTPUT=0; CACHE_READ=0
+            echo "No SQLite database found at $DB"
           fi
+
           {
             echo "input_tokens=$INPUT"
             echo "output_tokens=$OUTPUT"

From 996430c7d164ebd7115017391f523463a1db7517 Mon Sep 17 00:00:00 2001
From: adamhenson <adamhenson1979@gmail.com>
Date: Wed, 18 Mar 2026 20:14:59 -0400
Subject: [PATCH 03/13] debug(codex): query logs table message column for token
 data

Made-with: Cursor
---
 .github/workflows/agent-review-codex.yml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/agent-review-codex.yml b/.github/workflows/agent-review-codex.yml
index ad7a297..06733b2 100644
--- a/.github/workflows/agent-review-codex.yml
+++ b/.github/workflows/agent-review-codex.yml
@@ -66,16 +66,16 @@ jobs:
           INPUT=0; OUTPUT=0; CACHE_READ=0
 
           if [ -f "$DB" ]; then
-            echo "=== SQLite schema ==="
-            sqlite3 "$DB" ".schema" 2>/dev/null || echo "(schema unavailable)"
+            echo "=== messages containing 'token' ==="
+            sqlite3 "$DB" \
+              "SELECT message FROM logs WHERE message LIKE '%token%' ORDER BY id DESC LIMIT 10;" \
+              2>/dev/null || echo "(query failed)"
             echo "=== end ==="
 
-            # Try to extract token usage — query will be refined once schema is known
-            USAGE_JSON=$(sqlite3 "$DB" \
-              "SELECT json FROM events WHERE json LIKE '%token%' ORDER BY rowid DESC LIMIT 5;" \
-              2>/dev/null || true)
-            echo "=== token rows ==="
-            echo "$USAGE_JSON"
+            echo "=== last 10 messages ==="
+            sqlite3 "$DB" \
+              "SELECT message FROM logs ORDER BY id DESC LIMIT 10;" \
+              2>/dev/null || echo "(query failed)"
             echo "=== end ==="
           else
             echo "No SQLite database found at $DB"

From de0ed92c28deadae0ed376eab5a9874e8d53233f Mon Sep 17 00:00:00 2001
From: adamhenson <adamhenson1979@gmail.com>
Date: Wed, 18 Mar 2026 20:15:37 -0400
Subject: [PATCH 04/13] style(pricing): simplify fetch log to show count only

Made-with: Cursor
---
 src/pricing.ts | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/pricing.ts b/src/pricing.ts
index ec292e1..e137f65 100644
--- a/src/pricing.ts
+++ b/src/pricing.ts
@@ -71,9 +71,7 @@ export async function fetchPricing({
         cacheReadPer1M: entry.cacheReadPerMillionTokens ?? 0,
       };
     }
-    core.info(
-      `AgentMeter: fetched pricing for ${Object.keys(result).length} models: ${Object.keys(result).join(', ')}`
-    );
+    core.info(`AgentMeter: fetched pricing for ${Object.keys(result).length} models.`);
     return result;
   } catch (error) {
     core.info(`AgentMeter: could not fetch pricing from API (${error}) — cost will show as —.`);

From 39bad2f42f6f934655c3430fa87608436a1e668c Mon Sep 17 00:00:00 2001
From: adamhenson <adamhenson1979@gmail.com>
Date: Wed, 18 Mar 2026 20:18:31 -0400
Subject: [PATCH 05/13] debug(codex): inspect run_id.json and output file for
 token data

Made-with: Cursor
---
 .github/workflows/agent-review-codex.yml | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/agent-review-codex.yml b/.github/workflows/agent-review-codex.yml
index 06733b2..e9ced5a 100644
--- a/.github/workflows/agent-review-codex.yml
+++ b/.github/workflows/agent-review-codex.yml
@@ -65,21 +65,17 @@ jobs:
           DB="$CODEX_HOME/logs_1.sqlite"
           INPUT=0; OUTPUT=0; CACHE_READ=0
 
-          if [ -f "$DB" ]; then
-            echo "=== messages containing 'token' ==="
-            sqlite3 "$DB" \
-              "SELECT message FROM logs WHERE message LIKE '%token%' ORDER BY id DESC LIMIT 10;" \
-              2>/dev/null || echo "(query failed)"
-            echo "=== end ==="
+          echo "=== CODEX_HOME files ==="
+          find "$CODEX_HOME" -type f 2>/dev/null | while read -r f; do
+            echo "--- $f ---"
+            if [[ "$f" == *.json ]]; then cat "$f" 2>/dev/null; fi
+            if [[ "$f" == *.toml ]]; then cat "$f" 2>/dev/null; fi
+          done
+          echo "=== end ==="
 
-            echo "=== last 10 messages ==="
-            sqlite3 "$DB" \
-              "SELECT message FROM logs ORDER BY id DESC LIMIT 10;" \
-              2>/dev/null || echo "(query failed)"
-            echo "=== end ==="
-          else
-            echo "No SQLite database found at $DB"
-          fi
+          echo "=== /tmp/codex-output.md (last 20 lines) ==="
+          tail -20 /tmp/codex-output.md 2>/dev/null || echo "(not found)"
+          echo "=== end ==="
 
           {
             echo "input_tokens=$INPUT"

From eec5dd7166d9ba99e48e797a2f42e276be5b6678 Mon Sep 17 00:00:00 2001
From: adamhenson <adamhenson1979@gmail.com>
Date: Wed, 18 Mar 2026 20:22:37 -0400
Subject: [PATCH 06/13] fix(codex): remove fragile token extraction, document
 limitation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

codex exec (via openai/codex-action) does not expose per-run token
counts through any stable interface — not JSONL, not SQLite, not
action outputs. Remove the extraction step entirely; cost will show
as — which is accurate. Duration and status still track correctly.
Document in challenges.md.

Made-with: Cursor
---
 .github/workflows/agent-review-codex.yml | 31 ------------------------
 docs/challenges.md                       |  3 ++-
 2 files changed, 2 insertions(+), 32 deletions(-)

diff --git a/.github/workflows/agent-review-codex.yml b/.github/workflows/agent-review-codex.yml
index e9ced5a..3350b17 100644
--- a/.github/workflows/agent-review-codex.yml
+++ b/.github/workflows/agent-review-codex.yml
@@ -39,7 +39,6 @@ jobs:
           prompt-file: .github/codex/prompts/review.md
           model: ${{ vars.GH_AW_MODEL_AGENT_CODEX || 'gpt-5.4-mini' }}
           sandbox: workspace-write
-          output-file: /tmp/codex-output.md
 
       - name: Post review comment
         if: steps.codex.outputs.final-message != ''
@@ -56,33 +55,6 @@ jobs:
         env:
           CODEX_REVIEW: ${{ steps.codex.outputs.final-message }}
 
-      - name: Extract Codex token usage
-        id: extract_tokens
-        if: always()
-        env:
-          CODEX_HOME: /home/runner/.codex
-        run: |
-          DB="$CODEX_HOME/logs_1.sqlite"
-          INPUT=0; OUTPUT=0; CACHE_READ=0
-
-          echo "=== CODEX_HOME files ==="
-          find "$CODEX_HOME" -type f 2>/dev/null | while read -r f; do
-            echo "--- $f ---"
-            if [[ "$f" == *.json ]]; then cat "$f" 2>/dev/null; fi
-            if [[ "$f" == *.toml ]]; then cat "$f" 2>/dev/null; fi
-          done
-          echo "=== end ==="
-
-          echo "=== /tmp/codex-output.md (last 20 lines) ==="
-          tail -20 /tmp/codex-output.md 2>/dev/null || echo "(not found)"
-          echo "=== end ==="
-
-          {
-            echo "input_tokens=$INPUT"
-            echo "output_tokens=$OUTPUT"
-            echo "cache_read_tokens=$CACHE_READ"
-          } >> "$GITHUB_OUTPUT"
-
       - name: Track with AgentMeter
         if: always()
         uses: foo-software/agentmeter-action@main
@@ -92,8 +64,5 @@ jobs:
           engine: codex
           model: ${{ vars.GH_AW_MODEL_AGENT_CODEX || 'gpt-5.4-mini' }}
           status: ${{ job.status == 'success' && 'success' || 'failed' }}
-          input_tokens: ${{ steps.extract_tokens.outputs.input_tokens }}
-          output_tokens: ${{ steps.extract_tokens.outputs.output_tokens }}
-          cache_read_tokens: ${{ steps.extract_tokens.outputs.cache_read_tokens }}
           started_at: ${{ steps.timer.outputs.started_at }}
           post_comment: 'true'
diff --git a/docs/challenges.md b/docs/challenges.md
index 5ab1ac0..74e121a 100644
--- a/docs/challenges.md
+++ b/docs/challenges.md
@@ -123,4 +123,5 @@ If the user omits `if: always()` on the AgentMeter step, failed agent runs won't
 | Comment posting | ✅ | Upsert by marker, correct PR/issue number |
 | `GITHUB_TOKEN` availability | ✅ | `github_token` input with `default: ${{ github.token }}` |
 | Node.js version | ✅ | node24 |
-| Pricing table | ✅ | Fetched from `/api/models/pricing`; built-in prefix fallback |
+| Pricing table | ✅ | Fetched from `/api/models/pricing`; shows `—` if unreachable |
+| Codex token counts | ⚠️ Known limitation | `codex exec` (via `openai/codex-action`) does not expose per-run token counts through any stable interface. Cost shows as `—` for Codex runs. Duration and status are tracked correctly. |

From 3416ed2bcc47176d8f2ade0c65adfbcbae438e05 Mon Sep 17 00:00:00 2001
From: adamhenson <adamhenson1979@gmail.com>
Date: Wed, 18 Mar 2026 20:35:55 -0400
Subject: [PATCH 07/13] feat(codex): extract token counts from rollout JSONL,
 add turn.completed parser

- Set known codex-home in agent-review-codex.yml so rollout JSONL is
  findable at /tmp/codex-home/sessions/YYYY/MM/DD/rollout-*.jsonl
- Add "Extract Codex token usage" step that greps for the last
  token_count event and sets input/output/cache_read_tokens outputs
- Pass those outputs to the AgentMeter tracking step
- Add tryExtractFromCodexExecJsonl() to token-extractor.ts for
  consumers passing codex exec --json stdout as agent_output
- Add CodexExecTurnCompleted type for the turn.completed event format
- Update challenges.md to reflect the rollout JSONL investigation status

Made-with: Cursor
---
 .github/workflows/agent-review-codex.yml | 29 +++++++++++
 docs/challenges.md                       |  2 +-
 src/token-extractor.ts                   | 64 +++++++++++++++++++++++-
 src/types.ts                             | 17 +++++++
 4 files changed, 110 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/agent-review-codex.yml b/.github/workflows/agent-review-codex.yml
index 3350b17..ca81ba5 100644
--- a/.github/workflows/agent-review-codex.yml
+++ b/.github/workflows/agent-review-codex.yml
@@ -39,6 +39,32 @@ jobs:
           prompt-file: .github/codex/prompts/review.md
           model: ${{ vars.GH_AW_MODEL_AGENT_CODEX || 'gpt-5.4-mini' }}
           sandbox: workspace-write
+          codex-home: /tmp/codex-home
+
+      - name: Extract Codex token usage
+        id: codex-tokens
+        if: always()
+        run: |
+          rollout=$(find /tmp/codex-home/sessions -name "rollout-*.jsonl" 2>/dev/null | sort | tail -1)
+          if [ -z "$rollout" ]; then
+            echo "No rollout JSONL found — token counts unavailable"
+            echo "input_tokens=" >> "$GITHUB_OUTPUT"
+            echo "output_tokens=" >> "$GITHUB_OUTPUT"
+            echo "cache_read_tokens=" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+          echo "Parsing rollout: $rollout"
+          token_line=$(grep '"token_count"' "$rollout" | tail -1)
+          if [ -z "$token_line" ]; then
+            echo "No token_count event in rollout — token counts unavailable"
+            echo "input_tokens=" >> "$GITHUB_OUTPUT"
+            echo "output_tokens=" >> "$GITHUB_OUTPUT"
+            echo "cache_read_tokens=" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+          echo "input_tokens=$(echo "$token_line" | jq -r '.payload.info.total_token_usage.input_tokens // empty')" >> "$GITHUB_OUTPUT"
+          echo "output_tokens=$(echo "$token_line" | jq -r '.payload.info.total_token_usage.output_tokens // empty')" >> "$GITHUB_OUTPUT"
+          echo "cache_read_tokens=$(echo "$token_line" | jq -r '.payload.info.total_token_usage.cached_input_tokens // empty')" >> "$GITHUB_OUTPUT"
 
       - name: Post review comment
         if: steps.codex.outputs.final-message != ''
@@ -66,3 +92,6 @@ jobs:
           status: ${{ job.status == 'success' && 'success' || 'failed' }}
           started_at: ${{ steps.timer.outputs.started_at }}
           post_comment: 'true'
+          input_tokens: ${{ steps.codex-tokens.outputs.input_tokens }}
+          output_tokens: ${{ steps.codex-tokens.outputs.output_tokens }}
+          cache_read_tokens: ${{ steps.codex-tokens.outputs.cache_read_tokens }}
diff --git a/docs/challenges.md b/docs/challenges.md
index 74e121a..7dc05a5 100644
--- a/docs/challenges.md
+++ b/docs/challenges.md
@@ -124,4 +124,4 @@ If the user omits `if: always()` on the AgentMeter step, failed agent runs won't
 | `GITHUB_TOKEN` availability | ✅ | `github_token` input with `default: ${{ github.token }}` |
 | Node.js version | ✅ | node24 |
 | Pricing table | ✅ | Fetched from `/api/models/pricing`; shows `—` if unreachable |
-| Codex token counts | ⚠️ Known limitation | `codex exec` (via `openai/codex-action`) does not expose per-run token counts through any stable interface. Cost shows as `—` for Codex runs. Duration and status are tracked correctly. |
+| Codex token counts | 🔬 Under investigation | `codex exec` writes rollout JSONL to `$CODEX_HOME/sessions/YYYY/MM/DD/rollout-*.jsonl`. `token_count` events in those files contain cumulative `total_token_usage`. The workflow sets a known `codex-home` and parses the rollout file after the codex step. Two alternative paths exist: (1) rollout JSONL file, (2) `codex exec --json` stdout `turn.completed` events — but the `openai/codex-action` captures only the final message, not stdout JSONL. If the rollout file is empty or absent, cost shows as `—`. |
diff --git a/src/token-extractor.ts b/src/token-extractor.ts
index f43fefb..b97e8e0 100644
--- a/src/token-extractor.ts
+++ b/src/token-extractor.ts
@@ -1,4 +1,10 @@
-import type { ClaudeCodeOutput, CodexTokenEvent, TokenCounts, TokenCountsWithMeta } from './types';
+import type {
+  ClaudeCodeOutput,
+  CodexExecTurnCompleted,
+  CodexTokenEvent,
+  TokenCounts,
+  TokenCountsWithMeta,
+} from './types';
 
 /**
  * Attempts to extract token counts from agent stdout.
@@ -13,6 +19,9 @@ export function extractTokensFromOutput(
   const jsonResult = tryExtractFromJson(agentOutput);
   if (jsonResult) return jsonResult;
 
+  const codexExecResult = tryExtractFromCodexExecJsonl(agentOutput);
+  if (codexExecResult) return codexExecResult;
+
   const codexResult = tryExtractFromCodexJsonl(agentOutput);
   if (codexResult) return codexResult;
 
@@ -45,6 +54,59 @@ function tryExtractFromJson(
   }
 }
 
+/**
+ * Tries to extract token counts from `codex exec --json` stdout.
+ * Sums `usage` fields across all `turn.completed` events.
+ *
+ * Field mapping:
+ *   input_tokens        → inputTokens
+ *   output_tokens       → outputTokens
+ *   cached_input_tokens → cacheReadTokens
+ */
+function tryExtractFromCodexExecJsonl(
+  agentOutput: string,
+): { tokens: TokenCounts; isApproximate: boolean } | null {
+  const lines = agentOutput.split('\n');
+  let inputTokens = 0;
+  let outputTokens = 0;
+  let cacheReadTokens = 0;
+  let found = false;
+
+  for (const line of lines) {
+    const trimmed = line.trim();
+    if (!trimmed.includes('"turn.completed"')) continue;
+    try {
+      const parsed = JSON.parse(trimmed) as unknown;
+      const obj =
+        typeof parsed === 'object' && parsed !== null
+          ? (parsed as Record<string, unknown>)
+          : null;
+      if (obj?.['type'] !== 'turn.completed') continue;
+      const usage = obj['usage'];
+      if (typeof usage !== 'object' || usage === null) continue;
+      const u = usage as (CodexExecTurnCompleted)['usage'];
+      inputTokens += u.input_tokens ?? 0;
+      outputTokens += u.output_tokens ?? 0;
+      cacheReadTokens += u.cached_input_tokens ?? 0;
+      found = true;
+    } catch {
+      // not valid JSON, skip line
+    }
+  }
+
+  if (!found) return null;
+
+  return {
+    tokens: {
+      inputTokens,
+      outputTokens,
+      cacheReadTokens,
+      cacheWriteTokens: 0,
+    },
+    isApproximate: false,
+  };
+}
+
 /**
  * Tries to extract token counts from Codex CLI JSONL streaming output.
  * Looks for `token_count` events emitted by `codex exec` and takes the last one,
diff --git a/src/types.ts b/src/types.ts
index a740a0c..4aae3b2 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -155,6 +155,7 @@ export interface AgentTokensArtifact {
 /**
  * A single JSONL event emitted by `codex exec` in streaming mode.
  * Token counts are found in `token_count` events.
+ * Written to the rollout JSONL file at `$CODEX_HOME/sessions/YYYY/MM/DD/rollout-*.jsonl`.
  */
 export interface CodexTokenEvent {
   type: 'event_msg';
@@ -173,6 +174,22 @@ export interface CodexTokenEvent {
   };
 }
 
+/**
+ * A `turn.completed` event emitted to stdout when running `codex exec --json`.
+ * One event is emitted per turn; sum `usage` across all of them for the total of a full `codex exec` run.
+ */
+export interface CodexExecTurnCompleted {
+  type: 'turn.completed';
+  usage: {
+    /** Total input tokens sent (includes cached) */
+    input_tokens?: number;
+    /** Output tokens generated */
+    output_tokens?: number;
+    /** Input tokens served from cache (subset of input_tokens) */
+    cached_input_tokens?: number;
+  };
+}
+
 /** Known Claude Code JSON output structure (best-effort) */
 export interface ClaudeCodeOutput {
   /** Top-level usage block */

From e98d254a6de87423618589db7c7726297c063f70 Mon Sep 17 00:00:00 2001
From: adamhenson <adamhenson1979@gmail.com>
Date: Wed, 18 Mar 2026 21:25:41 -0400
Subject: [PATCH 08/13] docs(challenges): document Codex rollout JSONL token
 extraction approach

Made-with: Cursor
---
 docs/challenges.md | 44 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/docs/challenges.md b/docs/challenges.md
index 7dc05a5..0cd5a12 100644
--- a/docs/challenges.md
+++ b/docs/challenges.md
@@ -94,6 +94,48 @@ If the user omits `if: always()` on the AgentMeter step, failed agent runs won't
 
 ---
 
+### 6. Codex token counts rely on an internal rollout file format
+
+`codex exec` (via `openai/codex-action`) does not expose token usage through any documented public API. However, when running without `--ephemeral`, the Codex CLI writes a rollout JSONL file to:
+
+```
+$CODEX_HOME/sessions/YYYY/MM/DD/rollout-<timestamp>-<uuid>.jsonl
+```
+
+Each line is a JSON event. Token totals appear in `token_count` events:
+
+```json
+{
+  "type": "event_msg",
+  "payload": {
+    "type": "token_count",
+    "info": {
+      "total_token_usage": {
+        "input_tokens": 479565,
+        "output_tokens": 7489,
+        "cached_input_tokens": 444416
+      }
+    },
+    "rate_limits": null
+  }
+}
+```
+
+The last `token_count` event in the file contains cumulative totals for the full run.
+
+**How the workflow extracts tokens:**
+
+1. Set `codex-home: /tmp/codex-home` on `openai/codex-action` so the rollout path is known
+2. After the codex step, find the latest rollout file with `find /tmp/codex-home/sessions -name "rollout-*.jsonl" | sort | tail -1`
+3. Grep for `"token_count"`, take the last line, extract fields with `jq`
+4. Pass `input_tokens`, `output_tokens`, `cache_read_tokens` as explicit inputs to the AgentMeter step
+
+**Stability caveat:** The rollout format is an internal Codex CLI implementation detail, not a versioned public API. A future `@openai/codex` release could rename fields or restructure events. Since `codex-version` in `openai/codex-action` defaults to latest, this could silently break on a CLI upgrade. Failure is graceful — costs show as `—` if the rollout file is missing or unparseable.
+
+**Alternative path (`codex exec --json`):** Running with `--json` writes JSONL to stdout with `turn.completed` events containing a `usage` field. However, `openai/codex-action`'s `final-message` output reads from the output file, not stdout — so the JSONL stream is not accessible from within the action's step outputs. The `tryExtractFromCodexExecJsonl` function in `token-extractor.ts` handles this format for consumers who capture `codex exec --json` stdout directly.
+
+---
+
 ## What works regardless of mode
 
 - The action **never fails the workflow** — all errors are `core.warning()`, not `core.setFailed()`.
@@ -124,4 +166,4 @@ If the user omits `if: always()` on the AgentMeter step, failed agent runs won't
 | `GITHUB_TOKEN` availability | ✅ | `github_token` input with `default: ${{ github.token }}` |
 | Node.js version | ✅ | node24 |
 | Pricing table | ✅ | Fetched from `/api/models/pricing`; shows `—` if unreachable |
-| Codex token counts | 🔬 Under investigation | `codex exec` writes rollout JSONL to `$CODEX_HOME/sessions/YYYY/MM/DD/rollout-*.jsonl`. `token_count` events in those files contain cumulative `total_token_usage`. The workflow sets a known `codex-home` and parses the rollout file after the codex step. Two alternative paths exist: (1) rollout JSONL file, (2) `codex exec --json` stdout `turn.completed` events — but the `openai/codex-action` captures only the final message, not stdout JSONL. If the rollout file is empty or absent, cost shows as `—`. |
+| Codex token counts | ✅ with caveat | Parsed from rollout JSONL at `$CODEX_HOME/sessions/YYYY/MM/DD/rollout-*.jsonl`. Works in production. Rollout format is internal (not a public API) — see section 6 above. |

From 60726608b0404e0e68ac13936d5edbe98600bbdf Mon Sep 17 00:00:00 2001
From: adamhenson <adamhenson1979@gmail.com>
Date: Wed, 18 Mar 2026 21:37:28 -0400
Subject: [PATCH 09/13] ci: add nightly Codex rollout JSONL compatibility check

Made-with: Cursor
---
 .github/workflows/codex-compat-check.yml | 112 +++++++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 .github/workflows/codex-compat-check.yml

diff --git a/.github/workflows/codex-compat-check.yml b/.github/workflows/codex-compat-check.yml
new file mode 100644
index 0000000..3a96c23
--- /dev/null
+++ b/.github/workflows/codex-compat-check.yml
@@ -0,0 +1,112 @@
+name: "Codex: Rollout JSONL Compatibility Check"
+
+on:
+  schedule:
+    - cron: "0 6 * * *"
+  workflow_dispatch:
+
+jobs:
+  verify:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      issues: write
+    steps:
+      - name: Run minimal Codex exec
+        id: codex
+        uses: openai/codex-action@v1
+        with:
+          openai-api-key: ${{ secrets.OPENAI_API_KEY }}
+          prompt: "Reply with only the single word: hello"
+          model: gpt-5.4-mini
+          sandbox: read-only
+          codex-home: /tmp/codex-check
+
+      - name: Verify rollout JSONL structure
+        run: |
+          rollout=$(find /tmp/codex-check/sessions -name "rollout-*.jsonl" 2>/dev/null | sort | tail -1)
+
+          if [ -z "$rollout" ]; then
+            echo "::error::No rollout JSONL found — codex may have changed its session file layout"
+            echo "Contents of /tmp/codex-check:"
+            find /tmp/codex-check -type f 2>/dev/null || echo "(empty)"
+            exit 1
+          fi
+
+          echo "Found rollout: $rollout"
+
+          token_line=$(grep '"token_count"' "$rollout" | tail -1)
+
+          if [ -z "$token_line" ]; then
+            echo "::error::No token_count event in rollout JSONL — codex may have changed its event format"
+            echo "Rollout file contents (last 20 lines):"
+            tail -20 "$rollout"
+            exit 1
+          fi
+
+          input_tokens=$(echo "$token_line" | jq -r '.payload.info.total_token_usage.input_tokens // empty')
+
+          if [ -z "$input_tokens" ]; then
+            echo "::error::input_tokens field missing from total_token_usage — codex may have changed the token_count schema"
+            echo "token_count event: $token_line"
+            exit 1
+          fi
+
+          if ! [[ "$input_tokens" =~ ^[0-9]+$ ]] || [ "$input_tokens" -eq 0 ]; then
+            echo "::error::input_tokens is not a positive integer ($input_tokens) — something unexpected in the rollout"
+            echo "token_count event: $token_line"
+            exit 1
+          fi
+
+          output_tokens=$(echo "$token_line" | jq -r '.payload.info.total_token_usage.output_tokens // empty')
+          echo "✅ Rollout JSONL verified — input_tokens=$input_tokens output_tokens=$output_tokens"
+
+      - name: Open issue on failure
+        if: failure()
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const date = new Date().toISOString().split('T')[0];
+            const title = `⚠️ Codex rollout JSONL compat check failed (${date})`;
+            const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
+
+            const { data: existing } = await github.rest.issues.listForRepo({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              state: 'open',
+              labels: 'codex-compat',
+            });
+
+            if (existing.length > 0) {
+              console.log(`Open codex-compat issue already exists (#${existing[0].number}), skipping.`);
+              return;
+            }
+
+            await github.rest.issues.create({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              title,
+              labels: ['codex-compat'],
+              body: [
+                '## Codex rollout JSONL compatibility check failed',
+                '',
+                'The nightly check that verifies `openai/codex-action` still writes token counts',
+                'to the rollout JSONL file has failed. This likely means the `@openai/codex` CLI',
+                'changed its internal session file format.',
+                '',
+                '**Impact:** Codex runs tracked by agentmeter-action will show `—` for cost instead',
+                'of a real value until this is fixed.',
+                '',
+                `**Failed run:** ${runUrl}`,
+                '',
+                '## What to check',
+                '',
+                '1. Look at the failed step logs — it will say which assertion failed',
+                '2. Run `codex exec "say hello"` locally (without `--ephemeral`, which skips writing the rollout file) and inspect `~/.codex/sessions/`',
+                '3. If the format changed, update:',
+                '   - The `Extract Codex token usage` step in `.github/workflows/agent-review-codex.yml`',
+                '   - `tryExtractFromCodexJsonl()` in `src/token-extractor.ts`',
+                '   - `CodexTokenEvent` in `src/types.ts`',
+                '   - `docs/challenges.md` section 6',
+              ].join('\n'),
+            });

From 8617cb854d7f5db382b82f88de4b7f52c1788c0a Mon Sep 17 00:00:00 2001
From: adamhenson <adamhenson1979@gmail.com>
Date: Wed, 18 Mar 2026 21:52:35 -0400
Subject: [PATCH 10/13] fix: typecheck, code review fixes, comment pagination

- fix(token-extractor): biome formatting (trailing comma, ternary,
  remove parens from CodexExecTurnCompleted cast)
- fix(run): initialize workflowRunTokens to undefined to prevent crash
  when no workflow_run_id is provided and no token overrides are set
- fix(run): add pr_comment to buildTriggerRef so issue_comment on PRs
  correctly formats as "PR #N" instead of "#N"
- feat(comment): show runs newest-first; limit visible table to 5 most
  recent; add collapsible "All N runs" section when over limit;
  fix parseExistingRuns to prefer the collapsible section (full history)
  over the truncated main table to prevent run count drift
- test: add tryExtractFromCodexExecJsonl tests (basic, multi-turn,
  no events, missing fields); add comment ordering and pagination tests

Made-with: Cursor
---
 __tests__/comment.test.ts         |  52 +++++++++
 __tests__/token-extractor.test.ts |  54 ++++++++++
 src/comment.ts                    | 169 +++++++++++++++++++-----------
 src/run.ts                        |   8 +-
 src/token-extractor.ts            |   8 +-
 5 files changed, 225 insertions(+), 66 deletions(-)

diff --git a/__tests__/comment.test.ts b/__tests__/comment.test.ts
index 13b4755..7ae2680 100644
--- a/__tests__/comment.test.ts
+++ b/__tests__/comment.test.ts
@@ -196,6 +196,58 @@ describe('buildCommentBody', () => {
     expect(updatedBody).toContain('$0.01');
   });
 
+  it('shows newest run first (row #1)', () => {
+    const firstBody = buildCommentBody({
+      apiPricing: testPricing,
+      existingBody: null,
+      runData: { ...baseRun, workflowName: 'first-run' },
+    });
+    const secondBody = buildCommentBody({
+      apiPricing: testPricing,
+      existingBody: firstBody,
+      runData: { ...baseRun, workflowName: 'second-run' },
+    });
+    const rows = secondBody.match(/\| \d+ \| .+? \|/g) ?? [];
+    expect(rows[0]).toContain('second-run');
+    expect(rows[1]).toContain('first-run');
+  });
+
+  it('shows all runs inline when count is at or below the limit', () => {
+    let body: string | null = null;
+    for (let i = 0; i < 5; i++) {
+      body = buildCommentBody({
+        apiPricing: testPricing,
+        existingBody: body,
+        runData: { ...baseRun, workflowName: `run-${i}` },
+      });
+    }
+    // No "All N runs" collapsible should appear
+    expect(body).not.toContain('All 5 runs');
+    expect(body).not.toContain('All 6 runs');
+  });
+
+  it('shows only 5 most recent runs and adds collapsible when over limit', () => {
+    let body: string | null = null;
+    for (let i = 1; i <= 7; i++) {
+      body = buildCommentBody({
+        apiPricing: testPricing,
+        existingBody: body,
+        runData: { ...baseRun, workflowName: `run-${i}` },
+      });
+    }
+    // Collapsible should exist
+    expect(body).toContain('All 7 runs');
+    // Latest 5 visible in main table (runs 7, 6, 5, 4, 3)
+    const mainTableSection = body!.split('<details>')[0];
+    expect(mainTableSection).toContain('run-7');
+    expect(mainTableSection).toContain('run-3');
+    expect(mainTableSection).not.toContain('run-2');
+    expect(mainTableSection).not.toContain('run-1');
+    // All runs present inside collapsible
+    expect(body).toContain('run-1');
+    expect(body).toContain('run-2');
+  });
+
   it('appends new run to existing comment and shows total', () => {
     const firstBody = buildCommentBody({
       apiPricing: testPricing,
diff --git a/__tests__/token-extractor.test.ts b/__tests__/token-extractor.test.ts
index e3e3888..f76f2bb 100644
--- a/__tests__/token-extractor.test.ts
+++ b/__tests__/token-extractor.test.ts
@@ -130,6 +130,60 @@ describe('extractTokensFromOutput', () => {
     expect(extractTokensFromOutput(jsonlOutput)).toBeNull();
   });
 
+  it('parses codex exec --json turn.completed event', () => {
+    const jsonlOutput = [
+      JSON.stringify({ type: 'thread.started', thread_id: 'abc' }),
+      JSON.stringify({ type: 'turn.started' }),
+      JSON.stringify({
+        type: 'turn.completed',
+        usage: { input_tokens: 24763, cached_input_tokens: 24448, output_tokens: 122 },
+      }),
+    ].join('\n');
+
+    const result = extractTokensFromOutput(jsonlOutput);
+    expect(result).not.toBeNull();
+    expect(result!.tokens.inputTokens).toBe(24763);
+    expect(result!.tokens.outputTokens).toBe(122);
+    expect(result!.tokens.cacheReadTokens).toBe(24448);
+    expect(result!.tokens.cacheWriteTokens).toBe(0);
+    expect(result!.isApproximate).toBe(false);
+  });
+
+  it('sums multiple turn.completed events across turns', () => {
+    const jsonlOutput = [
+      JSON.stringify({
+        type: 'turn.completed',
+        usage: { input_tokens: 1000, cached_input_tokens: 800, output_tokens: 100 },
+      }),
+      JSON.stringify({
+        type: 'turn.completed',
+        usage: { input_tokens: 500, cached_input_tokens: 200, output_tokens: 50 },
+      }),
+    ].join('\n');
+
+    const result = extractTokensFromOutput(jsonlOutput);
+    expect(result!.tokens.inputTokens).toBe(1500);
+    expect(result!.tokens.outputTokens).toBe(150);
+    expect(result!.tokens.cacheReadTokens).toBe(1000);
+  });
+
+  it('returns null for --json output with no turn.completed events', () => {
+    const jsonlOutput = [
+      JSON.stringify({ type: 'thread.started', thread_id: 'abc' }),
+      JSON.stringify({ type: 'item.started', item: { type: 'command_execution' } }),
+    ].join('\n');
+    expect(extractTokensFromOutput(jsonlOutput)).toBeNull();
+  });
+
+  it('handles missing usage fields in turn.completed gracefully', () => {
+    const jsonlOutput = JSON.stringify({ type: 'turn.completed', usage: {} });
+    const result = extractTokensFromOutput(jsonlOutput);
+    expect(result).not.toBeNull();
+    expect(result!.tokens.inputTokens).toBe(0);
+    expect(result!.tokens.outputTokens).toBe(0);
+    expect(result!.tokens.cacheReadTokens).toBe(0);
+  });
+
   it('defaults missing cache fields to zero in JSON', () => {
     const output = JSON.stringify({
       usage: { input_tokens: 100, output_tokens: 50 },
diff --git a/src/comment.ts b/src/comment.ts
index da57dd8..b36d8bc 100644
--- a/src/comment.ts
+++ b/src/comment.ts
@@ -47,9 +47,34 @@ function formatNumber(n: number): string {
   return n.toLocaleString('en-US');
 }
 
+const TABLE_HEADER = [
+  '| # | Workflow | Model | Status | Cost | Duration |',
+  '|---|----------|-------|--------|------|----------|',
+];
+
+const VISIBLE_RUNS_LIMIT = 5;
+
+/** Builds table row strings for a slice of runs, numbered from startIndex. */
+function buildTableRows({
+  runs,
+  startIndex,
+}: {
+  /** Runs to render */
+  runs: Array<Pick<RunCommentData, 'durationSeconds' | 'model' | 'status' | 'totalCostCents' | 'workflowName'>>;
+  /** 1-based row number for the first run */
+  startIndex: number;
+}): string[] {
+  return runs.map((run, i) => {
+    const icon = STATUS_EMOJI[run.status] ?? '❓';
+    return `| ${startIndex + i} | ${run.workflowName} | ${run.model ?? '—'} | ${icon} | ${formatCost(run.totalCostCents)} | ${formatDuration(run.durationSeconds)} |`;
+  });
+}
+
 /**
  * Builds the Markdown comment body for a PR/issue.
- * Parses any existing comment to extract previous run rows and append the new one.
+ * Parses any existing comment to extract previous run rows and prepends the new one.
+ * Runs are shown newest-first. If there are more than VISIBLE_RUNS_LIMIT (5) runs, only the
+ * most recent ones appear in the main table; a collapsible section lists every run.
  */
 export function buildCommentBody({
   apiPricing,
@@ -64,36 +89,48 @@ export function buildCommentBody({
   runData: RunCommentData;
 }): string {
   const existingRuns = existingBody ? parseExistingRuns(existingBody) : [];
-  const allRuns = [...existingRuns, runData];
-
-  const tableRows = allRuns
-    .map((run, i) => {
-      const icon = STATUS_EMOJI[run.status] ?? '❓';
-      const model = run.model ?? '—';
-      return `| ${i + 1} | ${run.workflowName} | ${model} | ${icon} | ${formatCost(run.totalCostCents)} | ${formatDuration(run.durationSeconds)} |`;
-    })
-    .join('\n');
+  // Newest first: current run at the top
+  const allRuns: Array<Pick<RunCommentData, 'durationSeconds' | 'model' | 'status' | 'totalCostCents' | 'workflowName'>> = [runData, ...existingRuns];
 
   const totalCostCents = allRuns.reduce((sum, r) => sum + r.totalCostCents, 0);
   const totalRow =
     allRuns.length > 1 ? `| **Total** | | | | **${formatCost(totalCostCents)}** | |` : '';
 
-  const latestRun = runData;
-  const tokenDetails = buildTokenDetails({ apiPricing, run: latestRun });
+  const visibleRuns = allRuns.slice(0, VISIBLE_RUNS_LIMIT);
+  const hasMore = allRuns.length > VISIBLE_RUNS_LIMIT;
+
+  const tokenDetails = buildTokenDetails({ apiPricing, run: runData });
 
-  const lines = [
+  const lines: string[] = [
     COMMENT_MARKER,
     '## ⚡ AgentMeter',
     '',
-    '| # | Workflow | Model | Status | Cost | Duration |',
-    '|---|----------|-------|--------|------|----------|',
-    tableRows,
+    ...TABLE_HEADER,
+    ...buildTableRows({ runs: visibleRuns, startIndex: 1 }),
     ...(totalRow ? [totalRow] : []),
     '',
-    ...(tokenDetails ? [tokenDetails, ''] : []),
-    `[View in AgentMeter →](${latestRun.dashboardUrl})`,
   ];
 
+  if (hasMore) {
+    lines.push(
+      '<details>',
+      `<summary>All ${allRuns.length} runs</summary>`,
+      '',
+      ...TABLE_HEADER,
+      ...buildTableRows({ runs: allRuns, startIndex: 1 }),
+      ...(totalRow ? [totalRow] : []),
+      '',
+      '</details>',
+      '',
+    );
+  }
+
+  if (tokenDetails) {
+    lines.push(tokenDetails, '');
+  }
+
+  lines.push(`[View in AgentMeter →](${runData.dashboardUrl})`);
+
   return lines.join('\n');
 }
 
@@ -176,54 +213,68 @@ interface ParsedRun {
   turns: number | null;
 }
 
+/**
+ * Parses raw table row strings from a Markdown table body (rows only, no header).
+ */
+function parseTableRows(rawRows: string): ParsedRun[] {
+  return rawRows
+    .trim()
+    .split('\n')
+    .filter((r) => r.startsWith('|') && !r.includes('**Total**'))
+    .map((row) => {
+      const cells = row
+        .split('|')
+        .map((c) => c.trim())
+        .filter(Boolean);
+      if (cells.length < 5) return null;
+
+      // Support both old (5-col) and new (6-col) format:
+      // Old: # | Workflow | Status | Cost | Duration
+      // New: # | Workflow | Model  | Status | Cost | Duration
+      const hasModelCol = cells.length >= 6;
+      const workflowName = cells[1] ?? '';
+      const model = hasModelCol && cells[2] && cells[2] !== '—' ? cells[2] : null;
+      const statusEmoji = (hasModelCol ? cells[3] : cells[2]) ?? '';
+      const costStr = ((hasModelCol ? cells[4] : cells[3]) ?? '').replace(/[$*]/g, '');
+      const totalCostCents = Math.round(parseFloat(costStr) * 100);
+      const durationSeconds = parseDuration((hasModelCol ? cells[5] : cells[4]) ?? '');
+      const status =
+        Object.entries(STATUS_EMOJI).find(([, emoji]) => emoji === statusEmoji)?.[0] ?? 'other';
+
+      return {
+        workflowName,
+        status,
+        totalCostCents: Number.isNaN(totalCostCents) ? 0 : totalCostCents,
+        durationSeconds,
+        dashboardUrl: '',
+        model,
+        turns: null,
+      } satisfies ParsedRun;
+    })
+    .filter((r): r is NonNullable<typeof r> => r !== null);
+}
+
 /**
  * Parses run rows out of an existing AgentMeter comment body.
- * Returns an empty array if parsing fails or comment is malformed.
+ * Prefers the "All N runs" collapsible section when present (contains the full history),
+ * falling back to the main table otherwise.
+ * Returns an empty array if parsing fails or the comment is malformed.
  */
 function parseExistingRuns(body: string): ParsedRun[] {
   try {
+    // When >5 runs exist the full history lives in the collapsible — prefer that
+    const detailsMatch = body.match(
+      /<summary>All \d+ runs<\/summary>\n\n([\s\S]+?)\n\n<\/details>/,
+    );
+    if (detailsMatch?.[1]) {
+      const tableMatch = detailsMatch[1].match(/\| #.*?\n\|[-|: ]+\n((?:\|.*?\n)*)/s);
+      if (tableMatch?.[1]) return parseTableRows(tableMatch[1]);
+    }
+
+    // Fall back to the main (potentially truncated) table
     const tableMatch = body.match(/\| #.*?\n\|[-|: ]+\n((?:\|.*?\n)*)/s);
     if (!tableMatch?.[1]) return [];
-
-    const rows = tableMatch[1]
-      .trim()
-      .split('\n')
-      .filter((r) => r.startsWith('|') && !r.includes('**Total**'));
-
-    return rows
-      .map((row) => {
-        const cells = row
-          .split('|')
-          .map((c) => c.trim())
-          .filter(Boolean);
-        if (cells.length < 5) return null;
-
-        // Support both old (5-col) and new (6-col) format:
-        // Old: # | Workflow | Status | Cost | Duration
-        // New: # | Workflow | Model  | Status | Cost | Duration
-        const hasModelCol = cells.length >= 6;
-        const workflowName = cells[1] ?? '';
-        const model = hasModelCol && cells[2] && cells[2] !== '—' ? cells[2] : null;
-        const statusEmoji = (hasModelCol ? cells[3] : cells[2]) ?? '';
-        const costStr = ((hasModelCol ? cells[4] : cells[3]) ?? '').replace(/[$*]/g, '');
-        const totalCostCents = Math.round(parseFloat(costStr) * 100);
-        const durationSeconds = parseDuration((hasModelCol ? cells[5] : cells[4]) ?? '');
-
-        const status =
-          Object.entries(STATUS_EMOJI).find(([, emoji]) => emoji === statusEmoji)?.[0] ?? 'other';
-
-        const parsed: ParsedRun = {
-          workflowName,
-          status,
-          totalCostCents: Number.isNaN(totalCostCents) ? 0 : totalCostCents,
-          durationSeconds,
-          dashboardUrl: '',
-          model,
-          turns: null,
-        };
-        return parsed;
-      })
-      .filter((r): r is NonNullable<typeof r> => r !== null) as ParsedRun[];
+    return parseTableRows(tableMatch[1]);
   } catch {
     return [];
   }
diff --git a/src/run.ts b/src/run.ts
index d76c998..5ce90f5 100644
--- a/src/run.ts
+++ b/src/run.ts
@@ -12,7 +12,11 @@ import { resolveWorkflowRun } from './workflow-run';
  * Builds a human-readable trigger ref string from a number and event name.
  */
 function buildTriggerRef(number: number, eventName: string): string {
-  if (eventName === 'pull_request' || eventName === 'pull_request_review_comment') {
+  if (
+    eventName === 'pull_request' ||
+    eventName === 'pull_request_review_comment' ||
+    eventName === 'pr_comment'
+  ) {
     return `PR #${number}`;
   }
   return `#${number}`;
@@ -34,7 +38,7 @@ export async function run(): Promise<void> {
   // When workflow_run_id is provided, resolve all workflow-run data automatically:
   // timestamps, trigger number, and agent-tokens artifact. This removes the need
   // for manual pre-steps in the caller's companion workflow.
-  let workflowRunTokens: ReturnType<typeof resolveTokens>;
+  let workflowRunTokens: ReturnType<typeof resolveTokens> = undefined;
   let resolvedTriggerNumber = inputs.triggerNumber ?? ctx.triggerNumber;
   let resolvedTriggerEvent = inputs.triggerEvent || ctx.triggerType;
   let resolvedStartedAt = inputs.startedAt || selfStartedAt;
diff --git a/src/token-extractor.ts b/src/token-extractor.ts
index b97e8e0..1677e5f 100644
--- a/src/token-extractor.ts
+++ b/src/token-extractor.ts
@@ -64,7 +64,7 @@ function tryExtractFromJson(
  *   cached_input_tokens → cacheReadTokens
  */
 function tryExtractFromCodexExecJsonl(
-  agentOutput: string,
+  agentOutput: string
 ): { tokens: TokenCounts; isApproximate: boolean } | null {
   const lines = agentOutput.split('\n');
   let inputTokens = 0;
@@ -78,13 +78,11 @@ function tryExtractFromCodexExecJsonl(
     try {
       const parsed = JSON.parse(trimmed) as unknown;
       const obj =
-        typeof parsed === 'object' && parsed !== null
-          ? (parsed as Record<string, unknown>)
-          : null;
+        typeof parsed === 'object' && parsed !== null ? (parsed as Record<string, unknown>) : null;
       if (obj?.['type'] !== 'turn.completed') continue;
       const usage = obj['usage'];
       if (typeof usage !== 'object' || usage === null) continue;
-      const u = usage as (CodexExecTurnCompleted)['usage'];
+      const u = usage as CodexExecTurnCompleted['usage'];
       inputTokens += u.input_tokens ?? 0;
       outputTokens += u.output_tokens ?? 0;
       cacheReadTokens += u.cached_input_tokens ?? 0;

From d510c0ee120be9b460f54b603e81927b9db5762e Mon Sep 17 00:00:00 2001
From: adamhenson <adamhenson1979@gmail.com>
Date: Wed, 18 Mar 2026 22:21:49 -0400
Subject: [PATCH 11/13] =?UTF-8?q?fix:=20address=20code=20review=20?=
 =?UTF-8?q?=E2=80=94=20lint,=20pagination,=20PR=20lookup,=20trigger=20ref?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- fix(comment): extract RunRow type alias to satisfy Biome line-length
- fix(comment): use octokit.paginate for comment lookup so existing
  AgentMeter comment is found even beyond page 1 on busy PRs/issues
- fix(workflow-run): change PR lookup state: 'open' → 'all' so merged
  or closed PRs are resolved correctly in companion workflow mode
- fix(workflow-run): default missing numeric fields to 0 in
  parseAgentTokensZip instead of letting undefined propagate to ingest
- fix(run): expand buildTriggerRef to cover pr_opened, pr_synchronize,
  pr_reopened so inline PR runs always render "PR #N" not "#N"
- fix(workflow): use mtime-based rollout file selection with -printf
  instead of lexicographic sort for more robust latest-file detection

Made-with: Cursor
---
 .github/workflows/agent-review-codex.yml |  2 +-
 src/comment.ts                           | 19 ++++++++++++-------
 src/run.ts                               | 20 +++++++++++---------
 src/workflow-run.ts                      | 11 +++++++++--
 4 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/agent-review-codex.yml b/.github/workflows/agent-review-codex.yml
index ca81ba5..b0e5ed6 100644
--- a/.github/workflows/agent-review-codex.yml
+++ b/.github/workflows/agent-review-codex.yml
@@ -45,7 +45,7 @@ jobs:
         id: codex-tokens
         if: always()
         run: |
-          rollout=$(find /tmp/codex-home/sessions -name "rollout-*.jsonl" 2>/dev/null | sort | tail -1)
+          rollout=$(find /tmp/codex-home/sessions -name "rollout-*.jsonl" 2>/dev/null -printf "%T@ %p\n" | sort -rn | head -1 | cut -d' ' -f2-)
           if [ -z "$rollout" ]; then
             echo "No rollout JSONL found — token counts unavailable"
             echo "input_tokens=" >> "$GITHUB_OUTPUT"
diff --git a/src/comment.ts b/src/comment.ts
index b36d8bc..88106fb 100644
--- a/src/comment.ts
+++ b/src/comment.ts
@@ -54,13 +54,19 @@ const TABLE_HEADER = [
 
 const VISIBLE_RUNS_LIMIT = 5;
 
+/** Minimal run fields needed to render a table row */
+type RunRow = Pick<
+  RunCommentData,
+  'durationSeconds' | 'model' | 'status' | 'totalCostCents' | 'workflowName'
+>;
+
 /** Builds table row strings for a slice of runs, numbered from startIndex. */
 function buildTableRows({
   runs,
   startIndex,
 }: {
   /** Runs to render */
-  runs: Array<Pick<RunCommentData, 'durationSeconds' | 'model' | 'status' | 'totalCostCents' | 'workflowName'>>;
+  runs: RunRow[];
   /** 1-based row number for the first run */
   startIndex: number;
 }): string[] {
@@ -90,7 +96,7 @@ export function buildCommentBody({
 }): string {
   const existingRuns = existingBody ? parseExistingRuns(existingBody) : [];
   // Newest first: current run at the top
-  const allRuns: Array<Pick<RunCommentData, 'durationSeconds' | 'model' | 'status' | 'totalCostCents' | 'workflowName'>> = [runData, ...existingRuns];
+  const allRuns: RunRow[] = [runData, ...existingRuns];
 
   const totalCostCents = allRuns.reduce((sum, r) => sum + r.totalCostCents, 0);
   const totalRow =
@@ -121,7 +127,7 @@ export function buildCommentBody({
       ...(totalRow ? [totalRow] : []),
       '',
       '</details>',
-      '',
+      ''
     );
   }
 
@@ -264,7 +270,7 @@ function parseExistingRuns(body: string): ParsedRun[] {
   try {
     // When >5 runs exist the full history lives in the collapsible — prefer that
     const detailsMatch = body.match(
-      /<summary>All \d+ runs<\/summary>\n\n([\s\S]+?)\n\n<\/details>/,
+      /<summary>All \d+ runs<\/summary>\n\n([\s\S]+?)\n\n<\/details>/
     );
     if (detailsMatch?.[1]) {
       const tableMatch = detailsMatch[1].match(/\| #.*?\n\|[-|: ]+\n((?:\|.*?\n)*)/s);
@@ -296,9 +302,8 @@ async function findExistingComment({
   issueOrPrNumber: number;
 }): Promise<{ id: number; body: string } | null> {
   try {
-    const { data: comments } = await (
-      octokit as ReturnType<typeof import('@actions/github').getOctokit>
-    ).rest.issues.listComments({
+    const gh = octokit as ReturnType<typeof import('@actions/github').getOctokit>;
+    const comments = await gh.paginate(gh.rest.issues.listComments, {
       owner,
       repo,
       issue_number: issueOrPrNumber,
diff --git a/src/run.ts b/src/run.ts
index 5ce90f5..bf1151d 100644
--- a/src/run.ts
+++ b/src/run.ts
@@ -12,14 +12,16 @@ import { resolveWorkflowRun } from './workflow-run';
  * Builds a human-readable trigger ref string from a number and event name.
  */
 function buildTriggerRef(number: number, eventName: string): string {
-  if (
-    eventName === 'pull_request' ||
-    eventName === 'pull_request_review_comment' ||
-    eventName === 'pr_comment'
-  ) {
-    return `PR #${number}`;
-  }
-  return `#${number}`;
+  // Covers both raw GitHub event names and the mapped triggerType values from context.ts
+  const prEvents = new Set([
+    'pull_request',
+    'pull_request_review_comment',
+    'pr_comment',
+    'pr_opened',
+    'pr_synchronize',
+    'pr_reopened',
+  ]);
+  return prEvents.has(eventName) ? `PR #${number}` : `#${number}`;
 }
 
 /**
@@ -38,7 +40,7 @@ export async function run(): Promise<void> {
   // When workflow_run_id is provided, resolve all workflow-run data automatically:
   // timestamps, trigger number, and agent-tokens artifact. This removes the need
   // for manual pre-steps in the caller's companion workflow.
-  let workflowRunTokens: ReturnType<typeof resolveTokens> = undefined;
+  let workflowRunTokens: ReturnType<typeof resolveTokens>;
   let resolvedTriggerNumber = inputs.triggerNumber ?? ctx.triggerNumber;
   let resolvedTriggerEvent = inputs.triggerEvent || ctx.triggerType;
   let resolvedStartedAt = inputs.startedAt || selfStartedAt;
diff --git a/src/workflow-run.ts b/src/workflow-run.ts
index 588a0b1..1c72844 100644
--- a/src/workflow-run.ts
+++ b/src/workflow-run.ts
@@ -278,7 +278,7 @@ async function resolveTrigger({
         owner,
         repo,
         head: `${owner}:${headBranch}`,
-        state: 'open',
+        state: 'all',
         per_page: 1,
       });
       if (prs[0]) {
@@ -375,7 +375,14 @@ async function parseAgentTokensZip(zipData: ArrayBuffer): Promise<AgentTokensArt
       core.warning('AgentMeter: agent-tokens artifact has unexpected structure.');
       return null;
     }
-    return parsed;
+    return {
+      input_tokens: parsed.input_tokens,
+      output_tokens: typeof parsed.output_tokens === 'number' ? parsed.output_tokens : 0,
+      cache_read_tokens:
+        typeof parsed.cache_read_tokens === 'number' ? parsed.cache_read_tokens : 0,
+      cache_write_tokens:
+        typeof parsed.cache_write_tokens === 'number' ? parsed.cache_write_tokens : 0,
+    };
   } catch (error) {
     core.warning(`AgentMeter: failed to parse agent-tokens zip: ${error}`);
     return null;

From dce16537b7e110625c7059e8f2061786a724c2df Mon Sep 17 00:00:00 2001
From: adamhenson <adamhenson1979@gmail.com>
Date: Wed, 18 Mar 2026 22:26:21 -0400
Subject: [PATCH 12/13] fix: token precedence and issue_comment triggerRef
 labeling

- Split resolveTokens calls in run.ts so workflow_run artifact tokens
  win over agent_output stdout extraction (matching documented priority)
- Prefer ctx.triggerRef in all inline-run cases so plain issue comments
  are labeled #N instead of PR #N; buildTriggerRef only fires for
  companion workflow_run mode where ctx.triggerRef is null
- Fix buildTableRows params to alphabetical order per code style rules

Made-with: Cursor
---
 src/comment.ts |  4 ++--
 src/run.ts     | 24 ++++++++++++++++++------
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/src/comment.ts b/src/comment.ts
index 88106fb..be0f000 100644
--- a/src/comment.ts
+++ b/src/comment.ts
@@ -65,10 +65,10 @@ function buildTableRows({
   runs,
   startIndex,
 }: {
-  /** Runs to render */
-  runs: RunRow[];
   /** 1-based row number for the first run */
   startIndex: number;
+  /** Runs to render */
+  runs: RunRow[];
 }): string[] {
   return runs.map((run, i) => {
     const icon = STATUS_EMOJI[run.status] ?? '❓';
diff --git a/src/run.ts b/src/run.ts
index bf1151d..820062b 100644
--- a/src/run.ts
+++ b/src/run.ts
@@ -79,20 +79,32 @@ export async function run(): Promise<void> {
     }
   }
 
-  // Token resolution priority: explicit inputs > workflow_run artifact > agent_output extraction
+  // Token resolution priority: explicit inputs > workflow_run artifact > agent_output extraction.
+  // Split into two resolveTokens calls so the artifact wins over stdout extraction.
   const tokens =
     resolveTokens({
-      agentOutput: inputs.agentOutput,
+      agentOutput: '',
       inputTokensOverride: inputs.inputTokens,
       outputTokensOverride: inputs.outputTokens,
       cacheReadTokensOverride: inputs.cacheReadTokens,
       cacheWriteTokensOverride: inputs.cacheWriteTokens,
-    }) ?? workflowRunTokens;
-
+    }) ??
+    workflowRunTokens ??
+    resolveTokens({
+      agentOutput: inputs.agentOutput,
+      inputTokensOverride: null,
+      outputTokensOverride: null,
+      cacheReadTokensOverride: null,
+      cacheWriteTokensOverride: null,
+    });
+
+  // Prefer ctx.triggerRef (correctly set for inline runs including issue vs PR distinction).
+  // Fall back to buildTriggerRef only for companion workflow_run mode where ctx.triggerRef is null.
   const triggerRef =
-    resolvedTriggerNumber !== null
+    ctx.triggerRef ??
+    (resolvedTriggerNumber !== null
       ? buildTriggerRef(resolvedTriggerNumber, resolvedTriggerEvent)
-      : ctx.triggerRef;
+      : null);
 
   const triggerType = resolvedTriggerEvent || ctx.triggerType || 'other';
 

From f2e75a7301b59f7af440527dd5d6069028e715db Mon Sep 17 00:00:00 2001
From: adamhenson <adamhenson1979@gmail.com>
Date: Wed, 18 Mar 2026 22:32:22 -0400
Subject: [PATCH 13/13] fix: object params on buildTriggerRef, revert PR lookup
 to state open
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Convert buildTriggerRef to object params with alphabetical ordering
  per code style rules
- Revert pull_requests[] fallback lookup to state: 'open' — the
  fallback is only for recovering active runs with missing
  pull_requests[]; state: 'all' risks matching stale PRs on
  reused branches

Made-with: Cursor
---
 src/run.ts          | 14 +++++++++++---
 src/workflow-run.ts |  2 +-
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/run.ts b/src/run.ts
index 820062b..127bcdf 100644
--- a/src/run.ts
+++ b/src/run.ts
@@ -10,9 +10,17 @@ import { resolveWorkflowRun } from './workflow-run';
 
 /**
  * Builds a human-readable trigger ref string from a number and event name.
+ * Covers both raw GitHub event names and the mapped triggerType values from context.ts.
  */
-function buildTriggerRef(number: number, eventName: string): string {
-  // Covers both raw GitHub event names and the mapped triggerType values from context.ts
+function buildTriggerRef({
+  eventName,
+  number,
+}: {
+  /** Raw GitHub event name or mapped triggerType from context.ts */
+  eventName: string;
+  /** PR or issue number */
+  number: number;
+}): string {
   const prEvents = new Set([
     'pull_request',
     'pull_request_review_comment',
@@ -103,7 +111,7 @@ export async function run(): Promise<void> {
   const triggerRef =
     ctx.triggerRef ??
     (resolvedTriggerNumber !== null
-      ? buildTriggerRef(resolvedTriggerNumber, resolvedTriggerEvent)
+      ? buildTriggerRef({ eventName: resolvedTriggerEvent, number: resolvedTriggerNumber })
       : null);
 
   const triggerType = resolvedTriggerEvent || ctx.triggerType || 'other';
diff --git a/src/workflow-run.ts b/src/workflow-run.ts
index 1c72844..2443a82 100644
--- a/src/workflow-run.ts
+++ b/src/workflow-run.ts
@@ -278,8 +278,8 @@ async function resolveTrigger({
         owner,
         repo,
         head: `${owner}:${headBranch}`,
-        state: 'all',
         per_page: 1,
+        state: 'open',
       });
       if (prs[0]) {
         return { triggerNumber: prs[0].number, triggerEvent: 'pull_request' };