From 020c1fc704bf33a4b8692d4d22848e5fa73089f8 Mon Sep 17 00:00:00 2001 From: adamhenson Date: Wed, 18 Mar 2026 20:07:39 -0400 Subject: [PATCH 01/13] chore: trigger CI Made-with: Cursor From 418cdc34178553ed99f3ab65cee7e5d5d4106ebe Mon Sep 17 00:00:00 2001 From: adamhenson Date: Wed, 18 Mar 2026 20:10:07 -0400 Subject: [PATCH 02/13] debug(codex): query SQLite db for token usage instead of missing JSONL Made-with: Cursor --- .github/workflows/agent-review-codex.yml | 36 +++++++++--------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/.github/workflows/agent-review-codex.yml b/.github/workflows/agent-review-codex.yml index a85436a..ad7a297 100644 --- a/.github/workflows/agent-review-codex.yml +++ b/.github/workflows/agent-review-codex.yml @@ -62,33 +62,25 @@ jobs: env: CODEX_HOME: /home/runner/.codex run: | - echo "=== CODEX_HOME contents ===" - find "$CODEX_HOME" -type f 2>/dev/null || echo "(empty or missing)" - echo "=== end ===" + DB="$CODEX_HOME/logs_1.sqlite" + INPUT=0; OUTPUT=0; CACHE_READ=0 - # Try session files in CODEX_HOME/sessions/ first, then ~/.codex/sessions/ - SESSION_FILE=$(ls -t "$CODEX_HOME"/sessions/*.jsonl 2>/dev/null | head -1 || \ - ls -t ~/.codex/sessions/*.jsonl 2>/dev/null | head -1 || true) - - echo "SESSION_FILE=$SESSION_FILE" + if [ -f "$DB" ]; then + echo "=== SQLite schema ===" + sqlite3 "$DB" ".schema" 2>/dev/null || echo "(schema unavailable)" + echo "=== end ===" - if [ -n "$SESSION_FILE" ] && [ -f "$SESSION_FILE" ]; then - echo "=== last 5 lines of session file ===" - tail -5 "$SESSION_FILE" + # Try to extract token usage — query will be refined once schema is known + USAGE_JSON=$(sqlite3 "$DB" \ + "SELECT json FROM events WHERE json LIKE '%token%' ORDER BY rowid DESC LIMIT 5;" \ + 2>/dev/null || true) + echo "=== token rows ===" + echo "$USAGE_JSON" echo "=== end ===" - TOKEN_LINE=$(grep '"token_count"' "$SESSION_FILE" 2>/dev/null | tail -1 || true) - if [ -n "$TOKEN_LINE" ]; then - INPUT=$(echo "$TOKEN_LINE" 
| jq -r '.payload.info.total_token_usage.input_tokens // 0') - OUTPUT=$(echo "$TOKEN_LINE" | jq -r '.payload.info.total_token_usage.output_tokens // 0') - CACHE_READ=$(echo "$TOKEN_LINE" | jq -r '.payload.info.total_token_usage.cached_input_tokens // 0') - else - echo "No token_count event found in session file" - INPUT=0; OUTPUT=0; CACHE_READ=0 - fi else - echo "No session file found" - INPUT=0; OUTPUT=0; CACHE_READ=0 + echo "No SQLite database found at $DB" fi + { echo "input_tokens=$INPUT" echo "output_tokens=$OUTPUT" From 996430c7d164ebd7115017391f523463a1db7517 Mon Sep 17 00:00:00 2001 From: adamhenson Date: Wed, 18 Mar 2026 20:14:59 -0400 Subject: [PATCH 03/13] debug(codex): query logs table message column for token data Made-with: Cursor --- .github/workflows/agent-review-codex.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/agent-review-codex.yml b/.github/workflows/agent-review-codex.yml index ad7a297..06733b2 100644 --- a/.github/workflows/agent-review-codex.yml +++ b/.github/workflows/agent-review-codex.yml @@ -66,16 +66,16 @@ jobs: INPUT=0; OUTPUT=0; CACHE_READ=0 if [ -f "$DB" ]; then - echo "=== SQLite schema ===" - sqlite3 "$DB" ".schema" 2>/dev/null || echo "(schema unavailable)" + echo "=== messages containing 'token' ===" + sqlite3 "$DB" \ + "SELECT message FROM logs WHERE message LIKE '%token%' ORDER BY id DESC LIMIT 10;" \ + 2>/dev/null || echo "(query failed)" echo "=== end ===" - # Try to extract token usage — query will be refined once schema is known - USAGE_JSON=$(sqlite3 "$DB" \ - "SELECT json FROM events WHERE json LIKE '%token%' ORDER BY rowid DESC LIMIT 5;" \ - 2>/dev/null || true) - echo "=== token rows ===" - echo "$USAGE_JSON" + echo "=== last 10 messages ===" + sqlite3 "$DB" \ + "SELECT message FROM logs ORDER BY id DESC LIMIT 10;" \ + 2>/dev/null || echo "(query failed)" echo "=== end ===" else echo "No SQLite database found at $DB" From 
de0ed92c28deadae0ed376eab5a9874e8d53233f Mon Sep 17 00:00:00 2001 From: adamhenson Date: Wed, 18 Mar 2026 20:15:37 -0400 Subject: [PATCH 04/13] style(pricing): simplify fetch log to show count only Made-with: Cursor --- src/pricing.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/pricing.ts b/src/pricing.ts index ec292e1..e137f65 100644 --- a/src/pricing.ts +++ b/src/pricing.ts @@ -71,9 +71,7 @@ export async function fetchPricing({ cacheReadPer1M: entry.cacheReadPerMillionTokens ?? 0, }; } - core.info( - `AgentMeter: fetched pricing for ${Object.keys(result).length} models: ${Object.keys(result).join(', ')}` - ); + core.info(`AgentMeter: fetched pricing for ${Object.keys(result).length} models.`); return result; } catch (error) { core.info(`AgentMeter: could not fetch pricing from API (${error}) — cost will show as —.`); From 39bad2f42f6f934655c3430fa87608436a1e668c Mon Sep 17 00:00:00 2001 From: adamhenson Date: Wed, 18 Mar 2026 20:18:31 -0400 Subject: [PATCH 05/13] debug(codex): inspect run_id.json and output file for token data Made-with: Cursor --- .github/workflows/agent-review-codex.yml | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/.github/workflows/agent-review-codex.yml b/.github/workflows/agent-review-codex.yml index 06733b2..e9ced5a 100644 --- a/.github/workflows/agent-review-codex.yml +++ b/.github/workflows/agent-review-codex.yml @@ -65,21 +65,17 @@ jobs: DB="$CODEX_HOME/logs_1.sqlite" INPUT=0; OUTPUT=0; CACHE_READ=0 - if [ -f "$DB" ]; then - echo "=== messages containing 'token' ===" - sqlite3 "$DB" \ - "SELECT message FROM logs WHERE message LIKE '%token%' ORDER BY id DESC LIMIT 10;" \ - 2>/dev/null || echo "(query failed)" - echo "=== end ===" + echo "=== CODEX_HOME files ===" + find "$CODEX_HOME" -type f 2>/dev/null | while read -r f; do + echo "--- $f ---" + if [[ "$f" == *.json ]]; then cat "$f" 2>/dev/null; fi + if [[ "$f" == *.toml ]]; then cat "$f" 2>/dev/null; fi + done 
+ echo "=== end ===" - echo "=== last 10 messages ===" - sqlite3 "$DB" \ - "SELECT message FROM logs ORDER BY id DESC LIMIT 10;" \ - 2>/dev/null || echo "(query failed)" - echo "=== end ===" - else - echo "No SQLite database found at $DB" - fi + echo "=== /tmp/codex-output.md (last 20 lines) ===" + tail -20 /tmp/codex-output.md 2>/dev/null || echo "(not found)" + echo "=== end ===" { echo "input_tokens=$INPUT" From eec5dd7166d9ba99e48e797a2f42e276be5b6678 Mon Sep 17 00:00:00 2001 From: adamhenson Date: Wed, 18 Mar 2026 20:22:37 -0400 Subject: [PATCH 06/13] fix(codex): remove fragile token extraction, document limitation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit codex exec (via openai/codex-action) does not expose per-run token counts through any stable interface — not JSONL, not SQLite, not action outputs. Remove the extraction step entirely; cost will show as — which is accurate. Duration and status still track correctly. Document in challenges.md. 
Made-with: Cursor --- .github/workflows/agent-review-codex.yml | 31 ------------------------ docs/challenges.md | 3 ++- 2 files changed, 2 insertions(+), 32 deletions(-) diff --git a/.github/workflows/agent-review-codex.yml b/.github/workflows/agent-review-codex.yml index e9ced5a..3350b17 100644 --- a/.github/workflows/agent-review-codex.yml +++ b/.github/workflows/agent-review-codex.yml @@ -39,7 +39,6 @@ jobs: prompt-file: .github/codex/prompts/review.md model: ${{ vars.GH_AW_MODEL_AGENT_CODEX || 'gpt-5.4-mini' }} sandbox: workspace-write - output-file: /tmp/codex-output.md - name: Post review comment if: steps.codex.outputs.final-message != '' @@ -56,33 +55,6 @@ jobs: env: CODEX_REVIEW: ${{ steps.codex.outputs.final-message }} - - name: Extract Codex token usage - id: extract_tokens - if: always() - env: - CODEX_HOME: /home/runner/.codex - run: | - DB="$CODEX_HOME/logs_1.sqlite" - INPUT=0; OUTPUT=0; CACHE_READ=0 - - echo "=== CODEX_HOME files ===" - find "$CODEX_HOME" -type f 2>/dev/null | while read -r f; do - echo "--- $f ---" - if [[ "$f" == *.json ]]; then cat "$f" 2>/dev/null; fi - if [[ "$f" == *.toml ]]; then cat "$f" 2>/dev/null; fi - done - echo "=== end ===" - - echo "=== /tmp/codex-output.md (last 20 lines) ===" - tail -20 /tmp/codex-output.md 2>/dev/null || echo "(not found)" - echo "=== end ===" - - { - echo "input_tokens=$INPUT" - echo "output_tokens=$OUTPUT" - echo "cache_read_tokens=$CACHE_READ" - } >> "$GITHUB_OUTPUT" - - name: Track with AgentMeter if: always() uses: foo-software/agentmeter-action@main @@ -92,8 +64,5 @@ jobs: engine: codex model: ${{ vars.GH_AW_MODEL_AGENT_CODEX || 'gpt-5.4-mini' }} status: ${{ job.status == 'success' && 'success' || 'failed' }} - input_tokens: ${{ steps.extract_tokens.outputs.input_tokens }} - output_tokens: ${{ steps.extract_tokens.outputs.output_tokens }} - cache_read_tokens: ${{ steps.extract_tokens.outputs.cache_read_tokens }} started_at: ${{ steps.timer.outputs.started_at }} post_comment: 'true' diff --git 
a/docs/challenges.md b/docs/challenges.md index 5ab1ac0..74e121a 100644 --- a/docs/challenges.md +++ b/docs/challenges.md @@ -123,4 +123,5 @@ If the user omits `if: always()` on the AgentMeter step, failed agent runs won't | Comment posting | ✅ | Upsert by marker, correct PR/issue number | | `GITHUB_TOKEN` availability | ✅ | `github_token` input with `default: ${{ github.token }}` | | Node.js version | ✅ | node24 | -| Pricing table | ✅ | Fetched from `/api/models/pricing`; built-in prefix fallback | +| Pricing table | ✅ | Fetched from `/api/models/pricing`; shows `—` if unreachable | +| Codex token counts | ⚠️ Known limitation | `codex exec` (via `openai/codex-action`) does not expose per-run token counts through any stable interface. Cost shows as `—` for Codex runs. Duration and status are tracked correctly. | From 3416ed2bcc47176d8f2ade0c65adfbcbae438e05 Mon Sep 17 00:00:00 2001 From: adamhenson Date: Wed, 18 Mar 2026 20:35:55 -0400 Subject: [PATCH 07/13] feat(codex): extract token counts from rollout JSONL, add turn.completed parser - Set known codex-home in agent-review-codex.yml so rollout JSONL is findable at /tmp/codex-home/sessions/YYYY/MM/DD/rollout-*.jsonl - Add "Extract Codex token usage" step that greps for the last token_count event and sets input/output/cache_read_tokens outputs - Pass those outputs to the AgentMeter tracking step - Add tryExtractFromCodexExecJsonl() to token-extractor.ts for consumers passing codex exec --json stdout as agent_output - Add CodexExecTurnCompleted type for the turn.completed event format - Update challenges.md to reflect the rollout JSONL investigation status Made-with: Cursor --- .github/workflows/agent-review-codex.yml | 29 +++++++++++ docs/challenges.md | 2 +- src/token-extractor.ts | 64 +++++++++++++++++++++++- src/types.ts | 17 +++++++ 4 files changed, 110 insertions(+), 2 deletions(-) diff --git a/.github/workflows/agent-review-codex.yml b/.github/workflows/agent-review-codex.yml index 3350b17..ca81ba5 100644 --- 
a/.github/workflows/agent-review-codex.yml +++ b/.github/workflows/agent-review-codex.yml @@ -39,6 +39,32 @@ jobs: prompt-file: .github/codex/prompts/review.md model: ${{ vars.GH_AW_MODEL_AGENT_CODEX || 'gpt-5.4-mini' }} sandbox: workspace-write + codex-home: /tmp/codex-home + + - name: Extract Codex token usage + id: codex-tokens + if: always() + run: | + rollout=$(find /tmp/codex-home/sessions -name "rollout-*.jsonl" 2>/dev/null | sort | tail -1) + if [ -z "$rollout" ]; then + echo "No rollout JSONL found — token counts unavailable" + echo "input_tokens=" >> "$GITHUB_OUTPUT" + echo "output_tokens=" >> "$GITHUB_OUTPUT" + echo "cache_read_tokens=" >> "$GITHUB_OUTPUT" + exit 0 + fi + echo "Parsing rollout: $rollout" + token_line=$(grep '"token_count"' "$rollout" | tail -1) + if [ -z "$token_line" ]; then + echo "No token_count event in rollout — token counts unavailable" + echo "input_tokens=" >> "$GITHUB_OUTPUT" + echo "output_tokens=" >> "$GITHUB_OUTPUT" + echo "cache_read_tokens=" >> "$GITHUB_OUTPUT" + exit 0 + fi + echo "input_tokens=$(echo "$token_line" | jq -r '.payload.info.total_token_usage.input_tokens // empty')" >> "$GITHUB_OUTPUT" + echo "output_tokens=$(echo "$token_line" | jq -r '.payload.info.total_token_usage.output_tokens // empty')" >> "$GITHUB_OUTPUT" + echo "cache_read_tokens=$(echo "$token_line" | jq -r '.payload.info.total_token_usage.cached_input_tokens // empty')" >> "$GITHUB_OUTPUT" - name: Post review comment if: steps.codex.outputs.final-message != '' @@ -66,3 +92,6 @@ jobs: status: ${{ job.status == 'success' && 'success' || 'failed' }} started_at: ${{ steps.timer.outputs.started_at }} post_comment: 'true' + input_tokens: ${{ steps.codex-tokens.outputs.input_tokens }} + output_tokens: ${{ steps.codex-tokens.outputs.output_tokens }} + cache_read_tokens: ${{ steps.codex-tokens.outputs.cache_read_tokens }} diff --git a/docs/challenges.md b/docs/challenges.md index 74e121a..7dc05a5 100644 --- a/docs/challenges.md +++ b/docs/challenges.md @@ 
-124,4 +124,4 @@ If the user omits `if: always()` on the AgentMeter step, failed agent runs won't | `GITHUB_TOKEN` availability | ✅ | `github_token` input with `default: ${{ github.token }}` | | Node.js version | ✅ | node24 | | Pricing table | ✅ | Fetched from `/api/models/pricing`; shows `—` if unreachable | -| Codex token counts | ⚠️ Known limitation | `codex exec` (via `openai/codex-action`) does not expose per-run token counts through any stable interface. Cost shows as `—` for Codex runs. Duration and status are tracked correctly. | +| Codex token counts | 🔬 Under investigation | `codex exec` writes rollout JSONL to `$CODEX_HOME/sessions/YYYY/MM/DD/rollout-*.jsonl`. `token_count` events in those files contain cumulative `total_token_usage`. The workflow sets a known `codex-home` and parses the rollout file after the codex step. Two alternative paths exist: (1) rollout JSONL file, (2) `codex exec --json` stdout `turn.completed` events — but the `openai/codex-action` captures only the final message, not stdout JSONL. If the rollout file is empty or absent, cost shows as `—`. | diff --git a/src/token-extractor.ts b/src/token-extractor.ts index f43fefb..b97e8e0 100644 --- a/src/token-extractor.ts +++ b/src/token-extractor.ts @@ -1,4 +1,10 @@ -import type { ClaudeCodeOutput, CodexTokenEvent, TokenCounts, TokenCountsWithMeta } from './types'; +import type { + ClaudeCodeOutput, + CodexExecTurnCompleted, + CodexTokenEvent, + TokenCounts, + TokenCountsWithMeta, +} from './types'; /** * Attempts to extract token counts from agent stdout. 
@@ -13,6 +19,9 @@ export function extractTokensFromOutput( const jsonResult = tryExtractFromJson(agentOutput); if (jsonResult) return jsonResult; + const codexExecResult = tryExtractFromCodexExecJsonl(agentOutput); + if (codexExecResult) return codexExecResult; + const codexResult = tryExtractFromCodexJsonl(agentOutput); if (codexResult) return codexResult; @@ -45,6 +54,59 @@ function tryExtractFromJson( } } +/** + * Tries to extract token counts from `codex exec --json` stdout. + * Sums `usage` fields across all `turn.completed` events. + * + * Field mapping: + * input_tokens → inputTokens + * output_tokens → outputTokens + * cached_input_tokens → cacheReadTokens + */ +function tryExtractFromCodexExecJsonl( + agentOutput: string, +): { tokens: TokenCounts; isApproximate: boolean } | null { + const lines = agentOutput.split('\n'); + let inputTokens = 0; + let outputTokens = 0; + let cacheReadTokens = 0; + let found = false; + + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed.includes('"turn.completed"')) continue; + try { + const parsed = JSON.parse(trimmed) as unknown; + const obj = + typeof parsed === 'object' && parsed !== null + ? (parsed as Record) + : null; + if (obj?.['type'] !== 'turn.completed') continue; + const usage = obj['usage']; + if (typeof usage !== 'object' || usage === null) continue; + const u = usage as (CodexExecTurnCompleted)['usage']; + inputTokens += u.input_tokens ?? 0; + outputTokens += u.output_tokens ?? 0; + cacheReadTokens += u.cached_input_tokens ?? 0; + found = true; + } catch { + // not valid JSON, skip line + } + } + + if (!found) return null; + + return { + tokens: { + inputTokens, + outputTokens, + cacheReadTokens, + cacheWriteTokens: 0, + }, + isApproximate: false, + }; +} + /** * Tries to extract token counts from Codex CLI JSONL streaming output. 
* Looks for `token_count` events emitted by `codex exec` and takes the last one, diff --git a/src/types.ts b/src/types.ts index a740a0c..4aae3b2 100644 --- a/src/types.ts +++ b/src/types.ts @@ -155,6 +155,7 @@ export interface AgentTokensArtifact { /** * A single JSONL event emitted by `codex exec` in streaming mode. * Token counts are found in `token_count` events. + * Written to the rollout JSONL file at `$CODEX_HOME/sessions/YYYY/MM/DD/rollout-*.jsonl`. */ export interface CodexTokenEvent { type: 'event_msg'; @@ -173,6 +174,22 @@ export interface CodexTokenEvent { }; } +/** + * A `turn.completed` event emitted to stdout when running `codex exec --json`. + * Sums all turns for total usage of a full `codex exec` run. + */ +export interface CodexExecTurnCompleted { + type: 'turn.completed'; + usage: { + /** Total input tokens sent (includes cached) */ + input_tokens?: number; + /** Output tokens generated */ + output_tokens?: number; + /** Input tokens served from cache (subset of input_tokens) */ + cached_input_tokens?: number; + }; +} + /** Known Claude Code JSON output structure (best-effort) */ export interface ClaudeCodeOutput { /** Top-level usage block */ From e98d254a6de87423618589db7c7726297c063f70 Mon Sep 17 00:00:00 2001 From: adamhenson Date: Wed, 18 Mar 2026 21:25:41 -0400 Subject: [PATCH 08/13] docs(challenges): document Codex rollout JSONL token extraction approach Made-with: Cursor --- docs/challenges.md | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/docs/challenges.md b/docs/challenges.md index 7dc05a5..0cd5a12 100644 --- a/docs/challenges.md +++ b/docs/challenges.md @@ -94,6 +94,48 @@ If the user omits `if: always()` on the AgentMeter step, failed agent runs won't --- +### 6. Codex token counts rely on an internal rollout file format + +`codex exec` (via `openai/codex-action`) does not expose token usage through any documented public API. 
However, when running without `--ephemeral`, the Codex CLI writes a rollout JSONL file to: + +``` +$CODEX_HOME/sessions/YYYY/MM/DD/rollout-<timestamp>-<uuid>.jsonl +``` + +Each line is a JSON event. Token totals appear in `token_count` events: + +```json +{ + "type": "event_msg", + "payload": { + "type": "token_count", + "info": { + "total_token_usage": { + "input_tokens": 479565, + "output_tokens": 7489, + "cached_input_tokens": 444416 + } + }, + "rate_limits": null + } +} +``` + +The last `token_count` event in the file contains cumulative totals for the full run. + +**How the workflow extracts tokens:** + +1. Set `codex-home: /tmp/codex-home` on `openai/codex-action` so the rollout path is known +2. After the codex step, find the latest rollout file with `find /tmp/codex-home/sessions -name "rollout-*.jsonl" | sort | tail -1` +3. Grep for `"token_count"`, take the last line, extract fields with `jq` +4. Pass `input_tokens`, `output_tokens`, `cache_read_tokens` as explicit inputs to the AgentMeter step + +**Stability caveat:** The rollout format is an internal Codex CLI implementation detail, not a versioned public API. A future `@openai/codex` release could rename fields or restructure events. Since `codex-version` in `openai/codex-action` defaults to latest, this could silently break on a CLI upgrade. Failure is graceful — costs show as `—` if the rollout file is missing or unparseable. + +**Alternative path (`codex exec --json`):** Running with `--json` writes JSONL to stdout with `turn.completed` events containing a `usage` field. However, `openai/codex-action`'s `final-message` output reads from the output file, not stdout — so the JSONL stream is not accessible from within the action's step outputs. The `tryExtractFromCodexExecJsonl` function in `token-extractor.ts` handles this format for consumers who capture `codex exec --json` stdout directly. 
+ +--- + ## What works regardless of mode - The action **never fails the workflow** — all errors are `core.warning()`, not `core.setFailed()`. @@ -124,4 +166,4 @@ If the user omits `if: always()` on the AgentMeter step, failed agent runs won't | `GITHUB_TOKEN` availability | ✅ | `github_token` input with `default: ${{ github.token }}` | | Node.js version | ✅ | node24 | | Pricing table | ✅ | Fetched from `/api/models/pricing`; shows `—` if unreachable | -| Codex token counts | 🔬 Under investigation | `codex exec` writes rollout JSONL to `$CODEX_HOME/sessions/YYYY/MM/DD/rollout-*.jsonl`. `token_count` events in those files contain cumulative `total_token_usage`. The workflow sets a known `codex-home` and parses the rollout file after the codex step. Two alternative paths exist: (1) rollout JSONL file, (2) `codex exec --json` stdout `turn.completed` events — but the `openai/codex-action` captures only the final message, not stdout JSONL. If the rollout file is empty or absent, cost shows as `—`. | +| Codex token counts | ✅ with caveat | Parsed from rollout JSONL at `$CODEX_HOME/sessions/YYYY/MM/DD/rollout-*.jsonl`. Works in production. Rollout format is internal (not a public API) — see section 6 above. 
| From 60726608b0404e0e68ac13936d5edbe98600bbdf Mon Sep 17 00:00:00 2001 From: adamhenson Date: Wed, 18 Mar 2026 21:37:28 -0400 Subject: [PATCH 09/13] ci: add nightly Codex rollout JSONL compatibility check Made-with: Cursor --- .github/workflows/codex-compat-check.yml | 112 +++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 .github/workflows/codex-compat-check.yml diff --git a/.github/workflows/codex-compat-check.yml b/.github/workflows/codex-compat-check.yml new file mode 100644 index 0000000..3a96c23 --- /dev/null +++ b/.github/workflows/codex-compat-check.yml @@ -0,0 +1,112 @@ +name: "Codex: Rollout JSONL Compatibility Check" + +on: + schedule: + - cron: "0 6 * * *" + workflow_dispatch: + +jobs: + verify: + runs-on: ubuntu-latest + permissions: + contents: read + issues: write + steps: + - name: Run minimal Codex exec + id: codex + uses: openai/codex-action@v1 + with: + openai-api-key: ${{ secrets.OPENAI_API_KEY }} + prompt: "Reply with only the single word: hello" + model: gpt-5.4-mini + sandbox: read-only + codex-home: /tmp/codex-check + + - name: Verify rollout JSONL structure + run: | + rollout=$(find /tmp/codex-check/sessions -name "rollout-*.jsonl" 2>/dev/null | sort | tail -1) + + if [ -z "$rollout" ]; then + echo "::error::No rollout JSONL found — codex may have changed its session file layout" + echo "Contents of /tmp/codex-check:" + find /tmp/codex-check -type f 2>/dev/null || echo "(empty)" + exit 1 + fi + + echo "Found rollout: $rollout" + + token_line=$(grep '"token_count"' "$rollout" | tail -1) + + if [ -z "$token_line" ]; then + echo "::error::No token_count event in rollout JSONL — codex may have changed its event format" + echo "Rollout file contents (last 20 lines):" + tail -20 "$rollout" + exit 1 + fi + + input_tokens=$(echo "$token_line" | jq -r '.payload.info.total_token_usage.input_tokens // empty') + + if [ -z "$input_tokens" ]; then + echo "::error::input_tokens field missing from total_token_usage — codex may 
have changed the token_count schema" + echo "token_count event: $token_line" + exit 1 + fi + + if ! [[ "$input_tokens" =~ ^[0-9]+$ ]] || [ "$input_tokens" -eq 0 ]; then + echo "::error::input_tokens is not a positive integer ($input_tokens) — something unexpected in the rollout" + echo "token_count event: $token_line" + exit 1 + fi + + output_tokens=$(echo "$token_line" | jq -r '.payload.info.total_token_usage.output_tokens // empty') + echo "✅ Rollout JSONL verified — input_tokens=$input_tokens output_tokens=$output_tokens" + + - name: Open issue on failure + if: failure() + uses: actions/github-script@v7 + with: + script: | + const date = new Date().toISOString().split('T')[0]; + const title = `⚠️ Codex rollout JSONL compat check failed (${date})`; + const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; + + const { data: existing } = await github.rest.issues.listForRepo({ + owner: context.repo.owner, + repo: context.repo.repo, + state: 'open', + labels: 'codex-compat', + }); + + if (existing.length > 0) { + console.log(`Open codex-compat issue already exists (#${existing[0].number}), skipping.`); + return; + } + + await github.rest.issues.create({ + owner: context.repo.owner, + repo: context.repo.repo, + title, + labels: ['codex-compat'], + body: [ + '## Codex rollout JSONL compatibility check failed', + '', + 'The nightly check that verifies `openai/codex-action` still writes token counts', + 'to the rollout JSONL file has failed. This likely means the `@openai/codex` CLI', + 'changed its internal session file format.', + '', + '**Impact:** Codex runs tracked by agentmeter-action will show `—` for cost instead', + 'of a real value until this is fixed.', + '', + `**Failed run:** ${runUrl}`, + '', + '## What to check', + '', + '1. Look at the failed step logs — it will say which assertion failed', + '2. Run `codex exec --ephemeral "say hello"` locally and inspect `~/.codex/sessions/`', + '3. 
If the format changed, update:', + ' - The `Extract Codex token usage` step in `.github/workflows/agent-review-codex.yml`', + ' - `tryExtractFromCodexJsonl()` in `src/token-extractor.ts`', + ' - `CodexTokenEvent` in `src/types.ts`', + ' - `docs/challenges.md` section 6', + ].join('\n'), + }); From 8617cb854d7f5db382b82f88de4b7f52c1788c0a Mon Sep 17 00:00:00 2001 From: adamhenson Date: Wed, 18 Mar 2026 21:52:35 -0400 Subject: [PATCH 10/13] fix: typecheck, code review fixes, comment pagination - fix(token-extractor): biome formatting (trailing comma, ternary, remove parens from CodexExecTurnCompleted cast) - fix(run): initialize workflowRunTokens to undefined to prevent crash when no workflow_run_id is provided and no token overrides are set - fix(run): add pr_comment to buildTriggerRef so issue_comment on PRs correctly formats as "PR #N" instead of "#N" - feat(comment): show runs newest-first; limit visible table to 5 most recent; add collapsible "All N runs" section when over limit; fix parseExistingRuns to prefer the collapsible section (full history) over the truncated main table to prevent run count drift - test: add tryExtractFromCodexExecJsonl tests (basic, multi-turn, no events, missing fields); add comment ordering and pagination tests Made-with: Cursor --- __tests__/comment.test.ts | 52 +++++++++ __tests__/token-extractor.test.ts | 54 ++++++++++ src/comment.ts | 169 +++++++++++++++++++----------- src/run.ts | 8 +- src/token-extractor.ts | 8 +- 5 files changed, 225 insertions(+), 66 deletions(-) diff --git a/__tests__/comment.test.ts b/__tests__/comment.test.ts index 13b4755..7ae2680 100644 --- a/__tests__/comment.test.ts +++ b/__tests__/comment.test.ts @@ -196,6 +196,58 @@ describe('buildCommentBody', () => { expect(updatedBody).toContain('$0.01'); }); + it('shows newest run first (row #1)', () => { + const firstBody = buildCommentBody({ + apiPricing: testPricing, + existingBody: null, + runData: { ...baseRun, workflowName: 'first-run' }, + }); + const 
secondBody = buildCommentBody({ + apiPricing: testPricing, + existingBody: firstBody, + runData: { ...baseRun, workflowName: 'second-run' }, + }); + const rows = secondBody.match(/\| \d+ \| .+? \|/g) ?? []; + expect(rows[0]).toContain('second-run'); + expect(rows[1]).toContain('first-run'); + }); + + it('shows all runs inline when count is at or below the limit', () => { + let body: string | null = null; + for (let i = 0; i < 5; i++) { + body = buildCommentBody({ + apiPricing: testPricing, + existingBody: body, + runData: { ...baseRun, workflowName: `run-${i}` }, + }); + } + // No "All N runs" collapsible should appear + expect(body).not.toContain('All 5 runs'); + expect(body).not.toContain('All 6 runs'); + }); + + it('shows only 5 most recent runs and adds collapsible when over limit', () => { + let body: string | null = null; + for (let i = 1; i <= 7; i++) { + body = buildCommentBody({ + apiPricing: testPricing, + existingBody: body, + runData: { ...baseRun, workflowName: `run-${i}` }, + }); + } + // Collapsible should exist + expect(body).toContain('All 7 runs'); + // Latest 5 visible in main table (runs 7, 6, 5, 4, 3) + const mainTableSection = body!.split('
')[0]; + expect(mainTableSection).toContain('run-7'); + expect(mainTableSection).toContain('run-3'); + expect(mainTableSection).not.toContain('run-2'); + expect(mainTableSection).not.toContain('run-1'); + // All runs present inside collapsible + expect(body).toContain('run-1'); + expect(body).toContain('run-2'); + }); + it('appends new run to existing comment and shows total', () => { const firstBody = buildCommentBody({ apiPricing: testPricing, diff --git a/__tests__/token-extractor.test.ts b/__tests__/token-extractor.test.ts index e3e3888..f76f2bb 100644 --- a/__tests__/token-extractor.test.ts +++ b/__tests__/token-extractor.test.ts @@ -130,6 +130,60 @@ describe('extractTokensFromOutput', () => { expect(extractTokensFromOutput(jsonlOutput)).toBeNull(); }); + it('parses codex exec --json turn.completed event', () => { + const jsonlOutput = [ + JSON.stringify({ type: 'thread.started', thread_id: 'abc' }), + JSON.stringify({ type: 'turn.started' }), + JSON.stringify({ + type: 'turn.completed', + usage: { input_tokens: 24763, cached_input_tokens: 24448, output_tokens: 122 }, + }), + ].join('\n'); + + const result = extractTokensFromOutput(jsonlOutput); + expect(result).not.toBeNull(); + expect(result!.tokens.inputTokens).toBe(24763); + expect(result!.tokens.outputTokens).toBe(122); + expect(result!.tokens.cacheReadTokens).toBe(24448); + expect(result!.tokens.cacheWriteTokens).toBe(0); + expect(result!.isApproximate).toBe(false); + }); + + it('sums multiple turn.completed events across turns', () => { + const jsonlOutput = [ + JSON.stringify({ + type: 'turn.completed', + usage: { input_tokens: 1000, cached_input_tokens: 800, output_tokens: 100 }, + }), + JSON.stringify({ + type: 'turn.completed', + usage: { input_tokens: 500, cached_input_tokens: 200, output_tokens: 50 }, + }), + ].join('\n'); + + const result = extractTokensFromOutput(jsonlOutput); + expect(result!.tokens.inputTokens).toBe(1500); + expect(result!.tokens.outputTokens).toBe(150); + 
expect(result!.tokens.cacheReadTokens).toBe(1000); + }); + + it('returns null for --json output with no turn.completed events', () => { + const jsonlOutput = [ + JSON.stringify({ type: 'thread.started', thread_id: 'abc' }), + JSON.stringify({ type: 'item.started', item: { type: 'command_execution' } }), + ].join('\n'); + expect(extractTokensFromOutput(jsonlOutput)).toBeNull(); + }); + + it('handles missing usage fields in turn.completed gracefully', () => { + const jsonlOutput = JSON.stringify({ type: 'turn.completed', usage: {} }); + const result = extractTokensFromOutput(jsonlOutput); + expect(result).not.toBeNull(); + expect(result!.tokens.inputTokens).toBe(0); + expect(result!.tokens.outputTokens).toBe(0); + expect(result!.tokens.cacheReadTokens).toBe(0); + }); + it('defaults missing cache fields to zero in JSON', () => { const output = JSON.stringify({ usage: { input_tokens: 100, output_tokens: 50 }, diff --git a/src/comment.ts b/src/comment.ts index da57dd8..b36d8bc 100644 --- a/src/comment.ts +++ b/src/comment.ts @@ -47,9 +47,34 @@ function formatNumber(n: number): string { return n.toLocaleString('en-US'); } +const TABLE_HEADER = [ + '| # | Workflow | Model | Status | Cost | Duration |', + '|---|----------|-------|--------|------|----------|', +]; + +const VISIBLE_RUNS_LIMIT = 5; + +/** Builds table row strings for a slice of runs, numbered from startIndex. */ +function buildTableRows({ + runs, + startIndex, +}: { + /** Runs to render */ + runs: Array>; + /** 1-based row number for the first run */ + startIndex: number; +}): string[] { + return runs.map((run, i) => { + const icon = STATUS_EMOJI[run.status] ?? '❓'; + return `| ${startIndex + i} | ${run.workflowName} | ${run.model ?? '—'} | ${icon} | ${formatCost(run.totalCostCents)} | ${formatDuration(run.durationSeconds)} |`; + }); +} + /** * Builds the Markdown comment body for a PR/issue. - * Parses any existing comment to extract previous run rows and append the new one. 
+ * Parses any existing comment to extract previous run rows and prepends the new one. + * Runs are shown newest-first. If there are more than 5 runs, only the 5 most recent + * are shown in the main table; a collapsible section shows all runs. */ export function buildCommentBody({ apiPricing, @@ -64,36 +89,48 @@ export function buildCommentBody({ runData: RunCommentData; }): string { const existingRuns = existingBody ? parseExistingRuns(existingBody) : []; - const allRuns = [...existingRuns, runData]; - - const tableRows = allRuns - .map((run, i) => { - const icon = STATUS_EMOJI[run.status] ?? '❓'; - const model = run.model ?? '—'; - return `| ${i + 1} | ${run.workflowName} | ${model} | ${icon} | ${formatCost(run.totalCostCents)} | ${formatDuration(run.durationSeconds)} |`; - }) - .join('\n'); + // Newest first: current run at the top + const allRuns: Array> = [runData, ...existingRuns]; const totalCostCents = allRuns.reduce((sum, r) => sum + r.totalCostCents, 0); const totalRow = allRuns.length > 1 ? `| **Total** | | | | **${formatCost(totalCostCents)}** | |` : ''; - const latestRun = runData; - const tokenDetails = buildTokenDetails({ apiPricing, run: latestRun }); + const visibleRuns = allRuns.slice(0, VISIBLE_RUNS_LIMIT); + const hasMore = allRuns.length > VISIBLE_RUNS_LIMIT; + + const tokenDetails = buildTokenDetails({ apiPricing, run: runData }); - const lines = [ + const lines: string[] = [ COMMENT_MARKER, '## ⚡ AgentMeter', '', - '| # | Workflow | Model | Status | Cost | Duration |', - '|---|----------|-------|--------|------|----------|', - tableRows, + ...TABLE_HEADER, + ...buildTableRows({ runs: visibleRuns, startIndex: 1 }), ...(totalRow ? [totalRow] : []), '', - ...(tokenDetails ? [tokenDetails, ''] : []), - `[View in AgentMeter →](${latestRun.dashboardUrl})`, ]; + if (hasMore) { + lines.push( + '
<details>', + `<summary>All ${allRuns.length} runs</summary>`, + '', + ...TABLE_HEADER, + ...buildTableRows({ runs: allRuns, startIndex: 1 }), + ...(totalRow ? [totalRow] : []), + '', + '</details>
', + '', + ); + } + + if (tokenDetails) { + lines.push(tokenDetails, ''); + } + + lines.push(`[View in AgentMeter →](${runData.dashboardUrl})`); + return lines.join('\n'); } @@ -176,54 +213,68 @@ interface ParsedRun { turns: number | null; } +/** + * Parses raw table row strings from a Markdown table body (rows only, no header). + */ +function parseTableRows(rawRows: string): ParsedRun[] { + return rawRows + .trim() + .split('\n') + .filter((r) => r.startsWith('|') && !r.includes('**Total**')) + .map((row) => { + const cells = row + .split('|') + .map((c) => c.trim()) + .filter(Boolean); + if (cells.length < 5) return null; + + // Support both old (5-col) and new (6-col) format: + // Old: # | Workflow | Status | Cost | Duration + // New: # | Workflow | Model | Status | Cost | Duration + const hasModelCol = cells.length >= 6; + const workflowName = cells[1] ?? ''; + const model = hasModelCol && cells[2] && cells[2] !== '—' ? cells[2] : null; + const statusEmoji = (hasModelCol ? cells[3] : cells[2]) ?? ''; + const costStr = ((hasModelCol ? cells[4] : cells[3]) ?? '').replace(/[$*]/g, ''); + const totalCostCents = Math.round(parseFloat(costStr) * 100); + const durationSeconds = parseDuration((hasModelCol ? cells[5] : cells[4]) ?? ''); + const status = + Object.entries(STATUS_EMOJI).find(([, emoji]) => emoji === statusEmoji)?.[0] ?? 'other'; + + return { + workflowName, + status, + totalCostCents: Number.isNaN(totalCostCents) ? 0 : totalCostCents, + durationSeconds, + dashboardUrl: '', + model, + turns: null, + } satisfies ParsedRun; + }) + .filter((r): r is NonNullable => r !== null); +} + /** * Parses run rows out of an existing AgentMeter comment body. - * Returns an empty array if parsing fails or comment is malformed. + * Prefers the "All N runs" collapsible section when present (contains the full history), + * falling back to the main table otherwise. + * Returns an empty array if parsing fails or the comment is malformed. 
*/ function parseExistingRuns(body: string): ParsedRun[] { try { + // When >5 runs exist the full history lives in the collapsible — prefer that + const detailsMatch = body.match( + /All \d+ runs<\/summary>\n\n([\s\S]+?)\n\n<\/details>/, + ); + if (detailsMatch?.[1]) { + const tableMatch = detailsMatch[1].match(/\| #.*?\n\|[-|: ]+\n((?:\|.*?\n)*)/s); + if (tableMatch?.[1]) return parseTableRows(tableMatch[1]); + } + + // Fall back to the main (potentially truncated) table const tableMatch = body.match(/\| #.*?\n\|[-|: ]+\n((?:\|.*?\n)*)/s); if (!tableMatch?.[1]) return []; - - const rows = tableMatch[1] - .trim() - .split('\n') - .filter((r) => r.startsWith('|') && !r.includes('**Total**')); - - return rows - .map((row) => { - const cells = row - .split('|') - .map((c) => c.trim()) - .filter(Boolean); - if (cells.length < 5) return null; - - // Support both old (5-col) and new (6-col) format: - // Old: # | Workflow | Status | Cost | Duration - // New: # | Workflow | Model | Status | Cost | Duration - const hasModelCol = cells.length >= 6; - const workflowName = cells[1] ?? ''; - const model = hasModelCol && cells[2] && cells[2] !== '—' ? cells[2] : null; - const statusEmoji = (hasModelCol ? cells[3] : cells[2]) ?? ''; - const costStr = ((hasModelCol ? cells[4] : cells[3]) ?? '').replace(/[$*]/g, ''); - const totalCostCents = Math.round(parseFloat(costStr) * 100); - const durationSeconds = parseDuration((hasModelCol ? cells[5] : cells[4]) ?? ''); - - const status = - Object.entries(STATUS_EMOJI).find(([, emoji]) => emoji === statusEmoji)?.[0] ?? 'other'; - - const parsed: ParsedRun = { - workflowName, - status, - totalCostCents: Number.isNaN(totalCostCents) ? 
0 : totalCostCents, - durationSeconds, - dashboardUrl: '', - model, - turns: null, - }; - return parsed; - }) - .filter((r): r is NonNullable => r !== null) as ParsedRun[]; + return parseTableRows(tableMatch[1]); } catch { return []; } diff --git a/src/run.ts b/src/run.ts index d76c998..5ce90f5 100644 --- a/src/run.ts +++ b/src/run.ts @@ -12,7 +12,11 @@ import { resolveWorkflowRun } from './workflow-run'; * Builds a human-readable trigger ref string from a number and event name. */ function buildTriggerRef(number: number, eventName: string): string { - if (eventName === 'pull_request' || eventName === 'pull_request_review_comment') { + if ( + eventName === 'pull_request' || + eventName === 'pull_request_review_comment' || + eventName === 'pr_comment' + ) { return `PR #${number}`; } return `#${number}`; @@ -34,7 +38,7 @@ export async function run(): Promise { // When workflow_run_id is provided, resolve all workflow-run data automatically: // timestamps, trigger number, and agent-tokens artifact. This removes the need // for manual pre-steps in the caller's companion workflow. - let workflowRunTokens: ReturnType; + let workflowRunTokens: ReturnType = undefined; let resolvedTriggerNumber = inputs.triggerNumber ?? ctx.triggerNumber; let resolvedTriggerEvent = inputs.triggerEvent || ctx.triggerType; let resolvedStartedAt = inputs.startedAt || selfStartedAt; diff --git a/src/token-extractor.ts b/src/token-extractor.ts index b97e8e0..1677e5f 100644 --- a/src/token-extractor.ts +++ b/src/token-extractor.ts @@ -64,7 +64,7 @@ function tryExtractFromJson( * cached_input_tokens → cacheReadTokens */ function tryExtractFromCodexExecJsonl( - agentOutput: string, + agentOutput: string ): { tokens: TokenCounts; isApproximate: boolean } | null { const lines = agentOutput.split('\n'); let inputTokens = 0; @@ -78,13 +78,11 @@ function tryExtractFromCodexExecJsonl( try { const parsed = JSON.parse(trimmed) as unknown; const obj = - typeof parsed === 'object' && parsed !== null - ? 
(parsed as Record) - : null; + typeof parsed === 'object' && parsed !== null ? (parsed as Record) : null; if (obj?.['type'] !== 'turn.completed') continue; const usage = obj['usage']; if (typeof usage !== 'object' || usage === null) continue; - const u = usage as (CodexExecTurnCompleted)['usage']; + const u = usage as CodexExecTurnCompleted['usage']; inputTokens += u.input_tokens ?? 0; outputTokens += u.output_tokens ?? 0; cacheReadTokens += u.cached_input_tokens ?? 0; From d510c0ee120be9b460f54b603e81927b9db5762e Mon Sep 17 00:00:00 2001 From: adamhenson Date: Wed, 18 Mar 2026 22:21:49 -0400 Subject: [PATCH 11/13] =?UTF-8?q?fix:=20address=20code=20review=20?= =?UTF-8?q?=E2=80=94=20lint,=20pagination,=20PR=20lookup,=20trigger=20ref?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - fix(comment): extract RunRow type alias to satisfy Biome line-length - fix(comment): use octokit.paginate for comment lookup so existing AgentMeter comment is found even beyond page 1 on busy PRs/issues - fix(workflow-run): change PR lookup state: 'open' → 'all' so merged or closed PRs are resolved correctly in companion workflow mode - fix(workflow-run): default missing numeric fields to 0 in parseAgentTokensZip instead of letting undefined propagate to ingest - fix(run): expand buildTriggerRef to cover pr_opened, pr_synchronize, pr_reopened so inline PR runs always render "PR #N" not "#N" - fix(workflow): use mtime-based rollout file selection with -printf instead of lexicographic sort for more robust latest-file detection Made-with: Cursor --- .github/workflows/agent-review-codex.yml | 2 +- src/comment.ts | 19 ++++++++++++------- src/run.ts | 20 +++++++++++--------- src/workflow-run.ts | 11 +++++++++-- 4 files changed, 33 insertions(+), 19 deletions(-) diff --git a/.github/workflows/agent-review-codex.yml b/.github/workflows/agent-review-codex.yml index ca81ba5..b0e5ed6 100644 --- a/.github/workflows/agent-review-codex.yml +++ 
b/.github/workflows/agent-review-codex.yml @@ -45,7 +45,7 @@ jobs: id: codex-tokens if: always() run: | - rollout=$(find /tmp/codex-home/sessions -name "rollout-*.jsonl" 2>/dev/null | sort | tail -1) + rollout=$(find /tmp/codex-home/sessions -name "rollout-*.jsonl" 2>/dev/null -printf "%T@ %p\n" | sort -rn | head -1 | cut -d' ' -f2-) if [ -z "$rollout" ]; then echo "No rollout JSONL found — token counts unavailable" echo "input_tokens=" >> "$GITHUB_OUTPUT" diff --git a/src/comment.ts b/src/comment.ts index b36d8bc..88106fb 100644 --- a/src/comment.ts +++ b/src/comment.ts @@ -54,13 +54,19 @@ const TABLE_HEADER = [ const VISIBLE_RUNS_LIMIT = 5; +/** Minimal run fields needed to render a table row */ +type RunRow = Pick< + RunCommentData, + 'durationSeconds' | 'model' | 'status' | 'totalCostCents' | 'workflowName' +>; + /** Builds table row strings for a slice of runs, numbered from startIndex. */ function buildTableRows({ runs, startIndex, }: { /** Runs to render */ - runs: Array>; + runs: RunRow[]; /** 1-based row number for the first run */ startIndex: number; }): string[] { @@ -90,7 +96,7 @@ export function buildCommentBody({ }): string { const existingRuns = existingBody ? parseExistingRuns(existingBody) : []; // Newest first: current run at the top - const allRuns: Array> = [runData, ...existingRuns]; + const allRuns: RunRow[] = [runData, ...existingRuns]; const totalCostCents = allRuns.reduce((sum, r) => sum + r.totalCostCents, 0); const totalRow = @@ -121,7 +127,7 @@ export function buildCommentBody({ ...(totalRow ? [totalRow] : []), '', '
', - '', + '' ); } @@ -264,7 +270,7 @@ function parseExistingRuns(body: string): ParsedRun[] { try { // When >5 runs exist the full history lives in the collapsible — prefer that const detailsMatch = body.match( - /All \d+ runs<\/summary>\n\n([\s\S]+?)\n\n<\/details>/, + /All \d+ runs<\/summary>\n\n([\s\S]+?)\n\n<\/details>/ ); if (detailsMatch?.[1]) { const tableMatch = detailsMatch[1].match(/\| #.*?\n\|[-|: ]+\n((?:\|.*?\n)*)/s); @@ -296,9 +302,8 @@ async function findExistingComment({ issueOrPrNumber: number; }): Promise<{ id: number; body: string } | null> { try { - const { data: comments } = await ( - octokit as ReturnType - ).rest.issues.listComments({ + const gh = octokit as ReturnType; + const comments = await gh.paginate(gh.rest.issues.listComments, { owner, repo, issue_number: issueOrPrNumber, diff --git a/src/run.ts b/src/run.ts index 5ce90f5..bf1151d 100644 --- a/src/run.ts +++ b/src/run.ts @@ -12,14 +12,16 @@ import { resolveWorkflowRun } from './workflow-run'; * Builds a human-readable trigger ref string from a number and event name. */ function buildTriggerRef(number: number, eventName: string): string { - if ( - eventName === 'pull_request' || - eventName === 'pull_request_review_comment' || - eventName === 'pr_comment' - ) { - return `PR #${number}`; - } - return `#${number}`; + // Covers both raw GitHub event names and the mapped triggerType values from context.ts + const prEvents = new Set([ + 'pull_request', + 'pull_request_review_comment', + 'pr_comment', + 'pr_opened', + 'pr_synchronize', + 'pr_reopened', + ]); + return prEvents.has(eventName) ? `PR #${number}` : `#${number}`; } /** @@ -38,7 +40,7 @@ export async function run(): Promise { // When workflow_run_id is provided, resolve all workflow-run data automatically: // timestamps, trigger number, and agent-tokens artifact. This removes the need // for manual pre-steps in the caller's companion workflow. 
- let workflowRunTokens: ReturnType = undefined; + let workflowRunTokens: ReturnType; let resolvedTriggerNumber = inputs.triggerNumber ?? ctx.triggerNumber; let resolvedTriggerEvent = inputs.triggerEvent || ctx.triggerType; let resolvedStartedAt = inputs.startedAt || selfStartedAt; diff --git a/src/workflow-run.ts b/src/workflow-run.ts index 588a0b1..1c72844 100644 --- a/src/workflow-run.ts +++ b/src/workflow-run.ts @@ -278,7 +278,7 @@ async function resolveTrigger({ owner, repo, head: `${owner}:${headBranch}`, - state: 'open', + state: 'all', per_page: 1, }); if (prs[0]) { @@ -375,7 +375,14 @@ async function parseAgentTokensZip(zipData: ArrayBuffer): Promise Date: Wed, 18 Mar 2026 22:26:21 -0400 Subject: [PATCH 12/13] fix: token precedence and issue_comment triggerRef labeling - Split resolveTokens calls in run.ts so workflow_run artifact tokens win over agent_output stdout extraction (matching documented priority) - Prefer ctx.triggerRef in all inline-run cases so plain issue comments are labeled #N instead of PR #N; buildTriggerRef only fires for companion workflow_run mode where ctx.triggerRef is null - Fix buildTableRows params to alphabetical order per code style rules Made-with: Cursor --- src/comment.ts | 4 ++-- src/run.ts | 24 ++++++++++++++++++------ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/comment.ts b/src/comment.ts index 88106fb..be0f000 100644 --- a/src/comment.ts +++ b/src/comment.ts @@ -65,10 +65,10 @@ function buildTableRows({ runs, startIndex, }: { - /** Runs to render */ - runs: RunRow[]; /** 1-based row number for the first run */ startIndex: number; + /** Runs to render */ + runs: RunRow[]; }): string[] { return runs.map((run, i) => { const icon = STATUS_EMOJI[run.status] ?? 
'❓'; diff --git a/src/run.ts b/src/run.ts index bf1151d..820062b 100644 --- a/src/run.ts +++ b/src/run.ts @@ -79,20 +79,32 @@ export async function run(): Promise { } } - // Token resolution priority: explicit inputs > workflow_run artifact > agent_output extraction + // Token resolution priority: explicit inputs > workflow_run artifact > agent_output extraction. + // Split into two resolveTokens calls so the artifact wins over stdout extraction. const tokens = resolveTokens({ - agentOutput: inputs.agentOutput, + agentOutput: '', inputTokensOverride: inputs.inputTokens, outputTokensOverride: inputs.outputTokens, cacheReadTokensOverride: inputs.cacheReadTokens, cacheWriteTokensOverride: inputs.cacheWriteTokens, - }) ?? workflowRunTokens; - + }) ?? + workflowRunTokens ?? + resolveTokens({ + agentOutput: inputs.agentOutput, + inputTokensOverride: null, + outputTokensOverride: null, + cacheReadTokensOverride: null, + cacheWriteTokensOverride: null, + }); + + // Prefer ctx.triggerRef (correctly set for inline runs including issue vs PR distinction). + // Fall back to buildTriggerRef only for companion workflow_run mode where ctx.triggerRef is null. const triggerRef = - resolvedTriggerNumber !== null + ctx.triggerRef ?? + (resolvedTriggerNumber !== null ? 
buildTriggerRef(resolvedTriggerNumber, resolvedTriggerEvent) - : ctx.triggerRef; + : null); const triggerType = resolvedTriggerEvent || ctx.triggerType || 'other'; From f2e75a7301b59f7af440527dd5d6069028e715db Mon Sep 17 00:00:00 2001 From: adamhenson Date: Wed, 18 Mar 2026 22:32:22 -0400 Subject: [PATCH 13/13] fix: object params on buildTriggerRef, revert PR lookup to state open MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Convert buildTriggerRef to object params with alphabetical ordering per code style rules - Revert pull_requests[] fallback lookup to state: 'open' — the fallback is only for recovering active runs with missing pull_requests[]; state: 'all' risks matching stale PRs on reused branches Made-with: Cursor --- src/run.ts | 14 +++++++++++--- src/workflow-run.ts | 2 +- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/run.ts b/src/run.ts index 820062b..127bcdf 100644 --- a/src/run.ts +++ b/src/run.ts @@ -10,9 +10,17 @@ import { resolveWorkflowRun } from './workflow-run'; /** * Builds a human-readable trigger ref string from a number and event name. + * Covers both raw GitHub event names and the mapped triggerType values from context.ts. */ -function buildTriggerRef(number: number, eventName: string): string { - // Covers both raw GitHub event names and the mapped triggerType values from context.ts +function buildTriggerRef({ + eventName, + number, +}: { + /** Raw GitHub event name or mapped triggerType from context.ts */ + eventName: string; + /** PR or issue number */ + number: number; +}): string { const prEvents = new Set([ 'pull_request', 'pull_request_review_comment', @@ -103,7 +111,7 @@ export async function run(): Promise { const triggerRef = ctx.triggerRef ?? (resolvedTriggerNumber !== null - ? buildTriggerRef(resolvedTriggerNumber, resolvedTriggerEvent) + ? 
buildTriggerRef({ eventName: resolvedTriggerEvent, number: resolvedTriggerNumber }) : null); const triggerType = resolvedTriggerEvent || ctx.triggerType || 'other'; diff --git a/src/workflow-run.ts b/src/workflow-run.ts index 1c72844..2443a82 100644 --- a/src/workflow-run.ts +++ b/src/workflow-run.ts @@ -278,8 +278,8 @@ async function resolveTrigger({ owner, repo, head: `${owner}:${headBranch}`, - state: 'all', per_page: 1, + state: 'open', }); if (prs[0]) { return { triggerNumber: prs[0].number, triggerEvent: 'pull_request' };