diff --git a/.github/workflows/agent-review-codex.yml b/.github/workflows/agent-review-codex.yml index a85436a..b0e5ed6 100644 --- a/.github/workflows/agent-review-codex.yml +++ b/.github/workflows/agent-review-codex.yml @@ -39,7 +39,32 @@ jobs: prompt-file: .github/codex/prompts/review.md model: ${{ vars.GH_AW_MODEL_AGENT_CODEX || 'gpt-5.4-mini' }} sandbox: workspace-write - output-file: /tmp/codex-output.md + codex-home: /tmp/codex-home + + - name: Extract Codex token usage + id: codex-tokens + if: always() + run: | + rollout=$(find /tmp/codex-home/sessions -name "rollout-*.jsonl" 2>/dev/null -printf "%T@ %p\n" | sort -rn | head -1 | cut -d' ' -f2-) + if [ -z "$rollout" ]; then + echo "No rollout JSONL found — token counts unavailable" + echo "input_tokens=" >> "$GITHUB_OUTPUT" + echo "output_tokens=" >> "$GITHUB_OUTPUT" + echo "cache_read_tokens=" >> "$GITHUB_OUTPUT" + exit 0 + fi + echo "Parsing rollout: $rollout" + token_line=$(grep '"token_count"' "$rollout" | tail -1) + if [ -z "$token_line" ]; then + echo "No token_count event in rollout — token counts unavailable" + echo "input_tokens=" >> "$GITHUB_OUTPUT" + echo "output_tokens=" >> "$GITHUB_OUTPUT" + echo "cache_read_tokens=" >> "$GITHUB_OUTPUT" + exit 0 + fi + echo "input_tokens=$(echo "$token_line" | jq -r '.payload.info.total_token_usage.input_tokens // empty')" >> "$GITHUB_OUTPUT" + echo "output_tokens=$(echo "$token_line" | jq -r '.payload.info.total_token_usage.output_tokens // empty')" >> "$GITHUB_OUTPUT" + echo "cache_read_tokens=$(echo "$token_line" | jq -r '.payload.info.total_token_usage.cached_input_tokens // empty')" >> "$GITHUB_OUTPUT" - name: Post review comment if: steps.codex.outputs.final-message != '' @@ -56,45 +81,6 @@ jobs: env: CODEX_REVIEW: ${{ steps.codex.outputs.final-message }} - - name: Extract Codex token usage - id: extract_tokens - if: always() - env: - CODEX_HOME: /home/runner/.codex - run: | - echo "=== CODEX_HOME contents ===" - find "$CODEX_HOME" -type f 2>/dev/null || 
echo "(empty or missing)" - echo "=== end ===" - - # Try session files in CODEX_HOME/sessions/ first, then ~/.codex/sessions/ - SESSION_FILE=$(ls -t "$CODEX_HOME"/sessions/*.jsonl 2>/dev/null | head -1 || \ - ls -t ~/.codex/sessions/*.jsonl 2>/dev/null | head -1 || true) - - echo "SESSION_FILE=$SESSION_FILE" - - if [ -n "$SESSION_FILE" ] && [ -f "$SESSION_FILE" ]; then - echo "=== last 5 lines of session file ===" - tail -5 "$SESSION_FILE" - echo "=== end ===" - TOKEN_LINE=$(grep '"token_count"' "$SESSION_FILE" 2>/dev/null | tail -1 || true) - if [ -n "$TOKEN_LINE" ]; then - INPUT=$(echo "$TOKEN_LINE" | jq -r '.payload.info.total_token_usage.input_tokens // 0') - OUTPUT=$(echo "$TOKEN_LINE" | jq -r '.payload.info.total_token_usage.output_tokens // 0') - CACHE_READ=$(echo "$TOKEN_LINE" | jq -r '.payload.info.total_token_usage.cached_input_tokens // 0') - else - echo "No token_count event found in session file" - INPUT=0; OUTPUT=0; CACHE_READ=0 - fi - else - echo "No session file found" - INPUT=0; OUTPUT=0; CACHE_READ=0 - fi - { - echo "input_tokens=$INPUT" - echo "output_tokens=$OUTPUT" - echo "cache_read_tokens=$CACHE_READ" - } >> "$GITHUB_OUTPUT" - - name: Track with AgentMeter if: always() uses: foo-software/agentmeter-action@main @@ -104,8 +90,8 @@ jobs: engine: codex model: ${{ vars.GH_AW_MODEL_AGENT_CODEX || 'gpt-5.4-mini' }} status: ${{ job.status == 'success' && 'success' || 'failed' }} - input_tokens: ${{ steps.extract_tokens.outputs.input_tokens }} - output_tokens: ${{ steps.extract_tokens.outputs.output_tokens }} - cache_read_tokens: ${{ steps.extract_tokens.outputs.cache_read_tokens }} started_at: ${{ steps.timer.outputs.started_at }} post_comment: 'true' + input_tokens: ${{ steps.codex-tokens.outputs.input_tokens }} + output_tokens: ${{ steps.codex-tokens.outputs.output_tokens }} + cache_read_tokens: ${{ steps.codex-tokens.outputs.cache_read_tokens }} diff --git a/.github/workflows/codex-compat-check.yml b/.github/workflows/codex-compat-check.yml new 
file mode 100644 index 0000000..3a96c23 --- /dev/null +++ b/.github/workflows/codex-compat-check.yml @@ -0,0 +1,112 @@ +name: "Codex: Rollout JSONL Compatibility Check" + +on: + schedule: + - cron: "0 6 * * *" + workflow_dispatch: + +jobs: + verify: + runs-on: ubuntu-latest + permissions: + contents: read + issues: write + steps: + - name: Run minimal Codex exec + id: codex + uses: openai/codex-action@v1 + with: + openai-api-key: ${{ secrets.OPENAI_API_KEY }} + prompt: "Reply with only the single word: hello" + model: gpt-5.4-mini + sandbox: read-only + codex-home: /tmp/codex-check + + - name: Verify rollout JSONL structure + run: | + rollout=$(find /tmp/codex-check/sessions -name "rollout-*.jsonl" 2>/dev/null | sort | tail -1) + + if [ -z "$rollout" ]; then + echo "::error::No rollout JSONL found — codex may have changed its session file layout" + echo "Contents of /tmp/codex-check:" + find /tmp/codex-check -type f 2>/dev/null || echo "(empty)" + exit 1 + fi + + echo "Found rollout: $rollout" + + token_line=$(grep '"token_count"' "$rollout" | tail -1) + + if [ -z "$token_line" ]; then + echo "::error::No token_count event in rollout JSONL — codex may have changed its event format" + echo "Rollout file contents (last 20 lines):" + tail -20 "$rollout" + exit 1 + fi + + input_tokens=$(echo "$token_line" | jq -r '.payload.info.total_token_usage.input_tokens // empty') + + if [ -z "$input_tokens" ]; then + echo "::error::input_tokens field missing from total_token_usage — codex may have changed the token_count schema" + echo "token_count event: $token_line" + exit 1 + fi + + if ! 
[[ "$input_tokens" =~ ^[0-9]+$ ]] || [ "$input_tokens" -eq 0 ]; then + echo "::error::input_tokens is not a positive integer ($input_tokens) — something unexpected in the rollout" + echo "token_count event: $token_line" + exit 1 + fi + + output_tokens=$(echo "$token_line" | jq -r '.payload.info.total_token_usage.output_tokens // empty') + echo "✅ Rollout JSONL verified — input_tokens=$input_tokens output_tokens=$output_tokens" + + - name: Open issue on failure + if: failure() + uses: actions/github-script@v7 + with: + script: | + const date = new Date().toISOString().split('T')[0]; + const title = `⚠️ Codex rollout JSONL compat check failed (${date})`; + const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; + + const { data: existing } = await github.rest.issues.listForRepo({ + owner: context.repo.owner, + repo: context.repo.repo, + state: 'open', + labels: 'codex-compat', + }); + + if (existing.length > 0) { + console.log(`Open codex-compat issue already exists (#${existing[0].number}), skipping.`); + return; + } + + await github.rest.issues.create({ + owner: context.repo.owner, + repo: context.repo.repo, + title, + labels: ['codex-compat'], + body: [ + '## Codex rollout JSONL compatibility check failed', + '', + 'The nightly check that verifies `openai/codex-action` still writes token counts', + 'to the rollout JSONL file has failed. This likely means the `@openai/codex` CLI', + 'changed its internal session file format.', + '', + '**Impact:** Codex runs tracked by agentmeter-action will show `—` for cost instead', + 'of a real value until this is fixed.', + '', + `**Failed run:** ${runUrl}`, + '', + '## What to check', + '', + '1. Look at the failed step logs — it will say which assertion failed', + '2. Run `codex exec "say hello"` locally (without `--ephemeral`, so the rollout file is written) and inspect `~/.codex/sessions/`', + '3. 
If the format changed, update:', + ' - The `Extract Codex token usage` step in `.github/workflows/agent-review-codex.yml`', + ' - `tryExtractFromCodexJsonl()` in `src/token-extractor.ts`', + ' - `CodexTokenEvent` in `src/types.ts`', + ' - `docs/challenges.md` section 6', + ].join('\n'), + }); diff --git a/__tests__/comment.test.ts b/__tests__/comment.test.ts index 13b4755..7ae2680 100644 --- a/__tests__/comment.test.ts +++ b/__tests__/comment.test.ts @@ -196,6 +196,58 @@ describe('buildCommentBody', () => { expect(updatedBody).toContain('$0.01'); }); + it('shows newest run first (row #1)', () => { + const firstBody = buildCommentBody({ + apiPricing: testPricing, + existingBody: null, + runData: { ...baseRun, workflowName: 'first-run' }, + }); + const secondBody = buildCommentBody({ + apiPricing: testPricing, + existingBody: firstBody, + runData: { ...baseRun, workflowName: 'second-run' }, + }); + const rows = secondBody.match(/\| \d+ \| .+? \|/g) ?? []; + expect(rows[0]).toContain('second-run'); + expect(rows[1]).toContain('first-run'); + }); + + it('shows all runs inline when count is at or below the limit', () => { + let body: string | null = null; + for (let i = 0; i < 5; i++) { + body = buildCommentBody({ + apiPricing: testPricing, + existingBody: body, + runData: { ...baseRun, workflowName: `run-${i}` }, + }); + } + // No "All N runs" collapsible should appear + expect(body).not.toContain('All 5 runs'); + expect(body).not.toContain('All 6 runs'); + }); + + it('shows only 5 most recent runs and adds collapsible when over limit', () => { + let body: string | null = null; + for (let i = 1; i <= 7; i++) { + body = buildCommentBody({ + apiPricing: testPricing, + existingBody: body, + runData: { ...baseRun, workflowName: `run-${i}` }, + }); + } + // Collapsible should exist + expect(body).toContain('All 7 runs'); + // Latest 5 visible in main table (runs 7, 6, 5, 4, 3) + const mainTableSection = body!.split('
')[0]; + expect(mainTableSection).toContain('run-7'); + expect(mainTableSection).toContain('run-3'); + expect(mainTableSection).not.toContain('run-2'); + expect(mainTableSection).not.toContain('run-1'); + // All runs present inside collapsible + expect(body).toContain('run-1'); + expect(body).toContain('run-2'); + }); + it('appends new run to existing comment and shows total', () => { const firstBody = buildCommentBody({ apiPricing: testPricing, diff --git a/__tests__/token-extractor.test.ts b/__tests__/token-extractor.test.ts index e3e3888..f76f2bb 100644 --- a/__tests__/token-extractor.test.ts +++ b/__tests__/token-extractor.test.ts @@ -130,6 +130,60 @@ describe('extractTokensFromOutput', () => { expect(extractTokensFromOutput(jsonlOutput)).toBeNull(); }); + it('parses codex exec --json turn.completed event', () => { + const jsonlOutput = [ + JSON.stringify({ type: 'thread.started', thread_id: 'abc' }), + JSON.stringify({ type: 'turn.started' }), + JSON.stringify({ + type: 'turn.completed', + usage: { input_tokens: 24763, cached_input_tokens: 24448, output_tokens: 122 }, + }), + ].join('\n'); + + const result = extractTokensFromOutput(jsonlOutput); + expect(result).not.toBeNull(); + expect(result!.tokens.inputTokens).toBe(24763); + expect(result!.tokens.outputTokens).toBe(122); + expect(result!.tokens.cacheReadTokens).toBe(24448); + expect(result!.tokens.cacheWriteTokens).toBe(0); + expect(result!.isApproximate).toBe(false); + }); + + it('sums multiple turn.completed events across turns', () => { + const jsonlOutput = [ + JSON.stringify({ + type: 'turn.completed', + usage: { input_tokens: 1000, cached_input_tokens: 800, output_tokens: 100 }, + }), + JSON.stringify({ + type: 'turn.completed', + usage: { input_tokens: 500, cached_input_tokens: 200, output_tokens: 50 }, + }), + ].join('\n'); + + const result = extractTokensFromOutput(jsonlOutput); + expect(result!.tokens.inputTokens).toBe(1500); + expect(result!.tokens.outputTokens).toBe(150); + 
expect(result!.tokens.cacheReadTokens).toBe(1000); + }); + + it('returns null for --json output with no turn.completed events', () => { + const jsonlOutput = [ + JSON.stringify({ type: 'thread.started', thread_id: 'abc' }), + JSON.stringify({ type: 'item.started', item: { type: 'command_execution' } }), + ].join('\n'); + expect(extractTokensFromOutput(jsonlOutput)).toBeNull(); + }); + + it('handles missing usage fields in turn.completed gracefully', () => { + const jsonlOutput = JSON.stringify({ type: 'turn.completed', usage: {} }); + const result = extractTokensFromOutput(jsonlOutput); + expect(result).not.toBeNull(); + expect(result!.tokens.inputTokens).toBe(0); + expect(result!.tokens.outputTokens).toBe(0); + expect(result!.tokens.cacheReadTokens).toBe(0); + }); + it('defaults missing cache fields to zero in JSON', () => { const output = JSON.stringify({ usage: { input_tokens: 100, output_tokens: 50 }, diff --git a/docs/challenges.md b/docs/challenges.md index 5ab1ac0..0cd5a12 100644 --- a/docs/challenges.md +++ b/docs/challenges.md @@ -94,6 +94,48 @@ If the user omits `if: always()` on the AgentMeter step, failed agent runs won't --- +### 6. Codex token counts rely on an internal rollout file format + +`codex exec` (via `openai/codex-action`) does not expose token usage through any documented public API. However, when running without `--ephemeral`, the Codex CLI writes a rollout JSONL file to: + +``` +$CODEX_HOME/sessions/YYYY/MM/DD/rollout-<timestamp>-<uuid>.jsonl +``` + +Each line is a JSON event. Token totals appear in `token_count` events: + +```json +{ + "type": "event_msg", + "payload": { + "type": "token_count", + "info": { + "total_token_usage": { + "input_tokens": 479565, + "output_tokens": 7489, + "cached_input_tokens": 444416 + } + }, + "rate_limits": null + } +} +``` + +The last `token_count` event in the file contains cumulative totals for the full run. + +**How the workflow extracts tokens:** + +1. 
Set `codex-home: /tmp/codex-home` on `openai/codex-action` so the rollout path is known +2. After the codex step, find the most recently modified rollout file with `find /tmp/codex-home/sessions -name "rollout-*.jsonl" -printf "%T@ %p\n" | sort -rn | head -1 | cut -d' ' -f2-` +3. Grep for `"token_count"`, take the last line, extract fields with `jq` +4. Pass `input_tokens`, `output_tokens`, `cache_read_tokens` as explicit inputs to the AgentMeter step + +**Stability caveat:** The rollout format is an internal Codex CLI implementation detail, not a versioned public API. A future `@openai/codex` release could rename fields or restructure events. Since `codex-version` in `openai/codex-action` defaults to latest, this could silently break on a CLI upgrade. Failure is graceful — costs show as `—` if the rollout file is missing or unparseable. + +**Alternative path (`codex exec --json`):** Running with `--json` writes JSONL to stdout with `turn.completed` events containing a `usage` field. However, `openai/codex-action`'s `final-message` output reads from the output file, not stdout — so the JSONL stream is not accessible from within the action's step outputs. The `tryExtractFromCodexExecJsonl` function in `token-extractor.ts` handles this format for consumers who capture `codex exec --json` stdout directly. + +--- + ## What works regardless of mode - The action **never fails the workflow** — all errors are `core.warning()`, not `core.setFailed()`. @@ -123,4 +165,5 @@ If the user omits `if: always()` on the AgentMeter step, failed agent runs won't | Comment posting | ✅ | Upsert by marker, correct PR/issue number | | `GITHUB_TOKEN` availability | ✅ | `github_token` input with `default: ${{ github.token }}` | | Node.js version | ✅ | node24 | -| Pricing table | ✅ | Fetched from `/api/models/pricing`; built-in prefix fallback | +| Pricing table | ✅ | Fetched from `/api/models/pricing`; shows `—` if unreachable | +| Codex token counts | ✅ with caveat | Parsed from rollout JSONL at `$CODEX_HOME/sessions/YYYY/MM/DD/rollout-*.jsonl`. 
Works in production. Rollout format is internal (not a public API) — see section 6 above. | diff --git a/src/comment.ts b/src/comment.ts index da57dd8..be0f000 100644 --- a/src/comment.ts +++ b/src/comment.ts @@ -47,9 +47,40 @@ function formatNumber(n: number): string { return n.toLocaleString('en-US'); } +const TABLE_HEADER = [ + '| # | Workflow | Model | Status | Cost | Duration |', + '|---|----------|-------|--------|------|----------|', +]; + +const VISIBLE_RUNS_LIMIT = 5; + +/** Minimal run fields needed to render a table row */ +type RunRow = Pick< + RunCommentData, + 'durationSeconds' | 'model' | 'status' | 'totalCostCents' | 'workflowName' +>; + +/** Builds table row strings for a slice of runs, numbered from startIndex. */ +function buildTableRows({ + runs, + startIndex, +}: { + /** 1-based row number for the first run */ + startIndex: number; + /** Runs to render */ + runs: RunRow[]; +}): string[] { + return runs.map((run, i) => { + const icon = STATUS_EMOJI[run.status] ?? '❓'; + return `| ${startIndex + i} | ${run.workflowName} | ${run.model ?? '—'} | ${icon} | ${formatCost(run.totalCostCents)} | ${formatDuration(run.durationSeconds)} |`; + }); +} + /** * Builds the Markdown comment body for a PR/issue. - * Parses any existing comment to extract previous run rows and append the new one. + * Parses any existing comment to extract previous run rows and prepends the new one. + * Runs are shown newest-first. If there are more than 5 runs, only the 5 most recent + * are shown in the main table; a collapsible section shows all runs. */ export function buildCommentBody({ apiPricing, @@ -64,36 +95,48 @@ export function buildCommentBody({ runData: RunCommentData; }): string { const existingRuns = existingBody ? parseExistingRuns(existingBody) : []; - const allRuns = [...existingRuns, runData]; - - const tableRows = allRuns - .map((run, i) => { - const icon = STATUS_EMOJI[run.status] ?? '❓'; - const model = run.model ?? 
'—'; - return `| ${i + 1} | ${run.workflowName} | ${model} | ${icon} | ${formatCost(run.totalCostCents)} | ${formatDuration(run.durationSeconds)} |`; - }) - .join('\n'); + // Newest first: current run at the top + const allRuns: RunRow[] = [runData, ...existingRuns]; const totalCostCents = allRuns.reduce((sum, r) => sum + r.totalCostCents, 0); const totalRow = allRuns.length > 1 ? `| **Total** | | | | **${formatCost(totalCostCents)}** | |` : ''; - const latestRun = runData; - const tokenDetails = buildTokenDetails({ apiPricing, run: latestRun }); + const visibleRuns = allRuns.slice(0, VISIBLE_RUNS_LIMIT); + const hasMore = allRuns.length > VISIBLE_RUNS_LIMIT; - const lines = [ + const tokenDetails = buildTokenDetails({ apiPricing, run: runData }); + + const lines: string[] = [ COMMENT_MARKER, '## ⚡ AgentMeter', '', - '| # | Workflow | Model | Status | Cost | Duration |', - '|---|----------|-------|--------|------|----------|', - tableRows, + ...TABLE_HEADER, + ...buildTableRows({ runs: visibleRuns, startIndex: 1 }), ...(totalRow ? [totalRow] : []), '', - ...(tokenDetails ? [tokenDetails, ''] : []), - `[View in AgentMeter →](${latestRun.dashboardUrl})`, ]; + if (hasMore) { + lines.push( + '
', + `All ${allRuns.length} runs`, + '', + ...TABLE_HEADER, + ...buildTableRows({ runs: allRuns, startIndex: 1 }), + ...(totalRow ? [totalRow] : []), + '', + '
', + '' + ); + } + + if (tokenDetails) { + lines.push(tokenDetails, ''); + } + + lines.push(`[View in AgentMeter →](${runData.dashboardUrl})`); + return lines.join('\n'); } @@ -176,54 +219,68 @@ interface ParsedRun { turns: number | null; } +/** + * Parses raw table row strings from a Markdown table body (rows only, no header). + */ +function parseTableRows(rawRows: string): ParsedRun[] { + return rawRows + .trim() + .split('\n') + .filter((r) => r.startsWith('|') && !r.includes('**Total**')) + .map((row) => { + const cells = row + .split('|') + .map((c) => c.trim()) + .filter(Boolean); + if (cells.length < 5) return null; + + // Support both old (5-col) and new (6-col) format: + // Old: # | Workflow | Status | Cost | Duration + // New: # | Workflow | Model | Status | Cost | Duration + const hasModelCol = cells.length >= 6; + const workflowName = cells[1] ?? ''; + const model = hasModelCol && cells[2] && cells[2] !== '—' ? cells[2] : null; + const statusEmoji = (hasModelCol ? cells[3] : cells[2]) ?? ''; + const costStr = ((hasModelCol ? cells[4] : cells[3]) ?? '').replace(/[$*]/g, ''); + const totalCostCents = Math.round(parseFloat(costStr) * 100); + const durationSeconds = parseDuration((hasModelCol ? cells[5] : cells[4]) ?? ''); + const status = + Object.entries(STATUS_EMOJI).find(([, emoji]) => emoji === statusEmoji)?.[0] ?? 'other'; + + return { + workflowName, + status, + totalCostCents: Number.isNaN(totalCostCents) ? 0 : totalCostCents, + durationSeconds, + dashboardUrl: '', + model, + turns: null, + } satisfies ParsedRun; + }) + .filter((r): r is NonNullable => r !== null); +} + /** * Parses run rows out of an existing AgentMeter comment body. - * Returns an empty array if parsing fails or comment is malformed. + * Prefers the "All N runs" collapsible section when present (contains the full history), + * falling back to the main table otherwise. + * Returns an empty array if parsing fails or the comment is malformed. 
*/ function parseExistingRuns(body: string): ParsedRun[] { try { + // When >5 runs exist the full history lives in the collapsible — prefer that + const detailsMatch = body.match( + /All \d+ runs<\/summary>\n\n([\s\S]+?)\n\n<\/details>/ + ); + if (detailsMatch?.[1]) { + const tableMatch = detailsMatch[1].match(/\| #.*?\n\|[-|: ]+\n((?:\|.*?\n)*)/s); + if (tableMatch?.[1]) return parseTableRows(tableMatch[1]); + } + + // Fall back to the main (potentially truncated) table const tableMatch = body.match(/\| #.*?\n\|[-|: ]+\n((?:\|.*?\n)*)/s); if (!tableMatch?.[1]) return []; - - const rows = tableMatch[1] - .trim() - .split('\n') - .filter((r) => r.startsWith('|') && !r.includes('**Total**')); - - return rows - .map((row) => { - const cells = row - .split('|') - .map((c) => c.trim()) - .filter(Boolean); - if (cells.length < 5) return null; - - // Support both old (5-col) and new (6-col) format: - // Old: # | Workflow | Status | Cost | Duration - // New: # | Workflow | Model | Status | Cost | Duration - const hasModelCol = cells.length >= 6; - const workflowName = cells[1] ?? ''; - const model = hasModelCol && cells[2] && cells[2] !== '—' ? cells[2] : null; - const statusEmoji = (hasModelCol ? cells[3] : cells[2]) ?? ''; - const costStr = ((hasModelCol ? cells[4] : cells[3]) ?? '').replace(/[$*]/g, ''); - const totalCostCents = Math.round(parseFloat(costStr) * 100); - const durationSeconds = parseDuration((hasModelCol ? cells[5] : cells[4]) ?? ''); - - const status = - Object.entries(STATUS_EMOJI).find(([, emoji]) => emoji === statusEmoji)?.[0] ?? 'other'; - - const parsed: ParsedRun = { - workflowName, - status, - totalCostCents: Number.isNaN(totalCostCents) ? 
0 : totalCostCents, - durationSeconds, - dashboardUrl: '', - model, - turns: null, - }; - return parsed; - }) - .filter((r): r is NonNullable => r !== null) as ParsedRun[]; + return parseTableRows(tableMatch[1]); } catch { return []; } @@ -245,9 +302,8 @@ async function findExistingComment({ issueOrPrNumber: number; }): Promise<{ id: number; body: string } | null> { try { - const { data: comments } = await ( - octokit as ReturnType - ).rest.issues.listComments({ + const gh = octokit as ReturnType; + const comments = await gh.paginate(gh.rest.issues.listComments, { owner, repo, issue_number: issueOrPrNumber, diff --git a/src/pricing.ts b/src/pricing.ts index ec292e1..e137f65 100644 --- a/src/pricing.ts +++ b/src/pricing.ts @@ -71,9 +71,7 @@ export async function fetchPricing({ cacheReadPer1M: entry.cacheReadPerMillionTokens ?? 0, }; } - core.info( - `AgentMeter: fetched pricing for ${Object.keys(result).length} models: ${Object.keys(result).join(', ')}` - ); + core.info(`AgentMeter: fetched pricing for ${Object.keys(result).length} models.`); return result; } catch (error) { core.info(`AgentMeter: could not fetch pricing from API (${error}) — cost will show as —.`); diff --git a/src/run.ts b/src/run.ts index d76c998..127bcdf 100644 --- a/src/run.ts +++ b/src/run.ts @@ -10,12 +10,26 @@ import { resolveWorkflowRun } from './workflow-run'; /** * Builds a human-readable trigger ref string from a number and event name. + * Covers both raw GitHub event names and the mapped triggerType values from context.ts. 
*/ -function buildTriggerRef(number: number, eventName: string): string { - if (eventName === 'pull_request' || eventName === 'pull_request_review_comment') { - return `PR #${number}`; - } - return `#${number}`; +function buildTriggerRef({ + eventName, + number, +}: { + /** Raw GitHub event name or mapped triggerType from context.ts */ + eventName: string; + /** PR or issue number */ + number: number; +}): string { + const prEvents = new Set([ + 'pull_request', + 'pull_request_review_comment', + 'pr_comment', + 'pr_opened', + 'pr_synchronize', + 'pr_reopened', + ]); + return prEvents.has(eventName) ? `PR #${number}` : `#${number}`; } /** @@ -73,20 +87,32 @@ export async function run(): Promise { } } - // Token resolution priority: explicit inputs > workflow_run artifact > agent_output extraction + // Token resolution priority: explicit inputs > workflow_run artifact > agent_output extraction. + // Split into two resolveTokens calls so the artifact wins over stdout extraction. const tokens = resolveTokens({ - agentOutput: inputs.agentOutput, + agentOutput: '', inputTokensOverride: inputs.inputTokens, outputTokensOverride: inputs.outputTokens, cacheReadTokensOverride: inputs.cacheReadTokens, cacheWriteTokensOverride: inputs.cacheWriteTokens, - }) ?? workflowRunTokens; - + }) ?? + workflowRunTokens ?? + resolveTokens({ + agentOutput: inputs.agentOutput, + inputTokensOverride: null, + outputTokensOverride: null, + cacheReadTokensOverride: null, + cacheWriteTokensOverride: null, + }); + + // Prefer ctx.triggerRef (correctly set for inline runs including issue vs PR distinction). + // Fall back to buildTriggerRef only for companion workflow_run mode where ctx.triggerRef is null. const triggerRef = - resolvedTriggerNumber !== null - ? buildTriggerRef(resolvedTriggerNumber, resolvedTriggerEvent) - : ctx.triggerRef; + ctx.triggerRef ?? + (resolvedTriggerNumber !== null + ? 
buildTriggerRef({ eventName: resolvedTriggerEvent, number: resolvedTriggerNumber }) + : null); const triggerType = resolvedTriggerEvent || ctx.triggerType || 'other'; diff --git a/src/token-extractor.ts b/src/token-extractor.ts index f43fefb..1677e5f 100644 --- a/src/token-extractor.ts +++ b/src/token-extractor.ts @@ -1,4 +1,10 @@ -import type { ClaudeCodeOutput, CodexTokenEvent, TokenCounts, TokenCountsWithMeta } from './types'; +import type { + ClaudeCodeOutput, + CodexExecTurnCompleted, + CodexTokenEvent, + TokenCounts, + TokenCountsWithMeta, +} from './types'; /** * Attempts to extract token counts from agent stdout. @@ -13,6 +19,9 @@ export function extractTokensFromOutput( const jsonResult = tryExtractFromJson(agentOutput); if (jsonResult) return jsonResult; + const codexExecResult = tryExtractFromCodexExecJsonl(agentOutput); + if (codexExecResult) return codexExecResult; + const codexResult = tryExtractFromCodexJsonl(agentOutput); if (codexResult) return codexResult; @@ -45,6 +54,57 @@ function tryExtractFromJson( } } +/** + * Tries to extract token counts from `codex exec --json` stdout. + * Sums `usage` fields across all `turn.completed` events. + * + * Field mapping: + * input_tokens → inputTokens + * output_tokens → outputTokens + * cached_input_tokens → cacheReadTokens + */ +function tryExtractFromCodexExecJsonl( + agentOutput: string +): { tokens: TokenCounts; isApproximate: boolean } | null { + const lines = agentOutput.split('\n'); + let inputTokens = 0; + let outputTokens = 0; + let cacheReadTokens = 0; + let found = false; + + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed.includes('"turn.completed"')) continue; + try { + const parsed = JSON.parse(trimmed) as unknown; + const obj = + typeof parsed === 'object' && parsed !== null ? 
(parsed as Record) : null; + if (obj?.['type'] !== 'turn.completed') continue; + const usage = obj['usage']; + if (typeof usage !== 'object' || usage === null) continue; + const u = usage as CodexExecTurnCompleted['usage']; + inputTokens += u.input_tokens ?? 0; + outputTokens += u.output_tokens ?? 0; + cacheReadTokens += u.cached_input_tokens ?? 0; + found = true; + } catch { + // not valid JSON, skip line + } + } + + if (!found) return null; + + return { + tokens: { + inputTokens, + outputTokens, + cacheReadTokens, + cacheWriteTokens: 0, + }, + isApproximate: false, + }; +} + /** * Tries to extract token counts from Codex CLI JSONL streaming output. * Looks for `token_count` events emitted by `codex exec` and takes the last one, diff --git a/src/types.ts b/src/types.ts index a740a0c..4aae3b2 100644 --- a/src/types.ts +++ b/src/types.ts @@ -155,6 +155,7 @@ export interface AgentTokensArtifact { /** * A single JSONL event emitted by `codex exec` in streaming mode. * Token counts are found in `token_count` events. + * Written to the rollout JSONL file at `$CODEX_HOME/sessions/YYYY/MM/DD/rollout-*.jsonl`. */ export interface CodexTokenEvent { type: 'event_msg'; @@ -173,6 +174,22 @@ export interface CodexTokenEvent { }; } +/** + * A `turn.completed` event emitted to stdout when running `codex exec --json`. + * Sums all turns for total usage of a full `codex exec` run. 
+ */ +export interface CodexExecTurnCompleted { + type: 'turn.completed'; + usage: { + /** Total input tokens sent (includes cached) */ + input_tokens?: number; + /** Output tokens generated */ + output_tokens?: number; + /** Input tokens served from cache (subset of input_tokens) */ + cached_input_tokens?: number; + }; +} + /** Known Claude Code JSON output structure (best-effort) */ export interface ClaudeCodeOutput { /** Top-level usage block */ diff --git a/src/workflow-run.ts b/src/workflow-run.ts index 588a0b1..2443a82 100644 --- a/src/workflow-run.ts +++ b/src/workflow-run.ts @@ -278,8 +278,8 @@ async function resolveTrigger({ owner, repo, head: `${owner}:${headBranch}`, - state: 'open', per_page: 1, + state: 'open', }); if (prs[0]) { return { triggerNumber: prs[0].number, triggerEvent: 'pull_request' }; @@ -375,7 +375,14 @@ async function parseAgentTokensZip(zipData: ArrayBuffer): Promise