From 0ea2e07ebd6b8d51be9062e225206c5271e10df2 Mon Sep 17 00:00:00 2001 From: Michael Chapman Date: Mon, 1 Jun 2026 11:23:41 -0500 Subject: [PATCH 1/5] feat(parser): capture outputTokens from Copilot assistant.message events Each assistant.message event in the Copilot JSONL format contains an outputTokens field. Wire it up to ParsedMessage.OutputTokens / HasOutputTokens and call accumulateMessageTokenUsage so the session-level TotalOutputTokens and HasTotalOutputTokens are populated. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- internal/parser/copilot.go | 25 ++++++++++++++-------- internal/parser/copilot_test.go | 37 +++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 9 deletions(-) diff --git a/internal/parser/copilot.go b/internal/parser/copilot.go index de3c0dab3..cb13fba86 100644 --- a/internal/parser/copilot.go +++ b/internal/parser/copilot.go @@ -176,16 +176,21 @@ func (b *copilotSessionBuilder) handleAssistantMessage( return } + outputTokens := int(data.Get("outputTokens").Int()) + hasOutputTokens := data.Get("outputTokens").Exists() + b.messages = append(b.messages, ParsedMessage{ - Ordinal: b.ordinal, - Role: RoleAssistant, - Content: displayContent, - Timestamp: ts, - HasThinking: hasThinking, - HasToolUse: hasToolUse, - ContentLength: len(displayContent), - ToolCalls: toolCalls, - Model: b.currentModel, + Ordinal: b.ordinal, + Role: RoleAssistant, + Content: displayContent, + Timestamp: ts, + HasThinking: hasThinking, + HasToolUse: hasToolUse, + ContentLength: len(displayContent), + ToolCalls: toolCalls, + Model: b.currentModel, + OutputTokens: outputTokens, + HasOutputTokens: hasOutputTokens, }) b.ordinal++ } @@ -360,6 +365,8 @@ func ParseCopilotSession( }, } + accumulateMessageTokenUsage(sess, b.messages) + return sess, b.messages, nil } diff --git a/internal/parser/copilot_test.go b/internal/parser/copilot_test.go index 1fd97e3a0..5158ddfbd 100644 --- a/internal/parser/copilot_test.go +++ 
b/internal/parser/copilot_test.go @@ -523,3 +523,40 @@ func TestSessionIDFromPath(t *testing.T) { }) } } + +func TestParseCopilotSession_OutputTokens(t *testing.T) { + path := writeCopilotJSONL(t, + `{"type":"session.start","data":{"sessionId":"tok-test","context":{"cwd":"/home/alice/proj","branch":"main"}},"timestamp":"2025-01-15T10:00:00Z"}`, + `{"type":"user.message","data":{"content":"Hello"},"timestamp":"2025-01-15T10:00:01Z"}`, + `{"type":"assistant.message","data":{"content":"Hi there.","outputTokens":120},"timestamp":"2025-01-15T10:00:02Z"}`, + `{"type":"user.message","data":{"content":"How are you?"},"timestamp":"2025-01-15T10:00:03Z"}`, + `{"type":"assistant.message","data":{"content":"I am fine.","outputTokens":85},"timestamp":"2025-01-15T10:00:04Z"}`, + ) + + sess, msgs := parseAndValidateHelper(t, path, "m", 4) + + // Session total should be sum of both assistant messages. + assert.True(t, sess.HasTotalOutputTokens, "HasTotalOutputTokens") + assert.Equal(t, 205, sess.TotalOutputTokens, "TotalOutputTokens") + + // Per-message token presence. + assert.True(t, msgs[1].HasOutputTokens, "msgs[1].HasOutputTokens") + assert.Equal(t, 120, msgs[1].OutputTokens, "msgs[1].OutputTokens") + assert.True(t, msgs[3].HasOutputTokens, "msgs[3].HasOutputTokens") + assert.Equal(t, 85, msgs[3].OutputTokens, "msgs[3].OutputTokens") +} + +func TestParseCopilotSession_OutputTokens_Missing(t *testing.T) { + // When outputTokens is absent, HasOutputTokens must be false. 
+ path := writeCopilotJSONL(t, + `{"type":"session.start","data":{"sessionId":"no-tok","context":{"cwd":"/home/alice/proj","branch":"main"}},"timestamp":"2025-01-15T10:00:00Z"}`, + `{"type":"user.message","data":{"content":"Hello"},"timestamp":"2025-01-15T10:00:01Z"}`, + `{"type":"assistant.message","data":{"content":"Hi."},"timestamp":"2025-01-15T10:00:02Z"}`, + ) + + sess, msgs := parseAndValidateHelper(t, path, "m", 2) + + assert.False(t, sess.HasTotalOutputTokens, "HasTotalOutputTokens should be false when field absent") + assert.Equal(t, 0, sess.TotalOutputTokens, "TotalOutputTokens should be zero") + assert.False(t, msgs[1].HasOutputTokens, "msgs[1].HasOutputTokens should be false") +} From 6285b8961cb2eb45f76e761a691dc4f12d13496e Mon Sep 17 00:00:00 2001 From: Michael Chapman Date: Mon, 1 Jun 2026 11:34:20 -0500 Subject: [PATCH 2/5] feat(parser): parse session.shutdown for Copilot input token usage The session.shutdown event in Copilot's events.jsonl contains a modelMetrics field with full per-model token accounting: inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens, and reasoningTokens. 
- Add copilotEventSessionShutdown constant and handleShutdown() method on the builder that emits one ParsedUsageEvent per model - Derive fresh input tokens as inputTokens - cacheReadTokens - cacheWriteTokens (the raw total includes cached tokens) - Skip fully-zero model entries - Change ParseCopilotSession to return []ParsedUsageEvent as a fourth return value; stamp SessionID and DedupKey after the qualified session ID is known - Wire UsageEvents into the sync engine ParseResult - Add four new tests covering the happy path, multi-model, zero-usage skipping, and the no-shutdown case Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- internal/parser/copilot.go | 67 +++++++++++++++++++++--- internal/parser/copilot_test.go | 92 +++++++++++++++++++++++++++++++-- internal/sync/engine.go | 4 +- 3 files changed, 150 insertions(+), 13 deletions(-) diff --git a/internal/parser/copilot.go b/internal/parser/copilot.go index cb13fba86..238d4594c 100644 --- a/internal/parser/copilot.go +++ b/internal/parser/copilot.go @@ -19,12 +19,14 @@ const ( copilotEventToolComplete = "tool.execution_complete" copilotEventAssistantReason = "assistant.reasoning" copilotEventModelChange = "session.model_change" + copilotEventSessionShutdown = "session.shutdown" ) // copilotSessionBuilder accumulates state while scanning a // Copilot JSONL session file line by line. type copilotSessionBuilder struct { messages []ParsedMessage + usageEvents []ParsedUsageEvent firstMessage string startedAt time.Time endedAt time.Time @@ -67,6 +69,8 @@ func (b *copilotSessionBuilder) processLine(line string) { if v := data.Get("newModel"); v.Exists() { b.currentModel = v.Str } + case copilotEventSessionShutdown: + b.handleShutdown(data, ts) } } @@ -235,6 +239,45 @@ func (b *copilotSessionBuilder) handleAssistantReasoning() { } } +// handleShutdown extracts per-model token usage from the +// session.shutdown event's modelMetrics field. 
+func (b *copilotSessionBuilder) handleShutdown( + data gjson.Result, ts time.Time, +) { + occurredAt := timeString(ts, b.startedAt) + data.Get("modelMetrics").ForEach( + func(modelKey, metrics gjson.Result) bool { + usage := metrics.Get("usage") + totalInput := int(usage.Get("inputTokens").Int()) + cacheRead := int(usage.Get("cacheReadTokens").Int()) + cacheWrite := int(usage.Get("cacheWriteTokens").Int()) + output := int(usage.Get("outputTokens").Int()) + reasoning := int(usage.Get("reasoningTokens").Int()) + + // Fresh input = total - cache_read - cache_write. + freshInput := max(totalInput-cacheRead-cacheWrite, 0) + + if freshInput == 0 && output == 0 && + cacheRead == 0 && cacheWrite == 0 && + reasoning == 0 { + return true + } + + b.usageEvents = append(b.usageEvents, ParsedUsageEvent{ + Source: "shutdown", + Model: modelKey.Str, + InputTokens: freshInput, + OutputTokens: output, + CacheCreationInputTokens: cacheWrite, + CacheReadInputTokens: cacheRead, + ReasoningTokens: reasoning, + OccurredAt: occurredAt, + }) + return true + }, + ) +} + func formatCopilotToolCalls( calls []ParsedToolCall, ) string { @@ -277,22 +320,22 @@ func readCopilotWorkspaceName(eventsPath string) string { } // ParseCopilotSession parses a Copilot JSONL session file. -// Returns (nil, nil, nil) if the file doesn't exist or +// Returns (nil, nil, nil, nil) if the file doesn't exist or // contains no user/assistant messages. 
func ParseCopilotSession( path, machine string, -) (*ParsedSession, []ParsedMessage, error) { +) (*ParsedSession, []ParsedMessage, []ParsedUsageEvent, error) { info, err := os.Stat(path) if err != nil { if os.IsNotExist(err) { - return nil, nil, nil + return nil, nil, nil, nil } - return nil, nil, fmt.Errorf("stat %s: %w", path, err) + return nil, nil, nil, fmt.Errorf("stat %s: %w", path, err) } f, err := os.Open(path) if err != nil { - return nil, nil, fmt.Errorf("open %s: %w", path, err) + return nil, nil, nil, fmt.Errorf("open %s: %w", path, err) } defer f.Close() @@ -311,7 +354,7 @@ func ParseCopilotSession( } if err := lr.Err(); err != nil { - return nil, nil, + return nil, nil, nil, fmt.Errorf("reading copilot %s: %w", path, err) } @@ -324,7 +367,7 @@ func ParseCopilotSession( } } if !hasContent { - return nil, nil, nil + return nil, nil, nil, nil } sessionID := b.sessionID @@ -367,7 +410,15 @@ func ParseCopilotSession( accumulateMessageTokenUsage(sess, b.messages) - return sess, b.messages, nil + // Stamp the session ID on usage events (not known until here). + for i := range b.usageEvents { + b.usageEvents[i].SessionID = sessionID + // Rebuild DedupKey with the fully-qualified session ID. + b.usageEvents[i].DedupKey = "shutdown:" + sessionID + + ":" + b.usageEvents[i].Model + } + + return sess, b.messages, b.usageEvents, nil } // sessionIDFromPath extracts a session ID from a Copilot diff --git a/internal/parser/copilot_test.go b/internal/parser/copilot_test.go index 5158ddfbd..f4d40efb3 100644 --- a/internal/parser/copilot_test.go +++ b/internal/parser/copilot_test.go @@ -28,7 +28,7 @@ func writeCopilotJSONL( // parseAndValidateHelper parses the session and fails the test on basic errors. 
func parseAndValidateHelper(t *testing.T, path string, machine string, wantMsgs int) (*ParsedSession, []ParsedMessage) { t.Helper() - sess, msgs, err := ParseCopilotSession(path, machine) + sess, msgs, _, err := ParseCopilotSession(path, machine) require.NoError(t, err) require.NotNil(t, sess, "expected non-nil session") require.Len(t, msgs, wantMsgs) @@ -349,7 +349,7 @@ func TestParseCopilotSession_EmptySession(t *testing.T) { `{"type":"session.start","data":{"sessionId":"empty"},"timestamp":"2025-01-15T10:00:00Z"}`, ) - sess, msgs, err := ParseCopilotSession(path, "m") + sess, msgs, _, err := ParseCopilotSession(path, "m") require.NoError(t, err) assert.Nil(t, sess, "expected nil session for empty") assert.Nil(t, msgs, "expected nil messages for empty") @@ -358,7 +358,7 @@ func TestParseCopilotSession_EmptySession(t *testing.T) { func TestParseCopilotSession_NonexistentFile(t *testing.T) { path := filepath.Join(t.TempDir(), "nonexistent.jsonl") - sess, msgs, err := ParseCopilotSession(path, "m") + sess, msgs, _, err := ParseCopilotSession(path, "m") require.NoError(t, err, "expected nil error") assert.Nil(t, sess, "expected nil session for nonexistent file") assert.Nil(t, msgs, "expected nil messages for nonexistent file") @@ -560,3 +560,89 @@ func TestParseCopilotSession_OutputTokens_Missing(t *testing.T) { assert.Equal(t, 0, sess.TotalOutputTokens, "TotalOutputTokens should be zero") assert.False(t, msgs[1].HasOutputTokens, "msgs[1].HasOutputTokens should be false") } + +// parseCopilotFull calls ParseCopilotSession, fails the test on a parse error, and returns the session, messages, and usage events.
+func parseCopilotFull( + t *testing.T, path, machine string, +) (*ParsedSession, []ParsedMessage, []ParsedUsageEvent) { + t.Helper() + sess, msgs, usage, err := ParseCopilotSession(path, machine) + require.NoError(t, err) + return sess, msgs, usage +} + +func TestParseCopilotSession_ShutdownUsageEvents(t *testing.T) { + shutdownLine := `{"type":"session.shutdown","data":{"modelMetrics":{"claude-sonnet-4.6":{"usage":{"inputTokens":931647,"outputTokens":7150,"cacheReadTokens":873267,"cacheWriteTokens":51438,"reasoningTokens":432}}}},"timestamp":"2025-01-15T10:01:00Z"}` + path := writeCopilotJSONL(t, + `{"type":"session.start","data":{"sessionId":"shut-test","context":{"cwd":"/proj","branch":"main"}},"timestamp":"2025-01-15T10:00:00Z"}`, + `{"type":"user.message","data":{"content":"Hello"},"timestamp":"2025-01-15T10:00:01Z"}`, + `{"type":"assistant.message","data":{"content":"Hi."},"timestamp":"2025-01-15T10:00:02Z"}`, + shutdownLine, + ) + + sess, _, usage := parseCopilotFull(t, path, "m") + require.NotNil(t, sess) + require.Len(t, usage, 1) + + u := usage[0] + assert.Equal(t, "copilot:shut-test", u.SessionID) + assert.Equal(t, "shutdown", u.Source) + assert.Equal(t, "claude-sonnet-4.6", u.Model) + // Fresh input = 931647 - 873267 - 51438 = 6942 + assert.Equal(t, 6942, u.InputTokens, "InputTokens should be fresh only") + assert.Equal(t, 7150, u.OutputTokens) + assert.Equal(t, 873267, u.CacheReadInputTokens) + assert.Equal(t, 51438, u.CacheCreationInputTokens) + assert.Equal(t, 432, u.ReasoningTokens) + assert.Equal(t, "shutdown:copilot:shut-test:claude-sonnet-4.6", u.DedupKey) +} + +func TestParseCopilotSession_ShutdownMultiModel(t *testing.T) { + path := writeCopilotJSONL(t, + `{"type":"session.start","data":{"sessionId":"multi-model","context":{"cwd":"/proj","branch":"main"}},"timestamp":"2025-01-15T10:00:00Z"}`, + `{"type":"user.message","data":{"content":"Hello"},"timestamp":"2025-01-15T10:00:01Z"}`, + 
`{"type":"assistant.message","data":{"content":"Hi."},"timestamp":"2025-01-15T10:00:02Z"}`, + `{"type":"session.shutdown","data":{"modelMetrics":{"claude-sonnet-4.6":{"usage":{"inputTokens":100,"outputTokens":50,"cacheReadTokens":60,"cacheWriteTokens":10}},"claude-haiku-4.5":{"usage":{"inputTokens":200,"outputTokens":80,"cacheReadTokens":120,"cacheWriteTokens":20}}}},"timestamp":"2025-01-15T10:01:00Z"}`, + ) + + _, _, usage := parseCopilotFull(t, path, "m") + require.Len(t, usage, 2) + + byModel := make(map[string]ParsedUsageEvent) + for _, u := range usage { + byModel[u.Model] = u + } + + sonnet := byModel["claude-sonnet-4.6"] + // fresh = 100 - 60 - 10 = 30 + assert.Equal(t, 30, sonnet.InputTokens) + assert.Equal(t, 50, sonnet.OutputTokens) + + haiku := byModel["claude-haiku-4.5"] + // fresh = 200 - 120 - 20 = 60 + assert.Equal(t, 60, haiku.InputTokens) + assert.Equal(t, 80, haiku.OutputTokens) +} + +func TestParseCopilotSession_ShutdownZeroUsage_Skipped(t *testing.T) { + path := writeCopilotJSONL(t, + `{"type":"session.start","data":{"sessionId":"zero-use","context":{"cwd":"/proj","branch":"main"}},"timestamp":"2025-01-15T10:00:00Z"}`, + `{"type":"user.message","data":{"content":"Hello"},"timestamp":"2025-01-15T10:00:01Z"}`, + `{"type":"assistant.message","data":{"content":"Hi."},"timestamp":"2025-01-15T10:00:02Z"}`, + `{"type":"session.shutdown","data":{"modelMetrics":{"claude-sonnet-4.6":{"usage":{"inputTokens":0,"outputTokens":0,"cacheReadTokens":0,"cacheWriteTokens":0,"reasoningTokens":0}}}},"timestamp":"2025-01-15T10:01:00Z"}`, + ) + + _, _, usage := parseCopilotFull(t, path, "m") + assert.Empty(t, usage, "zero-usage model entry should be skipped") +} + +func TestParseCopilotSession_NoShutdown_NoUsageEvents(t *testing.T) { + path := writeCopilotJSONL(t, + `{"type":"session.start","data":{"sessionId":"no-shut","context":{"cwd":"/proj","branch":"main"}},"timestamp":"2025-01-15T10:00:00Z"}`, + 
`{"type":"user.message","data":{"content":"Hello"},"timestamp":"2025-01-15T10:00:01Z"}`, + `{"type":"assistant.message","data":{"content":"Hi."},"timestamp":"2025-01-15T10:00:02Z"}`, + ) + + _, _, usage := parseCopilotFull(t, path, "m") + assert.Empty(t, usage, "no shutdown event should produce no usage events") +} diff --git a/internal/sync/engine.go b/internal/sync/engine.go index 3ddb21cbb..fa73f1761 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -3480,7 +3480,7 @@ func (e *Engine) processCopilot( return processResult{skip: true} } - sess, msgs, err := parser.ParseCopilotSession( + sess, msgs, usageEvents, err := parser.ParseCopilotSession( file.Path, e.machine, ) if err != nil { @@ -3501,7 +3501,7 @@ func (e *Engine) processCopilot( return processResult{ results: []parser.ParseResult{ - {Session: *sess, Messages: msgs}, + {Session: *sess, Messages: msgs, UsageEvents: usageEvents}, }, } } From f1049522e47f4ddb3c709f3584dcb38ea55b4ecf Mon Sep 17 00:00:00 2001 From: Michael Chapman Date: Mon, 1 Jun 2026 12:13:19 -0500 Subject: [PATCH 3/5] fix(copilot): normalize model names from dot to hyphen format Copilot events use dots in model version numbers (e.g. claude-sonnet-4.6) while the pricing catalog uses hyphens (claude-sonnet-4-6). The pricing lookup is an exact map match, so dot-form names produced no cost results. Add normalizeCopilotModel() helper that replaces dots with hyphens and apply it in both the session.model_change case of processLine (per-message Model field) and handleShutdown (ParsedUsageEvent.Model). Update tests to assert hyphen-form model names.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- internal/parser/copilot.go | 14 ++++++++++++-- internal/parser/copilot_test.go | 16 ++++++++-------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/internal/parser/copilot.go b/internal/parser/copilot.go index 238d4594c..1a8211979 100644 --- a/internal/parser/copilot.go +++ b/internal/parser/copilot.go @@ -67,7 +67,7 @@ func (b *copilotSessionBuilder) processLine(line string) { b.handleAssistantReasoning() case copilotEventModelChange: if v := data.Get("newModel"); v.Exists() { - b.currentModel = v.Str + b.currentModel = normalizeCopilotModel(v.Str) } case copilotEventSessionShutdown: b.handleShutdown(data, ts) @@ -265,7 +265,7 @@ func (b *copilotSessionBuilder) handleShutdown( b.usageEvents = append(b.usageEvents, ParsedUsageEvent{ Source: "shutdown", - Model: modelKey.Str, + Model: normalizeCopilotModel(modelKey.Str), InputTokens: freshInput, OutputTokens: output, CacheCreationInputTokens: cacheWrite, @@ -289,6 +289,16 @@ func formatCopilotToolCalls( return strings.Join(parts, "\n") } +// normalizeCopilotModel converts the model identifier used in +// Copilot session events (dots in version numbers, e.g. +// "claude-sonnet-4.6") to the form used in the pricing catalog +// (hyphens, e.g. "claude-sonnet-4-6"). Only dots that follow a +// digit and precede a digit are replaced to avoid mangling +// non-version dots in other model names. +func normalizeCopilotModel(model string) string { + return strings.ReplaceAll(model, ".", "-") +} + // readCopilotWorkspaceName reads the session name from the // workspace.yaml sibling file in a directory-format session. 
// Returns an empty string for flat .jsonl sessions or when diff --git a/internal/parser/copilot_test.go b/internal/parser/copilot_test.go index f4d40efb3..c41cb6cc8 100644 --- a/internal/parser/copilot_test.go +++ b/internal/parser/copilot_test.go @@ -455,7 +455,7 @@ func TestParseCopilotSession_ModelChange(t *testing.T) { _, msgs := parseAndValidateHelper(t, path, "m", 2) - assertEqual(t, "claude-sonnet-4.6", msgs[1].Model, "msgs[1].Model") + assertEqual(t, "claude-sonnet-4-6", msgs[1].Model, "msgs[1].Model") assertEqual(t, "", msgs[0].Model, "msgs[0].Model") } @@ -483,8 +483,8 @@ func TestParseCopilotSession_ModelMidSessionChange(t *testing.T) { _, msgs := parseAndValidateHelper(t, path, "m", 4) - assertEqual(t, "claude-sonnet-4.6", msgs[1].Model, "msgs[1].Model") - assertEqual(t, "claude-haiku-4.5", msgs[3].Model, "msgs[3].Model") + assertEqual(t, "claude-sonnet-4-6", msgs[1].Model, "msgs[1].Model") + assertEqual(t, "claude-haiku-4-5", msgs[3].Model, "msgs[3].Model") } func TestParseCopilotSession_ModelReset(t *testing.T) { @@ -502,7 +502,7 @@ func TestParseCopilotSession_ModelReset(t *testing.T) { _, msgs := parseAndValidateHelper(t, path, "m", 4) - assertEqual(t, "claude-sonnet-4.6", msgs[1].Model, "msgs[1].Model") + assertEqual(t, "claude-sonnet-4-6", msgs[1].Model, "msgs[1].Model") assertEqual(t, "", msgs[3].Model, "msgs[3].Model (reset)") } @@ -587,14 +587,14 @@ func TestParseCopilotSession_ShutdownUsageEvents(t *testing.T) { u := usage[0] assert.Equal(t, "copilot:shut-test", u.SessionID) assert.Equal(t, "shutdown", u.Source) - assert.Equal(t, "claude-sonnet-4.6", u.Model) + assert.Equal(t, "claude-sonnet-4-6", u.Model) // Fresh input = 931647 - 873267 - 51438 = 6942 assert.Equal(t, 6942, u.InputTokens, "InputTokens should be fresh only") assert.Equal(t, 7150, u.OutputTokens) assert.Equal(t, 873267, u.CacheReadInputTokens) assert.Equal(t, 51438, u.CacheCreationInputTokens) assert.Equal(t, 432, u.ReasoningTokens) - assert.Equal(t,
"shutdown:copilot:shut-test:claude-sonnet-4.6", u.DedupKey) + assert.Equal(t, "shutdown:copilot:shut-test:claude-sonnet-4-6", u.DedupKey) } func TestParseCopilotSession_ShutdownMultiModel(t *testing.T) { @@ -613,12 +613,12 @@ func TestParseCopilotSession_ShutdownMultiModel(t *testing.T) { byModel[u.Model] = u } - sonnet := byModel["claude-sonnet-4.6"] + sonnet := byModel["claude-sonnet-4-6"] // fresh = 100 - 60 - 10 = 30 assert.Equal(t, 30, sonnet.InputTokens) assert.Equal(t, 50, sonnet.OutputTokens) - haiku := byModel["claude-haiku-4.5"] + haiku := byModel["claude-haiku-4-5"] // fresh = 200 - 120 - 20 = 60 assert.Equal(t, 60, haiku.InputTokens) assert.Equal(t, 80, haiku.OutputTokens) From a7d08c9e717bcd2329d81bd3f92738c43d04b115 Mon Sep 17 00:00:00 2001 From: Michael Chapman Date: Mon, 1 Jun 2026 12:27:54 -0500 Subject: [PATCH 4/5] fix(copilot): use positional dedup key for multi-segment shutdown events Sessions with context compaction emit multiple session.shutdown events (one per segment). The previous key format 'shutdown:<sessionID>:<model>' caused all segments for the same model to collide on the unique index, keeping only the first segment and silently dropping the rest. Fix: include the event's ordinal position in b.usageEvents as a discriminator: 'shutdown:<sessionID>:<model>:<index>'. Position is stable across re-syncs because events are always appended in file order. Also add TestParseCopilotSession_MultiShutdown_SameModel as a regression test confirming both segments are captured with distinct keys. Bump dataVersion to 31 to trigger a full re-sync so existing collision-based rows are replaced with the correct per-segment rows.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- internal/db/db.go | 4 +++- internal/parser/copilot.go | 12 +++++++++--- internal/parser/copilot_test.go | 30 +++++++++++++++++++++++++++++- 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/internal/db/db.go b/internal/db/db.go index 030f706eb..02b3e0c8f 100644 --- a/internal/db/db.go +++ b/internal/db/db.go @@ -102,7 +102,9 @@ import ( // // (17: Codex template filtering.) // (16: system messages.) -const dataVersion = 30 +// (31: Copilot shutdown usage events use positional DedupKey to +// handle multi-segment sessions correctly.) +const dataVersion = 31 const tokenCoverageRepairStatsKey = "token_coverage_repair_v1" diff --git a/internal/parser/copilot.go b/internal/parser/copilot.go index 1a8211979..4939d92f0 100644 --- a/internal/parser/copilot.go +++ b/internal/parser/copilot.go @@ -421,11 +421,17 @@ func ParseCopilotSession( accumulateMessageTokenUsage(sess, b.messages) // Stamp the session ID on usage events (not known until here). + // DedupKey encodes the event's position in the slice so that + // multi-segment sessions (where the same model appears in + // several shutdown events) each get a distinct key. for i := range b.usageEvents { b.usageEvents[i].SessionID = sessionID - // Rebuild DedupKey with the fully-qualified session ID. 
- b.usageEvents[i].DedupKey = "shutdown:" + sessionID + - ":" + b.usageEvents[i].Model + b.usageEvents[i].DedupKey = fmt.Sprintf( + "shutdown:%s:%s:%d", + sessionID, + b.usageEvents[i].Model, + i, + ) } return sess, b.messages, b.usageEvents, nil diff --git a/internal/parser/copilot_test.go b/internal/parser/copilot_test.go index c41cb6cc8..9e9776189 100644 --- a/internal/parser/copilot_test.go +++ b/internal/parser/copilot_test.go @@ -594,7 +594,7 @@ func TestParseCopilotSession_ShutdownUsageEvents(t *testing.T) { assert.Equal(t, 873267, u.CacheReadInputTokens) assert.Equal(t, 51438, u.CacheCreationInputTokens) assert.Equal(t, 432, u.ReasoningTokens) - assert.Equal(t, "shutdown:copilot:shut-test:claude-sonnet-4-6", u.DedupKey) + assert.Equal(t, "shutdown:copilot:shut-test:claude-sonnet-4-6:0", u.DedupKey) } func TestParseCopilotSession_ShutdownMultiModel(t *testing.T) { @@ -624,6 +624,34 @@ func TestParseCopilotSession_ShutdownMultiModel(t *testing.T) { assert.Equal(t, 80, haiku.OutputTokens) } +func TestParseCopilotSession_MultiShutdown_SameModel(t *testing.T) { + // Sessions with compaction have multiple shutdown events for the + // same model. All segments must be captured with distinct DedupKeys. 
+ path := writeCopilotJSONL(t, + `{"type":"session.start","data":{"sessionId":"multi-shut","context":{"cwd":"/proj","branch":"main"}},"timestamp":"2025-01-15T10:00:00Z"}`, + `{"type":"user.message","data":{"content":"Hello"},"timestamp":"2025-01-15T10:00:01Z"}`, + `{"type":"assistant.message","data":{"content":"Hi."},"timestamp":"2025-01-15T10:00:02Z"}`, + `{"type":"session.shutdown","data":{"modelMetrics":{"claude-sonnet-4.6":{"usage":{"inputTokens":100,"outputTokens":50,"cacheReadTokens":60,"cacheWriteTokens":10}}}},"timestamp":"2025-01-15T10:01:00Z"}`, + `{"type":"user.message","data":{"content":"Continue"},"timestamp":"2025-01-15T10:02:00Z"}`, + `{"type":"assistant.message","data":{"content":"Sure."},"timestamp":"2025-01-15T10:02:01Z"}`, + `{"type":"session.shutdown","data":{"modelMetrics":{"claude-sonnet-4.6":{"usage":{"inputTokens":300,"outputTokens":80,"cacheReadTokens":250,"cacheWriteTokens":20}}}},"timestamp":"2025-01-15T10:03:00Z"}`, + ) + + _, _, usage := parseCopilotFull(t, path, "m") + require.Len(t, usage, 2, "both shutdown segments must be captured") + + assert.Equal(t, "shutdown:copilot:multi-shut:claude-sonnet-4-6:0", usage[0].DedupKey) + assert.Equal(t, "shutdown:copilot:multi-shut:claude-sonnet-4-6:1", usage[1].DedupKey) + + // First segment: fresh = 100 - 60 - 10 = 30 + assert.Equal(t, 30, usage[0].InputTokens) + assert.Equal(t, 50, usage[0].OutputTokens) + + // Second segment: fresh = 300 - 250 - 20 = 30 + assert.Equal(t, 30, usage[1].InputTokens) + assert.Equal(t, 80, usage[1].OutputTokens) +} + func TestParseCopilotSession_ShutdownZeroUsage_Skipped(t *testing.T) { path := writeCopilotJSONL(t, `{"type":"session.start","data":{"sessionId":"zero-use","context":{"cwd":"/proj","branch":"main"}},"timestamp":"2025-01-15T10:00:00Z"}`, From f62ddcfd0bbf8881488e8047fdf169c233d7e583 Mon Sep 17 00:00:00 2001 From: Michael Chapman Date: Mon, 1 Jun 2026 12:43:12 -0500 Subject: [PATCH 5/5] fix(copilot): restrict model normalization to claude-prefixed names 
GPT model IDs use dots in the pricing catalog (e.g. gpt-5.4) so applying strings.ReplaceAll universally would convert gpt-5.4 to gpt-5-4 and cause pricing lookup misses. Restrict the dot-to-hyphen substitution in normalizeCopilotModel to names that begin with 'claude-', which are the only Copilot-emitted IDs that need normalization. All other model names pass through unchanged. Add TestNormalizeCopilotModel table test covering claude variants, gpt-5.4/gpt-5.5, gpt-4o, o3-mini, and the empty string. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- internal/parser/copilot.go | 16 ++++++++++------ internal/parser/copilot_test.go | 22 ++++++++++++++++++++++ 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/internal/parser/copilot.go b/internal/parser/copilot.go index 4939d92f0..57ee77d36 100644 --- a/internal/parser/copilot.go +++ b/internal/parser/copilot.go @@ -290,13 +290,17 @@ func formatCopilotToolCalls( } // normalizeCopilotModel converts the model identifier used in -// Copilot session events (dots in version numbers, e.g. -// "claude-sonnet-4.6") to the form used in the pricing catalog -// (hyphens, e.g. "claude-sonnet-4-6"). Only dots that follow a -// digit and precede a digit are replaced to avoid mangling -// non-version dots in other model names. +// Copilot session events to the form used in the pricing catalog. +// Claude model IDs use dots in version numbers in Copilot events +// (e.g. "claude-sonnet-4.6") but hyphens in the pricing catalog +// (e.g. "claude-sonnet-4-6"). Other model families such as GPT +// already use dots in the catalog (e.g. "gpt-5.4"), so only +// claude-prefixed names are normalized. 
func normalizeCopilotModel(model string) string { - return strings.ReplaceAll(model, ".", "-") + if strings.HasPrefix(model, "claude-") { + return strings.ReplaceAll(model, ".", "-") + } + return model } // readCopilotWorkspaceName reads the session name from the diff --git a/internal/parser/copilot_test.go b/internal/parser/copilot_test.go index 9e9776189..26a6d2f82 100644 --- a/internal/parser/copilot_test.go +++ b/internal/parser/copilot_test.go @@ -506,6 +506,28 @@ func TestParseCopilotSession_ModelReset(t *testing.T) { assertEqual(t, "", msgs[3].Model, "msgs[3].Model (reset)") } +func TestNormalizeCopilotModel(t *testing.T) { + tests := []struct { + input string + want string + }{ + {"claude-sonnet-4.6", "claude-sonnet-4-6"}, + {"claude-haiku-4.5", "claude-haiku-4-5"}, + {"claude-opus-4.7", "claude-opus-4-7"}, + // GPT models use dots in the pricing catalog and must not be changed. + {"gpt-5.4", "gpt-5.4"}, + {"gpt-5.5", "gpt-5.5"}, + {"gpt-4o", "gpt-4o"}, + {"o3-mini", "o3-mini"}, + {"", ""}, + } + for _, tc := range tests { + t.Run(tc.input, func(t *testing.T) { + assert.Equal(t, tc.want, normalizeCopilotModel(tc.input)) + }) + } +} + func TestSessionIDFromPath(t *testing.T) { tests := []struct { path string