diff --git a/internal/db/db.go b/internal/db/db.go index 030f706e..02b3e0c8 100644 --- a/internal/db/db.go +++ b/internal/db/db.go @@ -102,7 +102,9 @@ import ( // // (17: Codex template filtering.) // (16: system messages.) -const dataVersion = 30 +// (31: Copilot shutdown usage events use positional DedupKey to +// handle multi-segment sessions correctly.) +const dataVersion = 31 const tokenCoverageRepairStatsKey = "token_coverage_repair_v1" diff --git a/internal/parser/copilot.go b/internal/parser/copilot.go index de3c0dab..57ee77d3 100644 --- a/internal/parser/copilot.go +++ b/internal/parser/copilot.go @@ -19,12 +19,14 @@ const ( copilotEventToolComplete = "tool.execution_complete" copilotEventAssistantReason = "assistant.reasoning" copilotEventModelChange = "session.model_change" + copilotEventSessionShutdown = "session.shutdown" ) // copilotSessionBuilder accumulates state while scanning a // Copilot JSONL session file line by line. type copilotSessionBuilder struct { messages []ParsedMessage + usageEvents []ParsedUsageEvent firstMessage string startedAt time.Time endedAt time.Time @@ -65,8 +67,10 @@ func (b *copilotSessionBuilder) processLine(line string) { b.handleAssistantReasoning() case copilotEventModelChange: if v := data.Get("newModel"); v.Exists() { - b.currentModel = v.Str + b.currentModel = normalizeCopilotModel(v.Str) } + case copilotEventSessionShutdown: + b.handleShutdown(data, ts) } } @@ -176,16 +180,21 @@ func (b *copilotSessionBuilder) handleAssistantMessage( return } + outputTokens := int(data.Get("outputTokens").Int()) + hasOutputTokens := data.Get("outputTokens").Exists() + b.messages = append(b.messages, ParsedMessage{ - Ordinal: b.ordinal, - Role: RoleAssistant, - Content: displayContent, - Timestamp: ts, - HasThinking: hasThinking, - HasToolUse: hasToolUse, - ContentLength: len(displayContent), - ToolCalls: toolCalls, - Model: b.currentModel, + Ordinal: b.ordinal, + Role: RoleAssistant, + Content: displayContent, + Timestamp: ts, + 
HasThinking: hasThinking, + HasToolUse: hasToolUse, + ContentLength: len(displayContent), + ToolCalls: toolCalls, + Model: b.currentModel, + OutputTokens: outputTokens, + HasOutputTokens: hasOutputTokens, }) b.ordinal++ } @@ -230,6 +239,45 @@ func (b *copilotSessionBuilder) handleAssistantReasoning() { } } +// handleShutdown extracts per-model token usage from the +// session.shutdown event's modelMetrics field. +func (b *copilotSessionBuilder) handleShutdown( + data gjson.Result, ts time.Time, +) { + occurredAt := timeString(ts, b.startedAt) + data.Get("modelMetrics").ForEach( + func(modelKey, metrics gjson.Result) bool { + usage := metrics.Get("usage") + totalInput := int(usage.Get("inputTokens").Int()) + cacheRead := int(usage.Get("cacheReadTokens").Int()) + cacheWrite := int(usage.Get("cacheWriteTokens").Int()) + output := int(usage.Get("outputTokens").Int()) + reasoning := int(usage.Get("reasoningTokens").Int()) + + // Fresh input = total - cache_read - cache_write. + freshInput := max(totalInput-cacheRead-cacheWrite, 0) + + if freshInput == 0 && output == 0 && + cacheRead == 0 && cacheWrite == 0 && + reasoning == 0 { + return true + } + + b.usageEvents = append(b.usageEvents, ParsedUsageEvent{ + Source: "shutdown", + Model: normalizeCopilotModel(modelKey.Str), + InputTokens: freshInput, + OutputTokens: output, + CacheCreationInputTokens: cacheWrite, + CacheReadInputTokens: cacheRead, + ReasoningTokens: reasoning, + OccurredAt: occurredAt, + }) + return true + }, + ) +} + func formatCopilotToolCalls( calls []ParsedToolCall, ) string { @@ -241,6 +289,20 @@ func formatCopilotToolCalls( return strings.Join(parts, "\n") } +// normalizeCopilotModel converts the model identifier used in +// Copilot session events to the form used in the pricing catalog. +// Claude model IDs use dots in version numbers in Copilot events +// (e.g. "claude-sonnet-4.6") but hyphens in the pricing catalog +// (e.g. "claude-sonnet-4-6"). 
Other model families such as GPT +// already use dots in the catalog (e.g. "gpt-5.4"), so only +// claude-prefixed names are normalized. +func normalizeCopilotModel(model string) string { + if strings.HasPrefix(model, "claude-") { + return strings.ReplaceAll(model, ".", "-") + } + return model +} + // readCopilotWorkspaceName reads the session name from the // workspace.yaml sibling file in a directory-format session. // Returns an empty string for flat .jsonl sessions or when @@ -272,22 +334,22 @@ func readCopilotWorkspaceName(eventsPath string) string { } // ParseCopilotSession parses a Copilot JSONL session file. -// Returns (nil, nil, nil) if the file doesn't exist or +// Returns (nil, nil, nil, nil) if the file doesn't exist or // contains no user/assistant messages. func ParseCopilotSession( path, machine string, -) (*ParsedSession, []ParsedMessage, error) { +) (*ParsedSession, []ParsedMessage, []ParsedUsageEvent, error) { info, err := os.Stat(path) if err != nil { if os.IsNotExist(err) { - return nil, nil, nil + return nil, nil, nil, nil } - return nil, nil, fmt.Errorf("stat %s: %w", path, err) + return nil, nil, nil, fmt.Errorf("stat %s: %w", path, err) } f, err := os.Open(path) if err != nil { - return nil, nil, fmt.Errorf("open %s: %w", path, err) + return nil, nil, nil, fmt.Errorf("open %s: %w", path, err) } defer f.Close() @@ -306,7 +368,7 @@ func ParseCopilotSession( } if err := lr.Err(); err != nil { - return nil, nil, + return nil, nil, nil, fmt.Errorf("reading copilot %s: %w", path, err) } @@ -319,7 +381,7 @@ func ParseCopilotSession( } } if !hasContent { - return nil, nil, nil + return nil, nil, nil, nil } sessionID := b.sessionID @@ -360,7 +422,23 @@ func ParseCopilotSession( }, } - return sess, b.messages, nil + accumulateMessageTokenUsage(sess, b.messages) + + // Stamp the session ID on usage events (not known until here). 
+ // DedupKey encodes the event's position in the slice so that + // multi-segment sessions (where the same model appears in + // several shutdown events) each get a distinct key. + for i := range b.usageEvents { + b.usageEvents[i].SessionID = sessionID + b.usageEvents[i].DedupKey = fmt.Sprintf( + "shutdown:%s:%s:%d", + sessionID, + b.usageEvents[i].Model, + i, + ) + } + + return sess, b.messages, b.usageEvents, nil } // sessionIDFromPath extracts a session ID from a Copilot diff --git a/internal/parser/copilot_test.go b/internal/parser/copilot_test.go index 1fd97e3a..26a6d2f8 100644 --- a/internal/parser/copilot_test.go +++ b/internal/parser/copilot_test.go @@ -28,7 +28,7 @@ func writeCopilotJSONL( // parseAndValidateHelper parses the session and fails the test on basic errors. func parseAndValidateHelper(t *testing.T, path string, machine string, wantMsgs int) (*ParsedSession, []ParsedMessage) { t.Helper() - sess, msgs, err := ParseCopilotSession(path, machine) + sess, msgs, _, err := ParseCopilotSession(path, machine) require.NoError(t, err) require.NotNil(t, sess, "expected non-nil session") require.Len(t, msgs, wantMsgs) @@ -349,7 +349,7 @@ func TestParseCopilotSession_EmptySession(t *testing.T) { `{"type":"session.start","data":{"sessionId":"empty"},"timestamp":"2025-01-15T10:00:00Z"}`, ) - sess, msgs, err := ParseCopilotSession(path, "m") + sess, msgs, _, err := ParseCopilotSession(path, "m") require.NoError(t, err) assert.Nil(t, sess, "expected nil session for empty") assert.Nil(t, msgs, "expected nil messages for empty") @@ -358,7 +358,7 @@ func TestParseCopilotSession_EmptySession(t *testing.T) { func TestParseCopilotSession_NonexistentFile(t *testing.T) { path := filepath.Join(t.TempDir(), "nonexistent.jsonl") - sess, msgs, err := ParseCopilotSession(path, "m") + sess, msgs, _, err := ParseCopilotSession(path, "m") require.NoError(t, err, "expected nil error") assert.Nil(t, sess, "expected nil session for nonexistent file") assert.Nil(t, msgs, "expected 
nil messages for nonexistent file") @@ -455,7 +455,7 @@ func TestParseCopilotSession_ModelChange(t *testing.T) { _, msgs := parseAndValidateHelper(t, path, "m", 2) - assertEqual(t, "claude-sonnet-4.6", msgs[1].Model, "msgs[1].Model") + assertEqual(t, "claude-sonnet-4-6", msgs[1].Model, "msgs[1].Model") assertEqual(t, "", msgs[0].Model, "msgs[0].Model") } @@ -483,8 +483,8 @@ func TestParseCopilotSession_ModelMidSessionChange(t *testing.T) { _, msgs := parseAndValidateHelper(t, path, "m", 4) - assertEqual(t, "claude-sonnet-4.6", msgs[1].Model, "msgs[1].Model") - assertEqual(t, "claude-haiku-4.5", msgs[3].Model, "msgs[3].Model") + assertEqual(t, "claude-sonnet-4-6", msgs[1].Model, "msgs[1].Model") + assertEqual(t, "claude-haiku-4-5", msgs[3].Model, "msgs[3].Model") } func TestParseCopilotSession_ModelReset(t *testing.T) { @@ -502,10 +502,32 @@ func TestParseCopilotSession_ModelReset(t *testing.T) { _, msgs := parseAndValidateHelper(t, path, "m", 4) - assertEqual(t, "claude-sonnet-4.6", msgs[1].Model, "msgs[1].Model") + assertEqual(t, "claude-sonnet-4-6", msgs[1].Model, "msgs[1].Model") assertEqual(t, "", msgs[3].Model, "msgs[3].Model (reset)") } +func TestNormalizeCopilotModel(t *testing.T) { + tests := []struct { + input string + want string + }{ + {"claude-sonnet-4.6", "claude-sonnet-4-6"}, + {"claude-haiku-4.5", "claude-haiku-4-5"}, + {"claude-opus-4.7", "claude-opus-4-7"}, + // GPT models use dots in the pricing catalog and must not be changed. 
+ {"gpt-5.4", "gpt-5.4"}, + {"gpt-5.5", "gpt-5.5"}, + {"gpt-4o", "gpt-4o"}, + {"o3-mini", "o3-mini"}, + {"", ""}, + } + for _, tc := range tests { + t.Run(tc.input, func(t *testing.T) { + assert.Equal(t, tc.want, normalizeCopilotModel(tc.input)) + }) + } +} + func TestSessionIDFromPath(t *testing.T) { tests := []struct { path string @@ -523,3 +545,154 @@ func TestSessionIDFromPath(t *testing.T) { }) } } + +func TestParseCopilotSession_OutputTokens(t *testing.T) { + path := writeCopilotJSONL(t, + `{"type":"session.start","data":{"sessionId":"tok-test","context":{"cwd":"/home/alice/proj","branch":"main"}},"timestamp":"2025-01-15T10:00:00Z"}`, + `{"type":"user.message","data":{"content":"Hello"},"timestamp":"2025-01-15T10:00:01Z"}`, + `{"type":"assistant.message","data":{"content":"Hi there.","outputTokens":120},"timestamp":"2025-01-15T10:00:02Z"}`, + `{"type":"user.message","data":{"content":"How are you?"},"timestamp":"2025-01-15T10:00:03Z"}`, + `{"type":"assistant.message","data":{"content":"I am fine.","outputTokens":85},"timestamp":"2025-01-15T10:00:04Z"}`, + ) + + sess, msgs := parseAndValidateHelper(t, path, "m", 4) + + // Session total should be sum of both assistant messages. + assert.True(t, sess.HasTotalOutputTokens, "HasTotalOutputTokens") + assert.Equal(t, 205, sess.TotalOutputTokens, "TotalOutputTokens") + + // Per-message token presence. + assert.True(t, msgs[1].HasOutputTokens, "msgs[1].HasOutputTokens") + assert.Equal(t, 120, msgs[1].OutputTokens, "msgs[1].OutputTokens") + assert.True(t, msgs[3].HasOutputTokens, "msgs[3].HasOutputTokens") + assert.Equal(t, 85, msgs[3].OutputTokens, "msgs[3].OutputTokens") +} + +func TestParseCopilotSession_OutputTokens_Missing(t *testing.T) { + // When outputTokens is absent, HasOutputTokens must be false. 
+ path := writeCopilotJSONL(t, + `{"type":"session.start","data":{"sessionId":"no-tok","context":{"cwd":"/home/alice/proj","branch":"main"}},"timestamp":"2025-01-15T10:00:00Z"}`, + `{"type":"user.message","data":{"content":"Hello"},"timestamp":"2025-01-15T10:00:01Z"}`, + `{"type":"assistant.message","data":{"content":"Hi."},"timestamp":"2025-01-15T10:00:02Z"}`, + ) + + sess, msgs := parseAndValidateHelper(t, path, "m", 2) + + assert.False(t, sess.HasTotalOutputTokens, "HasTotalOutputTokens should be false when field absent") + assert.Equal(t, 0, sess.TotalOutputTokens, "TotalOutputTokens should be zero") + assert.False(t, msgs[1].HasOutputTokens, "msgs[1].HasOutputTokens should be false") +} + +// parseCopilotFull calls ParseCopilotSession, fails the test on error, and returns the other three values. +func parseCopilotFull( + t *testing.T, path, machine string, +) (*ParsedSession, []ParsedMessage, []ParsedUsageEvent) { + t.Helper() + sess, msgs, usage, err := ParseCopilotSession(path, machine) + require.NoError(t, err) + return sess, msgs, usage +} + +func TestParseCopilotSession_ShutdownUsageEvents(t *testing.T) { + shutdownLine := `{"type":"session.shutdown","data":{"modelMetrics":{"claude-sonnet-4.6":{"usage":{"inputTokens":931647,"outputTokens":7150,"cacheReadTokens":873267,"cacheWriteTokens":51438,"reasoningTokens":432}}}},"timestamp":"2025-01-15T10:01:00Z"}` + path := writeCopilotJSONL(t, + `{"type":"session.start","data":{"sessionId":"shut-test","context":{"cwd":"/proj","branch":"main"}},"timestamp":"2025-01-15T10:00:00Z"}`, + `{"type":"user.message","data":{"content":"Hello"},"timestamp":"2025-01-15T10:00:01Z"}`, + `{"type":"assistant.message","data":{"content":"Hi."},"timestamp":"2025-01-15T10:00:02Z"}`, + shutdownLine, + ) + + sess, _, usage := parseCopilotFull(t, path, "m") + require.NotNil(t, sess) + require.Len(t, usage, 1) + + u := usage[0] + assert.Equal(t, "copilot:shut-test", u.SessionID) + assert.Equal(t, "shutdown", u.Source) + assert.Equal(t, "claude-sonnet-4-6", u.Model) + // 
Fresh input = 931647 - 873267 - 51438 = 6942 + assert.Equal(t, 6942, u.InputTokens, "InputTokens should be fresh only") + assert.Equal(t, 7150, u.OutputTokens) + assert.Equal(t, 873267, u.CacheReadInputTokens) + assert.Equal(t, 51438, u.CacheCreationInputTokens) + assert.Equal(t, 432, u.ReasoningTokens) + assert.Equal(t, "shutdown:copilot:shut-test:claude-sonnet-4-6:0", u.DedupKey) +} + +func TestParseCopilotSession_ShutdownMultiModel(t *testing.T) { + path := writeCopilotJSONL(t, + `{"type":"session.start","data":{"sessionId":"multi-model","context":{"cwd":"/proj","branch":"main"}},"timestamp":"2025-01-15T10:00:00Z"}`, + `{"type":"user.message","data":{"content":"Hello"},"timestamp":"2025-01-15T10:00:01Z"}`, + `{"type":"assistant.message","data":{"content":"Hi."},"timestamp":"2025-01-15T10:00:02Z"}`, + `{"type":"session.shutdown","data":{"modelMetrics":{"claude-sonnet-4.6":{"usage":{"inputTokens":100,"outputTokens":50,"cacheReadTokens":60,"cacheWriteTokens":10}},"claude-haiku-4.5":{"usage":{"inputTokens":200,"outputTokens":80,"cacheReadTokens":120,"cacheWriteTokens":20}}}},"timestamp":"2025-01-15T10:01:00Z"}`, + ) + + _, _, usage := parseCopilotFull(t, path, "m") + require.Len(t, usage, 2) + + byModel := make(map[string]ParsedUsageEvent) + for _, u := range usage { + byModel[u.Model] = u + } + + sonnet := byModel["claude-sonnet-4-6"] + // fresh = 100 - 60 - 10 = 30 + assert.Equal(t, 30, sonnet.InputTokens) + assert.Equal(t, 50, sonnet.OutputTokens) + + haiku := byModel["claude-haiku-4-5"] + // fresh = 200 - 120 - 20 = 60 + assert.Equal(t, 60, haiku.InputTokens) + assert.Equal(t, 80, haiku.OutputTokens) +} + +func TestParseCopilotSession_MultiShutdown_SameModel(t *testing.T) { + // Sessions with compaction have multiple shutdown events for the + // same model. All segments must be captured with distinct DedupKeys. 
+ path := writeCopilotJSONL(t, + `{"type":"session.start","data":{"sessionId":"multi-shut","context":{"cwd":"/proj","branch":"main"}},"timestamp":"2025-01-15T10:00:00Z"}`, + `{"type":"user.message","data":{"content":"Hello"},"timestamp":"2025-01-15T10:00:01Z"}`, + `{"type":"assistant.message","data":{"content":"Hi."},"timestamp":"2025-01-15T10:00:02Z"}`, + `{"type":"session.shutdown","data":{"modelMetrics":{"claude-sonnet-4.6":{"usage":{"inputTokens":100,"outputTokens":50,"cacheReadTokens":60,"cacheWriteTokens":10}}}},"timestamp":"2025-01-15T10:01:00Z"}`, + `{"type":"user.message","data":{"content":"Continue"},"timestamp":"2025-01-15T10:02:00Z"}`, + `{"type":"assistant.message","data":{"content":"Sure."},"timestamp":"2025-01-15T10:02:01Z"}`, + `{"type":"session.shutdown","data":{"modelMetrics":{"claude-sonnet-4.6":{"usage":{"inputTokens":300,"outputTokens":80,"cacheReadTokens":250,"cacheWriteTokens":20}}}},"timestamp":"2025-01-15T10:03:00Z"}`, + ) + + _, _, usage := parseCopilotFull(t, path, "m") + require.Len(t, usage, 2, "both shutdown segments must be captured") + + assert.Equal(t, "shutdown:copilot:multi-shut:claude-sonnet-4-6:0", usage[0].DedupKey) + assert.Equal(t, "shutdown:copilot:multi-shut:claude-sonnet-4-6:1", usage[1].DedupKey) + + // First segment: fresh = 100 - 60 - 10 = 30 + assert.Equal(t, 30, usage[0].InputTokens) + assert.Equal(t, 50, usage[0].OutputTokens) + + // Second segment: fresh = 300 - 250 - 20 = 30 + assert.Equal(t, 30, usage[1].InputTokens) + assert.Equal(t, 80, usage[1].OutputTokens) +} + +func TestParseCopilotSession_ShutdownZeroUsage_Skipped(t *testing.T) { + path := writeCopilotJSONL(t, + `{"type":"session.start","data":{"sessionId":"zero-use","context":{"cwd":"/proj","branch":"main"}},"timestamp":"2025-01-15T10:00:00Z"}`, + `{"type":"user.message","data":{"content":"Hello"},"timestamp":"2025-01-15T10:00:01Z"}`, + `{"type":"assistant.message","data":{"content":"Hi."},"timestamp":"2025-01-15T10:00:02Z"}`, + 
`{"type":"session.shutdown","data":{"modelMetrics":{"claude-sonnet-4.6":{"usage":{"inputTokens":0,"outputTokens":0,"cacheReadTokens":0,"cacheWriteTokens":0,"reasoningTokens":0}}}},"timestamp":"2025-01-15T10:01:00Z"}`, + ) + + _, _, usage := parseCopilotFull(t, path, "m") + assert.Empty(t, usage, "zero-usage model entry should be skipped") +} + +func TestParseCopilotSession_NoShutdown_NoUsageEvents(t *testing.T) { + path := writeCopilotJSONL(t, + `{"type":"session.start","data":{"sessionId":"no-shut","context":{"cwd":"/proj","branch":"main"}},"timestamp":"2025-01-15T10:00:00Z"}`, + `{"type":"user.message","data":{"content":"Hello"},"timestamp":"2025-01-15T10:00:01Z"}`, + `{"type":"assistant.message","data":{"content":"Hi."},"timestamp":"2025-01-15T10:00:02Z"}`, + ) + + _, _, usage := parseCopilotFull(t, path, "m") + assert.Empty(t, usage, "no shutdown event should produce no usage events") +} diff --git a/internal/sync/engine.go b/internal/sync/engine.go index 3ddb21cb..fa73f176 100644 --- a/internal/sync/engine.go +++ b/internal/sync/engine.go @@ -3480,7 +3480,7 @@ func (e *Engine) processCopilot( return processResult{skip: true} } - sess, msgs, err := parser.ParseCopilotSession( + sess, msgs, usageEvents, err := parser.ParseCopilotSession( file.Path, e.machine, ) if err != nil { @@ -3501,7 +3501,7 @@ func (e *Engine) processCopilot( return processResult{ results: []parser.ParseResult{ - {Session: *sess, Messages: msgs}, + {Session: *sess, Messages: msgs, UsageEvents: usageEvents}, }, } }