Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions claude_code_log/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ class SessionCacheData(BaseModel):

session_id: str
summary: Optional[str] = None
# Claude Code's AI-generated short session title, sourced from
# `ai-title` JSONL entries (last one wins). Preferred over `summary`
# for display when present.
ai_title: Optional[str] = None
first_timestamp: str
last_timestamp: str
message_count: int
Expand Down Expand Up @@ -624,8 +628,8 @@ def update_session_cache(self, session_data: Dict[str, SessionCacheData]) -> Non
message_count, first_user_message, cwd,
total_input_tokens, total_output_tokens,
total_cache_creation_tokens, total_cache_read_tokens,
team_name
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
team_name, ai_title
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(project_id, session_id) DO UPDATE SET
summary = excluded.summary,
first_timestamp = excluded.first_timestamp,
Expand All @@ -637,7 +641,8 @@ def update_session_cache(self, session_data: Dict[str, SessionCacheData]) -> Non
total_output_tokens = excluded.total_output_tokens,
total_cache_creation_tokens = excluded.total_cache_creation_tokens,
total_cache_read_tokens = excluded.total_cache_read_tokens,
team_name = excluded.team_name
team_name = excluded.team_name,
ai_title = excluded.ai_title
""",
(
self._project_id,
Expand All @@ -657,6 +662,7 @@ def update_session_cache(self, session_data: Dict[str, SessionCacheData]) -> Non
data.total_cache_creation_tokens,
data.total_cache_read_tokens,
scrub_surrogates(data.team_name),
scrub_surrogates(data.ai_title),
),
)

Expand Down Expand Up @@ -775,6 +781,7 @@ def get_cached_project_data(self) -> Optional[ProjectCache]:
sessions[row["session_id"]] = SessionCacheData(
session_id=row["session_id"],
summary=row["summary"],
ai_title=row["ai_title"] if "ai_title" in row.keys() else None,
first_timestamp=row["first_timestamp"],
last_timestamp=row["last_timestamp"],
message_count=row["message_count"],
Expand Down Expand Up @@ -824,6 +831,11 @@ def _is_cache_version_compatible(self, cache_version: str) -> bool:
# 0.9.0 introduced _compact_ide_tags_for_preview() which transforms
# first_user_message to use emoji indicators instead of raw IDE tags
"0.8.0": "0.9.0",
# 1.3.0 added handling for `ai-title` JSONL entries. Caches written by
# earlier versions have a NULL `ai_title` column for every session
# until the JSONLs are re-ingested, so the project index keeps showing
# the previous fallback title (summary, first-message preview, or
# session id) until the cache is rebuilt.
"1.2.0": "1.3.0",
}

cache_ver = version.parse(cache_version)
Expand Down Expand Up @@ -1093,6 +1105,7 @@ def get_archived_sessions(
archived_sessions[session_id] = SessionCacheData(
session_id=session_id,
summary=row["summary"],
ai_title=row["ai_title"] if "ai_title" in row.keys() else None,
first_timestamp=row["first_timestamp"],
last_timestamp=row["last_timestamp"],
message_count=row["message_count"],
Expand Down
121 changes: 102 additions & 19 deletions claude_code_log/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
import json
import logging
import re
from collections.abc import Iterator
from dataclasses import dataclass, field
from pathlib import Path
import traceback
from typing import Any, Dict, Iterator, List, Optional, TYPE_CHECKING, cast
from typing import Any, Dict, List, Optional, TYPE_CHECKING, cast

import dateparser

Expand All @@ -34,6 +35,7 @@
from .factories import create_transcript_entry
from .factories.teammate_factory import find_team_lead_body
from .models import (
AiTitleTranscriptEntry,
BaseTranscriptEntry,
DetailLevel,
PassthroughTranscriptEntry,
Expand All @@ -50,11 +52,6 @@
from .renderer import get_renderer, is_html_outdated


# Internal Claude Code message types that carry no DAG fields and are
# dropped without warning. Unknown types outside this set are surfaced
# so we notice new kinds worth supporting (see the else branch in
# load_transcript). `progress` is not here because it has uuid+sessionId
# and participates in the DAG as a PassthroughTranscriptEntry.
@contextlib.contextmanager
def _dag_warnings_suppressed(silent: bool) -> Iterator[None]:
"""Temporarily raise the DAG module's log level under ``silent=True``.
Expand All @@ -77,6 +74,11 @@ def _dag_warnings_suppressed(silent: bool) -> Iterator[None]:
dag_logger.setLevel(previous)


# Internal Claude Code message types that carry no DAG fields and are
# dropped without warning. Unknown types outside this set are surfaced
# so we notice new kinds worth supporting (see the else branch in
# load_transcript). `progress` is not here because it has uuid+sessionId
# and participates in the DAG as a PassthroughTranscriptEntry.
SILENT_SKIP_TYPES: frozenset[str] = frozenset(
{
"file-history-snapshot", # Internal file backup metadata
Expand Down Expand Up @@ -261,8 +263,9 @@ def filter_messages_by_date(

filtered_messages: list[TranscriptEntry] = []
for message in messages:
# Handle SummaryTranscriptEntry which doesn't have timestamp
if isinstance(message, SummaryTranscriptEntry):
# Summary / ai-title entries carry no timestamp — keep them so
# the title/summary survives date filtering.
if isinstance(message, (SummaryTranscriptEntry, AiTitleTranscriptEntry)):
filtered_messages.append(message)
continue

Expand Down Expand Up @@ -378,6 +381,7 @@ def load_transcript(
"user",
"assistant",
"summary",
"ai-title",
"system",
"queue-operation",
]:
Expand Down Expand Up @@ -767,11 +771,19 @@ def load_directory_transcripts(
)
dag_ordered = traverse_session_tree(tree)

# Re-add summaries/queue-ops (excluded from DAG since they lack uuid)
# Re-add summaries/ai-titles/queue-ops (excluded from DAG since they
# lack uuid).
non_dag_entries: list[TranscriptEntry] = [
e
for e in all_messages
if isinstance(e, (SummaryTranscriptEntry, QueueOperationTranscriptEntry))
if isinstance(
e,
(
SummaryTranscriptEntry,
AiTitleTranscriptEntry,
QueueOperationTranscriptEntry,
),
)
]

return dag_ordered + non_dag_entries, tree
Expand Down Expand Up @@ -852,6 +864,13 @@ def deduplicate_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntr
elif isinstance(message, SummaryTranscriptEntry):
# Summaries have no timestamp or uuid - use leafUuid to keep them distinct
content_key = message.leafUuid
elif isinstance(message, AiTitleTranscriptEntry):
# ai-title entries have no timestamp/uuid; collapse them to a single
# entry per session so the same title is not carried downstream many
# times over. The last entry wins via prepare_session_ai_titles either
# way; deduping here just keeps the message list tidy.
session_id = message.sessionId
content_key = "ai-title"
elif isinstance(message, (SystemTranscriptEntry, PassthroughTranscriptEntry)):
content_key = message.uuid

Expand All @@ -867,6 +886,10 @@ def deduplicate_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntr
message.message.content
) > len(existing.message.content):
deduplicated[idx] = message # Replace with better version
elif isinstance(message, AiTitleTranscriptEntry):
# Always keep the most recent ai-title per session — Claude
# Code may refine the curated title across the session.
deduplicated[seen[dedup_key]] = message
# Otherwise skip duplicate
else:
seen[dedup_key] = len(deduplicated)
Expand Down Expand Up @@ -1111,11 +1134,49 @@ def _build_session_data_from_messages(
# Pre-compute warmup session IDs to filter them out
warmup_session_ids = get_warmup_session_ids(messages)

# Map summaries to sessions via leafUuid -> message UUID -> session ID.
# Mirrors _update_cache_with_session_data so the title fallback chain
# (ai_title > summary > preview > id) survives the cache-miss path.
uuid_to_session: Dict[str, str] = {}
uuid_to_session_backup: Dict[str, str] = {}
for message in messages:
if hasattr(message, "uuid") and hasattr(message, "sessionId"):
message_uuid = getattr(message, "uuid", "")
session_id = getattr(message, "sessionId", "")
if message_uuid and session_id:
if type(message) is AssistantTranscriptEntry:
uuid_to_session[message_uuid] = session_id
else:
uuid_to_session_backup[message_uuid] = session_id

session_summaries: Dict[str, str] = {}
for message in messages:
if isinstance(message, SummaryTranscriptEntry):
leaf_uuid = message.leafUuid
if leaf_uuid in uuid_to_session:
session_summaries[uuid_to_session[leaf_uuid]] = message.summary
elif (
leaf_uuid in uuid_to_session_backup
and uuid_to_session_backup[leaf_uuid] not in session_summaries
):
session_summaries[uuid_to_session_backup[leaf_uuid]] = message.summary

# Map AI-generated titles to sessions (last entry per sessionId wins).
session_ai_titles: Dict[str, str] = {}
for message in messages:
if isinstance(message, AiTitleTranscriptEntry):
session_ai_titles[message.sessionId] = message.aiTitle
Comment thread
coderabbitai[bot] marked this conversation as resolved.

# Group messages by session
sessions: Dict[str, Dict[str, Any]] = {}
for message in messages:
if not hasattr(message, "sessionId") or isinstance(
message, (SummaryTranscriptEntry, PassthroughTranscriptEntry)
message,
(
SummaryTranscriptEntry,
AiTitleTranscriptEntry,
PassthroughTranscriptEntry,
),
):
continue

Expand Down Expand Up @@ -1185,6 +1246,8 @@ def _build_session_data_from_messages(
for session_id, data in sessions.items():
result[session_id] = SessionCacheData(
session_id=session_id,
summary=session_summaries.get(session_id),
ai_title=session_ai_titles.get(session_id),
first_timestamp=data["first_timestamp"],
last_timestamp=data["last_timestamp"],
message_count=data["message_count"],
Expand Down Expand Up @@ -1774,6 +1837,12 @@ def _update_cache_with_session_data(
):
session_summaries[uuid_to_session_backup[leaf_uuid]] = message.summary

# Map AI-generated titles to sessions (last entry per sessionId wins).
session_ai_titles: dict[str, str] = {}
for message in messages:
if isinstance(message, AiTitleTranscriptEntry):
session_ai_titles[message.sessionId] = message.aiTitle
Comment thread
coderabbitai[bot] marked this conversation as resolved.

# Group messages by session and calculate session data
sessions_cache_data: dict[str, SessionCacheData] = {}

Expand All @@ -1797,9 +1866,10 @@ def _update_cache_with_session_data(
if not earliest_timestamp or message_timestamp < earliest_timestamp:
earliest_timestamp = message_timestamp

# Process session-level data (skip summaries)
# Process session-level data (skip summaries and ai-title — they
# carry no DAG fields and are folded into session metadata above).
if hasattr(message, "sessionId") and not isinstance(
message, SummaryTranscriptEntry
message, (SummaryTranscriptEntry, AiTitleTranscriptEntry)
):
session_id = get_parent_session_id(getattr(message, "sessionId", ""))
if not session_id:
Expand All @@ -1809,6 +1879,7 @@ def _update_cache_with_session_data(
sessions_cache_data[session_id] = SessionCacheData(
session_id=session_id,
summary=session_summaries.get(session_id),
ai_title=session_ai_titles.get(session_id),
first_timestamp=getattr(message, "timestamp", ""),
last_timestamp=getattr(message, "timestamp", ""),
message_count=0,
Expand Down Expand Up @@ -1941,12 +2012,18 @@ def _collect_project_sessions(messages: list[TranscriptEntry]) -> list[dict[str,
):
session_summaries[uuid_to_session_backup[leaf_uuid]] = message.summary

# Overlay AI-generated titles (last per session wins) — these take
# precedence over leafUuid-mapped summaries for display purposes.
for message in messages:
if isinstance(message, AiTitleTranscriptEntry):
session_summaries[message.sessionId] = message.aiTitle

# Group messages by session (excluding warmup-only sessions,
# coalescing agent sessions into their parent)
sessions: dict[str, dict[str, Any]] = {}
for message in messages:
if hasattr(message, "sessionId") and not isinstance(
message, SummaryTranscriptEntry
message, (SummaryTranscriptEntry, AiTitleTranscriptEntry)
):
session_id = get_parent_session_id(getattr(message, "sessionId", ""))
if not session_id or session_id in warmup_session_ids:
Expand Down Expand Up @@ -2013,10 +2090,13 @@ def build_session_title(
) -> str:
"""Build a display title for a session.

Uses the session summary if available, otherwise the first user message
preview (truncated to 50 chars), falling back to "Session {id[:8]}".
Priority: Claude Code's curated ``ai_title`` (if any), then the
session summary, then a 50-char-truncated first-user-message preview,
finally "Session {id[:8]}".
"""
if session_cache:
if session_cache.ai_title:
return f"{project_title}: {session_cache.ai_title}"
if session_cache.summary:
return f"{project_title}: {session_cache.summary}"
preview = session_cache.first_user_message
Expand Down Expand Up @@ -2576,7 +2656,10 @@ def process_projects_hierarchy(
"sessions": [
{
"id": session_data.session_id,
"summary": session_data.summary,
# Display title: ai_title (Claude Code's
# curated short title) wins over summary.
"summary": session_data.ai_title
or session_data.summary,
"timestamp_range": format_timestamp_range(
session_data.first_timestamp,
session_data.last_timestamp,
Expand Down Expand Up @@ -2684,7 +2767,7 @@ def process_projects_hierarchy(
warmup_for_teams = get_warmup_session_ids(messages)
team_name_per_session: dict[str, str] = {}
for _msg in messages:
if isinstance(_msg, SummaryTranscriptEntry):
if isinstance(_msg, (SummaryTranscriptEntry, AiTitleTranscriptEntry)):
continue
if not hasattr(_msg, "sessionId"):
continue
Expand Down Expand Up @@ -2775,7 +2858,7 @@ def process_projects_hierarchy(
"sessions": [
{
"id": session_data.session_id,
"summary": session_data.summary,
"summary": session_data.ai_title or session_data.summary,
"timestamp_range": format_timestamp_range(
session_data.first_timestamp,
session_data.last_timestamp,
Expand Down
23 changes: 19 additions & 4 deletions claude_code_log/dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from typing import Optional

from .models import (
AiTitleTranscriptEntry,
BaseTranscriptEntry,
TranscriptEntry,
SummaryTranscriptEntry,
Expand Down Expand Up @@ -90,14 +91,21 @@ def build_message_index(
) -> dict[str, MessageNode]:
"""Build a deduplicated message index from transcript entries.

Skips SummaryTranscriptEntry (no uuid/sessionId) and
QueueOperationTranscriptEntry (no uuid). For duplicate uuids,
Skips SummaryTranscriptEntry / AiTitleTranscriptEntry (no uuid)
and QueueOperationTranscriptEntry (no uuid). For duplicate uuids,
keeps the entry from the earliest session (by first entry timestamp).
"""
# First pass: determine earliest timestamp per session
session_first_ts: dict[str, str] = {}
for entry in entries:
if isinstance(entry, (SummaryTranscriptEntry, QueueOperationTranscriptEntry)):
if isinstance(
entry,
(
SummaryTranscriptEntry,
AiTitleTranscriptEntry,
QueueOperationTranscriptEntry,
),
):
continue
sid = entry.sessionId
ts = entry.timestamp
Expand All @@ -107,7 +115,14 @@ def build_message_index(
# Second pass: build nodes, deduplicating by uuid (earliest session wins)
nodes: dict[str, MessageNode] = {}
for entry in entries:
if isinstance(entry, (SummaryTranscriptEntry, QueueOperationTranscriptEntry)):
if isinstance(
entry,
(
SummaryTranscriptEntry,
AiTitleTranscriptEntry,
QueueOperationTranscriptEntry,
),
):
continue
uuid = entry.uuid
sid = entry.sessionId
Expand Down
Loading
Loading