From c41c1bf8a64e1c9df2ebf7b51fe3bb7dd10f62f9 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Wed, 6 May 2026 12:49:36 +0100 Subject: [PATCH 1/3] Add support for ai-title and prefer it over legacy summary # Conflicts: # claude_code_log/cache.py # claude_code_log/dag.py --- claude_code_log/cache.py | 19 ++- claude_code_log/converter.py | 80 +++++++-- claude_code_log/dag.py | 23 ++- .../factories/transcript_factory.py | 2 + .../migrations/006_session_ai_title.sql | 14 ++ claude_code_log/models.py | 15 ++ claude_code_log/renderer.py | 42 ++++- claude_code_log/tui.py | 13 +- test/test_ai_title.py | 153 ++++++++++++++++++ test/test_cache.py | 28 ++-- test/test_html_regeneration.py | 38 +++-- 11 files changed, 377 insertions(+), 50 deletions(-) create mode 100644 claude_code_log/migrations/006_session_ai_title.sql create mode 100644 test/test_ai_title.py diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index 7cc8f5bf..1639fb87 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -47,6 +47,10 @@ class SessionCacheData(BaseModel): session_id: str summary: Optional[str] = None + # Claude Code's AI-generated short session title, sourced from + # `ai-title` JSONL entries (last one wins). Preferred over `summary` + # for display when present. + ai_title: Optional[str] = None first_timestamp: str last_timestamp: str message_count: int @@ -624,8 +628,8 @@ def update_session_cache(self, session_data: Dict[str, SessionCacheData]) -> Non message_count, first_user_message, cwd, total_input_tokens, total_output_tokens, total_cache_creation_tokens, total_cache_read_tokens, - team_name - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + team_name, ai_title + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
ON CONFLICT(project_id, session_id) DO UPDATE SET summary = excluded.summary, first_timestamp = excluded.first_timestamp, @@ -637,7 +641,8 @@ def update_session_cache(self, session_data: Dict[str, SessionCacheData]) -> Non total_output_tokens = excluded.total_output_tokens, total_cache_creation_tokens = excluded.total_cache_creation_tokens, total_cache_read_tokens = excluded.total_cache_read_tokens, - team_name = excluded.team_name + team_name = excluded.team_name, + ai_title = excluded.ai_title """, ( self._project_id, @@ -657,6 +662,7 @@ def update_session_cache(self, session_data: Dict[str, SessionCacheData]) -> Non data.total_cache_creation_tokens, data.total_cache_read_tokens, scrub_surrogates(data.team_name), + scrub_surrogates(data.ai_title), ), ) @@ -775,6 +781,7 @@ def get_cached_project_data(self) -> Optional[ProjectCache]: sessions[row["session_id"]] = SessionCacheData( session_id=row["session_id"], summary=row["summary"], + ai_title=row["ai_title"] if "ai_title" in row.keys() else None, first_timestamp=row["first_timestamp"], last_timestamp=row["last_timestamp"], message_count=row["message_count"], @@ -824,6 +831,11 @@ def _is_cache_version_compatible(self, cache_version: str) -> bool: # 0.9.0 introduced _compact_ide_tags_for_preview() which transforms # first_user_message to use emoji indicators instead of raw IDE tags "0.8.0": "0.9.0", + # 1.3.0 added handling for `ai-title` JSONL entries: existing + # caches have a NULL `ai_title` column for every session until + # JSONLs are re-ingested, so the project index keeps showing + # the old session-id title until the cache is rebuilt. 
+ "1.2.0": "1.3.0", } cache_ver = version.parse(cache_version) @@ -1093,6 +1105,7 @@ def get_archived_sessions( archived_sessions[session_id] = SessionCacheData( session_id=session_id, summary=row["summary"], + ai_title=row["ai_title"] if "ai_title" in row.keys() else None, first_timestamp=row["first_timestamp"], last_timestamp=row["last_timestamp"], message_count=row["message_count"], diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 90a795d7..3d50fafb 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -34,6 +34,7 @@ from .factories import create_transcript_entry from .factories.teammate_factory import find_team_lead_body from .models import ( + AiTitleTranscriptEntry, BaseTranscriptEntry, DetailLevel, PassthroughTranscriptEntry, @@ -261,8 +262,9 @@ def filter_messages_by_date( filtered_messages: list[TranscriptEntry] = [] for message in messages: - # Handle SummaryTranscriptEntry which doesn't have timestamp - if isinstance(message, SummaryTranscriptEntry): + # Summary / ai-title entries carry no timestamp — keep them so + # the title/summary survives date filtering. + if isinstance(message, (SummaryTranscriptEntry, AiTitleTranscriptEntry)): filtered_messages.append(message) continue @@ -378,6 +380,7 @@ def load_transcript( "user", "assistant", "summary", + "ai-title", "system", "queue-operation", ]: @@ -767,11 +770,19 @@ def load_directory_transcripts( ) dag_ordered = traverse_session_tree(tree) - # Re-add summaries/queue-ops (excluded from DAG since they lack uuid) + # Re-add summaries/ai-titles/queue-ops (excluded from DAG since they + # lack uuid). 
non_dag_entries: list[TranscriptEntry] = [ e for e in all_messages - if isinstance(e, (SummaryTranscriptEntry, QueueOperationTranscriptEntry)) + if isinstance( + e, + ( + SummaryTranscriptEntry, + AiTitleTranscriptEntry, + QueueOperationTranscriptEntry, + ), + ) ] return dag_ordered + non_dag_entries, tree @@ -852,6 +863,13 @@ def deduplicate_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntr elif isinstance(message, SummaryTranscriptEntry): # Summaries have no timestamp or uuid - use leafUuid to keep them distinct content_key = message.leafUuid + elif isinstance(message, AiTitleTranscriptEntry): + # ai-title entries have no timestamp/uuid; collapse duplicates + # per session so we don't carry the same title 12x downstream. + # The last entry wins via prepare_session_ai_titles either way, + # but deduping here keeps message lists tidy. + session_id = message.sessionId + content_key = "ai-title" elif isinstance(message, (SystemTranscriptEntry, PassthroughTranscriptEntry)): content_key = message.uuid @@ -867,6 +885,10 @@ def deduplicate_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntr message.message.content ) > len(existing.message.content): deduplicated[idx] = message # Replace with better version + elif isinstance(message, AiTitleTranscriptEntry): + # Always keep the most recent ai-title per session — Claude + # Code may refine the curated title across the session. + deduplicated[seen[dedup_key]] = message # Otherwise skip duplicate else: seen[dedup_key] = len(deduplicated) @@ -1111,11 +1133,22 @@ def _build_session_data_from_messages( # Pre-compute warmup session IDs to filter them out warmup_session_ids = get_warmup_session_ids(messages) + # Map AI-generated titles to sessions (last entry per sessionId wins). 
+ session_ai_titles: Dict[str, str] = {} + for message in messages: + if isinstance(message, AiTitleTranscriptEntry): + session_ai_titles[message.sessionId] = message.aiTitle + # Group messages by session sessions: Dict[str, Dict[str, Any]] = {} for message in messages: if not hasattr(message, "sessionId") or isinstance( - message, (SummaryTranscriptEntry, PassthroughTranscriptEntry) + message, + ( + SummaryTranscriptEntry, + AiTitleTranscriptEntry, + PassthroughTranscriptEntry, + ), ): continue @@ -1185,6 +1218,7 @@ def _build_session_data_from_messages( for session_id, data in sessions.items(): result[session_id] = SessionCacheData( session_id=session_id, + ai_title=session_ai_titles.get(session_id), first_timestamp=data["first_timestamp"], last_timestamp=data["last_timestamp"], message_count=data["message_count"], @@ -1774,6 +1808,12 @@ def _update_cache_with_session_data( ): session_summaries[uuid_to_session_backup[leaf_uuid]] = message.summary + # Map AI-generated titles to sessions (last entry per sessionId wins). + session_ai_titles: dict[str, str] = {} + for message in messages: + if isinstance(message, AiTitleTranscriptEntry): + session_ai_titles[message.sessionId] = message.aiTitle + # Group messages by session and calculate session data sessions_cache_data: dict[str, SessionCacheData] = {} @@ -1797,9 +1837,10 @@ def _update_cache_with_session_data( if not earliest_timestamp or message_timestamp < earliest_timestamp: earliest_timestamp = message_timestamp - # Process session-level data (skip summaries) + # Process session-level data (skip summaries and ai-title — they + # carry no DAG fields and are folded into session metadata above). 
if hasattr(message, "sessionId") and not isinstance( - message, SummaryTranscriptEntry + message, (SummaryTranscriptEntry, AiTitleTranscriptEntry) ): session_id = get_parent_session_id(getattr(message, "sessionId", "")) if not session_id: @@ -1809,6 +1850,7 @@ def _update_cache_with_session_data( sessions_cache_data[session_id] = SessionCacheData( session_id=session_id, summary=session_summaries.get(session_id), + ai_title=session_ai_titles.get(session_id), first_timestamp=getattr(message, "timestamp", ""), last_timestamp=getattr(message, "timestamp", ""), message_count=0, @@ -1941,12 +1983,18 @@ def _collect_project_sessions(messages: list[TranscriptEntry]) -> list[dict[str, ): session_summaries[uuid_to_session_backup[leaf_uuid]] = message.summary + # Overlay AI-generated titles (last per session wins) — these take + # precedence over leafUuid-mapped summaries for display purposes. + for message in messages: + if isinstance(message, AiTitleTranscriptEntry): + session_summaries[message.sessionId] = message.aiTitle + # Group messages by session (excluding warmup-only sessions, # coalescing agent sessions into their parent) sessions: dict[str, dict[str, Any]] = {} for message in messages: if hasattr(message, "sessionId") and not isinstance( - message, SummaryTranscriptEntry + message, (SummaryTranscriptEntry, AiTitleTranscriptEntry) ): session_id = get_parent_session_id(getattr(message, "sessionId", "")) if not session_id or session_id in warmup_session_ids: @@ -2013,10 +2061,13 @@ def build_session_title( ) -> str: """Build a display title for a session. - Uses the session summary if available, otherwise the first user message - preview (truncated to 50 chars), falling back to "Session {id[:8]}". + Priority: Claude Code's curated ``ai_title`` (if any), then the + session summary, then a 50-char-truncated first-user-message preview, + finally "Session {id[:8]}". 
""" if session_cache: + if session_cache.ai_title: + return f"{project_title}: {session_cache.ai_title}" if session_cache.summary: return f"{project_title}: {session_cache.summary}" preview = session_cache.first_user_message @@ -2576,7 +2627,10 @@ def process_projects_hierarchy( "sessions": [ { "id": session_data.session_id, - "summary": session_data.summary, + # Display title: ai_title (Claude Code's + # curated short title) wins over summary. + "summary": session_data.ai_title + or session_data.summary, "timestamp_range": format_timestamp_range( session_data.first_timestamp, session_data.last_timestamp, @@ -2684,7 +2738,7 @@ def process_projects_hierarchy( warmup_for_teams = get_warmup_session_ids(messages) team_name_per_session: dict[str, str] = {} for _msg in messages: - if isinstance(_msg, SummaryTranscriptEntry): + if isinstance(_msg, (SummaryTranscriptEntry, AiTitleTranscriptEntry)): continue if not hasattr(_msg, "sessionId"): continue @@ -2775,7 +2829,7 @@ def process_projects_hierarchy( "sessions": [ { "id": session_data.session_id, - "summary": session_data.summary, + "summary": session_data.ai_title or session_data.summary, "timestamp_range": format_timestamp_range( session_data.first_timestamp, session_data.last_timestamp, diff --git a/claude_code_log/dag.py b/claude_code_log/dag.py index b2e26966..2166b93c 100644 --- a/claude_code_log/dag.py +++ b/claude_code_log/dag.py @@ -11,6 +11,7 @@ from typing import Optional from .models import ( + AiTitleTranscriptEntry, BaseTranscriptEntry, TranscriptEntry, SummaryTranscriptEntry, @@ -90,14 +91,21 @@ def build_message_index( ) -> dict[str, MessageNode]: """Build a deduplicated message index from transcript entries. - Skips SummaryTranscriptEntry (no uuid/sessionId) and - QueueOperationTranscriptEntry (no uuid). For duplicate uuids, + Skips SummaryTranscriptEntry / AiTitleTranscriptEntry (no uuid) + and QueueOperationTranscriptEntry (no uuid). 
For duplicate uuids, keeps the entry from the earliest session (by first entry timestamp). """ # First pass: determine earliest timestamp per session session_first_ts: dict[str, str] = {} for entry in entries: - if isinstance(entry, (SummaryTranscriptEntry, QueueOperationTranscriptEntry)): + if isinstance( + entry, + ( + SummaryTranscriptEntry, + AiTitleTranscriptEntry, + QueueOperationTranscriptEntry, + ), + ): continue sid = entry.sessionId ts = entry.timestamp @@ -107,7 +115,14 @@ def build_message_index( # Second pass: build nodes, deduplicating by uuid (earliest session wins) nodes: dict[str, MessageNode] = {} for entry in entries: - if isinstance(entry, (SummaryTranscriptEntry, QueueOperationTranscriptEntry)): + if isinstance( + entry, + ( + SummaryTranscriptEntry, + AiTitleTranscriptEntry, + QueueOperationTranscriptEntry, + ), + ): continue uuid = entry.uuid sid = entry.sessionId diff --git a/claude_code_log/factories/transcript_factory.py b/claude_code_log/factories/transcript_factory.py index f7a8d539..cadb2937 100644 --- a/claude_code_log/factories/transcript_factory.py +++ b/claude_code_log/factories/transcript_factory.py @@ -22,6 +22,7 @@ ToolResultContent, ToolUseContent, # Transcript entry types + AiTitleTranscriptEntry, AssistantTranscriptEntry, MessageType, PassthroughTranscriptEntry, @@ -212,6 +213,7 @@ def _create_queue_operation_entry( "user": _create_user_entry, "assistant": _create_assistant_entry, "summary": lambda data: SummaryTranscriptEntry.model_validate(data), + "ai-title": lambda data: AiTitleTranscriptEntry.model_validate(data), "system": lambda data: SystemTranscriptEntry.model_validate(data), "queue-operation": _create_queue_operation_entry, } diff --git a/claude_code_log/migrations/006_session_ai_title.sql b/claude_code_log/migrations/006_session_ai_title.sql new file mode 100644 index 00000000..f81d216e --- /dev/null +++ b/claude_code_log/migrations/006_session_ai_title.sql @@ -0,0 +1,14 @@ +-- AI-generated session title +-- 
Migration: 006 +-- Description: Add an `ai_title` column to `sessions` so the project +-- index, TUI, and session headers can surface Claude Code's curated +-- short title (emitted as `{"type":"ai-title", "aiTitle": "...", +-- "sessionId": "..."}` JSONL entries). Multiple ai-title entries may +-- appear per session as the title is refined; the last one wins. +-- +-- Backward-compatible: existing rows get NULL via SQLite's column-add +-- default (and `SessionCacheData.ai_title: Optional[str] = None`). +-- Old caches will simply not have ai-title populated until the next +-- cache rewrite for the affected project. + +ALTER TABLE sessions ADD COLUMN ai_title TEXT; diff --git a/claude_code_log/models.py b/claude_code_log/models.py index b96d97bc..675d04cf 100644 --- a/claude_code_log/models.py +++ b/claude_code_log/models.py @@ -28,6 +28,7 @@ class MessageType(str, Enum): SYSTEM = "system" SUMMARY = "summary" QUEUE_OPERATION = "queue-operation" + AI_TITLE = "ai-title" # Rendering/display types (derived from content) TOOL_USE = "tool_use" @@ -204,6 +205,19 @@ class SummaryTranscriptEntry(BaseModel): sessionId: None = None # Summaries don't have a sessionId +class AiTitleTranscriptEntry(BaseModel): + """AI-generated session title. + + Claude Code emits these as session-level metadata (no uuid, no parent + chain). Multiple entries may be written per session as the title is + refined; the last one wins. 
+ """ + + type: Literal["ai-title"] + aiTitle: str + sessionId: str + + class SystemTranscriptEntry(BaseTranscriptEntry): """System messages like warnings, notifications, hook summaries, etc.""" @@ -264,6 +278,7 @@ class PassthroughTranscriptEntry(BaseModel): UserTranscriptEntry, AssistantTranscriptEntry, SummaryTranscriptEntry, + AiTitleTranscriptEntry, SystemTranscriptEntry, QueueOperationTranscriptEntry, PassthroughTranscriptEntry, diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index 2ec66afe..09866671 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -20,6 +20,7 @@ MessageMeta, MessageType, TranscriptEntry, + AiTitleTranscriptEntry, AssistantMessageModel, AssistantTranscriptEntry, PassthroughTranscriptEntry, @@ -656,9 +657,13 @@ def generate_template_messages( if getattr(msg, "sessionId", None) not in warmup_session_ids ] - # Pre-process to find session summaries + # Pre-process to find session summaries. AI-generated session titles + # ("ai-title" entries) override any leafUuid-mapped summary so the + # session header and back-link labels use the curated short title + # whenever Claude Code has emitted one. with log_timing("Session summary processing", t_start): session_summaries = prepare_session_summaries(messages) + session_summaries.update(prepare_session_ai_titles(messages)) # Pre-process: collect teamName per session (teammates feature) so # session headers can surface a team badge without re-scanning later. @@ -938,6 +943,22 @@ def prepare_session_team_names(messages: list[TranscriptEntry]) -> dict[str, str return out +def prepare_session_ai_titles(messages: list[TranscriptEntry]) -> dict[str, str]: + """Extract Claude Code AI-generated session titles from messages. + + Multiple ``ai-title`` entries may appear per session as the title is + refined; the last one wins. + + Returns: + Dict mapping session_id to ai_title text. 
+ """ + out: dict[str, str] = {} + for message in messages: + if isinstance(message, AiTitleTranscriptEntry): + out[message.sessionId] = message.aiTitle + return out + + def prepare_session_summaries(messages: list[TranscriptEntry]) -> dict[str, str]: """Extract session summaries from messages. @@ -2714,6 +2735,10 @@ def _filter_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntry]: if isinstance(message, SummaryTranscriptEntry): continue + # Skip ai-title entries (folded into session metadata, not rendered) + if isinstance(message, AiTitleTranscriptEntry): + continue + # Skip passthrough entries (structural DAG nodes, not rendered) if isinstance(message, PassthroughTranscriptEntry): continue @@ -3404,10 +3429,17 @@ def _render_messages( ctx.register(system_msg) continue - # Skip summary and passthrough entries (should be filtered in pass 1, - # but be defensive — they lack .message / BaseTranscriptEntry fields - # used by the rendering path below) - if isinstance(message, (SummaryTranscriptEntry, PassthroughTranscriptEntry)): + # Skip summary, ai-title, and passthrough entries (should be + # filtered in pass 1, but be defensive — they lack .message / + # BaseTranscriptEntry fields used by the rendering path below) + if isinstance( + message, + ( + SummaryTranscriptEntry, + AiTitleTranscriptEntry, + PassthroughTranscriptEntry, + ), + ): continue # Handle queue-operation 'remove' messages as user messages diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py index 4115c3a2..4a5992c4 100644 --- a/claude_code_log/tui.py +++ b/claude_code_log/tui.py @@ -1469,11 +1469,13 @@ def populate_table(self) -> None: ) token_display = f"{total_tokens:,}" if total_tokens > 0 else "-" - # Get summary or first user message + # Get title preview: ai_title (Claude Code's curated title) + # wins over summary, then first user message preview. 
# Escape Rich markup to prevent MarkupError from square brackets - # in paths like [/Users/foo/bar] being parsed as closing tags + # in paths like [/Users/foo/bar] being parsed as closing tags. preview = escape_markup( - session_data.summary + session_data.ai_title + or session_data.summary or session_data.first_user_message or "No preview available" ) @@ -1773,6 +1775,11 @@ def _update_expanded_content(self) -> None: # Session ID (safe - UUID format) content_parts.append(f"[bold]Session ID:[/bold] {self.selected_session_id}") + # AI title (Claude Code's curated short title) - escape markup + if session_data.ai_title: + escaped_title = self._escape_rich_markup(session_data.ai_title) + content_parts.append(f"\n[bold]Title:[/bold] {escaped_title}") + # Summary (if available) - escape markup if session_data.summary: escaped_summary = self._escape_rich_markup(session_data.summary) diff --git a/test/test_ai_title.py b/test/test_ai_title.py new file mode 100644 index 00000000..fea937bb --- /dev/null +++ b/test/test_ai_title.py @@ -0,0 +1,153 @@ +"""Tests for `ai-title` JSONL entry handling. + +`ai-title` lines are session-level metadata (no uuid, no timestamp) that +Claude Code emits to record an AI-generated short title for the session. +Multiple entries may appear per session as the title is refined; the +last one wins. They participate in the existing session-title selection +chain (`build_session_title`) and override `summary` for display. 
+""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from claude_code_log.converter import ( + build_session_title, + deduplicate_messages, + load_transcript, +) +from claude_code_log.cache import SessionCacheData +from claude_code_log.models import AiTitleTranscriptEntry + + +def _write_jsonl(path: Path, entries: list[dict[str, object]]) -> None: + with path.open("w", encoding="utf-8") as f: + for e in entries: + f.write(json.dumps(e) + "\n") + + +class TestAiTitleParsing: + def test_parsed_as_dedicated_entry( + self, tmp_path: Path, capsys: pytest.CaptureFixture[str] + ) -> None: + """ai-title is a known type — parses cleanly with no warning.""" + jsonl = tmp_path / "s.jsonl" + _write_jsonl( + jsonl, + [ + { + "type": "ai-title", + "aiTitle": "Activate Python virtual environment", + "sessionId": "327fac9d-8b0b-4f8a-88c7-d8fea5e354d3", + } + ], + ) + + messages = load_transcript(jsonl, silent=False) + captured = capsys.readouterr() + + assert len(messages) == 1 + entry = messages[0] + assert isinstance(entry, AiTitleTranscriptEntry) + assert entry.aiTitle == "Activate Python virtual environment" + assert entry.sessionId == "327fac9d-8b0b-4f8a-88c7-d8fea5e354d3" + assert "unrecognized message type" not in captured.out + + def test_multiple_entries_collapsed_to_last(self, tmp_path: Path) -> None: + """Multiple ai-title entries per session collapse to the latest one + after deduplication, since Claude Code may refine the title.""" + jsonl = tmp_path / "s.jsonl" + _write_jsonl( + jsonl, + [ + {"type": "ai-title", "aiTitle": "First draft", "sessionId": "s1"}, + {"type": "ai-title", "aiTitle": "Second draft", "sessionId": "s1"}, + {"type": "ai-title", "aiTitle": "Final title", "sessionId": "s1"}, + ], + ) + + messages = load_transcript(jsonl, silent=True) + deduped = deduplicate_messages(messages) + ai_titles = [m for m in deduped if isinstance(m, AiTitleTranscriptEntry)] + + assert len(ai_titles) == 1 + assert 
ai_titles[0].aiTitle == "Final title" + + def test_distinct_sessions_kept_separately(self, tmp_path: Path) -> None: + """Different sessions each keep their own ai-title.""" + jsonl = tmp_path / "s.jsonl" + _write_jsonl( + jsonl, + [ + {"type": "ai-title", "aiTitle": "Title A", "sessionId": "sA"}, + {"type": "ai-title", "aiTitle": "Title B", "sessionId": "sB"}, + {"type": "ai-title", "aiTitle": "Title A v2", "sessionId": "sA"}, + ], + ) + + messages = load_transcript(jsonl, silent=True) + deduped = deduplicate_messages(messages) + titles_by_session = { + m.sessionId: m.aiTitle + for m in deduped + if isinstance(m, AiTitleTranscriptEntry) + } + + assert titles_by_session == {"sA": "Title A v2", "sB": "Title B"} + + +class TestBuildSessionTitlePriority: + """``build_session_title`` priority: ai_title > summary > preview > id.""" + + def _make(self, **overrides: object) -> SessionCacheData: + defaults: dict[str, object] = { + "session_id": "abc12345", + "first_timestamp": "", + "last_timestamp": "", + "message_count": 0, + "first_user_message": "", + } + defaults.update(overrides) + return SessionCacheData(**defaults) # type: ignore[arg-type] + + def test_ai_title_wins_over_summary(self) -> None: + cache = self._make( + ai_title="Curated AI title", + summary="Long-form summary", + first_user_message="Some preview", + ) + assert ( + build_session_title("Project", "abc12345", cache) + == "Project: Curated AI title" + ) + + def test_summary_wins_when_no_ai_title(self) -> None: + cache = self._make( + summary="Long-form summary", first_user_message="Some preview" + ) + assert ( + build_session_title("Project", "abc12345", cache) + == "Project: Long-form summary" + ) + + def test_preview_used_when_only_user_message(self) -> None: + cache = self._make(first_user_message="Some preview") + assert ( + build_session_title("Project", "abc12345", cache) == "Project: Some preview" + ) + + def test_session_id_fallback_when_cache_empty(self) -> None: + cache = self._make() + assert ( 
+ build_session_title("Project", "abc12345", cache) + == "Project: Session abc12345" + ) + + def test_no_cache_falls_back_to_session_id(self) -> None: + assert ( + build_session_title("Project", "abc12345", None) + == "Project: Session abc12345" + ) diff --git a/test/test_cache.py b/test/test_cache.py index 71a57b34..412edcdf 100644 --- a/test/test_cache.py +++ b/test/test_cache.py @@ -190,24 +190,26 @@ def test_cache_invalidation_file_modification( assert not cache_manager.is_file_cached(jsonl_path) def test_cache_invalidation_version_mismatch(self, temp_project_dir): - """Test cache invalidation when library version changes.""" - # Create cache with version 1.0.0 - with patch("claude_code_log.cache.get_library_version", return_value="1.0.0"): - cache_manager_v1 = CacheManager(temp_project_dir, "1.0.0") - # Verify project was created with version 1.0.0 + """Test cache compatibility when library version changes. + + Uses a version pair outside the explicit ``breaking_changes`` rules + in ``cache.py`` so the test stays focused on default-path + compatibility (caches are preserved across compatible upgrades). + """ + # Create cache with version 5.0.0 — well outside any breaking rule + with patch("claude_code_log.cache.get_library_version", return_value="5.0.0"): + cache_manager_v1 = CacheManager(temp_project_dir, "5.0.0") cached_data = cache_manager_v1.get_cached_project_data() assert cached_data is not None - assert cached_data.version == "1.0.0" + assert cached_data.version == "5.0.0" - # Create new cache manager with different version - with patch("claude_code_log.cache.get_library_version", return_value="2.0.0"): - cache_manager_v2 = CacheManager(temp_project_dir, "2.0.0") - # Since the default implementation has empty breaking_changes, - # versions should be compatible and cache should be preserved + # Bump to 5.1.0 — still no breaking rule between these. 
+ with patch("claude_code_log.cache.get_library_version", return_value="5.1.0"): + cache_manager_v2 = CacheManager(temp_project_dir, "5.1.0") cached_data = cache_manager_v2.get_cached_project_data() assert cached_data is not None - # Version should remain 1.0.0 since it's compatible - assert cached_data.version == "1.0.0" + # Version should remain 5.0.0 since it's compatible + assert cached_data.version == "5.0.0" def test_filtered_loading_with_dates(self, cache_manager, temp_project_dir): """Test timestamp-based filtering during cache loading.""" diff --git a/test/test_html_regeneration.py b/test/test_html_regeneration.py index a0ade6c3..29732d94 100644 --- a/test/test_html_regeneration.py +++ b/test/test_html_regeneration.py @@ -415,7 +415,14 @@ def test_incremental_regeneration_only_updates_changed_sessions(self, tmp_path): assert session2_html.stat().st_mtime == session2_mtime def test_html_cache_detects_library_version_change(self, tmp_path): - """Test that HTML is regenerated when library version changes.""" + """Test that HTML is regenerated when library version changes. + + Uses version values past all current ``breaking_changes`` rules so + the cache itself isn't invalidated — exercising the HTML + staleness ``version_mismatch`` path specifically. + """ + from unittest.mock import patch + # Setup project project_dir = tmp_path / "test_project" project_dir.mkdir() @@ -429,17 +436,30 @@ def test_html_cache_detects_library_version_change(self, tmp_path): encoding="utf-8", ) - # Generate HTML with current version - convert_jsonl_to_html(project_dir) + # Build the cache + HTML at a version past all breaking rules so + # our compatibility-window changes don't accidentally invalidate + # the cache here. Patch the symbol at every call-site (the name + # is imported into multiple modules at startup). 
+ with ( + patch( + "claude_code_log.cache.get_library_version", + return_value="999.999.998", + ), + patch( + "claude_code_log.converter.get_library_version", + return_value="999.999.998", + ), + ): + convert_jsonl_to_html(project_dir) - cache_manager = CacheManager(project_dir, get_library_version()) + cache_manager = CacheManager(project_dir, "999.999.998") - # Check staleness with same version - is_stale, reason = cache_manager.is_html_stale("combined_transcripts.html") - assert not is_stale - assert reason == "up_to_date" + # Check staleness with same version + is_stale, reason = cache_manager.is_html_stale("combined_transcripts.html") + assert not is_stale + assert reason == "up_to_date" - # Create new cache manager with different version + # Create new cache manager with a higher (but still post-rule) version cache_manager_new = CacheManager(project_dir, "999.999.999") is_stale, reason = cache_manager_new.is_html_stale("combined_transcripts.html") assert is_stale From aaeab7ad2886f078de442e67c33f2cb6f246e818 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Wed, 6 May 2026 23:27:52 +0100 Subject: [PATCH 2/3] CR PR feedback --- claude_code_log/converter.py | 41 +++++++++++++++++++++++++++++------ claude_code_log/tui.py | 15 ++++--------- test/test_ai_title.py | 42 +++++++++++++++++++++++++++++++++++- 3 files changed, 80 insertions(+), 18 deletions(-) diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 3d50fafb..28262b0f 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -5,10 +5,11 @@ import json import logging import re +from collections.abc import Iterator from dataclasses import dataclass, field from pathlib import Path import traceback -from typing import Any, Dict, Iterator, List, Optional, TYPE_CHECKING, cast +from typing import Any, Dict, List, Optional, TYPE_CHECKING, cast import dateparser @@ -51,11 +52,6 @@ from .renderer import get_renderer, is_html_outdated -# Internal Claude Code message 
types that carry no DAG fields and are -# dropped without warning. Unknown types outside this set are surfaced -# so we notice new kinds worth supporting (see the else branch in -# load_transcript). `progress` is not here because it has uuid+sessionId -# and participates in the DAG as a PassthroughTranscriptEntry. @contextlib.contextmanager def _dag_warnings_suppressed(silent: bool) -> Iterator[None]: """Temporarily raise the DAG module's log level under ``silent=True``. @@ -78,6 +74,11 @@ def _dag_warnings_suppressed(silent: bool) -> Iterator[None]: dag_logger.setLevel(previous) +# Internal Claude Code message types that carry no DAG fields and are +# dropped without warning. Unknown types outside this set are surfaced +# so we notice new kinds worth supporting (see the else branch in +# load_transcript). `progress` is not here because it has uuid+sessionId +# and participates in the DAG as a PassthroughTranscriptEntry. SILENT_SKIP_TYPES: frozenset[str] = frozenset( { "file-history-snapshot", # Internal file backup metadata @@ -1133,6 +1134,33 @@ def _build_session_data_from_messages( # Pre-compute warmup session IDs to filter them out warmup_session_ids = get_warmup_session_ids(messages) + # Map summaries to sessions via leafUuid -> message UUID -> session ID. + # Mirrors _update_cache_with_session_data so the title fallback chain + # (ai_title > summary > preview > id) survives the cache-miss path. 
+ uuid_to_session: Dict[str, str] = {} + uuid_to_session_backup: Dict[str, str] = {} + for message in messages: + if hasattr(message, "uuid") and hasattr(message, "sessionId"): + message_uuid = getattr(message, "uuid", "") + session_id = getattr(message, "sessionId", "") + if message_uuid and session_id: + if type(message) is AssistantTranscriptEntry: + uuid_to_session[message_uuid] = session_id + else: + uuid_to_session_backup[message_uuid] = session_id + + session_summaries: Dict[str, str] = {} + for message in messages: + if isinstance(message, SummaryTranscriptEntry): + leaf_uuid = message.leafUuid + if leaf_uuid in uuid_to_session: + session_summaries[uuid_to_session[leaf_uuid]] = message.summary + elif ( + leaf_uuid in uuid_to_session_backup + and uuid_to_session_backup[leaf_uuid] not in session_summaries + ): + session_summaries[uuid_to_session_backup[leaf_uuid]] = message.summary + # Map AI-generated titles to sessions (last entry per sessionId wins). session_ai_titles: Dict[str, str] = {} for message in messages: @@ -1218,6 +1246,7 @@ def _build_session_data_from_messages( for session_id, data in sessions.items(): result[session_id] = SessionCacheData( session_id=session_id, + summary=session_summaries.get(session_id), ai_title=session_ai_titles.get(session_id), first_timestamp=data["first_timestamp"], last_timestamp=data["last_timestamp"], diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py index 4a5992c4..d370913c 100644 --- a/claude_code_log/tui.py +++ b/claude_code_log/tui.py @@ -1748,13 +1748,6 @@ def action_resume_selected(self) -> None: except Exception as e: self.notify(f"Error resuming session: {e}", severity="error") - def _escape_rich_markup(self, text: str) -> str: - """Escape Rich markup characters in text to prevent parsing errors.""" - if not text: - return text - # Escape square brackets which are used for Rich markup - return text.replace("[", "\\[").replace("]", "\\]") - def _update_expanded_content(self) -> None: """Update the 
expanded content for the currently selected session.""" if not self.selected_session_id: @@ -1777,24 +1770,24 @@ def _update_expanded_content(self) -> None: # AI title (Claude Code's curated short title) - escape markup if session_data.ai_title: - escaped_title = self._escape_rich_markup(session_data.ai_title) + escaped_title = escape_markup(session_data.ai_title) content_parts.append(f"\n[bold]Title:[/bold] {escaped_title}") # Summary (if available) - escape markup if session_data.summary: - escaped_summary = self._escape_rich_markup(session_data.summary) + escaped_summary = escape_markup(session_data.summary) content_parts.append(f"\n[bold]Summary:[/bold] {escaped_summary}") # First user message - escape markup if session_data.first_user_message: - escaped_message = self._escape_rich_markup(session_data.first_user_message) + escaped_message = escape_markup(session_data.first_user_message) content_parts.append( f"\n[bold]First User Message:[/bold] {escaped_message}" ) # Working directory (if available) - escape markup if session_data.cwd: - escaped_cwd = self._escape_rich_markup(session_data.cwd) + escaped_cwd = escape_markup(session_data.cwd) content_parts.append(f"\n[bold]Working Directory:[/bold] {escaped_cwd}") # Token usage (safe - numeric data) diff --git a/test/test_ai_title.py b/test/test_ai_title.py index fea937bb..6d6418a8 100644 --- a/test/test_ai_title.py +++ b/test/test_ai_title.py @@ -11,6 +11,7 @@ import json from pathlib import Path +from unittest.mock import patch import pytest @@ -19,7 +20,7 @@ deduplicate_messages, load_transcript, ) -from claude_code_log.cache import SessionCacheData +from claude_code_log.cache import CacheManager, SessionCacheData from claude_code_log.models import AiTitleTranscriptEntry @@ -151,3 +152,42 @@ def test_no_cache_falls_back_to_session_id(self) -> None: build_session_title("Project", "abc12345", None) == "Project: Session abc12345" ) + + +class TestAiTitleCacheRoundTrip: + """Persisting ai_title through SQLite must 
survive a reload — guards + against schema/binding regressions in update_session_cache and the + SELECT in get_cached_project_data.""" + + def test_ai_title_persisted_and_reloaded(self, tmp_path: Path) -> None: + project_dir = tmp_path / "proj" + project_dir.mkdir() + + with patch( + "claude_code_log.cache.get_library_version", return_value="1.0.0-test" + ): + writer = CacheManager(project_dir, "1.0.0-test") + writer.update_session_cache( + { + "s1": SessionCacheData( + session_id="s1", + ai_title="Saved AI title", + first_timestamp="2026-05-05T10:00:00Z", + last_timestamp="2026-05-05T10:05:00Z", + message_count=1, + first_user_message="hi", + ) + } + ) + + # Fresh manager forces a SELECT path through SQLite, not memory. + reader = CacheManager(project_dir, "1.0.0-test") + cached = reader.get_cached_project_data() + + assert cached is not None + assert "s1" in cached.sessions + reloaded = cached.sessions["s1"] + assert reloaded.ai_title == "Saved AI title" + assert ( + build_session_title("Project", "s1", reloaded) == "Project: Saved AI title" + ) From 09c9f0578dfa9e4cf5d08b6474a0aeb70b1a3198 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Sat, 9 May 2026 01:06:26 +0100 Subject: [PATCH 3/3] Whitespace --- claude_code_log/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index 1639fb87..b58bbade 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -662,7 +662,7 @@ def update_session_cache(self, session_data: Dict[str, SessionCacheData]) -> Non data.total_cache_creation_tokens, data.total_cache_read_tokens, scrub_surrogates(data.team_name), - scrub_surrogates(data.ai_title), + scrub_surrogates(data.ai_title), ), )