From 243c31f64b918ca20d47cc83f3a365ab41c84d1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arnaud=20He=CC=81ritier?= Date: Thu, 21 May 2026 21:16:54 +0200 Subject: [PATCH] fix(#2861): release per-message render caches when streaming completes messageModel held its rendered ANSI output (renderCache.result) and a per-message IncrementalRenderer (inputPrefix + outputPrefix + codeBlocksPrefix) for the lifetime of the session. With ~80-120 KB of retained strings per assistant turn and no eviction, a long TUI session grew its heap linearly until macOS's memorystatus subsystem killed docker-agent as the largest compressed process. Add Finalize() on messageModel that drops both caches when a message is no longer the active streaming target, and call it on the previous view whenever a new top-level entry is appended (and on every loaded view except the last in LoadFromSession). Render() and renderAssistantMarkdown() gracefully fall back to a transient renderer for finalized messages, so output stays correct; the chat list's bounded LRU still memoizes their rendered output. Finalize() is gated on MessageTypeAssistant: only assistant views allocate an IncrementalRenderer and accumulate large rendered ANSI strings during streaming, so user messages, tool calls, and other types have nothing to release. Setting finalized=true on those views would only have the side effect of permanently disabling renderCache for selected/hovered states (which bypass the parent's bounded LRU), forcing a fresh re-render every animation tick. addReasoningBlock() does not finalize the previous view: reasoning blocks routinely interleave with an actively streaming assistant turn, and finalizing the in-progress view would force every subsequent chunk through the transient-renderer path. The next non-reasoning entry will finalize via addMessage() when the streaming turn actually ends. removeSpinner() no longer calls invalidateAllItems(): the spinner is always at the tail, so wiping the LRU on every assistant turn was both wasteful and immediately re-armed the leak by forcing a full re-render of every previous message into a fresh renderCache. Spinner-driven messages are never put in the LRU (shouldCacheMessage returns false), so only the joined renderedLines slab needs to be reset. Adds a regression test that streams 400 large assistant messages and asserts both heap growth (<30 KB/message vs. the 161 KB/message pre-fix slope) and the structural invariant that at most one view retains render-cache state. The assertion uses a new exported HasLiveRenderState() method on the message.Model interface instead of cross-package reflection on unexported fields. Fixes #2861 Assisted-By: docker-agent --- pkg/tui/components/message/message.go | 89 +++++++++++- pkg/tui/components/messages/leak_test.go | 169 +++++++++++++++++++++++ pkg/tui/components/messages/messages.go | 51 ++++++- 3 files changed, 307 insertions(+), 2 deletions(-) create mode 100644 pkg/tui/components/messages/leak_test.go diff --git a/pkg/tui/components/message/message.go b/pkg/tui/components/message/message.go index 96e50a50e..bd1f35237 100644 --- a/pkg/tui/components/message/message.go +++ b/pkg/tui/components/message/message.go @@ -27,6 +27,16 @@ type Model interface { SetSelected(selected bool) SetHovered(hovered bool) CodeBlocks() []markdown.CodeBlock + // Finalize releases per-message render state that is only needed while the + // message is actively streaming. The message content and code-block metadata + // are preserved; calling View() afterwards still produces correct output + // without retaining a per-view render cache or IncrementalRenderer. + Finalize() + // HasLiveRenderState reports whether this view currently retains a + // populated renderCache or an IncrementalRenderer instance. Used by tests + // to assert that finalized views have actually released their per-message + // render state without reaching into unexported fields via reflection. + HasLiveRenderState() bool } // messageModel implements Model @@ -59,6 +69,14 @@ type messageModel struct { // streamed-in chunks only re-render the trailing block instead of the whole // accumulated markdown each time. mdRenderer *markdown.IncrementalRenderer + + // finalized is set by Finalize() once the message is no longer the active + // streaming view. After it is set, Render() still produces correct output, + // but does not store anything in renderCache and does not retain an + // IncrementalRenderer between calls — both are pure caches whose memory + // dominates a long session, and they are not worth keeping for messages + // that are unlikely to be re-rendered hot. + finalized bool } // renderCache stores the most recent Render result keyed by the inputs that @@ -99,6 +117,11 @@ func (mv *messageModel) Init() tea.Cmd { } func (mv *messageModel) SetMessage(msg *types.Message) { + // Un-finalize when the underlying message is changed (e.g. streaming + // resumes into this view). Finalize is meant for views that have + // permanently lost their actively-streaming status; mutating the message + // re-arms the per-message caches so subsequent renders are fast again. + mv.finalized = false // If the new content is not an extension of the previous one (different // message, or the message was edited), drop the IncrementalRenderer's // cached prefix so its memory is released immediately rather than on the @@ -177,7 +200,11 @@ func (mv *messageModel) Render(width int) string { // MessageTypeAssistant placeholder) animate on every tick, so the result is // not cacheable. Everything else is a pure function of the inputs tracked in // renderCache below. - cacheable := !mv.isSpinnerDriven() + // Spinner-driven messages animate every tick and are not cacheable. + // Finalized messages skip writing into renderCache so the per-view + // retained ANSI string does not pile up across long sessions; the chat + // list's bounded LRU still memoizes their rendered output. + cacheable := !mv.isSpinnerDriven() && !mv.finalized if cacheable { c := &mv.renderCache if c.valid && @@ -376,10 +403,22 @@ func (mv *messageModel) render(width int) string { // so each new chunk only re-parses the trailing region. The first render at a // given width is equivalent to a fresh full render. // +// For finalized messages we use a transient renderer that is discarded after +// each call. Finalized messages are no longer streamed, so the prefix-cache +// inside an IncrementalRenderer is not earning its keep — keeping it resident +// across the lifetime of every historical message in a session is the +// dominant source of retained memory in long sessions. The parent message +// list's bounded rendered-item LRU can still memoize finalized message output +// without storing an additional per-view copy. +// // It also returns the list of fenced code blocks emitted by the renderer so // that callers can map clicks on the per-block copy affordance back to the // underlying raw code. func (mv *messageModel) renderAssistantMarkdown(content string, width int) (string, []markdown.CodeBlock, error) { + if mv.finalized { + r := markdown.NewIncrementalRenderer(width) + return r.RenderWithCodeBlocks(content) + } if mv.mdRenderer == nil { mv.mdRenderer = markdown.NewIncrementalRenderer(width) } else { @@ -442,6 +481,54 @@ func (mv *messageModel) StopAnimation() { } } +// Finalize releases per-message render state that no longer needs to be kept +// resident once the message is no longer the actively streaming view. This is +// called by the parent message list when a new top-level message arrives, and +// for every historical view loaded from a session. +// +// Finalize is a no-op for non-assistant message types: only assistant views +// allocate an IncrementalRenderer and accumulate large rendered ANSI strings +// during streaming, so user messages, tool calls, error/welcome banners and +// the like have nothing to release. Setting `finalized = true` on those views +// would only have the side-effect of permanently disabling renderCache for +// selected/hovered states (which bypass the parent's bounded LRU), forcing a +// fresh re-render on every animation tick. Restricting the disable to +// assistant views keeps the leak fix scoped to the type that actually leaks. +// +// The retained payload of an assistant view is dominated by the renderCache +// (a copy of the rendered ANSI string) and the IncrementalRenderer's internal +// caches (last rendered prefix, glamour AST state). Both are pure render +// state — they can be regenerated from mv.message on demand. We deliberately +// leave mv.message, mv.codeBlocks and the spinner untouched so that View() +// keeps returning correct output, click-targeting on code blocks still works, +// and the spinner-driven types continue to animate. +// +// Finalize is idempotent and durable: subsequent renders do not re-populate +// renderCache or store an IncrementalRenderer on the struct. This is +// important because the parent message list invalidates its own LRU on +// several events (spinner removal, theme change, window resize) and would +// otherwise re-render every previously finalized view, putting the per- +// message render state right back where it was. +func (mv *messageModel) Finalize() { + if mv.message == nil || mv.message.Type != types.MessageTypeAssistant { + return + } + mv.renderCache = renderCache{} + if mv.mdRenderer != nil { + mv.mdRenderer.Reset() + mv.mdRenderer = nil + } + mv.finalized = true +} + +// HasLiveRenderState reports whether this view still retains per-message +// render state — either a populated renderCache or an IncrementalRenderer +// instance. Used as a structural assertion in regression tests that verify +// Finalize() actually released what it was supposed to release. +func (mv *messageModel) HasLiveRenderState() bool { + return mv.renderCache.result != "" || mv.mdRenderer != nil +} + // SetSize sets the dimensions of the message view func (mv *messageModel) SetSize(width, height int) tea.Cmd { if mv.width != width { diff --git a/pkg/tui/components/messages/leak_test.go b/pkg/tui/components/messages/leak_test.go new file mode 100644 index 000000000..a0a980979 --- /dev/null +++ b/pkg/tui/components/messages/leak_test.go @@ -0,0 +1,169 @@ +package messages + +import ( + "fmt" + "runtime" + "strings" + "testing" + + "github.com/docker/docker-agent/pkg/tui/components/message" + "github.com/docker/docker-agent/pkg/tui/core/layout" + "github.com/docker/docker-agent/pkg/tui/service" +) + +// TestLongSessionDoesNotRetainPerMessageRenderState streams a long sequence +// of large assistant messages through the same public API the TUI uses +// (AddUserMessage / AddAssistantMessage / AppendToLastMessage), rendering the +// active assistant view between chunks so messageModel.renderCache and +// IncrementalRenderer state are exercised. It asserts two things: +// +// 1. Heap growth stays far below the pre-fix slope. Before the fix every +// assistant view kept its own renderCache (~70 KB rendered ANSI) and an +// IncrementalRenderer (cached prefix + rendered output) for a measured +// ~161 KB of retained state per message. After the fix, per-message render +// state is released as soon as the message stops being the active +// streaming view, so the remaining slope is mostly the raw session content. +// +// 2. The structural invariant: after every non-active assistant view has +// been finalized, only the single actively-streaming view (at most one) +// should be holding a non-empty renderCache or a live IncrementalRenderer. +// +// Run with -short to skip; the test is deterministic but takes a few seconds. +func TestLongSessionDoesNotRetainPerMessageRenderState(t *testing.T) { + if testing.Short() { + t.Skip("skipping leak test in -short mode") + } + + // `go test` may reset runtime.MemProfileRate when its own -memprofile flag + // is unset, which would override the value set in init(). + runtime.MemProfileRate = 1 + + const ( + warmupMessages = 200 + measureMessages = 200 + chunkSize = 256 + messageBytes = 8192 + viewWidth = 120 + viewHeight = 40 + + // Per-message heap budget for the retained slope. Pre-fix slope was + // ~161 KB/msg and did not flatten. The remaining post-fix slope is mostly + // the original message content and small per-view bookkeeping. + maxGrowthPerMessageBytes = 30 * 1024 + ) + + sessionState := &service.SessionState{} + m := NewScrollableView(viewWidth, viewHeight, sessionState).(*model) + m.SetSize(viewWidth, viewHeight) + + body := buildMarkdownBody(messageBytes) + + heapInuse := func() uint64 { + runtime.GC() + runtime.GC() // second pass to settle finalizers + var ms runtime.MemStats + runtime.ReadMemStats(&ms) + return ms.HeapInuse + } + + renderLastView := func() { + if len(m.views) == 0 { + return + } + _ = m.views[len(m.views)-1].View() + } + + streamMessage := func(i int) { + m.AddUserMessage(fmt.Sprintf("user message %d", i)) + m.AddAssistantMessage() + for off := 0; off < len(body); off += chunkSize { + end := min(off+chunkSize, len(body)) + m.AppendToLastMessage("root", body[off:end]) + if (off/chunkSize)%4 == 0 { + renderLastView() + } + } + renderLastView() + } + + // Warmup: stream enough messages to saturate the chat-list LRU. + for i := range warmupMessages { + streamMessage(i) + } + afterWarmup := heapInuse() + + // Measurement: stream another batch and observe the slope. + for i := range measureMessages { + streamMessage(warmupMessages + i) + } + afterMeasure := heapInuse() + + growth := max(int64(afterMeasure)-int64(afterWarmup), 0) + perMessage := growth / measureMessages + + t.Logf("warmup_heap=%d KB measure_heap=%d KB delta=%d KB perMsg=%d KB (budget=%d KB)", + afterWarmup/1024, afterMeasure/1024, growth/1024, perMessage/1024, + maxGrowthPerMessageBytes/1024) + + live := countLiveRenderState(t, m.views) + t.Logf("views=%d live render-state holders=%d (LRU cap=%d, current=%d)", + len(m.views), live, renderedItemsCacheSize, m.renderedItems.Len()) + + if perMessage > maxGrowthPerMessageBytes { + t.Fatalf("heap growth %d B/message exceeds budget %d B/message — "+ + "per-message render state appears to be retained after finalization", + perMessage, maxGrowthPerMessageBytes) + } + + if live > 2 { + t.Fatalf("expected at most 2 messageModels to hold render-cache state "+ + "(the active streaming view), got %d out of %d views", + live, len(m.views)) + } +} + +// countLiveRenderState walks the view list and reports how many message.Model +// views still hold per-message render state — a populated renderCache or an +// IncrementalRenderer. Uses the exported HasLiveRenderState() so the test +// does not depend on reflection across package boundaries. +func countLiveRenderState(t *testing.T, views []layout.Model) int { + t.Helper() + var live int + for _, v := range views { + mv, ok := v.(message.Model) + if !ok { + continue + } + if mv.HasLiveRenderState() { + live++ + } + } + return live +} + +// buildMarkdownBody returns a deterministic markdown blob of approximately +// the requested size. The mix of prose paragraphs and a fenced code block +// exercises both stable-boundary detection and code-block caching in the +// IncrementalRenderer (the two structures called out in the leak hypothesis). +func buildMarkdownBody(targetBytes int) string { + const paragraph = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. " + + "Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. " + + "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris " + + "nisi ut aliquip ex ea commodo consequat.\n\n" + const codeBlock = "```go\n" + + "func example(x int) int {\n" + + " // some representative code\n" + + " return x * 2\n" + + "}\n" + + "```\n\n" + + var b strings.Builder + b.Grow(targetBytes + len(paragraph)) + for b.Len() < targetBytes { + b.WriteString(paragraph) + if b.Len()%3 == 0 { + b.WriteString(codeBlock) + } + } + return b.String() +} diff --git a/pkg/tui/components/messages/messages.go b/pkg/tui/components/messages/messages.go index c5c5748f8..390ab50d3 100644 --- a/pkg/tui/components/messages/messages.go +++ b/pkg/tui/components/messages/messages.go @@ -1184,6 +1184,21 @@ func (m *model) invalidateAllItems() { m.renderDirty = true } +// finalizePreviousMessageView releases per-message render state on the most +// recent message.Model view (if any) before a new top-level entry is +// appended. The renderCache and IncrementalRenderer are pure caches — dropping +// them prevents long sessions from accumulating O(N) retained render state +// for messages that are no longer streaming, while View() lazily rebuilds +// them if the message is ever re-rendered. +func (m *model) finalizePreviousMessageView() { + if len(m.views) == 0 { + return + } + if mv, ok := m.views[len(m.views)-1].(message.Model); ok { + mv.Finalize() + } +} + // Message management methods func (m *model) AddUserMessage(content string) tea.Cmd { return m.addMessage(types.User(content)) @@ -1226,6 +1241,7 @@ func (m *model) AddAssistantMessage() tea.Cmd { } func (m *model) AddCancelledMessage() tea.Cmd { + m.finalizePreviousMessageView() msg := types.Cancelled() m.messages = append(m.messages, msg) view := m.createMessageView(msg) @@ -1250,6 +1266,7 @@ func (m *model) addMessage(msg *types.Message) tea.Cmd { m.clearSelection() shouldAutoScroll := !m.userHasScrolled + m.finalizePreviousMessageView() m.messages = append(m.messages, msg) view := m.createMessageView(msg) m.sessionState.SetPreviousMessage(msg) @@ -1410,6 +1427,16 @@ func (m *model) LoadFromSession(sess *session.Session) tea.Cmd { cmds = append(cmds, view.Init()) } + // Finalize all but the last message.Model view: historical assistant + // messages will only ever be re-rendered on demand, so there is no need + // to keep their renderCache or IncrementalRenderer state resident. The + // most recent view is left untouched in case streaming continues into it. + for i := range len(m.views) - 1 { + if mv, ok := m.views[i].(message.Model); ok { + mv.Finalize() + } + } + cmds = append(cmds, m.ScrollToBottom()) return tea.Batch(cmds...) } @@ -1460,6 +1487,7 @@ func (m *model) AddOrUpdateToolCall(agentName string, toolCall tools.ToolCall, t } // Otherwise create a standalone tool call message + m.finalizePreviousMessageView() msg := types.ToolCallMessage(agentName, toolCall, toolDef, status) m.messages = append(m.messages, msg) view := m.createToolCallView(msg) @@ -1546,6 +1574,14 @@ func (m *model) AppendReasoning(agentName, content string) tea.Cmd { } // addReasoningBlock creates a new reasoning block message. +// +// Reasoning blocks routinely interleave with an actively streaming assistant +// turn (the LLM emits a thought, then resumes content). Finalizing the +// previous view here would drop the renderCache and IncrementalRenderer of +// a message the user is still watching, and the very next chunk would have +// to rebuild them from scratch via the transient renderer path. The next +// non-reasoning entry (user message, tool call, error) will finalize via +// addMessage when the streaming turn actually ends. func (m *model) addReasoningBlock(agentName, content string) tea.Cmd { m.clearSelection() shouldAutoScroll := !m.userHasScrolled @@ -1670,7 +1706,20 @@ func (m *model) removeSpinner() { m.views = m.views[:lastIdx] } m.messages = m.messages[:lastIdx] - m.invalidateAllItems() + // The spinner is always at the tail, so other indices are unchanged + // and their cached entries remain valid. Only the joined renderedLines + // references the now-removed spinner, so we drop it and force a rejoin + // on the next render. The LRU itself never held a spinner entry + // (shouldCacheMessage returns false for spinner-driven types). + // Avoiding invalidateAllItems here is essential for long sessions: + // it would otherwise wipe up to 500 cached renderings once per + // assistant turn, forcing every previous message to be re-parsed + // from markdown on the next render. + m.renderedLines = nil + m.lineOffsets = nil + m.totalHeight = 0 + m.urlSpans.clear() + m.renderDirty = true } }