diff --git a/internal/app/bootstrap.go b/internal/app/bootstrap.go index 4ab88b53..b095c8ed 100644 --- a/internal/app/bootstrap.go +++ b/internal/app/bootstrap.go @@ -146,7 +146,10 @@ func BuildRuntime(ctx context.Context, opts BootstrapOptions) (RuntimeBundle, er // 这意味着所有会话都归属到启动时指定的项目目录下,运行时不会因配置变更而迁移存储位置。 sessionStore := agentsession.NewStore(loader.BaseDir(), cfg.Workdir) - var contextBuilder agentcontext.Builder = agentcontext.NewBuilderWithToolPolicies(toolRegistry) + // 注册内置工具的内容摘要器,使 micro-compact 在清理旧工具结果时保留关键上下文。 + tools.RegisterBuiltinSummarizers(toolRegistry) + + var contextBuilder agentcontext.Builder = agentcontext.NewBuilderWithToolPoliciesAndSummarizers(toolRegistry, toolRegistry) var memoSvc *memo.Service if cfg.Memo.Enabled { memoStore := memo.NewFileStore(loader.BaseDir(), cfg.Workdir) @@ -155,7 +158,7 @@ func BuildRuntime(ctx context.Context, opts BootstrapOptions) (RuntimeBundle, er if invalidator, ok := memoSource.(interface{ InvalidateCache() }); ok { sourceInvl = invalidator.InvalidateCache } - contextBuilder = agentcontext.NewBuilderWithMemo(toolRegistry, memoSource) + contextBuilder = agentcontext.NewBuilderWithMemoAndSummarizers(toolRegistry, toolRegistry, memoSource) memoSvc = memo.NewService(memoStore, nil, cfg.Memo, sourceInvl) toolRegistry.Register(memotool.NewRememberTool(memoSvc)) toolRegistry.Register(memotool.NewRecallTool(memoSvc)) diff --git a/internal/context/builder.go b/internal/context/builder.go index 88369966..fed9763c 100644 --- a/internal/context/builder.go +++ b/internal/context/builder.go @@ -9,37 +9,28 @@ import ( // DefaultBuilder preserves the current runtime context-building behavior. 
type DefaultBuilder struct { - promptSources []promptSectionSource - trimPolicy messageTrimPolicy - microCompactPolicies MicroCompactPolicySource + promptSources []promptSectionSource + trimPolicy messageTrimPolicy + microCompactPolicies MicroCompactPolicySource + microCompactSummarizers MicroCompactSummarizerSource } -// NewBuilder returns the default context builder implementation. -func NewBuilder() Builder { - return NewBuilderWithToolPolicies(nil) -} - -// NewBuilderWithToolPolicies 返回带工具 micro compact 策略源的默认上下文构建器。 -func NewBuilderWithToolPolicies(policies MicroCompactPolicySource) Builder { - systemSource := &systemStateSource{gitRunner: runGitCommand} +// newDefaultBuilder 统一构建默认上下文构建器,避免多个构造函数重复装配相同依赖。 +func newDefaultBuilder( + policies MicroCompactPolicySource, + summarizers MicroCompactSummarizerSource, + memoSource SectionSource, +) Builder { return &DefaultBuilder{ - promptSources: []promptSectionSource{ - corePromptSource{}, - &projectRulesSource{}, - taskStateSource{}, - todosSource{}, - skillPromptSource{}, - systemSource, - }, - trimPolicy: spanMessageTrimPolicy{}, - microCompactPolicies: policies, + promptSources: newPromptSources(memoSource), + trimPolicy: spanMessageTrimPolicy{}, + microCompactPolicies: policies, + microCompactSummarizers: summarizers, } } -// NewBuilderWithMemo 返回带记忆注入能力的上下文构建器。 -// memoSource 为 nil 时等价于 NewBuilderWithToolPolicies。 -func NewBuilderWithMemo(policies MicroCompactPolicySource, memoSource SectionSource) Builder { - systemSource := &systemStateSource{gitRunner: runGitCommand} +// newPromptSources 组装系统提示词来源列表,并按约定将 memoSource 插入到 systemState 之前。 +func newPromptSources(memoSource SectionSource) []promptSectionSource { sources := []promptSectionSource{ corePromptSource{}, &projectRulesSource{}, @@ -50,12 +41,33 @@ func NewBuilderWithMemo(policies MicroCompactPolicySource, memoSource SectionSou if memoSource != nil { sources = append(sources, memoSource) } - sources = append(sources, systemSource) - return 
&DefaultBuilder{ - promptSources: sources, - trimPolicy: spanMessageTrimPolicy{}, - microCompactPolicies: policies, - } + return append(sources, &systemStateSource{gitRunner: runGitCommand}) +} + +// NewBuilder returns the default context builder implementation. +func NewBuilder() Builder { + return NewBuilderWithToolPolicies(nil) +} + +// NewBuilderWithToolPolicies 返回带工具 micro compact 策略源的默认上下文构建器。 +func NewBuilderWithToolPolicies(policies MicroCompactPolicySource) Builder { + return newDefaultBuilder(policies, nil, nil) +} + +// NewBuilderWithToolPoliciesAndSummarizers 返回带工具策略与内容摘要器的上下文构建器。 +func NewBuilderWithToolPoliciesAndSummarizers(policies MicroCompactPolicySource, summarizers MicroCompactSummarizerSource) Builder { + return newDefaultBuilder(policies, summarizers, nil) +} + +// NewBuilderWithMemo 返回带记忆注入能力的上下文构建器。 +// memoSource 为 nil 时等价于 NewBuilderWithToolPolicies。 +func NewBuilderWithMemo(policies MicroCompactPolicySource, memoSource SectionSource) Builder { + return NewBuilderWithMemoAndSummarizers(policies, nil, memoSource) +} + +// NewBuilderWithMemoAndSummarizers 返回带记忆注入与内容摘要器的上下文构建器。 +func NewBuilderWithMemoAndSummarizers(policies MicroCompactPolicySource, summarizers MicroCompactSummarizerSource, memoSource SectionSource) Builder { + return newDefaultBuilder(policies, summarizers, memoSource) } // Build assembles the provider-facing context for the current round. 
@@ -83,7 +95,7 @@ func (b *DefaultBuilder) Build(ctx context.Context, input BuildInput) (BuildResu return BuildResult{ SystemPrompt: composeSystemPrompt(sections...), - Messages: applyReadTimeContextProjection(trimPolicy.Trim(input.Messages, input.Compact), input.TaskState, input.Compact, b.microCompactPolicies), + Messages: applyReadTimeContextProjection(trimPolicy.Trim(input.Messages, input.Compact), input.TaskState, input.Compact, b.microCompactPolicies, b.microCompactSummarizers), AutoCompactSuggested: shouldAutoCompact, }, nil } @@ -94,12 +106,13 @@ func applyReadTimeContextProjection( taskState agentsession.TaskState, options CompactOptions, policies MicroCompactPolicySource, + summarizers MicroCompactSummarizerSource, ) []providertypes.Message { - var projected []providertypes.Message if options.DisableMicroCompact || !taskState.Established() { - projected = cloneContextMessages(messages) + return ProjectToolMessagesForModel(cloneContextMessages(messages)) } else { - projected = microCompactMessagesWithPolicies(messages, policies, options.MicroCompactRetainedToolSpans) + return ProjectToolMessagesForModel( + microCompactMessagesWithPolicies(messages, policies, options.MicroCompactRetainedToolSpans, summarizers), + ) } - return ProjectToolMessagesForModel(projected) } diff --git a/internal/context/builder_test.go b/internal/context/builder_test.go index 8b7b7e05..c24d91ec 100644 --- a/internal/context/builder_test.go +++ b/internal/context/builder_test.go @@ -279,6 +279,61 @@ func TestDefaultBuilderBuildAppliesMicroCompactAfterTrim(t *testing.T) { } } +func TestNewBuilderWithToolPoliciesAndSummarizers(t *testing.T) { + t.Parallel() + + builder := NewBuilderWithToolPoliciesAndSummarizers( + nil, + stubMicroCompactSummarizerSource{ + "filesystem_read_file": func(content string, metadata map[string]string, isError bool) string { + return "[summary] read_file" + }, + }, + ) + + messages := []providertypes.Message{ + {Role: providertypes.RoleUser, Parts: 
[]providertypes.ContentPart{providertypes.NewTextPart("older user")}}, + { + Role: providertypes.RoleAssistant, + ToolCalls: []providertypes.ToolCall{ + {ID: "call-1", Name: "filesystem_read_file", Arguments: "{}"}, + }, + }, + {Role: providertypes.RoleTool, ToolCallID: "call-1", Parts: []providertypes.ContentPart{providertypes.NewTextPart("old read result")}}, + { + Role: providertypes.RoleAssistant, + ToolCalls: []providertypes.ToolCall{ + {ID: "call-2", Name: "bash", Arguments: "{}"}, + }, + }, + {Role: providertypes.RoleTool, ToolCallID: "call-2", Parts: []providertypes.ContentPart{providertypes.NewTextPart("recent bash result")}}, + { + Role: providertypes.RoleAssistant, + ToolCalls: []providertypes.ToolCall{ + {ID: "call-3", Name: "webfetch", Arguments: "{}"}, + }, + }, + {Role: providertypes.RoleTool, ToolCallID: "call-3", Parts: []providertypes.ContentPart{providertypes.NewTextPart("latest webfetch result")}}, + {Role: providertypes.RoleUser, Parts: []providertypes.ContentPart{providertypes.NewTextPart("latest explicit instruction")}}, + } + + got, err := builder.Build(stdcontext.Background(), BuildInput{ + Messages: messages, + TaskState: agentsession.TaskState{Goal: "keep implementing task"}, + Metadata: testMetadata(t.TempDir()), + }) + if err != nil { + t.Fatalf("Build() error = %v", err) + } + const summarizedMessageIndex = 2 + if renderDisplayParts(got.Messages[summarizedMessageIndex].Parts) != "[summary] read_file" { + t.Fatalf( + "expected summarized older read result, got %q", + renderDisplayParts(got.Messages[summarizedMessageIndex].Parts), + ) + } +} + func TestDefaultBuilderBuildSkipsMicroCompactWithoutEstablishedTaskState(t *testing.T) { t.Parallel() diff --git a/internal/context/microcompact.go b/internal/context/microcompact.go index 6627d459..37993eb2 100644 --- a/internal/context/microcompact.go +++ b/internal/context/microcompact.go @@ -13,15 +13,17 @@ const ( microCompactClearedMessage = "[Old tool result content cleared]" // 
defaultMicroCompactRetainedToolSpans 定义 micro compact 默认保留原始内容的最近可压缩工具块数量。 defaultMicroCompactRetainedToolSpans = 2 + // microCompactSummaryMaxRunes 是摘要回灌到上下文前允许的最大 rune 数量。 + microCompactSummaryMaxRunes = 200 ) -// microCompactMessages 对裁剪后的消息做只读投影式微压缩,仅清理旧工具结果内容。 +// microCompactMessages 对裁剪后的消息做只读投影式微压缩,优先摘要旧工具结果,失败时回退清理占位。 func microCompactMessages(messages []providertypes.Message) []providertypes.Message { - return microCompactMessagesWithPolicies(messages, nil, 0) + return microCompactMessagesWithPolicies(messages, nil, 0, nil) } // microCompactMessagesWithPolicies 按工具策略对裁剪后的消息做只读投影式微压缩。 -func microCompactMessagesWithPolicies(messages []providertypes.Message, policies MicroCompactPolicySource, retainedToolSpans int) []providertypes.Message { +func microCompactMessagesWithPolicies(messages []providertypes.Message, policies MicroCompactPolicySource, retainedToolSpans int, summarizers MicroCompactSummarizerSource) []providertypes.Message { if retainedToolSpans <= 0 { retainedToolSpans = defaultMicroCompactRetainedToolSpans } @@ -44,22 +46,29 @@ func microCompactMessagesWithPolicies(messages []providertypes.Message, policies continue } - compactableIDs := compactableToolCallIDs(cloned[span.Start].ToolCalls, policies) + compactableIDs, toolNames := compactableToolCallIDs(cloned[span.Start].ToolCalls, policies) if len(compactableIDs) == 0 { continue } - if !hasCompactableToolContent(cloned, span, compactableIDs) { + if retainedCompactableSpans < retainedToolSpans { + if hasCompactableToolMessage(cloned, span, compactableIDs) { + retainedCompactableSpans++ + } continue } - if retainedCompactableSpans < retainedToolSpans { - retainedCompactableSpans++ + + compactableContents := compactableToolMessageContents(cloned, span, compactableIDs) + if len(compactableContents) == 0 { continue } for messageIndex := span.Start + 1; messageIndex < span.End; messageIndex++ { - if shouldClearToolMessage(cloned[messageIndex], compactableIDs) { - cloned[messageIndex].Parts = 
[]providertypes.ContentPart{providertypes.NewTextPart(microCompactClearedMessage)} + content, ok := compactableContents[messageIndex] + if !ok { + continue } + summary := summarizeOrClear(cloned[messageIndex], content, toolNames, summarizers) + cloned[messageIndex].Parts = []providertypes.ContentPart{providertypes.NewTextPart(summary)} } } @@ -96,13 +105,14 @@ func isToolCallSpan(messages []providertypes.Message, span internalcompact.Messa return message.Role == providertypes.RoleAssistant && len(message.ToolCalls) > 0 } -// compactableToolCallIDs 返回 assistant tool call 中可参与微压缩的调用 ID 集合。 -func compactableToolCallIDs(calls []providertypes.ToolCall, policies MicroCompactPolicySource) map[string]struct{} { +// compactableToolCallIDs 返回 assistant tool call 中可参与微压缩的调用 ID 集合及对应的工具名映射。 +func compactableToolCallIDs(calls []providertypes.ToolCall, policies MicroCompactPolicySource) (map[string]struct{}, map[string]string) { if len(calls) == 0 { - return nil + return nil, nil } ids := make(map[string]struct{}, len(calls)) + toolNames := make(map[string]string, len(calls)) for _, call := range calls { toolName := strings.TrimSpace(call.Name) if !toolParticipatesInMicroCompact(toolName, policies) { @@ -113,11 +123,12 @@ func compactableToolCallIDs(calls []providertypes.ToolCall, policies MicroCompac continue } ids[callID] = struct{}{} + toolNames[callID] = toolName } if len(ids) == 0 { - return nil + return nil, nil } - return ids + return ids, toolNames } // toolParticipatesInMicroCompact 判断工具是否应参与 micro compact;未知工具默认视为可压缩。 @@ -128,28 +139,114 @@ func toolParticipatesInMicroCompact(toolName string, policies MicroCompactPolicy return policies.MicroCompactPolicy(toolName) != tools.MicroCompactPolicyPreserveHistory } -// hasCompactableToolContent 判断工具块中是否存在会影响保留预算的有效工具结果内容。 -func hasCompactableToolContent(messages []providertypes.Message, span internalcompact.MessageSpan, compactableIDs map[string]struct{}) bool { +// compactableToolMessageContents 收集工具块中可压缩消息的渲染内容,避免重复渲染。 
+func compactableToolMessageContents(messages []providertypes.Message, span internalcompact.MessageSpan, compactableIDs map[string]struct{}) map[int]string { + var contents map[int]string for messageIndex := span.Start + 1; messageIndex < span.End; messageIndex++ { - if shouldClearToolMessage(messages[messageIndex], compactableIDs) { + content, ok := compactableToolMessageContent(messages[messageIndex], compactableIDs) + if !ok { + continue + } + if contents == nil { + contents = make(map[int]string) + } + contents[messageIndex] = content + } + return contents +} + +// hasCompactableToolMessage 判断工具块中是否存在至少一条可压缩的工具消息。 +func hasCompactableToolMessage(messages []providertypes.Message, span internalcompact.MessageSpan, compactableIDs map[string]struct{}) bool { + for messageIndex := span.Start + 1; messageIndex < span.End; messageIndex++ { + if _, ok := compactableToolMessageContent(messages[messageIndex], compactableIDs); ok { return true } } return false } -// shouldClearToolMessage 判断一条 tool 消息是否满足旧结果清理条件。 -func shouldClearToolMessage(message providertypes.Message, compactableIDs map[string]struct{}) bool { +// compactableToolMessageContent 判断 tool 消息是否可压缩,并返回渲染后的内容文本。 +func compactableToolMessageContent(message providertypes.Message, compactableIDs map[string]struct{}) (string, bool) { if message.Role != providertypes.RoleTool || message.IsError { - return false - } - if compactableIDs == nil { - return false + return "", false } - if _, ok := compactableIDs[strings.TrimSpace(message.ToolCallID)]; !ok { - return false + callID := strings.TrimSpace(message.ToolCallID) + if _, ok := compactableIDs[callID]; !ok { + return "", false } content := strings.TrimSpace(renderDisplayParts(message.Parts)) - return content != "" && content != microCompactClearedMessage + if content == "" || content == microCompactClearedMessage { + return "", false + } + return content, true +} + +// summarizeOrClear 为单条可压缩工具消息生成摘要或回退到默认清除占位。 +func summarizeOrClear( + message 
providertypes.Message, + content string, + toolNames map[string]string, + summarizers MicroCompactSummarizerSource, +) string { + if summarizers == nil { + return microCompactClearedMessage + } + + callID := strings.TrimSpace(message.ToolCallID) + toolName, ok := toolNames[callID] + if !ok { + return microCompactClearedMessage + } + + summarizer := summarizers.MicroCompactSummarizer(toolName) + if summarizer == nil { + return microCompactClearedMessage + } + + summary := summarizer(content, message.ToolMetadata, message.IsError) + if summary == "" { + return microCompactClearedMessage + } + summary = sanitizeMicroCompactSummary(summary) + if summary == "" { + return microCompactClearedMessage + } + return summary +} + +// sanitizeMicroCompactSummary 对 summarizer 输出做最终净化与限长,避免把不安全文本直接回灌上下文。 +func sanitizeMicroCompactSummary(summary string) string { + trimmed := strings.TrimSpace(summary) + if trimmed == "" { + return "" + } + + var b strings.Builder + b.Grow(len(trimmed)) + for _, r := range trimmed { + if r < 32 || r == 127 { + continue + } + b.WriteRune(r) + } + + clean := strings.TrimSpace(b.String()) + if clean == "" { + return "" + } + return truncateSummaryRunes(clean, microCompactSummaryMaxRunes) +} + +// truncateSummaryRunes 按 rune 数量截断摘要,超限时追加 "..."。 +func truncateSummaryRunes(summary string, maxRunes int) string { + if maxRunes <= 0 || summary == "" { + return summary + } + + runes := []rune(summary) + if len(runes) <= maxRunes { + return summary + } + return string(runes[:maxRunes]) + "..." 
} diff --git a/internal/context/microcompact_summarizer_test.go b/internal/context/microcompact_summarizer_test.go new file mode 100644 index 00000000..12ffc2e9 --- /dev/null +++ b/internal/context/microcompact_summarizer_test.go @@ -0,0 +1,379 @@ +package context + +import ( + "strings" + "testing" + "unicode/utf8" + + "neo-code/internal/context/internalcompact" + providertypes "neo-code/internal/provider/types" + "neo-code/internal/tools" +) + +// stubMicroCompactSummarizerSource 实现 MicroCompactSummarizerSource,用于测试。 +type stubMicroCompactSummarizerSource map[string]tools.ContentSummarizer + +func (s stubMicroCompactSummarizerSource) MicroCompactSummarizer(name string) tools.ContentSummarizer { + return s[name] +} + +// TestMicroCompactWithSummarizerProducesSummary 验证注册 summarizer 的工具生成摘要而非清除占位。 +func TestMicroCompactWithSummarizerProducesSummary(t *testing.T) { + t.Parallel() + + bashSummarizer := func(content string, metadata map[string]string, isError bool) string { + return "[summary] bash: " + content + } + + messages := []providertypes.Message{ + {Role: providertypes.RoleUser, Parts: []providertypes.ContentPart{providertypes.NewTextPart("older user")}}, + { + Role: providertypes.RoleAssistant, + ToolCalls: []providertypes.ToolCall{ + {ID: "call-1", Name: "bash", Arguments: "{}"}, + }, + }, + {Role: providertypes.RoleTool, ToolCallID: "call-1", Parts: []providertypes.ContentPart{providertypes.NewTextPart("old bash result")}}, + { + Role: providertypes.RoleAssistant, + ToolCalls: []providertypes.ToolCall{ + {ID: "call-2", Name: "bash", Arguments: "{}"}, + }, + }, + {Role: providertypes.RoleTool, ToolCallID: "call-2", Parts: []providertypes.ContentPart{providertypes.NewTextPart("recent bash result")}}, + { + Role: providertypes.RoleAssistant, + ToolCalls: []providertypes.ToolCall{ + {ID: "call-3", Name: "bash", Arguments: "{}"}, + }, + }, + {Role: providertypes.RoleTool, ToolCallID: "call-3", Parts: []providertypes.ContentPart{providertypes.NewTextPart("latest 
bash result")}}, + {Role: providertypes.RoleUser, Parts: []providertypes.ContentPart{providertypes.NewTextPart("latest explicit instruction")}}, + {Role: providertypes.RoleAssistant, Parts: []providertypes.ContentPart{providertypes.NewTextPart("current reply")}}, + } + + got := microCompactMessagesWithPolicies( + messages, + stubMicroCompactPolicySource{}, + 0, + stubMicroCompactSummarizerSource{"bash": bashSummarizer}, + ) + + if renderDisplayParts(got[2].Parts) == microCompactClearedMessage { + t.Fatalf("expected summarized content for old bash result, got cleared placeholder") + } + if !strings.Contains(renderDisplayParts(got[2].Parts), "[summary] bash:") { + t.Fatalf("expected summary prefix, got %q", renderDisplayParts(got[2].Parts)) + } + if renderDisplayParts(got[4].Parts) != "recent bash result" { + t.Fatalf("expected recent bash result retained, got %q", renderDisplayParts(got[4].Parts)) + } + if renderDisplayParts(got[6].Parts) != "latest bash result" { + t.Fatalf("expected latest bash result retained, got %q", renderDisplayParts(got[6].Parts)) + } + // 原始切片不被修改 + if renderDisplayParts(messages[2].Parts) != "old bash result" { + t.Fatalf("expected original slice unchanged, got %q", renderDisplayParts(messages[2].Parts)) + } +} + +// TestMicroCompactWithoutSummarizerFallsBackToClear 验证未注册 summarizer 的工具仍使用清除占位。 +func TestMicroCompactWithoutSummarizerFallsBackToClear(t *testing.T) { + t.Parallel() + + messages := []providertypes.Message{ + {Role: providertypes.RoleUser, Parts: []providertypes.ContentPart{providertypes.NewTextPart("older user")}}, + { + Role: providertypes.RoleAssistant, + ToolCalls: []providertypes.ToolCall{ + {ID: "call-1", Name: "filesystem_read_file", Arguments: "{}"}, + }, + }, + {Role: providertypes.RoleTool, ToolCallID: "call-1", Parts: []providertypes.ContentPart{providertypes.NewTextPart("old read result")}}, + { + Role: providertypes.RoleAssistant, + ToolCalls: []providertypes.ToolCall{ + {ID: "call-2", Name: "bash", Arguments: 
"{}"}, + }, + }, + {Role: providertypes.RoleTool, ToolCallID: "call-2", Parts: []providertypes.ContentPart{providertypes.NewTextPart("recent bash result")}}, + { + Role: providertypes.RoleAssistant, + ToolCalls: []providertypes.ToolCall{ + {ID: "call-3", Name: "bash", Arguments: "{}"}, + }, + }, + {Role: providertypes.RoleTool, ToolCallID: "call-3", Parts: []providertypes.ContentPart{providertypes.NewTextPart("latest bash result")}}, + {Role: providertypes.RoleUser, Parts: []providertypes.ContentPart{providertypes.NewTextPart("latest explicit instruction")}}, + } + + // 只为 bash 注册 summarizer,read_file 没有 + got := microCompactMessagesWithPolicies( + messages, + stubMicroCompactPolicySource{}, + 0, + stubMicroCompactSummarizerSource{ + "bash": func(content string, metadata map[string]string, isError bool) string { + return "[summary] bash: " + content + }, + }, + ) + + // read_file 没有 summarizer,应回退到清除 + if renderDisplayParts(got[2].Parts) != microCompactClearedMessage { + t.Fatalf("expected cleared placeholder for read_file without summarizer, got %q", renderDisplayParts(got[2].Parts)) + } +} + +// TestMicroCompactMixedSpanWithSummarizer 验证混合工具 span 中部分有摘要、部分清除。 +func TestMicroCompactMixedSpanWithSummarizer(t *testing.T) { + t.Parallel() + + messages := []providertypes.Message{ + {Role: providertypes.RoleUser, Parts: []providertypes.ContentPart{providertypes.NewTextPart("older user")}}, + { + Role: providertypes.RoleAssistant, + ToolCalls: []providertypes.ToolCall{ + {ID: "call-1", Name: "bash", Arguments: "{}"}, + {ID: "call-2", Name: "filesystem_read_file", Arguments: "{}"}, + }, + }, + {Role: providertypes.RoleTool, ToolCallID: "call-1", Parts: []providertypes.ContentPart{providertypes.NewTextPart("bash output")}}, + {Role: providertypes.RoleTool, ToolCallID: "call-2", Parts: []providertypes.ContentPart{providertypes.NewTextPart("read output")}}, + { + Role: providertypes.RoleAssistant, + ToolCalls: []providertypes.ToolCall{ + {ID: "call-3", Name: "bash", 
Arguments: "{}"}, + }, + }, + {Role: providertypes.RoleTool, ToolCallID: "call-3", Parts: []providertypes.ContentPart{providertypes.NewTextPart("recent bash")}}, + { + Role: providertypes.RoleAssistant, + ToolCalls: []providertypes.ToolCall{ + {ID: "call-4", Name: "bash", Arguments: "{}"}, + }, + }, + {Role: providertypes.RoleTool, ToolCallID: "call-4", Parts: []providertypes.ContentPart{providertypes.NewTextPart("latest bash")}}, + {Role: providertypes.RoleUser, Parts: []providertypes.ContentPart{providertypes.NewTextPart("latest explicit instruction")}}, + {Role: providertypes.RoleAssistant, Parts: []providertypes.ContentPart{providertypes.NewTextPart("reply")}}, + } + + got := microCompactMessagesWithPolicies( + messages, + stubMicroCompactPolicySource{}, + 0, + stubMicroCompactSummarizerSource{ + "bash": func(content string, metadata map[string]string, isError bool) string { + return "[summary] " + content + }, + }, + ) + + // call-1 bash 在旧 span,有 summarizer,应生成摘要 + if !strings.Contains(renderDisplayParts(got[2].Parts), "[summary]") { + t.Fatalf("expected bash summary in old span, got %q", renderDisplayParts(got[2].Parts)) + } + // call-2 read_file 在旧 span,没有 summarizer,应清除 + if renderDisplayParts(got[3].Parts) != microCompactClearedMessage { + t.Fatalf("expected read_file cleared in old span, got %q", renderDisplayParts(got[3].Parts)) + } +} + +// TestMicroCompactSummarizerReturnsEmptyFallsBackToClear 验证 summarizer 返回空字符串时回退到清除。 +func TestMicroCompactSummarizerReturnsEmptyFallsBackToClear(t *testing.T) { + t.Parallel() + + messages := []providertypes.Message{ + {Role: providertypes.RoleUser, Parts: []providertypes.ContentPart{providertypes.NewTextPart("older user")}}, + { + Role: providertypes.RoleAssistant, + ToolCalls: []providertypes.ToolCall{ + {ID: "call-1", Name: "bash", Arguments: "{}"}, + }, + }, + {Role: providertypes.RoleTool, ToolCallID: "call-1", Parts: []providertypes.ContentPart{providertypes.NewTextPart("old result")}}, + { + Role: 
providertypes.RoleAssistant, + ToolCalls: []providertypes.ToolCall{ + {ID: "call-2", Name: "bash", Arguments: "{}"}, + }, + }, + {Role: providertypes.RoleTool, ToolCallID: "call-2", Parts: []providertypes.ContentPart{providertypes.NewTextPart("middle result")}}, + { + Role: providertypes.RoleAssistant, + ToolCalls: []providertypes.ToolCall{ + {ID: "call-3", Name: "bash", Arguments: "{}"}, + }, + }, + {Role: providertypes.RoleTool, ToolCallID: "call-3", Parts: []providertypes.ContentPart{providertypes.NewTextPart("recent result")}}, + {Role: providertypes.RoleUser, Parts: []providertypes.ContentPart{providertypes.NewTextPart("latest explicit instruction")}}, + } + + got := microCompactMessagesWithPolicies( + messages, + stubMicroCompactPolicySource{}, + 0, + stubMicroCompactSummarizerSource{ + "bash": func(content string, metadata map[string]string, isError bool) string { + return "" // 返回空 + }, + }, + ) + + if renderDisplayParts(got[2].Parts) != microCompactClearedMessage { + t.Fatalf("expected cleared fallback when summarizer returns empty, got %q", renderDisplayParts(got[2].Parts)) + } +} + +// TestSummarizeOrClearWithNilSummarizers 验证 nil summarizers 回退到清除。 +func TestSummarizeOrClearWithNilSummarizers(t *testing.T) { + t.Parallel() + + got := summarizeOrClear( + providertypes.Message{Parts: []providertypes.ContentPart{providertypes.NewTextPart("test")}}, + "test", + nil, + nil, + ) + if got != microCompactClearedMessage { + t.Fatalf("expected cleared message for nil summarizers, got %q", got) + } +} + +// TestSummarizeOrClearWithToolNamesLookup 验证 toolNames map 查找工具名。 +func TestSummarizeOrClearWithToolNamesLookup(t *testing.T) { + t.Parallel() + + t.Run("found", func(t *testing.T) { + toolNames := map[string]string{"call-2": "filesystem_read_file"} + got := summarizeOrClear( + providertypes.Message{ToolCallID: "call-2", Parts: []providertypes.ContentPart{providertypes.NewTextPart("content")}}, + "content", + toolNames, + stubMicroCompactSummarizerSource{ + 
"filesystem_read_file": func(content string, metadata map[string]string, isError bool) string { + return "[summary] " + content + }, + }, + ) + if !strings.Contains(got, "[summary]") { + t.Fatalf("expected summary, got %q", got) + } + }) + + t.Run("not_found_in_tool_names", func(t *testing.T) { + toolNames := map[string]string{"call-1": "bash"} + got := summarizeOrClear( + providertypes.Message{ToolCallID: "unknown-id", Parts: []providertypes.ContentPart{providertypes.NewTextPart("content")}}, + "content", + toolNames, + stubMicroCompactSummarizerSource{}, + ) + if got != microCompactClearedMessage { + t.Fatalf("expected cleared for unknown tool call id, got %q", got) + } + }) +} + +// TestSummarizeOrClearSanitizesSummary 验证摘要回灌前会执行控制字符净化与长度裁剪。 +func TestSummarizeOrClearSanitizesSummary(t *testing.T) { + t.Parallel() + + raw := strings.Repeat("x", microCompactSummaryMaxRunes+50) + "\n\t\x07" + got := summarizeOrClear( + providertypes.Message{ToolCallID: "call-1"}, + "ignored", + map[string]string{"call-1": "bash"}, + stubMicroCompactSummarizerSource{ + "bash": func(content string, metadata map[string]string, isError bool) string { + return raw + }, + }, + ) + + if strings.ContainsAny(got, "\n\t\a") { + t.Fatalf("expected control characters removed, got %q", got) + } + if utf8.RuneCountInString(got) > microCompactSummaryMaxRunes+3 { + t.Fatalf("expected summary capped, got %d runes", utf8.RuneCountInString(got)) + } + if !strings.HasSuffix(got, "...") { + t.Fatalf("expected truncated summary suffix, got %q", got) + } +} + +// TestSummarizeOrClearSanitizationEmptyFallback 验证净化后为空时会回退清理占位。 +func TestSummarizeOrClearSanitizationEmptyFallback(t *testing.T) { + t.Parallel() + + got := summarizeOrClear( + providertypes.Message{ToolCallID: "call-1"}, + "ignored", + map[string]string{"call-1": "bash"}, + stubMicroCompactSummarizerSource{ + "bash": func(content string, metadata map[string]string, isError bool) string { + return "\n\t\x07 " + }, + }, + ) + + if got != 
microCompactClearedMessage { + t.Fatalf("expected cleared fallback when sanitized summary is empty, got %q", got) + } +} + +// TestIsToolCallSpanBoundaries 验证 span 边界异常时返回 false。 +func TestIsToolCallSpanBoundaries(t *testing.T) { + t.Parallel() + + messages := []providertypes.Message{ + {Role: providertypes.RoleAssistant, ToolCalls: []providertypes.ToolCall{{ID: "c1", Name: "bash"}}}, + } + + if isToolCallSpan(messages, internalcompact.MessageSpan{Start: -1, End: 0}) { + t.Fatal("expected false for negative start") + } + if isToolCallSpan(messages, internalcompact.MessageSpan{Start: 2, End: 3}) { + t.Fatal("expected false for out-of-range start") + } +} + +// TestCompactableToolCallIDsEmptyInput 验证空 tool call 输入时返回 nil。 +func TestCompactableToolCallIDsEmptyInput(t *testing.T) { + t.Parallel() + + ids, names := compactableToolCallIDs(nil, nil) + if ids != nil || names != nil { + t.Fatalf("expected nil maps for empty input, got ids=%v names=%v", ids, names) + } +} + +// TestHasCompactableToolMessage 验证工具块可压缩消息探测逻辑。 +func TestHasCompactableToolMessage(t *testing.T) { + t.Parallel() + + span := internalcompact.MessageSpan{Start: 0, End: 3} + ids := map[string]struct{}{"call-1": {}} + + t.Run("true_when_matching_tool_message_exists", func(t *testing.T) { + messages := []providertypes.Message{ + {Role: providertypes.RoleAssistant, ToolCalls: []providertypes.ToolCall{{ID: "call-1", Name: "bash"}}}, + {Role: providertypes.RoleTool, ToolCallID: "call-1", Parts: []providertypes.ContentPart{providertypes.NewTextPart("output")}}, + {Role: providertypes.RoleUser, Parts: []providertypes.ContentPart{providertypes.NewTextPart("u")}}, + } + if !hasCompactableToolMessage(messages, span, ids) { + t.Fatal("expected compactable tool message to be found") + } + }) + + t.Run("false_when_tool_messages_are_not_compactable", func(t *testing.T) { + messages := []providertypes.Message{ + {Role: providertypes.RoleAssistant, ToolCalls: []providertypes.ToolCall{{ID: "call-1", Name: "bash"}}}, + 
{Role: providertypes.RoleTool, ToolCallID: "call-1", IsError: true, Parts: []providertypes.ContentPart{providertypes.NewTextPart("error")}}, + {Role: providertypes.RoleTool, ToolCallID: "call-2", Parts: []providertypes.ContentPart{providertypes.NewTextPart("other")}}, + } + if hasCompactableToolMessage(messages, span, ids) { + t.Fatal("expected no compactable tool message") + } + }) +} diff --git a/internal/context/microcompact_test.go b/internal/context/microcompact_test.go index 6077100a..0264cb8d 100644 --- a/internal/context/microcompact_test.go +++ b/internal/context/microcompact_test.go @@ -79,7 +79,7 @@ func TestMicroCompactMessagesHandlesEmptyAndInvalidSpanInputs(t *testing.T) { }, }, } - got := microCompactMessagesWithPolicies(assistantOnly, stubMicroCompactPolicySource{}, 0) + got := microCompactMessagesWithPolicies(assistantOnly, stubMicroCompactPolicySource{}, 0, nil) if len(got) != 1 || len(got[0].ToolCalls) != 1 { t.Fatalf("expected invalid tool call id path to keep message untouched, got %+v", got) } @@ -173,7 +173,7 @@ func TestMicroCompactMessagesKeepsPreservedToolsErrorsAndOrphans(t *testing.T) { got := microCompactMessagesWithPolicies(messages, stubMicroCompactPolicySource{ "custom_tool": tools.MicroCompactPolicyPreserveHistory, - }, 0) + }, 0, nil) if renderDisplayParts(got[1].Parts) != "custom result" { t.Fatalf("expected preserved tool result to remain, got %q", renderDisplayParts(got[1].Parts)) } @@ -225,7 +225,7 @@ func TestMicroCompactMessagesClearsOnlyNonPreservedResultsInMixedToolSpan(t *tes got := microCompactMessagesWithPolicies(messages, stubMicroCompactPolicySource{ "custom_tool": tools.MicroCompactPolicyPreserveHistory, - }, 0) + }, 0, nil) if renderDisplayParts(got[2].Parts) != microCompactClearedMessage { t.Fatalf("expected default compactable tool result to be cleared, got %q", renderDisplayParts(got[2].Parts)) } @@ -266,7 +266,7 @@ func TestMicroCompactMessagesTreatsNewToolsAsCompactableByDefault(t *testing.T) {Role: 
providertypes.RoleUser, Parts: []providertypes.ContentPart{providertypes.NewTextPart("latest explicit instruction")}}, } - got := microCompactMessagesWithPolicies(messages, stubMicroCompactPolicySource{}, 0) + got := microCompactMessagesWithPolicies(messages, stubMicroCompactPolicySource{}, 0, nil) if renderDisplayParts(got[2].Parts) != microCompactClearedMessage { t.Fatalf("expected new tool result to be compacted by default, got %q", renderDisplayParts(got[2].Parts)) } @@ -341,7 +341,7 @@ func TestMicroCompactMessagesSkipsToolMessagesWhenCompactableIDsMissing(t *testi {Role: providertypes.RoleTool, ToolCallID: "orphan", Parts: []providertypes.ContentPart{providertypes.NewTextPart("orphan result")}}, } - got := microCompactMessagesWithPolicies(messages, stubMicroCompactPolicySource{}, 0) + got := microCompactMessagesWithPolicies(messages, stubMicroCompactPolicySource{}, 0, nil) if renderDisplayParts(got[0].Parts) != "orphan result" { t.Fatalf("expected orphan tool result to remain, got %q", renderDisplayParts(got[0].Parts)) } diff --git a/internal/context/types.go b/internal/context/types.go index 269fcf21..10877ad0 100644 --- a/internal/context/types.go +++ b/internal/context/types.go @@ -36,6 +36,11 @@ type MicroCompactPolicySource interface { MicroCompactPolicy(name string) tools.MicroCompactPolicy } +// MicroCompactSummarizerSource 定义 context 查找按工具内容摘要器的最小依赖。 +type MicroCompactSummarizerSource interface { + MicroCompactSummarizer(name string) tools.ContentSummarizer +} + // CompactOptions controls read-time compact behavior inside the context builder. 
type Manager interface {
	ListAvailableSpecs(ctx context.Context, input SpecListInput) ([]providertypes.ToolSpec, error)
	MicroCompactPolicy(name string) MicroCompactPolicy
	MicroCompactSummarizer(name string) ContentSummarizer
	// Execute must support concurrent calls; the runtime may schedule several
	// tool calls in parallel within the same turn.
	Execute(ctx context.Context, input ToolCallInput) (ToolResult, error)
	RememberSessionDecision(sessionID string, action security.Action, scope SessionPermissionScope) error
}
type microCompactPolicyExecutor interface { MicroCompactPolicy(name string) MicroCompactPolicy } +type microCompactSummarizerExecutor interface { + MicroCompactSummarizer(name string) ContentSummarizer +} + // WorkspaceSandbox enforces workspace-oriented constraints before execution. type WorkspaceSandbox interface { Check(ctx context.Context, action security.Action) (*security.WorkspaceExecutionPlan, error) @@ -249,6 +255,17 @@ func (m *DefaultManager) MicroCompactPolicy(name string) MicroCompactPolicy { return MicroCompactPolicyCompact } +// MicroCompactSummarizer 返回工具的内容摘要器;未注册时返回 nil。 +func (m *DefaultManager) MicroCompactSummarizer(name string) ContentSummarizer { + if m == nil || m.executor == nil { + return nil + } + if source, ok := m.executor.(microCompactSummarizerExecutor); ok { + return source.MicroCompactSummarizer(name) + } + return nil +} + // Execute runs the tool if the permission engine allows it and the sandbox // check passes. func (m *DefaultManager) Execute(ctx context.Context, input ToolCallInput) (ToolResult, error) { diff --git a/internal/tools/manager_test.go b/internal/tools/manager_test.go index 1645d83c..bdfa5263 100644 --- a/internal/tools/manager_test.go +++ b/internal/tools/manager_test.go @@ -47,20 +47,20 @@ type stubSandbox struct { lastAction security.Action } -type executorWithoutMicroCompactPolicy struct{} +type executorWithoutOptionalCompactFeatures struct{} -func (executorWithoutMicroCompactPolicy) ListAvailableSpecs(ctx context.Context, input SpecListInput) ([]providertypes.ToolSpec, error) { +func (executorWithoutOptionalCompactFeatures) ListAvailableSpecs(ctx context.Context, input SpecListInput) ([]providertypes.ToolSpec, error) { if err := ctx.Err(); err != nil { return nil, err } return nil, nil } -func (executorWithoutMicroCompactPolicy) Execute(ctx context.Context, call ToolCallInput) (ToolResult, error) { +func (executorWithoutOptionalCompactFeatures) Execute(ctx context.Context, call ToolCallInput) (ToolResult, 
error) { return ToolResult{}, ctx.Err() } -func (executorWithoutMicroCompactPolicy) Supports(name string) bool { return false } +func (executorWithoutOptionalCompactFeatures) Supports(name string) bool { return false } func (s *stubSandbox) Check(ctx context.Context, action security.Action) (*security.WorkspaceExecutionPlan, error) { s.callCount++ @@ -105,7 +105,7 @@ func TestDefaultManagerMicroCompactPolicy(t *testing.T) { t.Run("executor without policy support defaults to compact", func(t *testing.T) { t.Parallel() - manager, err := NewManager(executorWithoutMicroCompactPolicy{}, nil, nil) + manager, err := NewManager(executorWithoutOptionalCompactFeatures{}, nil, nil) if err != nil { t.Fatalf("new manager: %v", err) } @@ -130,6 +130,53 @@ func TestDefaultManagerMicroCompactPolicy(t *testing.T) { }) } +func TestDefaultManagerMicroCompactSummarizer(t *testing.T) { + t.Parallel() + + t.Run("nil manager returns nil", func(t *testing.T) { + t.Parallel() + + var manager *DefaultManager + if got := manager.MicroCompactSummarizer("custom_tool"); got != nil { + t.Fatalf("expected nil summarizer, got non-nil") + } + }) + + t.Run("executor without summarizer support returns nil", func(t *testing.T) { + t.Parallel() + + manager, err := NewManager(executorWithoutOptionalCompactFeatures{}, nil, nil) + if err != nil { + t.Fatalf("new manager: %v", err) + } + if got := manager.MicroCompactSummarizer("custom_tool"); got != nil { + t.Fatalf("expected nil summarizer, got non-nil") + } + }) + + t.Run("executor summarizer is forwarded", func(t *testing.T) { + t.Parallel() + + registry := NewRegistry() + registry.RegisterSummarizer("custom_tool", func(content string, metadata map[string]string, isError bool) string { + return "summary:" + content + }) + + manager, err := NewManager(registry, nil, nil) + if err != nil { + t.Fatalf("new manager: %v", err) + } + + summarizer := manager.MicroCompactSummarizer("CUSTOM_TOOL") + if summarizer == nil { + t.Fatal("expected non-nil 
// ContentSummarizer condenses a tool result into a short summary that
// micro-compact uses to replace old tool output.
// content and metadata come from the persisted Message fields; isError reports
// whether the original tool call failed.
// Returning an empty string means "no summary; fall back to the default
// clearing behavior".
type ContentSummarizer func(content string, metadata map[string]string, isError bool) string
:= stubMetadata("workdir", "/home/user/project") + got := bashSummarizer(content, meta, false) + assertContains(t, got, "[exit=0]") + assertContains(t, got, "workdir=/home/user/project") + assertContains(t, got, "lines=8") + assertContains(t, got, "chars=") + assertMaxRuneCount(t, got, summaryMaxRunes) + }) + + t.Run("error_output", func(t *testing.T) { + content := "error: command not found" + meta := stubMetadata("workdir", "/tmp") + got := bashSummarizer(content, meta, true) + assertContains(t, got, "[exit=non-zero]") + }) + + t.Run("short_output", func(t *testing.T) { + content := "ok" + got := bashSummarizer(content, nil, false) + assertContains(t, got, "lines=1") + }) + + t.Run("empty_content", func(t *testing.T) { + got := bashSummarizer("", nil, false) + assertContains(t, got, "[exit=0]") + }) + + t.Run("sanitizes_workdir_metadata", func(t *testing.T) { + meta := stubMetadata("workdir", " \n\t/tmp/proj\x07 ") + got := bashSummarizer("ok", meta, false) + if strings.ContainsAny(got, "\n\t\a") { + t.Fatalf("expected sanitized workdir without control characters, got %q", got) + } + assertContains(t, got, "workdir=/tmp/proj") + }) +} + +func TestReadFileSummarizer(t *testing.T) { + t.Parallel() + + t.Run("normal_file", func(t *testing.T) { + content := "package main\n\nfunc main() {\n\tfmt.Println(\"hello\")\n}\n" + meta := stubMetadata("path", "/home/user/main.go") + got := readFileSummarizer(content, meta, false) + assertContains(t, got, "/home/user/main.go") + assertContains(t, got, "lines=5") + assertContains(t, got, "chars=") + assertMaxRuneCount(t, got, summaryMaxRunes) + }) + + t.Run("trailing_newline_not_counted_as_extra_line", func(t *testing.T) { + content := "a\nb\n" + meta := stubMetadata("path", "/tmp/a.txt") + got := readFileSummarizer(content, meta, false) + assertContains(t, got, "lines=2") + }) + + t.Run("empty_lines_are_counted", func(t *testing.T) { + content := "\n\n" + meta := stubMetadata("path", "/tmp/empty.txt") + got := 
readFileSummarizer(content, meta, false) + assertContains(t, got, "lines=2") + }) + + t.Run("missing_path", func(t *testing.T) { + got := readFileSummarizer("content", nil, false) + assertEmptySummary(t, got) + }) + + t.Run("sanitizes_path_metadata", func(t *testing.T) { + content := "line1\nline2" + meta := stubMetadata("path", " \n\t/tmp/a.go\x07 ") + got := readFileSummarizer(content, meta, false) + if strings.ContainsAny(got, "\n\t\a") { + t.Fatalf("expected sanitized path without control characters, got %q", got) + } + assertContains(t, got, "/tmp/a.go") + }) +} + +func TestWriteFileSummarizer(t *testing.T) { + t.Parallel() + + t.Run("normal", func(t *testing.T) { + meta := stubMetadata("path", "/home/user/test.go", "bytes", "1024") + got := writeFileSummarizer("", meta, false) + assertContains(t, got, "/home/user/test.go") + assertContains(t, got, "1024 bytes") + assertMaxRuneCount(t, got, summaryMaxRunes) + }) + + t.Run("missing_path", func(t *testing.T) { + got := writeFileSummarizer("", stubMetadata("bytes", "100"), false) + assertEmptySummary(t, got) + }) + + t.Run("sanitizes_path_metadata", func(t *testing.T) { + meta := stubMetadata("path", " \n\t/tmp/out.go\x07 ", "bytes", "4") + got := writeFileSummarizer("", meta, false) + if strings.ContainsAny(got, "\n\t\a") { + t.Fatalf("expected sanitized path without control characters, got %q", got) + } + assertContains(t, got, "/tmp/out.go") + }) +} + +func TestEditSummarizer(t *testing.T) { + t.Parallel() + + t.Run("with_relative_path", func(t *testing.T) { + meta := stubMetadata("relative_path", "src/main.go", "path", "/abs/src/main.go", "search_length", "50", "replacement_length", "60") + got := editSummarizer("", meta, false) + assertContains(t, got, "src/main.go") + assertContains(t, got, "search=50") + assertMaxRuneCount(t, got, summaryMaxRunes) + }) + + t.Run("fallback_to_abs_path", func(t *testing.T) { + meta := stubMetadata("path", "/abs/src/main.go", "search_length", "10", "replacement_length", "20") 
+ got := editSummarizer("", meta, false) + assertContains(t, got, "/abs/src/main.go") + }) + + t.Run("missing_path", func(t *testing.T) { + got := editSummarizer("", stubMetadata("search_length", "10"), false) + assertEmptySummary(t, got) + }) + + t.Run("sanitizes_path_metadata", func(t *testing.T) { + meta := stubMetadata("relative_path", " \n\tsrc/main.go\x07 ", "search_length", "10", "replacement_length", "12") + got := editSummarizer("", meta, false) + if strings.ContainsAny(got, "\n\t\a") { + t.Fatalf("expected sanitized path without control characters, got %q", got) + } + assertContains(t, got, "src/main.go") + }) + + t.Run("long_path_is_truncated", func(t *testing.T) { + longPath := strings.Repeat("abcdef/", 80) + "main.go" + meta := stubMetadata("path", longPath, "search_length", "10", "replacement_length", "20") + got := editSummarizer("", meta, false) + assertMaxRuneCount(t, got, summaryMaxRunes+3) + }) +} + +func TestGrepSummarizer(t *testing.T) { + t.Parallel() + + t.Run("with_matches", func(t *testing.T) { + content := "src/a.go:10:match1\nsrc/b.go:20:match2\nsrc/c.go:30:match3\nsrc/d.go:40:match4" + meta := stubMetadata("root", "/home/user", "matched_files", "4", "matched_lines", "4") + got := grepSummarizer(content, meta, false) + assertContains(t, got, "root=/home/user") + assertContains(t, got, "files=4") + assertMaxRuneCount(t, got, summaryMaxRunes) + }) + + t.Run("empty_content", func(t *testing.T) { + meta := stubMetadata("root", "/home", "matched_files", "0", "matched_lines", "0") + got := grepSummarizer("", meta, false) + assertContains(t, got, "files=0") + }) + + t.Run("sanitizes_root_metadata", func(t *testing.T) { + content := "a.go:1:x" + meta := stubMetadata("root", " \n\t/tmp/root\x07 ", "matched_files", "1", "matched_lines", "1") + got := grepSummarizer(content, meta, false) + if strings.ContainsAny(got, "\n\t\a") { + t.Fatalf("expected sanitized root without control characters, got %q", got) + } + assertContains(t, got, 
"root=/tmp/root") + }) + + t.Run("sanitizes_injected_filename", func(t *testing.T) { + content := "src/a.go\nignore:1:x\nsafe.go:2:y" + meta := stubMetadata("matched_files", "2", "matched_lines", "2") + got := grepSummarizer(content, meta, false) + if strings.Contains(got, "\n") || strings.Contains(got, "\t") { + t.Fatalf("expected sanitized summary without control characters, got %q", got) + } + assertContains(t, got, "matches=ignore, safe.go") + }) +} + +func TestGlobSummarizer(t *testing.T) { + t.Parallel() + + t.Run("with_files", func(t *testing.T) { + content := "src/a.go\nsrc/b.go\nsrc/c.go\nsrc/d.go" + meta := stubMetadata("count", "4") + got := globSummarizer(content, meta, false) + assertContains(t, got, "4 files") + assertMaxRuneCount(t, got, summaryMaxRunes) + }) + + t.Run("no_matches", func(t *testing.T) { + meta := stubMetadata("count", "0") + got := globSummarizer("", meta, false) + assertContains(t, got, "0 files") + }) + + t.Run("skips_blank_and_control_lines", func(t *testing.T) { + content := "\n\t\nsrc/a.go\nsrc/b.go\n" + meta := stubMetadata("count", "2") + got := globSummarizer(content, meta, false) + assertContains(t, got, "src/a.go, src/b.go") + if strings.Contains(got, "\n") || strings.Contains(got, "\t") { + t.Fatalf("expected sanitized preview, got %q", got) + } + }) +} + +func TestWebfetchSummarizer(t *testing.T) { + t.Parallel() + + t.Run("with_truncated_flag", func(t *testing.T) { + meta := stubMetadata("truncated", "true") + got := webfetchSummarizer("", meta, false) + assertContains(t, got, "truncated=true") + }) + + t.Run("minimal", func(t *testing.T) { + got := webfetchSummarizer("", nil, false) + assertContains(t, got, "[summary] webfetch") + }) +} + +func TestRegisterBuiltinSummarizers(t *testing.T) { + t.Parallel() + + registry := NewRegistry() + RegisterBuiltinSummarizers(registry) + + toolNames := []string{ + ToolNameBash, ToolNameFilesystemReadFile, ToolNameFilesystemWriteFile, + ToolNameFilesystemEdit, ToolNameFilesystemGrep, 
ToolNameFilesystemGlob, + ToolNameWebFetch, + } + for _, name := range toolNames { + if registry.MicroCompactSummarizer(name) == nil { + t.Errorf("expected summarizer for %q to be registered", name) + } + } + + // 不在注册列表中的工具应返回 nil + if registry.MicroCompactSummarizer("unknown_tool") != nil { + t.Fatal("expected nil for unknown tool") + } +} + +func TestRegisterBuiltinSummarizersNilRegistry(t *testing.T) { + t.Parallel() + RegisterBuiltinSummarizers(nil) +} + +func TestRegisterSummarizer(t *testing.T) { + t.Parallel() + + registry := NewRegistry() + + // 注册 + called := false + registry.RegisterSummarizer("test_tool", func(content string, metadata map[string]string, isError bool) string { + called = true + return "summary" + }) + + s := registry.MicroCompactSummarizer("test_tool") + if s == nil { + t.Fatal("expected summarizer to be registered") + } + result := s("content", nil, false) + if !called { + t.Fatal("expected summarizer to be called") + } + if result != "summary" { + t.Fatalf("expected 'summary', got %q", result) + } + + // 移除 + registry.RegisterSummarizer("test_tool", nil) + if registry.MicroCompactSummarizer("test_tool") != nil { + t.Fatal("expected nil after removal") + } +} + +func TestRegisterSummarizerNormalizesName(t *testing.T) { + t.Parallel() + + registry := NewRegistry() + registry.RegisterSummarizer(" Mixed_Tool ", func(content string, metadata map[string]string, isError bool) string { + return "ok" + }) + + if registry.MicroCompactSummarizer("mixed_tool") == nil { + t.Fatal("expected normalized summarizer lookup") + } + if registry.MicroCompactSummarizer(" MIXED_TOOL ") == nil { + t.Fatal("expected case-insensitive summarizer lookup") + } +} + +func TestRegisterSummarizerNilRegistry(t *testing.T) { + t.Parallel() + + var nilRegistry *Registry + nilRegistry.RegisterSummarizer("tool", func(content string, metadata map[string]string, isError bool) string { + return "ok" + }) + if nilRegistry.MicroCompactSummarizer("tool") != nil { + 
t.Fatal("expected nil summarizer on nil registry") + } +} + +func TestRegisterSummarizerConcurrentAccess(t *testing.T) { + t.Parallel() + + registry := NewRegistry() + var wg sync.WaitGroup + + for i := 0; i < 8; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := 0; j < 200; j++ { + if j%3 == 0 { + registry.RegisterSummarizer("concurrent_tool", nil) + continue + } + registry.RegisterSummarizer("concurrent_tool", func(content string, metadata map[string]string, isError bool) string { + return "worker" + }) + s := registry.MicroCompactSummarizer("concurrent_tool") + if s != nil { + _ = s("content", nil, false) + } + } + }() + } + + wg.Wait() +} + +func TestTruncateRunes(t *testing.T) { + t.Parallel() + + t.Run("short", func(t *testing.T) { + got := truncateRunes("hello", 10) + if got != "hello" { + t.Fatalf("expected unchanged, got %q", got) + } + }) + + t.Run("exact", func(t *testing.T) { + got := truncateRunes("hello", 5) + if got != "hello" { + t.Fatalf("expected unchanged, got %q", got) + } + }) + + t.Run("truncated", func(t *testing.T) { + got := truncateRunes("hello world", 5) + if got != "hello..." { + t.Fatalf("expected 'hello...', got %q", got) + } + }) + + t.Run("chinese", func(t *testing.T) { + got := truncateRunes("你好世界测试", 3) + if got != "你好世..." 
{ + t.Fatalf("expected '你好世...', got %q", got) + } + }) + + t.Run("zero_limit_keeps_original", func(t *testing.T) { + got := truncateRunes("hello", 0) + if got != "hello" { + t.Fatalf("expected unchanged with zero limit, got %q", got) + } + }) + + t.Run("empty_text", func(t *testing.T) { + got := truncateRunes("", 10) + if got != "" { + t.Fatalf("expected empty string, got %q", got) + } + }) +} + +func TestStableLineCount(t *testing.T) { + t.Parallel() + + t.Run("empty", func(t *testing.T) { + if got := stableLineCount(""); got != 0 { + t.Fatalf("expected 0, got %d", got) + } + }) + + t.Run("non_empty", func(t *testing.T) { + if got := stableLineCount("a\nb"); got != 2 { + t.Fatalf("expected 2, got %d", got) + } + }) + + t.Run("trailing_newline", func(t *testing.T) { + if got := stableLineCount("a\nb\n"); got != 2 { + t.Fatalf("expected 2, got %d", got) + } + }) + + t.Run("only_empty_lines", func(t *testing.T) { + if got := stableLineCount("\n\n"); got != 2 { + t.Fatalf("expected 2, got %d", got) + } + }) +} diff --git a/internal/tools/micro_compact_summarizers_builtin.go b/internal/tools/micro_compact_summarizers_builtin.go new file mode 100644 index 00000000..d18481ae --- /dev/null +++ b/internal/tools/micro_compact_summarizers_builtin.go @@ -0,0 +1,281 @@ +package tools + +import ( + "strconv" + "strings" + "unicode/utf8" +) + +type builtinSummarizerRegistration struct { + toolName string + summarizer ContentSummarizer +} + +var builtinSummarizers = []builtinSummarizerRegistration{ + {toolName: ToolNameBash, summarizer: bashSummarizer}, + {toolName: ToolNameFilesystemReadFile, summarizer: readFileSummarizer}, + {toolName: ToolNameFilesystemWriteFile, summarizer: writeFileSummarizer}, + {toolName: ToolNameFilesystemEdit, summarizer: editSummarizer}, + {toolName: ToolNameFilesystemGrep, summarizer: grepSummarizer}, + {toolName: ToolNameFilesystemGlob, summarizer: globSummarizer}, + {toolName: ToolNameWebFetch, summarizer: webfetchSummarizer}, +} + +// 
RegisterBuiltinSummarizers 将所有内置工具的内容摘要器注册到 Registry。 +// 建议在启动装配阶段调用;可重复调用并覆盖同名摘要器。 +func RegisterBuiltinSummarizers(registry *Registry) { + if registry == nil { + return + } + for _, item := range builtinSummarizers { + registry.RegisterSummarizer(item.toolName, item.summarizer) + } +} + +const summaryMaxRunes = 200 +const metadataTokenMaxRunes = 120 + +// bashSummarizer 仅保留结构化执行元信息,避免把原始输出内容重新注入上下文。 +func bashSummarizer(content string, metadata map[string]string, isError bool) string { + var parts []string + + if isError { + parts = append(parts, "[exit=non-zero]") + } else { + parts = append(parts, "[exit=0]") + } + + if workdir := metadataToken(metadata["workdir"]); workdir != "" { + parts = append(parts, "workdir="+workdir) + } + + trimmed := strings.TrimSpace(content) + if trimmed != "" { + parts = appendTextStats(parts, trimmed) + } + + return truncateRunes(strings.Join(parts, " "), summaryMaxRunes) +} + +// readFileSummarizer 仅保留稳定元信息,避免在摘要中再次暴露文件正文。 +func readFileSummarizer(content string, metadata map[string]string, isError bool) string { + path := metadataToken(metadata["path"]) + if path == "" { + return "" + } + + lineCount := stableLineCount(content) + + var parts []string + parts = append(parts, "[summary]", path, "lines="+strconv.Itoa(lineCount)) + if content != "" { + parts = append(parts, "chars="+strconv.Itoa(utf8.RuneCountInString(content))) + } + + return truncateRunes(strings.Join(parts, " "), summaryMaxRunes) +} + +// writeFileSummarizer 保留文件路径与写入字节数。 +func writeFileSummarizer(content string, metadata map[string]string, isError bool) string { + path := metadataToken(metadata["path"]) + if path == "" { + return "" + } + bytes := metadata["bytes"] + return truncateRunes("[summary] wrote "+path+" ("+bytes+" bytes)", summaryMaxRunes) +} + +// editSummarizer 保留编辑路径与替换范围。 +func editSummarizer(content string, metadata map[string]string, isError bool) string { + path := metadataToken(metadata["relative_path"]) + if path == "" { + path = 
metadataToken(metadata["path"]) + } + if path == "" { + return "" + } + searchLen := metadata["search_length"] + replaceLen := metadata["replacement_length"] + return truncateRunes( + "[summary] edited "+path+" (search="+searchLen+" chars, replace="+replaceLen+" chars)", + summaryMaxRunes, + ) +} + +// grepSummarizer 保留搜索根目录、匹配计数与前若干文件名。 +func grepSummarizer(content string, metadata map[string]string, isError bool) string { + var parts []string + parts = append(parts, "[summary] grep") + + if root := metadataToken(metadata["root"]); root != "" { + parts = append(parts, "root="+root) + } + + if matchedFiles := metadata["matched_files"]; matchedFiles != "" { + parts = append(parts, "files="+matchedFiles) + } + if matchedLines := metadata["matched_lines"]; matchedLines != "" { + parts = append(parts, "lines="+matchedLines) + } + + // 从 content 中提取前几个不重复文件名,避免对整段输出做全量切分。 + fileNames := extractUniqueMatchFiles(content, 3) + if len(fileNames) > 0 { + parts = append(parts, "matches="+strings.Join(fileNames, ", ")) + } + + return truncateRunes(strings.Join(parts, " "), summaryMaxRunes) +} + +// globSummarizer 保留匹配计数与前若干文件名。 +func globSummarizer(content string, metadata map[string]string, isError bool) string { + count := metadata["count"] + if count == "" { + count = "?" 
+ } + + preview := collectPreviewLines(content, 3) + + var parts []string + parts = append(parts, "[summary] glob", count+" files") + if len(preview) > 0 { + parts = append(parts, strings.Join(preview, ", ")) + } + + return truncateRunes(strings.Join(parts, " "), summaryMaxRunes) +} + +// webfetchSummarizer 保留可稳定持久化的 webfetch 结果标记。 +func webfetchSummarizer(content string, metadata map[string]string, isError bool) string { + var parts []string + parts = append(parts, "[summary] webfetch") + + if truncated := metadata["truncated"]; truncated == "true" { + parts = append(parts, "truncated=true") + } + + return truncateRunes(strings.Join(parts, " "), summaryMaxRunes) +} + +// truncateRunes 按 rune 数量截断字符串,超出时追加 "..."。 +func truncateRunes(text string, maxRunes int) string { + if maxRunes <= 0 || text == "" { + return text + } + if utf8.RuneCountInString(text) <= maxRunes { + return text + } + runes := []rune(text) + return string(runes[:maxRunes]) + "..." +} + +// stableLineCount 统计文本行数;空文本返回 0,末尾换行不会产生额外空行计数。 +func stableLineCount(text string) int { + if text == "" { + return 0 + } + count := strings.Count(text, "\n") + 1 + if strings.HasSuffix(text, "\n") { + count-- + } + if count < 0 { + return 0 + } + return count +} + +// appendTextStats 为摘要补充文本统计字段,保持统一的结构化输出格式。 +func appendTextStats(parts []string, text string) []string { + return append(parts, + "lines="+strconv.Itoa(stableLineCount(text)), + "chars="+strconv.Itoa(utf8.RuneCountInString(text)), + ) +} + +// extractUniqueMatchFiles 按行扫描 grep 输出,提取前若干个去重后的文件名摘要。 +func extractUniqueMatchFiles(content string, limit int) []string { + if limit <= 0 { + return nil + } + + seen := make(map[string]struct{}, limit) + result := make([]string, 0, limit) + remaining := content + for len(remaining) > 0 && len(result) < limit { + line, rest := nextLine(remaining) + remaining = rest + + colon := strings.Index(line, ":") + if colon <= 0 { + continue + } + + file := sanitizeSummaryToken(line[:colon], 80) + if file == "" { + 
continue + } + if _, ok := seen[file]; ok { + continue + } + seen[file] = struct{}{} + result = append(result, file) + } + return result +} + +// collectPreviewLines 按行扫描输出并提取前若干个非空预览,避免全量 Split 带来的额外分配。 +func collectPreviewLines(content string, limit int) []string { + if limit <= 0 { + return nil + } + + result := make([]string, 0, limit) + remaining := content + for len(remaining) > 0 && len(result) < limit { + line, rest := nextLine(remaining) + remaining = rest + + clean := sanitizeSummaryToken(line, 100) + if clean == "" { + continue + } + result = append(result, clean) + } + return result +} + +// nextLine 返回 text 的首行及余下文本,兼容存在或不存在换行符的输入。 +func nextLine(text string) (line string, rest string) { + idx := strings.IndexByte(text, '\n') + if idx < 0 { + return text, "" + } + return text[:idx], text[idx+1:] +} + +// sanitizeSummaryToken 清理不可见控制字符并裁剪长度,降低摘要注入风险。 +func sanitizeSummaryToken(text string, maxRunes int) string { + trimmed := strings.TrimSpace(text) + if trimmed == "" { + return "" + } + + var b strings.Builder + b.Grow(len(trimmed)) + for _, r := range trimmed { + if r < 32 || r == 127 { + continue + } + b.WriteRune(r) + } + clean := strings.TrimSpace(b.String()) + if clean == "" { + return "" + } + return truncateRunes(clean, maxRunes) +} + +// metadataToken 统一清理 metadata 中可回灌到摘要的文本字段。 +func metadataToken(text string) string { + return sanitizeSummaryToken(text, metadataTokenMaxRunes) +} diff --git a/internal/tools/registry.go b/internal/tools/registry.go index 998ba939..45a1e3fd 100644 --- a/internal/tools/registry.go +++ b/internal/tools/registry.go @@ -5,6 +5,7 @@ import ( "errors" "sort" "strings" + "sync" providertypes "neo-code/internal/provider/types" "neo-code/internal/security" @@ -12,18 +13,21 @@ import ( ) type Registry struct { - tools map[string]Tool - microCompactPolicies map[string]MicroCompactPolicy - mcpRegistry *mcp.Registry - mcpFactory *mcp.AdapterFactory - mcpExposureFilter mcp.ExposureFilter - mcpExposureAudit 
[]mcp.ExposureDecision + tools map[string]Tool + microCompactPolicies map[string]MicroCompactPolicy + microCompactSummarizers map[string]ContentSummarizer + microCompactSummaryMu sync.RWMutex + mcpRegistry *mcp.Registry + mcpFactory *mcp.AdapterFactory + mcpExposureFilter mcp.ExposureFilter + mcpExposureAudit []mcp.ExposureDecision } func NewRegistry() *Registry { return &Registry{ - tools: map[string]Tool{}, - microCompactPolicies: map[string]MicroCompactPolicy{}, + tools: map[string]Tool{}, + microCompactPolicies: map[string]MicroCompactPolicy{}, + microCompactSummarizers: map[string]ContentSummarizer{}, } } @@ -102,6 +106,34 @@ func (r *Registry) MicroCompactPolicy(name string) MicroCompactPolicy { return MicroCompactPolicyCompact } +// RegisterSummarizer 为指定工具注册内容摘要器;传入 nil 移除已有条目。 +func (r *Registry) RegisterSummarizer(toolName string, summarizer ContentSummarizer) { + if r == nil { + return + } + name := strings.ToLower(strings.TrimSpace(toolName)) + r.microCompactSummaryMu.Lock() + defer r.microCompactSummaryMu.Unlock() + if summarizer == nil { + delete(r.microCompactSummarizers, name) + return + } + r.microCompactSummarizers[name] = summarizer +} + +// MicroCompactSummarizer 返回指定工具的内容摘要器;无注册时返回 nil。 +func (r *Registry) MicroCompactSummarizer(name string) ContentSummarizer { + if r == nil { + return nil + } + r.microCompactSummaryMu.RLock() + defer r.microCompactSummaryMu.RUnlock() + if r.microCompactSummarizers == nil { + return nil + } + return r.microCompactSummarizers[strings.ToLower(strings.TrimSpace(name))] +} + func (r *Registry) GetSpecs() []providertypes.ToolSpec { names := make([]string, 0, len(r.tools)) for name := range r.tools {