diff --git a/lib/hooks.ts b/lib/hooks.ts index 54c1232d..e1e1bb4a 100644 --- a/lib/hooks.ts +++ b/lib/hooks.ts @@ -1,9 +1,10 @@ import type { SessionState, WithParts } from "./state" import type { Logger } from "./logger" import type { PluginConfig } from "./config" +import { assignMessageRefs } from "./message-ids" import { syncToolCache } from "./state/tool-cache" import { deduplicate, supersedeWrites, purgeErrors } from "./strategies" -import { prune, insertPruneToolContext } from "./messages" +import { prune, insertPruneToolContext, insertMessageIdContext } from "./messages" import { buildToolIdList, isIgnoredUserMessage } from "./messages/utils" import { checkSession } from "./state" import { renderSystemPrompt } from "./prompts" @@ -13,7 +14,6 @@ import { handleHelpCommand } from "./commands/help" import { handleSweepCommand } from "./commands/sweep" import { handleManualToggleCommand, handleManualTriggerCommand } from "./commands/manual" import { ensureSessionInitialized } from "./state/state" -import { getCurrentParams } from "./strategies/utils" const INTERNAL_AGENT_SIGNATURES = [ "You are a title generator", @@ -109,6 +109,8 @@ export function createChatMessageTransformHandler( return } + assignMessageRefs(state, output.messages) + syncToolCache(state, config, logger, output.messages) buildToolIdList(state, output.messages, logger) @@ -118,6 +120,7 @@ export function createChatMessageTransformHandler( prune(state, logger, config, output.messages) insertPruneToolContext(state, config, logger, output.messages) + insertMessageIdContext(state, output.messages) applyPendingManualTriggerPrompt(state, output.messages, logger) diff --git a/lib/message-ids.ts b/lib/message-ids.ts new file mode 100644 index 00000000..75b981cd --- /dev/null +++ b/lib/message-ids.ts @@ -0,0 +1,133 @@ +import type { SessionState, WithParts } from "./state" + +const MESSAGE_REF_REGEX = /^m(\d{4})$/ +const BLOCK_REF_REGEX = /^b([1-9]\d*)$/ +const MESSAGE_ID_TAG_NAME = "dcp-message-id" + 
+const MESSAGE_REF_WIDTH = 4 +const MESSAGE_REF_MIN_INDEX = 0 +export const MESSAGE_REF_MAX_INDEX = 9999 + +export type ParsedBoundaryId = + | { + kind: "message" + ref: string + index: number + } + | { + kind: "compressed-block" + ref: string + blockId: number + } + +export function formatMessageRef(index: number): string { + if ( + !Number.isInteger(index) || + index < MESSAGE_REF_MIN_INDEX || + index > MESSAGE_REF_MAX_INDEX + ) { + throw new Error( + `Message ID index out of bounds: ${index}. Supported range is 0-${MESSAGE_REF_MAX_INDEX}.`, + ) + } + return `m${index.toString().padStart(MESSAGE_REF_WIDTH, "0")}` +} + +export function formatBlockRef(blockId: number): string { + if (!Number.isInteger(blockId) || blockId < 1) { + throw new Error(`Invalid block ID: ${blockId}`) + } + return `b${blockId}` +} + +export function parseMessageRef(ref: string): number | null { + const normalized = ref.trim().toLowerCase() + const match = normalized.match(MESSAGE_REF_REGEX) + if (!match) { + return null + } + const index = Number.parseInt(match[1], 10) + return Number.isInteger(index) ? index : null +} + +export function parseBlockRef(ref: string): number | null { + const normalized = ref.trim().toLowerCase() + const match = normalized.match(BLOCK_REF_REGEX) + if (!match) { + return null + } + const id = Number.parseInt(match[1], 10) + return Number.isInteger(id) ? 
id : null +} + +export function parseBoundaryId(id: string): ParsedBoundaryId | null { + const normalized = id.trim().toLowerCase() + const messageIndex = parseMessageRef(normalized) + if (messageIndex !== null) { + return { + kind: "message", + ref: formatMessageRef(messageIndex), + index: messageIndex, + } + } + + const blockId = parseBlockRef(normalized) + if (blockId !== null) { + return { + kind: "compressed-block", + ref: formatBlockRef(blockId), + blockId, + } + } + + return null +} + +export function formatMessageIdTag(ref: string): string { + return `<${MESSAGE_ID_TAG_NAME}>${ref}` +} + +export function assignMessageRefs(state: SessionState, messages: WithParts[]): number { + let assigned = 0 + + for (const message of messages) { + const rawMessageId = message.info.id + if (typeof rawMessageId !== "string" || rawMessageId.length === 0) { + continue + } + + const existingRef = state.messageIds.byRawId.get(rawMessageId) + if (existingRef) { + if (state.messageIds.byRef.get(existingRef) !== rawMessageId) { + state.messageIds.byRef.set(existingRef, rawMessageId) + } + continue + } + + const ref = allocateNextMessageRef(state) + state.messageIds.byRawId.set(rawMessageId, ref) + state.messageIds.byRef.set(ref, rawMessageId) + assigned++ + } + + return assigned +} + +function allocateNextMessageRef(state: SessionState): string { + let candidate = Number.isInteger(state.messageIds.nextRef) + ? Math.max(MESSAGE_REF_MIN_INDEX, state.messageIds.nextRef) + : MESSAGE_REF_MIN_INDEX + + while (candidate <= MESSAGE_REF_MAX_INDEX) { + const ref = formatMessageRef(candidate) + if (!state.messageIds.byRef.has(ref)) { + state.messageIds.nextRef = candidate + 1 + return ref + } + candidate++ + } + + throw new Error( + `Message ID alias capacity exceeded. 
Cannot allocate more than ${formatMessageRef(MESSAGE_REF_MAX_INDEX)} aliases in this session.`, + ) +} diff --git a/lib/messages/index.ts b/lib/messages/index.ts index 7c9cc89c..78efa7b0 100644 --- a/lib/messages/index.ts +++ b/lib/messages/index.ts @@ -1,2 +1,3 @@ export { prune } from "./prune" export { insertPruneToolContext } from "./inject" +export { insertMessageIdContext } from "./inject" diff --git a/lib/messages/inject.ts b/lib/messages/inject.ts index 915fc533..74f412a1 100644 --- a/lib/messages/inject.ts +++ b/lib/messages/inject.ts @@ -2,12 +2,15 @@ import type { SessionState, WithParts } from "../state" import type { Logger } from "../logger" import type { PluginConfig } from "../config" import type { UserMessage } from "@opencode-ai/sdk/v2" +import { formatMessageIdTag } from "../message-ids" import { renderNudge, renderCompressNudge } from "../prompts" import { extractParameterKey, createSyntheticTextPart, createSyntheticToolPart, isIgnoredUserMessage, + appendMessageIdTagToToolOutput, + findLastToolPart, } from "./utils" import { getFilePathsFromParameters, isProtected } from "../protected-file-patterns" import { getLastUserMessage, isMessageCompacted } from "../shared-utils" @@ -38,7 +41,7 @@ ${content} export const wrapCompressContext = (messageCount: number): string => ` Compress available. Conversation: ${messageCount} messages. Compress collapses completed task sequences or exploration phases into summaries. -Uses text boundaries [startString, endString, topic, summary]. +Uses ID boundaries [startId, endId, topic, summary]. 
` export const wrapCooldownMessage = (flags: { @@ -291,8 +294,6 @@ export const insertPruneToolContext = ( return } - const userInfo = lastUserMessage.info as UserMessage - const lastNonIgnoredMessage = messages.findLast( (msg) => !(msg.info.role === "user" && isIgnoredUserMessage(msg)), ) @@ -306,11 +307,56 @@ export const insertPruneToolContext = ( // For all other cases, append a synthetic tool part to the last message which works // across all models without disrupting their behavior. if (lastNonIgnoredMessage.info.role === "user") { - const textPart = createSyntheticTextPart(lastNonIgnoredMessage, combinedContent) + const textPart = createSyntheticTextPart( + lastNonIgnoredMessage, + combinedContent, + `${lastNonIgnoredMessage.info.id}:context`, + ) lastNonIgnoredMessage.parts.push(textPart) } else { - const modelID = userInfo.model?.modelID || "" - const toolPart = createSyntheticToolPart(lastNonIgnoredMessage, combinedContent, modelID) + const toolPart = createSyntheticToolPart( + lastNonIgnoredMessage, + combinedContent, + modelId ?? "", + `${lastNonIgnoredMessage.info.id}:context`, + ) lastNonIgnoredMessage.parts.push(toolPart) } } + +export const insertMessageIdContext = (state: SessionState, messages: WithParts[]): void => { + const lastUserMessage = getLastUserMessage(messages) + const toolModelId = lastUserMessage + ? ((lastUserMessage.info as UserMessage).model.modelID ?? 
"") + : "" + + for (const message of messages) { + if (message.info.role === "user" && isIgnoredUserMessage(message)) { + continue + } + + const messageRef = state.messageIds.byRawId.get(message.info.id) + if (!messageRef) { + continue + } + + const tag = formatMessageIdTag(messageRef) + const messageIdSeed = `${message.info.id}:message-id:${messageRef}` + + if (message.info.role === "user") { + message.parts.push(createSyntheticTextPart(message, tag, messageIdSeed)) + continue + } + + if (message.info.role !== "assistant") { + continue + } + + const lastToolPart = findLastToolPart(message) + if (lastToolPart && appendMessageIdTagToToolOutput(lastToolPart, tag)) { + continue + } + + message.parts.push(createSyntheticToolPart(message, tag, toolModelId, messageIdSeed)) + } +} diff --git a/lib/messages/prune.ts b/lib/messages/prune.ts index 4f4d5b7e..783647ff 100644 --- a/lib/messages/prune.ts +++ b/lib/messages/prune.ts @@ -9,8 +9,8 @@ const PRUNED_TOOL_OUTPUT_REPLACEMENT = "[Output removed to save context - information superseded or no longer needed]" const PRUNED_TOOL_ERROR_INPUT_REPLACEMENT = "[input removed due to failed tool call]" const PRUNED_QUESTION_INPUT_REPLACEMENT = "[questions removed - see output for user's answers]" -const PRUNED_COMPRESS_INPUT_REPLACEMENT = - "[compress content removed - topic retained for reference]" +const PRUNED_COMPRESS_SUMMARY_REPLACEMENT = + "[summary removed to save context - see injected compressed block]" export const prune = ( state: SessionState, @@ -109,8 +109,9 @@ const pruneToolInputs = (state: SessionState, logger: Logger, messages: WithPart continue } if (part.tool === "compress" && part.state.status === "completed") { - if (part.state.input?.content !== undefined) { - part.state.input.content = PRUNED_COMPRESS_INPUT_REPLACEMENT + const content = part.state.input?.content + if (content && typeof content === "object" && "summary" in content) { + content.summary = PRUNED_COMPRESS_SUMMARY_REPLACEMENT } continue } @@ 
-187,8 +188,14 @@ const filterCompressedRanges = ( if (userMessage) { const userInfo = userMessage.info as UserMessage const summaryContent = summary.summary + const summarySeed = `${summary.blockId}:${summary.anchorMessageId}` result.push( - createSyntheticUserMessage(userMessage, summaryContent, userInfo.variant), + createSyntheticUserMessage( + userMessage, + summaryContent, + userInfo.variant, + summarySeed, + ), ) logger.info("Injected compress summary", { diff --git a/lib/messages/utils.ts b/lib/messages/utils.ts index a57d626e..0e1d69c0 100644 --- a/lib/messages/utils.ts +++ b/lib/messages/utils.ts @@ -1,12 +1,18 @@ -import { ulid } from "ulid" +import { createHash } from "node:crypto" import { isMessageCompacted } from "../shared-utils" import { Logger } from "../logger" import type { SessionState, WithParts } from "../state" import type { UserMessage } from "@opencode-ai/sdk/v2" -export const COMPRESS_SUMMARY_PREFIX = "[Compressed conversation block]\n\n" +const SUMMARY_ID_HASH_LENGTH = 16 -const generateUniqueId = (prefix: string): string => `${prefix}_${ulid()}` +const generateStableId = (prefix: string, seed: string): string => { + const hash = createHash("sha256").update(seed).digest("hex").slice(0, SUMMARY_ID_HASH_LENGTH) + return `${prefix}_${hash}` +} + +type MessagePart = WithParts["parts"][number] +type ToolPart = Extract const isGeminiModel = (modelID: string): boolean => { const lowerModelID = modelID.toLowerCase() @@ -17,11 +23,13 @@ export const createSyntheticUserMessage = ( baseMessage: WithParts, content: string, variant?: string, + stableSeed?: string, ): WithParts => { const userInfo = baseMessage.info as UserMessage const now = Date.now() - const messageId = generateUniqueId("msg") - const partId = generateUniqueId("prt") + const deterministicSeed = stableSeed?.trim() || userInfo.id + const messageId = generateStableId("msg_dcp_summary", deterministicSeed) + const partId = generateStableId("prt_dcp_summary", deterministicSeed) return { 
info: { @@ -45,9 +53,14 @@ export const createSyntheticUserMessage = ( } } -export const createSyntheticTextPart = (baseMessage: WithParts, content: string) => { +export const createSyntheticTextPart = ( + baseMessage: WithParts, + content: string, + stableSeed?: string, +) => { const userInfo = baseMessage.info as UserMessage - const partId = generateUniqueId("prt") + const deterministicSeed = stableSeed?.trim() || userInfo.id + const partId = generateStableId("prt_dcp_text", deterministicSeed) return { id: partId, @@ -62,12 +75,14 @@ export const createSyntheticToolPart = ( baseMessage: WithParts, content: string, modelID: string, + stableSeed?: string, ) => { const userInfo = baseMessage.info as UserMessage const now = Date.now() - const partId = generateUniqueId("prt") - const callId = generateUniqueId("call") + const deterministicSeed = stableSeed?.trim() || userInfo.id + const partId = generateStableId("prt_dcp_tool", deterministicSeed) + const callId = generateStableId("call_dcp_tool", deterministicSeed) // Gemini requires thoughtSignature bypass to accept synthetic tool parts const toolPartMetadata = isGeminiModel(modelID) @@ -92,6 +107,30 @@ export const createSyntheticToolPart = ( } } +export const appendMessageIdTagToToolOutput = (part: ToolPart, tag: string): boolean => { + if (part.state?.status !== "completed" || typeof part.state.output !== "string") { + return false + } + if (part.state.output.includes(tag)) { + return true + } + + const separator = part.state.output.length > 0 && !part.state.output.endsWith("\n") ? "\n" : "" + part.state.output = `${part.state.output}${separator}${tag}` + return true +} + +export const findLastToolPart = (message: WithParts): ToolPart | null => { + for (let i = message.parts.length - 1; i >= 0; i--) { + const part = message.parts[i] + if (part.type === "tool") { + return part + } + } + + return null +} + /** * Extracts a human-readable key from tool metadata for display purposes. 
*/ diff --git a/lib/prompts/compress.md b/lib/prompts/compress.md index 69ad412b..94bc3279 100644 --- a/lib/prompts/compress.md +++ b/lib/prompts/compress.md @@ -8,6 +8,8 @@ Think of compression as phase transitions: raw exploration becomes refined under THE SUMMARY Your summary must be EXHAUSTIVE. Capture file paths, function signatures, decisions made, constraints discovered, key findings... EVERYTHING that maintains context integrity. This is not a brief note - it is an authoritative record so faithful that the original conversation adds no value. +When the selected range includes user messages, preserve the user's intent with extra care. Do not change scope, constraints, priorities, acceptance criteria, or requested outcomes. + Yet be LEAN. Strip away the noise: failed attempts that led nowhere, verbose tool outputs, back-and-forth exploration. What remains should be pure signal - golden nuggets of detail that preserve full understanding with zero ambiguity. THE WAYS OF COMPRESS @@ -24,28 +26,36 @@ You're mid-sprint on related functionality Before compressing, ask: _"Is this chapter closed?"_ Compression is irreversible. The summary replaces everything in the range. -BOUNDARY MATCHING -You specify boundaries by matching unique text strings in the conversation. CRITICAL: In code-centric conversations, strings repeat often. Provide sufficiently unique text to match exactly once. If a match fails (not found or found multiple times), the tool will error - extend your boundary string with more surrounding context in order to make SURE the tool does NOT error. +BOUNDARY IDS +You specify boundaries by ID. + +Use the injected IDs visible in the conversation: + +- `mNNNN` IDs identify raw messages +- `bN` IDs identify previously compressed blocks + +Rules: + +- Pick `startId` and `endId` directly from injected IDs in context. +- IDs must exist in the current visible context. +- `startId` must appear before `endId`. +- Do not invent IDs. 
-WHERE TO PICK STRINGS FROM (important for reliable matching): +COMPRESSED BLOCK PLACEHOLDERS +When the selected range includes previously compressed blocks, use placeholders in this exact format: -- Your own assistant text responses (MOST RELIABLE - always stored verbatim) -- The user's own words in their messages -- Tool result output text (distinctive substrings within the output) -- Previous compress summaries -- Tool input string values (individual values, not whole serialized objects) +- `(bN)` -WHERE TO NEVER PICK STRINGS FROM: +Rules: -- `` tags or any XML wrapper/meta-commentary around messages -- Injected system instructions (plan mode text, max-steps warnings, mode-switch text, environment info) -- File/directory listing framing text (e.g. "Called the Read tool with the following input...") -- Strings that span across message or part boundaries -- Entire serialized JSON objects (key ordering may differ - pick a distinctive substring within instead) +- Include every required placeholder exactly once. +- Do not include placeholders for blocks outside the selected range. +- Treat `(bN)` placeholders as reserved tokens and only use them intentionally. +- If needed in prose, refer to a block as plain text like `compressed b3` (not as a placeholder token). 
THE FORMAT OF COMPRESS `topic`: Short label (3-5 words) for display - e.g., "Auth System Exploration" `content`: Object containing: -`startString`: Unique text string marking the beginning of the range -`endString`: Unique text string marking the end of the range +`startId`: Boundary ID marking the beginning of the range (`mNNNN` or `bN`) +`endId`: Boundary ID marking the end of the range (`mNNNN` or `bN`) `summary`: Complete technical summary replacing all content in the range diff --git a/lib/prompts/system.md b/lib/prompts/system.md index 48ce2a4b..6ff9b717 100644 --- a/lib/prompts/system.md +++ b/lib/prompts/system.md @@ -12,11 +12,11 @@ AVAILABLE TOOLS FOR CONTEXT MANAGEMENT THE COMPRESS TOOL -`compress` is a sledgehammer and should be used accordingly. It's purpose is to reduce whole part of the conversation to its essence and technical details in order to leave room for newer context. Your summary MUST be technical and specific enough to preserve FULL understanding of WHAT TRANSPIRED, such that NO AMBIGUITY remains about what was done, found, or decided. Your compress summary must be thorough and precise. `compress` will replace everything in the range you match, user and assistant messages, tool inputs and outputs. It is preferred to not compress preemptively, but rather wait for natural breakpoints in the conversation. Those breakpoints are to be infered from user messages. You WILL NOT compress based on thinking that you are done with the task, wait for conversation queues that the user has moved on from current phase. +`compress` is a sledgehammer and should be used accordingly. It's purpose is to reduce whole part of the conversation to its essence and technical details in order to leave room for newer context. Your summary MUST be technical and specific enough to preserve FULL understanding of WHAT TRANSPIRED, such that NO AMBIGUITY remains about what was done, found, or decided. Your compress summary must be thorough and precise. 
`compress` will replace everything in the range you match, user and assistant messages, tool inputs and outputs. It is preferred to not compress preemptively, but rather wait for natural breakpoints in the conversation. Those breakpoints are to be inferred from user messages. You WILL NOT compress based on thinking that you are done with the task, wait for conversation cues that the user has moved on from current phase. Use injected boundary IDs (`startId`/`endId`) to select ranges.
 
 This tool will typically be used at the end of a phase of work, when conversation starts to accumulate noise that would better served summarized, or when you've done significant exploration and can FULLY synthesize your findings and understanding into a technical summary.
 
-Make sure to match enough of the context with start and end strings so you're not faced with an error calling the tool. Be VERY CAREFUL AND CONSERVATIVE when using `compress`.
+Use only injected `mNNNN`/`bN` IDs that are visible in the current context. If compressed blocks are included in your range, preserve their content through required `(bN)` placeholders in your summary. Be VERY CAREFUL AND CONSERVATIVE when using `compress`.
THE PRUNE TOOL diff --git a/lib/state/persistence.ts b/lib/state/persistence.ts index 725ffc71..4652ce18 100644 --- a/lib/state/persistence.ts +++ b/lib/state/persistence.ts @@ -110,21 +110,57 @@ export async function loadSessionState( } if (Array.isArray(state.compressSummaries)) { - const validSummaries = state.compressSummaries.filter( - (s): s is CompressSummary => - s !== null && - typeof s === "object" && - typeof s.anchorMessageId === "string" && - typeof s.summary === "string", - ) - if (validSummaries.length !== state.compressSummaries.length) { + const migratedSummaries: CompressSummary[] = [] + let nextBlockId = 1 + + for (const entry of state.compressSummaries) { + if ( + entry === null || + typeof entry !== "object" || + typeof entry.anchorMessageId !== "string" || + typeof entry.summary !== "string" + ) { + continue + } + + const blockId = + typeof entry.blockId === "number" && Number.isInteger(entry.blockId) + ? entry.blockId + : nextBlockId + migratedSummaries.push({ + blockId, + anchorMessageId: entry.anchorMessageId, + summary: entry.summary, + }) + nextBlockId = Math.max(nextBlockId, blockId + 1) + } + + if (migratedSummaries.length !== state.compressSummaries.length) { logger.warn("Filtered out malformed compressSummaries entries", { sessionId: sessionId, original: state.compressSummaries.length, - valid: validSummaries.length, + valid: migratedSummaries.length, }) } - state.compressSummaries = validSummaries + + const seenBlockIds = new Set() + const dedupedSummaries = migratedSummaries.filter((summary) => { + if (seenBlockIds.has(summary.blockId)) { + return false + } + seenBlockIds.add(summary.blockId) + return true + }) + + if (dedupedSummaries.length !== migratedSummaries.length) { + logger.warn("Removed duplicate compress block IDs", { + sessionId: sessionId, + original: migratedSummaries.length, + valid: dedupedSummaries.length, + }) + } + + state.compressSummaries = dedupedSummaries } else { state.compressSummaries = [] } diff --git 
a/lib/state/state.ts b/lib/state/state.ts index d6e5296f..a9f34ddf 100644 --- a/lib/state/state.ts +++ b/lib/state/state.ts @@ -1,6 +1,6 @@ import type { SessionState, ToolParameterEntry, WithParts } from "./types" import type { Logger } from "../logger" -import { loadSessionState } from "./persistence" +import { loadSessionState, saveSessionState } from "./persistence" import { isSubAgentSession, findLastCompactionTimestamp, @@ -47,6 +47,12 @@ export const checkSession = async ( logger.info("Detected compaction - reset stale state", { timestamp: lastCompactionTimestamp, }) + + saveSessionState(state, logger).catch((error) => { + logger.warn("Failed to persist state reset after compaction", { + error: error instanceof Error ? error.message : String(error), + }) + }) } state.currentTurn = countTurns(state, messages) @@ -69,6 +75,11 @@ export function createSessionState(): SessionState { }, toolParameters: new Map(), toolIdList: [], + messageIds: { + byRawId: new Map(), + byRef: new Map(), + nextRef: 0, + }, nudgeCounter: 0, lastToolPrune: false, lastCompaction: 0, @@ -94,6 +105,11 @@ export function resetSessionState(state: SessionState): void { } state.toolParameters.clear() state.toolIdList = [] + state.messageIds = { + byRawId: new Map(), + byRef: new Map(), + nextRef: 0, + } state.nudgeCounter = 0 state.lastToolPrune = false state.lastCompaction = 0 diff --git a/lib/state/types.ts b/lib/state/types.ts index b9942289..218756cc 100644 --- a/lib/state/types.ts +++ b/lib/state/types.ts @@ -22,6 +22,7 @@ export interface SessionStats { } export interface CompressSummary { + blockId: number anchorMessageId: string summary: string } @@ -36,6 +37,12 @@ export interface PendingManualTrigger { prompt: string } +export interface MessageIdState { + byRawId: Map + byRef: Map + nextRef: number +} + export interface SessionState { sessionId: string | null isSubAgent: boolean @@ -46,6 +53,7 @@ export interface SessionState { stats: SessionStats toolParameters: Map toolIdList: 
string[] + messageIds: MessageIdState nudgeCounter: number lastToolPrune: boolean lastCompaction: number diff --git a/lib/state/utils.ts b/lib/state/utils.ts index 1550f014..f5e0918b 100644 --- a/lib/state/utils.ts +++ b/lib/state/utils.ts @@ -50,6 +50,11 @@ export function resetOnCompaction(state: SessionState): void { state.prune.tools = new Map() state.prune.messages = new Map() state.compressSummaries = [] + state.messageIds = { + byRawId: new Map(), + byRef: new Map(), + nextRef: 0, + } state.nudgeCounter = 0 state.lastToolPrune = false } diff --git a/lib/tools/compress-utils.ts b/lib/tools/compress-utils.ts new file mode 100644 index 00000000..11c75f93 --- /dev/null +++ b/lib/tools/compress-utils.ts @@ -0,0 +1,598 @@ +import type { SessionState, WithParts, CompressSummary } from "../state" +import { formatBlockRef, formatMessageIdTag, parseBoundaryId } from "../message-ids" +import { isIgnoredUserMessage } from "../messages/utils" +import { countMessageTextTokens } from "../strategies/utils" + +const BLOCK_PLACEHOLDER_REGEX = /\(b(\d+)\)|\{block_(\d+)\}/gi + +export interface CompressToolArgs { + topic: string + content: { + startId: string + endId: string + summary: string + } +} + +export interface BoundaryReference { + kind: "message" | "compressed-block" + rawIndex: number + messageId?: string + blockId?: number + anchorMessageId?: string +} + +export interface SearchContext { + rawMessages: WithParts[] + rawMessagesById: Map + rawIndexById: Map + summaryByBlockId: Map +} + +export interface RangeResolution { + startReference: BoundaryReference + endReference: BoundaryReference + messageIds: string[] + messageTokenById: Map + toolIds: string[] + requiredBlockIds: number[] +} + +export interface ParsedBlockPlaceholder { + raw: string + blockId: number + startIndex: number + endIndex: number +} + +export interface InjectedSummaryResult { + expandedSummary: string + consumedBlockIds: number[] +} + +export function formatCompressedBlockHeader(_blockId: 
number): string { + return "[Compressed conversation section]" +} + +export function formatCompressedBlockFooter(blockId: number): string { + return formatMessageIdTag(formatBlockRef(blockId)) +} + +export function formatBlockPlaceholder(blockId: number): string { + return `(b${blockId})` +} + +export function validateCompressArgs(args: CompressToolArgs): void { + if (typeof args.topic !== "string" || args.topic.trim().length === 0) { + throw new Error("topic is required and must be a non-empty string") + } + + if (typeof args.content?.startId !== "string" || args.content.startId.trim().length === 0) { + throw new Error("content.startId is required and must be a non-empty string") + } + + if (typeof args.content?.endId !== "string" || args.content.endId.trim().length === 0) { + throw new Error("content.endId is required and must be a non-empty string") + } + + if (typeof args.content?.summary !== "string" || args.content.summary.trim().length === 0) { + throw new Error("content.summary is required and must be a non-empty string") + } +} + +export async function fetchSessionMessages(client: any, sessionId: string): Promise { + const response = await client.session.messages({ + path: { id: sessionId }, + }) + + const payload = (response?.data || response) as WithParts[] + return Array.isArray(payload) ? 
payload : [] +} + +export function buildSearchContext(state: SessionState, rawMessages: WithParts[]): SearchContext { + const rawMessagesById = new Map() + const rawIndexById = new Map() + for (const msg of rawMessages) { + rawMessagesById.set(msg.info.id, msg) + } + for (let index = 0; index < rawMessages.length; index++) { + const message = rawMessages[index] + if (!message) { + continue + } + rawIndexById.set(message.info.id, index) + } + + const summaryByBlockId = new Map() + for (const summary of state.compressSummaries || []) { + summaryByBlockId.set(summary.blockId, summary) + } + + return { + rawMessages, + rawMessagesById, + rawIndexById, + summaryByBlockId, + } +} + +export function resolveBoundaryIds( + context: SearchContext, + state: SessionState, + startId: string, + endId: string, +): { startReference: BoundaryReference; endReference: BoundaryReference } { + const lookup = buildBoundaryReferenceLookup(context, state) + const issues: string[] = [] + const parsedStartId = parseBoundaryId(startId) + const parsedEndId = parseBoundaryId(endId) + + if (parsedStartId === null) { + issues.push("startId is invalid. Use an injected message ID (mNNNN) or block ID (bN).") + } + + if (parsedEndId === null) { + issues.push("endId is invalid. Use an injected message ID (mNNNN) or block ID (bN).") + } + + if (issues.length > 0) { + throwCombinedIssues(issues) + } + + const validStartId = parsedStartId as NonNullable + const validEndId = parsedEndId as NonNullable + + const startReference = lookup.get(validStartId.ref) + const endReference = lookup.get(validEndId.ref) + + if (!startReference) { + issues.push( + `startId ${validStartId.ref} is not available in the current conversation context. Choose an injected ID visible in context.`, + ) + } + + if (!endReference) { + issues.push( + `endId ${validEndId.ref} is not available in the current conversation context. 
Choose an injected ID visible in context.`, + ) + } + + if (issues.length > 0) { + throwCombinedIssues(issues) + } + + if (!startReference || !endReference) { + throw new Error("Failed to resolve boundary IDs") + } + + if (startReference.rawIndex > endReference.rawIndex) { + throw new Error( + `startId ${validStartId.ref} appears after endId ${validEndId.ref} in the conversation. Start must come before end.`, + ) + } + + return { startReference, endReference } +} + +function buildBoundaryReferenceLookup( + context: SearchContext, + state: SessionState, +): Map { + const lookup = new Map() + + for (const [messageRef, messageId] of state.messageIds.byRef) { + const rawMessage = context.rawMessagesById.get(messageId) + if (!rawMessage) { + continue + } + if (rawMessage.info.role === "user" && isIgnoredUserMessage(rawMessage)) { + continue + } + + const rawIndex = context.rawIndexById.get(messageId) + if (rawIndex === undefined) { + continue + } + lookup.set(messageRef, { + kind: "message", + rawIndex, + messageId, + }) + } + + const summaries = Array.from(context.summaryByBlockId.values()).sort( + (a, b) => a.blockId - b.blockId, + ) + for (const summary of summaries) { + const anchorMessage = context.rawMessagesById.get(summary.anchorMessageId) + if (!anchorMessage) { + continue + } + if (anchorMessage.info.role === "user" && isIgnoredUserMessage(anchorMessage)) { + continue + } + + const rawIndex = context.rawIndexById.get(summary.anchorMessageId) + if (rawIndex === undefined) { + continue + } + const blockRef = formatBlockRef(summary.blockId) + if (!lookup.has(blockRef)) { + lookup.set(blockRef, { + kind: "compressed-block", + rawIndex, + blockId: summary.blockId, + anchorMessageId: summary.anchorMessageId, + }) + } + } + + return lookup +} + +export function resolveRange( + context: SearchContext, + startReference: BoundaryReference, + endReference: BoundaryReference, +): RangeResolution { + const startRawIndex = startReference.rawIndex + const endRawIndex = 
endReference.rawIndex + const messageIds: string[] = [] + const messageSeen = new Set() + const toolIds: string[] = [] + const toolSeen = new Set() + const requiredBlockIds: number[] = [] + const requiredBlockSeen = new Set() + const messageTokenById = new Map() + + for (let index = startRawIndex; index <= endRawIndex; index++) { + const rawMessage = context.rawMessages[index] + if (!rawMessage) { + continue + } + if (rawMessage.info.role === "user" && isIgnoredUserMessage(rawMessage)) { + continue + } + + const messageId = rawMessage.info.id + if (!messageSeen.has(messageId)) { + messageSeen.add(messageId) + messageIds.push(messageId) + } + + if (!messageTokenById.has(messageId)) { + messageTokenById.set(messageId, countMessageTextTokens(rawMessage)) + } + + const parts = Array.isArray(rawMessage.parts) ? rawMessage.parts : [] + for (const part of parts) { + if (part.type !== "tool" || !part.callID) { + continue + } + if (toolSeen.has(part.callID)) { + continue + } + toolSeen.add(part.callID) + toolIds.push(part.callID) + } + } + + const rangeMessageIdSet = new Set(messageIds) + const summariesInRange: Array<{ blockId: number; rawIndex: number }> = [] + for (const summary of context.summaryByBlockId.values()) { + if (!rangeMessageIdSet.has(summary.anchorMessageId)) { + continue + } + + const anchorIndex = context.rawIndexById.get(summary.anchorMessageId) + if (anchorIndex === undefined) { + continue + } + + summariesInRange.push({ + blockId: summary.blockId, + rawIndex: anchorIndex, + }) + } + + summariesInRange.sort((a, b) => a.rawIndex - b.rawIndex || a.blockId - b.blockId) + for (const summary of summariesInRange) { + if (requiredBlockSeen.has(summary.blockId)) { + continue + } + requiredBlockSeen.add(summary.blockId) + requiredBlockIds.push(summary.blockId) + } + + if (messageIds.length === 0) { + throw new Error( + "Failed to map boundary matches back to raw messages. 
Choose boundaries that include original conversation messages.", + ) + } + + return { + startReference, + endReference, + messageIds, + messageTokenById, + toolIds, + requiredBlockIds, + } +} + +export function resolveAnchorMessageId(startReference: BoundaryReference): string { + if (startReference.kind === "compressed-block") { + if (!startReference.anchorMessageId) { + throw new Error("Failed to map boundary matches back to raw messages") + } + return startReference.anchorMessageId + } + + if (!startReference.messageId) { + throw new Error("Failed to map boundary matches back to raw messages") + } + return startReference.messageId +} + +export function parseBlockPlaceholders(summary: string): ParsedBlockPlaceholder[] { + const placeholders: ParsedBlockPlaceholder[] = [] + const regex = new RegExp(BLOCK_PLACEHOLDER_REGEX) + + let match: RegExpExecArray | null + while ((match = regex.exec(summary)) !== null) { + const full = match[0] + const blockIdPart = match[1] || match[2] + const parsed = Number.parseInt(blockIdPart, 10) + if (!Number.isInteger(parsed)) { + continue + } + + placeholders.push({ + raw: full, + blockId: parsed, + startIndex: match.index, + endIndex: match.index + full.length, + }) + } + + return placeholders +} + +export function validateSummaryPlaceholders( + placeholders: ParsedBlockPlaceholder[], + requiredBlockIds: number[], + startReference: BoundaryReference, + endReference: BoundaryReference, + summaryByBlockId: Map, +): void { + const issues: string[] = [] + + const boundaryOptionalIds = new Set() + if (startReference.kind === "compressed-block") { + if (startReference.blockId === undefined) { + issues.push("Failed to map boundary matches back to raw messages") + } else { + boundaryOptionalIds.add(startReference.blockId) + } + } + if (endReference.kind === "compressed-block") { + if (endReference.blockId === undefined) { + issues.push("Failed to map boundary matches back to raw messages") + } else { + 
boundaryOptionalIds.add(endReference.blockId) + } + } + + const strictRequiredIds = requiredBlockIds.filter((id) => !boundaryOptionalIds.has(id)) + const requiredSet = new Set(requiredBlockIds) + const placeholderIds = placeholders.map((p) => p.blockId) + const placeholderSet = new Set() + const duplicateIds = new Set() + + for (const id of placeholderIds) { + if (placeholderSet.has(id)) { + duplicateIds.add(id) + continue + } + placeholderSet.add(id) + } + + const missing = strictRequiredIds.filter((id) => !placeholderSet.has(id)) + if (missing.length > 0) { + issues.push( + `Missing required block placeholders: ${missing.map(formatBlockPlaceholder).join(", ")}`, + ) + } + + const unknown = placeholderIds.filter((id) => !summaryByBlockId.has(id)) + if (unknown.length > 0) { + const uniqueUnknown = [...new Set(unknown)] + issues.push( + `Unknown block placeholders: ${uniqueUnknown.map(formatBlockPlaceholder).join(", ")}`, + ) + } + + const invalid = placeholderIds.filter((id) => !requiredSet.has(id)) + if (invalid.length > 0) { + const uniqueInvalid = [...new Set(invalid)] + issues.push( + `Invalid block placeholders for selected range: ${uniqueInvalid.map(formatBlockPlaceholder).join(", ")}`, + ) + } + + if (duplicateIds.size > 0) { + issues.push( + `Duplicate block placeholders are not allowed: ${[...duplicateIds] + .map(formatBlockPlaceholder) + .join(", ")}`, + ) + } + + if (issues.length > 0) { + throwCombinedIssues(issues) + } +} + +export function injectBlockPlaceholders( + summary: string, + placeholders: ParsedBlockPlaceholder[], + summaryByBlockId: Map, + startReference: BoundaryReference, + endReference: BoundaryReference, +): InjectedSummaryResult { + let cursor = 0 + let expanded = summary + const consumed: number[] = [] + const consumedSeen = new Set() + + if (placeholders.length > 0) { + expanded = "" + for (const placeholder of placeholders) { + const target = summaryByBlockId.get(placeholder.blockId) + if (!target) { + throw new Error( + 
`Compressed block not found: ${formatBlockPlaceholder(placeholder.blockId)}`, + ) + } + + expanded += summary.slice(cursor, placeholder.startIndex) + expanded += restoreStoredCompressedSummary(target.summary) + cursor = placeholder.endIndex + + if (!consumedSeen.has(placeholder.blockId)) { + consumedSeen.add(placeholder.blockId) + consumed.push(placeholder.blockId) + } + } + + expanded += summary.slice(cursor) + } + + expanded = injectBoundarySummaryIfMissing( + expanded, + startReference, + "start", + summaryByBlockId, + consumed, + consumedSeen, + ) + expanded = injectBoundarySummaryIfMissing( + expanded, + endReference, + "end", + summaryByBlockId, + consumed, + consumedSeen, + ) + + return { + expandedSummary: expanded, + consumedBlockIds: consumed, + } +} + +export function allocateBlockId(summaries: CompressSummary[]): number { + if (summaries.length === 0) { + return 1 + } + + let max = 0 + for (const summary of summaries) { + if (summary.blockId > max) { + max = summary.blockId + } + } + return max + 1 +} + +export function addCompressedBlockHeader(blockId: number, summary: string): string { + const header = formatCompressedBlockHeader(blockId) + const footer = formatCompressedBlockFooter(blockId) + const body = summary.trim() + if (body.length === 0) { + return `${header}\n${footer}` + } + return `${header}\n${body}\n\n${footer}` +} + +export function upsertCompressedSummary( + summaries: CompressSummary[], + blockId: number, + anchorMessageId: string, + summary: string, + consumedBlockIds: number[], +): CompressSummary[] { + const consumed = new Set(consumedBlockIds) + const next = summaries.filter((s) => !consumed.has(s.blockId)) + next.push({ + blockId, + anchorMessageId, + summary, + }) + return next +} + +function restoreStoredCompressedSummary(summary: string): string { + const headerMatch = summary.match(/^\s*\[Compressed conversation(?: section)?(?: b\d+)?\]/i) + if (!headerMatch) { + return summary + } + + const afterHeader = 
summary.slice(headerMatch[0].length) + const withoutLeadingBreaks = afterHeader.replace(/^(?:\r?\n)+/, "") + return withoutLeadingBreaks + .replace(/(?:\r?\n)*b\d+<\/dcp-message-id>\s*$/i, "") + .replace(/(?:\r?\n)+$/, "") +} + +function injectBoundarySummaryIfMissing( + summary: string, + reference: BoundaryReference, + position: "start" | "end", + summaryByBlockId: Map, + consumed: number[], + consumedSeen: Set, +): string { + if (reference.kind !== "compressed-block" || reference.blockId === undefined) { + return summary + } + if (consumedSeen.has(reference.blockId)) { + return summary + } + + const target = summaryByBlockId.get(reference.blockId) + if (!target) { + throw new Error(`Compressed block not found: ${formatBlockPlaceholder(reference.blockId)}`) + } + + const injectedBody = restoreStoredCompressedSummary(target.summary) + const next = + position === "start" + ? mergeWithSpacing(injectedBody, summary) + : mergeWithSpacing(summary, injectedBody) + + consumedSeen.add(reference.blockId) + consumed.push(reference.blockId) + return next +} + +function mergeWithSpacing(left: string, right: string): string { + const l = left.trim() + const r = right.trim() + + if (!l) { + return right + } + if (!r) { + return left + } + return `${l}\n\n${r}` +} + +function throwCombinedIssues(issues: string[]): never { + if (issues.length === 1) { + throw new Error(issues[0]) + } + + throw new Error(issues.map((issue) => `- ${issue}`).join("\n")) +} diff --git a/lib/tools/compress.ts b/lib/tools/compress.ts index 0367649b..9408642b 100644 --- a/lib/tools/compress.ts +++ b/lib/tools/compress.ts @@ -1,16 +1,28 @@ import { tool } from "@opencode-ai/plugin" -import type { WithParts, CompressSummary } from "../state" +import type { BoundaryReference, CompressToolArgs } from "./compress-utils" import type { PruneToolContext } from "./types" import { ensureSessionInitialized } from "../state" import { saveSessionState } from "../state/persistence" import { loadPrompt } from 
"../prompts" -import { getCurrentParams, getTotalToolTokens, countMessageTextTokens } from "../strategies/utils" -import { findStringInMessages, collectToolIdsInRange, collectMessageIdsInRange } from "./utils" +import { getCurrentParams, getTotalToolTokens } from "../strategies/utils" import { sendCompressNotification } from "../ui/notification" -import { prune as applyPruneTransforms } from "../messages/prune" +import { assignMessageRefs } from "../message-ids" +import { + addCompressedBlockHeader, + allocateBlockId, + buildSearchContext, + fetchSessionMessages, + injectBlockPlaceholders, + parseBlockPlaceholders, + resolveAnchorMessageId, + resolveBoundaryIds, + resolveRange, + upsertCompressedSummary, + validateCompressArgs, + validateSummaryPlaceholders, +} from "./compress-utils" const COMPRESS_TOOL_DESCRIPTION = loadPrompt("compress-tool-spec") -const COMPRESS_SUMMARY_PREFIX = "[Compressed conversation block]\n\n" export function createCompressTool(ctx: PruneToolContext): ReturnType { return tool({ @@ -21,17 +33,19 @@ export function createCompressTool(ctx: PruneToolContext): ReturnType m.info.id === startResult.messageId) - let rawEndIndex = messages.findIndex((m) => m.info.id === endResult.messageId) - - // If a boundary matched inside a synthetic compress summary message, - // resolve it back to the summary's anchor message in the raw messages - if (rawStartIndex === -1) { - const summary = state.compressSummaries.find((s) => s.summary.includes(startString)) - if (summary) { - rawStartIndex = messages.findIndex((m) => m.info.id === summary.anchorMessageId) - } - } - if (rawEndIndex === -1) { - const summary = state.compressSummaries.find((s) => s.summary.includes(endString)) - if (summary) { - rawEndIndex = messages.findIndex((m) => m.info.id === summary.anchorMessageId) - } - } - - if (rawStartIndex === -1 || rawEndIndex === -1) { - throw new Error(`Failed to map boundary matches back to raw messages`) - } + const searchContext = buildSearchContext(state, 
rawMessages) - if (rawStartIndex > rawEndIndex) { - throw new Error( - `startString appears after endString in the conversation. Start must come before end.`, - ) - } + const { startReference, endReference } = resolveBoundaryIds( + searchContext, + state, + compressArgs.content.startId, + compressArgs.content.endId, + ) - const containedToolIds = collectToolIdsInRange(messages, rawStartIndex, rawEndIndex) + const range = resolveRange(searchContext, startReference, endReference) + const anchorMessageId = resolveAnchorMessageId(range.startReference) - const containedMessageIds = collectMessageIdsInRange( - messages, - rawStartIndex, - rawEndIndex, + const parsedPlaceholders = parseBlockPlaceholders(compressArgs.content.summary) + validateSummaryPlaceholders( + parsedPlaceholders, + range.requiredBlockIds, + range.startReference, + range.endReference, + searchContext.summaryByBlockId, ) - // Remove any existing summaries whose anchors are now inside this range - // This prevents duplicate injections when a larger compress subsumes a smaller one - const removedSummaries = state.compressSummaries.filter((s) => - containedMessageIds.includes(s.anchorMessageId), + const injected = injectBlockPlaceholders( + compressArgs.content.summary, + parsedPlaceholders, + searchContext.summaryByBlockId, + range.startReference, + range.endReference, ) - if (removedSummaries.length > 0) { - state.compressSummaries = state.compressSummaries.filter( - (s) => !containedMessageIds.includes(s.anchorMessageId), - ) - } - const compressSummary: CompressSummary = { - anchorMessageId: startResult.messageId, - summary: COMPRESS_SUMMARY_PREFIX + summary, - } - state.compressSummaries.push(compressSummary) + const blockId = allocateBlockId(state.compressSummaries) + const storedSummary = addCompressedBlockHeader(blockId, injected.expandedSummary) - const compressedMessageIds = containedMessageIds.filter( - (id) => !state.prune.messages.has(id), + state.compressSummaries = upsertCompressedSummary( 
+ state.compressSummaries, + blockId, + anchorMessageId, + storedSummary, + injected.consumedBlockIds, ) - const compressedToolIds = containedToolIds.filter((id) => !state.prune.tools.has(id)) + const compressedMessageIds = range.messageIds.filter( + (id) => !state.prune.messages.has(id), + ) let textTokens = 0 - for (const msgId of compressedMessageIds) { - const msg = messages.find((m) => m.info.id === msgId) - if (msg) { - const tokens = countMessageTextTokens(msg) - textTokens += tokens - state.prune.messages.set(msgId, tokens) - } + for (const messageId of compressedMessageIds) { + const tokenCount = range.messageTokenById.get(messageId) || 0 + textTokens += tokenCount + state.prune.messages.set(messageId, tokenCount) } + + const compressedToolIds = range.toolIds.filter((id) => !state.prune.tools.has(id)) const toolTokens = getTotalToolTokens(state, compressedToolIds) for (const id of compressedToolIds) { const entry = state.toolParameters.get(id) state.prune.tools.set(id, entry?.tokenCount ?? 
0) } - const estimatedCompressedTokens = textTokens + toolTokens + const estimatedCompressedTokens = textTokens + toolTokens state.stats.pruneTokenCounter += estimatedCompressedTokens - const rawStartResult = { messageId: startResult.messageId, messageIndex: rawStartIndex } - const rawEndResult = { messageId: endResult.messageId, messageIndex: rawEndIndex } + const rawStartResult = { + messageId: getBoundaryMessageId(startReference), + messageIndex: startReference.rawIndex, + } + const rawEndResult = { + messageId: getBoundaryMessageId(endReference), + messageIndex: endReference.rawIndex, + } - const currentParams = getCurrentParams(state, messages, logger) + const currentParams = getCurrentParams(state, rawMessages, logger) await sendCompressNotification( client, logger, @@ -188,11 +152,11 @@ export function createCompressTool(ctx: PruneToolContext): ReturnType - logger.error("Failed to persist state", { error: err.message }), - ) + await saveSessionState(state, logger) return `Compressed ${compressedMessageIds.length} messages (${compressedToolIds.length} tool calls) into summary. 
The content will be replaced with your summary.` }, }) } + +function getBoundaryMessageId(reference: BoundaryReference): string { + if (reference.kind === "message") { + if (!reference.messageId) { + throw new Error("Failed to map boundary matches back to raw messages") + } + return reference.messageId + } + + if (!reference.anchorMessageId) { + throw new Error("Failed to map boundary matches back to raw messages") + } + return reference.anchorMessageId +} diff --git a/lib/tools/utils.ts b/lib/tools/utils.ts deleted file mode 100644 index fe47a735..00000000 --- a/lib/tools/utils.ts +++ /dev/null @@ -1,244 +0,0 @@ -import { partial_ratio } from "fuzzball" -import type { WithParts } from "../state" -import type { Logger } from "../logger" -import { isIgnoredUserMessage } from "../messages/utils" - -export interface FuzzyConfig { - minScore: number - minGap: number -} - -export const DEFAULT_FUZZY_CONFIG: FuzzyConfig = { - minScore: 95, - minGap: 15, -} - -interface MatchResult { - messageId: string - messageIndex: number - score: number - matchType: "exact" | "fuzzy" -} - -function extractMessageContent(msg: WithParts): string { - const parts = Array.isArray(msg.parts) ? msg.parts : [] - let content = "" - - for (const part of parts) { - const p = part as Record - if ((part as any).ignored) { - continue - } - - switch (part.type) { - case "text": - case "reasoning": - if (typeof p.text === "string") { - content += " " + p.text - } - break - - case "tool": { - const state = p.state as Record | undefined - if (!state) break - - // Include tool output (completed or error) - if (state.status === "completed" && typeof state.output === "string") { - content += " " + state.output - } else if (state.status === "error" && typeof state.error === "string") { - content += " " + state.error - } - - // Include tool input - if (state.input) { - content += - " " + - (typeof state.input === "string" - ? 
state.input - : JSON.stringify(state.input)) - } - break - } - - case "compaction": - if (typeof p.summary === "string") { - content += " " + p.summary - } - break - - case "subtask": - if (typeof p.summary === "string") { - content += " " + p.summary - } - if (typeof p.result === "string") { - content += " " + p.result - } - break - } - } - - return content -} - -function findExactMatches(messages: WithParts[], searchString: string): MatchResult[] { - const matches: MatchResult[] = [] - - for (let i = 0; i < messages.length; i++) { - const msg = messages[i] - if (isIgnoredUserMessage(msg)) { - continue - } - const content = extractMessageContent(msg) - if (content.includes(searchString)) { - matches.push({ - messageId: msg.info.id, - messageIndex: i, - score: 100, - matchType: "exact", - }) - } - } - - return matches -} - -function findFuzzyMatches( - messages: WithParts[], - searchString: string, - minScore: number, -): MatchResult[] { - const matches: MatchResult[] = [] - - for (let i = 0; i < messages.length; i++) { - const msg = messages[i] - if (isIgnoredUserMessage(msg)) { - continue - } - const content = extractMessageContent(msg) - const score = partial_ratio(searchString, content) - if (score >= minScore) { - matches.push({ - messageId: msg.info.id, - messageIndex: i, - score, - matchType: "fuzzy", - }) - } - } - - return matches -} - -export function findStringInMessages( - messages: WithParts[], - searchString: string, - logger: Logger, - stringType: "startString" | "endString", - fuzzyConfig: FuzzyConfig = DEFAULT_FUZZY_CONFIG, -): { messageId: string; messageIndex: number } { - const searchableMessages = messages.length > 1 ? messages.slice(0, -1) : messages - const lastMessage = messages.length > 0 ? 
messages[messages.length - 1] : undefined - - const exactMatches = findExactMatches(searchableMessages, searchString) - - if (exactMatches.length === 1) { - return { messageId: exactMatches[0].messageId, messageIndex: exactMatches[0].messageIndex } - } - - if (exactMatches.length > 1) { - throw new Error( - `Found multiple matches for ${stringType}. ` + - `Provide more surrounding context to uniquely identify the intended match.`, - ) - } - - const fuzzyMatches = findFuzzyMatches(searchableMessages, searchString, fuzzyConfig.minScore) - - if (fuzzyMatches.length === 0) { - if (lastMessage && !isIgnoredUserMessage(lastMessage)) { - const lastMsgContent = extractMessageContent(lastMessage) - const lastMsgIndex = messages.length - 1 - if (lastMsgContent.includes(searchString)) { - // logger.info( - // `${stringType} found in last message (last resort) at index ${lastMsgIndex}`, - // ) - return { - messageId: lastMessage.info.id, - messageIndex: lastMsgIndex, - } - } - } - - throw new Error( - `${stringType} not found in conversation. ` + - `Make sure the string exists and is spelled exactly as it appears.`, - ) - } - - fuzzyMatches.sort((a, b) => b.score - a.score) - - const best = fuzzyMatches[0] - const secondBest = fuzzyMatches[1] - - // Log fuzzy match candidates - // logger.info( - // `Fuzzy match for ${stringType}: best=${best.score}% (msg ${best.messageIndex})` + - // (secondBest - // ? `, secondBest=${secondBest.score}% (msg ${secondBest.messageIndex})` - // : ""), - // ) - - // Check confidence gap - best must be significantly better than second best - if (secondBest && best.score - secondBest.score < fuzzyConfig.minGap) { - throw new Error( - `Found multiple matches for ${stringType}. 
` + - `Provide more unique surrounding context to disambiguate.`, - ) - } - - logger.info( - `Fuzzy matched ${stringType} with ${best.score}% confidence at message index ${best.messageIndex}`, - ) - - return { messageId: best.messageId, messageIndex: best.messageIndex } -} - -export function collectToolIdsInRange( - messages: WithParts[], - startIndex: number, - endIndex: number, -): string[] { - const toolIds: string[] = [] - - for (let i = startIndex; i <= endIndex; i++) { - const msg = messages[i] - const parts = Array.isArray(msg.parts) ? msg.parts : [] - - for (const part of parts) { - if (part.type === "tool" && part.callID) { - if (!toolIds.includes(part.callID)) { - toolIds.push(part.callID) - } - } - } - } - - return toolIds -} - -export function collectMessageIdsInRange( - messages: WithParts[], - startIndex: number, - endIndex: number, -): string[] { - const messageIds: string[] = [] - - for (let i = startIndex; i <= endIndex; i++) { - const msgId = messages[i].info.id - if (!messageIds.includes(msgId)) { - messageIds.push(msgId) - } - } - - return messageIds -} diff --git a/package-lock.json b/package-lock.json index 626fc4bf..812319ef 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,9 +11,7 @@ "dependencies": { "@anthropic-ai/tokenizer": "^0.0.4", "@opencode-ai/sdk": "^1.1.48", - "fuzzball": "^2.2.3", "jsonc-parser": "^3.3.1", - "ulid": "^3.0.2", "zod": "^4.3.6" }, "devDependencies": { @@ -588,17 +586,6 @@ "node": "^8.16.0 || ^10.6.0 || >=11.0.0" } }, - "node_modules/fuzzball": { - "version": "2.2.3", - "resolved": "https://registry.npmjs.org/fuzzball/-/fuzzball-2.2.3.tgz", - "integrity": "sha512-sQDb3kjI7auA4YyE1YgEW85MTparcSgRgcCweUK06Cn0niY5lN+uhFiRUZKN4MQVGGiHxlbrYCA4nL1QjOXBLQ==", - "license": "MIT", - "dependencies": { - "heap": ">=0.2.0", - "lodash": "^4.17.21", - "setimmediate": "^1.0.5" - } - }, "node_modules/get-tsconfig": { "version": "4.13.0", "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.13.0.tgz", @@ 
-612,24 +599,12 @@ "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" } }, - "node_modules/heap": { - "version": "0.2.7", - "resolved": "https://registry.npmjs.org/heap/-/heap-0.2.7.tgz", - "integrity": "sha512-2bsegYkkHO+h/9MGbn6KWcE45cHZgPANo5LXF7EvWdT0yT2EguSVO1nDgU5c8+ZOPwp2vMNa7YFsJhVcDR9Sdg==", - "license": "MIT" - }, "node_modules/jsonc-parser": { "version": "3.3.1", "resolved": "https://registry.npmjs.org/jsonc-parser/-/jsonc-parser-3.3.1.tgz", "integrity": "sha512-HUgH65KyejrUFPvHFPbqOY0rsFip3Bo5wb4ngvdi1EpCYWUQDC5V+Y7mZws+DLkr4M//zQJoanu1SP+87Dv1oQ==", "license": "MIT" }, - "node_modules/lodash": { - "version": "4.17.23", - "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.23.tgz", - "integrity": "sha512-LgVTMpQtIopCi79SJeDiP0TfWi5CNEc/L/aRdTh3yIvmZXTnheWpKjSZhnvMl8iXbC1tFg9gdHHDMLoV7CnG+w==", - "license": "MIT" - }, "node_modules/prettier": { "version": "3.8.1", "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.8.1.tgz", @@ -656,12 +631,6 @@ "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" } }, - "node_modules/setimmediate": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz", - "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==", - "license": "MIT" - }, "node_modules/tiktoken": { "version": "1.0.22", "resolved": "https://registry.npmjs.org/tiktoken/-/tiktoken-1.0.22.tgz", @@ -702,15 +671,6 @@ "node": ">=14.17" } }, - "node_modules/ulid": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/ulid/-/ulid-3.0.2.tgz", - "integrity": "sha512-yu26mwteFYzBAot7KVMqFGCVpsF6g8wXfJzQUHvu1no3+rRRSFcSV2nKeYvNPLD2J4b08jYBDhHUjeH0ygIl9w==", - "license": "MIT", - "bin": { - "ulid": "dist/cli.js" - } - }, "node_modules/undici-types": { "version": "7.16.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.16.0.tgz", diff --git a/package.json b/package.json index 
5ff5796a..3a767fc7 100644 --- a/package.json +++ b/package.json @@ -44,9 +44,7 @@ "dependencies": { "@anthropic-ai/tokenizer": "^0.0.4", "@opencode-ai/sdk": "^1.1.48", - "fuzzball": "^2.2.3", "jsonc-parser": "^3.3.1", - "ulid": "^3.0.2", "zod": "^4.3.6" }, "devDependencies": {