From 928e4e5362eb202b623fb9c7b709e794932dffe3 Mon Sep 17 00:00:00 2001 From: Muhammad Rizqi Nur Date: Wed, 27 May 2026 15:05:54 +0700 Subject: [PATCH 1/7] claude text only --- package.json | 2 +- src/plugin.ts | 24 +++++++++++++++++++++--- src/plugin/models.ts | 21 ++++++++++++++++++++- tests/unit/variant.test.ts | 6 ++++++ 4 files changed, 48 insertions(+), 5 deletions(-) diff --git a/package.json b/package.json index 9e2c22a..77e4c7c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "opencode-windsurf-auth", - "version": "0.3.3", + "version": "0.3.4", "description": "OpenCode plugin for Windsurf/Codeium authentication - use Windsurf models in OpenCode", "repository": { "type": "git", diff --git a/src/plugin.ts b/src/plugin.ts index a2899c4..82b5298 100644 --- a/src/plugin.ts +++ b/src/plugin.ts @@ -203,6 +203,13 @@ function createStreamingResponse( description: t.function?.description ?? '', parameters: t.function?.parameters ?? {}, })); + const effectiveTools = resolved.textOnly ? [] : tools; + if (resolved.textOnly && tools.length > 0) { + debugLog.log( + `[windsurf-plugin] model=${resolved.modelUid} is text-only in Cognition cloud; ` + + `stripping ${tools.length} tool definition(s) to avoid server invalid_argument`, + ); + } const { streamChatEvents } = await import('./cloud-direct/index.js'); // Cloud-direct accepts the FULL @ai-sdk multimodal content shape @@ -225,7 +232,7 @@ function createStreamingResponse( let usage: { promptTokens?: number; completionTokens?: number; totalTokens?: number } | null = null; let firstChunkSent = false; const t0 = Date.now(); - debugLog.log(`[windsurf-plugin] streamChatEvents starting (model=${resolved.modelUid}, msgs=${multimodalMessages.length}, tools=${tools.length})`); + debugLog.log(`[windsurf-plugin] streamChatEvents starting (model=${resolved.modelUid}, msgs=${multimodalMessages.length}, tools=${effectiveTools.length})`); let eventCount = 0; let textBytes = 0; // Thread the caller's `max_tokens` into the proto's @@ -251,7 +258,7 @@ function createStreamingResponse( apiServerUrl: credentials.apiServerUrl, modelUid: resolved.modelUid, messages: multimodalMessages, - tools: tools.length > 0 ? tools : undefined, + tools: effectiveTools.length > 0 ? effectiveTools : undefined, signal: abort.signal, completionOpts: { maxOutputTokens: requestedMaxTokens, @@ -466,6 +473,13 @@ async function createNonStreamingResponse( description: t.function?.description ?? '', parameters: t.function?.parameters ?? {}, })); + const effectiveTools = resolved.textOnly ? [] : tools; + if (resolved.textOnly && tools.length > 0) { + debugLog.log( + `[windsurf-plugin] model=${resolved.modelUid} is text-only in Cognition cloud; ` + + `stripping ${tools.length} tool definition(s) to avoid server invalid_argument`, + ); + } const multimodalMessages: ChatHistoryItem[] = request.messages.map((m) => mapMessageToHistoryItem(m)); @@ -493,7 +507,7 @@ async function createNonStreamingResponse( apiServerUrl: credentials.apiServerUrl, modelUid: resolved.modelUid, messages: multimodalMessages, - tools: tools.length > 0 ? tools : undefined, + tools: effectiveTools.length > 0 ? effectiveTools : undefined, completionOpts: { maxOutputTokens: requestedMaxTokens, }, @@ -829,11 +843,15 @@ async function ensureWindsurfProxyServer(): Promise { object: 'list', data: models.map((id) => { const variants = getModelVariants(id); + const resolved = resolveModel(id); + const supportsTools = !resolved.textOnly; return { id, object: 'model', created: Math.floor(Date.now() / 1000), owned_by: 'windsurf', + capabilities: { tools: supportsTools }, + text_only: !supportsTools, ...(variants ? { variants: Object.entries(variants).map(([name, meta]) => ({ diff --git a/src/plugin/models.ts b/src/plugin/models.ts index bf0c9c3..bed5a9e 100644 --- a/src/plugin/models.ts +++ b/src/plugin/models.ts @@ -111,6 +111,11 @@ type ModelCatalogEntry = { variants?: Record; /** Aliases accepted for backwards compatibility */ aliases?: string[]; + /** + * True when Cognition's cloud rejects tool-bearing requests for every + * variant of this model. The plugin strips tools and warns the user. + */ + textOnly?: boolean; }; // ========================================================================== @@ -134,6 +139,7 @@ const VARIANT_CATALOG: Record = { 'claude-opus-4.5': { id: 'claude-opus-4.5', defaultUid: 'MODEL_CLAUDE_4_5_OPUS', + textOnly: true, variants: { 'base': { modelUid: 'MODEL_CLAUDE_4_5_OPUS', description: 'Claude Opus 4.5' }, 'thinking': { modelUid: 'MODEL_CLAUDE_4_5_OPUS_THINKING', description: 'Claude Opus 4.5 Thinking' }, @@ -143,6 +149,7 @@ const VARIANT_CATALOG: Record = { 'claude-opus-4.6': { id: 'claude-opus-4.6', defaultUid: 'claude-opus-4-6-thinking', + textOnly: true, variants: { 'thinking': { modelUid: 'claude-opus-4-6-thinking', description: 'Claude Opus 4.6 Thinking' }, 'base': { modelUid: 'claude-opus-4-6', description: 'Claude Opus 4.6' }, @@ -156,6 +163,7 @@ const VARIANT_CATALOG: Record = { 'claude-opus-4.7': { id: 'claude-opus-4.7', defaultUid: 'claude-opus-4-7-medium', + textOnly: true, variants: { 'medium': { modelUid: 'claude-opus-4-7-medium', description: 'Claude Opus 4.7 Medium' }, 'low': { modelUid: 'claude-opus-4-7-low', description: 'Claude Opus 4.7 Low' }, @@ -173,6 +181,7 @@ const VARIANT_CATALOG: Record = { 'claude-sonnet-4.5': { id: 'claude-sonnet-4.5', defaultUid: 'MODEL_PRIVATE_2', + textOnly: true, variants: { '2': { modelUid: 'MODEL_PRIVATE_2', description: 'Claude Sonnet 4.5' }, '3': { modelUid: 'MODEL_PRIVATE_3', description: 'Claude Sonnet 4.5 Thinking' }, @@ -182,6 +191,7 @@ const VARIANT_CATALOG: Record = { 'claude-sonnet-4.6': { id: 'claude-sonnet-4.6', defaultUid: 'claude-sonnet-4-6-thinking', + textOnly: true, variants: { 'thinking': { modelUid: 'claude-sonnet-4-6-thinking', description: 'Claude Sonnet 4.6 Thinking' }, 'base': { modelUid: 'claude-sonnet-4-6', description: 'Claude Sonnet 4.6' }, @@ -420,6 +430,7 @@ const VARIANT_CATALOG: Record = { 'claude-3.7-sonnet': { id: 'claude-3.7-sonnet', defaultEnum: ModelEnum.CLAUDE_3_7_SONNET_20250219, + textOnly: true, variants: { thinking: { enumValue: ModelEnum.CLAUDE_3_7_SONNET_20250219_THINKING, description: 'Thinking mode' }, }, @@ -428,6 +439,7 @@ const VARIANT_CATALOG: Record = { 'claude-4-opus': { id: 'claude-4-opus', defaultEnum: ModelEnum.CLAUDE_4_OPUS, + textOnly: true, variants: { thinking: { enumValue: ModelEnum.CLAUDE_4_OPUS_THINKING, description: 'Thinking mode' }, }, @@ -435,6 +447,7 @@ const VARIANT_CATALOG: Record = { 'claude-4-sonnet': { id: 'claude-4-sonnet', defaultEnum: ModelEnum.CLAUDE_4_SONNET, + textOnly: true, variants: { thinking: { enumValue: ModelEnum.CLAUDE_4_SONNET_THINKING, description: 'Thinking mode' }, }, @@ -442,6 +455,7 @@ const VARIANT_CATALOG: Record = { 'claude-4.1-opus': { id: 'claude-4.1-opus', defaultEnum: ModelEnum.CLAUDE_4_1_OPUS, + textOnly: true, variants: { thinking: { enumValue: ModelEnum.CLAUDE_4_1_OPUS_THINKING, description: 'Thinking mode' }, }, @@ -450,6 +464,7 @@ const VARIANT_CATALOG: Record = { 'claude-4.5-sonnet': { id: 'claude-4.5-sonnet', defaultEnum: ModelEnum.CLAUDE_4_5_SONNET, + textOnly: true, variants: { thinking: { enumValue: ModelEnum.CLAUDE_4_5_SONNET_THINKING, description: 'Thinking mode' }, }, @@ -458,6 +473,7 @@ const VARIANT_CATALOG: Record = { 'claude-4.5-opus': { id: 'claude-4.5-opus', defaultEnum: ModelEnum.CLAUDE_4_5_OPUS, + textOnly: true, variants: { thinking: { enumValue: ModelEnum.CLAUDE_4_5_OPUS_THINKING, description: 'Thinking mode' }, }, @@ -1045,6 +1061,8 @@ export interface ResolvedModel { variant?: string; /** Legacy proto-enum value. Undefined for Cognition-era string-UID models. */ enumValue?: ModelEnumValue; + /** True when the cloud API rejects tool-bearing requests for this model. */ + textOnly?: boolean; } export function resolveModel(modelName: string, variantOverride?: string): ResolvedModel { @@ -1061,9 +1079,10 @@ export function resolveModel(modelName: string, variantOverride?: string): Resol modelUid: uidForVariant(v) ?? uidForEntry(entry), enumValue: v.enumValue, variant: effectiveVariant, + textOnly: entry.textOnly, }; } - return { modelId: entry.id, modelUid: uidForEntry(entry), enumValue: entry.defaultEnum }; + return { modelId: entry.id, modelUid: uidForEntry(entry), enumValue: entry.defaultEnum, textOnly: entry.textOnly }; } // Fallback to legacy alias table (proto-enum-only models). diff --git a/tests/unit/variant.test.ts b/tests/unit/variant.test.ts index ff1d977..6fbfcb8 100644 --- a/tests/unit/variant.test.ts +++ b/tests/unit/variant.test.ts @@ -46,6 +46,12 @@ describe('resolveModel variants', () => { expect(resolveModel('gpt-5.2:low').modelUid).toBe('MODEL_GPT_5_2_LOW'); expect(resolveModel('gpt-5.2:high-priority').modelUid).toBe('MODEL_GPT_5_2_HIGH_PRIORITY'); }); + + test('marks Claude cloud models as text-only', () => { + expect(resolveModel('claude-opus-4.7').textOnly).toBe(true); + expect(resolveModel('claude-sonnet-4.6').textOnly).toBe(true); + expect(resolveModel('swe-1.6').textOnly).toBeUndefined(); + }); }); describe('getModelVariants', () => { From ff4b08fde26e0705b668ecdae2c8fc1faeb9d5d2 Mon Sep 17 00:00:00 2001 From: Muhammad Rizqi Nur Date: Wed, 27 May 2026 15:22:00 +0700 Subject: [PATCH 2/7] fallback tool call to swe --- src/plugin.ts | 83 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 64 insertions(+), 19 deletions(-) diff --git a/src/plugin.ts b/src/plugin.ts index 82b5298..6c62b19 100644 --- a/src/plugin.ts +++ b/src/plugin.ts @@ -112,6 +112,61 @@ interface ChatCompletionRequest { type ToolDef = NonNullable[number]; +type CloudToolDef = { + name: string; + description: string; + parameters: unknown; +}; + +const DEFAULT_TEXT_ONLY_TOOL_FALLBACK_MODEL = 'swe-1.6'; + +function routeToolsForModel( + resolved: ReturnType, + tools: CloudToolDef[], + fallbackOverride?: string, +): { modelUid: string; tools: CloudToolDef[]; fallbackModelId?: string } { + if (!resolved.textOnly || tools.length === 0) { + return { modelUid: resolved.modelUid, tools }; + } + + const fallbackName = + fallbackOverride?.trim() || + process.env.OPENCODE_WINDSURF_TEXT_ONLY_TOOL_FALLBACK_MODEL?.trim() || + DEFAULT_TEXT_ONLY_TOOL_FALLBACK_MODEL; + const fallback = resolveModel(fallbackName); + if (fallback.textOnly) { + throw new Error( + `Model "${resolved.modelId}" cannot use tools through Cognition cloud, and fallback ` + + `"${fallbackName}" is also marked text-only. Set ` + + `OPENCODE_WINDSURF_TEXT_ONLY_TOOL_FALLBACK_MODEL to a tool-capable model like swe-1.6.`, + ); + } + + debugLog.log( + `[windsurf-plugin] model=${resolved.modelUid} is text-only in Cognition cloud; ` + + `routing tool-bearing turn to fallback model=${fallback.modelUid} with ${tools.length} tool definition(s)`, + ); + return { modelUid: fallback.modelUid, tools, fallbackModelId: fallback.modelId }; +} + +function extractTextOnlyToolFallbackFromProviderOptions(providerOptions: Record | undefined): string | undefined { + if (!providerOptions) return undefined; + const windsurfRaw = providerOptions['windsurf']; + const windsurf = + windsurfRaw && typeof windsurfRaw === 'object' + ? (windsurfRaw as Record) + : undefined; + const pickString = (v: unknown): string | undefined => (typeof v === 'string' ? v : undefined); + return ( + pickString(windsurf?.['textOnlyToolFallbackModel']) ?? + pickString(windsurf?.['toolFallbackModel']) ?? + pickString(windsurf?.['fallbackModel']) ?? + pickString(providerOptions['textOnlyToolFallbackModel']) ?? + pickString(providerOptions['toolFallbackModel']) ?? + pickString(providerOptions['fallbackModel']) + ); +} + /** * Map an opencode/OpenAI-shaped chat message into the ChatHistoryItem the * cloud-direct encoder expects. Importantly, this preserves `tool_call_id` @@ -190,6 +245,7 @@ function createStreamingResponse( const responseId = `chatcmpl-${crypto.randomUUID()}`; const requestedModel = request.model || getDefaultModel(); const variantOverride = extractVariantFromProviderOptions(request.providerOptions); + const textOnlyToolFallback = extractTextOnlyToolFallbackFromProviderOptions(request.providerOptions); const abort = new AbortController(); @@ -203,13 +259,7 @@ function createStreamingResponse( description: t.function?.description ?? '', parameters: t.function?.parameters ?? {}, })); - const effectiveTools = resolved.textOnly ? [] : tools; - if (resolved.textOnly && tools.length > 0) { - debugLog.log( - `[windsurf-plugin] model=${resolved.modelUid} is text-only in Cognition cloud; ` + - `stripping ${tools.length} tool definition(s) to avoid server invalid_argument`, - ); - } + const routed = routeToolsForModel(resolved, tools, textOnlyToolFallback); const { streamChatEvents } = await import('./cloud-direct/index.js'); // Cloud-direct accepts the FULL @ai-sdk multimodal content shape @@ -232,7 +282,7 @@ function createStreamingResponse( let usage: { promptTokens?: number; completionTokens?: number; totalTokens?: number } | null = null; let firstChunkSent = false; const t0 = Date.now(); - debugLog.log(`[windsurf-plugin] streamChatEvents starting (model=${resolved.modelUid}, msgs=${multimodalMessages.length}, tools=${effectiveTools.length})`); + debugLog.log(`[windsurf-plugin] streamChatEvents starting (model=${routed.modelUid}, requested=${resolved.modelUid}, msgs=${multimodalMessages.length}, tools=${routed.tools.length})`); let eventCount = 0; let textBytes = 0; // Thread the caller's `max_tokens` into the proto's @@ -256,9 +306,9 @@ function createStreamingResponse( for await (const ev of streamChatEvents({ apiKey: credentials.apiKey, apiServerUrl: credentials.apiServerUrl, - modelUid: resolved.modelUid, + modelUid: routed.modelUid, messages: multimodalMessages, - tools: effectiveTools.length > 0 ? effectiveTools : undefined, + tools: routed.tools.length > 0 ? routed.tools : undefined, signal: abort.signal, completionOpts: { maxOutputTokens: requestedMaxTokens, @@ -466,6 +516,7 @@ async function createNonStreamingResponse( const responseId = `chatcmpl-${crypto.randomUUID()}`; const requestedModel = request.model || getDefaultModel(); const variantOverride = extractVariantFromProviderOptions(request.providerOptions); + const textOnlyToolFallback = extractTextOnlyToolFallbackFromProviderOptions(request.providerOptions); const resolved = resolveModel(requestedModel, variantOverride); const tools = (request.tools ?? []).map((t) => ({ @@ -473,13 +524,7 @@ async function createNonStreamingResponse( description: t.function?.description ?? '', parameters: t.function?.parameters ?? {}, })); - const effectiveTools = resolved.textOnly ? [] : tools; - if (resolved.textOnly && tools.length > 0) { - debugLog.log( - `[windsurf-plugin] model=${resolved.modelUid} is text-only in Cognition cloud; ` + - `stripping ${tools.length} tool definition(s) to avoid server invalid_argument`, - ); - } + const routed = routeToolsForModel(resolved, tools, textOnlyToolFallback); const multimodalMessages: ChatHistoryItem[] = request.messages.map((m) => mapMessageToHistoryItem(m)); @@ -505,9 +550,9 @@ async function createNonStreamingResponse( for await (const ev of streamChatEvents({ apiKey: credentials.apiKey, apiServerUrl: credentials.apiServerUrl, - modelUid: resolved.modelUid, + modelUid: routed.modelUid, messages: multimodalMessages, - tools: effectiveTools.length > 0 ? effectiveTools : undefined, + tools: routed.tools.length > 0 ? routed.tools : undefined, completionOpts: { maxOutputTokens: requestedMaxTokens, }, From 76d6513e326b8686eec5e9a78ee143bd084aaf09 Mon Sep 17 00:00:00 2001 From: Muhammad Rizqi Nur Date: Wed, 27 May 2026 17:32:32 +0700 Subject: [PATCH 3/7] strictly only use fallback model for tool call --- src/plugin.ts | 239 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 173 insertions(+), 66 deletions(-) diff --git a/src/plugin.ts b/src/plugin.ts index 6c62b19..5deeb0a 100644 --- a/src/plugin.ts +++ b/src/plugin.ts @@ -69,7 +69,7 @@ const debugLog = (() => { import { WindsurfCredentials, WindsurfError } from './plugin/auth.js'; import { resolveCredentials } from './plugin/credentials-resolver.js'; import { loadCredentials as loadOAuthCredentials } from './oauth/storage.js'; -import type { ChatHistoryItem } from './cloud-direct/index.js'; +import type { ChatHistoryItem, CloudChatEvent } from './cloud-direct/index.js'; import { getDefaultModel, getCanonicalModels, @@ -118,38 +118,9 @@ type CloudToolDef = { parameters: unknown; }; -const DEFAULT_TEXT_ONLY_TOOL_FALLBACK_MODEL = 'swe-1.6'; +const DEFAULT_TOOL_CALL_TRANSLATOR_MODEL = 'swe-1.6'; -function routeToolsForModel( - resolved: ReturnType, - tools: CloudToolDef[], - fallbackOverride?: string, -): { modelUid: string; tools: CloudToolDef[]; fallbackModelId?: string } { - if (!resolved.textOnly || tools.length === 0) { - return { modelUid: resolved.modelUid, tools }; - } - - const fallbackName = - fallbackOverride?.trim() || - process.env.OPENCODE_WINDSURF_TEXT_ONLY_TOOL_FALLBACK_MODEL?.trim() || - DEFAULT_TEXT_ONLY_TOOL_FALLBACK_MODEL; - const fallback = resolveModel(fallbackName); - if (fallback.textOnly) { - throw new Error( - `Model "${resolved.modelId}" cannot use tools through Cognition cloud, and fallback ` + - `"${fallbackName}" is also marked text-only. Set ` + - `OPENCODE_WINDSURF_TEXT_ONLY_TOOL_FALLBACK_MODEL to a tool-capable model like swe-1.6.`, - ); - } - - debugLog.log( - `[windsurf-plugin] model=${resolved.modelUid} is text-only in Cognition cloud; ` + - `routing tool-bearing turn to fallback model=${fallback.modelUid} with ${tools.length} tool definition(s)`, - ); - return { modelUid: fallback.modelUid, tools, fallbackModelId: fallback.modelId }; -} - -function extractTextOnlyToolFallbackFromProviderOptions(providerOptions: Record | undefined): string | undefined { +function extractToolCallTranslatorFromProviderOptions(providerOptions: Record | undefined): string | undefined { if (!providerOptions) return undefined; const windsurfRaw = providerOptions['windsurf']; const windsurf = @@ -158,15 +129,64 @@ function extractTextOnlyToolFallbackFromProviderOptions(providerOptions: Record< : undefined; const pickString = (v: unknown): string | undefined => (typeof v === 'string' ? v : undefined); return ( - pickString(windsurf?.['textOnlyToolFallbackModel']) ?? + pickString(windsurf?.['toolCallTranslatorModel']) ?? pickString(windsurf?.['toolFallbackModel']) ?? pickString(windsurf?.['fallbackModel']) ?? - pickString(providerOptions['textOnlyToolFallbackModel']) ?? + pickString(providerOptions['toolCallTranslatorModel']) ?? pickString(providerOptions['toolFallbackModel']) ?? pickString(providerOptions['fallbackModel']) ); } +function getToolCallTranslatorModel(providerOptions: Record | undefined): ReturnType { + const fallbackName = + extractToolCallTranslatorFromProviderOptions(providerOptions)?.trim() || + process.env.OPENCODE_WINDSURF_TOOL_CALL_TRANSLATOR_MODEL?.trim() || + DEFAULT_TOOL_CALL_TRANSLATOR_MODEL; + const fallback = resolveModel(fallbackName); + if (fallback.textOnly) { + throw new Error( + `Tool-call translator model "${fallbackName}" is marked text-only. ` + + `Set OPENCODE_WINDSURF_TOOL_CALL_TRANSLATOR_MODEL to a tool-capable model like swe-1.6.`, + ); + } + return fallback; +} + +function buildOpusToolPlanningMessages(messages: ChatHistoryItem[], tools: CloudToolDef[]): ChatHistoryItem[] { + const manifest = tools.map((t) => ({ name: t.name, description: t.description, parameters: t.parameters })); + return [ + ...messages, + { + role: 'system', + content: + `Native tool schemas cannot be sent to this model, but opencode can still execute tools after your decision.\n` + + `You are responsible for planning the next step.\n` + + `If the next step needs a command, file read/edit/search, todo update, web fetch, or any tool action, describe the intended tool action plainly.\n` + + `Prefer: TOOL_INTENT: .\n` + + `Do not invent tool output and do not continue as if a tool already ran.\n` + + `If no tool is needed, answer normally.\n\n` + + `Available tools:\n${JSON.stringify(manifest)}`, + }, + ]; +} + +function buildToolCallTranslatorMessages(messages: ChatHistoryItem[], opusDraft: string): ChatHistoryItem[] { + return [ + ...messages, + { + role: 'user', + content: + `The requested model cannot emit native tool calls. It produced this planned next step:\n\n` + + `\n${opusDraft}\n\n\n` + + `Convert that planned next step into at most one native tool call.\n` + + `If the draft implies command execution, file read/edit/search, todo update, web fetch, or any tool action, call exactly the matching tool.\n` + + `If the draft is a final answer or no tool is needed, respond with exactly: NO_TOOL.\n` + + `Do not answer the user. Do not add commentary.`, + }, + ]; +} + /** * Map an opencode/OpenAI-shaped chat message into the ChatHistoryItem the * cloud-direct encoder expects. Importantly, this preserves `tool_call_id` @@ -245,7 +265,6 @@ function createStreamingResponse( const responseId = `chatcmpl-${crypto.randomUUID()}`; const requestedModel = request.model || getDefaultModel(); const variantOverride = extractVariantFromProviderOptions(request.providerOptions); - const textOnlyToolFallback = extractTextOnlyToolFallbackFromProviderOptions(request.providerOptions); const abort = new AbortController(); @@ -259,8 +278,6 @@ function createStreamingResponse( description: t.function?.description ?? '', parameters: t.function?.parameters ?? {}, })); - const routed = routeToolsForModel(resolved, tools, textOnlyToolFallback); - const { streamChatEvents } = await import('./cloud-direct/index.js'); // Cloud-direct accepts the FULL @ai-sdk multimodal content shape // (text + image_url parts). We pass `request.messages` straight @@ -282,7 +299,9 @@ function createStreamingResponse( let usage: { promptTokens?: number; completionTokens?: number; totalTokens?: number } | null = null; let firstChunkSent = false; const t0 = Date.now(); - debugLog.log(`[windsurf-plugin] streamChatEvents starting (model=${routed.modelUid}, requested=${resolved.modelUid}, msgs=${multimodalMessages.length}, tools=${routed.tools.length})`); + const useTranslator = !!resolved.textOnly && tools.length > 0; + const translator = useTranslator ? getToolCallTranslatorModel(request.providerOptions) : undefined; + debugLog.log(`[windsurf-plugin] streamChatEvents starting (model=${resolved.modelUid}, msgs=${multimodalMessages.length}, tools=${useTranslator ? 0 : tools.length}, toolCallTranslator=${translator?.modelUid ?? 'none'})`); let eventCount = 0; let textBytes = 0; // Thread the caller's `max_tokens` into the proto's @@ -303,17 +322,64 @@ function createStreamingResponse( typeof request.max_tokens === 'number' && request.max_tokens > 0 ? request.max_tokens : 128_000; - for await (const ev of streamChatEvents({ - apiKey: credentials.apiKey, - apiServerUrl: credentials.apiServerUrl, - modelUid: routed.modelUid, - messages: multimodalMessages, - tools: routed.tools.length > 0 ? routed.tools : undefined, - signal: abort.signal, - completionOpts: { - maxOutputTokens: requestedMaxTokens, - }, - })) { + + const eventSource = async function* (): AsyncGenerator { + const common = { + apiKey: credentials.apiKey, + apiServerUrl: credentials.apiServerUrl, + signal: abort.signal, + completionOpts: { maxOutputTokens: requestedMaxTokens }, + }; + + if (!useTranslator || !translator) { + yield* streamChatEvents({ + ...common, + modelUid: resolved.modelUid, + messages: multimodalMessages, + tools: tools.length > 0 ? tools : undefined, + }); + return; + } + + const opusEvents: CloudChatEvent[] = []; + let opusDraft = ''; + for await (const ev of streamChatEvents({ + ...common, + modelUid: resolved.modelUid, + messages: buildOpusToolPlanningMessages(multimodalMessages, tools), + })) { + opusEvents.push(ev); + if (ev.kind === 'text') opusDraft += ev.text; + } + + debugLog.log(`[windsurf-plugin] opus planner draft (${opusDraft.length}B): ${opusDraft.slice(0, 500).replace(/\n/g, '\\n')}`); + + let fallbackSawTool = false; + const fallbackEvents: CloudChatEvent[] = []; + for await (const ev of streamChatEvents({ + ...common, + modelUid: translator.modelUid, + messages: buildToolCallTranslatorMessages(multimodalMessages, opusDraft), + tools, + })) { + fallbackEvents.push(ev); + if (ev.kind === 'tool_call_start') fallbackSawTool = true; + } + + if (fallbackSawTool) { + debugLog.log(`[windsurf-plugin] tool-call translator model=${translator.modelUid} emitted tool call(s)`); + for (const ev of fallbackEvents) { + if (ev.kind === 'text' || ev.kind === 'reasoning') continue; + yield ev; + } + return; + } + + debugLog.log(`[windsurf-plugin] tool-call translator model=${translator.modelUid} emitted no tool call; streaming opus draft`); + for (const ev of opusEvents) yield ev; + }; + + for await (const ev of eventSource()) { eventCount++; if (eventCount === 1) debugLog.log(`[windsurf-plugin] streamChatEvents first event after ${Date.now() - t0}ms (kind=${ev.kind})`); // @ai-sdk expects `delta.role: 'assistant'` on the *first* chunk @@ -516,7 +582,6 @@ async function createNonStreamingResponse( const responseId = `chatcmpl-${crypto.randomUUID()}`; const requestedModel = request.model || getDefaultModel(); const variantOverride = extractVariantFromProviderOptions(request.providerOptions); - const textOnlyToolFallback = extractTextOnlyToolFallbackFromProviderOptions(request.providerOptions); const resolved = resolveModel(requestedModel, variantOverride); const tools = (request.tools ?? []).map((t) => ({ @@ -524,8 +589,6 @@ async function createNonStreamingResponse( description: t.function?.description ?? '', parameters: t.function?.parameters ?? {}, })); - const routed = routeToolsForModel(resolved, tools, textOnlyToolFallback); - const multimodalMessages: ChatHistoryItem[] = request.messages.map((m) => mapMessageToHistoryItem(m)); const { streamChatEvents } = await import('./cloud-direct/index.js'); @@ -546,21 +609,65 @@ async function createNonStreamingResponse( type CollectedToolCall = { id: string; name: string; args: string }; const collectedToolCalls: CollectedToolCall[] = []; let currentToolCall: CollectedToolCall | null = null; + const useTranslator = !!resolved.textOnly && tools.length > 0; + const translator = useTranslator ? getToolCallTranslatorModel(request.providerOptions) : undefined; + + const eventSource = async function* (): AsyncGenerator { + const common = { + apiKey: credentials.apiKey, + apiServerUrl: credentials.apiServerUrl, + completionOpts: { maxOutputTokens: requestedMaxTokens }, + // Propagate the caller's abort so a client disconnect during a + // non-streaming title-gen / summary call actually stops the upstream + // cloud request and the billable token usage with it. + signal, + }; - for await (const ev of streamChatEvents({ - apiKey: credentials.apiKey, - apiServerUrl: credentials.apiServerUrl, - modelUid: routed.modelUid, - messages: multimodalMessages, - tools: routed.tools.length > 0 ? routed.tools : undefined, - completionOpts: { - maxOutputTokens: requestedMaxTokens, - }, - // Propagate the caller's abort so a client disconnect during a - // non-streaming title-gen / summary call actually stops the upstream - // cloud request and the billable token usage with it. - signal, - })) { + if (!useTranslator || !translator) { + yield* streamChatEvents({ + ...common, + modelUid: resolved.modelUid, + messages: multimodalMessages, + tools: tools.length > 0 ? tools : undefined, + }); + return; + } + + const opusEvents: CloudChatEvent[] = []; + let opusDraft = ''; + for await (const ev of streamChatEvents({ + ...common, + modelUid: resolved.modelUid, + messages: buildOpusToolPlanningMessages(multimodalMessages, tools), + })) { + opusEvents.push(ev); + if (ev.kind === 'text') opusDraft += ev.text; + } + + let fallbackSawTool = false; + const fallbackEvents: CloudChatEvent[] = []; + for await (const ev of streamChatEvents({ + ...common, + modelUid: translator.modelUid, + messages: buildToolCallTranslatorMessages(multimodalMessages, opusDraft), + tools, + })) { + fallbackEvents.push(ev); + if (ev.kind === 'tool_call_start') fallbackSawTool = true; + } + + if (fallbackSawTool) { + for (const ev of fallbackEvents) { + if (ev.kind === 'text' || ev.kind === 'reasoning') continue; + yield ev; + } + return; + } + + for (const ev of opusEvents) yield ev; + }; + + for await (const ev of eventSource()) { if (ev.kind === 'text') { collected += ev.text; } else if (ev.kind === 'tool_call_start') { From 2132deb5d64d5f0bcf223708b55f7d9d8a820f80 Mon Sep 17 00:00:00 2001 From: Muhammad Rizqi Nur Date: Wed, 27 May 2026 18:19:50 +0700 Subject: [PATCH 4/7] optimize usage --- src/plugin.ts | 196 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 187 insertions(+), 9 deletions(-) diff --git a/src/plugin.ts b/src/plugin.ts index 5deeb0a..e788c4c 100644 --- a/src/plugin.ts +++ b/src/plugin.ts @@ -119,6 +119,39 @@ type CloudToolDef = { }; const DEFAULT_TOOL_CALL_TRANSLATOR_MODEL = 'swe-1.6'; +const DEFAULT_TOOL_INTENT_DETECTION: ToolIntentDetectionMode = 'always'; +const DEFAULT_TOOL_TRANSLATOR_CONTEXT_MESSAGES = 8; +const DEFAULT_TOOL_RESULT_CONTEXT: ToolResultContextMode = 'tail'; +const DEFAULT_TOOL_RESULT_CONTEXT_MESSAGES = 24; +const MAX_STORED_PLANNER_DRAFTS = 200; + +type ToolIntentDetectionMode = 'always' | 'assist' | 'marker'; +type ToolResultContextMode = 'full' | 'tail' | 'minimal'; + +interface TextOnlyToolConfig { + toolIntentDetection: ToolIntentDetectionMode; + toolTranslatorContextMessages: number; + toolResultContext: ToolResultContextMode; + toolResultContextMessages: number; +} + +interface PlannerDraftEntry { + draft: string; + modelUid: string; + createdAt: number; +} + +const plannerDraftByToolCallId = new Map(); + +function storePlannerDraft(toolCallId: string, entry: Omit): void { + if (!toolCallId || !entry.draft) return; + plannerDraftByToolCallId.set(toolCallId, { ...entry, createdAt: Date.now() }); + while (plannerDraftByToolCallId.size > MAX_STORED_PLANNER_DRAFTS) { + const oldest = plannerDraftByToolCallId.keys().next().value; + if (!oldest) break; + plannerDraftByToolCallId.delete(oldest); + } +} function extractToolCallTranslatorFromProviderOptions(providerOptions: Record | undefined): string | undefined { if (!providerOptions) return undefined; @@ -138,6 +171,71 @@ function extractToolCallTranslatorFromProviderOptions(providerOptions: Record | undefined): Record | undefined { + if (!providerOptions) return undefined; + const raw = providerOptions['windsurf']; + return raw && typeof raw === 'object' ? (raw as Record) : undefined; +} + +function pickStringConfig(providerOptions: Record | undefined, key: string): string | undefined { + const windsurf = windsurfProviderOptions(providerOptions); + const v = windsurf?.[key] ?? providerOptions?.[key]; + return typeof v === 'string' ? v : undefined; +} + +function pickNumberConfig(providerOptions: Record | undefined, key: string): number | undefined { + const windsurf = windsurfProviderOptions(providerOptions); + const v = windsurf?.[key] ?? providerOptions?.[key]; + if (typeof v === 'number' && Number.isFinite(v)) return v; + if (typeof v === 'string' && v.trim() !== '') { + const n = Number(v); + if (Number.isFinite(n)) return n; + } + return undefined; +} + +function clampInt(v: number | undefined, fallback: number, min: number, max: number): number { + if (v === undefined || !Number.isFinite(v)) return fallback; + return Math.max(min, Math.min(max, Math.trunc(v))); +} + +function resolveToolConfig(providerOptions: Record | undefined): TextOnlyToolConfig { + const detectionRaw = + pickStringConfig(providerOptions, 'toolIntentDetection') ?? + process.env.OPENCODE_WINDSURF_TOOL_INTENT_DETECTION ?? + DEFAULT_TOOL_INTENT_DETECTION; + const detection: ToolIntentDetectionMode = + detectionRaw === 'marker' || detectionRaw === 'assist' || detectionRaw === 'always' + ? detectionRaw + : DEFAULT_TOOL_INTENT_DETECTION; + + const resultRaw = + pickStringConfig(providerOptions, 'toolResultContext') ?? + process.env.OPENCODE_WINDSURF_TOOL_RESULT_CONTEXT ?? + DEFAULT_TOOL_RESULT_CONTEXT; + const resultContext: ToolResultContextMode = + resultRaw === 'full' || resultRaw === 'tail' || resultRaw === 'minimal' + ? resultRaw + : DEFAULT_TOOL_RESULT_CONTEXT; + + return { + toolIntentDetection: detection, + toolTranslatorContextMessages: clampInt( + pickNumberConfig(providerOptions, 'toolTranslatorContextMessages') ?? Number(process.env.OPENCODE_WINDSURF_TOOL_TRANSLATOR_CONTEXT_MESSAGES), + DEFAULT_TOOL_TRANSLATOR_CONTEXT_MESSAGES, + 1, + 64, + ), + toolResultContext: resultContext, + toolResultContextMessages: clampInt( + pickNumberConfig(providerOptions, 'toolResultContextMessages') ?? Number(process.env.OPENCODE_WINDSURF_TOOL_RESULT_CONTEXT_MESSAGES), + DEFAULT_TOOL_RESULT_CONTEXT_MESSAGES, + 1, + 128, + ), + }; +} + function getToolCallTranslatorModel(providerOptions: Record | undefined): ReturnType { const fallbackName = extractToolCallTranslatorFromProviderOptions(providerOptions)?.trim() || @@ -171,9 +269,36 @@ function buildOpusToolPlanningMessages(messages: ChatHistoryItem[], tools: Cloud ]; } -function buildToolCallTranslatorMessages(messages: ChatHistoryItem[], opusDraft: string): ChatHistoryItem[] { +function recentMessages(messages: ChatHistoryItem[], count: number): ChatHistoryItem[] { + return messages.filter((m) => m.role !== 'system').slice(-count); +} + +function latestUserMessage(messages: ChatHistoryItem[]): ChatHistoryItem | undefined { + for (let i = messages.length - 1; i >= 0; i--) { + if (messages[i]?.role === 'user') return messages[i]; + } + return undefined; +} + +function plannerDraftContext(messages: ChatHistoryItem[]): string { + const ids = new Set(); + for (const m of messages) { + if (m.role === 'tool' && typeof m.tool_call_id === 'string' && m.tool_call_id) ids.add(m.tool_call_id); + } + const parts: string[] = []; + for (const id of ids) { + const entry = plannerDraftByToolCallId.get(id); + if (entry) parts.push(`tool_call_id=${id}\n${entry.draft}`); + } + return parts.join('\n\n'); +} + +function buildToolCallTranslatorMessages(messages: ChatHistoryItem[], opusDraft: string, tailCount: number): ChatHistoryItem[] { + const latestUser = latestUserMessage(messages); + const tail = recentMessages(messages, tailCount); + const context: ChatHistoryItem[] = latestUser ? [latestUser, ...tail.filter((m) => m !== latestUser)] : tail; return [ - ...messages, + ...context, { role: 'user', content: @@ -187,6 +312,33 @@ function buildToolCallTranslatorMessages(messages: ChatHistoryItem[], opusDraft: ]; } +function buildToolResultMessages(messages: ChatHistoryItem[], config: TextOnlyToolConfig): ChatHistoryItem[] { + const draftContext = plannerDraftContext(messages); + if (!draftContext) return messages; + + const injected: ChatHistoryItem = { + role: 'system', + content: + `Previous Opus planner draft(s) that led to the tool result(s) in this turn:\n` + + `${draftContext}\n\nUse this to interpret the tool result and continue from the original plan.`, + }; + + if (config.toolResultContext === 'full') return [...messages, injected]; + + const latestUser = latestUserMessage(messages); + const tailCount = config.toolResultContext === 'minimal' ? 6 : config.toolResultContextMessages; + const tail = recentMessages(messages, tailCount); + const context = latestUser ? [latestUser, ...tail.filter((m) => m !== latestUser)] : tail; + return [...context, injected]; +} + +function shouldCallToolTranslator(draft: string, mode: ToolIntentDetectionMode): boolean { + if (mode === 'always') return true; + if (/\bTOOL_INTENT\s*:/i.test(draft)) return true; + if (mode === 'marker') return false; + return /\b(?:I'll|I will|let me|now|next I'll|I need to)\s+(?:run|execute|read|inspect|check|edit|search|grep|build|flash|capture|write|update)\b/i.test(draft); +} + /** * Map an opencode/OpenAI-shaped chat message into the ChatHistoryItem the * cloud-direct encoder expects. Importantly, this preserves `tool_call_id` @@ -299,9 +451,13 @@ function createStreamingResponse( let usage: { promptTokens?: number; completionTokens?: number; totalTokens?: number } | null = null; let firstChunkSent = false; const t0 = Date.now(); + const toolConfig = resolveToolConfig(request.providerOptions); const useTranslator = !!resolved.textOnly && tools.length > 0; const translator = useTranslator ? getToolCallTranslatorModel(request.providerOptions) : undefined; - debugLog.log(`[windsurf-plugin] streamChatEvents starting (model=${resolved.modelUid}, msgs=${multimodalMessages.length}, tools=${useTranslator ? 0 : tools.length}, toolCallTranslator=${translator?.modelUid ?? 'none'})`); + const opusMessages = resolved.textOnly + ? buildToolResultMessages(multimodalMessages, toolConfig) + : multimodalMessages; + debugLog.log(`[windsurf-plugin] streamChatEvents starting (model=${resolved.modelUid}, msgs=${opusMessages.length}, tools=${useTranslator ? 0 : tools.length}, toolCallTranslator=${translator?.modelUid ?? 'none'}, intent=${toolConfig.toolIntentDetection}, resultContext=${toolConfig.toolResultContext}:${toolConfig.toolResultContextMessages})`); let eventCount = 0; let textBytes = 0; // Thread the caller's `max_tokens` into the proto's @@ -335,7 +491,7 @@ function createStreamingResponse( yield* streamChatEvents({ ...common, modelUid: resolved.modelUid, - messages: multimodalMessages, + messages: opusMessages, tools: tools.length > 0 ? tools : undefined, }); return; @@ -346,7 +502,7 @@ function createStreamingResponse( for await (const ev of streamChatEvents({ ...common, modelUid: resolved.modelUid, - messages: buildOpusToolPlanningMessages(multimodalMessages, tools), + messages: buildOpusToolPlanningMessages(opusMessages, tools), })) { opusEvents.push(ev); if (ev.kind === 'text') opusDraft += ev.text; @@ -354,12 +510,18 @@ function createStreamingResponse( debugLog.log(`[windsurf-plugin] opus planner draft (${opusDraft.length}B): ${opusDraft.slice(0, 500).replace(/\n/g, '\\n')}`); + if (!shouldCallToolTranslator(opusDraft, toolConfig.toolIntentDetection)) { + debugLog.log(`[windsurf-plugin] tool-call translator skipped by detection=${toolConfig.toolIntentDetection}`); + for (const ev of opusEvents) yield ev; + return; + } + let fallbackSawTool = false; const fallbackEvents: CloudChatEvent[] = []; for await (const ev of streamChatEvents({ ...common, modelUid: translator.modelUid, - messages: buildToolCallTranslatorMessages(multimodalMessages, opusDraft), + messages: buildToolCallTranslatorMessages(multimodalMessages, opusDraft, toolConfig.toolTranslatorContextMessages), tools, })) { fallbackEvents.push(ev); @@ -369,6 +531,10 @@ function createStreamingResponse( if (fallbackSawTool) { debugLog.log(`[windsurf-plugin] tool-call translator model=${translator.modelUid} emitted tool call(s)`); for (const ev of fallbackEvents) { + if (ev.kind === 'tool_call_start') { + storePlannerDraft(ev.id, { draft: opusDraft, modelUid: resolved.modelUid }); + debugLog.log(`[windsurf-plugin] stored opus planner draft for tool_call_id=${ev.id}`); + } if (ev.kind === 'text' || ev.kind === 'reasoning') continue; yield ev; } @@ -609,8 +775,12 @@ async function createNonStreamingResponse( type CollectedToolCall = { id: string; name: string; args: string }; const collectedToolCalls: CollectedToolCall[] = []; let currentToolCall: CollectedToolCall | null = null; + const toolConfig = resolveToolConfig(request.providerOptions); const useTranslator = !!resolved.textOnly && tools.length > 0; const translator = useTranslator ? getToolCallTranslatorModel(request.providerOptions) : undefined; + const opusMessages = resolved.textOnly + ? buildToolResultMessages(multimodalMessages, toolConfig) + : multimodalMessages; const eventSource = async function* (): AsyncGenerator { const common = { @@ -627,7 +797,7 @@ async function createNonStreamingResponse( yield* streamChatEvents({ ...common, modelUid: resolved.modelUid, - messages: multimodalMessages, + messages: opusMessages, tools: tools.length > 0 ? tools : undefined, }); return; @@ -638,18 +808,23 @@ async function createNonStreamingResponse( for await (const ev of streamChatEvents({ ...common, modelUid: resolved.modelUid, - messages: buildOpusToolPlanningMessages(multimodalMessages, tools), + messages: buildOpusToolPlanningMessages(opusMessages, tools), })) { opusEvents.push(ev); if (ev.kind === 'text') opusDraft += ev.text; } + if (!shouldCallToolTranslator(opusDraft, toolConfig.toolIntentDetection)) { + for (const ev of opusEvents) yield ev; + return; + } + let fallbackSawTool = false; const fallbackEvents: CloudChatEvent[] = []; for await (const ev of streamChatEvents({ ...common, modelUid: translator.modelUid, - messages: buildToolCallTranslatorMessages(multimodalMessages, opusDraft), + messages: buildToolCallTranslatorMessages(multimodalMessages, opusDraft, toolConfig.toolTranslatorContextMessages), tools, })) { fallbackEvents.push(ev); @@ -658,6 +833,9 @@ async function createNonStreamingResponse( if (fallbackSawTool) { for (const ev of fallbackEvents) { + if (ev.kind === 'tool_call_start') { + storePlannerDraft(ev.id, { draft: opusDraft, modelUid: resolved.modelUid }); + } if (ev.kind === 'text' || ev.kind === 'reasoning') continue; yield ev; } From 4f9660592d89fb60686bfb5462be19fe3dda21bd Mon Sep 17 00:00:00 2001 From: Muhammad Rizqi Nur Date: Wed, 27 May 2026 18:42:27 +0700 Subject: [PATCH 5/7] Context also must not contain tool --- src/plugin.ts | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/src/plugin.ts b/src/plugin.ts index e788c4c..220f04a 100644 --- a/src/plugin.ts +++ b/src/plugin.ts @@ -273,6 +273,43 @@ function recentMessages(messages: ChatHistoryItem[], count: number): ChatHistory return messages.filter((m) => m.role !== 'system').slice(-count); } +function contentToText(content: ChatHistoryItem['content']): string { + if (typeof content === 'string') return content; + if (!Array.isArray(content)) return String(content ?? ''); + return content + .map((part) => { + if (!part || typeof part !== 'object') return ''; + if ('text' in part && typeof part.text === 'string') return part.text; + if ('image_url' in part) return '[image]'; + return ''; + }) + .filter(Boolean) + .join('\n'); +} + +function flattenToolHistoryMessages(messages: ChatHistoryItem[]): ChatHistoryItem[] { + return messages.map((m) => { + const text = contentToText(m.content); + if (m.role === 'tool') { + return { + role: 'user', + content: `\n${text}\n`, + } satisfies ChatHistoryItem; + } + if (m.role === 'assistant' && m.tool_calls && m.tool_calls.length > 0) { + const calls = m.tool_calls + .map((tc) => `${tc.arguments}`) + .join('\n'); + return { + role: 'assistant', + content: text ? `${text}\n${calls}` : calls, + } satisfies ChatHistoryItem; + } + if (m.role === 'system') return m; + return { role: m.role, content: text } satisfies ChatHistoryItem; + }); +} + function latestUserMessage(messages: ChatHistoryItem[]): ChatHistoryItem | undefined { for (let i = messages.length - 1; i >= 0; i--) { if (messages[i]?.role === 'user') return messages[i]; @@ -296,7 +333,9 @@ function plannerDraftContext(messages: ChatHistoryItem[]): string { function buildToolCallTranslatorMessages(messages: ChatHistoryItem[], opusDraft: string, tailCount: number): ChatHistoryItem[] { const latestUser = latestUserMessage(messages); const tail = recentMessages(messages, tailCount); - const context: ChatHistoryItem[] = latestUser ? [latestUser, ...tail.filter((m) => m !== latestUser)] : tail; + const context: ChatHistoryItem[] = flattenToolHistoryMessages( + latestUser ? [latestUser, ...tail.filter((m) => m !== latestUser)] : tail, + ); return [ ...context, { @@ -328,7 +367,7 @@ function buildToolResultMessages(messages: ChatHistoryItem[], config: TextOnlyTo const latestUser = latestUserMessage(messages); const tailCount = config.toolResultContext === 'minimal' ? 6 : config.toolResultContextMessages; const tail = recentMessages(messages, tailCount); - const context = latestUser ? [latestUser, ...tail.filter((m) => m !== latestUser)] : tail; + const context = flattenToolHistoryMessages(latestUser ? [latestUser, ...tail.filter((m) => m !== latestUser)] : tail); return [...context, injected]; } From 8434e1c8f867e704676fd1aa44f7cce21a39567b Mon Sep 17 00:00:00 2001 From: Muhammad Rizqi Nur Date: Thu, 28 May 2026 17:26:42 +0700 Subject: [PATCH 6/7] fix contexts and some params --- src/cloud-direct/chat.ts | 21 +++++--- src/plugin.ts | 113 +++++++++++++++++++++++++++++++-------- 2 files changed, 105 insertions(+), 29 deletions(-) diff --git a/src/cloud-direct/chat.ts b/src/cloud-direct/chat.ts index 0e1c01e..225e456 100644 --- a/src/cloud-direct/chat.ts +++ b/src/cloud-direct/chat.ts @@ -36,14 +36,21 @@ import { getCachedUserJwt } from './auth.js'; import { getCachedCatalog, ModelNotAvailableError } from './catalog.js'; /** - * Connect-RPC streaming inactivity timeout. If the cloud sends zero bytes - * for this long after the last chunk, we abort the fetch. The cloud's own - * idle limit is around 90s on most models; we set ours a little above so - * we only trigger when the server has genuinely stopped responding. + * Connect-RPC streaming inactivity timeout. Opus can spend multiple minutes + * before its first body chunk on large hardware-debugging contexts, so keep + * this above ordinary model thinking latency and let users override it. */ -const CLOUD_STREAM_IDLE_MS = 120_000; +const CLOUD_STREAM_IDLE_MS = readPositiveIntEnv('OPENCODE_WINDSURF_CLOUD_STREAM_IDLE_MS', 300_000); /** Time-to-first-byte timeout. */ -const CLOUD_STREAM_TTFB_MS = 60_000; +const CLOUD_STREAM_TTFB_MS = readPositiveIntEnv('OPENCODE_WINDSURF_CLOUD_STREAM_TTFB_MS', 120_000); +const DEFAULT_MAX_INPUT_TOKENS = readPositiveIntEnv('OPENCODE_WINDSURF_MAX_INPUT_TOKENS', 256_000); + +function readPositiveIntEnv(name: string, fallback: number): number { + const raw = process.env[name]; + if (!raw) return fallback; + const parsed = Number(raw); + return Number.isFinite(parsed) && parsed > 0 ? Math.trunc(parsed) : fallback; +} /** * Compose multiple AbortSignals into a single signal that aborts when ANY @@ -274,7 +281,7 @@ function encodeCompletionConfiguration(opts: { }; return Buffer.concat([ encodeVarintField(1, 1), - encodeVarintField(2, opts.maxInputTokens ?? 64000), + encodeVarintField(2, opts.maxInputTokens ?? DEFAULT_MAX_INPUT_TOKENS), // Default to the catalog's most permissive `maxOutputTokens` (128K). // The cloud clamps to the per-model limit anyway. The old 4096 default // would silently truncate any callers (tests, CLI users of diff --git a/src/plugin.ts b/src/plugin.ts index 220f04a..1c67ba2 100644 --- a/src/plugin.ts +++ b/src/plugin.ts @@ -122,7 +122,7 @@ const DEFAULT_TOOL_CALL_TRANSLATOR_MODEL = 'swe-1.6'; const DEFAULT_TOOL_INTENT_DETECTION: ToolIntentDetectionMode = 'always'; const DEFAULT_TOOL_TRANSLATOR_CONTEXT_MESSAGES = 8; const DEFAULT_TOOL_RESULT_CONTEXT: ToolResultContextMode = 'tail'; -const DEFAULT_TOOL_RESULT_CONTEXT_MESSAGES = 24; +const DEFAULT_TOOL_RESULT_CONTEXT_MESSAGES = 64; const MAX_STORED_PLANNER_DRAFTS = 200; type ToolIntentDetectionMode = 'always' | 'assist' | 'marker'; @@ -261,16 +261,26 @@ function buildOpusToolPlanningMessages(messages: ChatHistoryItem[], tools: Cloud `Native tool schemas cannot be sent to this model, but opencode can still execute tools after your decision.\n` + `You are responsible for planning the next step.\n` + `If the next step needs a command, file read/edit/search, todo update, web fetch, or any tool action, describe the intended tool action plainly.\n` + + `Do not ask the user for build commands, paths, files, status, or other facts that tools can inspect. Plan the tool inspection instead.\n` + + `If the user asks you to build, flash, test, inspect, continue work, or verify hardware/logs, plan a tool action unless the answer is already proven by the latest context.\n` + `Prefer: TOOL_INTENT: .\n` + `Do not invent tool output and do not continue as if a tool already ran.\n` + - `If no tool is needed, answer normally.\n\n` + + `If no tool is needed, answer normally. Never prefix answers with "No tool needed".\n\n` + `Available tools:\n${JSON.stringify(manifest)}`, }, ]; } -function recentMessages(messages: ChatHistoryItem[], count: number): ChatHistoryItem[] { - return messages.filter((m) => m.role !== 'system').slice(-count); +function recentMessagesWithLatestUser(messages: ChatHistoryItem[], count: number): ChatHistoryItem[] { + const nonSystem = messages.filter((m) => m.role !== 'system'); + const tail = nonSystem.slice(-count); + const latestUser = latestUserMessage(nonSystem); + if (!latestUser || tail.includes(latestUser)) return tail; + return [latestUser, ...tail]; +} + +function roleOrder(messages: ChatHistoryItem[]): string { + return messages.map((m) => m.role).join(','); } function contentToText(content: ChatHistoryItem['content']): string { @@ -331,11 +341,7 @@ function plannerDraftContext(messages: ChatHistoryItem[]): string { } function buildToolCallTranslatorMessages(messages: ChatHistoryItem[], opusDraft: string, tailCount: number): ChatHistoryItem[] { - const latestUser = latestUserMessage(messages); - const tail = recentMessages(messages, tailCount); - const context: ChatHistoryItem[] = flattenToolHistoryMessages( - latestUser ? [latestUser, ...tail.filter((m) => m !== latestUser)] : tail, - ); + const context: ChatHistoryItem[] = flattenToolHistoryMessages(recentMessagesWithLatestUser(messages, tailCount)); return [ ...context, { @@ -345,7 +351,9 @@ function buildToolCallTranslatorMessages(messages: ChatHistoryItem[], opusDraft: `\n${opusDraft}\n\n\n` + `Convert that planned next step into at most one native tool call.\n` + `If the draft implies command execution, file read/edit/search, todo update, web fetch, or any tool action, call exactly the matching tool.\n` + - `If the draft is a final answer or no tool is needed, respond with exactly: NO_TOOL.\n` + + `If the draft says it lacks build commands, paths, files, current status, logs, or other inspectable facts, call an appropriate inspection tool instead of returning NO_TOOL.\n` + + `If the latest user asks to build, flash, test, inspect, continue work, or verify hardware/logs, prefer a tool call when any available tool can make progress.\n` + + `Return exactly NO_TOOL only when the draft is already a final user-facing answer or no available tool can make progress.\n` + `Do not answer the user. Do not add commentary.`, }, ]; @@ -364,10 +372,8 @@ function buildToolResultMessages(messages: ChatHistoryItem[], config: TextOnlyTo if (config.toolResultContext === 'full') return [...messages, injected]; - const latestUser = latestUserMessage(messages); const tailCount = config.toolResultContext === 'minimal' ? 6 : config.toolResultContextMessages; - const tail = recentMessages(messages, tailCount); - const context = flattenToolHistoryMessages(latestUser ? [latestUser, ...tail.filter((m) => m !== latestUser)] : tail); + const context = flattenToolHistoryMessages(recentMessagesWithLatestUser(messages, tailCount)); return [...context, injected]; } @@ -378,6 +384,39 @@ function shouldCallToolTranslator(draft: string, mode: ToolIntentDetectionMode): return /\b(?:I'll|I will|let me|now|next I'll|I need to)\s+(?:run|execute|read|inspect|check|edit|search|grep|build|flash|capture|write|update)\b/i.test(draft); } +function hasToolResultMessages(messages: ChatHistoryItem[]): boolean { + return messages.some((m) => m.role === 'tool'); +} + +function syntheticBashToolCallFromDraft(draft: string, tools: CloudToolDef[]): CloudChatEvent[] | undefined { + if (!tools.some((t) => t.name === 'bash')) return undefined; + + const command = + draft.match(/([\s\S]*?)(?:<\/parameter>|$)/i)?.[1]?.trim() ?? + draft.match(/```tool\s*\n\s*bash\s*:\s*([\s\S]*?)```/i)?.[1]?.trim() ?? + draft.match(/\bTOOL_INTENT\s*:[^\n]*\bbash\b[^\n`]*`([^`]+)`/i)?.[1]?.trim(); + + if (!command) return undefined; + + const workdir = + draft.match(/\bworkdir\b\s*`([^`]+)`/i)?.[1] ?? + draft.match(/\bworkdir\b\s*["']([^"']+)["']/i)?.[1]; + const timeoutSeconds = Number(draft.match(/\btimeout\b\s*(\d+)\s*s\b/i)?.[1]); + const args: Record = { + command, + description: 'Run planned command', + }; + if (workdir) args.workdir = workdir; + if (Number.isFinite(timeoutSeconds) && timeoutSeconds > 0) args.timeout = timeoutSeconds * 1000; + + const id = `call_${crypto.randomBytes(12).toString('hex')}`; + return [ + { kind: 'tool_call_start', id, name: 'bash' }, + { kind: 'tool_call_args', id, argsDelta: JSON.stringify(args) }, + { kind: 'finish', reason: 'tool_calls' }, + ]; +} + /** * Map an opencode/OpenAI-shaped chat message into the ChatHistoryItem the * cloud-direct encoder expects. Importantly, this preserves `tool_call_id` @@ -493,10 +532,13 @@ function createStreamingResponse( const toolConfig = resolveToolConfig(request.providerOptions); const useTranslator = !!resolved.textOnly && tools.length > 0; const translator = useTranslator ? getToolCallTranslatorModel(request.providerOptions) : undefined; - const opusMessages = resolved.textOnly + const shouldReduceToolResultContext = useTranslator && hasToolResultMessages(multimodalMessages); + const passthroughMessages = multimodalMessages; + const resultMessages = shouldReduceToolResultContext ? buildToolResultMessages(multimodalMessages, toolConfig) : multimodalMessages; - debugLog.log(`[windsurf-plugin] streamChatEvents starting (model=${resolved.modelUid}, msgs=${opusMessages.length}, tools=${useTranslator ? 0 : tools.length}, toolCallTranslator=${translator?.modelUid ?? 'none'}, intent=${toolConfig.toolIntentDetection}, resultContext=${toolConfig.toolResultContext}:${toolConfig.toolResultContextMessages})`); + debugLog.log(`[windsurf-plugin] streamChatEvents starting (model=${resolved.modelUid}, plannerMsgs=${passthroughMessages.length}, resultMsgs=${resultMessages.length}, tools=${useTranslator ? 0 : tools.length}, toolCallTranslator=${translator?.modelUid ?? 'none'}, intent=${toolConfig.toolIntentDetection}, resultContext=${shouldReduceToolResultContext ? `${toolConfig.toolResultContext}:${toolConfig.toolResultContextMessages}` : 'passthrough'})`); + if (shouldReduceToolResultContext) debugLog.log(`[windsurf-plugin] reduced result context roles=${roleOrder(resultMessages)}`); let eventCount = 0; let textBytes = 0; // Thread the caller's `max_tokens` into the proto's @@ -530,7 +572,7 @@ function createStreamingResponse( yield* streamChatEvents({ ...common, modelUid: resolved.modelUid, - messages: opusMessages, + messages: passthroughMessages, tools: tools.length > 0 ? tools : undefined, }); return; @@ -541,7 +583,7 @@ function createStreamingResponse( for await (const ev of streamChatEvents({ ...common, modelUid: resolved.modelUid, - messages: buildOpusToolPlanningMessages(opusMessages, tools), + messages: buildOpusToolPlanningMessages(passthroughMessages, tools), })) { opusEvents.push(ev); if (ev.kind === 'text') opusDraft += ev.text; @@ -557,10 +599,12 @@ function createStreamingResponse( let fallbackSawTool = false; const fallbackEvents: CloudChatEvent[] = []; + const translatorMessages = buildToolCallTranslatorMessages(multimodalMessages, opusDraft, toolConfig.toolTranslatorContextMessages); + debugLog.log(`[windsurf-plugin] translator context roles=${roleOrder(translatorMessages)}`); for await (const ev of streamChatEvents({ ...common, modelUid: translator.modelUid, - messages: buildToolCallTranslatorMessages(multimodalMessages, opusDraft, toolConfig.toolTranslatorContextMessages), + messages: translatorMessages, tools, })) { fallbackEvents.push(ev); @@ -580,6 +624,16 @@ function createStreamingResponse( return; } + const syntheticToolCall = syntheticBashToolCallFromDraft(opusDraft, tools); + if (syntheticToolCall) { + debugLog.log('[windsurf-plugin] synthesized bash tool call from opus draft after translator emitted no tool'); + for (const ev of syntheticToolCall) { + if (ev.kind === 'tool_call_start') storePlannerDraft(ev.id, { draft: opusDraft, modelUid: resolved.modelUid }); + yield ev; + } + return; + } + debugLog.log(`[windsurf-plugin] tool-call translator model=${translator.modelUid} emitted no tool call; streaming opus draft`); for (const ev of opusEvents) yield ev; }; @@ -817,9 +871,12 @@ async function createNonStreamingResponse( const toolConfig = resolveToolConfig(request.providerOptions); const useTranslator = !!resolved.textOnly && tools.length > 0; const translator = useTranslator ? getToolCallTranslatorModel(request.providerOptions) : undefined; - const opusMessages = resolved.textOnly + const shouldReduceToolResultContext = useTranslator && hasToolResultMessages(multimodalMessages); + const passthroughMessages = multimodalMessages; + const resultMessages = shouldReduceToolResultContext ? buildToolResultMessages(multimodalMessages, toolConfig) : multimodalMessages; + debugLog.log(`[windsurf-plugin] nonstream ChatEvents starting (model=${resolved.modelUid}, plannerMsgs=${passthroughMessages.length}, resultMsgs=${resultMessages.length}, tools=${useTranslator ? 0 : tools.length}, toolCallTranslator=${translator?.modelUid ?? 'none'}, intent=${toolConfig.toolIntentDetection}, resultContext=${shouldReduceToolResultContext ? `${toolConfig.toolResultContext}:${toolConfig.toolResultContextMessages}` : 'passthrough'})`); const eventSource = async function* (): AsyncGenerator { const common = { @@ -836,7 +893,7 @@ async function createNonStreamingResponse( yield* streamChatEvents({ ...common, modelUid: resolved.modelUid, - messages: opusMessages, + messages: passthroughMessages, tools: tools.length > 0 ? tools : undefined, }); return; @@ -847,7 +904,7 @@ async function createNonStreamingResponse( for await (const ev of streamChatEvents({ ...common, modelUid: resolved.modelUid, - messages: buildOpusToolPlanningMessages(opusMessages, tools), + messages: buildOpusToolPlanningMessages(passthroughMessages, tools), })) { opusEvents.push(ev); if (ev.kind === 'text') opusDraft += ev.text; @@ -860,10 +917,11 @@ async function createNonStreamingResponse( let fallbackSawTool = false; const fallbackEvents: CloudChatEvent[] = []; + const translatorMessages = buildToolCallTranslatorMessages(multimodalMessages, opusDraft, toolConfig.toolTranslatorContextMessages); for await (const ev of streamChatEvents({ ...common, modelUid: translator.modelUid, - messages: buildToolCallTranslatorMessages(multimodalMessages, opusDraft, toolConfig.toolTranslatorContextMessages), + messages: translatorMessages, tools, })) { fallbackEvents.push(ev); @@ -881,6 +939,17 @@ async function createNonStreamingResponse( return; } + const syntheticToolCall = syntheticBashToolCallFromDraft(opusDraft, tools); + if (syntheticToolCall) { + for (const ev of syntheticToolCall) { + if (ev.kind === 'tool_call_start') { + storePlannerDraft(ev.id, { draft: opusDraft, modelUid: resolved.modelUid }); + } + yield ev; + } + return; + } + for (const ev of opusEvents) yield ev; }; From 9755fdaf699c8a1d9f39e327f1cc0406035d721b Mon Sep 17 00:00:00 2001 From: Muhammad Rizqi Nur Date: Thu, 28 May 2026 18:47:16 +0700 Subject: [PATCH 7/7] fix context size in ui and add retry --- src/cloud-direct/chat.ts | 15 ++-- src/plugin.ts | 169 ++++++++++++++++++++++++++++++++++----- 2 files changed, 157 insertions(+), 27 deletions(-) diff --git a/src/cloud-direct/chat.ts b/src/cloud-direct/chat.ts index 225e456..4ede9f6 100644 --- a/src/cloud-direct/chat.ts +++ b/src/cloud-direct/chat.ts @@ -724,15 +724,16 @@ function decodeUsageBlock(buf: Buffer): CloudChatEvent | null { } } if (promptTokens === undefined && completionTokens === undefined) return null; - // totalTokens reflects what OpenAI's API counts as billable: input + - // output. Cached / cache-creation / reasoning subtotals are surfaced as - // additional fields so callers that want a fuller picture (e.g. cost - // breakdown for reasoning models) can read them, but they're NOT - // double-counted into total. - const total = (promptTokens ?? 0) + (completionTokens ?? 0); + // Cognition reports cache reads/writes separately from fresh input tokens. + // OpenAI-compatible callers expect `prompt_tokens` to represent the full + // effective prompt size (including cached prompt), and opencode uses it for + // context-window display. Preserve the cache subtotals too for callers that + // want cost details. + const fullPromptTokens = (promptTokens ?? 0) + (cachedInputTokens ?? 0) + (cacheCreationInputTokens ?? 0); + const total = fullPromptTokens + (completionTokens ?? 0); return { kind: 'usage', - promptTokens, + promptTokens: fullPromptTokens > 0 ? fullPromptTokens : undefined, completionTokens, totalTokens: total > 0 ? total : undefined, cachedInputTokens, diff --git a/src/plugin.ts b/src/plugin.ts index 1c67ba2..38cfabe 100644 --- a/src/plugin.ts +++ b/src/plugin.ts @@ -283,6 +283,35 @@ function roleOrder(messages: ChatHistoryItem[]): string { return messages.map((m) => m.role).join(','); } +function messageByteSummary(messages: ChatHistoryItem[], label: string): string { + const sizes = messages.map((m, i) => ({ i, role: m.role, bytes: Buffer.byteLength(contentToText(m.content), 'utf8') })); + const total = sizes.reduce((n, s) => n + s.bytes, 0); + const largest = sizes + .slice() + .sort((a, b) => b.bytes - a.bytes) + .slice(0, 5) + .map((s) => `${s.i}:${s.role}:${s.bytes}B`) + .join(','); + return `${label} totalText=${total}B largest=${largest}`; +} + +function usageSummary(events: CloudChatEvent[], label: string): string { + const usage = events.filter((ev): ev is Extract => ev.kind === 'usage'); + if (usage.length === 0) return `${label}=none`; + return `${label}=${usage.map((u) => JSON.stringify({ + prompt: u.promptTokens, + completion: u.completionTokens, + total: u.totalTokens, + cached: u.cachedInputTokens, + cacheCreate: u.cacheCreationInputTokens, + reasoning: u.reasoningTokens, + })).join('+')}`; +} + +function isSocketClosedError(error: unknown): boolean { + return error instanceof Error && /socket connection was closed unexpectedly/i.test(error.message); +} + function contentToText(content: ChatHistoryItem['content']): string { if (typeof content === 'string') return content; if (!Array.isArray(content)) return String(content ?? ''); @@ -297,6 +326,25 @@ function contentToText(content: ChatHistoryItem['content']): string { .join('\n'); } +function truncateMiddle(text: string, maxBytes: number): string { + if (Buffer.byteLength(text, 'utf8') <= maxBytes) return text; + const marker = `\n\n[...truncated oversized tool output for retry...]\n\n`; + const targetChars = Math.max(0, maxBytes - marker.length); + const head = Math.floor(targetChars * 0.6); + const tail = targetChars - head; + return `${text.slice(0, head)}${marker}${text.slice(-tail)}`; +} + +function compactOversizedToolMessagesForRetry(messages: ChatHistoryItem[]): ChatHistoryItem[] { + const maxToolBytes = 12_000; + return messages.map((m) => { + if (m.role !== 'tool') return m; + const text = contentToText(m.content); + if (Buffer.byteLength(text, 'utf8') <= maxToolBytes) return m; + return { ...m, content: truncateMiddle(text, maxToolBytes) } satisfies ChatHistoryItem; + }); +} + function flattenToolHistoryMessages(messages: ChatHistoryItem[]): ChatHistoryItem[] { return messages.map((m) => { const text = contentToText(m.content); @@ -417,6 +465,32 @@ function syntheticBashToolCallFromDraft(draft: string, tools: CloudToolDef[]): C ]; } +function combinedUsageEvent(...eventGroups: CloudChatEvent[][]): CloudChatEvent | undefined { + const usageEvents = eventGroups.flat().filter((ev): ev is Extract => ev.kind === 'usage'); + if (usageEvents.length === 0) return undefined; + const sum = (key: keyof Omit, 'kind'>): number | undefined => { + let total = 0; + let seen = false; + for (const ev of usageEvents) { + const value = ev[key]; + if (typeof value === 'number') { + total += value; + seen = true; + } + } + return seen ? total : undefined; + }; + return { + kind: 'usage', + promptTokens: sum('promptTokens'), + completionTokens: sum('completionTokens'), + totalTokens: sum('totalTokens'), + cachedInputTokens: sum('cachedInputTokens'), + cacheCreationInputTokens: sum('cacheCreationInputTokens'), + reasoningTokens: sum('reasoningTokens'), + }; +} + /** * Map an opencode/OpenAI-shaped chat message into the ChatHistoryItem the * cloud-direct encoder expects. Importantly, this preserves `tool_call_id` @@ -538,6 +612,10 @@ function createStreamingResponse( ? buildToolResultMessages(multimodalMessages, toolConfig) : multimodalMessages; debugLog.log(`[windsurf-plugin] streamChatEvents starting (model=${resolved.modelUid}, plannerMsgs=${passthroughMessages.length}, resultMsgs=${resultMessages.length}, tools=${useTranslator ? 0 : tools.length}, toolCallTranslator=${translator?.modelUid ?? 'none'}, intent=${toolConfig.toolIntentDetection}, resultContext=${shouldReduceToolResultContext ? `${toolConfig.toolResultContext}:${toolConfig.toolResultContextMessages}` : 'passthrough'})`); + if (debugLog.enabled) { + debugLog.log(`[windsurf-plugin] ${messageByteSummary(passthroughMessages, 'planner')}`); + if (resultMessages !== passthroughMessages) debugLog.log(`[windsurf-plugin] ${messageByteSummary(resultMessages, 'result')}`); + } if (shouldReduceToolResultContext) debugLog.log(`[windsurf-plugin] reduced result context roles=${roleOrder(resultMessages)}`); let eventCount = 0; let textBytes = 0; @@ -578,15 +656,31 @@ function createStreamingResponse( return; } - const opusEvents: CloudChatEvent[] = []; + let opusEvents: CloudChatEvent[] = []; let opusDraft = ''; - for await (const ev of streamChatEvents({ - ...common, - modelUid: resolved.modelUid, - messages: buildOpusToolPlanningMessages(passthroughMessages, tools), - })) { - opusEvents.push(ev); - if (ev.kind === 'text') opusDraft += ev.text; + try { + for await (const ev of streamChatEvents({ + ...common, + modelUid: resolved.modelUid, + messages: buildOpusToolPlanningMessages(passthroughMessages, tools), + })) { + opusEvents.push(ev); + if (ev.kind === 'text') opusDraft += ev.text; + } + } catch (error) { + if (!isSocketClosedError(error)) throw error; + const retryMessages = compactOversizedToolMessagesForRetry(passthroughMessages); + debugLog.log(`[windsurf-plugin] opus planner socket closed; retrying with compacted oversized tool outputs (${messageByteSummary(retryMessages, 'plannerRetry')})`); + opusEvents = []; + opusDraft = ''; + for await (const ev of streamChatEvents({ + ...common, + modelUid: resolved.modelUid, + messages: buildOpusToolPlanningMessages(retryMessages, tools), + })) { + opusEvents.push(ev); + if (ev.kind === 'text') opusDraft += ev.text; + } } debugLog.log(`[windsurf-plugin] opus planner draft (${opusDraft.length}B): ${opusDraft.slice(0, 500).replace(/\n/g, '\\n')}`); @@ -618,9 +712,12 @@ function createStreamingResponse( storePlannerDraft(ev.id, { draft: opusDraft, modelUid: resolved.modelUid }); debugLog.log(`[windsurf-plugin] stored opus planner draft for tool_call_id=${ev.id}`); } - if (ev.kind === 'text' || ev.kind === 'reasoning') continue; + if (ev.kind === 'text' || ev.kind === 'reasoning' || ev.kind === 'usage') continue; yield ev; } + const usage = combinedUsageEvent(opusEvents, fallbackEvents); + debugLog.log(`[windsurf-plugin] bridge usage ${usageSummary(opusEvents, 'planner')} ${usageSummary(fallbackEvents, 'translator')} combined=${usage ? JSON.stringify(usage) : 'none'}`); + if (usage) yield usage; return; } @@ -631,11 +728,19 @@ function createStreamingResponse( if (ev.kind === 'tool_call_start') storePlannerDraft(ev.id, { draft: opusDraft, modelUid: resolved.modelUid }); yield ev; } + const usage = combinedUsageEvent(opusEvents, fallbackEvents); + debugLog.log(`[windsurf-plugin] bridge usage ${usageSummary(opusEvents, 'planner')} ${usageSummary(fallbackEvents, 'translator')} combined=${usage ? JSON.stringify(usage) : 'none'}`); + if (usage) yield usage; return; } debugLog.log(`[windsurf-plugin] tool-call translator model=${translator.modelUid} emitted no tool call; streaming opus draft`); - for (const ev of opusEvents) yield ev; + for (const ev of opusEvents) { + if (ev.kind !== 'usage') yield ev; + } + const usage = combinedUsageEvent(opusEvents, fallbackEvents); + debugLog.log(`[windsurf-plugin] bridge usage ${usageSummary(opusEvents, 'planner')} ${usageSummary(fallbackEvents, 'translator')} combined=${usage ? JSON.stringify(usage) : 'none'}`); + if (usage) yield usage; }; for await (const ev of eventSource()) { @@ -899,15 +1004,31 @@ async function createNonStreamingResponse( return; } - const opusEvents: CloudChatEvent[] = []; + let opusEvents: CloudChatEvent[] = []; let opusDraft = ''; - for await (const ev of streamChatEvents({ - ...common, - modelUid: resolved.modelUid, - messages: buildOpusToolPlanningMessages(passthroughMessages, tools), - })) { - opusEvents.push(ev); - if (ev.kind === 'text') opusDraft += ev.text; + try { + for await (const ev of streamChatEvents({ + ...common, + modelUid: resolved.modelUid, + messages: buildOpusToolPlanningMessages(passthroughMessages, tools), + })) { + opusEvents.push(ev); + if (ev.kind === 'text') opusDraft += ev.text; + } + } catch (error) { + if (!isSocketClosedError(error)) throw error; + const retryMessages = compactOversizedToolMessagesForRetry(passthroughMessages); + debugLog.log(`[windsurf-plugin] nonstream opus planner socket closed; retrying with compacted oversized tool outputs (${messageByteSummary(retryMessages, 'plannerRetry')})`); + opusEvents = []; + opusDraft = ''; + for await (const ev of streamChatEvents({ + ...common, + modelUid: resolved.modelUid, + messages: buildOpusToolPlanningMessages(retryMessages, tools), + })) { + opusEvents.push(ev); + if (ev.kind === 'text') opusDraft += ev.text; + } } if (!shouldCallToolTranslator(opusDraft, toolConfig.toolIntentDetection)) { @@ -933,9 +1054,11 @@ async function createNonStreamingResponse( if (ev.kind === 'tool_call_start') { storePlannerDraft(ev.id, { draft: opusDraft, modelUid: resolved.modelUid }); } - if (ev.kind === 'text' || ev.kind === 'reasoning') continue; + if (ev.kind === 'text' || ev.kind === 'reasoning' || ev.kind === 'usage') continue; yield ev; } + const usage = combinedUsageEvent(opusEvents, fallbackEvents); + if (usage) yield usage; return; } @@ -947,10 +1070,16 @@ async function createNonStreamingResponse( } yield ev; } + const usage = combinedUsageEvent(opusEvents, fallbackEvents); + if (usage) yield usage; return; } - for (const ev of opusEvents) yield ev; + for (const ev of opusEvents) { + if (ev.kind !== 'usage') yield ev; + } + const usage = combinedUsageEvent(opusEvents, fallbackEvents); + if (usage) yield usage; }; for await (const ev of eventSource()) {