diff --git a/package.json b/package.json index 9e2c22a..77e4c7c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "opencode-windsurf-auth", - "version": "0.3.3", + "version": "0.3.4", "description": "OpenCode plugin for Windsurf/Codeium authentication - use Windsurf models in OpenCode", "repository": { "type": "git", diff --git a/src/cloud-direct/chat.ts b/src/cloud-direct/chat.ts index 0e1c01e..4ede9f6 100644 --- a/src/cloud-direct/chat.ts +++ b/src/cloud-direct/chat.ts @@ -36,14 +36,21 @@ import { getCachedUserJwt } from './auth.js'; import { getCachedCatalog, ModelNotAvailableError } from './catalog.js'; /** - * Connect-RPC streaming inactivity timeout. If the cloud sends zero bytes - * for this long after the last chunk, we abort the fetch. The cloud's own - * idle limit is around 90s on most models; we set ours a little above so - * we only trigger when the server has genuinely stopped responding. + * Connect-RPC streaming inactivity timeout. Opus can spend multiple minutes + * before its first body chunk on large hardware-debugging contexts, so keep + * this above ordinary model thinking latency and let users override it. */ -const CLOUD_STREAM_IDLE_MS = 120_000; +const CLOUD_STREAM_IDLE_MS = readPositiveIntEnv('OPENCODE_WINDSURF_CLOUD_STREAM_IDLE_MS', 300_000); /** Time-to-first-byte timeout. */ -const CLOUD_STREAM_TTFB_MS = 60_000; +const CLOUD_STREAM_TTFB_MS = readPositiveIntEnv('OPENCODE_WINDSURF_CLOUD_STREAM_TTFB_MS', 120_000); +const DEFAULT_MAX_INPUT_TOKENS = readPositiveIntEnv('OPENCODE_WINDSURF_MAX_INPUT_TOKENS', 256_000); + +function readPositiveIntEnv(name: string, fallback: number): number { + const raw = process.env[name]; + if (!raw) return fallback; + const parsed = Number(raw); + return Number.isFinite(parsed) && parsed > 0 ? Math.trunc(parsed) : fallback; +} /** * Compose multiple AbortSignals into a single signal that aborts when ANY @@ -274,7 +281,7 @@ function encodeCompletionConfiguration(opts: { }; return Buffer.concat([ encodeVarintField(1, 1), - encodeVarintField(2, opts.maxInputTokens ?? 64000), + encodeVarintField(2, opts.maxInputTokens ?? DEFAULT_MAX_INPUT_TOKENS), // Default to the catalog's most permissive `maxOutputTokens` (128K). // The cloud clamps to the per-model limit anyway. The old 4096 default // would silently truncate any callers (tests, CLI users of @@ -717,15 +724,16 @@ function decodeUsageBlock(buf: Buffer): CloudChatEvent | null { } } if (promptTokens === undefined && completionTokens === undefined) return null; - // totalTokens reflects what OpenAI's API counts as billable: input + - // output. Cached / cache-creation / reasoning subtotals are surfaced as - // additional fields so callers that want a fuller picture (e.g. cost - // breakdown for reasoning models) can read them, but they're NOT - // double-counted into total. - const total = (promptTokens ?? 0) + (completionTokens ?? 0); + // Cognition reports cache reads/writes separately from fresh input tokens. + // OpenAI-compatible callers expect `prompt_tokens` to represent the full + // effective prompt size (including cached prompt), and opencode uses it for + // context-window display. Preserve the cache subtotals too for callers that + // want cost details. + const fullPromptTokens = (promptTokens ?? 0) + (cachedInputTokens ?? 0) + (cacheCreationInputTokens ?? 0); + const total = fullPromptTokens + (completionTokens ?? 0); return { kind: 'usage', - promptTokens, + promptTokens: fullPromptTokens > 0 ? fullPromptTokens : undefined, completionTokens, totalTokens: total > 0 ? total : undefined, cachedInputTokens, diff --git a/src/plugin.ts b/src/plugin.ts index a2899c4..38cfabe 100644 --- a/src/plugin.ts +++ b/src/plugin.ts @@ -69,7 +69,7 @@ const debugLog = (() => { import { WindsurfCredentials, WindsurfError } from './plugin/auth.js'; import { resolveCredentials } from './plugin/credentials-resolver.js'; import { loadCredentials as loadOAuthCredentials } from './oauth/storage.js'; -import type { ChatHistoryItem } from './cloud-direct/index.js'; +import type { ChatHistoryItem, CloudChatEvent } from './cloud-direct/index.js'; import { getDefaultModel, getCanonicalModels, @@ -112,6 +112,385 @@ interface ChatCompletionRequest { type ToolDef = NonNullable[number]; +type CloudToolDef = { + name: string; + description: string; + parameters: unknown; +}; + +const DEFAULT_TOOL_CALL_TRANSLATOR_MODEL = 'swe-1.6'; +const DEFAULT_TOOL_INTENT_DETECTION: ToolIntentDetectionMode = 'always'; +const DEFAULT_TOOL_TRANSLATOR_CONTEXT_MESSAGES = 8; +const DEFAULT_TOOL_RESULT_CONTEXT: ToolResultContextMode = 'tail'; +const DEFAULT_TOOL_RESULT_CONTEXT_MESSAGES = 64; +const MAX_STORED_PLANNER_DRAFTS = 200; + +type ToolIntentDetectionMode = 'always' | 'assist' | 'marker'; +type ToolResultContextMode = 'full' | 'tail' | 'minimal'; + +interface TextOnlyToolConfig { + toolIntentDetection: ToolIntentDetectionMode; + toolTranslatorContextMessages: number; + toolResultContext: ToolResultContextMode; + toolResultContextMessages: number; +} + +interface PlannerDraftEntry { + draft: string; + modelUid: string; + createdAt: number; +} + +const plannerDraftByToolCallId = new Map(); + +function storePlannerDraft(toolCallId: string, entry: Omit): void { + if (!toolCallId || !entry.draft) return; + plannerDraftByToolCallId.set(toolCallId, { ...entry, createdAt: Date.now() }); + while (plannerDraftByToolCallId.size > MAX_STORED_PLANNER_DRAFTS) { + const oldest = plannerDraftByToolCallId.keys().next().value; + if (!oldest) break; + plannerDraftByToolCallId.delete(oldest); + } +} + +function extractToolCallTranslatorFromProviderOptions(providerOptions: Record | undefined): string | undefined { + if (!providerOptions) return undefined; + const windsurfRaw = providerOptions['windsurf']; + const windsurf = + windsurfRaw && typeof windsurfRaw === 'object' + ? (windsurfRaw as Record) + : undefined; + const pickString = (v: unknown): string | undefined => (typeof v === 'string' ? v : undefined); + return ( + pickString(windsurf?.['toolCallTranslatorModel']) ?? + pickString(windsurf?.['toolFallbackModel']) ?? + pickString(windsurf?.['fallbackModel']) ?? + pickString(providerOptions['toolCallTranslatorModel']) ?? + pickString(providerOptions['toolFallbackModel']) ?? + pickString(providerOptions['fallbackModel']) + ); +} + +function windsurfProviderOptions(providerOptions: Record | undefined): Record | undefined { + if (!providerOptions) return undefined; + const raw = providerOptions['windsurf']; + return raw && typeof raw === 'object' ? (raw as Record) : undefined; +} + +function pickStringConfig(providerOptions: Record | undefined, key: string): string | undefined { + const windsurf = windsurfProviderOptions(providerOptions); + const v = windsurf?.[key] ?? providerOptions?.[key]; + return typeof v === 'string' ? v : undefined; +} + +function pickNumberConfig(providerOptions: Record | undefined, key: string): number | undefined { + const windsurf = windsurfProviderOptions(providerOptions); + const v = windsurf?.[key] ?? providerOptions?.[key]; + if (typeof v === 'number' && Number.isFinite(v)) return v; + if (typeof v === 'string' && v.trim() !== '') { + const n = Number(v); + if (Number.isFinite(n)) return n; + } + return undefined; +} + +function clampInt(v: number | undefined, fallback: number, min: number, max: number): number { + if (v === undefined || !Number.isFinite(v)) return fallback; + return Math.max(min, Math.min(max, Math.trunc(v))); +} + +function resolveToolConfig(providerOptions: Record | undefined): TextOnlyToolConfig { + const detectionRaw = + pickStringConfig(providerOptions, 'toolIntentDetection') ?? + process.env.OPENCODE_WINDSURF_TOOL_INTENT_DETECTION ?? + DEFAULT_TOOL_INTENT_DETECTION; + const detection: ToolIntentDetectionMode = + detectionRaw === 'marker' || detectionRaw === 'assist' || detectionRaw === 'always' + ? detectionRaw + : DEFAULT_TOOL_INTENT_DETECTION; + + const resultRaw = + pickStringConfig(providerOptions, 'toolResultContext') ?? + process.env.OPENCODE_WINDSURF_TOOL_RESULT_CONTEXT ?? + DEFAULT_TOOL_RESULT_CONTEXT; + const resultContext: ToolResultContextMode = + resultRaw === 'full' || resultRaw === 'tail' || resultRaw === 'minimal' + ? resultRaw + : DEFAULT_TOOL_RESULT_CONTEXT; + + return { + toolIntentDetection: detection, + toolTranslatorContextMessages: clampInt( + pickNumberConfig(providerOptions, 'toolTranslatorContextMessages') ?? Number(process.env.OPENCODE_WINDSURF_TOOL_TRANSLATOR_CONTEXT_MESSAGES), + DEFAULT_TOOL_TRANSLATOR_CONTEXT_MESSAGES, + 1, + 64, + ), + toolResultContext: resultContext, + toolResultContextMessages: clampInt( + pickNumberConfig(providerOptions, 'toolResultContextMessages') ?? Number(process.env.OPENCODE_WINDSURF_TOOL_RESULT_CONTEXT_MESSAGES), + DEFAULT_TOOL_RESULT_CONTEXT_MESSAGES, + 1, + 128, + ), + }; +} + +function getToolCallTranslatorModel(providerOptions: Record | undefined): ReturnType { + const fallbackName = + extractToolCallTranslatorFromProviderOptions(providerOptions)?.trim() || + process.env.OPENCODE_WINDSURF_TOOL_CALL_TRANSLATOR_MODEL?.trim() || + DEFAULT_TOOL_CALL_TRANSLATOR_MODEL; + const fallback = resolveModel(fallbackName); + if (fallback.textOnly) { + throw new Error( + `Tool-call translator model "${fallbackName}" is marked text-only. ` + + `Set OPENCODE_WINDSURF_TOOL_CALL_TRANSLATOR_MODEL to a tool-capable model like swe-1.6.`, + ); + } + return fallback; +} + +function buildOpusToolPlanningMessages(messages: ChatHistoryItem[], tools: CloudToolDef[]): ChatHistoryItem[] { + const manifest = tools.map((t) => ({ name: t.name, description: t.description, parameters: t.parameters })); + return [ + ...messages, + { + role: 'system', + content: + `Native tool schemas cannot be sent to this model, but opencode can still execute tools after your decision.\n` + + `You are responsible for planning the next step.\n` + + `If the next step needs a command, file read/edit/search, todo update, web fetch, or any tool action, describe the intended tool action plainly.\n` + + `Do not ask the user for build commands, paths, files, status, or other facts that tools can inspect. Plan the tool inspection instead.\n` + + `If the user asks you to build, flash, test, inspect, continue work, or verify hardware/logs, plan a tool action unless the answer is already proven by the latest context.\n` + + `Prefer: TOOL_INTENT: .\n` + + `Do not invent tool output and do not continue as if a tool already ran.\n` + + `If no tool is needed, answer normally. Never prefix answers with "No tool needed".\n\n` + + `Available tools:\n${JSON.stringify(manifest)}`, + }, + ]; +} + +function recentMessagesWithLatestUser(messages: ChatHistoryItem[], count: number): ChatHistoryItem[] { + const nonSystem = messages.filter((m) => m.role !== 'system'); + const tail = nonSystem.slice(-count); + const latestUser = latestUserMessage(nonSystem); + if (!latestUser || tail.includes(latestUser)) return tail; + return [latestUser, ...tail]; +} + +function roleOrder(messages: ChatHistoryItem[]): string { + return messages.map((m) => m.role).join(','); +} + +function messageByteSummary(messages: ChatHistoryItem[], label: string): string { + const sizes = messages.map((m, i) => ({ i, role: m.role, bytes: Buffer.byteLength(contentToText(m.content), 'utf8') })); + const total = sizes.reduce((n, s) => n + s.bytes, 0); + const largest = sizes + .slice() + .sort((a, b) => b.bytes - a.bytes) + .slice(0, 5) + .map((s) => `${s.i}:${s.role}:${s.bytes}B`) + .join(','); + return `${label} totalText=${total}B largest=${largest}`; +} + +function usageSummary(events: CloudChatEvent[], label: string): string { + const usage = events.filter((ev): ev is Extract => ev.kind === 'usage'); + if (usage.length === 0) return `${label}=none`; + return `${label}=${usage.map((u) => JSON.stringify({ + prompt: u.promptTokens, + completion: u.completionTokens, + total: u.totalTokens, + cached: u.cachedInputTokens, + cacheCreate: u.cacheCreationInputTokens, + reasoning: u.reasoningTokens, + })).join('+')}`; +} + +function isSocketClosedError(error: unknown): boolean { + return error instanceof Error && /socket connection was closed unexpectedly/i.test(error.message); +} + +function contentToText(content: ChatHistoryItem['content']): string { + if (typeof content === 'string') return content; + if (!Array.isArray(content)) return String(content ?? ''); + return content + .map((part) => { + if (!part || typeof part !== 'object') return ''; + if ('text' in part && typeof part.text === 'string') return part.text; + if ('image_url' in part) return '[image]'; + return ''; + }) + .filter(Boolean) + .join('\n'); +} + +function truncateMiddle(text: string, maxBytes: number): string { + if (Buffer.byteLength(text, 'utf8') <= maxBytes) return text; + const marker = `\n\n[...truncated oversized tool output for retry...]\n\n`; + const targetChars = Math.max(0, maxBytes - marker.length); + const head = Math.floor(targetChars * 0.6); + const tail = targetChars - head; + return `${text.slice(0, head)}${marker}${text.slice(-tail)}`; +} + +function compactOversizedToolMessagesForRetry(messages: ChatHistoryItem[]): ChatHistoryItem[] { + const maxToolBytes = 12_000; + return messages.map((m) => { + if (m.role !== 'tool') return m; + const text = contentToText(m.content); + if (Buffer.byteLength(text, 'utf8') <= maxToolBytes) return m; + return { ...m, content: truncateMiddle(text, maxToolBytes) } satisfies ChatHistoryItem; + }); +} + +function flattenToolHistoryMessages(messages: ChatHistoryItem[]): ChatHistoryItem[] { + return messages.map((m) => { + const text = contentToText(m.content); + if (m.role === 'tool') { + return { + role: 'user', + content: `\n${text}\n`, + } satisfies ChatHistoryItem; + } + if (m.role === 'assistant' && m.tool_calls && m.tool_calls.length > 0) { + const calls = m.tool_calls + .map((tc) => `${tc.arguments}`) + .join('\n'); + return { + role: 'assistant', + content: text ? `${text}\n${calls}` : calls, + } satisfies ChatHistoryItem; + } + if (m.role === 'system') return m; + return { role: m.role, content: text } satisfies ChatHistoryItem; + }); +} + +function latestUserMessage(messages: ChatHistoryItem[]): ChatHistoryItem | undefined { + for (let i = messages.length - 1; i >= 0; i--) { + if (messages[i]?.role === 'user') return messages[i]; + } + return undefined; +} + +function plannerDraftContext(messages: ChatHistoryItem[]): string { + const ids = new Set(); + for (const m of messages) { + if (m.role === 'tool' && typeof m.tool_call_id === 'string' && m.tool_call_id) ids.add(m.tool_call_id); + } + const parts: string[] = []; + for (const id of ids) { + const entry = plannerDraftByToolCallId.get(id); + if (entry) parts.push(`tool_call_id=${id}\n${entry.draft}`); + } + return parts.join('\n\n'); +} + +function buildToolCallTranslatorMessages(messages: ChatHistoryItem[], opusDraft: string, tailCount: number): ChatHistoryItem[] { + const context: ChatHistoryItem[] = flattenToolHistoryMessages(recentMessagesWithLatestUser(messages, tailCount)); + return [ + ...context, + { + role: 'user', + content: + `The requested model cannot emit native tool calls. It produced this planned next step:\n\n` + + `\n${opusDraft}\n\n\n` + + `Convert that planned next step into at most one native tool call.\n` + + `If the draft implies command execution, file read/edit/search, todo update, web fetch, or any tool action, call exactly the matching tool.\n` + + `If the draft says it lacks build commands, paths, files, current status, logs, or other inspectable facts, call an appropriate inspection tool instead of returning NO_TOOL.\n` + + `If the latest user asks to build, flash, test, inspect, continue work, or verify hardware/logs, prefer a tool call when any available tool can make progress.\n` + + `Return exactly NO_TOOL only when the draft is already a final user-facing answer or no available tool can make progress.\n` + + `Do not answer the user. Do not add commentary.`, + }, + ]; +} + +function buildToolResultMessages(messages: ChatHistoryItem[], config: TextOnlyToolConfig): ChatHistoryItem[] { + const draftContext = plannerDraftContext(messages); + if (!draftContext) return messages; + + const injected: ChatHistoryItem = { + role: 'system', + content: + `Previous Opus planner draft(s) that led to the tool result(s) in this turn:\n` + + `${draftContext}\n\nUse this to interpret the tool result and continue from the original plan.`, + }; + + if (config.toolResultContext === 'full') return [...messages, injected]; + + const tailCount = config.toolResultContext === 'minimal' ? 6 : config.toolResultContextMessages; + const context = flattenToolHistoryMessages(recentMessagesWithLatestUser(messages, tailCount)); + return [...context, injected]; +} + +function shouldCallToolTranslator(draft: string, mode: ToolIntentDetectionMode): boolean { + if (mode === 'always') return true; + if (/\bTOOL_INTENT\s*:/i.test(draft)) return true; + if (mode === 'marker') return false; + return /\b(?:I'll|I will|let me|now|next I'll|I need to)\s+(?:run|execute|read|inspect|check|edit|search|grep|build|flash|capture|write|update)\b/i.test(draft); +} + +function hasToolResultMessages(messages: ChatHistoryItem[]): boolean { + return messages.some((m) => m.role === 'tool'); +} + +function syntheticBashToolCallFromDraft(draft: string, tools: CloudToolDef[]): CloudChatEvent[] | undefined { + if (!tools.some((t) => t.name === 'bash')) return undefined; + + const command = + draft.match(/([\s\S]*?)(?:<\/parameter>|$)/i)?.[1]?.trim() ?? + draft.match(/```tool\s*\n\s*bash\s*:\s*([\s\S]*?)```/i)?.[1]?.trim() ?? + draft.match(/\bTOOL_INTENT\s*:[^\n]*\bbash\b[^\n`]*`([^`]+)`/i)?.[1]?.trim(); + + if (!command) return undefined; + + const workdir = + draft.match(/\bworkdir\b\s*`([^`]+)`/i)?.[1] ?? + draft.match(/\bworkdir\b\s*["']([^"']+)["']/i)?.[1]; + const timeoutSeconds = Number(draft.match(/\btimeout\b\s*(\d+)\s*s\b/i)?.[1]); + const args: Record = { + command, + description: 'Run planned command', + }; + if (workdir) args.workdir = workdir; + if (Number.isFinite(timeoutSeconds) && timeoutSeconds > 0) args.timeout = timeoutSeconds * 1000; + + const id = `call_${crypto.randomBytes(12).toString('hex')}`; + return [ + { kind: 'tool_call_start', id, name: 'bash' }, + { kind: 'tool_call_args', id, argsDelta: JSON.stringify(args) }, + { kind: 'finish', reason: 'tool_calls' }, + ]; +} + +function combinedUsageEvent(...eventGroups: CloudChatEvent[][]): CloudChatEvent | undefined { + const usageEvents = eventGroups.flat().filter((ev): ev is Extract => ev.kind === 'usage'); + if (usageEvents.length === 0) return undefined; + const sum = (key: keyof Omit, 'kind'>): number | undefined => { + let total = 0; + let seen = false; + for (const ev of usageEvents) { + const value = ev[key]; + if (typeof value === 'number') { + total += value; + seen = true; + } + } + return seen ? total : undefined; + }; + return { + kind: 'usage', + promptTokens: sum('promptTokens'), + completionTokens: sum('completionTokens'), + totalTokens: sum('totalTokens'), + cachedInputTokens: sum('cachedInputTokens'), + cacheCreationInputTokens: sum('cacheCreationInputTokens'), + reasoningTokens: sum('reasoningTokens'), + }; +} + /** * Map an opencode/OpenAI-shaped chat message into the ChatHistoryItem the * cloud-direct encoder expects. Importantly, this preserves `tool_call_id` @@ -203,7 +582,6 @@ function createStreamingResponse( description: t.function?.description ?? '', parameters: t.function?.parameters ?? {}, })); - const { streamChatEvents } = await import('./cloud-direct/index.js'); // Cloud-direct accepts the FULL @ai-sdk multimodal content shape // (text + image_url parts). We pass `request.messages` straight @@ -225,7 +603,20 @@ function createStreamingResponse( let usage: { promptTokens?: number; completionTokens?: number; totalTokens?: number } | null = null; let firstChunkSent = false; const t0 = Date.now(); - debugLog.log(`[windsurf-plugin] streamChatEvents starting (model=${resolved.modelUid}, msgs=${multimodalMessages.length}, tools=${tools.length})`); + const toolConfig = resolveToolConfig(request.providerOptions); + const useTranslator = !!resolved.textOnly && tools.length > 0; + const translator = useTranslator ? getToolCallTranslatorModel(request.providerOptions) : undefined; + const shouldReduceToolResultContext = useTranslator && hasToolResultMessages(multimodalMessages); + const passthroughMessages = multimodalMessages; + const resultMessages = shouldReduceToolResultContext + ? buildToolResultMessages(multimodalMessages, toolConfig) + : multimodalMessages; + debugLog.log(`[windsurf-plugin] streamChatEvents starting (model=${resolved.modelUid}, plannerMsgs=${passthroughMessages.length}, resultMsgs=${resultMessages.length}, tools=${useTranslator ? 0 : tools.length}, toolCallTranslator=${translator?.modelUid ?? 'none'}, intent=${toolConfig.toolIntentDetection}, resultContext=${shouldReduceToolResultContext ? `${toolConfig.toolResultContext}:${toolConfig.toolResultContextMessages}` : 'passthrough'})`); + if (debugLog.enabled) { + debugLog.log(`[windsurf-plugin] ${messageByteSummary(passthroughMessages, 'planner')}`); + if (resultMessages !== passthroughMessages) debugLog.log(`[windsurf-plugin] ${messageByteSummary(resultMessages, 'result')}`); + } + if (shouldReduceToolResultContext) debugLog.log(`[windsurf-plugin] reduced result context roles=${roleOrder(resultMessages)}`); let eventCount = 0; let textBytes = 0; // Thread the caller's `max_tokens` into the proto's @@ -246,17 +637,113 @@ function createStreamingResponse( typeof request.max_tokens === 'number' && request.max_tokens > 0 ? request.max_tokens : 128_000; - for await (const ev of streamChatEvents({ - apiKey: credentials.apiKey, - apiServerUrl: credentials.apiServerUrl, - modelUid: resolved.modelUid, - messages: multimodalMessages, - tools: tools.length > 0 ? tools : undefined, - signal: abort.signal, - completionOpts: { - maxOutputTokens: requestedMaxTokens, - }, - })) { + + const eventSource = async function* (): AsyncGenerator { + const common = { + apiKey: credentials.apiKey, + apiServerUrl: credentials.apiServerUrl, + signal: abort.signal, + completionOpts: { maxOutputTokens: requestedMaxTokens }, + }; + + if (!useTranslator || !translator) { + yield* streamChatEvents({ + ...common, + modelUid: resolved.modelUid, + messages: passthroughMessages, + tools: tools.length > 0 ? tools : undefined, + }); + return; + } + + let opusEvents: CloudChatEvent[] = []; + let opusDraft = ''; + try { + for await (const ev of streamChatEvents({ + ...common, + modelUid: resolved.modelUid, + messages: buildOpusToolPlanningMessages(passthroughMessages, tools), + })) { + opusEvents.push(ev); + if (ev.kind === 'text') opusDraft += ev.text; + } + } catch (error) { + if (!isSocketClosedError(error)) throw error; + const retryMessages = compactOversizedToolMessagesForRetry(passthroughMessages); + debugLog.log(`[windsurf-plugin] opus planner socket closed; retrying with compacted oversized tool outputs (${messageByteSummary(retryMessages, 'plannerRetry')})`); + opusEvents = []; + opusDraft = ''; + for await (const ev of streamChatEvents({ + ...common, + modelUid: resolved.modelUid, + messages: buildOpusToolPlanningMessages(retryMessages, tools), + })) { + opusEvents.push(ev); + if (ev.kind === 'text') opusDraft += ev.text; + } + } + + debugLog.log(`[windsurf-plugin] opus planner draft (${opusDraft.length}B): ${opusDraft.slice(0, 500).replace(/\n/g, '\\n')}`); + + if (!shouldCallToolTranslator(opusDraft, toolConfig.toolIntentDetection)) { + debugLog.log(`[windsurf-plugin] tool-call translator skipped by detection=${toolConfig.toolIntentDetection}`); + for (const ev of opusEvents) yield ev; + return; + } + + let fallbackSawTool = false; + const fallbackEvents: CloudChatEvent[] = []; + const translatorMessages = buildToolCallTranslatorMessages(multimodalMessages, opusDraft, toolConfig.toolTranslatorContextMessages); + debugLog.log(`[windsurf-plugin] translator context roles=${roleOrder(translatorMessages)}`); + for await (const ev of streamChatEvents({ + ...common, + modelUid: translator.modelUid, + messages: translatorMessages, + tools, + })) { + fallbackEvents.push(ev); + if (ev.kind === 'tool_call_start') fallbackSawTool = true; + } + + if (fallbackSawTool) { + debugLog.log(`[windsurf-plugin] tool-call translator model=${translator.modelUid} emitted tool call(s)`); + for (const ev of fallbackEvents) { + if (ev.kind === 'tool_call_start') { + storePlannerDraft(ev.id, { draft: opusDraft, modelUid: resolved.modelUid }); + debugLog.log(`[windsurf-plugin] stored opus planner draft for tool_call_id=${ev.id}`); + } + if (ev.kind === 'text' || ev.kind === 'reasoning' || ev.kind === 'usage') continue; + yield ev; + } + const usage = combinedUsageEvent(opusEvents, fallbackEvents); + debugLog.log(`[windsurf-plugin] bridge usage ${usageSummary(opusEvents, 'planner')} ${usageSummary(fallbackEvents, 'translator')} combined=${usage ? JSON.stringify(usage) : 'none'}`); + if (usage) yield usage; + return; + } + + const syntheticToolCall = syntheticBashToolCallFromDraft(opusDraft, tools); + if (syntheticToolCall) { + debugLog.log('[windsurf-plugin] synthesized bash tool call from opus draft after translator emitted no tool'); + for (const ev of syntheticToolCall) { + if (ev.kind === 'tool_call_start') storePlannerDraft(ev.id, { draft: opusDraft, modelUid: resolved.modelUid }); + yield ev; + } + const usage = combinedUsageEvent(opusEvents, fallbackEvents); + debugLog.log(`[windsurf-plugin] bridge usage ${usageSummary(opusEvents, 'planner')} ${usageSummary(fallbackEvents, 'translator')} combined=${usage ? JSON.stringify(usage) : 'none'}`); + if (usage) yield usage; + return; + } + + debugLog.log(`[windsurf-plugin] tool-call translator model=${translator.modelUid} emitted no tool call; streaming opus draft`); + for (const ev of opusEvents) { + if (ev.kind !== 'usage') yield ev; + } + const usage = combinedUsageEvent(opusEvents, fallbackEvents); + debugLog.log(`[windsurf-plugin] bridge usage ${usageSummary(opusEvents, 'planner')} ${usageSummary(fallbackEvents, 'translator')} combined=${usage ? JSON.stringify(usage) : 'none'}`); + if (usage) yield usage; + }; + + for await (const ev of eventSource()) { eventCount++; if (eventCount === 1) debugLog.log(`[windsurf-plugin] streamChatEvents first event after ${Date.now() - t0}ms (kind=${ev.kind})`); // @ai-sdk expects `delta.role: 'assistant'` on the *first* chunk @@ -466,7 +953,6 @@ async function createNonStreamingResponse( description: t.function?.description ?? '', parameters: t.function?.parameters ?? {}, })); - const multimodalMessages: ChatHistoryItem[] = request.messages.map((m) => mapMessageToHistoryItem(m)); const { streamChatEvents } = await import('./cloud-direct/index.js'); @@ -487,21 +973,116 @@ async function createNonStreamingResponse( type CollectedToolCall = { id: string; name: string; args: string }; const collectedToolCalls: CollectedToolCall[] = []; let currentToolCall: CollectedToolCall | null = null; + const toolConfig = resolveToolConfig(request.providerOptions); + const useTranslator = !!resolved.textOnly && tools.length > 0; + const translator = useTranslator ? getToolCallTranslatorModel(request.providerOptions) : undefined; + const shouldReduceToolResultContext = useTranslator && hasToolResultMessages(multimodalMessages); + const passthroughMessages = multimodalMessages; + const resultMessages = shouldReduceToolResultContext + ? buildToolResultMessages(multimodalMessages, toolConfig) + : multimodalMessages; + debugLog.log(`[windsurf-plugin] nonstream ChatEvents starting (model=${resolved.modelUid}, plannerMsgs=${passthroughMessages.length}, resultMsgs=${resultMessages.length}, tools=${useTranslator ? 0 : tools.length}, toolCallTranslator=${translator?.modelUid ?? 'none'}, intent=${toolConfig.toolIntentDetection}, resultContext=${shouldReduceToolResultContext ? `${toolConfig.toolResultContext}:${toolConfig.toolResultContextMessages}` : 'passthrough'})`); + + const eventSource = async function* (): AsyncGenerator { + const common = { + apiKey: credentials.apiKey, + apiServerUrl: credentials.apiServerUrl, + completionOpts: { maxOutputTokens: requestedMaxTokens }, + // Propagate the caller's abort so a client disconnect during a + // non-streaming title-gen / summary call actually stops the upstream + // cloud request and the billable token usage with it. + signal, + }; - for await (const ev of streamChatEvents({ - apiKey: credentials.apiKey, - apiServerUrl: credentials.apiServerUrl, - modelUid: resolved.modelUid, - messages: multimodalMessages, - tools: tools.length > 0 ? tools : undefined, - completionOpts: { - maxOutputTokens: requestedMaxTokens, - }, - // Propagate the caller's abort so a client disconnect during a - // non-streaming title-gen / summary call actually stops the upstream - // cloud request and the billable token usage with it. - signal, - })) { + if (!useTranslator || !translator) { + yield* streamChatEvents({ + ...common, + modelUid: resolved.modelUid, + messages: passthroughMessages, + tools: tools.length > 0 ? tools : undefined, + }); + return; + } + + let opusEvents: CloudChatEvent[] = []; + let opusDraft = ''; + try { + for await (const ev of streamChatEvents({ + ...common, + modelUid: resolved.modelUid, + messages: buildOpusToolPlanningMessages(passthroughMessages, tools), + })) { + opusEvents.push(ev); + if (ev.kind === 'text') opusDraft += ev.text; + } + } catch (error) { + if (!isSocketClosedError(error)) throw error; + const retryMessages = compactOversizedToolMessagesForRetry(passthroughMessages); + debugLog.log(`[windsurf-plugin] nonstream opus planner socket closed; retrying with compacted oversized tool outputs (${messageByteSummary(retryMessages, 'plannerRetry')})`); + opusEvents = []; + opusDraft = ''; + for await (const ev of streamChatEvents({ + ...common, + modelUid: resolved.modelUid, + messages: buildOpusToolPlanningMessages(retryMessages, tools), + })) { + opusEvents.push(ev); + if (ev.kind === 'text') opusDraft += ev.text; + } + } + + if (!shouldCallToolTranslator(opusDraft, toolConfig.toolIntentDetection)) { + for (const ev of opusEvents) yield ev; + return; + } + + let fallbackSawTool = false; + const fallbackEvents: CloudChatEvent[] = []; + const translatorMessages = buildToolCallTranslatorMessages(multimodalMessages, opusDraft, toolConfig.toolTranslatorContextMessages); + for await (const ev of streamChatEvents({ + ...common, + modelUid: translator.modelUid, + messages: translatorMessages, + tools, + })) { + fallbackEvents.push(ev); + if (ev.kind === 'tool_call_start') fallbackSawTool = true; + } + + if (fallbackSawTool) { + for (const ev of fallbackEvents) { + if (ev.kind === 'tool_call_start') { + storePlannerDraft(ev.id, { draft: opusDraft, modelUid: resolved.modelUid }); + } + if (ev.kind === 'text' || ev.kind === 'reasoning' || ev.kind === 'usage') continue; + yield ev; + } + const usage = combinedUsageEvent(opusEvents, fallbackEvents); + if (usage) yield usage; + return; + } + + const syntheticToolCall = syntheticBashToolCallFromDraft(opusDraft, tools); + if (syntheticToolCall) { + for (const ev of syntheticToolCall) { + if (ev.kind === 'tool_call_start') { + storePlannerDraft(ev.id, { draft: opusDraft, modelUid: resolved.modelUid }); + } + yield ev; + } + const usage = combinedUsageEvent(opusEvents, fallbackEvents); + if (usage) yield usage; + return; + } + + for (const ev of opusEvents) { + if (ev.kind !== 'usage') yield ev; + } + const usage = combinedUsageEvent(opusEvents, fallbackEvents); + if (usage) yield usage; + }; + + for await (const ev of eventSource()) { if (ev.kind === 'text') { collected += ev.text; } else if (ev.kind === 'tool_call_start') { @@ -829,11 +1410,15 @@ async function ensureWindsurfProxyServer(): Promise { object: 'list', data: models.map((id) => { const variants = getModelVariants(id); + const resolved = resolveModel(id); + const supportsTools = !resolved.textOnly; return { id, object: 'model', created: Math.floor(Date.now() / 1000), owned_by: 'windsurf', + capabilities: { tools: supportsTools }, + text_only: !supportsTools, ...(variants ? { variants: Object.entries(variants).map(([name, meta]) => ({ diff --git a/src/plugin/models.ts b/src/plugin/models.ts index bf0c9c3..bed5a9e 100644 --- a/src/plugin/models.ts +++ b/src/plugin/models.ts @@ -111,6 +111,11 @@ type ModelCatalogEntry = { variants?: Record; /** Aliases accepted for backwards compatibility */ aliases?: string[]; + /** + * True when Cognition's cloud rejects tool-bearing requests for every + * variant of this model. The plugin strips tools and warns the user. + */ + textOnly?: boolean; }; // ========================================================================== @@ -134,6 +139,7 @@ const VARIANT_CATALOG: Record = { 'claude-opus-4.5': { id: 'claude-opus-4.5', defaultUid: 'MODEL_CLAUDE_4_5_OPUS', + textOnly: true, variants: { 'base': { modelUid: 'MODEL_CLAUDE_4_5_OPUS', description: 'Claude Opus 4.5' }, 'thinking': { modelUid: 'MODEL_CLAUDE_4_5_OPUS_THINKING', description: 'Claude Opus 4.5 Thinking' }, @@ -143,6 +149,7 @@ const VARIANT_CATALOG: Record = { 'claude-opus-4.6': { id: 'claude-opus-4.6', defaultUid: 'claude-opus-4-6-thinking', + textOnly: true, variants: { 'thinking': { modelUid: 'claude-opus-4-6-thinking', description: 'Claude Opus 4.6 Thinking' }, 'base': { modelUid: 'claude-opus-4-6', description: 'Claude Opus 4.6' }, @@ -156,6 +163,7 @@ const VARIANT_CATALOG: Record = { 'claude-opus-4.7': { id: 'claude-opus-4.7', defaultUid: 'claude-opus-4-7-medium', + textOnly: true, variants: { 'medium': { modelUid: 'claude-opus-4-7-medium', description: 'Claude Opus 4.7 Medium' }, 'low': { modelUid: 'claude-opus-4-7-low', description: 'Claude Opus 4.7 Low' }, @@ -173,6 +181,7 @@ const VARIANT_CATALOG: Record = { 'claude-sonnet-4.5': { id: 'claude-sonnet-4.5', defaultUid: 'MODEL_PRIVATE_2', + textOnly: true, variants: { '2': { modelUid: 'MODEL_PRIVATE_2', description: 'Claude Sonnet 4.5' }, '3': { modelUid: 'MODEL_PRIVATE_3', description: 'Claude Sonnet 4.5 Thinking' }, @@ -182,6 +191,7 @@ const VARIANT_CATALOG: Record = { 'claude-sonnet-4.6': { id: 'claude-sonnet-4.6', defaultUid: 'claude-sonnet-4-6-thinking', + textOnly: true, variants: { 'thinking': { modelUid: 'claude-sonnet-4-6-thinking', description: 'Claude Sonnet 4.6 Thinking' }, 'base': { modelUid: 'claude-sonnet-4-6', description: 'Claude Sonnet 4.6' }, @@ -420,6 +430,7 @@ const VARIANT_CATALOG: Record = { 'claude-3.7-sonnet': { id: 'claude-3.7-sonnet', defaultEnum: ModelEnum.CLAUDE_3_7_SONNET_20250219, + textOnly: true, variants: { thinking: { enumValue: ModelEnum.CLAUDE_3_7_SONNET_20250219_THINKING, description: 'Thinking mode' }, }, @@ -428,6 +439,7 @@ const VARIANT_CATALOG: Record = { 'claude-4-opus': { id: 'claude-4-opus', defaultEnum: ModelEnum.CLAUDE_4_OPUS, + textOnly: true, variants: { thinking: { enumValue: ModelEnum.CLAUDE_4_OPUS_THINKING, description: 'Thinking mode' }, }, @@ -435,6 +447,7 @@ const VARIANT_CATALOG: Record = { 'claude-4-sonnet': { id: 'claude-4-sonnet', defaultEnum: ModelEnum.CLAUDE_4_SONNET, + textOnly: true, variants: { thinking: { enumValue: ModelEnum.CLAUDE_4_SONNET_THINKING, description: 'Thinking mode' }, }, @@ -442,6 +455,7 @@ const VARIANT_CATALOG: Record = { 'claude-4.1-opus': { id: 'claude-4.1-opus', defaultEnum: ModelEnum.CLAUDE_4_1_OPUS, + textOnly: true, variants: { thinking: { enumValue: ModelEnum.CLAUDE_4_1_OPUS_THINKING, description: 'Thinking mode' }, }, @@ -450,6 +464,7 @@ const VARIANT_CATALOG: Record = { 'claude-4.5-sonnet': { id: 'claude-4.5-sonnet', defaultEnum: ModelEnum.CLAUDE_4_5_SONNET, + textOnly: true, variants: { thinking: { enumValue: ModelEnum.CLAUDE_4_5_SONNET_THINKING, description: 'Thinking mode' }, }, @@ -458,6 +473,7 @@ const VARIANT_CATALOG: Record = { 'claude-4.5-opus': { id: 'claude-4.5-opus', defaultEnum: ModelEnum.CLAUDE_4_5_OPUS, + textOnly: true, variants: { thinking: { enumValue: ModelEnum.CLAUDE_4_5_OPUS_THINKING, description: 'Thinking mode' }, }, @@ -1045,6 +1061,8 @@ export interface ResolvedModel { variant?: string; /** Legacy proto-enum value. Undefined for Cognition-era string-UID models. */ enumValue?: ModelEnumValue; + /** True when the cloud API rejects tool-bearing requests for this model. */ + textOnly?: boolean; } export function resolveModel(modelName: string, variantOverride?: string): ResolvedModel { @@ -1061,9 +1079,10 @@ export function resolveModel(modelName: string, variantOverride?: string): Resol modelUid: uidForVariant(v) ?? uidForEntry(entry), enumValue: v.enumValue, variant: effectiveVariant, + textOnly: entry.textOnly, }; } - return { modelId: entry.id, modelUid: uidForEntry(entry), enumValue: entry.defaultEnum }; + return { modelId: entry.id, modelUid: uidForEntry(entry), enumValue: entry.defaultEnum, textOnly: entry.textOnly }; } // Fallback to legacy alias table (proto-enum-only models). diff --git a/tests/unit/variant.test.ts b/tests/unit/variant.test.ts index ff1d977..6fbfcb8 100644 --- a/tests/unit/variant.test.ts +++ b/tests/unit/variant.test.ts @@ -46,6 +46,12 @@ describe('resolveModel variants', () => { expect(resolveModel('gpt-5.2:low').modelUid).toBe('MODEL_GPT_5_2_LOW'); expect(resolveModel('gpt-5.2:high-priority').modelUid).toBe('MODEL_GPT_5_2_HIGH_PRIORITY'); }); + + test('marks Claude cloud models as text-only', () => { + expect(resolveModel('claude-opus-4.7').textOnly).toBe(true); + expect(resolveModel('claude-sonnet-4.6').textOnly).toBe(true); + expect(resolveModel('swe-1.6').textOnly).toBeUndefined(); + }); }); describe('getModelVariants', () => {