From 1d26dbb322894872caf4655010ea292f17e06796 Mon Sep 17 00:00:00 2001 From: Alem Tuzlak Date: Mon, 6 Apr 2026 17:16:38 +0200 Subject: [PATCH 1/9] feat: add reasoning support to OpenAI Chat Completions Adds optional reasoning_content field to non-streaming ChatCompletion messages and SSE streaming deltas when a fixture includes reasoning. --- src/__tests__/reasoning-all-providers.test.ts | 174 ++++++++++++++++++ src/helpers.ts | 34 +++- src/server.ts | 4 +- src/types.ts | 2 + 4 files changed, 209 insertions(+), 5 deletions(-) create mode 100644 src/__tests__/reasoning-all-providers.test.ts diff --git a/src/__tests__/reasoning-all-providers.test.ts b/src/__tests__/reasoning-all-providers.test.ts new file mode 100644 index 0000000..750d89b --- /dev/null +++ b/src/__tests__/reasoning-all-providers.test.ts @@ -0,0 +1,174 @@ +import { describe, it, expect, afterEach } from "vitest"; +import * as http from "node:http"; +import type { Fixture } from "../types.js"; +import { createServer, type ServerInstance } from "../server.js"; + +// --- helpers --- + +function post( + url: string, + body: unknown, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> { + return new Promise((resolve, reject) => { + const data = JSON.stringify(body); + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 
0, + headers: res.headers, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); +} + +interface SSEEvent { + type?: string; + choices?: { + delta: { content?: string; reasoning_content?: string; role?: string }; + finish_reason: string | null; + }[]; + [key: string]: unknown; +} + +function parseSSEEvents(body: string): SSEEvent[] { + const events: SSEEvent[] = []; + for (const line of body.split("\n")) { + if (line.startsWith("data: ") && line.slice(6).trim() !== "[DONE]") { + events.push(JSON.parse(line.slice(6)) as SSEEvent); + } + } + return events; +} + +// --- fixtures --- + +const reasoningFixture: Fixture = { + match: { userMessage: "think" }, + response: { + content: "The answer is 42.", + reasoning: "Let me think step by step about this problem.", + }, +}; + +const plainFixture: Fixture = { + match: { userMessage: "plain" }, + response: { content: "Just plain text." }, +}; + +const allFixtures: Fixture[] = [reasoningFixture, plainFixture]; + +// --- tests --- + +let instance: ServerInstance | null = null; + +afterEach(async () => { + if (instance) { + await new Promise((resolve) => { + instance!.server.close(() => resolve()); + }); + instance = null; + } +}); + +// ─── OpenAI Chat Completions: Reasoning ───────────────────────────────────── + +describe("POST /v1/chat/completions (reasoning non-streaming)", () => { + it("includes reasoning_content field on assistant message", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "think" }], + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.choices[0].message.content).toBe("The answer is 42."); + expect(body.choices[0].message.reasoning_content).toBe( + "Let me think step by step about this problem.", + ); + }); + + it("omits reasoning_content when 
reasoning is absent", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "plain" }], + stream: false, + }); + + const body = JSON.parse(res.body); + expect(body.choices[0].message.content).toBe("Just plain text."); + expect(body.choices[0].message.reasoning_content).toBeUndefined(); + }); +}); + +describe("POST /v1/chat/completions (reasoning streaming)", () => { + it("emits reasoning_content deltas before content deltas", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "think" }], + stream: true, + }); + + expect(res.status).toBe(200); + const events = parseSSEEvents(res.body); + + const reasoningDeltas = events + .filter((e) => e.choices?.[0]?.delta?.reasoning_content !== undefined) + .map((e) => e.choices![0].delta.reasoning_content); + expect(reasoningDeltas.join("")).toBe("Let me think step by step about this problem."); + + const contentDeltas = events + .filter( + (e) => e.choices?.[0]?.delta?.content !== undefined && e.choices[0].delta.content !== "", + ) + .map((e) => e.choices![0].delta.content); + expect(contentDeltas.join("")).toBe("The answer is 42."); + + const lastReasoningIdx = events.reduce( + (acc, e, idx) => (e.choices?.[0]?.delta?.reasoning_content !== undefined ? 
idx : acc), + -1, + ); + const firstContentIdx = events.findIndex( + (e) => e.choices?.[0]?.delta?.content !== undefined && e.choices[0].delta.content !== "", + ); + expect(lastReasoningIdx).toBeLessThan(firstContentIdx); + }); + + it("no reasoning_content deltas when reasoning is absent", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "plain" }], + stream: true, + }); + + const events = parseSSEEvents(res.body); + const reasoningDeltas = events.filter( + (e) => e.choices?.[0]?.delta?.reasoning_content !== undefined, + ); + expect(reasoningDeltas).toHaveLength(0); + }); +}); diff --git a/src/helpers.ts b/src/helpers.ts index 3d25272..a94e200 100644 --- a/src/helpers.ts +++ b/src/helpers.ts @@ -62,7 +62,12 @@ export function isEmbeddingResponse(r: FixtureResponse): r is EmbeddingResponse return "embedding" in r && Array.isArray((r as EmbeddingResponse).embedding); } -export function buildTextChunks(content: string, model: string, chunkSize: number): SSEChunk[] { +export function buildTextChunks( + content: string, + model: string, + chunkSize: number, + reasoning?: string, +): SSEChunk[] { const id = generateId(); const created = Math.floor(Date.now() / 1000); const chunks: SSEChunk[] = []; @@ -76,6 +81,20 @@ export function buildTextChunks(content: string, model: string, chunkSize: numbe choices: [{ index: 0, delta: { role: "assistant", content: "" }, finish_reason: null }], }); + // Reasoning chunks (emitted before content chunks) + if (reasoning !== undefined) { + for (let i = 0; i < reasoning.length; i += chunkSize) { + const slice = reasoning.slice(i, i + chunkSize); + chunks.push({ + id, + object: "chat.completion.chunk", + created, + model, + choices: [{ index: 0, delta: { reasoning_content: slice }, finish_reason: null }], + }); + } + } + // Content chunks for (let i = 0; i < content.length; i += chunkSize) { const slice = 
content.slice(i, i + chunkSize); @@ -183,7 +202,11 @@ export function buildToolCallChunks( // Non-streaming response builders -export function buildTextCompletion(content: string, model: string): ChatCompletion { +export function buildTextCompletion( + content: string, + model: string, + reasoning?: string, +): ChatCompletion { return { id: generateId(), object: "chat.completion", @@ -192,7 +215,12 @@ export function buildTextCompletion(content: string, model: string): ChatComplet choices: [ { index: 0, - message: { role: "assistant", content, refusal: null }, + message: { + role: "assistant", + content, + refusal: null, + ...(reasoning !== undefined ? { reasoning_content: reasoning } : {}), + }, finish_reason: "stop", }, ], diff --git a/src/server.ts b/src/server.ts index 5e5f08c..0f7cc0e 100644 --- a/src/server.ts +++ b/src/server.ts @@ -454,11 +454,11 @@ async function handleCompletions( response: { status: 200, fixture }, }); if (body.stream !== true) { - const completion = buildTextCompletion(response.content, body.model); + const completion = buildTextCompletion(response.content, body.model, response.reasoning); res.writeHead(200, { "Content-Type": "application/json" }); res.end(JSON.stringify(completion)); } else { - const chunks = buildTextChunks(response.content, body.model, chunkSize); + const chunks = buildTextChunks(response.content, body.model, chunkSize, response.reasoning); const interruption = createInterruptionSignal(fixture); const completed = await writeSSEStream(res, chunks, { latency, diff --git a/src/types.ts b/src/types.ts index 5f76d7d..5568bd2 100644 --- a/src/types.ts +++ b/src/types.ts @@ -200,6 +200,7 @@ export interface SSEChoice { export interface SSEDelta { role?: string; content?: string | null; + reasoning_content?: string; tool_calls?: SSEToolCallDelta[]; } @@ -231,6 +232,7 @@ export interface ChatCompletionMessage { role: "assistant"; content: string | null; refusal: string | null; + reasoning_content?: string; tool_calls?: 
ToolCallMessage[]; } From 27ac1143c9815c995e26756717cf6cacf01622cc Mon Sep 17 00:00:00 2001 From: Alem Tuzlak Date: Mon, 6 Apr 2026 17:22:28 +0200 Subject: [PATCH 2/9] feat: add reasoning support to Gemini --- src/__tests__/reasoning-all-providers.test.ts | 93 +++++++++++++++++++ src/gemini.ts | 40 ++++++-- 2 files changed, 126 insertions(+), 7 deletions(-) diff --git a/src/__tests__/reasoning-all-providers.test.ts b/src/__tests__/reasoning-all-providers.test.ts index 750d89b..632801d 100644 --- a/src/__tests__/reasoning-all-providers.test.ts +++ b/src/__tests__/reasoning-all-providers.test.ts @@ -172,3 +172,96 @@ describe("POST /v1/chat/completions (reasoning streaming)", () => { expect(reasoningDeltas).toHaveLength(0); }); }); + +// ─── Gemini: Reasoning ────────────────────────────────────────────────────── + +function parseGeminiSSEChunks(body: string): unknown[] { + const chunks: unknown[] = []; + for (const line of body.split("\n")) { + if (line.startsWith("data: ")) { + chunks.push(JSON.parse(line.slice(6))); + } + } + return chunks; +} + +describe("POST /v1beta/models/{model}:generateContent (reasoning non-streaming)", () => { + it("includes thought part before text part", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1beta/models/gemini-2.5-flash:generateContent`, { + contents: [{ role: "user", parts: [{ text: "think" }] }], + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + const parts = body.candidates[0].content.parts; + expect(parts).toHaveLength(2); + expect(parts[0].thought).toBe(true); + expect(parts[0].text).toBe("Let me think step by step about this problem."); + expect(parts[1].text).toBe("The answer is 42."); + expect(parts[1].thought).toBeUndefined(); + }); + + it("no thought part when reasoning is absent", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1beta/models/gemini-2.5-flash:generateContent`, { + 
contents: [{ role: "user", parts: [{ text: "plain" }] }], + }); + + const body = JSON.parse(res.body); + const parts = body.candidates[0].content.parts; + expect(parts).toHaveLength(1); + expect(parts[0].text).toBe("Just plain text."); + expect(parts[0].thought).toBeUndefined(); + }); +}); + +describe("POST /v1beta/models/{model}:streamGenerateContent (reasoning streaming)", () => { + it("streams thought chunks before text chunks", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1beta/models/gemini-2.5-flash:streamGenerateContent`, { + contents: [{ role: "user", parts: [{ text: "think" }] }], + }); + + expect(res.status).toBe(200); + const chunks = parseGeminiSSEChunks(res.body) as { + candidates: { + content: { role: string; parts: { text?: string; thought?: boolean }[] }; + finishReason?: string; + }[]; + }[]; + + const thoughtChunks = chunks.filter((c) => c.candidates[0].content.parts[0].thought === true); + const textChunks = chunks.filter((c) => c.candidates[0].content.parts[0].thought === undefined); + + expect(thoughtChunks.length).toBeGreaterThan(0); + expect(textChunks.length).toBeGreaterThan(0); + + const fullThought = thoughtChunks + .map((c) => c.candidates[0].content.parts[0].text ?? "") + .join(""); + expect(fullThought).toBe("Let me think step by step about this problem."); + + const fullText = textChunks.map((c) => c.candidates[0].content.parts[0].text ?? 
"").join(""); + expect(fullText).toBe("The answer is 42."); + + const lastChunk = chunks[chunks.length - 1]; + expect(lastChunk.candidates[0].finishReason).toBe("STOP"); + }); + + it("no thought chunks when reasoning is absent", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1beta/models/gemini-2.5-flash:streamGenerateContent`, { + contents: [{ role: "user", parts: [{ text: "plain" }] }], + }); + + const chunks = parseGeminiSSEChunks(res.body) as { + candidates: { + content: { parts: { text?: string; thought?: boolean }[] }; + }[]; + }[]; + + const thoughtChunks = chunks.filter((c) => c.candidates[0].content.parts[0].thought === true); + expect(thoughtChunks).toHaveLength(0); + }); +}); diff --git a/src/gemini.ts b/src/gemini.ts index 4229839..e99b9aa 100644 --- a/src/gemini.ts +++ b/src/gemini.ts @@ -36,6 +36,7 @@ import { proxyAndRecord } from "./recorder.js"; interface GeminiPart { text?: string; + thought?: boolean; functionCall?: { name: string; args: Record; id?: string }; functionResponse?: { name: string; response: unknown }; } @@ -187,10 +188,29 @@ interface GeminiResponseChunk { }; } -function buildGeminiTextStreamChunks(content: string, chunkSize: number): GeminiResponseChunk[] { +function buildGeminiTextStreamChunks( + content: string, + chunkSize: number, + reasoning?: string, +): GeminiResponseChunk[] { const chunks: GeminiResponseChunk[] = []; - // Content chunks + // Reasoning chunks (thought: true) + if (reasoning) { + for (let i = 0; i < reasoning.length; i += chunkSize) { + const slice = reasoning.slice(i, i + chunkSize); + chunks.push({ + candidates: [ + { + content: { role: "model", parts: [{ text: slice, thought: true }] }, + index: 0, + }, + ], + }); + } + } + + // Content chunks (original logic unchanged) for (let i = 0; i < content.length; i += chunkSize) { const slice = content.slice(i, i + chunkSize); const isLast = i + chunkSize >= content.length; @@ -215,7 +235,7 @@ function 
buildGeminiTextStreamChunks(content: string, chunkSize: number): Gemini chunks.push(chunk); } - // Handle empty content + // Handle empty content (original logic unchanged) if (content.length === 0) { chunks.push({ candidates: [ @@ -276,11 +296,17 @@ function buildGeminiToolCallStreamChunks( // Non-streaming response builders -function buildGeminiTextResponse(content: string): GeminiResponseChunk { +function buildGeminiTextResponse(content: string, reasoning?: string): GeminiResponseChunk { + const parts: GeminiPart[] = []; + if (reasoning) { + parts.push({ text: reasoning, thought: true }); + } + parts.push({ text: content }); + return { candidates: [ { - content: { role: "model", parts: [{ text: content }] }, + content: { role: "model", parts }, finishReason: "STOP", index: 0, }, @@ -528,11 +554,11 @@ export async function handleGemini( response: { status: 200, fixture }, }); if (!streaming) { - const body = buildGeminiTextResponse(response.content); + const body = buildGeminiTextResponse(response.content, response.reasoning); res.writeHead(200, { "Content-Type": "application/json" }); res.end(JSON.stringify(body)); } else { - const chunks = buildGeminiTextStreamChunks(response.content, chunkSize); + const chunks = buildGeminiTextStreamChunks(response.content, chunkSize, response.reasoning); const interruption = createInterruptionSignal(fixture); const completed = await writeGeminiSSEStream(res, chunks, { latency, From 990ad40aeba4cc27b0018f020ebb35626153be84 Mon Sep 17 00:00:00 2001 From: Alem Tuzlak Date: Mon, 6 Apr 2026 17:24:54 +0200 Subject: [PATCH 3/9] feat: add reasoning support to Bedrock InvokeModel --- src/__tests__/reasoning-all-providers.test.ts | 34 ++++++++++++ src/bedrock.ts | 53 ++++++++++++++++--- 2 files changed, 80 insertions(+), 7 deletions(-) diff --git a/src/__tests__/reasoning-all-providers.test.ts b/src/__tests__/reasoning-all-providers.test.ts index 632801d..319f0f4 100644 --- a/src/__tests__/reasoning-all-providers.test.ts +++ 
b/src/__tests__/reasoning-all-providers.test.ts @@ -265,3 +265,37 @@ describe("POST /v1beta/models/{model}:streamGenerateContent (reasoning streaming expect(thoughtChunks).toHaveLength(0); }); }); + +// ─── Bedrock InvokeModel: Reasoning ───────────────────────────────────────── + +describe("POST /model/{id}/invoke (reasoning non-streaming)", () => { + it("includes thinking content block before text block", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke`, { + messages: [{ role: "user", content: [{ type: "text", text: "think" }] }], + max_tokens: 1024, + anthropic_version: "bedrock-2023-05-31", + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.content).toHaveLength(2); + expect(body.content[0].type).toBe("thinking"); + expect(body.content[0].thinking).toBe("Let me think step by step about this problem."); + expect(body.content[1].type).toBe("text"); + expect(body.content[1].text).toBe("The answer is 42."); + }); + + it("no thinking block when reasoning is absent", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke`, { + messages: [{ role: "user", content: [{ type: "text", text: "plain" }] }], + max_tokens: 1024, + anthropic_version: "bedrock-2023-05-31", + }); + + const body = JSON.parse(res.body); + expect(body.content).toHaveLength(1); + expect(body.content[0].type).toBe("text"); + }); +}); diff --git a/src/bedrock.ts b/src/bedrock.ts index d45f64e..8fc6c54 100644 --- a/src/bedrock.ts +++ b/src/bedrock.ts @@ -198,12 +198,18 @@ export function bedrockToCompletionRequest( // ─── Response builders ────────────────────────────────────────────────────── -function buildBedrockTextResponse(content: string, model: string): object { +function buildBedrockTextResponse(content: string, model: string, reasoning?: string): 
object { + const contentBlocks: object[] = []; + if (reasoning) { + contentBlocks.push({ type: "thinking", thinking: reasoning }); + } + contentBlocks.push({ type: "text", text: content }); + return { id: generateMessageId(), type: "message", role: "assistant", - content: [{ type: "text", text: content }], + content: contentBlocks, model, stop_reason: "end_turn", stop_sequence: null, @@ -417,7 +423,11 @@ export async function handleBedrock( body: completionReq, response: { status: 200, fixture }, }); - const body = buildBedrockTextResponse(response.content, completionReq.model); + const body = buildBedrockTextResponse( + response.content, + completionReq.model, + response.reasoning, + ); res.writeHead(200, { "Content-Type": "application/json" }); res.end(JSON.stringify(body)); return; @@ -463,6 +473,7 @@ export async function handleBedrock( export function buildBedrockStreamTextEvents( content: string, chunkSize: number, + reasoning?: string, ): Array<{ eventType: string; payload: object }> { const events: Array<{ eventType: string; payload: object }> = []; @@ -471,9 +482,37 @@ export function buildBedrockStreamTextEvents( payload: { role: "assistant" }, }); + // Thinking block (emitted before text when reasoning is present) + if (reasoning) { + const blockIndex = 0; + events.push({ + eventType: "contentBlockStart", + payload: { contentBlockIndex: blockIndex, start: { type: "thinking" } }, + }); + + for (let i = 0; i < reasoning.length; i += chunkSize) { + const slice = reasoning.slice(i, i + chunkSize); + events.push({ + eventType: "contentBlockDelta", + payload: { + contentBlockIndex: blockIndex, + delta: { type: "thinking_delta", thinking: slice }, + }, + }); + } + + events.push({ + eventType: "contentBlockStop", + payload: { contentBlockIndex: blockIndex }, + }); + } + + // Text block + const textBlockIndex = reasoning ? 
1 : 0; + events.push({ eventType: "contentBlockStart", - payload: { contentBlockIndex: 0, start: {} }, + payload: { contentBlockIndex: textBlockIndex, start: {} }, }); for (let i = 0; i < content.length; i += chunkSize) { @@ -481,7 +520,7 @@ export function buildBedrockStreamTextEvents( events.push({ eventType: "contentBlockDelta", payload: { - contentBlockIndex: 0, + contentBlockIndex: textBlockIndex, delta: { type: "text_delta", text: slice }, }, }); @@ -489,7 +528,7 @@ export function buildBedrockStreamTextEvents( events.push({ eventType: "contentBlockStop", - payload: { contentBlockIndex: 0 }, + payload: { contentBlockIndex: textBlockIndex }, }); events.push({ @@ -728,7 +767,7 @@ export async function handleBedrockStream( body: completionReq, response: { status: 200, fixture }, }); - const events = buildBedrockStreamTextEvents(response.content, chunkSize); + const events = buildBedrockStreamTextEvents(response.content, chunkSize, response.reasoning); const interruption = createInterruptionSignal(fixture); const completed = await writeEventStream(res, events, { latency, From 4eb930098d7ef0ce506b0cdb03c22da5339db5a2 Mon Sep 17 00:00:00 2001 From: Alem Tuzlak Date: Mon, 6 Apr 2026 17:28:55 +0200 Subject: [PATCH 4/9] feat: add reasoning support to Bedrock Converse --- src/__tests__/reasoning-all-providers.test.ts | 39 +++++++++++++++++++ src/bedrock-converse.ts | 16 ++++++-- 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/src/__tests__/reasoning-all-providers.test.ts b/src/__tests__/reasoning-all-providers.test.ts index 319f0f4..f20b1e8 100644 --- a/src/__tests__/reasoning-all-providers.test.ts +++ b/src/__tests__/reasoning-all-providers.test.ts @@ -299,3 +299,42 @@ describe("POST /model/{id}/invoke (reasoning non-streaming)", () => { expect(body.content[0].type).toBe("text"); }); }); + +// ─── Bedrock Converse: Reasoning ──────────────────────────────────────────── + +describe("POST /model/{id}/converse (reasoning non-streaming)", () => { + 
it("includes reasoningContent block before text block", async () => { + instance = await createServer(allFixtures); + const res = await post( + `${instance.url}/model/anthropic.claude-3-sonnet-20240229-v1:0/converse`, + { + messages: [{ role: "user", content: [{ text: "think" }] }], + }, + ); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + const content = body.output.message.content; + expect(content).toHaveLength(2); + expect(content[0].reasoningContent).toBeDefined(); + expect(content[0].reasoningContent.reasoningText.text).toBe( + "Let me think step by step about this problem.", + ); + expect(content[1].text).toBe("The answer is 42."); + }); + + it("no reasoningContent block when reasoning is absent", async () => { + instance = await createServer(allFixtures); + const res = await post( + `${instance.url}/model/anthropic.claude-3-sonnet-20240229-v1:0/converse`, + { + messages: [{ role: "user", content: [{ text: "plain" }] }], + }, + ); + + const body = JSON.parse(res.body); + const content = body.output.message.content; + expect(content).toHaveLength(1); + expect(content[0].text).toBe("Just plain text."); + }); +}); diff --git a/src/bedrock-converse.ts b/src/bedrock-converse.ts index 933e0af..69de85c 100644 --- a/src/bedrock-converse.ts +++ b/src/bedrock-converse.ts @@ -156,12 +156,20 @@ export function converseToCompletionRequest( // ─── Response builders ────────────────────────────────────────────────────── -function buildConverseTextResponse(content: string): object { +function buildConverseTextResponse(content: string, reasoning?: string): object { + const contentBlocks: object[] = []; + if (reasoning) { + contentBlocks.push({ + reasoningContent: { reasoningText: { text: reasoning } }, + }); + } + contentBlocks.push({ text: content }); + return { output: { message: { role: "assistant", - content: [{ text: content }], + content: contentBlocks, }, }, stopReason: "end_turn", @@ -363,7 +371,7 @@ export async function handleConverse( 
body: completionReq, response: { status: 200, fixture }, }); - const body = buildConverseTextResponse(response.content); + const body = buildConverseTextResponse(response.content, response.reasoning); res.writeHead(200, { "Content-Type": "application/json" }); res.end(JSON.stringify(body)); return; @@ -568,7 +576,7 @@ export async function handleConverseStream( body: completionReq, response: { status: 200, fixture }, }); - const events = buildBedrockStreamTextEvents(response.content, chunkSize); + const events = buildBedrockStreamTextEvents(response.content, chunkSize, response.reasoning); const interruption = createInterruptionSignal(fixture); const completed = await writeEventStream(res, events, { latency, From c0a1b0bcdfb3693974b0d83b10c5a2bb2beeb01c Mon Sep 17 00:00:00 2001 From: Alem Tuzlak Date: Mon, 6 Apr 2026 17:32:51 +0200 Subject: [PATCH 5/9] feat: add reasoning support to Ollama --- src/__tests__/reasoning-all-providers.test.ts | 166 ++++++++++++++++++ src/ollama.ts | 69 +++++++- 2 files changed, 228 insertions(+), 7 deletions(-) diff --git a/src/__tests__/reasoning-all-providers.test.ts b/src/__tests__/reasoning-all-providers.test.ts index f20b1e8..e321886 100644 --- a/src/__tests__/reasoning-all-providers.test.ts +++ b/src/__tests__/reasoning-all-providers.test.ts @@ -338,3 +338,169 @@ describe("POST /model/{id}/converse (reasoning non-streaming)", () => { expect(content[0].text).toBe("Just plain text."); }); }); + +// ─── Ollama /api/chat: Reasoning ──────────────────────────────────────────── + +function parseNDJSON(body: string): object[] { + return body + .split("\n") + .filter((line) => line.trim() !== "") + .map((line) => JSON.parse(line) as object); +} + +describe("POST /api/chat (reasoning non-streaming)", () => { + it("includes reasoning_content on assistant message", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "deepseek-r1", + messages: [{ role: "user", content: 
"think" }], + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.message.content).toBe("The answer is 42."); + expect(body.message.reasoning_content).toBe("Let me think step by step about this problem."); + }); + + it("omits reasoning_content when reasoning is absent", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "deepseek-r1", + messages: [{ role: "user", content: "plain" }], + stream: false, + }); + + const body = JSON.parse(res.body); + expect(body.message.content).toBe("Just plain text."); + expect(body.message.reasoning_content).toBeUndefined(); + }); +}); + +describe("POST /api/chat (reasoning streaming)", () => { + it("streams reasoning_content chunks before content chunks", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "deepseek-r1", + messages: [{ role: "user", content: "think" }], + stream: true, + }); + + expect(res.status).toBe(200); + const chunks = parseNDJSON(res.body) as { + message: { role: string; content: string; reasoning_content?: string }; + done: boolean; + }[]; + + const reasoningChunks = chunks.filter( + (c) => !c.done && c.message.reasoning_content !== undefined, + ); + expect(reasoningChunks.length).toBeGreaterThan(0); + const fullReasoning = reasoningChunks.map((c) => c.message.reasoning_content).join(""); + expect(fullReasoning).toBe("Let me think step by step about this problem."); + + const contentChunks = chunks.filter( + (c) => !c.done && c.message.content !== "" && c.message.reasoning_content === undefined, + ); + expect(contentChunks.length).toBeGreaterThan(0); + const fullContent = contentChunks.map((c) => c.message.content).join(""); + expect(fullContent).toBe("The answer is 42."); + }); + + it("no reasoning_content chunks when reasoning is absent", async () => { + instance = await createServer(allFixtures); + const res 
= await post(`${instance.url}/api/chat`, { + model: "deepseek-r1", + messages: [{ role: "user", content: "plain" }], + stream: true, + }); + + const chunks = parseNDJSON(res.body) as { + message: { reasoning_content?: string }; + done: boolean; + }[]; + + const reasoningChunks = chunks.filter( + (c) => !c.done && c.message.reasoning_content !== undefined, + ); + expect(reasoningChunks).toHaveLength(0); + }); +}); + +// ─── Ollama /api/generate: Reasoning ──────────────────────────────────────── + +describe("POST /api/generate (reasoning non-streaming)", () => { + it("includes reasoning_content field", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/generate`, { + model: "deepseek-r1", + prompt: "think", + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.response).toBe("The answer is 42."); + expect(body.reasoning_content).toBe("Let me think step by step about this problem."); + }); + + it("omits reasoning_content when reasoning is absent", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/generate`, { + model: "deepseek-r1", + prompt: "plain", + stream: false, + }); + + const body = JSON.parse(res.body); + expect(body.response).toBe("Just plain text."); + expect(body.reasoning_content).toBeUndefined(); + }); +}); + +describe("POST /api/generate (reasoning streaming)", () => { + it("streams reasoning_content chunks before response chunks", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/generate`, { + model: "deepseek-r1", + prompt: "think", + stream: true, + }); + + expect(res.status).toBe(200); + const chunks = parseNDJSON(res.body) as { + response: string; + reasoning_content?: string; + done: boolean; + }[]; + + const reasoningChunks = chunks.filter((c) => !c.done && c.reasoning_content !== undefined); + 
expect(reasoningChunks.length).toBeGreaterThan(0); + const fullReasoning = reasoningChunks.map((c) => c.reasoning_content).join(""); + expect(fullReasoning).toBe("Let me think step by step about this problem."); + + const contentChunks = chunks.filter( + (c) => !c.done && c.response !== "" && c.reasoning_content === undefined, + ); + expect(contentChunks.length).toBeGreaterThan(0); + const fullContent = contentChunks.map((c) => c.response).join(""); + expect(fullContent).toBe("The answer is 42."); + }); + + it("no reasoning_content chunks when reasoning is absent", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/generate`, { + model: "deepseek-r1", + prompt: "plain", + stream: true, + }); + + const chunks = parseNDJSON(res.body) as { + reasoning_content?: string; + done: boolean; + }[]; + + const reasoningChunks = chunks.filter((c) => !c.done && c.reasoning_content !== undefined); + expect(reasoningChunks).toHaveLength(0); + }); +}); diff --git a/src/ollama.ts b/src/ollama.ts index 20ed12f..0adac53 100644 --- a/src/ollama.ts +++ b/src/ollama.ts @@ -121,9 +121,26 @@ function ollamaGenerateToCompletionRequest(req: OllamaGenerateRequest): ChatComp // ─── Response builders: /api/chat ──────────────────────────────────────────── -function buildOllamaChatTextChunks(content: string, model: string, chunkSize: number): object[] { +function buildOllamaChatTextChunks( + content: string, + model: string, + chunkSize: number, + reasoning?: string, +): object[] { const chunks: object[] = []; + // Reasoning chunks (before content) + if (reasoning) { + for (let i = 0; i < reasoning.length; i += chunkSize) { + const slice = reasoning.slice(i, i + chunkSize); + chunks.push({ + model, + message: { role: "assistant", content: "", reasoning_content: slice }, + done: false, + }); + } + } + for (let i = 0; i < content.length; i += chunkSize) { const slice = content.slice(i, i + chunkSize); chunks.push({ @@ -144,10 +161,14 @@ 
function buildOllamaChatTextChunks(content: string, model: string, chunkSize: nu return chunks; } -function buildOllamaChatTextResponse(content: string, model: string): object { +function buildOllamaChatTextResponse(content: string, model: string, reasoning?: string): object { return { model, - message: { role: "assistant", content }, + message: { + role: "assistant", + content, + ...(reasoning !== undefined ? { reasoning_content: reasoning } : {}), + }, done: true, ...DURATION_FIELDS, }; @@ -240,10 +261,25 @@ function buildOllamaGenerateTextChunks( content: string, model: string, chunkSize: number, + reasoning?: string, ): object[] { const chunks: object[] = []; const createdAt = new Date().toISOString(); + // Reasoning chunks (before content) + if (reasoning) { + for (let i = 0; i < reasoning.length; i += chunkSize) { + const slice = reasoning.slice(i, i + chunkSize); + chunks.push({ + model, + created_at: createdAt, + response: "", + reasoning_content: slice, + done: false, + }); + } + } + for (let i = 0; i < content.length; i += chunkSize) { const slice = content.slice(i, i + chunkSize); chunks.push({ @@ -267,11 +303,16 @@ function buildOllamaGenerateTextChunks( return chunks; } -function buildOllamaGenerateTextResponse(content: string, model: string): object { +function buildOllamaGenerateTextResponse( + content: string, + model: string, + reasoning?: string, +): object { return { model, created_at: new Date().toISOString(), response: content, + ...(reasoning !== undefined ? 
{ reasoning_content: reasoning } : {}), done: true, ...DURATION_FIELDS, context: [], @@ -448,11 +489,20 @@ export async function handleOllama( response: { status: 200, fixture }, }); if (!streaming) { - const body = buildOllamaChatTextResponse(response.content, completionReq.model); + const body = buildOllamaChatTextResponse( + response.content, + completionReq.model, + response.reasoning, + ); res.writeHead(200, { "Content-Type": "application/json" }); res.end(JSON.stringify(body)); } else { - const chunks = buildOllamaChatTextChunks(response.content, completionReq.model, chunkSize); + const chunks = buildOllamaChatTextChunks( + response.content, + completionReq.model, + chunkSize, + response.reasoning, + ); const interruption = createInterruptionSignal(fixture); const completed = await writeNDJSONStream(res, chunks, { latency, @@ -691,7 +741,11 @@ export async function handleOllamaGenerate( response: { status: 200, fixture }, }); if (!streaming) { - const body = buildOllamaGenerateTextResponse(response.content, completionReq.model); + const body = buildOllamaGenerateTextResponse( + response.content, + completionReq.model, + response.reasoning, + ); res.writeHead(200, { "Content-Type": "application/json" }); res.end(JSON.stringify(body)); } else { @@ -699,6 +753,7 @@ export async function handleOllamaGenerate( response.content, completionReq.model, chunkSize, + response.reasoning, ); const interruption = createInterruptionSignal(fixture); const completed = await writeNDJSONStream(res, chunks, { From e900e5edbfe777a86ed131fdf9f1c16cc5dc8f5e Mon Sep 17 00:00:00 2001 From: Alem Tuzlak Date: Mon, 6 Apr 2026 17:40:27 +0200 Subject: [PATCH 6/9] test: add unit tests for Bedrock streaming reasoning events --- src/__tests__/reasoning-all-providers.test.ts | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/src/__tests__/reasoning-all-providers.test.ts b/src/__tests__/reasoning-all-providers.test.ts index e321886..cc8da04 100644 --- 
a/src/__tests__/reasoning-all-providers.test.ts +++ b/src/__tests__/reasoning-all-providers.test.ts @@ -2,6 +2,7 @@ import { describe, it, expect, afterEach } from "vitest"; import * as http from "node:http"; import type { Fixture } from "../types.js"; import { createServer, type ServerInstance } from "../server.js"; +import { buildBedrockStreamTextEvents } from "../bedrock.js"; // --- helpers --- @@ -504,3 +505,69 @@ describe("POST /api/generate (reasoning streaming)", () => { expect(reasoningChunks).toHaveLength(0); }); }); + +// ─── Bedrock streaming reasoning: unit test ───────────────────────────────── + +describe("buildBedrockStreamTextEvents (reasoning)", () => { + it("emits thinking block events before text block events", () => { + const events = buildBedrockStreamTextEvents("The answer.", 100, "Step by step."); + const types = events.map((e) => e.eventType); + + // messageStart → thinking block → text block → messageStop + expect(types[0]).toBe("messageStart"); + + // Thinking block at index 0 + expect(events[1]).toEqual({ + eventType: "contentBlockStart", + payload: { contentBlockIndex: 0, start: { type: "thinking" } }, + }); + expect(events[2]).toEqual({ + eventType: "contentBlockDelta", + payload: { + contentBlockIndex: 0, + delta: { type: "thinking_delta", thinking: "Step by step." }, + }, + }); + expect(events[3]).toEqual({ + eventType: "contentBlockStop", + payload: { contentBlockIndex: 0 }, + }); + + // Text block at index 1 + expect(events[4]).toEqual({ + eventType: "contentBlockStart", + payload: { contentBlockIndex: 1, start: {} }, + }); + expect(events[5]).toEqual({ + eventType: "contentBlockDelta", + payload: { + contentBlockIndex: 1, + delta: { type: "text_delta", text: "The answer." 
}, + }, + }); + expect(events[6]).toEqual({ + eventType: "contentBlockStop", + payload: { contentBlockIndex: 1 }, + }); + + expect(events[7]).toEqual({ + eventType: "messageStop", + payload: { stopReason: "end_turn" }, + }); + }); + + it("no thinking block when reasoning is absent", () => { + const events = buildBedrockStreamTextEvents("Hello.", 100); + const types = events.map((e) => e.eventType); + + // messageStart → text block at index 0 → messageStop + expect(types).toEqual([ + "messageStart", + "contentBlockStart", + "contentBlockDelta", + "contentBlockStop", + "messageStop", + ]); + expect((events[1].payload as { contentBlockIndex: number }).contentBlockIndex).toBe(0); + }); +}); From ed1f973faf358fc73d17e7cb5e32d65f8198563a Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Mon, 6 Apr 2026 09:49:55 -0700 Subject: [PATCH 7/9] fix: standardize reasoning guards to truthy checks Change `if (reasoning !== undefined)` to `if (reasoning)` in helpers.ts and ollama.ts for consistency with the fixture-loader convention where empty string means no reasoning. --- src/helpers.ts | 4 ++-- src/ollama.ts | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/helpers.ts b/src/helpers.ts index a94e200..490746f 100644 --- a/src/helpers.ts +++ b/src/helpers.ts @@ -82,7 +82,7 @@ export function buildTextChunks( }); // Reasoning chunks (emitted before content chunks) - if (reasoning !== undefined) { + if (reasoning) { for (let i = 0; i < reasoning.length; i += chunkSize) { const slice = reasoning.slice(i, i + chunkSize); chunks.push({ @@ -219,7 +219,7 @@ export function buildTextCompletion( role: "assistant", content, refusal: null, - ...(reasoning !== undefined ? { reasoning_content: reasoning } : {}), + ...(reasoning ? 
{ reasoning_content: reasoning } : {}), }, finish_reason: "stop", }, diff --git a/src/ollama.ts b/src/ollama.ts index 1b03fdb..ab4b1c7 100644 --- a/src/ollama.ts +++ b/src/ollama.ts @@ -167,7 +167,7 @@ function buildOllamaChatTextResponse(content: string, model: string, reasoning?: message: { role: "assistant", content, - ...(reasoning !== undefined ? { reasoning_content: reasoning } : {}), + ...(reasoning ? { reasoning_content: reasoning } : {}), }, done: true, ...DURATION_FIELDS, @@ -312,7 +312,7 @@ function buildOllamaGenerateTextResponse( model, created_at: new Date().toISOString(), response: content, - ...(reasoning !== undefined ? { reasoning_content: reasoning } : {}), + ...(reasoning ? { reasoning_content: reasoning } : {}), done: true, ...DURATION_FIELDS, context: [], From 38f2e6125959156071e90a1ba30075353154e5b1 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Mon, 6 Apr 2026 09:51:23 -0700 Subject: [PATCH 8/9] test: move server lifecycle to beforeAll/afterAll in reasoning tests All tests use the same fixtures, so share a single server instance across the entire file instead of creating one per test. Update the post() helper to use a shared baseUrl instead of accepting a full URL. 
--- src/__tests__/reasoning-all-providers.test.ts | 103 +++++++----------- 1 file changed, 40 insertions(+), 63 deletions(-) diff --git a/src/__tests__/reasoning-all-providers.test.ts b/src/__tests__/reasoning-all-providers.test.ts index cc8da04..b069a34 100644 --- a/src/__tests__/reasoning-all-providers.test.ts +++ b/src/__tests__/reasoning-all-providers.test.ts @@ -1,4 +1,4 @@ -import { describe, it, expect, afterEach } from "vitest"; +import { describe, it, expect, beforeAll, afterAll } from "vitest"; import * as http from "node:http"; import type { Fixture } from "../types.js"; import { createServer, type ServerInstance } from "../server.js"; @@ -6,18 +6,21 @@ import { buildBedrockStreamTextEvents } from "../bedrock.js"; // --- helpers --- +let instance: ServerInstance; +let baseUrl: string; + function post( - url: string, + path: string, body: unknown, ): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> { return new Promise((resolve, reject) => { const data = JSON.stringify(body); - const parsed = new URL(url); + const parsed = new URL(baseUrl); const req = http.request( { hostname: parsed.hostname, port: parsed.port, - path: parsed.pathname, + path, method: "POST", headers: { "Content-Type": "application/json", @@ -78,25 +81,24 @@ const plainFixture: Fixture = { const allFixtures: Fixture[] = [reasoningFixture, plainFixture]; -// --- tests --- +// --- server lifecycle --- -let instance: ServerInstance | null = null; +beforeAll(async () => { + instance = await createServer(allFixtures); + baseUrl = instance.url; +}); -afterEach(async () => { - if (instance) { - await new Promise((resolve) => { - instance!.server.close(() => resolve()); - }); - instance = null; - } +afterAll(async () => { + await new Promise((resolve) => { + instance.server.close(() => resolve()); + }); }); // ─── OpenAI Chat Completions: Reasoning ───────────────────────────────────── describe("POST /v1/chat/completions (reasoning non-streaming)", () => { 
it("includes reasoning_content field on assistant message", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/v1/chat/completions`, { + const res = await post(`/v1/chat/completions`, { model: "gpt-4", messages: [{ role: "user", content: "think" }], stream: false, @@ -111,8 +113,7 @@ describe("POST /v1/chat/completions (reasoning non-streaming)", () => { }); it("omits reasoning_content when reasoning is absent", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/v1/chat/completions`, { + const res = await post(`/v1/chat/completions`, { model: "gpt-4", messages: [{ role: "user", content: "plain" }], stream: false, @@ -126,8 +127,7 @@ describe("POST /v1/chat/completions (reasoning non-streaming)", () => { describe("POST /v1/chat/completions (reasoning streaming)", () => { it("emits reasoning_content deltas before content deltas", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/v1/chat/completions`, { + const res = await post(`/v1/chat/completions`, { model: "gpt-4", messages: [{ role: "user", content: "think" }], stream: true, @@ -159,8 +159,7 @@ describe("POST /v1/chat/completions (reasoning streaming)", () => { }); it("no reasoning_content deltas when reasoning is absent", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/v1/chat/completions`, { + const res = await post(`/v1/chat/completions`, { model: "gpt-4", messages: [{ role: "user", content: "plain" }], stream: true, @@ -188,8 +187,7 @@ function parseGeminiSSEChunks(body: string): unknown[] { describe("POST /v1beta/models/{model}:generateContent (reasoning non-streaming)", () => { it("includes thought part before text part", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/v1beta/models/gemini-2.5-flash:generateContent`, { + const res = await 
post(`/v1beta/models/gemini-2.5-flash:generateContent`, { contents: [{ role: "user", parts: [{ text: "think" }] }], }); @@ -204,8 +202,7 @@ describe("POST /v1beta/models/{model}:generateContent (reasoning non-streaming)" }); it("no thought part when reasoning is absent", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/v1beta/models/gemini-2.5-flash:generateContent`, { + const res = await post(`/v1beta/models/gemini-2.5-flash:generateContent`, { contents: [{ role: "user", parts: [{ text: "plain" }] }], }); @@ -219,8 +216,7 @@ describe("POST /v1beta/models/{model}:generateContent (reasoning non-streaming)" describe("POST /v1beta/models/{model}:streamGenerateContent (reasoning streaming)", () => { it("streams thought chunks before text chunks", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/v1beta/models/gemini-2.5-flash:streamGenerateContent`, { + const res = await post(`/v1beta/models/gemini-2.5-flash:streamGenerateContent`, { contents: [{ role: "user", parts: [{ text: "think" }] }], }); @@ -251,8 +247,7 @@ describe("POST /v1beta/models/{model}:streamGenerateContent (reasoning streaming }); it("no thought chunks when reasoning is absent", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/v1beta/models/gemini-2.5-flash:streamGenerateContent`, { + const res = await post(`/v1beta/models/gemini-2.5-flash:streamGenerateContent`, { contents: [{ role: "user", parts: [{ text: "plain" }] }], }); @@ -271,8 +266,7 @@ describe("POST /v1beta/models/{model}:streamGenerateContent (reasoning streaming describe("POST /model/{id}/invoke (reasoning non-streaming)", () => { it("includes thinking content block before text block", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke`, { + const res = await 
post(`/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke`, { messages: [{ role: "user", content: [{ type: "text", text: "think" }] }], max_tokens: 1024, anthropic_version: "bedrock-2023-05-31", @@ -288,8 +282,7 @@ describe("POST /model/{id}/invoke (reasoning non-streaming)", () => { }); it("no thinking block when reasoning is absent", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke`, { + const res = await post(`/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke`, { messages: [{ role: "user", content: [{ type: "text", text: "plain" }] }], max_tokens: 1024, anthropic_version: "bedrock-2023-05-31", @@ -305,13 +298,9 @@ describe("POST /model/{id}/invoke (reasoning non-streaming)", () => { describe("POST /model/{id}/converse (reasoning non-streaming)", () => { it("includes reasoningContent block before text block", async () => { - instance = await createServer(allFixtures); - const res = await post( - `${instance.url}/model/anthropic.claude-3-sonnet-20240229-v1:0/converse`, - { - messages: [{ role: "user", content: [{ text: "think" }] }], - }, - ); + const res = await post(`/model/anthropic.claude-3-sonnet-20240229-v1:0/converse`, { + messages: [{ role: "user", content: [{ text: "think" }] }], + }); expect(res.status).toBe(200); const body = JSON.parse(res.body); @@ -325,13 +314,9 @@ describe("POST /model/{id}/converse (reasoning non-streaming)", () => { }); it("no reasoningContent block when reasoning is absent", async () => { - instance = await createServer(allFixtures); - const res = await post( - `${instance.url}/model/anthropic.claude-3-sonnet-20240229-v1:0/converse`, - { - messages: [{ role: "user", content: [{ text: "plain" }] }], - }, - ); + const res = await post(`/model/anthropic.claude-3-sonnet-20240229-v1:0/converse`, { + messages: [{ role: "user", content: [{ text: "plain" }] }], + }); const body = JSON.parse(res.body); const content = 
body.output.message.content; @@ -351,8 +336,7 @@ function parseNDJSON(body: string): object[] { describe("POST /api/chat (reasoning non-streaming)", () => { it("includes reasoning_content on assistant message", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/api/chat`, { + const res = await post(`/api/chat`, { model: "deepseek-r1", messages: [{ role: "user", content: "think" }], stream: false, @@ -365,8 +349,7 @@ describe("POST /api/chat (reasoning non-streaming)", () => { }); it("omits reasoning_content when reasoning is absent", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/api/chat`, { + const res = await post(`/api/chat`, { model: "deepseek-r1", messages: [{ role: "user", content: "plain" }], stream: false, @@ -380,8 +363,7 @@ describe("POST /api/chat (reasoning non-streaming)", () => { describe("POST /api/chat (reasoning streaming)", () => { it("streams reasoning_content chunks before content chunks", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/api/chat`, { + const res = await post(`/api/chat`, { model: "deepseek-r1", messages: [{ role: "user", content: "think" }], stream: true, @@ -409,8 +391,7 @@ describe("POST /api/chat (reasoning streaming)", () => { }); it("no reasoning_content chunks when reasoning is absent", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/api/chat`, { + const res = await post(`/api/chat`, { model: "deepseek-r1", messages: [{ role: "user", content: "plain" }], stream: true, @@ -432,8 +413,7 @@ describe("POST /api/chat (reasoning streaming)", () => { describe("POST /api/generate (reasoning non-streaming)", () => { it("includes reasoning_content field", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/api/generate`, { + const res = await post(`/api/generate`, { model: 
"deepseek-r1", prompt: "think", stream: false, @@ -446,8 +426,7 @@ describe("POST /api/generate (reasoning non-streaming)", () => { }); it("omits reasoning_content when reasoning is absent", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/api/generate`, { + const res = await post(`/api/generate`, { model: "deepseek-r1", prompt: "plain", stream: false, @@ -461,8 +440,7 @@ describe("POST /api/generate (reasoning non-streaming)", () => { describe("POST /api/generate (reasoning streaming)", () => { it("streams reasoning_content chunks before response chunks", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/api/generate`, { + const res = await post(`/api/generate`, { model: "deepseek-r1", prompt: "think", stream: true, @@ -489,8 +467,7 @@ describe("POST /api/generate (reasoning streaming)", () => { }); it("no reasoning_content chunks when reasoning is absent", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/api/generate`, { + const res = await post(`/api/generate`, { model: "deepseek-r1", prompt: "plain", stream: true, From a482f2db7e031314705828179e127a9e26e62b7f Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Mon, 6 Apr 2026 09:52:54 -0700 Subject: [PATCH 9/9] test: add Bedrock streaming integration tests for reasoning Add integration tests for invoke-with-response-stream and converse-stream endpoints that verify thinking block events appear before text content events when reasoning is present. Includes a binary event stream frame decoder for parsing the AWS Event Stream wire format in tests. 
--- src/__tests__/reasoning-all-providers.test.ts | 236 ++++++++++++++++++ 1 file changed, 236 insertions(+) diff --git a/src/__tests__/reasoning-all-providers.test.ts b/src/__tests__/reasoning-all-providers.test.ts index b069a34..42657c2 100644 --- a/src/__tests__/reasoning-all-providers.test.ts +++ b/src/__tests__/reasoning-all-providers.test.ts @@ -1,5 +1,6 @@ import { describe, it, expect, beforeAll, afterAll } from "vitest"; import * as http from "node:http"; +import { crc32 } from "node:zlib"; import type { Fixture } from "../types.js"; import { createServer, type ServerInstance } from "../server.js"; import { buildBedrockStreamTextEvents } from "../bedrock.js"; @@ -45,6 +46,98 @@ function post( }); } +function postRaw( + path: string, + body: unknown, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: Buffer }> { + return new Promise((resolve, reject) => { + const data = JSON.stringify(body); + const parsed = new URL(baseUrl); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + headers: res.headers, + body: Buffer.concat(chunks), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); +} + +/** + * Decode AWS Event Stream binary frames from a Buffer. + * Returns an array of { eventType, payload } objects. 
+ */
+function decodeEventStreamFrames(buf: Buffer): Array<{ eventType: string; payload: object }> {
+  const frames: Array<{ eventType: string; payload: object }> = [];
+  let offset = 0;
+
+  while (offset < buf.length) {
+    if (offset + 12 > buf.length) break;
+
+    const totalLength = buf.readUInt32BE(offset);
+    const headersLength = buf.readUInt32BE(offset + 4);
+    const preludeCrc = buf.readUInt32BE(offset + 8);
+
+    // Verify prelude CRC
+    const computedPreludeCrc = crc32(buf.subarray(offset, offset + 8));
+    if (computedPreludeCrc >>> 0 !== preludeCrc) {
+      throw new Error("Prelude CRC mismatch");
+    }
+
+    // Parse headers
+    const headersStart = offset + 12;
+    const headersEnd = headersStart + headersLength;
+    const headers: Record<string, string> = {};
+    let hOff = headersStart;
+    while (hOff < headersEnd) {
+      const nameLen = buf.readUInt8(hOff);
+      hOff += 1;
+      const name = buf.subarray(hOff, hOff + nameLen).toString("utf8");
+      hOff += nameLen;
+      hOff += 1; // skip header type byte (7 = STRING)
+      const valueLen = buf.readUInt16BE(hOff);
+      hOff += 2;
+      const value = buf.subarray(hOff, hOff + valueLen).toString("utf8");
+      hOff += valueLen;
+      headers[name] = value;
+    }
+
+    // Parse payload
+    const payloadStart = headersEnd;
+    const payloadEnd = offset + totalLength - 4; // minus message CRC
+    const payloadBuf = buf.subarray(payloadStart, payloadEnd);
+    const payload = payloadBuf.length > 0 ? JSON.parse(payloadBuf.toString("utf8")) : {};
+
+    frames.push({
+      eventType: headers[":event-type"] ??
"", + payload, + }); + + offset += totalLength; + } + + return frames; +} + interface SSEEvent { type?: string; choices?: { @@ -325,6 +418,149 @@ describe("POST /model/{id}/converse (reasoning non-streaming)", () => { }); }); +// ─── Bedrock InvokeModel Streaming: Reasoning ───────────────────────────────── + +describe("POST /model/{id}/invoke-with-response-stream (reasoning streaming)", () => { + it("emits thinking block events before text content events", async () => { + const res = await postRaw( + `/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream`, + { + messages: [{ role: "user", content: [{ type: "text", text: "think" }] }], + max_tokens: 1024, + anthropic_version: "bedrock-2023-05-31", + }, + ); + + expect(res.status).toBe(200); + const frames = decodeEventStreamFrames(res.body); + const eventTypes = frames.map((f) => f.eventType); + + // Should start with messageStart + expect(eventTypes[0]).toBe("messageStart"); + + // Find thinking and text block starts + const thinkingStartIdx = frames.findIndex( + (f) => + f.eventType === "contentBlockStart" && + (f.payload as { start?: { type?: string } }).start?.type === "thinking", + ); + const textStartIdx = frames.findIndex( + (f) => + f.eventType === "contentBlockStart" && + (f.payload as { start?: { type?: string } }).start?.type === undefined, + ); + + expect(thinkingStartIdx).toBeGreaterThan(0); + expect(textStartIdx).toBeGreaterThan(thinkingStartIdx); + + // Verify thinking content + const thinkingDeltas = frames.filter( + (f) => + f.eventType === "contentBlockDelta" && + (f.payload as { delta?: { type?: string } }).delta?.type === "thinking_delta", + ); + const fullThinking = thinkingDeltas + .map((f) => (f.payload as { delta: { thinking: string } }).delta.thinking) + .join(""); + expect(fullThinking).toBe("Let me think step by step about this problem."); + + // Verify text content + const textDeltas = frames.filter( + (f) => + f.eventType === "contentBlockDelta" && + (f.payload as { 
delta?: { type?: string } }).delta?.type === "text_delta", + ); + const fullText = textDeltas + .map((f) => (f.payload as { delta: { text: string } }).delta.text) + .join(""); + expect(fullText).toBe("The answer is 42."); + + // Should end with messageStop + expect(eventTypes[eventTypes.length - 1]).toBe("messageStop"); + }); + + it("no thinking block when reasoning is absent", async () => { + const res = await postRaw( + `/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream`, + { + messages: [{ role: "user", content: [{ type: "text", text: "plain" }] }], + max_tokens: 1024, + anthropic_version: "bedrock-2023-05-31", + }, + ); + + expect(res.status).toBe(200); + const frames = decodeEventStreamFrames(res.body); + + const thinkingDeltas = frames.filter( + (f) => + f.eventType === "contentBlockDelta" && + (f.payload as { delta?: { type?: string } }).delta?.type === "thinking_delta", + ); + expect(thinkingDeltas).toHaveLength(0); + }); +}); + +// ─── Bedrock Converse Streaming: Reasoning ──────────────────────────────────── + +describe("POST /model/{id}/converse-stream (reasoning streaming)", () => { + it("emits thinking block events before text content events", async () => { + const res = await postRaw(`/model/anthropic.claude-3-sonnet-20240229-v1:0/converse-stream`, { + messages: [{ role: "user", content: [{ text: "think" }] }], + }); + + expect(res.status).toBe(200); + const frames = decodeEventStreamFrames(res.body); + const eventTypes = frames.map((f) => f.eventType); + + expect(eventTypes[0]).toBe("messageStart"); + + // Find thinking and text block starts + const thinkingStartIdx = frames.findIndex( + (f) => + f.eventType === "contentBlockStart" && + (f.payload as { start?: { type?: string } }).start?.type === "thinking", + ); + const textStartIdx = frames.findIndex( + (f) => + f.eventType === "contentBlockStart" && + (f.payload as { start?: { type?: string } }).start?.type === undefined, + ); + + 
expect(thinkingStartIdx).toBeGreaterThan(0); + expect(textStartIdx).toBeGreaterThan(thinkingStartIdx); + + // Verify reasoning content appears in the stream + const thinkingDeltas = frames.filter( + (f) => + f.eventType === "contentBlockDelta" && + (f.payload as { delta?: { type?: string } }).delta?.type === "thinking_delta", + ); + const fullThinking = thinkingDeltas + .map((f) => (f.payload as { delta: { thinking: string } }).delta.thinking) + .join(""); + expect(fullThinking).toBe("Let me think step by step about this problem."); + + expect(eventTypes[eventTypes.length - 1]).toBe("messageStop"); + }); + + it("no thinking block when reasoning is absent", async () => { + const res = await postRaw(`/model/anthropic.claude-3-sonnet-20240229-v1:0/converse-stream`, { + messages: [{ role: "user", content: [{ text: "plain" }] }], + }); + + expect(res.status).toBe(200); + const frames = decodeEventStreamFrames(res.body); + + const thinkingDeltas = frames.filter( + (f) => + f.eventType === "contentBlockDelta" && + (f.payload as { delta?: { type?: string } }).delta?.type === "thinking_delta", + ); + expect(thinkingDeltas).toHaveLength(0); + }); +}); + // ─── Ollama /api/chat: Reasoning ──────────────────────────────────────────── function parseNDJSON(body: string): object[] {