From 1d26dbb322894872caf4655010ea292f17e06796 Mon Sep 17 00:00:00 2001 From: Alem Tuzlak Date: Mon, 6 Apr 2026 17:16:38 +0200 Subject: [PATCH 1/9] feat: add reasoning support to OpenAI Chat Completions Adds optional reasoning_content field to non-streaming ChatCompletion messages and SSE streaming deltas when a fixture includes reasoning. --- src/__tests__/reasoning-all-providers.test.ts | 174 ++++++++++++++++++ src/helpers.ts | 34 +++- src/server.ts | 4 +- src/types.ts | 2 + 4 files changed, 209 insertions(+), 5 deletions(-) create mode 100644 src/__tests__/reasoning-all-providers.test.ts diff --git a/src/__tests__/reasoning-all-providers.test.ts b/src/__tests__/reasoning-all-providers.test.ts new file mode 100644 index 0000000..750d89b --- /dev/null +++ b/src/__tests__/reasoning-all-providers.test.ts @@ -0,0 +1,174 @@ +import { describe, it, expect, afterEach } from "vitest"; +import * as http from "node:http"; +import type { Fixture } from "../types.js"; +import { createServer, type ServerInstance } from "../server.js"; + +// --- helpers --- + +function post( + url: string, + body: unknown, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> { + return new Promise((resolve, reject) => { + const data = JSON.stringify(body); + const parsed = new URL(url); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path: parsed.pathname, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 
0, + headers: res.headers, + body: Buffer.concat(chunks).toString(), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); +} + +interface SSEEvent { + type?: string; + choices?: { + delta: { content?: string; reasoning_content?: string; role?: string }; + finish_reason: string | null; + }[]; + [key: string]: unknown; +} + +function parseSSEEvents(body: string): SSEEvent[] { + const events: SSEEvent[] = []; + for (const line of body.split("\n")) { + if (line.startsWith("data: ") && line.slice(6).trim() !== "[DONE]") { + events.push(JSON.parse(line.slice(6)) as SSEEvent); + } + } + return events; +} + +// --- fixtures --- + +const reasoningFixture: Fixture = { + match: { userMessage: "think" }, + response: { + content: "The answer is 42.", + reasoning: "Let me think step by step about this problem.", + }, +}; + +const plainFixture: Fixture = { + match: { userMessage: "plain" }, + response: { content: "Just plain text." }, +}; + +const allFixtures: Fixture[] = [reasoningFixture, plainFixture]; + +// --- tests --- + +let instance: ServerInstance | null = null; + +afterEach(async () => { + if (instance) { + await new Promise((resolve) => { + instance!.server.close(() => resolve()); + }); + instance = null; + } +}); + +// ─── OpenAI Chat Completions: Reasoning ───────────────────────────────────── + +describe("POST /v1/chat/completions (reasoning non-streaming)", () => { + it("includes reasoning_content field on assistant message", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "think" }], + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.choices[0].message.content).toBe("The answer is 42."); + expect(body.choices[0].message.reasoning_content).toBe( + "Let me think step by step about this problem.", + ); + }); + + it("omits reasoning_content when 
reasoning is absent", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "plain" }], + stream: false, + }); + + const body = JSON.parse(res.body); + expect(body.choices[0].message.content).toBe("Just plain text."); + expect(body.choices[0].message.reasoning_content).toBeUndefined(); + }); +}); + +describe("POST /v1/chat/completions (reasoning streaming)", () => { + it("emits reasoning_content deltas before content deltas", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "think" }], + stream: true, + }); + + expect(res.status).toBe(200); + const events = parseSSEEvents(res.body); + + const reasoningDeltas = events + .filter((e) => e.choices?.[0]?.delta?.reasoning_content !== undefined) + .map((e) => e.choices![0].delta.reasoning_content); + expect(reasoningDeltas.join("")).toBe("Let me think step by step about this problem."); + + const contentDeltas = events + .filter( + (e) => e.choices?.[0]?.delta?.content !== undefined && e.choices[0].delta.content !== "", + ) + .map((e) => e.choices![0].delta.content); + expect(contentDeltas.join("")).toBe("The answer is 42."); + + const lastReasoningIdx = events.reduce( + (acc, e, idx) => (e.choices?.[0]?.delta?.reasoning_content !== undefined ? 
idx : acc), + -1, + ); + const firstContentIdx = events.findIndex( + (e) => e.choices?.[0]?.delta?.content !== undefined && e.choices[0].delta.content !== "", + ); + expect(lastReasoningIdx).toBeLessThan(firstContentIdx); + }); + + it("no reasoning_content deltas when reasoning is absent", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1/chat/completions`, { + model: "gpt-4", + messages: [{ role: "user", content: "plain" }], + stream: true, + }); + + const events = parseSSEEvents(res.body); + const reasoningDeltas = events.filter( + (e) => e.choices?.[0]?.delta?.reasoning_content !== undefined, + ); + expect(reasoningDeltas).toHaveLength(0); + }); +}); diff --git a/src/helpers.ts b/src/helpers.ts index 3d25272..a94e200 100644 --- a/src/helpers.ts +++ b/src/helpers.ts @@ -62,7 +62,12 @@ export function isEmbeddingResponse(r: FixtureResponse): r is EmbeddingResponse return "embedding" in r && Array.isArray((r as EmbeddingResponse).embedding); } -export function buildTextChunks(content: string, model: string, chunkSize: number): SSEChunk[] { +export function buildTextChunks( + content: string, + model: string, + chunkSize: number, + reasoning?: string, +): SSEChunk[] { const id = generateId(); const created = Math.floor(Date.now() / 1000); const chunks: SSEChunk[] = []; @@ -76,6 +81,20 @@ export function buildTextChunks(content: string, model: string, chunkSize: numbe choices: [{ index: 0, delta: { role: "assistant", content: "" }, finish_reason: null }], }); + // Reasoning chunks (emitted before content chunks) + if (reasoning !== undefined) { + for (let i = 0; i < reasoning.length; i += chunkSize) { + const slice = reasoning.slice(i, i + chunkSize); + chunks.push({ + id, + object: "chat.completion.chunk", + created, + model, + choices: [{ index: 0, delta: { reasoning_content: slice }, finish_reason: null }], + }); + } + } + // Content chunks for (let i = 0; i < content.length; i += chunkSize) { const slice = 
content.slice(i, i + chunkSize); @@ -183,7 +202,11 @@ export function buildToolCallChunks( // Non-streaming response builders -export function buildTextCompletion(content: string, model: string): ChatCompletion { +export function buildTextCompletion( + content: string, + model: string, + reasoning?: string, +): ChatCompletion { return { id: generateId(), object: "chat.completion", @@ -192,7 +215,12 @@ export function buildTextCompletion(content: string, model: string): ChatComplet choices: [ { index: 0, - message: { role: "assistant", content, refusal: null }, + message: { + role: "assistant", + content, + refusal: null, + ...(reasoning !== undefined ? { reasoning_content: reasoning } : {}), + }, finish_reason: "stop", }, ], diff --git a/src/server.ts b/src/server.ts index 5e5f08c..0f7cc0e 100644 --- a/src/server.ts +++ b/src/server.ts @@ -454,11 +454,11 @@ async function handleCompletions( response: { status: 200, fixture }, }); if (body.stream !== true) { - const completion = buildTextCompletion(response.content, body.model); + const completion = buildTextCompletion(response.content, body.model, response.reasoning); res.writeHead(200, { "Content-Type": "application/json" }); res.end(JSON.stringify(completion)); } else { - const chunks = buildTextChunks(response.content, body.model, chunkSize); + const chunks = buildTextChunks(response.content, body.model, chunkSize, response.reasoning); const interruption = createInterruptionSignal(fixture); const completed = await writeSSEStream(res, chunks, { latency, diff --git a/src/types.ts b/src/types.ts index 5f76d7d..5568bd2 100644 --- a/src/types.ts +++ b/src/types.ts @@ -200,6 +200,7 @@ export interface SSEChoice { export interface SSEDelta { role?: string; content?: string | null; + reasoning_content?: string; tool_calls?: SSEToolCallDelta[]; } @@ -231,6 +232,7 @@ export interface ChatCompletionMessage { role: "assistant"; content: string | null; refusal: string | null; + reasoning_content?: string; tool_calls?: 
ToolCallMessage[]; } From 27ac1143c9815c995e26756717cf6cacf01622cc Mon Sep 17 00:00:00 2001 From: Alem Tuzlak Date: Mon, 6 Apr 2026 17:22:28 +0200 Subject: [PATCH 2/9] feat: add reasoning support to Gemini --- src/__tests__/reasoning-all-providers.test.ts | 93 +++++++++++++++++++ src/gemini.ts | 40 ++++++-- 2 files changed, 126 insertions(+), 7 deletions(-) diff --git a/src/__tests__/reasoning-all-providers.test.ts b/src/__tests__/reasoning-all-providers.test.ts index 750d89b..632801d 100644 --- a/src/__tests__/reasoning-all-providers.test.ts +++ b/src/__tests__/reasoning-all-providers.test.ts @@ -172,3 +172,96 @@ describe("POST /v1/chat/completions (reasoning streaming)", () => { expect(reasoningDeltas).toHaveLength(0); }); }); + +// ─── Gemini: Reasoning ────────────────────────────────────────────────────── + +function parseGeminiSSEChunks(body: string): unknown[] { + const chunks: unknown[] = []; + for (const line of body.split("\n")) { + if (line.startsWith("data: ")) { + chunks.push(JSON.parse(line.slice(6))); + } + } + return chunks; +} + +describe("POST /v1beta/models/{model}:generateContent (reasoning non-streaming)", () => { + it("includes thought part before text part", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1beta/models/gemini-2.5-flash:generateContent`, { + contents: [{ role: "user", parts: [{ text: "think" }] }], + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + const parts = body.candidates[0].content.parts; + expect(parts).toHaveLength(2); + expect(parts[0].thought).toBe(true); + expect(parts[0].text).toBe("Let me think step by step about this problem."); + expect(parts[1].text).toBe("The answer is 42."); + expect(parts[1].thought).toBeUndefined(); + }); + + it("no thought part when reasoning is absent", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1beta/models/gemini-2.5-flash:generateContent`, { + 
contents: [{ role: "user", parts: [{ text: "plain" }] }], + }); + + const body = JSON.parse(res.body); + const parts = body.candidates[0].content.parts; + expect(parts).toHaveLength(1); + expect(parts[0].text).toBe("Just plain text."); + expect(parts[0].thought).toBeUndefined(); + }); +}); + +describe("POST /v1beta/models/{model}:streamGenerateContent (reasoning streaming)", () => { + it("streams thought chunks before text chunks", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1beta/models/gemini-2.5-flash:streamGenerateContent`, { + contents: [{ role: "user", parts: [{ text: "think" }] }], + }); + + expect(res.status).toBe(200); + const chunks = parseGeminiSSEChunks(res.body) as { + candidates: { + content: { role: string; parts: { text?: string; thought?: boolean }[] }; + finishReason?: string; + }[]; + }[]; + + const thoughtChunks = chunks.filter((c) => c.candidates[0].content.parts[0].thought === true); + const textChunks = chunks.filter((c) => c.candidates[0].content.parts[0].thought === undefined); + + expect(thoughtChunks.length).toBeGreaterThan(0); + expect(textChunks.length).toBeGreaterThan(0); + + const fullThought = thoughtChunks + .map((c) => c.candidates[0].content.parts[0].text ?? "") + .join(""); + expect(fullThought).toBe("Let me think step by step about this problem."); + + const fullText = textChunks.map((c) => c.candidates[0].content.parts[0].text ?? 
"").join(""); + expect(fullText).toBe("The answer is 42."); + + const lastChunk = chunks[chunks.length - 1]; + expect(lastChunk.candidates[0].finishReason).toBe("STOP"); + }); + + it("no thought chunks when reasoning is absent", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/v1beta/models/gemini-2.5-flash:streamGenerateContent`, { + contents: [{ role: "user", parts: [{ text: "plain" }] }], + }); + + const chunks = parseGeminiSSEChunks(res.body) as { + candidates: { + content: { parts: { text?: string; thought?: boolean }[] }; + }[]; + }[]; + + const thoughtChunks = chunks.filter((c) => c.candidates[0].content.parts[0].thought === true); + expect(thoughtChunks).toHaveLength(0); + }); +}); diff --git a/src/gemini.ts b/src/gemini.ts index 4229839..e99b9aa 100644 --- a/src/gemini.ts +++ b/src/gemini.ts @@ -36,6 +36,7 @@ import { proxyAndRecord } from "./recorder.js"; interface GeminiPart { text?: string; + thought?: boolean; functionCall?: { name: string; args: Record; id?: string }; functionResponse?: { name: string; response: unknown }; } @@ -187,10 +188,29 @@ interface GeminiResponseChunk { }; } -function buildGeminiTextStreamChunks(content: string, chunkSize: number): GeminiResponseChunk[] { +function buildGeminiTextStreamChunks( + content: string, + chunkSize: number, + reasoning?: string, +): GeminiResponseChunk[] { const chunks: GeminiResponseChunk[] = []; - // Content chunks + // Reasoning chunks (thought: true) + if (reasoning) { + for (let i = 0; i < reasoning.length; i += chunkSize) { + const slice = reasoning.slice(i, i + chunkSize); + chunks.push({ + candidates: [ + { + content: { role: "model", parts: [{ text: slice, thought: true }] }, + index: 0, + }, + ], + }); + } + } + + // Content chunks (original logic unchanged) for (let i = 0; i < content.length; i += chunkSize) { const slice = content.slice(i, i + chunkSize); const isLast = i + chunkSize >= content.length; @@ -215,7 +235,7 @@ function 
buildGeminiTextStreamChunks(content: string, chunkSize: number): Gemini chunks.push(chunk); } - // Handle empty content + // Handle empty content (original logic unchanged) if (content.length === 0) { chunks.push({ candidates: [ @@ -276,11 +296,17 @@ function buildGeminiToolCallStreamChunks( // Non-streaming response builders -function buildGeminiTextResponse(content: string): GeminiResponseChunk { +function buildGeminiTextResponse(content: string, reasoning?: string): GeminiResponseChunk { + const parts: GeminiPart[] = []; + if (reasoning) { + parts.push({ text: reasoning, thought: true }); + } + parts.push({ text: content }); + return { candidates: [ { - content: { role: "model", parts: [{ text: content }] }, + content: { role: "model", parts }, finishReason: "STOP", index: 0, }, @@ -528,11 +554,11 @@ export async function handleGemini( response: { status: 200, fixture }, }); if (!streaming) { - const body = buildGeminiTextResponse(response.content); + const body = buildGeminiTextResponse(response.content, response.reasoning); res.writeHead(200, { "Content-Type": "application/json" }); res.end(JSON.stringify(body)); } else { - const chunks = buildGeminiTextStreamChunks(response.content, chunkSize); + const chunks = buildGeminiTextStreamChunks(response.content, chunkSize, response.reasoning); const interruption = createInterruptionSignal(fixture); const completed = await writeGeminiSSEStream(res, chunks, { latency, From 990ad40aeba4cc27b0018f020ebb35626153be84 Mon Sep 17 00:00:00 2001 From: Alem Tuzlak Date: Mon, 6 Apr 2026 17:24:54 +0200 Subject: [PATCH 3/9] feat: add reasoning support to Bedrock InvokeModel --- src/__tests__/reasoning-all-providers.test.ts | 34 ++++++++++++ src/bedrock.ts | 53 ++++++++++++++++--- 2 files changed, 80 insertions(+), 7 deletions(-) diff --git a/src/__tests__/reasoning-all-providers.test.ts b/src/__tests__/reasoning-all-providers.test.ts index 632801d..319f0f4 100644 --- a/src/__tests__/reasoning-all-providers.test.ts +++ 
b/src/__tests__/reasoning-all-providers.test.ts @@ -265,3 +265,37 @@ describe("POST /v1beta/models/{model}:streamGenerateContent (reasoning streaming expect(thoughtChunks).toHaveLength(0); }); }); + +// ─── Bedrock InvokeModel: Reasoning ───────────────────────────────────────── + +describe("POST /model/{id}/invoke (reasoning non-streaming)", () => { + it("includes thinking content block before text block", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke`, { + messages: [{ role: "user", content: [{ type: "text", text: "think" }] }], + max_tokens: 1024, + anthropic_version: "bedrock-2023-05-31", + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.content).toHaveLength(2); + expect(body.content[0].type).toBe("thinking"); + expect(body.content[0].thinking).toBe("Let me think step by step about this problem."); + expect(body.content[1].type).toBe("text"); + expect(body.content[1].text).toBe("The answer is 42."); + }); + + it("no thinking block when reasoning is absent", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke`, { + messages: [{ role: "user", content: [{ type: "text", text: "plain" }] }], + max_tokens: 1024, + anthropic_version: "bedrock-2023-05-31", + }); + + const body = JSON.parse(res.body); + expect(body.content).toHaveLength(1); + expect(body.content[0].type).toBe("text"); + }); +}); diff --git a/src/bedrock.ts b/src/bedrock.ts index d45f64e..8fc6c54 100644 --- a/src/bedrock.ts +++ b/src/bedrock.ts @@ -198,12 +198,18 @@ export function bedrockToCompletionRequest( // ─── Response builders ────────────────────────────────────────────────────── -function buildBedrockTextResponse(content: string, model: string): object { +function buildBedrockTextResponse(content: string, model: string, reasoning?: string): 
object { + const contentBlocks: object[] = []; + if (reasoning) { + contentBlocks.push({ type: "thinking", thinking: reasoning }); + } + contentBlocks.push({ type: "text", text: content }); + return { id: generateMessageId(), type: "message", role: "assistant", - content: [{ type: "text", text: content }], + content: contentBlocks, model, stop_reason: "end_turn", stop_sequence: null, @@ -417,7 +423,11 @@ export async function handleBedrock( body: completionReq, response: { status: 200, fixture }, }); - const body = buildBedrockTextResponse(response.content, completionReq.model); + const body = buildBedrockTextResponse( + response.content, + completionReq.model, + response.reasoning, + ); res.writeHead(200, { "Content-Type": "application/json" }); res.end(JSON.stringify(body)); return; @@ -463,6 +473,7 @@ export async function handleBedrock( export function buildBedrockStreamTextEvents( content: string, chunkSize: number, + reasoning?: string, ): Array<{ eventType: string; payload: object }> { const events: Array<{ eventType: string; payload: object }> = []; @@ -471,9 +482,37 @@ export function buildBedrockStreamTextEvents( payload: { role: "assistant" }, }); + // Thinking block (emitted before text when reasoning is present) + if (reasoning) { + const blockIndex = 0; + events.push({ + eventType: "contentBlockStart", + payload: { contentBlockIndex: blockIndex, start: { type: "thinking" } }, + }); + + for (let i = 0; i < reasoning.length; i += chunkSize) { + const slice = reasoning.slice(i, i + chunkSize); + events.push({ + eventType: "contentBlockDelta", + payload: { + contentBlockIndex: blockIndex, + delta: { type: "thinking_delta", thinking: slice }, + }, + }); + } + + events.push({ + eventType: "contentBlockStop", + payload: { contentBlockIndex: blockIndex }, + }); + } + + // Text block + const textBlockIndex = reasoning ? 
1 : 0; + events.push({ eventType: "contentBlockStart", - payload: { contentBlockIndex: 0, start: {} }, + payload: { contentBlockIndex: textBlockIndex, start: {} }, }); for (let i = 0; i < content.length; i += chunkSize) { @@ -481,7 +520,7 @@ export function buildBedrockStreamTextEvents( events.push({ eventType: "contentBlockDelta", payload: { - contentBlockIndex: 0, + contentBlockIndex: textBlockIndex, delta: { type: "text_delta", text: slice }, }, }); @@ -489,7 +528,7 @@ export function buildBedrockStreamTextEvents( events.push({ eventType: "contentBlockStop", - payload: { contentBlockIndex: 0 }, + payload: { contentBlockIndex: textBlockIndex }, }); events.push({ @@ -728,7 +767,7 @@ export async function handleBedrockStream( body: completionReq, response: { status: 200, fixture }, }); - const events = buildBedrockStreamTextEvents(response.content, chunkSize); + const events = buildBedrockStreamTextEvents(response.content, chunkSize, response.reasoning); const interruption = createInterruptionSignal(fixture); const completed = await writeEventStream(res, events, { latency, From 4eb930098d7ef0ce506b0cdb03c22da5339db5a2 Mon Sep 17 00:00:00 2001 From: Alem Tuzlak Date: Mon, 6 Apr 2026 17:28:55 +0200 Subject: [PATCH 4/9] feat: add reasoning support to Bedrock Converse --- src/__tests__/reasoning-all-providers.test.ts | 39 +++++++++++++++++++ src/bedrock-converse.ts | 16 ++++++-- 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/src/__tests__/reasoning-all-providers.test.ts b/src/__tests__/reasoning-all-providers.test.ts index 319f0f4..f20b1e8 100644 --- a/src/__tests__/reasoning-all-providers.test.ts +++ b/src/__tests__/reasoning-all-providers.test.ts @@ -299,3 +299,42 @@ describe("POST /model/{id}/invoke (reasoning non-streaming)", () => { expect(body.content[0].type).toBe("text"); }); }); + +// ─── Bedrock Converse: Reasoning ──────────────────────────────────────────── + +describe("POST /model/{id}/converse (reasoning non-streaming)", () => { + 
it("includes reasoningContent block before text block", async () => { + instance = await createServer(allFixtures); + const res = await post( + `${instance.url}/model/anthropic.claude-3-sonnet-20240229-v1:0/converse`, + { + messages: [{ role: "user", content: [{ text: "think" }] }], + }, + ); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + const content = body.output.message.content; + expect(content).toHaveLength(2); + expect(content[0].reasoningContent).toBeDefined(); + expect(content[0].reasoningContent.reasoningText.text).toBe( + "Let me think step by step about this problem.", + ); + expect(content[1].text).toBe("The answer is 42."); + }); + + it("no reasoningContent block when reasoning is absent", async () => { + instance = await createServer(allFixtures); + const res = await post( + `${instance.url}/model/anthropic.claude-3-sonnet-20240229-v1:0/converse`, + { + messages: [{ role: "user", content: [{ text: "plain" }] }], + }, + ); + + const body = JSON.parse(res.body); + const content = body.output.message.content; + expect(content).toHaveLength(1); + expect(content[0].text).toBe("Just plain text."); + }); +}); diff --git a/src/bedrock-converse.ts b/src/bedrock-converse.ts index 933e0af..69de85c 100644 --- a/src/bedrock-converse.ts +++ b/src/bedrock-converse.ts @@ -156,12 +156,20 @@ export function converseToCompletionRequest( // ─── Response builders ────────────────────────────────────────────────────── -function buildConverseTextResponse(content: string): object { +function buildConverseTextResponse(content: string, reasoning?: string): object { + const contentBlocks: object[] = []; + if (reasoning) { + contentBlocks.push({ + reasoningContent: { reasoningText: { text: reasoning } }, + }); + } + contentBlocks.push({ text: content }); + return { output: { message: { role: "assistant", - content: [{ text: content }], + content: contentBlocks, }, }, stopReason: "end_turn", @@ -363,7 +371,7 @@ export async function handleConverse( 
body: completionReq, response: { status: 200, fixture }, }); - const body = buildConverseTextResponse(response.content); + const body = buildConverseTextResponse(response.content, response.reasoning); res.writeHead(200, { "Content-Type": "application/json" }); res.end(JSON.stringify(body)); return; @@ -568,7 +576,7 @@ export async function handleConverseStream( body: completionReq, response: { status: 200, fixture }, }); - const events = buildBedrockStreamTextEvents(response.content, chunkSize); + const events = buildBedrockStreamTextEvents(response.content, chunkSize, response.reasoning); const interruption = createInterruptionSignal(fixture); const completed = await writeEventStream(res, events, { latency, From c0a1b0bcdfb3693974b0d83b10c5a2bb2beeb01c Mon Sep 17 00:00:00 2001 From: Alem Tuzlak Date: Mon, 6 Apr 2026 17:32:51 +0200 Subject: [PATCH 5/9] feat: add reasoning support to Ollama --- src/__tests__/reasoning-all-providers.test.ts | 166 ++++++++++++++++++ src/ollama.ts | 69 +++++++- 2 files changed, 228 insertions(+), 7 deletions(-) diff --git a/src/__tests__/reasoning-all-providers.test.ts b/src/__tests__/reasoning-all-providers.test.ts index f20b1e8..e321886 100644 --- a/src/__tests__/reasoning-all-providers.test.ts +++ b/src/__tests__/reasoning-all-providers.test.ts @@ -338,3 +338,169 @@ describe("POST /model/{id}/converse (reasoning non-streaming)", () => { expect(content[0].text).toBe("Just plain text."); }); }); + +// ─── Ollama /api/chat: Reasoning ──────────────────────────────────────────── + +function parseNDJSON(body: string): object[] { + return body + .split("\n") + .filter((line) => line.trim() !== "") + .map((line) => JSON.parse(line) as object); +} + +describe("POST /api/chat (reasoning non-streaming)", () => { + it("includes reasoning_content on assistant message", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "deepseek-r1", + messages: [{ role: "user", content: 
"think" }], + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.message.content).toBe("The answer is 42."); + expect(body.message.reasoning_content).toBe("Let me think step by step about this problem."); + }); + + it("omits reasoning_content when reasoning is absent", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "deepseek-r1", + messages: [{ role: "user", content: "plain" }], + stream: false, + }); + + const body = JSON.parse(res.body); + expect(body.message.content).toBe("Just plain text."); + expect(body.message.reasoning_content).toBeUndefined(); + }); +}); + +describe("POST /api/chat (reasoning streaming)", () => { + it("streams reasoning_content chunks before content chunks", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/chat`, { + model: "deepseek-r1", + messages: [{ role: "user", content: "think" }], + stream: true, + }); + + expect(res.status).toBe(200); + const chunks = parseNDJSON(res.body) as { + message: { role: string; content: string; reasoning_content?: string }; + done: boolean; + }[]; + + const reasoningChunks = chunks.filter( + (c) => !c.done && c.message.reasoning_content !== undefined, + ); + expect(reasoningChunks.length).toBeGreaterThan(0); + const fullReasoning = reasoningChunks.map((c) => c.message.reasoning_content).join(""); + expect(fullReasoning).toBe("Let me think step by step about this problem."); + + const contentChunks = chunks.filter( + (c) => !c.done && c.message.content !== "" && c.message.reasoning_content === undefined, + ); + expect(contentChunks.length).toBeGreaterThan(0); + const fullContent = contentChunks.map((c) => c.message.content).join(""); + expect(fullContent).toBe("The answer is 42."); + }); + + it("no reasoning_content chunks when reasoning is absent", async () => { + instance = await createServer(allFixtures); + const res 
= await post(`${instance.url}/api/chat`, { + model: "deepseek-r1", + messages: [{ role: "user", content: "plain" }], + stream: true, + }); + + const chunks = parseNDJSON(res.body) as { + message: { reasoning_content?: string }; + done: boolean; + }[]; + + const reasoningChunks = chunks.filter( + (c) => !c.done && c.message.reasoning_content !== undefined, + ); + expect(reasoningChunks).toHaveLength(0); + }); +}); + +// ─── Ollama /api/generate: Reasoning ──────────────────────────────────────── + +describe("POST /api/generate (reasoning non-streaming)", () => { + it("includes reasoning_content field", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/generate`, { + model: "deepseek-r1", + prompt: "think", + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.response).toBe("The answer is 42."); + expect(body.reasoning_content).toBe("Let me think step by step about this problem."); + }); + + it("omits reasoning_content when reasoning is absent", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/generate`, { + model: "deepseek-r1", + prompt: "plain", + stream: false, + }); + + const body = JSON.parse(res.body); + expect(body.response).toBe("Just plain text."); + expect(body.reasoning_content).toBeUndefined(); + }); +}); + +describe("POST /api/generate (reasoning streaming)", () => { + it("streams reasoning_content chunks before response chunks", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/generate`, { + model: "deepseek-r1", + prompt: "think", + stream: true, + }); + + expect(res.status).toBe(200); + const chunks = parseNDJSON(res.body) as { + response: string; + reasoning_content?: string; + done: boolean; + }[]; + + const reasoningChunks = chunks.filter((c) => !c.done && c.reasoning_content !== undefined); + 
expect(reasoningChunks.length).toBeGreaterThan(0); + const fullReasoning = reasoningChunks.map((c) => c.reasoning_content).join(""); + expect(fullReasoning).toBe("Let me think step by step about this problem."); + + const contentChunks = chunks.filter( + (c) => !c.done && c.response !== "" && c.reasoning_content === undefined, + ); + expect(contentChunks.length).toBeGreaterThan(0); + const fullContent = contentChunks.map((c) => c.response).join(""); + expect(fullContent).toBe("The answer is 42."); + }); + + it("no reasoning_content chunks when reasoning is absent", async () => { + instance = await createServer(allFixtures); + const res = await post(`${instance.url}/api/generate`, { + model: "deepseek-r1", + prompt: "plain", + stream: true, + }); + + const chunks = parseNDJSON(res.body) as { + reasoning_content?: string; + done: boolean; + }[]; + + const reasoningChunks = chunks.filter((c) => !c.done && c.reasoning_content !== undefined); + expect(reasoningChunks).toHaveLength(0); + }); +}); diff --git a/src/ollama.ts b/src/ollama.ts index 20ed12f..0adac53 100644 --- a/src/ollama.ts +++ b/src/ollama.ts @@ -121,9 +121,26 @@ function ollamaGenerateToCompletionRequest(req: OllamaGenerateRequest): ChatComp // ─── Response builders: /api/chat ──────────────────────────────────────────── -function buildOllamaChatTextChunks(content: string, model: string, chunkSize: number): object[] { +function buildOllamaChatTextChunks( + content: string, + model: string, + chunkSize: number, + reasoning?: string, +): object[] { const chunks: object[] = []; + // Reasoning chunks (before content) + if (reasoning) { + for (let i = 0; i < reasoning.length; i += chunkSize) { + const slice = reasoning.slice(i, i + chunkSize); + chunks.push({ + model, + message: { role: "assistant", content: "", reasoning_content: slice }, + done: false, + }); + } + } + for (let i = 0; i < content.length; i += chunkSize) { const slice = content.slice(i, i + chunkSize); chunks.push({ @@ -144,10 +161,14 @@ 
function buildOllamaChatTextChunks(content: string, model: string, chunkSize: nu return chunks; } -function buildOllamaChatTextResponse(content: string, model: string): object { +function buildOllamaChatTextResponse(content: string, model: string, reasoning?: string): object { return { model, - message: { role: "assistant", content }, + message: { + role: "assistant", + content, + ...(reasoning !== undefined ? { reasoning_content: reasoning } : {}), + }, done: true, ...DURATION_FIELDS, }; @@ -240,10 +261,25 @@ function buildOllamaGenerateTextChunks( content: string, model: string, chunkSize: number, + reasoning?: string, ): object[] { const chunks: object[] = []; const createdAt = new Date().toISOString(); + // Reasoning chunks (before content) + if (reasoning) { + for (let i = 0; i < reasoning.length; i += chunkSize) { + const slice = reasoning.slice(i, i + chunkSize); + chunks.push({ + model, + created_at: createdAt, + response: "", + reasoning_content: slice, + done: false, + }); + } + } + for (let i = 0; i < content.length; i += chunkSize) { const slice = content.slice(i, i + chunkSize); chunks.push({ @@ -267,11 +303,16 @@ function buildOllamaGenerateTextChunks( return chunks; } -function buildOllamaGenerateTextResponse(content: string, model: string): object { +function buildOllamaGenerateTextResponse( + content: string, + model: string, + reasoning?: string, +): object { return { model, created_at: new Date().toISOString(), response: content, + ...(reasoning !== undefined ? 
{ reasoning_content: reasoning } : {}), done: true, ...DURATION_FIELDS, context: [], @@ -448,11 +489,20 @@ export async function handleOllama( response: { status: 200, fixture }, }); if (!streaming) { - const body = buildOllamaChatTextResponse(response.content, completionReq.model); + const body = buildOllamaChatTextResponse( + response.content, + completionReq.model, + response.reasoning, + ); res.writeHead(200, { "Content-Type": "application/json" }); res.end(JSON.stringify(body)); } else { - const chunks = buildOllamaChatTextChunks(response.content, completionReq.model, chunkSize); + const chunks = buildOllamaChatTextChunks( + response.content, + completionReq.model, + chunkSize, + response.reasoning, + ); const interruption = createInterruptionSignal(fixture); const completed = await writeNDJSONStream(res, chunks, { latency, @@ -691,7 +741,11 @@ export async function handleOllamaGenerate( response: { status: 200, fixture }, }); if (!streaming) { - const body = buildOllamaGenerateTextResponse(response.content, completionReq.model); + const body = buildOllamaGenerateTextResponse( + response.content, + completionReq.model, + response.reasoning, + ); res.writeHead(200, { "Content-Type": "application/json" }); res.end(JSON.stringify(body)); } else { @@ -699,6 +753,7 @@ export async function handleOllamaGenerate( response.content, completionReq.model, chunkSize, + response.reasoning, ); const interruption = createInterruptionSignal(fixture); const completed = await writeNDJSONStream(res, chunks, { From e900e5edbfe777a86ed131fdf9f1c16cc5dc8f5e Mon Sep 17 00:00:00 2001 From: Alem Tuzlak Date: Mon, 6 Apr 2026 17:40:27 +0200 Subject: [PATCH 6/9] test: add unit tests for Bedrock streaming reasoning events --- src/__tests__/reasoning-all-providers.test.ts | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/src/__tests__/reasoning-all-providers.test.ts b/src/__tests__/reasoning-all-providers.test.ts index e321886..cc8da04 100644 --- 
a/src/__tests__/reasoning-all-providers.test.ts +++ b/src/__tests__/reasoning-all-providers.test.ts @@ -2,6 +2,7 @@ import { describe, it, expect, afterEach } from "vitest"; import * as http from "node:http"; import type { Fixture } from "../types.js"; import { createServer, type ServerInstance } from "../server.js"; +import { buildBedrockStreamTextEvents } from "../bedrock.js"; // --- helpers --- @@ -504,3 +505,69 @@ describe("POST /api/generate (reasoning streaming)", () => { expect(reasoningChunks).toHaveLength(0); }); }); + +// ─── Bedrock streaming reasoning: unit test ───────────────────────────────── + +describe("buildBedrockStreamTextEvents (reasoning)", () => { + it("emits thinking block events before text block events", () => { + const events = buildBedrockStreamTextEvents("The answer.", 100, "Step by step."); + const types = events.map((e) => e.eventType); + + // messageStart → thinking block → text block → messageStop + expect(types[0]).toBe("messageStart"); + + // Thinking block at index 0 + expect(events[1]).toEqual({ + eventType: "contentBlockStart", + payload: { contentBlockIndex: 0, start: { type: "thinking" } }, + }); + expect(events[2]).toEqual({ + eventType: "contentBlockDelta", + payload: { + contentBlockIndex: 0, + delta: { type: "thinking_delta", thinking: "Step by step." }, + }, + }); + expect(events[3]).toEqual({ + eventType: "contentBlockStop", + payload: { contentBlockIndex: 0 }, + }); + + // Text block at index 1 + expect(events[4]).toEqual({ + eventType: "contentBlockStart", + payload: { contentBlockIndex: 1, start: {} }, + }); + expect(events[5]).toEqual({ + eventType: "contentBlockDelta", + payload: { + contentBlockIndex: 1, + delta: { type: "text_delta", text: "The answer." 
}, + }, + }); + expect(events[6]).toEqual({ + eventType: "contentBlockStop", + payload: { contentBlockIndex: 1 }, + }); + + expect(events[7]).toEqual({ + eventType: "messageStop", + payload: { stopReason: "end_turn" }, + }); + }); + + it("no thinking block when reasoning is absent", () => { + const events = buildBedrockStreamTextEvents("Hello.", 100); + const types = events.map((e) => e.eventType); + + // messageStart → text block at index 0 → messageStop + expect(types).toEqual([ + "messageStart", + "contentBlockStart", + "contentBlockDelta", + "contentBlockStop", + "messageStop", + ]); + expect((events[1].payload as { contentBlockIndex: number }).contentBlockIndex).toBe(0); + }); +}); From ed1f973faf358fc73d17e7cb5e32d65f8198563a Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Mon, 6 Apr 2026 09:49:55 -0700 Subject: [PATCH 7/9] fix: standardize reasoning guards to truthy checks Change `if (reasoning !== undefined)` to `if (reasoning)` in helpers.ts and ollama.ts for consistency with the fixture-loader convention where empty string means no reasoning. --- src/helpers.ts | 4 ++-- src/ollama.ts | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/helpers.ts b/src/helpers.ts index a94e200..490746f 100644 --- a/src/helpers.ts +++ b/src/helpers.ts @@ -82,7 +82,7 @@ export function buildTextChunks( }); // Reasoning chunks (emitted before content chunks) - if (reasoning !== undefined) { + if (reasoning) { for (let i = 0; i < reasoning.length; i += chunkSize) { const slice = reasoning.slice(i, i + chunkSize); chunks.push({ @@ -219,7 +219,7 @@ export function buildTextCompletion( role: "assistant", content, refusal: null, - ...(reasoning !== undefined ? { reasoning_content: reasoning } : {}), + ...(reasoning ? 
{ reasoning_content: reasoning } : {}), }, finish_reason: "stop", }, diff --git a/src/ollama.ts b/src/ollama.ts index 1b03fdb..ab4b1c7 100644 --- a/src/ollama.ts +++ b/src/ollama.ts @@ -167,7 +167,7 @@ function buildOllamaChatTextResponse(content: string, model: string, reasoning?: message: { role: "assistant", content, - ...(reasoning !== undefined ? { reasoning_content: reasoning } : {}), + ...(reasoning ? { reasoning_content: reasoning } : {}), }, done: true, ...DURATION_FIELDS, @@ -312,7 +312,7 @@ function buildOllamaGenerateTextResponse( model, created_at: new Date().toISOString(), response: content, - ...(reasoning !== undefined ? { reasoning_content: reasoning } : {}), + ...(reasoning ? { reasoning_content: reasoning } : {}), done: true, ...DURATION_FIELDS, context: [], From 38f2e6125959156071e90a1ba30075353154e5b1 Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Mon, 6 Apr 2026 09:51:23 -0700 Subject: [PATCH 8/9] test: move server lifecycle to beforeAll/afterAll in reasoning tests All tests use the same fixtures, so share a single server instance across the entire file instead of creating one per test. Update the post() helper to use a shared baseUrl instead of accepting a full URL. 
--- src/__tests__/reasoning-all-providers.test.ts | 103 +++++++----------- 1 file changed, 40 insertions(+), 63 deletions(-) diff --git a/src/__tests__/reasoning-all-providers.test.ts b/src/__tests__/reasoning-all-providers.test.ts index cc8da04..b069a34 100644 --- a/src/__tests__/reasoning-all-providers.test.ts +++ b/src/__tests__/reasoning-all-providers.test.ts @@ -1,4 +1,4 @@ -import { describe, it, expect, afterEach } from "vitest"; +import { describe, it, expect, beforeAll, afterAll } from "vitest"; import * as http from "node:http"; import type { Fixture } from "../types.js"; import { createServer, type ServerInstance } from "../server.js"; @@ -6,18 +6,21 @@ import { buildBedrockStreamTextEvents } from "../bedrock.js"; // --- helpers --- +let instance: ServerInstance; +let baseUrl: string; + function post( - url: string, + path: string, body: unknown, ): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: string }> { return new Promise((resolve, reject) => { const data = JSON.stringify(body); - const parsed = new URL(url); + const parsed = new URL(baseUrl); const req = http.request( { hostname: parsed.hostname, port: parsed.port, - path: parsed.pathname, + path, method: "POST", headers: { "Content-Type": "application/json", @@ -78,25 +81,24 @@ const plainFixture: Fixture = { const allFixtures: Fixture[] = [reasoningFixture, plainFixture]; -// --- tests --- +// --- server lifecycle --- -let instance: ServerInstance | null = null; +beforeAll(async () => { + instance = await createServer(allFixtures); + baseUrl = instance.url; +}); -afterEach(async () => { - if (instance) { - await new Promise((resolve) => { - instance!.server.close(() => resolve()); - }); - instance = null; - } +afterAll(async () => { + await new Promise((resolve) => { + instance.server.close(() => resolve()); + }); }); // ─── OpenAI Chat Completions: Reasoning ───────────────────────────────────── describe("POST /v1/chat/completions (reasoning non-streaming)", () => { 
it("includes reasoning_content field on assistant message", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/v1/chat/completions`, { + const res = await post(`/v1/chat/completions`, { model: "gpt-4", messages: [{ role: "user", content: "think" }], stream: false, @@ -111,8 +113,7 @@ describe("POST /v1/chat/completions (reasoning non-streaming)", () => { }); it("omits reasoning_content when reasoning is absent", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/v1/chat/completions`, { + const res = await post(`/v1/chat/completions`, { model: "gpt-4", messages: [{ role: "user", content: "plain" }], stream: false, @@ -126,8 +127,7 @@ describe("POST /v1/chat/completions (reasoning non-streaming)", () => { describe("POST /v1/chat/completions (reasoning streaming)", () => { it("emits reasoning_content deltas before content deltas", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/v1/chat/completions`, { + const res = await post(`/v1/chat/completions`, { model: "gpt-4", messages: [{ role: "user", content: "think" }], stream: true, @@ -159,8 +159,7 @@ describe("POST /v1/chat/completions (reasoning streaming)", () => { }); it("no reasoning_content deltas when reasoning is absent", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/v1/chat/completions`, { + const res = await post(`/v1/chat/completions`, { model: "gpt-4", messages: [{ role: "user", content: "plain" }], stream: true, @@ -188,8 +187,7 @@ function parseGeminiSSEChunks(body: string): unknown[] { describe("POST /v1beta/models/{model}:generateContent (reasoning non-streaming)", () => { it("includes thought part before text part", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/v1beta/models/gemini-2.5-flash:generateContent`, { + const res = await 
post(`/v1beta/models/gemini-2.5-flash:generateContent`, { contents: [{ role: "user", parts: [{ text: "think" }] }], }); @@ -204,8 +202,7 @@ describe("POST /v1beta/models/{model}:generateContent (reasoning non-streaming)" }); it("no thought part when reasoning is absent", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/v1beta/models/gemini-2.5-flash:generateContent`, { + const res = await post(`/v1beta/models/gemini-2.5-flash:generateContent`, { contents: [{ role: "user", parts: [{ text: "plain" }] }], }); @@ -219,8 +216,7 @@ describe("POST /v1beta/models/{model}:generateContent (reasoning non-streaming)" describe("POST /v1beta/models/{model}:streamGenerateContent (reasoning streaming)", () => { it("streams thought chunks before text chunks", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/v1beta/models/gemini-2.5-flash:streamGenerateContent`, { + const res = await post(`/v1beta/models/gemini-2.5-flash:streamGenerateContent`, { contents: [{ role: "user", parts: [{ text: "think" }] }], }); @@ -251,8 +247,7 @@ describe("POST /v1beta/models/{model}:streamGenerateContent (reasoning streaming }); it("no thought chunks when reasoning is absent", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/v1beta/models/gemini-2.5-flash:streamGenerateContent`, { + const res = await post(`/v1beta/models/gemini-2.5-flash:streamGenerateContent`, { contents: [{ role: "user", parts: [{ text: "plain" }] }], }); @@ -271,8 +266,7 @@ describe("POST /v1beta/models/{model}:streamGenerateContent (reasoning streaming describe("POST /model/{id}/invoke (reasoning non-streaming)", () => { it("includes thinking content block before text block", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke`, { + const res = await 
post(`/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke`, { messages: [{ role: "user", content: [{ type: "text", text: "think" }] }], max_tokens: 1024, anthropic_version: "bedrock-2023-05-31", @@ -288,8 +282,7 @@ describe("POST /model/{id}/invoke (reasoning non-streaming)", () => { }); it("no thinking block when reasoning is absent", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke`, { + const res = await post(`/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke`, { messages: [{ role: "user", content: [{ type: "text", text: "plain" }] }], max_tokens: 1024, anthropic_version: "bedrock-2023-05-31", @@ -305,13 +298,9 @@ describe("POST /model/{id}/invoke (reasoning non-streaming)", () => { describe("POST /model/{id}/converse (reasoning non-streaming)", () => { it("includes reasoningContent block before text block", async () => { - instance = await createServer(allFixtures); - const res = await post( - `${instance.url}/model/anthropic.claude-3-sonnet-20240229-v1:0/converse`, - { - messages: [{ role: "user", content: [{ text: "think" }] }], - }, - ); + const res = await post(`/model/anthropic.claude-3-sonnet-20240229-v1:0/converse`, { + messages: [{ role: "user", content: [{ text: "think" }] }], + }); expect(res.status).toBe(200); const body = JSON.parse(res.body); @@ -325,13 +314,9 @@ describe("POST /model/{id}/converse (reasoning non-streaming)", () => { }); it("no reasoningContent block when reasoning is absent", async () => { - instance = await createServer(allFixtures); - const res = await post( - `${instance.url}/model/anthropic.claude-3-sonnet-20240229-v1:0/converse`, - { - messages: [{ role: "user", content: [{ text: "plain" }] }], - }, - ); + const res = await post(`/model/anthropic.claude-3-sonnet-20240229-v1:0/converse`, { + messages: [{ role: "user", content: [{ text: "plain" }] }], + }); const body = JSON.parse(res.body); const content = 
body.output.message.content; @@ -351,8 +336,7 @@ function parseNDJSON(body: string): object[] { describe("POST /api/chat (reasoning non-streaming)", () => { it("includes reasoning_content on assistant message", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/api/chat`, { + const res = await post(`/api/chat`, { model: "deepseek-r1", messages: [{ role: "user", content: "think" }], stream: false, @@ -365,8 +349,7 @@ describe("POST /api/chat (reasoning non-streaming)", () => { }); it("omits reasoning_content when reasoning is absent", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/api/chat`, { + const res = await post(`/api/chat`, { model: "deepseek-r1", messages: [{ role: "user", content: "plain" }], stream: false, @@ -380,8 +363,7 @@ describe("POST /api/chat (reasoning non-streaming)", () => { describe("POST /api/chat (reasoning streaming)", () => { it("streams reasoning_content chunks before content chunks", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/api/chat`, { + const res = await post(`/api/chat`, { model: "deepseek-r1", messages: [{ role: "user", content: "think" }], stream: true, @@ -409,8 +391,7 @@ describe("POST /api/chat (reasoning streaming)", () => { }); it("no reasoning_content chunks when reasoning is absent", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/api/chat`, { + const res = await post(`/api/chat`, { model: "deepseek-r1", messages: [{ role: "user", content: "plain" }], stream: true, @@ -432,8 +413,7 @@ describe("POST /api/chat (reasoning streaming)", () => { describe("POST /api/generate (reasoning non-streaming)", () => { it("includes reasoning_content field", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/api/generate`, { + const res = await post(`/api/generate`, { model: 
"deepseek-r1", prompt: "think", stream: false, @@ -446,8 +426,7 @@ describe("POST /api/generate (reasoning non-streaming)", () => { }); it("omits reasoning_content when reasoning is absent", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/api/generate`, { + const res = await post(`/api/generate`, { model: "deepseek-r1", prompt: "plain", stream: false, @@ -461,8 +440,7 @@ describe("POST /api/generate (reasoning non-streaming)", () => { describe("POST /api/generate (reasoning streaming)", () => { it("streams reasoning_content chunks before response chunks", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/api/generate`, { + const res = await post(`/api/generate`, { model: "deepseek-r1", prompt: "think", stream: true, @@ -489,8 +467,7 @@ describe("POST /api/generate (reasoning streaming)", () => { }); it("no reasoning_content chunks when reasoning is absent", async () => { - instance = await createServer(allFixtures); - const res = await post(`${instance.url}/api/generate`, { + const res = await post(`/api/generate`, { model: "deepseek-r1", prompt: "plain", stream: true, From a482f2db7e031314705828179e127a9e26e62b7f Mon Sep 17 00:00:00 2001 From: Jordan Ritter Date: Mon, 6 Apr 2026 09:52:54 -0700 Subject: [PATCH 9/9] test: add Bedrock streaming integration tests for reasoning Add integration tests for invoke-with-response-stream and converse-stream endpoints that verify thinking block events appear before text content events when reasoning is present. Includes a binary event stream frame decoder for parsing the AWS Event Stream wire format in tests. 
--- src/__tests__/reasoning-all-providers.test.ts | 236 ++++++++++++++++++ 1 file changed, 236 insertions(+) diff --git a/src/__tests__/reasoning-all-providers.test.ts b/src/__tests__/reasoning-all-providers.test.ts index b069a34..42657c2 100644 --- a/src/__tests__/reasoning-all-providers.test.ts +++ b/src/__tests__/reasoning-all-providers.test.ts @@ -1,5 +1,6 @@ import { describe, it, expect, beforeAll, afterAll } from "vitest"; import * as http from "node:http"; +import { crc32 } from "node:zlib"; import type { Fixture } from "../types.js"; import { createServer, type ServerInstance } from "../server.js"; import { buildBedrockStreamTextEvents } from "../bedrock.js"; @@ -45,6 +46,98 @@ function post( }); } +function postRaw( + path: string, + body: unknown, +): Promise<{ status: number; headers: http.IncomingHttpHeaders; body: Buffer }> { + return new Promise((resolve, reject) => { + const data = JSON.stringify(body); + const parsed = new URL(baseUrl); + const req = http.request( + { + hostname: parsed.hostname, + port: parsed.port, + path, + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(data), + }, + }, + (res) => { + const chunks: Buffer[] = []; + res.on("data", (c: Buffer) => chunks.push(c)); + res.on("end", () => { + resolve({ + status: res.statusCode ?? 0, + headers: res.headers, + body: Buffer.concat(chunks), + }); + }); + }, + ); + req.on("error", reject); + req.write(data); + req.end(); + }); +} + +/** + * Decode AWS Event Stream binary frames from a Buffer. + * Returns an array of { eventType, payload } objects. 
+ */
+function decodeEventStreamFrames(buf: Buffer): Array<{ eventType: string; payload: object }> {
+  const frames: Array<{ eventType: string; payload: object }> = [];
+  let offset = 0;
+
+  while (offset < buf.length) {
+    if (offset + 12 > buf.length) break;
+
+    const totalLength = buf.readUInt32BE(offset);
+    const headersLength = buf.readUInt32BE(offset + 4);
+    const preludeCrc = buf.readUInt32BE(offset + 8);
+
+    // Verify prelude CRC
+    const computedPreludeCrc = crc32(buf.subarray(offset, offset + 8));
+    if (computedPreludeCrc >>> 0 !== preludeCrc) {
+      throw new Error("Prelude CRC mismatch");
+    }
+
+    // Parse headers
+    const headersStart = offset + 12;
+    const headersEnd = headersStart + headersLength;
+    const headers: Record<string, string> = {};
+    let hOff = headersStart;
+    while (hOff < headersEnd) {
+      const nameLen = buf.readUInt8(hOff);
+      hOff += 1;
+      const name = buf.subarray(hOff, hOff + nameLen).toString("utf8");
+      hOff += nameLen;
+      hOff += 1; // skip header type byte (7 = STRING)
+      const valueLen = buf.readUInt16BE(hOff);
+      hOff += 2;
+      const value = buf.subarray(hOff, hOff + valueLen).toString("utf8");
+      hOff += valueLen;
+      headers[name] = value;
+    }
+
+    // Parse payload
+    const payloadStart = headersEnd;
+    const payloadEnd = offset + totalLength - 4; // minus message CRC
+    const payloadBuf = buf.subarray(payloadStart, payloadEnd);
+    const payload = payloadBuf.length > 0 ? JSON.parse(payloadBuf.toString("utf8")) : {};
+
+    frames.push({
+      eventType: headers[":event-type"] ??
"", + payload, + }); + + offset += totalLength; + } + + return frames; +} + interface SSEEvent { type?: string; choices?: { @@ -325,6 +418,149 @@ describe("POST /model/{id}/converse (reasoning non-streaming)", () => { }); }); +// ─── Bedrock InvokeModel Streaming: Reasoning ───────────────────────────────── + +describe("POST /model/{id}/invoke-with-response-stream (reasoning streaming)", () => { + it("emits thinking block events before text content events", async () => { + const res = await postRaw( + `/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream`, + { + messages: [{ role: "user", content: [{ type: "text", text: "think" }] }], + max_tokens: 1024, + anthropic_version: "bedrock-2023-05-31", + }, + ); + + expect(res.status).toBe(200); + const frames = decodeEventStreamFrames(res.body); + const eventTypes = frames.map((f) => f.eventType); + + // Should start with messageStart + expect(eventTypes[0]).toBe("messageStart"); + + // Find thinking and text block starts + const thinkingStartIdx = frames.findIndex( + (f) => + f.eventType === "contentBlockStart" && + (f.payload as { start?: { type?: string } }).start?.type === "thinking", + ); + const textStartIdx = frames.findIndex( + (f) => + f.eventType === "contentBlockStart" && + (f.payload as { start?: { type?: string } }).start?.type === undefined, + ); + + expect(thinkingStartIdx).toBeGreaterThan(0); + expect(textStartIdx).toBeGreaterThan(thinkingStartIdx); + + // Verify thinking content + const thinkingDeltas = frames.filter( + (f) => + f.eventType === "contentBlockDelta" && + (f.payload as { delta?: { type?: string } }).delta?.type === "thinking_delta", + ); + const fullThinking = thinkingDeltas + .map((f) => (f.payload as { delta: { thinking: string } }).delta.thinking) + .join(""); + expect(fullThinking).toBe("Let me think step by step about this problem."); + + // Verify text content + const textDeltas = frames.filter( + (f) => + f.eventType === "contentBlockDelta" && + (f.payload as { 
delta?: { type?: string } }).delta?.type === "text_delta", + ); + const fullText = textDeltas + .map((f) => (f.payload as { delta: { text: string } }).delta.text) + .join(""); + expect(fullText).toBe("The answer is 42."); + + // Should end with messageStop + expect(eventTypes[eventTypes.length - 1]).toBe("messageStop"); + }); + + it("no thinking block when reasoning is absent", async () => { + const res = await postRaw( + `/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke-with-response-stream`, + { + messages: [{ role: "user", content: [{ type: "text", text: "plain" }] }], + max_tokens: 1024, + anthropic_version: "bedrock-2023-05-31", + }, + ); + + expect(res.status).toBe(200); + const frames = decodeEventStreamFrames(res.body); + + const thinkingDeltas = frames.filter( + (f) => + f.eventType === "contentBlockDelta" && + (f.payload as { delta?: { type?: string } }).delta?.type === "thinking_delta", + ); + expect(thinkingDeltas).toHaveLength(0); + }); +}); + +// ─── Bedrock Converse Streaming: Reasoning ──────────────────────────────────── + +describe("POST /model/{id}/converse-stream (reasoning streaming)", () => { + it("emits thinking block events before text content events", async () => { + const res = await postRaw(`/model/anthropic.claude-3-sonnet-20240229-v1:0/converse-stream`, { + messages: [{ role: "user", content: [{ text: "think" }] }], + }); + + expect(res.status).toBe(200); + const frames = decodeEventStreamFrames(res.body); + const eventTypes = frames.map((f) => f.eventType); + + expect(eventTypes[0]).toBe("messageStart"); + + // Find thinking and text block starts + const thinkingStartIdx = frames.findIndex( + (f) => + f.eventType === "contentBlockStart" && + (f.payload as { start?: { type?: string } }).start?.type === "thinking", + ); + const textStartIdx = frames.findIndex( + (f) => + f.eventType === "contentBlockStart" && + (f.payload as { start?: { type?: string } }).start?.type === undefined, + ); + + 
expect(thinkingStartIdx).toBeGreaterThan(0); + expect(textStartIdx).toBeGreaterThan(thinkingStartIdx); + + // Verify reasoning content appears in the stream + const thinkingDeltas = frames.filter( + (f) => + f.eventType === "contentBlockDelta" && + (f.payload as { delta?: { type?: string } }).delta?.type === "thinking_delta", + ); + const fullThinking = thinkingDeltas + .map((f) => (f.payload as { delta: { thinking: string } }).delta.thinking) + .join(""); + expect(fullThinking).toBe("Let me think step by step about this problem."); + + expect(eventTypes[eventTypes.length - 1]).toBe("messageStop"); + }); + + it("no thinking block when reasoning is absent", async () => { + const res = await postRaw(`/model/anthropic.claude-3-sonnet-20240229-v1:0/converse-stream`, { + messages: [{ role: "user", content: [{ text: "plain" }] }], + }); + + expect(res.status).toBe(200); + const frames = decodeEventStreamFrames(res.body); + + const thinkingDeltas = frames.filter( + (f) => + f.eventType === "contentBlockDelta" && + (f.payload as { delta?: { type?: string } }).delta?.type === "thinking_delta", + ); + expect(thinkingDeltas).toHaveLength(0); + }); +}); + // ─── Ollama /api/chat: Reasoning ──────────────────────────────────────────── function parseNDJSON(body: string): object[] {