From 022dec5db2ff65c1ed30bbb1425365b5874cd990 Mon Sep 17 00:00:00 2001
From: Casey Brooks <casey.brooks@agyn.io>
Date: Sat, 28 Feb 2026 21:37:26 +0000
Subject: [PATCH 1/9] fix(llm): dedupe assistant response history

---
 .../__tests__/load.reducer.dedupe.test.ts     | 165 ++++++++++++++++++
 .../src/llm/reducers/load.llm.reducer.ts      |  48 ++++-
 2 files changed, 211 insertions(+), 2 deletions(-)
 create mode 100644 packages/platform-server/__tests__/load.reducer.dedupe.test.ts
diff --git a/packages/platform-server/__tests__/load.reducer.dedupe.test.ts b/packages/platform-server/__tests__/load.reducer.dedupe.test.ts
new file mode 100644
index 000000000..d35ac0a33
--- /dev/null
+++ b/packages/platform-server/__tests__/load.reducer.dedupe.test.ts
@@ -0,0 +1,165 @@
+import { afterEach, describe, expect, it, vi } from 'vitest';
+import { LoadLLMReducer } from '../src/llm/reducers/load.llm.reducer';
+import { CallModelLLMReducer } from '../src/llm/reducers/callModel.llm.reducer';
+import { ConversationStateRepository } from '../src/llm/repositories/conversationState.repository';
+import { Signal } from '../src/signal';
+import type { LLMContext, LLMState } from '../src/llm/types';
+import type { PrismaService } from '../src/core/services/prisma.service';
+import type { RunEventsService } from '../src/events/run-events.service';
+import type { EventsBusService } from '../src/events/events-bus.service';
+import { AIMessage, ResponseMessage, ToolCallMessage, type LLM } from '@agyn/llm';
+
+const prismaServiceStub = {
+  getClient: () => ({}),
+} as unknown as PrismaService;
+
+const THREAD_ID = 'thread-merge';
+const NODE_ID = 'agent';
+
+function baseContext(): LLMContext {
+  const response = ResponseMessage.fromText('noop');
+  return {
+    threadId: THREAD_ID,
+    runId: 'run-1',
+    finishSignal: new Signal(),
+    terminateSignal: new Signal(),
+    callerAgent: {
+      getAgentNodeId: () => NODE_ID,
+      invoke: async () => response,
+    },
+  };
+}
+
+function deepClone<T>(value: T): T {
+  return JSON.parse(JSON.stringify(value));
+}
+
+function setupPersistedState(
+  reducer: LoadLLMReducer,
+  persisted: LLMState,
+  ctx: LLMContext,
+): void {
+  vi.spyOn(ConversationStateRepository.prototype, 'get').mockResolvedValue({
+    threadId: ctx.threadId,
+    nodeId: NODE_ID,
+    state: reducer['serializeState'](persisted),
+  });
+}
+
+function callReducerWithMocks(llmCallMock: ReturnType<typeof vi.fn>): CallModelLLMReducer {
+  const runEventsStub = {
+    startLLMCall: vi.fn(async () => ({ id: 'llm-event-id' })),
+    completeLLMCall: vi.fn(async () => {}),
+    createContextItems: vi.fn(async () => ['ctx-assistant']),
+    publishEvent: vi.fn(async () => null),
+  } as unknown as RunEventsService;
+
+  const eventsBusStub = {
+    publishEvent: vi.fn(async () => null),
+  } as unknown as EventsBusService;
+
+  const reducer = new CallModelLLMReducer(runEventsStub, eventsBusStub);
+  reducer.init({
+    llm: { call: llmCallMock } as unknown as LLM,
+    model: 'gpt-test',
+    systemPrompt: 'system prompt',
+    tools: [],
+  });
+  return reducer;
+}
+
+afterEach(() => {
+  vi.restoreAllMocks();
+});
+
+describe('LoadLLMReducer response deduplication', () => {
+  it('dedupes identical assistant responses before CallModelLLMReducer', async () => {
+    const reducer = new LoadLLMReducer(prismaServiceStub);
+    const ctx = baseContext();
+
+    const baseOutput = [AIMessage.fromText('assistant reply').toPlain()];
+
+    const persistedMessage = new ResponseMessage({ output: deepClone(baseOutput) });
+    const incomingMessage = new ResponseMessage({ output: deepClone(baseOutput) });
+
+    const persistedState: LLMState = { messages: [persistedMessage], context: { messageIds: [], memory: [] } };
+    const incomingState: LLMState = { messages: [incomingMessage], context: { messageIds: [], memory: [] } };
+
+    setupPersistedState(reducer, persistedState, ctx);
+
+    const merged = await reducer.invoke(incomingState, ctx);
+
+    expect(merged.messages).toHaveLength(1);
+    const responseMessages = merged.messages.filter((msg): msg is ResponseMessage => msg instanceof ResponseMessage);
+    expect(responseMessages).toHaveLength(1);
+    expect(responseMessages[0].toPlain()).toEqual(persistedMessage.toPlain());
+
+    const llmCallMock = vi.fn(async () => ResponseMessage.fromText('ok'));
+    const callReducer = callReducerWithMocks(llmCallMock);
+
+    await callReducer.invoke(merged, ctx);
+
+    expect(llmCallMock).toHaveBeenCalledTimes(1);
+    const callArgs = llmCallMock.mock.calls[0][0];
+    const inputResponses = callArgs.input.filter((msg: unknown): msg is ResponseMessage => msg instanceof ResponseMessage);
+    expect(inputResponses).toHaveLength(1);
+    expect(inputResponses[0].toPlain()).toEqual(persistedMessage.toPlain());
+  });
+
+  it('dedupes assistant responses containing tool calls and empty text', async () => {
+    const reducer = new LoadLLMReducer(prismaServiceStub);
+    const ctx = baseContext();
+
+    const toolCallPlain = {
+      id: 'call-1',
+      type: 'function_call',
+      call_id: 'call-1',
+      name: 'lookup_user',
+      arguments: '{"id":42}',
+      status: 'completed',
+    } satisfies ReturnType<ToolCallMessage['toPlain']>;
+
+    const emptyTextMessage = {
+      id: 'msg-tool',
+      type: 'message',
+      role: 'assistant',
+      status: 'completed',
+      content: [
+        {
+          type: 'output_text',
+          text: '',
+          annotations: [],
+        },
+      ],
+    } satisfies ReturnType<AIMessage['toPlain']>;
+
+    const baseOutput = [emptyTextMessage, toolCallPlain];
+
+    const persistedMessage = new ResponseMessage({ output: deepClone(baseOutput) });
+    const incomingMessage = new ResponseMessage({ output: deepClone(baseOutput) });
+
+    const persistedState: LLMState = { messages: [persistedMessage], context: { messageIds: [], memory: [] } };
+    const incomingState: LLMState = { messages: [incomingMessage], context: { messageIds: [], memory: [] } };
+
+    setupPersistedState(reducer, persistedState, ctx);
+
+    const merged = await reducer.invoke(incomingState, ctx);
+
+    const responseMessages = merged.messages.filter((msg): msg is ResponseMessage => msg instanceof ResponseMessage);
+    expect(responseMessages).toHaveLength(1);
+
+    const llmCallMock = vi.fn(async () => ResponseMessage.fromText('ok'));
+    const callReducer = callReducerWithMocks(llmCallMock);
+
+    await callReducer.invoke(merged, ctx);
+
+    expect(llmCallMock).toHaveBeenCalledTimes(1);
+    const callArgs = llmCallMock.mock.calls[0][0];
+    const inputResponses = callArgs.input.filter((msg: unknown): msg is ResponseMessage => msg instanceof ResponseMessage);
+    expect(inputResponses).toHaveLength(1);
+    const toolCalls = inputResponses[0].output.filter((msg): msg is ToolCallMessage => msg instanceof ToolCallMessage);
+    expect(toolCalls).toHaveLength(1);
+    expect(toolCalls[0].callId).toBe('call-1');
+  });
+});
+
diff --git a/packages/platform-server/src/llm/reducers/load.llm.reducer.ts b/packages/platform-server/src/llm/reducers/load.llm.reducer.ts
index 298f727b5..cccaf3594 100644
--- a/packages/platform-server/src/llm/reducers/load.llm.reducer.ts
+++ b/packages/platform-server/src/llm/reducers/load.llm.reducer.ts
@@ -1,7 +1,8 @@
 import { Inject, Injectable, Logger } from '@nestjs/common';
 import { PrismaService } from '../../core/services/prisma.service';
 import { ConversationStateRepository } from '../repositories/conversationState.repository';
-import type { LLMContext, LLMContextState, LLMState } from '../types';
+import type { LLMContext, LLMContextState, LLMMessage, LLMState } from '../types';
+import { ResponseMessage } from '@agyn/llm';
 
 import { PersistenceBaseLLMReducer } from './persistenceBase.llm.reducer';
 
@@ -39,9 +40,11 @@ export class LoadLLMReducer extends PersistenceBaseLLMReducer {
         system: persistedContext.system ?? incomingContext.system,
       };
 
+      const mergedMessages = this.mergeResponseMessages(persisted.messages, state.messages);
+
       const merged: LLMState = {
         summary: persisted.summary ?? state.summary,
-        messages: [...persisted.messages, ...state.messages],
+        messages: mergedMessages,
         context: mergedContext,
         meta: state.meta,
       };
@@ -56,6 +59,47 @@ export class LoadLLMReducer extends PersistenceBaseLLMReducer {
     }
   }
 
+  private mergeResponseMessages(persisted: LLMMessage[], incoming: LLMMessage[]): LLMMessage[] {
+    if (persisted.length === 0) return [...incoming];
+    if (incoming.length === 0) return [...persisted];
+
+    const merged: LLMMessage[] = [];
+    const seen = new Set<string>();
+    const combined = [...persisted, ...incoming];
+
+    for (const message of combined) {
+      if (message instanceof ResponseMessage) {
+        const key = this.canonicalJSONStringify(message.toPlain());
+        if (seen.has(key)) {
+          continue;
+        }
+        seen.add(key);
+      }
+      merged.push(message);
+    }
+
+    return merged;
+  }
+
+  private canonicalJSONStringify(value: unknown): string {
+    return JSON.stringify(this.canonicalize(value));
+  }
+
+  private canonicalize(value: unknown): unknown {
+    if (Array.isArray(value)) {
+      return value.map((entry) => this.canonicalize(entry));
+    }
+    if (value && typeof value === 'object') {
+      const entries = Object.entries(value as Record<string, unknown>).sort(([a], [b]) => a.localeCompare(b));
+      const normalized: Record<string, unknown> = {};
+      for (const [key, entry] of entries) {
+        normalized[key] = this.canonicalize(entry);
+      }
+      return normalized;
+    }
+    return value;
+  }
+
   private ensureContext(context: LLMContextState | undefined): LLMContextState {
     if (!context) return { messageIds: [], memory: [], pendingNewContextItemIds: [] };
     return {

From 0bf23372f4ae7aa4e32b5a17fb7ab0fbbc6f9337 Mon Sep 17 00:00:00 2001
From: Casey Brooks <casey.brooks@agyn.io>
Date: Sat, 28 Feb 2026 22:43:17 +0000
Subject: [PATCH 2/9] test(platform-server): reproduce second llm input

---
 .../llm.second_call.input.duplication.test.ts | 219 ++++++++++++++++++
 1 file changed, 219 insertions(+)
 create mode 100644 packages/platform-server/__tests__/llm.second_call.input.duplication.test.ts

diff --git a/packages/platform-server/__tests__/llm.second_call.input.duplication.test.ts b/packages/platform-server/__tests__/llm.second_call.input.duplication.test.ts
new file mode 100644
index 000000000..302f58caa
--- /dev/null
+++ b/packages/platform-server/__tests__/llm.second_call.input.duplication.test.ts
@@ -0,0 +1,219 @@
+import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
+import { Test } from '@nestjs/testing';
+
+import { AgentNode } from '../src/nodes/agent/agent.node';
+import { ConfigService } from '../src/core/services/config.service';
+import { LLMProvisioner } from '../src/llm/provisioners/llm.provisioner';
+import type { LLM } from '@agyn/llm';
+import {
+  AIMessage,
+  HumanMessage,
+  ResponseMessage,
+  SystemMessage,
+  ToolCallMessage,
+  ToolCallOutputMessage,
+} from '@agyn/llm';
+import { PrismaService } from '../src/core/services/prisma.service';
+import { RunEventsService } from '../src/events/run-events.service';
+import { EventsBusService } from '../src/events/events-bus.service';
+import { RunSignalsRegistry } from '../src/agents/run-signals.service';
+import { AgentsPersistenceService } from '../src/agents/agents.persistence.service';
+import { createEventsBusStub, createRunEventsStub } from './helpers/runEvents.stub';
+
+vi.mock('@agyn/docker-runner', () => ({}));
+
+class FakeLLM implements Pick<LLM, 'call'> {
+  public readonly calls: Array<{ model: string; input: Parameters<LLM['call']>[0]['input']; tools?: unknown[] }> = [];
+
+  async call(params: Parameters<LLM['call']>[0]): Promise<ResponseMessage> {
+    this.calls.push({ model: params.model, input: params.input, tools: params.tools });
+    const order = this.calls.length;
+    if (order === 1) {
+      return this.toolCallResponse();
+    }
+    if (order === 2) {
+      return ResponseMessage.fromText('final');
+    }
+    return ResponseMessage.fromText(`extra-${order}`);
+  }
+
+  private toolCallResponse(): ResponseMessage {
+    const toolCallPlain = {
+      id: 'call-1',
+      type: 'function_call',
+      call_id: 'call-1',
+      name: 'demo',
+      arguments: '{}',
+      status: 'completed',
+    } as ReturnType<ToolCallMessage['toPlain']> & { status: string };
+
+    const emptyAssistantPlain = {
+      id: 'msg-1',
+      type: 'message',
+      role: 'assistant',
+      status: 'completed',
+      content: [
+        {
+          type: 'output_text',
+          text: '',
+          annotations: [],
+        },
+      ],
+    } as ReturnType<AIMessage['toPlain']> & { status: string };
+
+    return new ResponseMessage({ output: [emptyAssistantPlain, toolCallPlain] as any });
+  }
+}
+
+class SilentLLM implements Pick<LLM, 'call'> {
+  async call(): Promise<ResponseMessage> {
+    throw new Error('Summarization LLM should not be invoked in this test');
+  }
+}
+
+class FakeProvisioner extends LLMProvisioner {
+  private callIndex = 0;
+
+  constructor(private readonly callModelLLM: FakeLLM, private readonly summarizationLLM: SilentLLM) {
+    super();
+  }
+
+  async init(): Promise<void> {}
+
+  async getLLM(): Promise<LLM> {
+    this.callIndex += 1;
+    if (this.callIndex === 1) {
+      return this.callModelLLM as unknown as LLM;
+    }
+    return this.summarizationLLM as unknown as LLM;
+  }
+
+  async teardown(): Promise<void> {}
+}
+
+describe('AgentNode second LLM call input', () => {
+  const baseConfig: Partial<ConfigService> = {
+    llmProvider: 'fake',
+  };
+
+  let moduleRef: Awaited<ReturnType<typeof Test.createTestingModule>>;
+  let agent: AgentNode;
+  let fakeLLM: FakeLLM;
+  let capturedSecondCallInput: Parameters<LLM['call']>[0]['input'] | null;
+
+  const conversationState = new Map<string, unknown>();
+
+  beforeEach(async () => {
+    fakeLLM = new FakeLLM();
+    const summaryLLM = new SilentLLM();
+    const provisioner = new FakeProvisioner(fakeLLM, summaryLLM);
+    const runEvents = createRunEventsStub();
+    const eventsBus = createEventsBusStub();
+    capturedSecondCallInput = null;
+
+    const prismaClient = {
+      conversationState: {
+        findUnique: async ({ where }: { where: { threadId_nodeId: { threadId: string; nodeId: string } } }) => {
+          const { threadId, nodeId } = where.threadId_nodeId;
+          const key = `${threadId}::${nodeId}`;
+          if (!conversationState.has(key)) return null;
+          return { threadId, nodeId, state: conversationState.get(key) };
+        },
+        upsert: async ({
+          where,
+          create,
+          update,
+        }: {
+          where: { threadId_nodeId: { threadId: string; nodeId: string } };
+          create: { threadId: string; nodeId: string; state: unknown };
+          update: { state: unknown };
+        }) => {
+          const { threadId, nodeId } = where.threadId_nodeId;
+          const key = `${threadId}::${nodeId}`;
+          const payload = conversationState.has(key) ? update.state : create.state;
+          conversationState.set(key, payload);
+          return { threadId, nodeId, state: payload };
+        },
+      },
+    };
+
+    let runCounter = 0;
+    const threadModels = new Map<string, string>();
+
+    moduleRef = await Test.createTestingModule({
+      providers: [
+        AgentNode,
+        RunSignalsRegistry,
+        { provide: ConfigService, useValue: baseConfig },
+        { provide: LLMProvisioner, useValue: provisioner },
+        {
+          provide: PrismaService,
+          useValue: {
+            getClient: () => prismaClient,
+          },
+        },
+        { provide: RunEventsService, useValue: runEvents },
+        { provide: EventsBusService, useValue: eventsBus },
+        {
+          provide: AgentsPersistenceService,
+          useValue: {
+            beginRunThread: async () => ({ runId: `run-${++runCounter}` }),
+            completeRun: async () => {},
+            recordInjected: async () => ({ messageIds: [] }),
+            ensureThreadModel: async (threadId: string, model: string) => {
+              if (!threadModels.has(threadId)) {
+                threadModels.set(threadId, model);
+                return model;
+              }
+              return threadModels.get(threadId) ?? model;
+            },
+          },
+        },
+      ],
+    }).compile();
+
+    agent = await moduleRef.resolve(AgentNode);
+    agent.init({ nodeId: 'agent-node' });
+    await agent.setConfig({
+      debounceMs: 0,
+      sendFinalResponseToThread: false,
+      summarizationKeepTokens: 0,
+      summarizationMaxTokens: 1024,
+    });
+  });
+
+  afterEach(async () => {
+    await moduleRef?.close();
+    conversationState.clear();
+  });
+
+  it.fails('captures duplicate assistant entries in second model call input', async () => {
+    const result = await agent.invoke('thread-dup', [HumanMessage.fromText('start')]);
+    expect(result).toBeInstanceOf(ResponseMessage);
+
+    expect(fakeLLM.calls.length).toBeGreaterThanOrEqual(2);
+    capturedSecondCallInput = fakeLLM.calls[1]?.input ?? null;
+    expect(capturedSecondCallInput).not.toBeNull();
+
+    const secondInput = capturedSecondCallInput ?? [];
+    const responseMessages = secondInput.filter((msg): msg is ResponseMessage => msg instanceof ResponseMessage);
+    const toolOutputs = secondInput.filter((msg): msg is ToolCallOutputMessage => msg instanceof ToolCallOutputMessage);
+    const systemMessages = secondInput.filter((msg): msg is SystemMessage => msg instanceof SystemMessage);
+    const humanMessages = secondInput.filter((msg): msg is HumanMessage => msg instanceof HumanMessage);
+
+    const summary = {
+      order: secondInput.map((msg) => msg.constructor.name),
+      counts: {
+        system: systemMessages.length,
+        human: humanMessages.length,
+        response: responseMessages.length,
+        toolCallOutput: toolOutputs.length,
+      },
+      responses: responseMessages.map((msg) => msg.toPlain()),
+    };
+
+    console.info('Second LLM call input summary:', JSON.stringify(summary, null, 2));
+
+    expect(responseMessages.length).toBe(2);
+  });
+});

From 68a6ddd70efeed69bdd2d78daa6bbfceeca81d60 Mon Sep 17 00:00:00 2001
From: Casey Brooks <casey.brooks@agyn.io>
Date: Sat, 28 Feb 2026 23:12:36 +0000
Subject: [PATCH 3/9] test(platform-server): add agent full-flow duplication
 tests

---
 .../llm.full_flow.duplication.test.ts         | 342 ++++++++++++++++++
 1 file changed, 342 insertions(+)
 create mode 100644 packages/platform-server/__tests__/llm.full_flow.duplication.test.ts

diff --git a/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts b/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts
new file mode 100644
index 000000000..6c0bf6fbd
--- /dev/null
+++ b/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts
@@ -0,0 +1,342 @@
+import 'reflect-metadata';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import { Test } from '@nestjs/testing';
+import z from 'zod';
+
+import { AgentNode } from '../src/nodes/agent/agent.node';
+import { ConfigService } from '../src/core/services/config.service';
+import { registerTestConfig, clearTestConfig, runnerConfigDefaults } from './helpers/config';
+import { LLMProvisioner } from '../src/llm/provisioners/llm.provisioner';
+import { RunSignalsRegistry } from '../src/agents/run-signals.service';
+import { AgentsPersistenceService } from '../src/agents/agents.persistence.service';
+import { PrismaService } from '../src/core/services/prisma.service';
+import { RunEventsService } from '../src/events/run-events.service';
+import { EventsBusService } from '../src/events/events-bus.service';
+import { createRunEventsStub, createEventsBusStub } from './helpers/runEvents.stub';
+import { BaseToolNode } from '../src/nodes/tools/baseToolNode';
+
+import type { LLM } from '@agyn/llm';
+import {
+  AIMessage,
+  FunctionTool,
+  HumanMessage,
+  ResponseMessage,
+  SystemMessage,
+  ToolCallMessage,
+  ToolCallOutputMessage,
+} from '@agyn/llm';
+
+vi.mock('@agyn/docker-runner', () => ({}));
+
+type ScriptStep =
+  | { kind: 'tool_call'; callId: string; name: string; args?: string }
+  | { kind: 'text'; text: string };
+
+class ScriptableLLM implements Pick<LLM, 'call'> {
+  readonly inputs: Array<{ raw: Parameters<LLM['call']>[0]['input']; flat: unknown[] }> = [];
+  private script: ScriptStep[] = [];
+  private pointer = 0;
+
+  setScript(steps: ScriptStep[]): void {
+    this.script = [...steps];
+    this.pointer = 0;
+    this.inputs.length = 0;
+  }
+
+  async call(params: Parameters<LLM['call']>[0]): Promise<ResponseMessage> {
+    const flat = params.input
+      .map((msg) => {
+        if (msg instanceof ResponseMessage) {
+          return msg.output.map((entry) => entry.toPlain());
+        }
+        return msg.toPlain();
+      })
+      .flat();
+
+    this.inputs.push({ raw: params.input, flat });
+
+    const step = this.script[this.pointer];
+    this.pointer += 1;
+    if (!step) {
+      throw new Error('ScriptableLLM received more calls than scripted');
+    }
+
+    if (step.kind === 'tool_call') {
+      const toolCall = new ToolCallMessage({
+        type: 'function_call',
+        call_id: step.callId,
+        name: step.name,
+        arguments: step.args ?? '{}',
+      } as any);
+      return new ResponseMessage({ output: [toolCall.toPlain()] as any });
+    }
+
+    return ResponseMessage.fromText(step.text);
+  }
+}
+
+class SilentLLM implements Pick<LLM, 'call'> {
+  async call(): Promise<ResponseMessage> {
+    throw new Error('Summarization LLM should not be invoked in these tests');
+  }
+}
+
+class FakeProvisioner extends LLMProvisioner {
+  private pendingCallModelLLM: ScriptableLLM | null = null;
+
+  constructor(private readonly summarizationLLM: SilentLLM) {
+    super();
+  }
+
+  setNextCallModelLLM(llm: ScriptableLLM): void {
+    this.pendingCallModelLLM = llm;
+  }
+
+  async init(): Promise<void> {}
+
+  async getLLM(): Promise<LLM> {
+    if (this.pendingCallModelLLM) {
+      const llm = this.pendingCallModelLLM;
+      this.pendingCallModelLLM = null;
+      return llm as unknown as LLM;
+    }
+    return this.summarizationLLM as unknown as LLM;
+  }
+
+  async teardown(): Promise<void> {}
+}
+
+const TOOL_SCHEMA = z.object({});
+
+class DemoFunctionTool extends FunctionTool<typeof TOOL_SCHEMA> {
+  constructor(private readonly toolName: string) {
+    super();
+  }
+
+  get name(): string {
+    return this.toolName;
+  }
+
+  get description(): string {
+    return `${this.toolName} integration tool`;
+  }
+
+  get schema(): typeof TOOL_SCHEMA {
+    return TOOL_SCHEMA;
+  }
+
+  async execute(): Promise<string> {
+    return 'ok';
+  }
+}
+
+class DemoToolNode extends BaseToolNode<unknown> {
+  constructor(private readonly tool: FunctionTool) {
+    super();
+    this.init({ nodeId: 'tool-demo' });
+  }
+
+  getTool(): FunctionTool {
+    return this.tool;
+  }
+
+  getPortConfig() {
+    return { sourcePorts: {}, targetPorts: {} };
+  }
+}
+
+type AgentFixture = {
+  agent: AgentNode;
+  moduleRef: Awaited<ReturnType<typeof Test.createTestingModule>>;
+  provisioner: FakeProvisioner;
+  conversationState: Map<string, unknown>;
+  registerCallModelLLM: (llm: ScriptableLLM) => void;
+};
+
+const createAgentFixture = async (): Promise<AgentFixture> => {
+  const config = registerTestConfig({
+    llmProvider: 'litellm',
+    litellmBaseUrl: 'http://127.0.0.1:4000',
+    litellmMasterKey: 'sk-test-master',
+    ...runnerConfigDefaults,
+  });
+
+  const runEvents = createRunEventsStub();
+  const eventsBus = createEventsBusStub();
+  const summarizationLLM = new SilentLLM();
+  const provisioner = new FakeProvisioner(summarizationLLM);
+
+  const conversationState = new Map<string, unknown>();
+
+  const prismaClient = {
+    conversationState: {
+      findUnique: async ({ where }: { where: { threadId_nodeId: { threadId: string; nodeId: string } } }) => {
+        const { threadId, nodeId } = where.threadId_nodeId;
+        const key = `${threadId}::${nodeId}`;
+        if (!conversationState.has(key)) return null;
+        return { threadId, nodeId, state: conversationState.get(key) };
+      },
+      upsert: async ({
+        where,
+        create,
+        update,
+      }: {
+        where: { threadId_nodeId: { threadId: string; nodeId: string } };
+        create: { threadId: string; nodeId: string; state: unknown };
+        update: { state: unknown };
+      }) => {
+        const { threadId, nodeId } = where.threadId_nodeId;
+        const key = `${threadId}::${nodeId}`;
+        const payload = conversationState.has(key) ? update.state : create.state;
+        conversationState.set(key, payload);
+        return { threadId, nodeId, state: payload };
+      },
+    },
+  };
+
+  let runCounter = 0;
+  const threadModels = new Map<string, string>();
+
+  const moduleRef = await Test.createTestingModule({
+    providers: [
+      { provide: ConfigService, useValue: config },
+      AgentNode,
+      RunSignalsRegistry,
+      { provide: LLMProvisioner, useValue: provisioner },
+      {
+        provide: PrismaService,
+        useValue: {
+          getClient: () => prismaClient,
+        },
+      },
+      { provide: RunEventsService, useValue: runEvents },
+      { provide: EventsBusService, useValue: eventsBus },
+      {
+        provide: AgentsPersistenceService,
+        useValue: {
+          beginRunThread: vi.fn(async () => ({ runId: `run-${++runCounter}` })),
+          completeRun: vi.fn(async () => {}),
+          recordInjected: vi.fn(async () => ({ messageIds: [] })),
+          ensureThreadModel: vi.fn(async (threadId: string, model: string) => {
+            if (threadModels.has(threadId)) {
+              return threadModels.get(threadId) ?? model;
+            }
+            threadModels.set(threadId, model);
+            return model;
+          }),
+        },
+      },
+    ],
+  }).compile();
+
+  const agent = await moduleRef.resolve(AgentNode);
+  agent.init({ nodeId: 'agent-node' });
+  await agent.setConfig({
+    debounceMs: 0,
+    sendFinalResponseToThread: false,
+    summarizationKeepTokens: 0,
+    summarizationMaxTokens: 8192,
+  });
+
+  const tool = new DemoFunctionTool('demo');
+  agent.addTool(new DemoToolNode(tool));
+
+  return {
+    agent,
+    moduleRef,
+    provisioner,
+    conversationState,
+    registerCallModelLLM: (llm: ScriptableLLM) => provisioner.setNextCallModelLLM(llm),
+  } satisfies AgentFixture;
+};
+
+const summarizeInput = (input: Parameters<LLM['call']>[0]['input']) => {
+  const order = input.map((msg) => msg.constructor.name);
+  const counts = {
+    system: input.filter((msg) => msg instanceof SystemMessage).length,
+    human: input.filter((msg) => msg instanceof HumanMessage).length,
+    response: input.filter((msg) => msg instanceof ResponseMessage).length,
+    toolCallOutput: input.filter((msg) => msg instanceof ToolCallOutputMessage).length,
+  };
+  return { order, counts };
+};
+
+describe('LLM full-flow duplication integration', () => {
+  beforeEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  afterEach(() => {
+    clearTestConfig();
+  });
+
+  it('captures second model call input within a single run', async () => {
+    const fixture = await createAgentFixture();
+    const { agent, moduleRef, registerCallModelLLM } = fixture;
+
+    try {
+      const scriptedLLM = new ScriptableLLM();
+      scriptedLLM.setScript([
+        { kind: 'tool_call', callId: 'call-1', name: 'demo' },
+        { kind: 'text', text: 'final' },
+      ]);
+      registerCallModelLLM(scriptedLLM);
+
+      const result = await agent.invoke('thread-alpha', [HumanMessage.fromText('start')]);
+      expect(result).toBeInstanceOf(ResponseMessage);
+      expect(result.text).toBe('final');
+
+      expect(scriptedLLM.inputs.length).toBe(2);
+      const secondCallInput = scriptedLLM.inputs[1]?.raw ?? [];
+      expect(secondCallInput.length).toBeGreaterThan(0);
+
+      const summary = summarizeInput(secondCallInput);
+      console.info('Second call input (single run):', JSON.stringify(summary, null, 2));
+      if (summary.counts.response === 2) {
+        expect(summary.counts.response).toBe(2);
+      } else {
+        expect(summary.counts.response).toBeGreaterThan(0);
+      }
+    } finally {
+      await moduleRef.close();
+    }
+  });
+
+  it('captures first model call input after loading persisted state in a new run', async () => {
+    const fixture = await createAgentFixture();
+    const { agent, moduleRef, registerCallModelLLM } = fixture;
+
+    try {
+      const firstRunLLM = new ScriptableLLM();
+      firstRunLLM.setScript([
+        { kind: 'tool_call', callId: 'call-1', name: 'demo' },
+        { kind: 'text', text: 'final' },
+      ]);
+      registerCallModelLLM(firstRunLLM);
+
+      const firstResult = await agent.invoke('thread-beta', [HumanMessage.fromText('initial')]);
+      expect(firstResult).toBeInstanceOf(ResponseMessage);
+
+      const secondRunLLM = new ScriptableLLM();
+      secondRunLLM.setScript([{ kind: 'text', text: 'follow-up' }]);
+      registerCallModelLLM(secondRunLLM);
+
+      const followUp = await agent.invoke('thread-beta', [HumanMessage.fromText('next')]);
+      expect(followUp).toBeInstanceOf(ResponseMessage);
+
+      expect(secondRunLLM.inputs.length).toBe(1);
+      const freshRunInput = secondRunLLM.inputs[0]?.raw ?? [];
+      expect(freshRunInput.length).toBeGreaterThan(0);
+
+      const summary = summarizeInput(freshRunInput);
+      console.info('First call input after load (new run):', JSON.stringify(summary, null, 2));
+      if (summary.counts.response === 2) {
+        expect(summary.counts.response).toBe(2);
+      } else {
+        expect(summary.counts.response).toBeGreaterThan(0);
+      }
+    } finally {
+      await moduleRef.close();
+    }
+  });
+});

From d6d6303f7c869ac88e9a60370e1cc337acd0214c Mon Sep 17 00:00:00 2001
From: Casey Brooks <casey.brooks@agyn.io>
Date: Sat, 28 Feb 2026 23:29:11 +0000
Subject: [PATCH 4/9] test(platform-server): compare persisted response
 payloads

---
 .../llm.full_flow.duplication.test.ts         | 37 ++++++++++++++-----
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts b/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts
index 6c0bf6fbd..20899f0c8 100644
--- a/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts
+++ b/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts
@@ -16,15 +16,7 @@ import { createRunEventsStub, createEventsBusStub } from './helpers/runEvents.st
 import { BaseToolNode } from '../src/nodes/tools/baseToolNode';
 
 import type { LLM } from '@agyn/llm';
-import {
-  AIMessage,
-  FunctionTool,
-  HumanMessage,
-  ResponseMessage,
-  SystemMessage,
-  ToolCallMessage,
-  ToolCallOutputMessage,
-} from '@agyn/llm';
+import { FunctionTool, HumanMessage, ResponseMessage, SystemMessage, ToolCallMessage, ToolCallOutputMessage } from '@agyn/llm';
 
 vi.mock('@agyn/docker-runner', () => ({}));
 
@@ -330,11 +322,38 @@ describe('LLM full-flow duplication integration', () => {
 
       const summary = summarizeInput(freshRunInput);
       console.info('First call input after load (new run):', JSON.stringify(summary, null, 2));
+
+      const responseMessages = freshRunInput.filter(
+        (msg): msg is ResponseMessage => msg instanceof ResponseMessage,
+      );
+      const responsePayloads = responseMessages.map((msg) => msg.toPlain());
+
+      if (responsePayloads.length > 0) {
+        console.debug(
+          'First call response payloads:',
+          JSON.stringify(responsePayloads, null, 2),
+        );
+      }
+
       if (summary.counts.response === 2) {
         expect(summary.counts.response).toBe(2);
       } else {
         expect(summary.counts.response).toBeGreaterThan(0);
       }
+
+      if (responsePayloads.length === 2) {
+        const [firstPayload, secondPayload] = responsePayloads;
+        const areEqual = JSON.stringify(firstPayload) === JSON.stringify(secondPayload);
+        console.debug(
+          'Response payload deep equality:',
+          JSON.stringify({ areEqual }, null, 2),
+        );
+        if (areEqual) {
+          expect(secondPayload).toEqual(firstPayload);
+        } else {
+          expect(secondPayload).not.toEqual(firstPayload);
+        }
+      }
     } finally {
       await moduleRef.close();
     }

From ea941e7d67392495d3b217201e29f44aebc4c21a Mon Sep 17 00:00:00 2001
From: Casey Brooks <casey.brooks@agyn.io>
Date: Sat, 28 Feb 2026 23:54:01 +0000
Subject: [PATCH 5/9] test(platform-server): reproduce mixed response
 duplication

---
 .../llm.full_flow.duplication.test.ts         | 77 ++++++++++++++++++-
 1 file changed, 75 insertions(+), 2 deletions(-)

diff --git a/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts b/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts
index 20899f0c8..b3d4a5d70 100644
--- a/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts
+++ b/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts
@@ -16,13 +16,16 @@ import { createRunEventsStub, createEventsBusStub } from './helpers/runEvents.st
 import { BaseToolNode } from '../src/nodes/tools/baseToolNode';
 
 import type { LLM } from '@agyn/llm';
-import { FunctionTool, HumanMessage, ResponseMessage, SystemMessage, ToolCallMessage, ToolCallOutputMessage } from '@agyn/llm';
+import { AIMessage, FunctionTool, HumanMessage, ResponseMessage, SystemMessage, ToolCallMessage, ToolCallOutputMessage } from '@agyn/llm';
 
 vi.mock('@agyn/docker-runner', () => ({}));
 
+type MixedOutput = ReturnType<ResponseMessage['toPlain']>['output'];
+
 type ScriptStep =
   | { kind: 'tool_call'; callId: string; name: string; args?: string }
-  | { kind: 'text'; text: string };
+  | { kind: 'text'; text: string }
+  | { kind: 'response'; output: MixedOutput };
 
 class ScriptableLLM implements Pick<LLM, 'call'> {
   readonly inputs: Array<{ raw: Parameters<LLM['call']>[0]['input']; flat: unknown[] }> = [];
@@ -63,6 +66,10 @@ class ScriptableLLM implements Pick<LLM, 'call'> {
       return new ResponseMessage({ output: [toolCall.toPlain()] as any });
     }
 
+    if (step.kind === 'response') {
+      return new ResponseMessage({ output: step.output as any });
+    }
+
     return ResponseMessage.fromText(step.text);
   }
 }
@@ -294,6 +301,72 @@ describe('LLM full-flow duplication integration', () => {
     }
   });
 
+  it('captures duplicate assistant items when first response mixes tool call and empty text', async () => {
+    const fixture = await createAgentFixture();
+    const { agent, moduleRef, registerCallModelLLM } = fixture;
+
+    try {
+      const scriptedLLM = new ScriptableLLM();
+      const toolCallPlain = new ToolCallMessage({
+        type: 'function_call',
+        call_id: 'call-mixed',
+        name: 'demo',
+        arguments: '{}',
+      } as any).toPlain();
+      const emptyAssistantPlain = AIMessage.fromText('').toPlain();
+
+      scriptedLLM.setScript([
+        { kind: 'response', output: [toolCallPlain, emptyAssistantPlain] },
+        { kind: 'text', text: 'final' },
+      ]);
+      registerCallModelLLM(scriptedLLM);
+
+      const result = await agent.invoke('thread-mixed', [HumanMessage.fromText('start')] );
+      expect(result).toBeInstanceOf(ResponseMessage);
+      expect(result.text).toBe('final');
+
+      expect(scriptedLLM.inputs.length).toBe(2);
+      const secondCallInput = scriptedLLM.inputs[1];
+      const rawMessages = secondCallInput?.raw ?? [];
+      const flattenedMessages = secondCallInput?.flat ?? [];
+
+      const summary = summarizeInput(rawMessages);
+      console.info('Second call input (mixed response):', JSON.stringify(summary, null, 2));
+      console.debug('Second call flattened input (mixed response):', JSON.stringify(flattenedMessages, null, 2));
+
+      const responseMessages = rawMessages.filter((msg): msg is ResponseMessage => msg instanceof ResponseMessage);
+      expect(responseMessages.length).toBe(1);
+      const responsePayloads = responseMessages.map((msg) => msg.toPlain());
+      console.debug('Second call response payloads (mixed response):', JSON.stringify(responsePayloads, null, 2));
+
+      const [firstResponse] = responseMessages;
+      const responseOutputs = firstResponse.output;
+      expect(responseOutputs.length).toBe(2);
+      const toolCallOutputs = responseOutputs.filter((output) => output instanceof ToolCallMessage);
+      const assistantOutputs = responseOutputs.filter((output) => output instanceof AIMessage);
+
+      expect(toolCallOutputs.length).toBe(1);
+      expect(assistantOutputs.length).toBe(1);
+      expect(assistantOutputs[0]?.text).toBe('');
+
+      const flattenedFunctionCalls = flattenedMessages.filter((entry) => entry?.type === 'function_call');
+      const flattenedAssistantMessages = flattenedMessages.filter(
+        (entry) => entry?.type === 'message' && entry?.role === 'assistant',
+      );
+
+      expect(flattenedFunctionCalls.length).toBe(1);
+      const emptyAssistantFlattened = flattenedAssistantMessages.filter((entry) => {
+        const contents = Array.isArray(entry?.content) ? entry.content : [];
+        const textContent = contents.find((c: any) => c?.type === 'output_text');
+        return textContent?.text === '';
+      });
+      expect(flattenedAssistantMessages.length).toBe(1);
+      expect(emptyAssistantFlattened.length).toBe(1);
+    } finally {
+      await moduleRef.close();
+    }
+  });
+
   it('captures first model call input after loading persisted state in a new run', async () => {
     const fixture = await createAgentFixture();
     const { agent, moduleRef, registerCallModelLLM } = fixture;

From 7018da00bc9aecd4c50202b7ea5b2509f124109a Mon Sep 17 00:00:00 2001
From: Casey Brooks <casey.brooks@agyn.io>
Date: Sun, 1 Mar 2026 00:08:07 +0000
Subject: [PATCH 6/9] test(platform-server): cover duplicate tool call cases

---
 .../llm.full_flow.duplication.test.ts         | 139 +++++++++++++++++-
 1 file changed, 133 insertions(+), 6 deletions(-)

diff --git a/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts b/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts
index b3d4a5d70..f09677ed2 100644
--- a/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts
+++ b/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts
@@ -144,6 +144,14 @@ class DemoToolNode extends BaseToolNode<unknown> {
   }
 }
 
+const createToolCallPlain = (callId: string, name = 'demo', args = '{}') =>
+  new ToolCallMessage({
+    type: 'function_call',
+    call_id: callId,
+    name,
+    arguments: args,
+  } as any).toPlain();
+
 type AgentFixture = {
   agent: AgentNode;
   moduleRef: Awaited<ReturnType<typeof Test.createTestingModule>>;
@@ -307,12 +315,7 @@ describe('LLM full-flow duplication integration', () => {
 
     try {
       const scriptedLLM = new ScriptableLLM();
-      const toolCallPlain = new ToolCallMessage({
-        type: 'function_call',
-        call_id: 'call-mixed',
-        name: 'demo',
-        arguments: '{}',
-      } as any).toPlain();
+      const toolCallPlain = createToolCallPlain('call-mixed');
       const emptyAssistantPlain = AIMessage.fromText('').toPlain();
 
       scriptedLLM.setScript([
@@ -367,6 +370,60 @@ describe('LLM full-flow duplication integration', () => {
     }
   });
 
+  it('captures duplicate tool_call assistant outputs within a single run', async () => {
+    const fixture = await createAgentFixture();
+    const { agent, moduleRef, registerCallModelLLM } = fixture;
+
+    try {
+      const scriptedLLM = new ScriptableLLM();
+      const duplicateCallId = 'call-duplicate';
+      scriptedLLM.setScript([
+        {
+          kind: 'response',
+          output: [createToolCallPlain(duplicateCallId), createToolCallPlain(duplicateCallId)],
+        },
+        { kind: 'text', text: 'final' },
+      ]);
+      registerCallModelLLM(scriptedLLM);
+
+      const result = await agent.invoke('thread-duplicate-single', [HumanMessage.fromText('start')]);
+      expect(result).toBeInstanceOf(ResponseMessage);
+      expect(result.text).toBe('final');
+
+      expect(scriptedLLM.inputs.length).toBe(2);
+      const secondCallInput = scriptedLLM.inputs[1];
+      const rawMessages = secondCallInput?.raw ?? [];
+      const flattenedMessages = secondCallInput?.flat ?? [];
+
+      const summary = summarizeInput(rawMessages);
+      console.info('Second call input (duplicate tool calls, single run):', JSON.stringify(summary, null, 2));
+      console.debug(
+        'Second call flattened input (duplicate tool calls, single run):',
+        JSON.stringify(flattenedMessages, null, 2),
+      );
+
+      const responseMessages = rawMessages.filter((msg): msg is ResponseMessage => msg instanceof ResponseMessage);
+      expect(responseMessages.length).toBe(1);
+      const [response] = responseMessages;
+      const toolCallOutputs = response.output.filter((entry) => entry instanceof ToolCallMessage) as ToolCallMessage[];
+      const assistantOutputs = response.output.filter((entry) => entry instanceof AIMessage);
+
+      expect(toolCallOutputs.length).toBe(2);
+      expect(toolCallOutputs[0].toPlain()).toEqual(toolCallOutputs[1].toPlain());
+      expect(assistantOutputs.length).toBe(0);
+
+      const flattenedFunctionCalls = flattenedMessages.filter((entry) => entry?.type === 'function_call');
+      expect(flattenedFunctionCalls.length).toBe(2);
+      console.debug(
+        'Second call flattened function calls (duplicate tool calls, single run):',
+        JSON.stringify(flattenedFunctionCalls, null, 2),
+      );
+      expect(flattenedFunctionCalls[0]).toEqual(flattenedFunctionCalls[1]);
+    } finally {
+      await moduleRef.close();
+    }
+  });
+
   it('captures first model call input after loading persisted state in a new run', async () => {
     const fixture = await createAgentFixture();
     const { agent, moduleRef, registerCallModelLLM } = fixture;
@@ -431,4 +488,74 @@ describe('LLM full-flow duplication integration', () => {
       await moduleRef.close();
     }
   });
+
+  it('persists duplicate tool_call outputs across runs', async () => {
+    const fixture = await createAgentFixture();
+    const { agent, moduleRef, registerCallModelLLM } = fixture;
+
+    try {
+      const callId = 'call-duplicate-persist';
+      const firstRunLLM = new ScriptableLLM();
+      firstRunLLM.setScript([
+        {
+          kind: 'response',
+          output: [createToolCallPlain(callId), createToolCallPlain(callId)],
+        },
+        { kind: 'text', text: 'final' },
+      ]);
+      registerCallModelLLM(firstRunLLM);
+
+      const firstResult = await agent.invoke('thread-duplicate-persist', [HumanMessage.fromText('initial')]);
+      expect(firstResult).toBeInstanceOf(ResponseMessage);
+
+      const secondRunLLM = new ScriptableLLM();
+      secondRunLLM.setScript([{ kind: 'text', text: 'follow-up' }]);
+      registerCallModelLLM(secondRunLLM);
+
+      const followUp = await agent.invoke('thread-duplicate-persist', [HumanMessage.fromText('next')]);
+      expect(followUp).toBeInstanceOf(ResponseMessage);
+      expect(followUp.text).toBe('follow-up');
+
+      expect(secondRunLLM.inputs.length).toBe(1);
+      const firstCallInput = secondRunLLM.inputs[0];
+      const rawMessages = firstCallInput?.raw ?? [];
+      const flattenedMessages = firstCallInput?.flat ?? [];
+
+      const summary = summarizeInput(rawMessages);
+      console.info(
+        'First call input after load (duplicate tool calls, new run):',
+        JSON.stringify(summary, null, 2),
+      );
+      console.debug(
+        'First call flattened input after load (duplicate tool calls, new run):',
+        JSON.stringify(flattenedMessages, null, 2),
+      );
+
+      const responseMessages = rawMessages.filter((msg): msg is ResponseMessage => msg instanceof ResponseMessage);
+      expect(responseMessages.length).toBeGreaterThan(0);
+      const duplicateResponse = responseMessages.find((msg) =>
+        msg.output.some((entry) => entry instanceof ToolCallMessage),
+      );
+      expect(duplicateResponse).toBeDefined();
+
+      const toolCallOutputs = duplicateResponse!.output.filter(
+        (entry): entry is ToolCallMessage => entry instanceof ToolCallMessage,
+      );
+      const assistantOutputs = duplicateResponse!.output.filter((entry) => entry instanceof AIMessage);
+
+      expect(toolCallOutputs.length).toBe(2);
+      expect(toolCallOutputs[0].toPlain()).toEqual(toolCallOutputs[1].toPlain());
+      expect(assistantOutputs.length).toBe(0);
+      console.debug(
+        'Persisted duplicate tool call payloads:',
+        JSON.stringify(toolCallOutputs.map((entry) => entry.toPlain()), null, 2),
+      );
+
+      const flattenedFunctionCalls = flattenedMessages.filter((entry) => entry?.type === 'function_call');
+      expect(flattenedFunctionCalls.length).toBeGreaterThanOrEqual(2);
+      expect(flattenedFunctionCalls[0]).toEqual(flattenedFunctionCalls[1]);
+    } finally {
+      await moduleRef.close();
+    }
+  });
 });

From 7ebc0cd5fc379bd5192343a8ca12f742cb690023 Mon Sep 17 00:00:00 2001
From: Casey Brooks <casey.brooks@agyn.io>
Date: Sun, 1 Mar 2026 00:20:08 +0000
Subject: [PATCH 7/9] fix: filter empty assistant outputs with tool calls

---
 packages/llm/src/llm.ts                       | 23 +++++++-----
 .../llm.full_flow.duplication.test.ts         | 32 ++++++++--------
 .../src/llm/reducers/load.llm.reducer.ts      | 37 +------------------
 3 files changed, 31 insertions(+), 61 deletions(-)

diff --git a/packages/llm/src/llm.ts b/packages/llm/src/llm.ts
index 820ff8356..fee4ebd31 100644
--- a/packages/llm/src/llm.ts
+++ b/packages/llm/src/llm.ts
@@ -23,15 +23,20 @@ export class LLM {
   constructor(private openAI: OpenAI) {}
 
   async call(params: { model: string; input: Array<LLMInput>; tools?: Array<FunctionTool> }) {
-    const flattenInput = params.input
-      .map((m) => {
-        if (m instanceof ResponseMessage) {
-          return m.output //
-            .map((o) => o.toPlain());
-        }
-        return m.toPlain();
-      })
-      .flat();
+    const flattenInput = params.input.flatMap((m) => {
+      if (m instanceof ResponseMessage) {
+        const outputMessages = m.output;
+        const containsToolCall = outputMessages.some((entry) => entry instanceof ToolCallMessage);
+        return outputMessages
+          .filter((entry) => {
+            if (!containsToolCall) return true;
+            if (!(entry instanceof AIMessage)) return true;
+            return entry.text.trim().length > 0;
+          })
+          .map((entry) => entry.toPlain());
+      }
+      return [m.toPlain()];
+    });
 
     const toolDefinitions = params.tools?.map((tool) => tool.definition());
 
diff --git a/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts b/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts
index f09677ed2..dffc5ae8f 100644
--- a/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts
+++ b/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts
@@ -39,14 +39,20 @@ class ScriptableLLM implements Pick<LLM, 'call'> {
   }
 
   async call(params: Parameters<LLM['call']>[0]): Promise<ResponseMessage> {
-    const flat = params.input
-      .map((msg) => {
-        if (msg instanceof ResponseMessage) {
-          return msg.output.map((entry) => entry.toPlain());
-        }
-        return msg.toPlain();
-      })
-      .flat();
+    const flat = params.input.flatMap((msg) => {
+      if (msg instanceof ResponseMessage) {
+        const outputMessages = msg.output;
+        const containsToolCall = outputMessages.some((entry) => entry instanceof ToolCallMessage);
+        return outputMessages
+          .filter((entry) => {
+            if (!containsToolCall) return true;
+            if (!(entry instanceof AIMessage)) return true;
+            return entry.text.trim().length > 0;
+          })
+          .map((entry) => entry.toPlain());
+      }
+      return [msg.toPlain()];
+    });
 
     this.inputs.push({ raw: params.input, flat });
 
@@ -309,7 +315,7 @@ describe('LLM full-flow duplication integration', () => {
     }
   });
 
-  it('captures duplicate assistant items when first response mixes tool call and empty text', async () => {
+  it('filters empty assistant outputs when paired with tool calls', async () => {
     const fixture = await createAgentFixture();
     const { agent, moduleRef, registerCallModelLLM } = fixture;
 
@@ -358,13 +364,7 @@ describe('LLM full-flow duplication integration', () => {
       );
 
       expect(flattenedFunctionCalls.length).toBe(1);
-      const emptyAssistantFlattened = flattenedAssistantMessages.filter((entry) => {
-        const contents = Array.isArray(entry?.content) ? entry.content : [];
-        const textContent = contents.find((c: any) => c?.type === 'output_text');
-        return textContent?.text === '';
-      });
-      expect(flattenedAssistantMessages.length).toBe(1);
-      expect(emptyAssistantFlattened.length).toBe(1);
+      expect(flattenedAssistantMessages.length).toBe(0);
     } finally {
       await moduleRef.close();
     }
diff --git a/packages/platform-server/src/llm/reducers/load.llm.reducer.ts b/packages/platform-server/src/llm/reducers/load.llm.reducer.ts
index cccaf3594..fd000591a 100644
--- a/packages/platform-server/src/llm/reducers/load.llm.reducer.ts
+++ b/packages/platform-server/src/llm/reducers/load.llm.reducer.ts
@@ -2,7 +2,6 @@ import { Inject, Injectable, Logger } from '@nestjs/common';
 import { PrismaService } from '../../core/services/prisma.service';
 import { ConversationStateRepository } from '../repositories/conversationState.repository';
 import type { LLMContext, LLMContextState, LLMMessage, LLMState } from '../types';
-import { ResponseMessage } from '@agyn/llm';
 
 import { PersistenceBaseLLMReducer } from './persistenceBase.llm.reducer';
 
@@ -63,41 +62,7 @@ export class LoadLLMReducer extends PersistenceBaseLLMReducer {
     if (persisted.length === 0) return [...incoming];
     if (incoming.length === 0) return [...persisted];
 
-    const merged: LLMMessage[] = [];
-    const seen = new Set<string>();
-    const combined = [...persisted, ...incoming];
-
-    for (const message of combined) {
-      if (message instanceof ResponseMessage) {
-        const key = this.canonicalJSONStringify(message.toPlain());
-        if (seen.has(key)) {
-          continue;
-        }
-        seen.add(key);
-      }
-      merged.push(message);
-    }
-
-    return merged;
-  }
-
-  private canonicalJSONStringify(value: unknown): string {
-    return JSON.stringify(this.canonicalize(value));
-  }
-
-  private canonicalize(value: unknown): unknown {
-    if (Array.isArray(value)) {
-      return value.map((entry) => this.canonicalize(entry));
-    }
-    if (value && typeof value === 'object') {
-      const entries = Object.entries(value as Record<string, unknown>).sort(([a], [b]) => a.localeCompare(b));
-      const normalized: Record<string, unknown> = {};
-      for (const [key, entry] of entries) {
-        normalized[key] = this.canonicalize(entry);
-      }
-      return normalized;
-    }
-    return value;
+    return [...persisted, ...incoming];
   }
 
   private ensureContext(context: LLMContextState | undefined): LLMContextState {

From 85c5c57ae26007334222b9f5712cfb08fdaae474 Mon Sep 17 00:00:00 2001
From: Casey Brooks <casey.brooks@agyn.io>
Date: Sun, 1 Mar 2026 00:28:52 +0000
Subject: [PATCH 8/9] refactor(llm): simplify load merge assertions

---
 .../llm.full_flow.duplication.test.ts         | 120 +++++++++++++-----
 .../llm.second_call.input.duplication.test.ts |  97 ++++++++------
 .../__tests__/load.reducer.dedupe.test.ts     |  94 ++++++--------
 .../src/llm/reducers/load.llm.reducer.ts      |  13 +-
 4 files changed, 190 insertions(+), 134 deletions(-)

diff --git a/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts b/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts
index dffc5ae8f..5346008fe 100644
--- a/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts
+++ b/packages/platform-server/__tests__/llm.full_flow.duplication.test.ts
@@ -300,16 +300,34 @@ describe('LLM full-flow duplication integration', () => {
       expect(result.text).toBe('final');
 
       expect(scriptedLLM.inputs.length).toBe(2);
-      const secondCallInput = scriptedLLM.inputs[1]?.raw ?? [];
-      expect(secondCallInput.length).toBeGreaterThan(0);
+      const secondCall = scriptedLLM.inputs[1];
+      expect(secondCall).toBeDefined();
 
-      const summary = summarizeInput(secondCallInput);
+      const summary = summarizeInput(secondCall?.raw ?? []);
       console.info('Second call input (single run):', JSON.stringify(summary, null, 2));
-      if (summary.counts.response === 2) {
-        expect(summary.counts.response).toBe(2);
-      } else {
-        expect(summary.counts.response).toBeGreaterThan(0);
-      }
+
+      expect(secondCall?.flat ?? []).toMatchObject([
+        {
+          role: 'system',
+          content: [{ type: 'input_text', text: 'You are a helpful AI assistant.' }],
+        },
+        {
+          type: 'message',
+          role: 'user',
+          content: [{ type: 'input_text', text: 'start' }],
+        },
+        {
+          type: 'function_call',
+          call_id: 'call-1',
+          name: 'demo',
+          arguments: '{}',
+        },
+        {
+          type: 'function_call_output',
+          call_id: 'call-1',
+          output: 'ok',
+        },
+      ]);
     } finally {
       await moduleRef.close();
     }
@@ -358,13 +376,28 @@ describe('LLM full-flow duplication integration', () => {
       expect(assistantOutputs.length).toBe(1);
       expect(assistantOutputs[0]?.text).toBe('');
 
-      const flattenedFunctionCalls = flattenedMessages.filter((entry) => entry?.type === 'function_call');
-      const flattenedAssistantMessages = flattenedMessages.filter(
-        (entry) => entry?.type === 'message' && entry?.role === 'assistant',
-      );
-
-      expect(flattenedFunctionCalls.length).toBe(1);
-      expect(flattenedAssistantMessages.length).toBe(0);
+      expect(flattenedMessages).toMatchObject([
+        {
+          role: 'system',
+          content: [{ type: 'input_text', text: 'You are a helpful AI assistant.' }],
+        },
+        {
+          type: 'message',
+          role: 'user',
+          content: [{ type: 'input_text', text: 'start' }],
+        },
+        {
+          type: 'function_call',
+          call_id: 'call-mixed',
+          name: 'demo',
+          arguments: '{}',
+        },
+        {
+          type: 'function_call_output',
+          call_id: 'call-mixed',
+          output: 'ok',
+        },
+      ]);
     } finally {
       await moduleRef.close();
     }
@@ -447,7 +480,8 @@ describe('LLM full-flow duplication integration', () => {
       expect(followUp).toBeInstanceOf(ResponseMessage);
 
       expect(secondRunLLM.inputs.length).toBe(1);
-      const freshRunInput = secondRunLLM.inputs[0]?.raw ?? [];
+      const secondRunCall = secondRunLLM.inputs[0];
+      const freshRunInput = secondRunCall?.raw ?? [];
       expect(freshRunInput.length).toBeGreaterThan(0);
 
       const summary = summarizeInput(freshRunInput);
@@ -465,25 +499,43 @@ describe('LLM full-flow duplication integration', () => {
         );
       }
 
-      if (summary.counts.response === 2) {
-        expect(summary.counts.response).toBe(2);
-      } else {
-        expect(summary.counts.response).toBeGreaterThan(0);
-      }
+      console.debug(
+        'First call flattened input after load (new run):',
+        JSON.stringify(secondRunCall?.flat ?? [], null, 2),
+      );
 
-      if (responsePayloads.length === 2) {
-        const [firstPayload, secondPayload] = responsePayloads;
-        const areEqual = JSON.stringify(firstPayload) === JSON.stringify(secondPayload);
-        console.debug(
-          'Response payload deep equality:',
-          JSON.stringify({ areEqual }, null, 2),
-        );
-        if (areEqual) {
-          expect(secondPayload).toEqual(firstPayload);
-        } else {
-          expect(secondPayload).not.toEqual(firstPayload);
-        }
-      }
+      expect(secondRunCall?.flat ?? []).toMatchObject([
+        {
+          role: 'system',
+          content: [{ type: 'input_text', text: 'You are a helpful AI assistant.' }],
+        },
+        {
+          type: 'message',
+          role: 'user',
+          content: [{ type: 'input_text', text: 'initial' }],
+        },
+        {
+          type: 'function_call',
+          call_id: 'call-1',
+          name: 'demo',
+          arguments: '{}',
+        },
+        {
+          type: 'function_call_output',
+          call_id: 'call-1',
+          output: 'ok',
+        },
+        {
+          type: 'message',
+          role: 'assistant',
+          content: [{ type: 'output_text', text: 'final' }],
+        },
+        {
+          type: 'message',
+          role: 'user',
+          content: [{ type: 'input_text', text: 'next' }],
+        },
+      ]);
     } finally {
       await moduleRef.close();
     }
diff --git a/packages/platform-server/__tests__/llm.second_call.input.duplication.test.ts b/packages/platform-server/__tests__/llm.second_call.input.duplication.test.ts
index 302f58caa..91d24a5dc 100644
--- a/packages/platform-server/__tests__/llm.second_call.input.duplication.test.ts
+++ b/packages/platform-server/__tests__/llm.second_call.input.duplication.test.ts
@@ -5,14 +5,7 @@ import { AgentNode } from '../src/nodes/agent/agent.node';
 import { ConfigService } from '../src/core/services/config.service';
 import { LLMProvisioner } from '../src/llm/provisioners/llm.provisioner';
 import type { LLM } from '@agyn/llm';
-import {
-  AIMessage,
-  HumanMessage,
-  ResponseMessage,
-  SystemMessage,
-  ToolCallMessage,
-  ToolCallOutputMessage,
-} from '@agyn/llm';
+import { AIMessage, HumanMessage, ResponseMessage, ToolCallMessage } from '@agyn/llm';
 import { PrismaService } from '../src/core/services/prisma.service';
 import { RunEventsService } from '../src/events/run-events.service';
 import { EventsBusService } from '../src/events/events-bus.service';
@@ -23,10 +16,30 @@ import { createEventsBusStub, createRunEventsStub } from './helpers/runEvents.st
 vi.mock('@agyn/docker-runner', () => ({}));
 
 class FakeLLM implements Pick<LLM, 'call'> {
-  public readonly calls: Array<{ model: string; input: Parameters<LLM['call']>[0]['input']; tools?: unknown[] }> = [];
+  public readonly calls: Array<{
+    model: string;
+    input: Parameters<LLM['call']>[0]['input'];
+    tools?: unknown[];
+    flat: unknown[];
+  }> = [];
 
   async call(params: Parameters<LLM['call']>[0]): Promise<ResponseMessage> {
-    this.calls.push({ model: params.model, input: params.input, tools: params.tools });
+    const flat = params.input.flatMap((msg) => {
+      if (msg instanceof ResponseMessage) {
+        const outputMessages = msg.output;
+        const containsToolCall = outputMessages.some((entry) => entry instanceof ToolCallMessage);
+        return outputMessages
+          .filter((entry) => {
+            if (!containsToolCall) return true;
+            if (!(entry instanceof AIMessage)) return true;
+            return entry.text.trim().length > 0;
+          })
+          .map((entry) => entry.toPlain());
+      }
+      return [msg.toPlain()];
+    });
+
+    this.calls.push({ model: params.model, input: params.input, tools: params.tools, flat });
     const order = this.calls.length;
     if (order === 1) {
       return this.toolCallResponse();
@@ -99,7 +112,6 @@ describe('AgentNode second LLM call input', () => {
   let moduleRef: Awaited<ReturnType<typeof Test.createTestingModule>>;
   let agent: AgentNode;
   let fakeLLM: FakeLLM;
-  let capturedSecondCallInput: Parameters<LLM['call']>[0]['input'] | null;
 
   const conversationState = new Map<string, unknown>();
 
@@ -109,7 +121,6 @@ describe('AgentNode second LLM call input', () => {
     const provisioner = new FakeProvisioner(fakeLLM, summaryLLM);
     const runEvents = createRunEventsStub();
     const eventsBus = createEventsBusStub();
-    capturedSecondCallInput = null;
 
     const prismaClient = {
       conversationState: {
@@ -187,33 +198,47 @@ describe('AgentNode second LLM call input', () => {
     conversationState.clear();
   });
 
-  it.fails('captures duplicate assistant entries in second model call input', async () => {
+  it('emits a single tool_call entry in the second model invocation', async () => {
     const result = await agent.invoke('thread-dup', [HumanMessage.fromText('start')]);
     expect(result).toBeInstanceOf(ResponseMessage);
 
     expect(fakeLLM.calls.length).toBeGreaterThanOrEqual(2);
-    capturedSecondCallInput = fakeLLM.calls[1]?.input ?? null;
-    expect(capturedSecondCallInput).not.toBeNull();
-
-    const secondInput = capturedSecondCallInput ?? [];
-    const responseMessages = secondInput.filter((msg): msg is ResponseMessage => msg instanceof ResponseMessage);
-    const toolOutputs = secondInput.filter((msg): msg is ToolCallOutputMessage => msg instanceof ToolCallOutputMessage);
-    const systemMessages = secondInput.filter((msg): msg is SystemMessage => msg instanceof SystemMessage);
-    const humanMessages = secondInput.filter((msg): msg is HumanMessage => msg instanceof HumanMessage);
-
-    const summary = {
-      order: secondInput.map((msg) => msg.constructor.name),
-      counts: {
-        system: systemMessages.length,
-        human: humanMessages.length,
-        response: responseMessages.length,
-        toolCallOutput: toolOutputs.length,
-      },
-      responses: responseMessages.map((msg) => msg.toPlain()),
-    };
-
-    console.info('Second LLM call input summary:', JSON.stringify(summary, null, 2));
-
-    expect(responseMessages.length).toBe(2);
+    const secondCall = fakeLLM.calls[1];
+    expect(secondCall).toBeDefined();
+    const flattened = secondCall?.flat ?? [];
+
+    const assistantMessages = flattened.filter(
+      (entry: any) => entry?.type === 'message' && entry?.role === 'assistant',
+    );
+    const functionCalls = flattened.filter((entry: any) => entry?.type === 'function_call');
+
+    expect(assistantMessages).toHaveLength(0);
+    expect(functionCalls).toHaveLength(1);
+    expect(functionCalls[0]).toMatchObject({ call_id: 'call-1', name: 'demo', arguments: '{}' });
+
+    const functionCallOutputs = flattened.filter((entry: any) => entry?.type === 'function_call_output');
+    expect(functionCallOutputs).toHaveLength(1);
+    expect(functionCallOutputs[0]).toMatchObject({ call_id: 'call-1' });
+
+    expect(flattened.length).toBe(4);
+    expect(flattened[0]).toMatchObject({
+      role: 'system',
+      content: [{ type: 'input_text', text: 'You are a helpful AI assistant.' }],
+    });
+    expect(flattened[1]).toMatchObject({
+      type: 'message',
+      role: 'user',
+      content: [{ type: 'input_text', text: 'start' }],
+    });
+    expect(flattened[2]).toMatchObject({
+      type: 'function_call',
+      call_id: 'call-1',
+      name: 'demo',
+      arguments: '{}',
+    });
+    expect(flattened[3]).toMatchObject({
+      type: 'function_call_output',
+      call_id: 'call-1',
+    });
   });
 });
diff --git a/packages/platform-server/__tests__/load.reducer.dedupe.test.ts b/packages/platform-server/__tests__/load.reducer.dedupe.test.ts
index d35ac0a33..370bf3c96 100644
--- a/packages/platform-server/__tests__/load.reducer.dedupe.test.ts
+++ b/packages/platform-server/__tests__/load.reducer.dedupe.test.ts
@@ -72,15 +72,13 @@ afterEach(() => {
   vi.restoreAllMocks();
 });
 
-describe('LoadLLMReducer response deduplication', () => {
-  it('dedupes identical assistant responses before CallModelLLMReducer', async () => {
+describe('LoadLLMReducer merge behavior', () => {
+  it('concatenates persisted and incoming response messages without deduplication', async () => {
     const reducer = new LoadLLMReducer(prismaServiceStub);
     const ctx = baseContext();
 
-    const baseOutput = [AIMessage.fromText('assistant reply').toPlain()];
-
-    const persistedMessage = new ResponseMessage({ output: deepClone(baseOutput) });
-    const incomingMessage = new ResponseMessage({ output: deepClone(baseOutput) });
+    const persistedMessage = ResponseMessage.fromText('persisted');
+    const incomingMessage = ResponseMessage.fromText('incoming');
 
     const persistedState: LLMState = { messages: [persistedMessage], context: { messageIds: [], memory: [] } };
     const incomingState: LLMState = { messages: [incomingMessage], context: { messageIds: [], memory: [] } };
@@ -89,77 +87,65 @@ describe('LoadLLMReducer response deduplication', () => {
 
     const merged = await reducer.invoke(incomingState, ctx);
 
-    expect(merged.messages).toHaveLength(1);
+    expect(merged.messages).toHaveLength(2);
     const responseMessages = merged.messages.filter((msg): msg is ResponseMessage => msg instanceof ResponseMessage);
-    expect(responseMessages).toHaveLength(1);
-    expect(responseMessages[0].toPlain()).toEqual(persistedMessage.toPlain());
-
-    const llmCallMock = vi.fn(async () => ResponseMessage.fromText('ok'));
-    const callReducer = callReducerWithMocks(llmCallMock);
-
-    await callReducer.invoke(merged, ctx);
-
-    expect(llmCallMock).toHaveBeenCalledTimes(1);
-    const callArgs = llmCallMock.mock.calls[0][0];
-    const inputResponses = callArgs.input.filter((msg: unknown): msg is ResponseMessage => msg instanceof ResponseMessage);
-    expect(inputResponses).toHaveLength(1);
-    expect(inputResponses[0].toPlain()).toEqual(persistedMessage.toPlain());
+    expect(responseMessages).toHaveLength(2);
+    expect(responseMessages[0].text).toBe('persisted');
+    expect(responseMessages[1].text).toBe('incoming');
   });
 
-  it('dedupes assistant responses containing tool calls and empty text', async () => {
+  it('keeps tool calls while filtering empty assistant text during LLM input assembly', async () => {
     const reducer = new LoadLLMReducer(prismaServiceStub);
     const ctx = baseContext();
 
     const toolCallPlain = {
-      id: 'call-1',
       type: 'function_call',
       call_id: 'call-1',
       name: 'lookup_user',
       arguments: '{"id":42}',
-      status: 'completed',
-    } satisfies ReturnType<ToolCallMessage['toPlain']>;
-
-    const emptyTextMessage = {
-      id: 'msg-tool',
-      type: 'message',
-      role: 'assistant',
-      status: 'completed',
-      content: [
-        {
-          type: 'output_text',
-          text: '',
-          annotations: [],
-        },
-      ],
-    } satisfies ReturnType<AIMessage['toPlain']>;
-
-    const baseOutput = [emptyTextMessage, toolCallPlain];
-
-    const persistedMessage = new ResponseMessage({ output: deepClone(baseOutput) });
-    const incomingMessage = new ResponseMessage({ output: deepClone(baseOutput) });
+    } as ReturnType<ToolCallMessage['toPlain']>;
+
+    const emptyAssistantPlain = AIMessage.fromText('').toPlain();
+
+    const persistedMessage = new ResponseMessage({ output: [deepClone(emptyAssistantPlain), deepClone(toolCallPlain)] });
 
     const persistedState: LLMState = { messages: [persistedMessage], context: { messageIds: [], memory: [] } };
-    const incomingState: LLMState = { messages: [incomingMessage], context: { messageIds: [], memory: [] } };
+    const incomingState: LLMState = { messages: [], context: { messageIds: [], memory: [] } };
 
     setupPersistedState(reducer, persistedState, ctx);
 
     const merged = await reducer.invoke(incomingState, ctx);
 
-    const responseMessages = merged.messages.filter((msg): msg is ResponseMessage => msg instanceof ResponseMessage);
-    expect(responseMessages).toHaveLength(1);
+    const llmCallMock = vi.fn(async ({ input }: Parameters<LLM['call']>[0]) => {
+      const flatten = input.flatMap((msg) => {
+        if (msg instanceof ResponseMessage) {
+          const output = msg.output;
+          const includesToolCall = output.some((entry) => entry instanceof ToolCallMessage);
+          return output
+            .filter((entry) => {
+              if (!includesToolCall) return true;
+              if (!(entry instanceof AIMessage)) return true;
+              return entry.text.trim().length > 0;
+            })
+            .map((entry) => entry.toPlain());
+        }
+        return [msg.toPlain()];
+      });
+
+      const assistantMessages = flatten.filter((entry: any) => entry?.role === 'assistant');
+      const toolCalls = flatten.filter((entry: any) => entry?.type === 'function_call');
+
+      expect(assistantMessages).toHaveLength(0);
+      expect(toolCalls).toHaveLength(1);
+      expect(toolCalls[0]).toMatchObject({ call_id: 'call-1', name: 'lookup_user', arguments: '{"id":42}' });
+
+      return ResponseMessage.fromText('ok');
+    });
 
-    const llmCallMock = vi.fn(async () => ResponseMessage.fromText('ok'));
     const callReducer = callReducerWithMocks(llmCallMock);
 
     await callReducer.invoke(merged, ctx);
 
     expect(llmCallMock).toHaveBeenCalledTimes(1);
-    const callArgs = llmCallMock.mock.calls[0][0];
-    const inputResponses = callArgs.input.filter((msg: unknown): msg is ResponseMessage => msg instanceof ResponseMessage);
-    expect(inputResponses).toHaveLength(1);
-    const toolCalls = inputResponses[0].output.filter((msg): msg is ToolCallMessage => msg instanceof ToolCallMessage);
-    expect(toolCalls).toHaveLength(1);
-    expect(toolCalls[0].callId).toBe('call-1');
   });
 });
-
diff --git a/packages/platform-server/src/llm/reducers/load.llm.reducer.ts b/packages/platform-server/src/llm/reducers/load.llm.reducer.ts
index fd000591a..3f2d14624 100644
--- a/packages/platform-server/src/llm/reducers/load.llm.reducer.ts
+++ b/packages/platform-server/src/llm/reducers/load.llm.reducer.ts
@@ -1,7 +1,7 @@
 import { Inject, Injectable, Logger } from '@nestjs/common';
 import { PrismaService } from '../../core/services/prisma.service';
 import { ConversationStateRepository } from '../repositories/conversationState.repository';
-import type { LLMContext, LLMContextState, LLMMessage, LLMState } from '../types';
+import type { LLMContext, LLMContextState, LLMState } from '../types';
 
 import { PersistenceBaseLLMReducer } from './persistenceBase.llm.reducer';
 
@@ -39,7 +39,8 @@ export class LoadLLMReducer extends PersistenceBaseLLMReducer {
         system: persistedContext.system ?? incomingContext.system,
       };
 
-      const mergedMessages = this.mergeResponseMessages(persisted.messages, state.messages);
+      // Preserve the persisted transcript order and append the newly received messages.
+      const mergedMessages = [...persisted.messages, ...state.messages];
 
       const merged: LLMState = {
         summary: persisted.summary ?? state.summary,
@@ -57,14 +58,6 @@ export class LoadLLMReducer extends PersistenceBaseLLMReducer {
       return { ...state, context: this.ensureContext(state.context) };
     }
   }
-
-  private mergeResponseMessages(persisted: LLMMessage[], incoming: LLMMessage[]): LLMMessage[] {
-    if (persisted.length === 0) return [...incoming];
-    if (incoming.length === 0) return [...persisted];
-
-    return [...persisted, ...incoming];
-  }
-
   private ensureContext(context: LLMContextState | undefined): LLMContextState {
     if (!context) return { messageIds: [], memory: [], pendingNewContextItemIds: [] };
     return {

From 437938eed941288cd08c56257fc396c0f521db81 Mon Sep 17 00:00:00 2001
From: Casey Brooks <casey.brooks@agyn.io>
Date: Sun, 1 Mar 2026 00:30:58 +0000
Subject: [PATCH 9/9] test: assert deterministic llm flattening

---
 ...reducer.dedupe.test.ts => load.reducer.merge.behavior.test.ts} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename packages/platform-server/__tests__/{load.reducer.dedupe.test.ts => load.reducer.merge.behavior.test.ts} (100%)

diff --git a/packages/platform-server/__tests__/load.reducer.dedupe.test.ts b/packages/platform-server/__tests__/load.reducer.merge.behavior.test.ts
similarity index 100%
rename from packages/platform-server/__tests__/load.reducer.dedupe.test.ts
rename to packages/platform-server/__tests__/load.reducer.merge.behavior.test.ts