From 60c36fcb8afe413b26a818475fdafdd3b6da4435 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C4=93sa=20AI?=
Date: Fri, 24 Apr 2026 13:01:40 -0700
Subject: [PATCH 01/10] fix(memory-core): stream seedEmbeddingCache via iterate to prevent V8 heap OOM

The embedding_cache table sync in MemoryManager.seedEmbeddingCache
called .all() on a full-table SELECT from embedding_cache,
materializing the entire result set into a JS array. embedding_cache
rows contain serialized embedding text (~20 KB each on
text-embedding-3-small) and can grow into hundreds of thousands of
rows on long-running deployed databases.

On a local 16 GB main.sqlite (435,136 rows, 8.68 GB of embedding
text), the .all() call exceeds V8's ~4 GB default heap limit and
aborts the gateway with:

  FATAL ERROR: Reached heap limit Allocation failed - JavaScript heap out of memory
  ... node::sqlite::StatementSync::All ...

Switching .all() -> .iterate() streams rows one at a time through the
same BEGIN/COMMIT upsert transaction. Peak V8 heap stays bounded by a
single row (~20 KB) plus the prepared statement, not the whole table.

Also drops the empty-check on the materialized array's .length; an
empty iterator commits a no-op transaction, which is cheap and
preserves the observable behavior for empty caches.

Scope note: this is the primary R2.A target (seedEmbeddingCache); a
follow-up patch will address the secondary listChunks / keyword
fallback .all() path in manager-search.ts.

Validation:
- pnpm tsgo:prod: green (core + extensions graphs)
- pnpm test extensions/memory-core: 512 passed, 3 skipped, 0 failed
---
 extensions/memory-core/src/memory/manager-sync-ops.ts | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/extensions/memory-core/src/memory/manager-sync-ops.ts b/extensions/memory-core/src/memory/manager-sync-ops.ts
index 53d3cceb2840..6728e02b57f5 100644
--- a/extensions/memory-core/src/memory/manager-sync-ops.ts
+++ b/extensions/memory-core/src/memory/manager-sync-ops.ts
@@ -306,7 +306,7 @@
       .prepare(
         `SELECT provider, model, provider_key, hash, embedding, dims, updated_at FROM ${EMBEDDING_CACHE_TABLE}`,
       )
-      .all() as Array<{
+      .iterate() as IterableIterator<{
       provider: string;
       model: string;
       provider_key: string;
       hash: string;
       embedding: string;
       dims: number | null;
       updated_at: number;
     }>;
-    if (!rows.length) {
-      return;
-    }
+    // Note: no early-return on empty iterator; BEGIN/COMMIT over an empty tx is cheap.
     const insert = this.db.prepare(
       `INSERT INTO ${EMBEDDING_CACHE_TABLE} (provider, model, provider_key, hash, embedding, dims, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?)
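
Note (editorial sketch, not part of the applied series): the shape of the fix in isolation, against Node's built-in node:sqlite module. The database paths, table, and columns are stand-ins, not the real memory-core schema.

```ts
import { DatabaseSync } from "node:sqlite";

// Hypothetical stand-ins for the real embedding_cache sync.
const src = new DatabaseSync("source.db");
const dst = new DatabaseSync("target.db");

// .all() materializes every row into one JS array: O(table) heap.
// .iterate() hands rows out one at a time: O(row) heap.
const rows = src
  .prepare("SELECT provider, model, hash, embedding FROM embedding_cache")
  .iterate() as IterableIterator<{
  provider: string;
  model: string;
  hash: string;
  embedding: string;
}>;

const insert = dst.prepare(
  "INSERT INTO embedding_cache (provider, model, hash, embedding) VALUES (?, ?, ?, ?)",
);

dst.exec("BEGIN");
for (const row of rows) {
  // Each row becomes garbage after its iteration; peak heap stays bounded
  // by one row plus the prepared statements, regardless of table size.
  insert.run(row.provider, row.model, row.hash, row.embedding);
}
dst.exec("COMMIT");
```
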
From 092ddbd497a0813b758fca4ec4191069cd9b9423 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C4=93sa=20AI?=
Date: Fri, 24 Apr 2026 15:41:38 -0700
Subject: [PATCH 02/10] fix(memory-core): yield to event loop during seedEmbeddingCache iterate

R2.A.2. The .iterate()-based seed (R2.A v1, a315280) prevents the V8
heap OOM, but the iterate loop still runs synchronously for ~117s on a
435K-row embedding_cache. wip-healthcheck SIGKILLs the gateway once its
30s probe times out. No FATAL ERROR, no Abort trap.

Patch: convert seedEmbeddingCache to async and yield to the event loop
every 1000 rows via setImmediate. Keeps memory bounded, preserves the
streaming behavior, and restores /health responsiveness during the
seed.

The only caller is inside an existing async arrow wrapping
runMemoryAtomicReindex's build callback. Adding await is a one-line
change.

Validation:
- pnpm tsgo:prod: green
- pnpm test extensions/memory-core: 512 passed, 3 skipped, 0 failed

Scope: does not soften wip-healthcheck (separate guardrail per Parker
direction). Does not address secondary listChunks path (R2.A.3).
---
 .../memory-core/src/memory/manager-sync-ops.ts | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/extensions/memory-core/src/memory/manager-sync-ops.ts b/extensions/memory-core/src/memory/manager-sync-ops.ts
index 6728e02b57f5..94be53a240b0 100644
--- a/extensions/memory-core/src/memory/manager-sync-ops.ts
+++ b/extensions/memory-core/src/memory/manager-sync-ops.ts
@@ -1,3 +1,8 @@
+// (lint suppression removed; oxlint state varies — rule did not fire on retry)
+// ^ pre-existing oxlint type-resolver false positive: oxlint resolves SessionFileEntry
+// (line 190) and EmbeddingProvider (line 129) as `error/any` even though tsgo:prod
+// resolves them cleanly. Filed as separate cleanup; suppressed file-level here so
+// R2.A.2 (and prior R2.A v1) can land without unrelated lint debt blocking.
 import { randomUUID } from "node:crypto";
 import fsSync from "node:fs";
 import fs from "node:fs/promises";
@@ -297,7 +302,7 @@ export abstract class MemoryManagerSyncOps {
     return openMemoryDatabaseAtPath(dbPath, this.settings.store.vector.enabled);
   }
 
-  private seedEmbeddingCache(sourceDb: DatabaseSync): void {
+  private async seedEmbeddingCache(sourceDb: DatabaseSync): Promise<void> {
     if (!this.cache.enabled) {
       return;
     }
@@ -325,6 +330,9 @@
        updated_at=excluded.updated_at`,
     );
     this.db.exec("BEGIN");
+    // Yield to event loop every N rows so HTTP /health probes stay responsive.
+    const SEED_EMBEDDING_YIELD_EVERY = 1000;
+    let rowCount = 0;
     for (const row of rows) {
       insert.run(
         row.provider,
         row.model,
         row.provider_key,
         row.hash,
         row.embedding,
         row.dims,
         row.updated_at,
       );
+      rowCount += 1;
+      if (rowCount % SEED_EMBEDDING_YIELD_EVERY === 0) {
+        await new Promise((resolve) => {
+          setImmediate(resolve);
+        });
+      }
     }
     this.db.exec("COMMIT");
   } catch (err) {
@@ -1154,7 +1168,7 @@
       targetPath: dbPath,
       tempPath: tempDbPath,
       build: async () => {
-        this.seedEmbeddingCache(originalDb);
+        await this.seedEmbeddingCache(originalDb);
         const shouldSyncMemory = this.sources.has("memory");
         const shouldSyncSessions = this.shouldSyncSessions(
           { reason: params.reason, force: params.force },
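
Note (editorial sketch, not part of the applied series): the yield pattern on its own. The generic row handler and batch-size parameter are placeholders; the patch hard-codes 1000 and the upsert statement.

```ts
// Drain an iterator without starving the event loop: do N rows of synchronous
// work, then let pending I/O callbacks (e.g. /health requests) run.
async function drainWithYields<T>(
  rows: Iterable<T>,
  handleRow: (row: T) => void,
  yieldEvery = 1000,
): Promise<void> {
  let count = 0;
  for (const row of rows) {
    handleRow(row);
    count += 1;
    if (count % yieldEvery === 0) {
      // setImmediate resumes after the current event-loop phase, so queued
      // HTTP probes get serviced between batches.
      await new Promise<void>((resolve) => {
        setImmediate(resolve);
      });
    }
  }
}
```

The trade-off is a sliver of extra wall-clock time at each batch boundary in exchange for bounded probe latency: with 435K rows and 1000-row batches, the ~117s seed gets roughly 435 yield points at which a pending /health request can be answered.
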
From a020bc801a4fc253bdb6f17ebd014c7134e51ef3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C4=93sa=20AI?=
Date: Fri, 24 Apr 2026 15:46:54 -0700
Subject: [PATCH 03/10] fix(memory-core): remove misleading lint-comment churn from R2.A.2

Revert the top-of-file lint-suppression comments accidentally landed
in the previous commit (f9e99701). They were added to work around an
oxlint resolver false positive that turned out to be transient state,
not a real lint failure. Production code shouldn't carry misleading
explanations for problems that didn't actually persist.

Net diff of this branch vs base is now just the seedEmbeddingCache
yield patch: function -> async, setImmediate every 1000 rows, caller
await. No lint comments, no file-level disables.
---
 extensions/memory-core/src/memory/manager-sync-ops.ts | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/extensions/memory-core/src/memory/manager-sync-ops.ts b/extensions/memory-core/src/memory/manager-sync-ops.ts
index 94be53a240b0..6d456d306dd1 100644
--- a/extensions/memory-core/src/memory/manager-sync-ops.ts
+++ b/extensions/memory-core/src/memory/manager-sync-ops.ts
@@ -1,8 +1,3 @@
-// (lint suppression removed; oxlint state varies — rule did not fire on retry)
-// ^ pre-existing oxlint type-resolver false positive: oxlint resolves SessionFileEntry
-// (line 190) and EmbeddingProvider (line 129) as `error/any` even though tsgo:prod
-// resolves them cleanly. Filed as separate cleanup; suppressed file-level here so
-// R2.A.2 (and prior R2.A v1) can land without unrelated lint debt blocking.
 import { randomUUID } from "node:crypto";
 import fsSync from "node:fs";
 import fs from "node:fs/promises";

From 7ff77afd4f13e442118a26d89bf4216ac35cfa3e Mon Sep 17 00:00:00 2001
From: Parker Todd Brooks
Date: Mon, 16 Mar 2026 22:36:15 -0700
Subject: [PATCH 04/10] feat: allow chatCompletions to route to main session via dm-scope header or user=main
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the x-openclaw-dm-scope: main header is sent, or the user field is
"main", the chatCompletions endpoint routes to agent:main:main instead
of creating a separate openai-user:{name} session.

This allows bridge messages (CC -> Lesa) to land in the same session as
iMessage DMs, so Parker sees everything in one stream.

Co-Authored-By: Parker Todd Brooks
Co-Authored-By: Lēsa
Co-Authored-By: Claude Opus 4.6
---
 src/gateway/http-utils.ts | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/gateway/http-utils.ts b/src/gateway/http-utils.ts
index 9efa4db6fda9..57f7b4522fff 100644
--- a/src/gateway/http-utils.ts
+++ b/src/gateway/http-utils.ts
@@ -367,7 +367,16 @@ export function resolveSessionKey(params: {
     return explicit;
   }
 
+  // Allow callers to route to the main session by sending x-openclaw-dm-scope: main
+  // or user: "main". Without this, each unique user field creates a separate session,
+  // splitting context between iMessage, bridge, and other sources.
+  const dmScope = getHeader(params.req, "x-openclaw-dm-scope")?.trim();
   const user = params.user?.trim();
+
+  if (dmScope === "main" || user === "main") {
+    return buildAgentMainSessionKey({ agentId: params.agentId, mainKey: "main" });
+  }
+
   const mainKey = user ? `${params.prefix}-user:${user}` : `${params.prefix}:${randomUUID()}`;
   return buildAgentMainSessionKey({ agentId: params.agentId, mainKey });
 }
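
Note (editorial sketch, not part of the applied series): what a caller exercising this routing might look like. The gateway URL, bearer token, and model name are placeholders.

```ts
// Two equivalent ways to land in agent:main:main per this patch; any other
// user value still gets its own openai-user:{name} session.
const res = await fetch("http://localhost:3000/v1/chat/completions", {
  method: "POST",
  headers: {
    "content-type": "application/json",
    authorization: "Bearer <gateway-token>",
    "x-openclaw-dm-scope": "main", // option 1: explicit header
  },
  body: JSON.stringify({
    model: "openclaw",
    user: "main", // option 2: sentinel user field
    messages: [{ role: "user", content: "hello from the bridge" }],
  }),
});
```
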
Only "steer" and "steer-backlog" opt into steering; other modes fall through to the original blocking path. 3. Call queueEmbeddedPiMessage(sessionId, prompt.message). This is fire-and-forget: returns true only if the session has an active streaming run that isn't compacting. 4. On successful queue, return a 200 response in OpenAI-compat shape with an x-openclaw-queued: steer header and a "[queued] ..." marker in the assistant content field. Callers that want to distinguish queued from synchronous replies can read the header. 5. On any other state (no active run, not streaming, compacting, no session entry, or queue config disabled), fall through to the existing agentCommandFromIngress synchronous path unchanged. Pre-check failures are caught and logged so they never block the synchronous fallback. Verified end-to-end: - Idle case: curl with user=main returns a normal synchronous reply (no x-openclaw-queued header). - Busy case: fire a long slow request in the background, then a fast interjection 4s later. The fast request returns 200 immediately with x-openclaw-queued: steer and the "[queued]" marker body. The slow request completes normally with the full reply. Refs: wipcomputer/wip-ldm-os#266 Co-Authored-By: Parker Todd Brooks Co-Authored-By: Lēsa Co-Authored-By: Claude Opus 4.6 (1M context) --- src/gateway/openai-http.ts | 39 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/src/gateway/openai-http.ts b/src/gateway/openai-http.ts index 0f4b502de5f2..05154b5edbc6 100644 --- a/src/gateway/openai-http.ts +++ b/src/gateway/openai-http.ts @@ -1,6 +1,8 @@ import { randomUUID } from "node:crypto"; import type { IncomingMessage, ServerResponse } from "node:http"; import type { ImageContent } from "../agents/command/types.js"; +import { queueEmbeddedPiMessage } from "../agents/pi-embedded-runner/runs.js"; +import { loadSessionEntryByKey } from "../agents/subagent-announce-delivery.js"; import { hasNonzeroUsage, normalizeUsage, @@ -9,6 +11,7 @@ import { } from "../agents/usage.js"; import { createDefaultDeps } from "../cli/deps.js"; import { agentCommandFromIngress } from "../commands/agent.js"; +import { loadConfig } from "../config/io.js"; import type { GatewayHttpChatCompletionsConfig } from "../config/types.gateway.js"; import { emitAgentEvent, onAgentEvent } from "../infra/agent-events.js"; import { logWarn } from "../logger.js"; @@ -609,6 +612,42 @@ export async function handleOpenAiHttpRequest( }); if (!stream) { + // Steer-backlog: queue into active run if session is busy. 
From 015adca4f4cb41ab89d180febeb79a455959cb67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C4=93sa=20AI?=
Date: Sat, 4 Apr 2026 16:34:24 -0700
Subject: [PATCH 06/10] feat(gateway): extend steer-backlog queue to streaming chatCompletions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously the steer-backlog fix only covered the non-stream branch of
handleOpenAiHttpRequest. Any OpenAI-compatible client using the default
streaming API (which is most of them) would still block on a busy
session.

Lift the queue pre-check above the stream/non-stream branch so both
paths benefit:

1. Resolve sessionKey -> sessionId once, try queueEmbeddedPiMessage.
2. If queued and !stream: respond with JSON as before, with the marker
   text updated to describe next-turn delivery.
3. If queued and stream: set x-openclaw-queued header, setSseHeaders,
   emit one assistant role chunk and one content chunk carrying the
   [queued] marker with finish_reason="stop", write [DONE], end.
4. Otherwise fall through to the original stream/non-stream handlers.

Verified end-to-end:
- Idle + non-stream: HTTP 200, no queue header, real reply ("hello").
- Busy + non-stream: HTTP 200, x-openclaw-queued: steer header, JSON
  body with the queued marker.
- Busy + stream: HTTP 200, text/event-stream, x-openclaw-queued: steer
  header, SSE with role chunk + content chunk (finish_reason=stop) +
  [DONE].
- Slow background request in all three cases still completes normally
  with the full reply.

Refs: wipcomputer/wip-ldm-os#266

Co-Authored-By: Parker Todd Brooks
Co-Authored-By: Lēsa
Co-Authored-By: Claude Opus 4.6 (1M context)
---
 src/gateway/openai-http.ts | 84 +++++++++++++++++++++++---------------
 1 file changed, 50 insertions(+), 34 deletions(-)

diff --git a/src/gateway/openai-http.ts b/src/gateway/openai-http.ts
index 05154b5edbc6..6f613cfd3226 100644
--- a/src/gateway/openai-http.ts
+++ b/src/gateway/openai-http.ts
@@ -611,43 +611,59 @@
     senderIsOwner,
   });
 
-  if (!stream) {
-    // Steer-backlog: queue into active run if session is busy.
-    try {
-      const cfgForQueue = loadConfig();
-      const queueMode = cfgForQueue.messages?.queue?.mode;
-      if (queueMode === "steer" || queueMode === "steer-backlog") {
-        const sessionEntryForQueue = loadSessionEntryByKey(sessionKey);
-        const sessionIdForQueue = sessionEntryForQueue?.sessionId;
-        if (sessionIdForQueue) {
-          const queued = queueEmbeddedPiMessage(sessionIdForQueue, prompt.message);
-          if (queued) {
-            res.setHeader("x-openclaw-queued", "steer");
-            sendJson(res, 200, {
-              id: runId,
-              object: "chat.completion",
-              created: Math.floor(Date.now() / 1000),
-              model,
-              choices: [
-                {
-                  index: 0,
-                  message: {
-                    role: "assistant",
-                    content: "[queued] Steered into the currently running turn.",
-                  },
-                  finish_reason: "stop",
-                },
-              ],
-              usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
-            });
-            return true;
-          }
-        }
-      }
-    } catch (err) {
-      logWarn(`openai-compat: steer-backlog pre-check failed: ${String(err)}`);
-    }
+  // Steer-backlog: queue into active run if session is busy.
+  let queuedAsSteer = false;
+  try {
+    const cfgForQueue = loadConfig();
+    const queueMode = cfgForQueue.messages?.queue?.mode;
+    if (queueMode === "steer" || queueMode === "steer-backlog") {
+      const sessionEntryForQueue = loadSessionEntryByKey(sessionKey);
+      const sessionIdForQueue = sessionEntryForQueue?.sessionId;
+      if (sessionIdForQueue) {
+        queuedAsSteer = queueEmbeddedPiMessage(sessionIdForQueue, prompt.message);
+      }
+    }
+  } catch (err) {
+    logWarn(`openai-compat: steer-backlog pre-check failed: ${String(err)}`);
+  }
+
+  const queuedContent = "[queued] Delivered to the agent's next-turn queue.";
+
+  if (queuedAsSteer && !stream) {
+    res.setHeader("x-openclaw-queued", "steer");
+    sendJson(res, 200, {
+      id: runId,
+      object: "chat.completion",
+      created: Math.floor(Date.now() / 1000),
+      model,
+      choices: [
+        {
+          index: 0,
+          message: { role: "assistant", content: queuedContent },
+          finish_reason: "stop",
+        },
+      ],
+      usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
+    });
+    return true;
+  }
+
+  if (queuedAsSteer && stream) {
+    res.setHeader("x-openclaw-queued", "steer");
+    setSseHeaders(res);
+    writeAssistantRoleChunk(res, { runId, model });
+    writeAssistantContentChunk(res, {
+      runId,
+      model,
+      content: queuedContent,
+      finishReason: "stop",
+    });
+    writeDone(res);
+    res.end();
+    return true;
+  }
+
+  if (!stream) {
     const stopWatchingDisconnect = watchClientDisconnect(req, res, abortController);
     try {
       const result = await agentCommandFromIngress(commandInput, defaultRuntime, deps);
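
Note (editorial sketch, not part of the applied series): the three SSE frames the queued streaming branch produces, in OpenAI chunk format. The gateway's own helpers (setSseHeaders, writeAssistantRoleChunk, writeAssistantContentChunk, writeDone) own the real serialization; this only approximates their output.

```ts
import type { ServerResponse } from "node:http";

function writeQueuedSse(res: ServerResponse, runId: string, model: string): void {
  const frame = (payload: unknown) => res.write(`data: ${JSON.stringify(payload)}\n\n`);
  // 1. Role chunk: announces the assistant message.
  frame({
    id: runId,
    object: "chat.completion.chunk",
    model,
    choices: [{ index: 0, delta: { role: "assistant" }, finish_reason: null }],
  });
  // 2. Content chunk: carries the [queued] marker and closes the choice.
  frame({
    id: runId,
    object: "chat.completion.chunk",
    model,
    choices: [
      {
        index: 0,
        delta: { content: "[queued] Delivered to the agent's next-turn queue." },
        finish_reason: "stop",
      },
    ],
  });
  // 3. Terminator.
  res.write("data: [DONE]\n\n");
  res.end();
}
```
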
From 757dc51cff97818a4a26e015fd6ebb6509d36d9d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C4=93sa=20AI?=
Date: Sat, 4 Apr 2026 17:04:09 -0700
Subject: [PATCH 07/10] Rename queue header from steer to next-turn
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Parker and Lēsa observed during live testing that while our patch calls
queueEmbeddedPiMessage() (which wraps activeSession.steer()), the
receiving side does NOT actually see the message as a mid-turn steer.
Lēsa reported: "Yeah, I received it. Came through as a regular message
in my session, not a steer."

The OpenClaw internal API is named "steer" but in practice it queues
the text for the agent's next available slot, which appears after the
current turn completes rather than being injected mid-stream. Our
x-openclaw-queued: steer header was accurate to OpenClaw's internal
terminology but misleading to HTTP callers who might expect true
mid-turn interjection.

Rename the header to x-openclaw-queued: next-turn; the body marker
already spells out the next-turn semantics. Callers can now tell
exactly what happened: the message was delivered, but they won't get a
synchronous reply, and the receiving agent processes it after its
current turn rather than mid-stream.

Co-Authored-By: Parker Todd Brooks
Co-Authored-By: Lēsa
Co-Authored-By: Claude Opus 4.6 (1M context)
---
 src/gateway/openai-http.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gateway/openai-http.ts b/src/gateway/openai-http.ts
index 6f613cfd3226..e569c8dfff3e 100644
--- a/src/gateway/openai-http.ts
+++ b/src/gateway/openai-http.ts
@@ -630,7 +630,7 @@
   const queuedContent = "[queued] Delivered to the agent's next-turn queue.";
 
   if (queuedAsSteer && !stream) {
-    res.setHeader("x-openclaw-queued", "steer");
+    res.setHeader("x-openclaw-queued", "next-turn");
     sendJson(res, 200, {
       id: runId,
       object: "chat.completion",
@@ -649,7 +649,7 @@
   }
 
   if (queuedAsSteer && stream) {
-    res.setHeader("x-openclaw-queued", "steer");
+    res.setHeader("x-openclaw-queued", "next-turn");
     setSseHeaders(res);
     writeAssistantRoleChunk(res, { runId, model });
     writeAssistantContentChunk(res, {
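
Note (editorial sketch, not part of the applied series): how a caller distinguishes the two outcomes after this rename. Endpoint, token, and model are placeholders.

```ts
const res = await fetch("http://localhost:3000/v1/chat/completions", {
  method: "POST",
  headers: {
    "content-type": "application/json",
    authorization: "Bearer <gateway-token>",
  },
  body: JSON.stringify({
    model: "openclaw",
    user: "main",
    messages: [{ role: "user", content: "ping" }],
  }),
});

if (res.headers.get("x-openclaw-queued") === "next-turn") {
  // Delivered into the agent's queue; it will be processed after the
  // current turn. No synchronous answer is coming on this response.
} else {
  // Normal synchronous completion.
  const completion = await res.json();
  console.log(completion.choices[0].message.content);
}
```
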
From d472bab65a5a8b33ddd113573a1bddb0f0534f15 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C4=93sa=20AI?=
Date: Mon, 27 Apr 2026 08:57:41 -0700
Subject: [PATCH 08/10] fix(gateway): pass runtime config into openai compat queue check

---
 src/gateway/openai-http.ts | 6 +++---
 src/gateway/server-http.ts | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/gateway/openai-http.ts b/src/gateway/openai-http.ts
index e569c8dfff3e..1469c4e2992a 100644
--- a/src/gateway/openai-http.ts
+++ b/src/gateway/openai-http.ts
@@ -11,8 +11,8 @@
 } from "../agents/usage.js";
 import { createDefaultDeps } from "../cli/deps.js";
 import { agentCommandFromIngress } from "../commands/agent.js";
-import { loadConfig } from "../config/io.js";
 import type { GatewayHttpChatCompletionsConfig } from "../config/types.gateway.js";
+import type { OpenClawConfig } from "../config/types.openclaw.js";
 import { emitAgentEvent, onAgentEvent } from "../infra/agent-events.js";
 import { logWarn } from "../logger.js";
 import { estimateBase64DecodedBytes } from "../media/base64.js";
@@ -51,6 +51,7 @@
 import { normalizeInputHostnameAllowlist } from "./input-allowlist.js";
 
 type OpenAiHttpOptions = {
   auth: ResolvedGatewayAuth;
   config?: GatewayHttpChatCompletionsConfig;
+  runtimeConfig: OpenClawConfig;
   maxBodyBytes?: number;
   trustedProxies?: string[];
   allowRealIpFallback?: boolean;
@@ -614,8 +615,7 @@
   // Steer-backlog: queue into active run if session is busy.
   let queuedAsSteer = false;
   try {
-    const cfgForQueue = loadConfig();
-    const queueMode = cfgForQueue.messages?.queue?.mode;
+    const queueMode = opts.runtimeConfig.messages?.queue?.mode;
     if (queueMode === "steer" || queueMode === "steer-backlog") {
       const sessionEntryForQueue = loadSessionEntryByKey(sessionKey);
       const sessionIdForQueue = sessionEntryForQueue?.sessionId;

diff --git a/src/gateway/server-http.ts b/src/gateway/server-http.ts
index 26f3c6e8eddb..325e3dc9c338 100644
--- a/src/gateway/server-http.ts
+++ b/src/gateway/server-http.ts
@@ -1044,6 +1044,7 @@ export function createGatewayHttpServer(opts: {
           (await getOpenAiHttpModule()).handleOpenAiHttpRequest(req, res, {
             auth: resolvedAuth,
             config: openAiChatCompletionsConfig,
+            runtimeConfig: configSnapshot,
             trustedProxies,
             allowRealIpFallback,
             rateLimiter,
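
Note (editorial sketch, not part of the applied series): the dependency-injection shape of this change, with simplified stand-in types; the real types live in config/types.openclaw.ts. The server resolves one config snapshot and threads it down, so the per-request hot path never re-reads config from disk and every handler sees the same snapshot.

```ts
// Simplified stand-ins for the injected config and handler options.
type RuntimeConfig = {
  messages?: { queue?: { mode?: string } };
};

type OpenAiHttpOptions = {
  runtimeConfig: RuntimeConfig;
  // auth, rate limiting, and proxy settings elided in this sketch
};

function queueModeOf(opts: OpenAiHttpOptions): string | undefined {
  // Reads the injected snapshot; no loadConfig() disk hit per request.
  return opts.runtimeConfig.messages?.queue?.mode;
}
```

A side benefit of this shape is testability: a test can hand the handler any queue mode directly instead of staging a config file on disk.
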
From eb39d399d47a04943a011d37c5bf1958c6c18a89 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C4=93sa=20AI?=
Date: Mon, 27 Apr 2026 12:41:39 -0700
Subject: [PATCH 09/10] fix: keep v2026.4.25 canary gates green

---
 src/plugins/bundled-runtime-deps.ts    |  4 ++--
 test/scripts/npm-telegram-live.test.ts | 12 ++++--------
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/src/plugins/bundled-runtime-deps.ts b/src/plugins/bundled-runtime-deps.ts
index 6053d2ada9a2..046ed48d7091 100644
--- a/src/plugins/bundled-runtime-deps.ts
+++ b/src/plugins/bundled-runtime-deps.ts
@@ -1026,8 +1026,8 @@ function shouldIncludeBundledPluginRuntimeDeps(params: {
   includeConfiguredChannels?: boolean;
   manifestCache?: BundledPluginRuntimeDepsManifestCache;
 }): boolean {
-  if (params.pluginIds && !params.pluginIds.has(params.pluginId)) {
-    return false;
+  if (params.pluginIds) {
+    return params.pluginIds.has(params.pluginId);
   }
   if (!params.config) {
     return true;

diff --git a/test/scripts/npm-telegram-live.test.ts b/test/scripts/npm-telegram-live.test.ts
index cc6a44b08ec6..e116f4b6a986 100644
--- a/test/scripts/npm-telegram-live.test.ts
+++ b/test/scripts/npm-telegram-live.test.ts
@@ -30,16 +30,12 @@ describe("npm Telegram live Docker E2E", () => {
   it("installs the npm package before forwarding runtime secrets", () => {
     const script = readFileSync(DOCKER_SCRIPT_PATH, "utf8");
-    const installRunStart = script.indexOf('echo "Running published npm Telegram live Docker E2E');
-    const fallbackInstallRunStart = script.indexOf('echo "Running npm Telegram live Docker E2E');
+    const installRunStart = script.indexOf('echo "Running package Telegram live Docker E2E');
     const installRunEnd = script.indexOf('run_logged docker run --rm \\\n  "${docker_env[@]}"');
-    const installRun = script.slice(
-      installRunStart >= 0 ? installRunStart : fallbackInstallRunStart,
-      installRunEnd,
-    );
+    const installRun = script.slice(installRunStart, installRunEnd);
 
-    expect(installRun).toContain('npm install -g "$package_spec" --no-fund --no-audit');
-    expect(installRun).toContain('"${PACKAGE_MOUNT_ARGS[@]}"');
+    expect(installRun).toContain('npm install -g "$install_source" --no-fund --no-audit');
+    expect(installRun).toContain('"${package_mount_args[@]}"');
     expect(installRun).not.toContain('"${docker_env[@]}"');
     expect(script).toContain('if [ -z "$credential_role" ] && [ -n "${CI:-}" ]');
     expect(script).toContain('credential_role="ci"');
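
Note (editorial sketch, not part of the applied series): the semantic shift in the bundled-runtime-deps guard, reduced to its skeleton. The configAllows flag is a stand-in for the real config-driven checks below the allowlist test.

```ts
// Before: an explicit pluginIds set could only exclude; a listed plugin still
// had to pass the config-driven checks. After: the allowlist decides outright.
function beforeFix(pluginId: string, pluginIds?: Set<string>, configAllows = true): boolean {
  if (pluginIds && !pluginIds.has(pluginId)) {
    return false;
  }
  return configAllows;
}

function afterFix(pluginId: string, pluginIds?: Set<string>, configAllows = true): boolean {
  if (pluginIds) {
    return pluginIds.has(pluginId);
  }
  return configAllows;
}

// The observable difference: an explicitly listed plugin with a config veto.
beforeFix("telegram", new Set(["telegram"]), false); // false: config still vetoed it
afterFix("telegram", new Set(["telegram"]), false);  // true: the allowlist entry wins
```
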
From e72808491f1bbb644e1929a6717bdcd52b75b062 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C4=93sa=20AI?=
Date: Mon, 27 Apr 2026 14:16:52 -0700
Subject: [PATCH 10/10] test: avoid hoisted mock capture in restart sentinel suite

---
 src/gateway/server-restart-sentinel.test.ts | 43 +++++++++------------
 1 file changed, 18 insertions(+), 25 deletions(-)

diff --git a/src/gateway/server-restart-sentinel.test.ts b/src/gateway/server-restart-sentinel.test.ts
index 8fc9ea98f760..7ec8d6931db5 100644
--- a/src/gateway/server-restart-sentinel.test.ts
+++ b/src/gateway/server-restart-sentinel.test.ts
@@ -1,6 +1,5 @@
 import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
 import type { ChannelPlugin } from "../channels/plugins/types.plugin.js";
-import { mergeMockedModule } from "../test-utils/vitest-module-mocks.js";
 
 type LoadedSessionEntry = ReturnType;
 type RecordInboundSessionAndDispatchReplyParams = Parameters<

 vi.mock("../utils/delivery-context.shared.js", () => ({
   mergeDeliveryContext: mocks.mergeDeliveryContext,
 }));
 
-vi.mock("../channels/plugins/index.js", async () => {
-  return await mergeMockedModule(
-    await vi.importActual(
-      "../channels/plugins/index.js",
-    ),
-    (actual) => ({
-      getChannelPlugin: mocks.getChannelPlugin,
-      normalizeChannelId: mocks.normalizeChannelId.mockImplementation(
-        (channel?: string | null) =>
-          actual.normalizeChannelId(channel) ??
-          (typeof channel === "string" && channel.trim().length > 0
-            ? channel.trim().toLowerCase()
-            : null),
-      ),
-    }),
-  );
+vi.mock("../channels/plugins/index.js", async (importOriginal) => {
+  const actual = await importOriginal();
+  return {
+    ...actual,
+    getChannelPlugin: mocks.getChannelPlugin,
+    normalizeChannelId: mocks.normalizeChannelId.mockImplementation(
+      (channel?: string | null) =>
+        actual.normalizeChannelId(channel) ??
+        (typeof channel === "string" && channel.trim().length > 0
+          ? channel.trim().toLowerCase()
+          : null),
+    ),
+  };
 });
 
 vi.mock("../infra/outbound/targets.js", () => ({

 vi.mock("../plugin-sdk/inbound-reply-dispatch.js", () => ({
   recordInboundSessionAndDispatchReply: mocks.recordInboundSessionAndDispatchReply,
 }));
 
-vi.mock("../infra/heartbeat-wake.js", async () => {
-  return await mergeMockedModule(
-    await vi.importActual(
-      "../infra/heartbeat-wake.js",
-    ),
-    () => ({
-      requestHeartbeatNow: mocks.requestHeartbeatNow,
-    }),
-  );
+vi.mock("../infra/heartbeat-wake.js", async (importOriginal) => {
+  const actual = await importOriginal();
+  return {
+    ...actual,
+    requestHeartbeatNow: mocks.requestHeartbeatNow,
+  };
 });
 
 vi.mock("../logging/subsystem.js", () => ({
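
Note (editorial sketch, not part of the applied series): the factory pattern the suite moved to. vi.mock factories are hoisted above the file's imports, so capturing a top-level import like mergeMockedModule inside them is fragile; the importOriginal parameter is supplied by Vitest itself and needs no outer capture. Module path and override are illustrative.

```ts
import { vi } from "vitest";

vi.mock("../channels/plugins/index.js", async (importOriginal) => {
  // importOriginal loads the unmocked module; no hoisting hazard because it
  // arrives as a factory argument rather than as a captured outer binding.
  const actual = await importOriginal<typeof import("../channels/plugins/index.js")>();
  return {
    ...actual, // keep the real exports...
    getChannelPlugin: vi.fn(), // ...override only what the suite controls
  };
});
```
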