From 5507fd169f2567d41f77504526096c8b891c672f Mon Sep 17 00:00:00 2001
From: opencode
Date: Fri, 5 Jun 2026 11:00:29 +0200
Subject: [PATCH] fix(activation): add per-call timeout to embedding fetch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The EmbeddingClient.embedBatch fetch had no AbortSignal, so a slow or
rate-limited embedding proxy (e.g. the unified LLM proxy) caused the
request to hang indefinitely. The held TCP connection silently broke
the /activate HTTP route and the MCP 'activate' tool — the MCP 15s
timeout surfaced the hang as a generic TimeoutError with no indication
of the actual cause.

Add a configurable per-call timeout (default 20s, override via
EMBEDDING_TIMEOUT_MS) wired into the fetch via AbortSignal.timeout.
Also expose timeoutMs as an EmbeddingClient constructor option for
testability.

Regression test uses a hanging local Bun.serve to verify the fetch
aborts within the configured window.
---
 src/activation/embeddings.ts |  9 ++++++-
 src/config.ts                |  8 ++++++
 tests/embeddings.test.ts     | 52 +++++++++++++++++++++++++++++++++++-
 3 files changed, 67 insertions(+), 2 deletions(-)

diff --git a/src/activation/embeddings.ts b/src/activation/embeddings.ts
index 9040db1..3abda51 100644
--- a/src/activation/embeddings.ts
+++ b/src/activation/embeddings.ts
@@ -17,8 +17,9 @@ export class EmbeddingClient {
   private apiKey: string;
   private model: string;
   private dimensions: number | undefined;
+  private timeoutMs: number;
 
-  constructor() {
+  constructor(opts?: { timeoutMs?: number }) {
     // Resolve endpoint and key using the priority chain.
     if (config.embedding.baseURL) {
       // Dedicated embedding endpoint — use as-is (user provides full base URL).
@@ -42,6 +43,7 @@ export class EmbeddingClient {
     }
     this.model = config.embedding.model;
     this.dimensions = config.embedding.dimensions;
+    this.timeoutMs = opts?.timeoutMs ?? config.embedding.timeoutMs;
   }
 
   /**
@@ -77,6 +79,11 @@ export class EmbeddingClient {
         // text-embedding-3-* models; omitting it lets the model use its default.
         ...(this.dimensions !== undefined && { dimensions: this.dimensions }),
       }),
+      // Hard per-call timeout. Without this, a slow or rate-limited
+      // embedding proxy causes the fetch to hang indefinitely and
+      // holds the TCP connection open — silently breaking /activate
+      // (HTTP route + MCP tool) for the duration. See config.embedding.timeoutMs.
+      signal: AbortSignal.timeout(this.timeoutMs),
     });
 
     if (!response.ok) {
diff --git a/src/config.ts b/src/config.ts
index 9775b73..1e287ae 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -243,6 +243,14 @@ export const config = {
     dimensions: process.env.EMBEDDING_DIMENSIONS
       ? parseIntEnv(process.env.EMBEDDING_DIMENSIONS, 1, 1)
       : undefined,
+    // Hard per-call timeout for the embedding fetch. Without this, a slow or
+    // rate-limited embedding proxy (e.g. the unified LLM proxy) causes the
+    // request to hang indefinitely and holds the TCP connection open until
+    // the OS-level keepalive eventually reaps it — silently breaking both
+    // the /activate HTTP route and the MCP `activate` tool.
+    //
+    // Override via EMBEDDING_TIMEOUT_MS env var. Must be a positive integer.
+    timeoutMs: parseIntEnv(process.env.EMBEDDING_TIMEOUT_MS, 20_000, 1),
   },
 
   // Decay parameters
diff --git a/tests/embeddings.test.ts b/tests/embeddings.test.ts
index fb4b3e6..42f97d2 100644
--- a/tests/embeddings.test.ts
+++ b/tests/embeddings.test.ts
@@ -1,4 +1,4 @@
-import { describe, expect, it } from "bun:test";
+import { afterAll, describe, expect, it } from "bun:test";
 import { cosineSimilarity } from "../src/activation/embeddings";
 
 describe("cosineSimilarity", () => {
@@ -46,3 +46,53 @@ describe("cosineSimilarity", () => {
     expect(simAB).toBeGreaterThan(simAC);
   });
 });
+
+describe("EmbeddingClient.embedBatch timeout", () => {
+  // Regression test: without an AbortSignal on the fetch, a slow or
+  // rate-limited embedding proxy causes the request to hang indefinitely
+  // and silently breaks the /activate route and MCP `activate` tool
+  // (the MCP 15s timeout then surfaces the hang as a generic TimeoutError).
+
+  const server = Bun.serve({
+    port: 0, // OS-assigned
+    fetch: () => new Promise(() => {}), // hang forever, no response
+  });
+  const baseURL = `http://127.0.0.1:${server.port}`;
+
+  afterAll(() => {
+    server.stop(true);
+  });
+
+  it("aborts the fetch when the embedding proxy hangs past the timeout", async () => {
+    // `config.embedding.baseURL` is captured at module load, so mutating
+    // process.env after import does not change the resolved config object.
+    // Set the env var first, then import with a cache-busting suffix.
+    // NOTE(review): the suffix only targets embeddings.ts — confirm that
+    // ../config is re-evaluated too, or this never hits the hanging server.
+    const prev = process.env.EMBEDDING_BASE_URL;
+    process.env.EMBEDDING_BASE_URL = baseURL;
+    try {
+      const { EmbeddingClient: DynamicClient } = await import(
+        `../src/activation/embeddings.ts?timeout=${Date.now()}-${Math.random()}`
+      );
+      const client = new DynamicClient({ timeoutMs: 100 });
+      const start = Date.now();
+      let thrown: unknown;
+      try {
+        await client.embed("test query");
+      } catch (e) {
+        thrown = e;
+      }
+      const elapsed = Date.now() - start;
+      expect(thrown).toBeDefined();
+      // Must abort close to the 100ms timeout, not hang forever.
+      expect(elapsed).toBeLessThan(2_000);
+    } finally {
+      if (prev === undefined) {
+        delete process.env.EMBEDDING_BASE_URL; // assigning undefined may store the string "undefined"
+      } else {
+        process.env.EMBEDDING_BASE_URL = prev;
+      }
+    }
+  });
+});