Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/activation/embeddings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ export class EmbeddingClient {
private apiKey: string;
private model: string;
private dimensions: number | undefined;
private timeoutMs: number;

constructor() {
constructor(opts?: { timeoutMs?: number }) {
// Resolve endpoint and key using the priority chain.
if (config.embedding.baseURL) {
// Dedicated embedding endpoint — use as-is (user provides full base URL).
Expand All @@ -42,6 +43,7 @@ export class EmbeddingClient {
}
this.model = config.embedding.model;
this.dimensions = config.embedding.dimensions;
this.timeoutMs = opts?.timeoutMs ?? config.embedding.timeoutMs;
}

/**
Expand Down Expand Up @@ -77,6 +79,11 @@ export class EmbeddingClient {
// text-embedding-3-* models; omitting it lets the model use its default.
...(this.dimensions !== undefined && { dimensions: this.dimensions }),
}),
// Hard per-call timeout. Without this, a slow or rate-limited
// embedding proxy causes the fetch to hang indefinitely and
// holds the TCP connection open — silently breaking /activate
// (HTTP route + MCP tool) for the duration. See config.embedding.timeoutMs.
signal: AbortSignal.timeout(this.timeoutMs),
});

if (!response.ok) {
Expand Down
8 changes: 8 additions & 0 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,14 @@ export const config = {
dimensions: process.env.EMBEDDING_DIMENSIONS
? parseIntEnv(process.env.EMBEDDING_DIMENSIONS, 1, 1)
: undefined,
// Hard per-call timeout for the embedding fetch. Without this, a slow or
// rate-limited embedding proxy (e.g. the unified LLM proxy) causes the
// request to hang indefinitely and holds the TCP connection open until
// the OS-level keepalive eventually reaps it — silently breaking both
// the /activate HTTP route and the MCP `activate` tool.
//
// Override via EMBEDDING_TIMEOUT_MS env var. Must be a positive integer.
timeoutMs: parseIntEnv(process.env.EMBEDDING_TIMEOUT_MS, 20_000, 1),
},

// Decay parameters
Expand Down
52 changes: 51 additions & 1 deletion tests/embeddings.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { describe, expect, it } from "bun:test";
import { afterAll, describe, expect, it } from "bun:test";
import { cosineSimilarity } from "../src/activation/embeddings";

describe("cosineSimilarity", () => {
Expand Down Expand Up @@ -46,3 +46,53 @@ describe("cosineSimilarity", () => {
expect(simAB).toBeGreaterThan(simAC);
});
});

describe("EmbeddingClient.embedBatch timeout", () => {
// Regression test: without an AbortSignal on the fetch, a slow or
// rate-limited embedding proxy causes the request to hang indefinitely
// and silently breaks the /activate route and MCP `activate` tool
// (the MCP 15s timeout then surfaces the hang as a generic TimeoutError).

const server = Bun.serve({
port: 0, // OS-assigned
fetch: () => new Promise(() => {}), // hang forever, no response
});
const baseURL = `http://127.0.0.1:${server.port}`;

afterAll(() => {
server.stop(true);
});

it("aborts the fetch when the embedding proxy hangs past the timeout", async () => {
// `config.embedding.baseURL` is captured at module load. We must set
// the env var BEFORE the config module is evaluated, then dynamically
// import with a cache-busting suffix so the modules re-evaluate and
// re-read the env. Mutating process.env post-import would not affect
// the already-resolved config object.
const prev = process.env.EMBEDDING_BASE_URL;
process.env.EMBEDDING_BASE_URL = baseURL;
try {
const { EmbeddingClient: DynamicClient } = await import(
`../src/activation/embeddings.ts?timeout=${Date.now()}-${Math.random()}`
);
const client = new DynamicClient({ timeoutMs: 100 });
const start = Date.now();
let thrown: unknown;
try {
await client.embed("test query");
} catch (e) {
thrown = e;
}
const elapsed = Date.now() - start;
expect(thrown).toBeDefined();
// Must abort close to the 100ms timeout, not hang forever.
expect(elapsed).toBeLessThan(2_000);
} finally {
if (prev === undefined) {
process.env.EMBEDDING_BASE_URL = undefined;
} else {
process.env.EMBEDDING_BASE_URL = prev;
}
}
});
});
Loading