diff --git a/packages/test/src/test/ai-provider/WebBrowserProvider.test.ts b/packages/test/src/test/ai-provider/WebBrowserProvider.test.ts index 0afa5ab63..c753554ca 100644 --- a/packages/test/src/test/ai-provider/WebBrowserProvider.test.ts +++ b/packages/test/src/test/ai-provider/WebBrowserProvider.test.ts @@ -23,6 +23,7 @@ const { WebBrowser_ToolCalling, sessions, chatHistory, + chromeHelpers, probe, } = _testOnly; @@ -756,6 +757,136 @@ describe("WebBrowser_StructuredGeneration validation", () => { }); }); +// -------------------------------------------------------------------------- +// WebBrowser_Chat session cache (HIGH-1) +// -------------------------------------------------------------------------- + +/** + * Fake `LanguageModel` for chat tests. Each `create()` returns a fresh + * session whose `promptStreaming` emits one canned text snapshot per turn. + * The factory records each call's options so we can inspect what was + * passed. + */ +// eslint-disable-next-line @typescript-eslint/no-explicit-any +function makeFakeChatModel(repliesPerTurn: readonly string[]): any { + let turn = 0; + const sessions: Array<{ destroy: ReturnType; promptStreaming: ReturnType }> = + []; + const factory = { + availability: vi.fn().mockResolvedValue("available"), + create: vi.fn(async () => { + const promptStreaming = vi.fn(() => { + const value = repliesPerTurn[turn++] ?? 
""; + return new ReadableStream({ + start(controller) { + controller.enqueue(value); + controller.close(); + }, + }); + }); + const session = { destroy: vi.fn(), promptStreaming }; + sessions.push(session); + return session; + }), + }; + return { factory, sessions }; +} + +describe("WebBrowser_Chat session cache", () => { + const sid = "chat-test-1"; + const userMsg = (text: string): ChatMessage => ({ + role: "user", + content: [{ type: "text", text }], + }); + const assistantMsg = (text: string): ChatMessage => ({ + role: "assistant", + content: [{ type: "text", text }], + }); + + afterEach(() => { + sessions.deleteChromeSession?.(sid); + }); + + it("reuses the cached session across consecutive turns (one factory.create)", async () => { + const { factory, sessions: fakeSessions } = makeFakeChatModel(["hi back", "sure"]); + const restore = installLanguageModelGlobal(factory); + try { + const emit = vi.fn(); + const turn1: ChatMessage[] = [userMsg("hi")]; + await WebBrowser_TextGeneration_Unified( + { messages: turn1 }, + undefined, + new AbortController().signal, + emit, + undefined, + sid + ); + // After turn 1 cache should be at messages.length + 1 == 2. + expect(_testOnly.sessions.getChromeSession(sid)?.messageCount).toBe(2); + const turn2: ChatMessage[] = [ + userMsg("hi"), + assistantMsg("hi back"), + userMsg("how are you?"), + ]; + await WebBrowser_TextGeneration_Unified( + { messages: turn2 }, + undefined, + new AbortController().signal, + emit, + undefined, + sid + ); + // Same session reused: only one factory.create call total. + expect(factory.create).toHaveBeenCalledTimes(1); + // promptStreaming called twice on the SAME session reference. + expect(fakeSessions).toHaveLength(1); + expect(fakeSessions[0]?.promptStreaming).toHaveBeenCalledTimes(2); + // After turn 2, cache watermark = messages.length + 1 = 4. 
+ expect(_testOnly.sessions.getChromeSession(sid)?.messageCount).toBe(4); + } finally { + restore(); + } + }); + + it("rebuilds the session when messageCount diverges (e.g. retroactive edit)", async () => { + const { factory, sessions: fakeSessions } = makeFakeChatModel(["a", "b"]); + const restore = installLanguageModelGlobal(factory); + try { + const emit = vi.fn(); + await WebBrowser_TextGeneration_Unified( + { messages: [userMsg("first")] }, + undefined, + new AbortController().signal, + emit, + undefined, + sid + ); + // Cache is at messageCount=2 after turn 1. + expect(_testOnly.sessions.getChromeSession(sid)?.messageCount).toBe(2); + // Now simulate a retroactive history mutation by shrinking the history: + // the caller resends a single user message (messages.length=1, so + // lastUserIdx=0 and expectedPriorCount=0), but the cache still has + // messageCount=2 from the previous turn. The mismatch (0 !== 2) forces + // the run-fn to destroy the cached session and rebuild from scratch. + await WebBrowser_TextGeneration_Unified( + { messages: [userMsg("reset")] }, + undefined, + new AbortController().signal, + emit, + undefined, + sid + ); + expect(factory.create).toHaveBeenCalledTimes(2); + // First session was destroyed during the divergence rebuild. + expect(fakeSessions[0]?.destroy).toHaveBeenCalled(); + // Watermark after the rebuilt turn = messages.length + 1 = 2. + expect(_testOnly.sessions.getChromeSession(sid)?.messageCount).toBe(2); + } finally { + restore(); + } + }); +}); + // -------------------------------------------------------------------------- // ToolCalling session lifecycle // -------------------------------------------------------------------------- @@ -843,7 +974,7 @@ describe("WebBrowser_ToolCalling session lifecycle", () => { undefined, sid ); - // Same tool set, same conversation thread → cache reuse, one create(). + // Tool-calling intentionally rebuilds per turn — two creates expected. 
expect(factory.create).toHaveBeenCalledTimes(2); expect(sessions.getChromeSession(sid)).toBeUndefined(); } finally { @@ -971,3 +1102,176 @@ describe("WebBrowser_ToolCalling argument validation", () => { } }); }); + +// -------------------------------------------------------------------------- +// ToolCalling prototype-pollution sanitization (HIGH-2) +// -------------------------------------------------------------------------- + +describe("WebBrowser_ToolCalling sanitizes captured args", () => { + const looseTool: ToolDefinition = { + name: "loose", + description: "loose", + // Permissive schema so the validator doesn't reject the cleaned object. + inputSchema: { type: "object", additionalProperties: true }, + }; + + it("strips __proto__ and constructor keys from captured tool args", async () => { + // Build a payload as if the model hallucinated a prototype-pollution attempt. + const polluted: Record = { ok: true }; + // Use Object.defineProperty so `__proto__` is captured as a real own key, + // not as the actual prototype link — mirrors what JSON.parse can do. 
+ Object.defineProperty(polluted, "__proto__", { + value: { polluted: true }, + enumerable: true, + configurable: true, + writable: true, + }); + Object.defineProperty(polluted, "constructor", { + value: { evil: 1 }, + enumerable: true, + configurable: true, + writable: true, + }); + const { factory } = makeFakeToolCallingModel({ loose: polluted }); + const restore = installLanguageModelGlobal(factory); + try { + const events: Array<{ type: string; port?: string; objectDelta?: unknown }> = []; + const emit = (e: unknown): void => { + events.push(e as { type: string; port?: string; objectDelta?: unknown }); + }; + await WebBrowser_ToolCalling( + asTCI({ prompt: "go", tools: [looseTool] }), + undefined, + new AbortController().signal, + emit + ); + const tcEvent = events.find((e) => e.type === "object-delta" && e.port === "toolCalls"); + const calls = (tcEvent?.objectDelta as Array<{ name: string; input: Record }>) ?? []; + expect(calls).toHaveLength(1); + const input = calls[0]!.input; + // Legitimate key preserved. + expect(input.ok).toBe(true); + // Forbidden keys scrubbed. + expect(Object.prototype.hasOwnProperty.call(input, "__proto__")).toBe(false); + expect(Object.prototype.hasOwnProperty.call(input, "constructor")).toBe(false); + // Prototype is plain Object.prototype — not the tainted attacker object. + expect(Object.getPrototypeOf(input)).toBe(Object.prototype); + // And the actual Object prototype was not polluted as a side-effect. 
+ expect(({} as Record).polluted).toBeUndefined(); + } finally { + restore(); + } + }); + + it("strips forbidden keys recursively in nested objects and arrays", async () => { + const inner: Record = { ok: true }; + Object.defineProperty(inner, "constructor", { + value: { x: 1 }, + enumerable: true, + configurable: true, + writable: true, + }); + const outer: Record = { + list: [inner], + }; + Object.defineProperty(outer, "__proto__", { + value: { p: 1 }, + enumerable: true, + configurable: true, + writable: true, + }); + const payload: Record = { outer }; + const { factory } = makeFakeToolCallingModel({ loose: payload }); + const restore = installLanguageModelGlobal(factory); + try { + const events: Array<{ type: string; port?: string; objectDelta?: unknown }> = []; + const emit = (e: unknown): void => { + events.push(e as { type: string; port?: string; objectDelta?: unknown }); + }; + await WebBrowser_ToolCalling( + asTCI({ prompt: "go", tools: [looseTool] }), + undefined, + new AbortController().signal, + emit + ); + const tcEvent = events.find((e) => e.type === "object-delta" && e.port === "toolCalls"); + const calls = (tcEvent?.objectDelta as Array<{ input: Record }>) ?? 
[]; + expect(calls).toHaveLength(1); + const input = calls[0]!.input; + const o = input.outer as Record; + expect(Object.prototype.hasOwnProperty.call(o, "__proto__")).toBe(false); + expect(Object.getPrototypeOf(o)).toBe(Object.prototype); + const list = o.list as Array>; + expect(Array.isArray(list)).toBe(true); + expect(list).toHaveLength(1); + const first = list[0]!; + expect(first.ok).toBe(true); + expect(Object.prototype.hasOwnProperty.call(first, "constructor")).toBe(false); + expect(Object.getPrototypeOf(first)).toBe(Object.prototype); + } finally { + restore(); + } + }); +}); + +// -------------------------------------------------------------------------- +// snapshotStreamToTextDeltas reset semantics (HIGH-3) +// -------------------------------------------------------------------------- + +/** Drain an async iterable of stream events into an array. */ +async function drain(it: AsyncIterable): Promise { + const out: T[] = []; + for await (const e of it) out.push(e); + return out; +} + +/** Build a ReadableStream that emits the given strings in order. 
*/ +function streamOf(values: readonly string[]): ReadableStream { + return new ReadableStream({ + start(controller) { + for (const v of values) controller.enqueue(v); + controller.close(); + }, + }); +} + +describe("snapshotStreamToTextDeltas", () => { + it("emits incremental deltas on prefix-extending snapshots", async () => { + const events = await drain( + chromeHelpers.snapshotStreamToTextDeltas( + streamOf(["hel", "hello", "hello world"]), + "text" + ) + ); + const deltas = events + .filter((e) => (e as { type: string }).type === "text-delta") + .map((e) => (e as { textDelta: string }).textDelta); + expect(deltas).toEqual(["hel", "lo", " world"]); + }); + + it("resets on a non-prefix snapshot", async () => { + const events = await drain( + chromeHelpers.snapshotStreamToTextDeltas( + streamOf(["hello world", "hello sailor"]), + "text" + ) + ); + const deltas = events + .filter((e) => (e as { type: string }).type === "text-delta") + .map((e) => (e as { textDelta: string }).textDelta); + expect(deltas).toEqual(["hello world", "hello sailor"]); + // No buggy concatenation anywhere in the emitted stream. 
+ for (const d of deltas) { + expect(d).not.toContain("hello worldhello sailor"); + } + }); + + it("does not emit an empty delta on identical snapshots", async () => { + const events = await drain( + chromeHelpers.snapshotStreamToTextDeltas(streamOf(["hi", "hi"]), "text") + ); + const deltas = events.filter((e) => (e as { type: string }).type === "text-delta"); + expect(deltas).toHaveLength(1); + expect((deltas[0] as { textDelta: string }).textDelta).toBe("hi"); + }); +}); diff --git a/providers/chrome-ai/src/ai/common/WebBrowser_Chat.ts b/providers/chrome-ai/src/ai/common/WebBrowser_Chat.ts index 8347e838c..ff3b73c51 100644 --- a/providers/chrome-ai/src/ai/common/WebBrowser_Chat.ts +++ b/providers/chrome-ai/src/ai/common/WebBrowser_Chat.ts @@ -53,20 +53,15 @@ export const WebBrowser_Chat: AiProviderRunFn< throw new Error("WebBrowser_Chat: trailing user message has no text content"); } - // History the session should already have heard by the time we prompt. - // After this turn the session will additionally contain the trailing user - // turn + the assistant response we generate — i.e. `messages.length + 1` - // messages, which is the watermark we cache for the next call. - const priorHistory = messages.slice(0, lastUserIdx); - const { initialPrompts, fingerprint: historyFingerprint } = - buildInitialPromptsFromHistory(priorHistory); - - // Cache hygiene: only reuse the cached session if its watermark exactly - // matches the history we'd otherwise re-feed. Out-of-sync caches (task - // reset mid-conversation, retroactive edits to `messages`) are torn down - // and rebuilt. + // Cache reuse requires: same sessionId, AND the cache's high-water mark + // equals the number of messages we expect Chrome to have heard BEFORE + // this turn (everything up to but not including the trailing user + // message). This catches history whose length diverged from the cache (e.g. a task reset that re-runs from a smaller history). + // NOTE(review): only counts are compared, so a retroactive edit to `messages` that keeps the same prior-message count is NOT detected. let cached = sessionId ? 
getChromeSession(sessionId) : undefined; - if (sessionId !== undefined && cached && cached.historyFingerprint !== historyFingerprint) { + const expectedPriorCount = lastUserIdx; + if (sessionId !== undefined && cached && cached.messageCount !== expectedPriorCount) { + // History diverged — tear down the stale session and rebuild. deleteChromeSession(sessionId); cached = undefined; } @@ -76,6 +71,11 @@ export const WebBrowser_Chat: AiProviderRunFn< if (cached) { session = cached.session; } else { + // Fresh session: replay all prior history via initialPrompts so the + // model has full context for the trailing user turn. + const { initialPrompts } = buildInitialPromptsFromHistory( + messages.slice(0, lastUserIdx) + ); session = await factory.create({ signal, // `temperature` is `@deprecated` for non-extension contexts in the @@ -89,6 +89,9 @@ export const WebBrowser_Chat: AiProviderRunFn< let cacheWritten = false; try { + // `promptStreaming` both runs the turn AND mutates the session's + // internal history so the next call's "prior count" is + // `messages.length + 1`. const stream = session.promptStreaming(promptText, { signal }); for await (const e of snapshotStreamToTextDeltas(stream, "text")) { emit(e); @@ -99,9 +102,8 @@ export const WebBrowser_Chat: AiProviderRunFn< // to the cache; `WebBrowserProvider.disposeSession` (wired into // ResourceScope by AiChatTask) reclaims it at end of run. 
setChromeSession(sessionId, { - session, - messageCount: messages.length + 1, - historyFingerprint, + session, + messageCount: messages.length + 1, }); cacheWritten = true; } diff --git a/providers/chrome-ai/src/ai/common/WebBrowser_ChromeHelpers.ts b/providers/chrome-ai/src/ai/common/WebBrowser_ChromeHelpers.ts index bfd78dd0f..687fb048f 100644 --- a/providers/chrome-ai/src/ai/common/WebBrowser_ChromeHelpers.ts +++ b/providers/chrome-ai/src/ai/common/WebBrowser_ChromeHelpers.ts @@ -131,7 +131,19 @@ export function createDownloadMonitor( /** * Chrome streaming APIs return progressive full-text snapshots. This helper - * converts them to append-mode text-delta events by diffing successive snapshots. + * converts them to append-mode text-delta events by diffing successive + * snapshots. + * + * **Reset semantics**: most snapshots are prefix-extensions of the previous + * one (the model is appending). When a snapshot is NOT a prefix extension + * (a self-correction: Chrome replaced rather than extended prior text), the + * accumulator is RESET to the new snapshot and the full new snapshot is + * emitted as a single delta. Consumers that reconstruct full text by + * concatenating successive deltas should treat a non-prefix delta as a + * reset boundary; use {@link snapshotStreamToSnapshots} if you need + * explicit replace-mode events. + * + * Identical consecutive snapshots emit no delta. */ export async function* snapshotStreamToTextDeltas( stream: ReadableStream, @@ -150,7 +162,12 @@ export async function* snapshotStreamToTextDeltas( yield { type: "text-delta", port, textDelta: delta }; } } else { - accumulatedText += value; + // Self-correction snapshot: Chrome replaced (not extended) prior text. + // Reset the accumulator and surface the full new snapshot as the + // delta. 
Consumers reconstructing full text by concatenation should + // treat any subsequent non-prefix delta as a reset boundary; use + // `snapshotStreamToSnapshots` if you need replace-mode semantics. + accumulatedText = value; yield { type: "text-delta", port, textDelta: value }; } } diff --git a/providers/chrome-ai/src/ai/common/WebBrowser_Sessions.ts b/providers/chrome-ai/src/ai/common/WebBrowser_Sessions.ts index fda8a84c8..b668ad0d7 100644 --- a/providers/chrome-ai/src/ai/common/WebBrowser_Sessions.ts +++ b/providers/chrome-ai/src/ai/common/WebBrowser_Sessions.ts @@ -20,6 +20,13 @@ */ export interface ChromeChatSessionState { readonly session: LanguageModel; + /** + * Count of messages the session has heard *after* the most recent turn + * completes — i.e., `messages.length + 1` (the new assistant reply + * counts). Used by the next turn as a high-water mark to decide cache + * reuse: reuse iff `messageCount === lastUserIdx` (everything before + * the trailing user message has already been played into the session). + */ readonly messageCount: number; /** * Stable fingerprint of the `outputSchema` the session was created for @@ -35,7 +42,12 @@ export interface ChromeChatSessionState { * and can't be hot-swapped per turn. */ readonly toolsFingerprint?: string; - /** Stable fingerprint of the filtered chat history replayed into the session. */ + /** + * Stable fingerprint of the filtered chat history replayed into the + * session. No longer used for the chat-cache reuse decision (replaced by + * `messageCount`), but kept optional to preserve the public shape in + * case another caller still passes it. 
+ */ readonly historyFingerprint?: string; } diff --git a/providers/chrome-ai/src/ai/common/WebBrowser_ToolCalling.ts b/providers/chrome-ai/src/ai/common/WebBrowser_ToolCalling.ts index 5beacdd1b..c9ada5da4 100644 --- a/providers/chrome-ai/src/ai/common/WebBrowser_ToolCalling.ts +++ b/providers/chrome-ai/src/ai/common/WebBrowser_ToolCalling.ts @@ -31,6 +31,32 @@ import { } from "./WebBrowser_ChromeHelpers"; import type { WebBrowserModelConfig } from "./WebBrowser_ModelSchema"; +const FORBIDDEN_KEYS = new Set(["__proto__", "constructor", "prototype"]); + +/** + * Recursively rebuild a model-supplied JSON value, dropping any + * `__proto__`, `constructor`, or `prototype` keys at every depth. + * Returns a plain object (Object.prototype), never inheriting from a + * tainted source. Used to sanitize tool-call arguments captured from + * Chrome's LanguageModel before validation and propagation. + * + * Tool input schemas frequently set `additionalProperties: true` (or + * omit it entirely), so a hallucinated `__proto__` key would otherwise + * pass JSON-schema validation and leak into downstream consumers — a + * classic prototype-pollution vector. Sanitization must happen BEFORE + * validation so the validator sees the clean object. + */ +function sanitizeToolArgs(value: unknown): unknown { + if (value === null || typeof value !== "object") return value; + if (Array.isArray(value)) return value.map(sanitizeToolArgs); + const out: Record = {}; + for (const k of Object.keys(value as Record)) { + if (FORBIDDEN_KEYS.has(k)) continue; + out[k] = sanitizeToolArgs((value as Record)[k]); + } + return out; +} + function flattenPrompt(prompt: ToolCallingTaskInput["prompt"]): string { if (typeof prompt === "string") return prompt; if (!Array.isArray(prompt)) return ""; @@ -119,6 +145,9 @@ function buildToolCallPrompt(input: ToolCallingTaskInput): { * drop+log calls that fail. 
Tools whose `inputSchema` fails to compile * fall through to name-only validation (same as today's behavior) with * a single warning so a malformed schema doesn't crash the run. + * + * Captured args are also passed through {@link sanitizeToolArgs} before + * validation to scrub `__proto__` / `constructor` / `prototype` keys. */ export const WebBrowser_ToolCalling: AiProviderRunFn< ToolCallingTaskInput, @@ -166,7 +195,12 @@ export const WebBrowser_ToolCalling: AiProviderRunFn< // — the surrounding `promptStreaming` will throw on its next // read and the finally below will tear down the session. if (signal?.aborted) return ""; - const callInput = (args[0] ?? {}) as Record; + const raw = (args[0] ?? {}) as Record; + // Sanitize BEFORE validation so the validator sees a clean, + // Object.prototype-only object. Tool schemas without + // `additionalProperties: false` would otherwise let prototype- + // pollution payloads pass through. + const callInput = sanitizeToolArgs(raw) as Record; capturedCalls.push({ id: uuid4(), name: td.name, input: callInput }); return ""; }, diff --git a/providers/chrome-ai/src/ai/index.ts b/providers/chrome-ai/src/ai/index.ts index 378edc6e0..9185cf819 100644 --- a/providers/chrome-ai/src/ai/index.ts +++ b/providers/chrome-ai/src/ai/index.ts @@ -22,6 +22,7 @@ import { findLastUserIndex, messageText, } from "./common/WebBrowser_ChatHistory"; +import { snapshotStreamToTextDeltas } from "./common/WebBrowser_ChromeHelpers"; import { WEB_BROWSER_RUN_FNS, WebBrowser_TextGeneration_Unified, @@ -57,6 +58,9 @@ export const _testOnly = { findLastUserIndex, buildInitialPromptsFromHistory, }, + chromeHelpers: { + snapshotStreamToTextDeltas, + }, probe: { probeWebBrowserCapabilities, inferWebBrowserCapabilities,