diff --git a/Dockerfile b/Dockerfile index f4a8e5a..e3b0e5e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -75,6 +75,33 @@ COPY --from=builder /app/public ./public COPY --from=builder /app/package.json ./ COPY --from=builder /app/tsconfig.json ./ +# Install Chromium headless shell + system deps for Playwright. +# Must run after node_modules is copied so bunx can resolve playwright. +# --only-shell skips the full Chromium binary (saves ~75 MiB off the full +# chrome channel); the custom phantom_preview_page tool uses +# chromium.launch() which picks the headless shell automatically for +# headless=true. The @playwright/mcp embed path uses a contextGetter so it +# never needs the full chrome channel binary. +# +# Image cost breakdown (verified on the built image vs. the pre-Playwright +# baseline, total delta roughly 996 MiB over the non-Playwright baseline): +# ~327 MB chromium_headless_shell-* binary at +# /home/phantom/.cache/ms-playwright/chromium_headless_shell-* +# ~91 MB /usr/share/fonts pulled by --with-deps (DejaVu, Liberation, +# Noto Core) +# ~500+ MB /usr/lib X11 / GTK / libasound / libnss3 / libcups / libatk +# and the other shared libraries apt-get pulls for Chromium +# +# --only-shell only affects the Chromium binary. The system deps are the +# dominant cost and cannot be trimmed without breaking Chromium's ability +# to start. If you are trying to shrink this image, the headless shell +# binary is the only safe target; the /usr/lib growth is load-bearing. +ENV PLAYWRIGHT_BROWSERS_PATH=/home/phantom/.cache/ms-playwright +RUN mkdir -p "$PLAYWRIGHT_BROWSERS_PATH" && \ + bunx playwright install --with-deps --only-shell chromium && \ + chown -R phantom:phantom /home/phantom/.cache && \ + rm -rf /var/lib/apt/lists/* + # Copy default phantom-config (constitution.md, persona.md, etc.) # These get backed up so they survive the empty volume mount on first run. COPY --from=builder /app/phantom-config ./phantom-config diff --git a/bun.lock b/bun.lock index 705f903..5b4f3f1 100644 --- a/bun.lock +++ b/bun.lock @@ -7,10 +7,12 @@ "dependencies": { "@anthropic-ai/claude-agent-sdk": "^0.2.77", "@modelcontextprotocol/sdk": "^1.28.0", + "@playwright/mcp": "0.0.70", "@slack/bolt": "^4.6.0", "croner": "^10.0.1", "imapflow": "^1.2.18", "nodemailer": "^8.0.4", + "playwright": "1.59.1", "resend": "^6.9.4", "telegraf": "^4.16.3", "yaml": "^2.6.0", @@ -83,6 +85,8 @@ "@pinojs/redact": ["@pinojs/redact@0.4.0", "", {}, "sha512-k2ENnmBugE/rzQfEcdWHcCY+/FM3VLzH9cYEsbdsoqrvzAKRhUZeRNhAZvB8OitQJ1TBed3yqWtdjzS6wJKBwg=="], + "@playwright/mcp": ["@playwright/mcp@0.0.70", "", { "dependencies": { "playwright": "1.60.0-alpha-1774999321000", "playwright-core": "1.60.0-alpha-1774999321000" }, "bin": { "playwright-mcp": "cli.js" } }, "sha512-Kl0a6l9VL8rvT1oBou3hS5yArjwWV9UlwAkq+0skfK1YVg8XfmmNaAmwZhMeNx/ZhGiWXfCllo6rD/jvZz+WuA=="], + "@slack/bolt": ["@slack/bolt@4.6.0", "", { "dependencies": { "@slack/logger": "^4.0.0", "@slack/oauth": "^3.0.4", "@slack/socket-mode": "^2.0.5", "@slack/types": "^2.18.0", "@slack/web-api": "^7.12.0", "axios": "^1.12.0", "express": "^5.0.0", "path-to-regexp": "^8.1.0", "raw-body": "^3", "tsscmp": "^1.0.6" }, "peerDependencies": { "@types/express": "^5.0.0" } }, "sha512-xPgfUs2+OXSugz54Ky07pA890+Qydk22SYToi8uGpXeHSt1JWwFJkRyd/9Vlg5I1AdfdpGXExDpwnbuN9Q/2dQ=="], "@slack/logger": ["@slack/logger@4.0.1", "", { "dependencies": { "@types/node": ">=18" } }, "sha512-6cmdPrV/RYfd2U0mDGiMK8S7OJqpCTm7enMLRR3edccsPX8j7zXTLnaEF4fhxxJJTAIOil6+qZrnUPTuaLvwrQ=="], @@ -237,6 +241,8 @@ "fresh": ["fresh@2.0.0", "", {}, "sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A=="], + "fsevents": ["fsevents@2.3.2", "", { "os": "darwin" }, "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA=="], + "function-bind": ["function-bind@1.1.2", "", {}, "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA=="], "get-intrinsic": ["get-intrinsic@1.3.0", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.2", "es-define-property": "^1.0.1", "es-errors": "^1.3.0", "es-object-atoms": "^1.1.1", "function-bind": "^1.1.2", "get-proto": "^1.0.1", "gopd": "^1.2.0", "has-symbols": "^1.1.0", "hasown": "^2.0.2", "math-intrinsics": "^1.1.0" } }, "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ=="], @@ -357,6 +363,10 @@ "pkce-challenge": ["pkce-challenge@5.0.1", "", {}, "sha512-wQ0b/W4Fr01qtpHlqSqspcj3EhBvimsdh0KlHhH8HRZnMsEa0ea2fTULOXOS9ccQr3om+GcGRk4e+isrZWV8qQ=="], + "playwright": ["playwright@1.59.1", "", { "dependencies": { "playwright-core": "1.59.1" }, "optionalDependencies": { "fsevents": "2.3.2" }, "bin": { "playwright": "cli.js" } }, "sha512-C8oWjPR3F81yljW9o5OxcWzfh6avkVwDD2VYdwIGqTkl+OGFISgypqzfu7dOe4QNLL2aqcWBmI3PMtLIK233lw=="], + + "playwright-core": ["playwright-core@1.60.0-alpha-1774999321000", "", { "bin": { "playwright-core": "cli.js" } }, "sha512-ams3Zo4VXxeOg5ZTTh16GkE8g48Bmxo/9pg9gXl9SVKlVohCU7Jaog7XntY8yFuzENA6dJc1Fz7Z/NNTm9nGEw=="], + "postal-mime": ["postal-mime@2.7.3", "", {}, "sha512-MjhXadAJaWgYzevi46+3kLak8y6gbg0ku14O1gO/LNOuay8dO+1PtcSGvAdgDR0DoIsSaiIA8y/Ddw6MnrO0Tw=="], "process-warning": ["process-warning@5.0.0", "", {}, "sha512-a39t9ApHNx2L4+HBnQKqxxHNs1r7KF+Intd8Q/g1bUh6q0WIp9voPXJ/x0j+ZL45KF1pJd9+q2jLIRMfvEshkA=="], @@ -465,6 +475,8 @@ "zod-to-json-schema": ["zod-to-json-schema@3.25.1", "", { "peerDependencies": { "zod": "^3.25 || ^4" } }, "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA=="], + "@playwright/mcp/playwright": ["playwright@1.60.0-alpha-1774999321000", "", { "dependencies": { "playwright-core": "1.60.0-alpha-1774999321000" }, "optionalDependencies": { "fsevents": "2.3.2" }, "bin": { "playwright": "cli.js" } }, "sha512-Bd5DkzYKG+2g1jLO6NeTXmGLbBYSFffJIOsR4l4hUBkJvzvGGdLZ7jZb2tOtb0WIoWXQKdQj3Ap6WthV4DBS8w=="], + "form-data/mime-types": ["mime-types@2.1.35", "", { "dependencies": { "mime-db": "1.52.0" } }, "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw=="], "libmime/iconv-lite": ["iconv-lite@0.6.3", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw=="], @@ -473,6 +485,8 @@ "p-queue/p-timeout": ["p-timeout@3.2.0", "", { "dependencies": { "p-finally": "^1.0.0" } }, "sha512-rhIwUycgwwKcP9yTOOFK/AKsAopjjCakVqLHePO3CC6Mir1Z99xT+R63jZxAT5lFZLa2inS5h+ZS2GvR99/FBg=="], + "playwright/playwright-core": ["playwright-core@1.59.1", "", { "bin": { "playwright-core": "cli.js" } }, "sha512-HBV/RJg81z5BiiZ9yPzIiClYV/QMsDCKUyogwH9p3MCP6IYjUFu/MActgYAvK0oWyV9NlwM3GLBjADyWgydVyg=="], + "form-data/mime-types/mime-db": ["mime-db@1.52.0", "", {}, "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg=="], } } diff --git a/package.json b/package.json index b80234c..eae34fd 100644 --- a/package.json +++ b/package.json @@ -18,10 +18,12 @@ "dependencies": { "@anthropic-ai/claude-agent-sdk": "^0.2.77", "@modelcontextprotocol/sdk": "^1.28.0", + "@playwright/mcp": "0.0.70", "@slack/bolt": "^4.6.0", "croner": "^10.0.1", "imapflow": "^1.2.18", "nodemailer": "^8.0.4", + "playwright": "1.59.1", "resend": "^6.9.4", "telegraf": "^4.16.3", "yaml": "^2.6.0", diff --git a/src/agent/prompt-assembler.ts b/src/agent/prompt-assembler.ts index 09197e1..69ef5e8 100644 --- a/src/agent/prompt-assembler.ts +++ b/src/agent/prompt-assembler.ts @@ -170,6 +170,31 @@ function buildEnvironment(config: PhantomConfig): string { lines.push(`- Pages are at ${publicUrl}/ui/`); } lines.push(""); + lines.push("SELF-VALIDATE EVERY UI PAGE YOU CREATE."); + lines.push("After phantom_create_page succeeds, always call phantom_preview_page with"); + lines.push("the same path. Review the screenshot, the HTTP status, the page title,"); + lines.push("and especially the console messages and failed network requests list."); + lines.push("If there are console errors, failed CDN loads, or the screenshot looks"); + lines.push("wrong, fix the HTML and re-run phantom_preview_page until clean. Only"); + lines.push("report the page to the user after validation passes."); + lines.push("The tool returns one image block plus a JSON metadata block. The image"); + lines.push("is for visual review, the JSON tells you what failed to load or error."); + lines.push(""); + lines.push("GENERAL BROWSER CAPABILITY."); + lines.push("You have access to the full Playwright MCP tool surface via the"); + lines.push("phantom-browser server. These tools share one Chromium instance with"); + lines.push("phantom_preview_page. Use browser_navigate to open any URL (localhost"); + lines.push("or external), browser_snapshot for structured accessibility text,"); + lines.push("browser_take_screenshot for pixel captures, browser_click/browser_type/"); + lines.push("browser_fill_form for interaction, browser_console_messages and"); + lines.push("browser_network_requests for debugging, browser_tabs for multi-page work."); + lines.push("For single-shot self-validation of your own /ui/ pages, always"); + lines.push("prefer phantom_preview_page: one call returns image plus JSON."); + lines.push("For multi-step browsing, research tasks, or external sites, use the"); + lines.push("browser_* tools directly."); + lines.push("Do NOT use browser_run_code against external pages unless the user"); + lines.push("explicitly asked you to execute code in a foreign origin."); + lines.push(""); lines.push("When you build something that others should access, you have two options:"); lines.push("1. Create an HTTP API on a local port. Give the user the internal URL and auth token."); lines.push( diff --git a/src/agent/runtime.ts b/src/agent/runtime.ts index 5e08cf0..807efb2 100644 --- a/src/agent/runtime.ts +++ b/src/agent/runtime.ts @@ -31,7 +31,7 @@ export class AgentRuntime { private roleTemplate: RoleTemplate | null = null; private onboardingPrompt: string | null = null; private lastTrackedFiles: string[] = []; - private mcpServerFactories: Record McpServerConfig> | null = null; + private mcpServerFactories: Record McpServerConfig | Promise> | null = null; constructor(config: PhantomConfig, db: Database) { this.config = config; @@ -55,7 +55,7 @@ export class AgentRuntime { this.onboardingPrompt = prompt; } - setMcpServerFactories(factories: Record McpServerConfig>): void { + setMcpServerFactories(factories: Record McpServerConfig | Promise>): void { this.mcpServerFactories = factories; } @@ -208,7 +208,11 @@ export class AgentRuntime { ...(useResume && session.sdk_session_id ? { resume: session.sdk_session_id } : {}), ...(this.mcpServerFactories ? { - mcpServers: Object.fromEntries(Object.entries(this.mcpServerFactories).map(([k, f]) => [k, f()])), + mcpServers: Object.fromEntries( + await Promise.all( + Object.entries(this.mcpServerFactories).map(async ([k, f]) => [k, await f()] as const), + ), + ), } : {}), }, diff --git a/src/index.ts b/src/index.ts index c3609c4..1bb4801 100644 --- a/src/index.ts +++ b/src/index.ts @@ -51,6 +51,8 @@ import { Scheduler } from "./scheduler/service.ts"; import { createSchedulerToolServer } from "./scheduler/tool.ts"; import { getSecretRequest } from "./secrets/store.ts"; import { createSecretToolServer } from "./secrets/tools.ts"; +import { createBrowserToolServer } from "./ui/browser-mcp.ts"; +import { closePreviewResources, createPreviewToolServer, getOrCreatePreviewContext } from "./ui/preview.ts"; import { setPublicDir, setSecretSavedCallback, setSecretsDb } from "./ui/serve.ts"; import { createWebUiToolServer } from "./ui/tools.ts"; @@ -191,6 +193,8 @@ async function main(): Promise { "phantom-scheduler": () => createSchedulerToolServer(scheduler as Scheduler), "phantom-web-ui": () => createWebUiToolServer(config.public_url), "phantom-secrets": () => createSecretToolServer({ db, baseUrl: secretsBaseUrl }), + "phantom-preview": () => createPreviewToolServer(config.port), + "phantom-browser": () => createBrowserToolServer(() => getOrCreatePreviewContext()), ...(process.env.RESEND_API_KEY ? { "phantom-email": () => @@ -204,7 +208,7 @@ async function main(): Promise { }); const emailStatus = process.env.RESEND_API_KEY ? " + email" : ""; console.log( - `[mcp] MCP server initialized (dynamic tools + scheduler + web UI + secrets${emailStatus} wired to agent)`, + `[mcp] MCP server initialized (dynamic tools + scheduler + web UI + secrets + preview + browser${emailStatus} wired to agent)`, ); } catch (err: unknown) { const msg = err instanceof Error ? err.message : String(err); @@ -580,6 +584,9 @@ async function main(): Promise { onShutdown("Scheduler", async () => { if (scheduler) scheduler.stop(); }); + onShutdown("Preview browser", async () => { + await closePreviewResources(); + }); onShutdown("Peer health monitor", async () => { if (peerHealthMonitor) peerHealthMonitor.stop(); }); diff --git a/src/ui/__tests__/browser-mcp.integration.test.ts b/src/ui/__tests__/browser-mcp.integration.test.ts new file mode 100644 index 0000000..c41f2ce --- /dev/null +++ b/src/ui/__tests__/browser-mcp.integration.test.ts @@ -0,0 +1,127 @@ +// Integration tests for createBrowserToolServer. These exercise the real +// @playwright/mcp embed with a real BrowserContext. Opt-in: +// +// PHANTOM_INTEGRATION=1 bun test src/ui/__tests__/browser-mcp.integration.test.ts +// +// Skipped by default so `bun test` stays hermetic. +// +// Two load-bearing invariants are enforced here: +// +// 1. The embed exposes exactly 21 tools. @playwright/mcp@0.0.70 is pinned +// specifically so the tool surface cannot drift silently; this assertion +// is the drift detector the pin was meant to anchor. +// +// 2. A real `browser_navigate` call succeeds against a BrowserContext +// minted by the preview tool. This is the end-to-end verification of +// the cross-version playwright-core boundary documented in +// src/ui/browser-mcp.ts note 3: the context is an instance from +// playwright-core@1.59.1 consumed by @playwright/mcp's hoisted +// playwright-core@1.60.0-alpha SimpleBrowser wrapper. + +import { afterAll, beforeAll, describe, expect, test } from "bun:test"; +import { Client } from "@modelcontextprotocol/sdk/client/index.js"; +import { InMemoryTransport } from "@modelcontextprotocol/sdk/inMemory.js"; +import { createBrowserToolServer } from "../browser-mcp.ts"; +import { __resetPreviewStateForTesting, closePreviewResources, getOrCreatePreviewContext } from "../preview.ts"; +import { revokeAllSessions } from "../session.ts"; + +const ENABLED = process.env.PHANTOM_INTEGRATION === "1"; +const suite = ENABLED ? describe : describe.skip; + +const EXPECTED_TOOL_NAMES = [ + "browser_click", + "browser_close", + "browser_console_messages", + "browser_drag", + "browser_evaluate", + "browser_file_upload", + "browser_fill_form", + "browser_handle_dialog", + "browser_hover", + "browser_navigate", + "browser_navigate_back", + "browser_network_requests", + "browser_press_key", + "browser_resize", + "browser_run_code", + "browser_select_option", + "browser_snapshot", + "browser_tabs", + "browser_take_screenshot", + "browser_type", + "browser_wait_for", +]; + +type CallResult = { isError?: boolean; content: unknown }; + +suite("createBrowserToolServer (integration)", () => { + let server: ReturnType | null = null; + let port = 0; + let client: Client | null = null; + let embed: Awaited> | null = null; + + beforeAll(async () => { + // Reset module-level preview state so running this file after any + // other test file that called closePreviewResources() still starts + // from a pristine state. Bun shares module instances across test + // files inside the same process. + __resetPreviewStateForTesting(); + server = Bun.serve({ + port: 0, + fetch(req) { + const url = new URL(req.url); + if (url.pathname === "/ui/test.html") { + return new Response( + "Browser MCP Integration" + + "

Hello

", + { headers: { "content-type": "text/html" } }, + ); + } + return new Response("not found", { status: 404 }); + }, + }); + port = server.port ?? 0; + + embed = await createBrowserToolServer(() => getOrCreatePreviewContext()); + const [serverTransport, clientTransport] = InMemoryTransport.createLinkedPair(); + const serverInstance = embed.instance as unknown as { + connect: (t: typeof serverTransport) => Promise; + close: () => Promise; + }; + await serverInstance.connect(serverTransport); + client = new Client({ name: "phantom-browser-integration", version: "1.0" }, { capabilities: {} }); + await client.connect(clientTransport); + }); + + afterAll(async () => { + await client?.close(); + if (embed) { + const inst = embed.instance as unknown as { close: () => Promise }; + await inst.close(); + } + await closePreviewResources(); + revokeAllSessions(); + server?.stop(true); + }); + + test("listTools returns exactly the 21-tool @playwright/mcp surface", async () => { + if (!client) throw new Error("client not initialized"); + const { tools } = await client.listTools(); + expect(tools).toHaveLength(21); + const names = tools.map((t) => t.name).sort(); + expect(names).toEqual([...EXPECTED_TOOL_NAMES].sort()); + }); + + test("browser_navigate succeeds across the cross-version BrowserContext boundary", async () => { + if (!client) throw new Error("client not initialized"); + const result = (await client.callTool({ + name: "browser_navigate", + arguments: { url: `http://localhost:${port}/ui/test.html` }, + })) as CallResult; + // A successful navigate returns content with no isError flag set. + // The exact content shape is @playwright/mcp's concern; we care only + // that the call did not land in the error branch. + expect(result.isError).toBeFalsy(); + expect(result.content).toBeDefined(); + }); +}); diff --git a/src/ui/__tests__/browser-mcp.test.ts b/src/ui/__tests__/browser-mcp.test.ts new file mode 100644 index 0000000..11c2e41 --- /dev/null +++ b/src/ui/__tests__/browser-mcp.test.ts @@ -0,0 +1,37 @@ +import { describe, expect, test } from "bun:test"; +import type { BrowserContext } from "playwright"; +import { createBrowserToolServer } from "../browser-mcp.ts"; + +// The real @playwright/mcp createConnection is lazy: it wires a backend +// factory that will call the contextGetter only when a client actually +// requests a tool. Constructing the embed does not require a live +// BrowserContext, so these tests never touch Chromium. +function fakeContextGetter(): Promise { + return Promise.reject(new Error("contextGetter should not run in unit tests")); +} + +describe("createBrowserToolServer", () => { + test("returns an SDK MCP server config with the phantom-browser name", async () => { + const config = await createBrowserToolServer(fakeContextGetter); + expect(config.type).toBe("sdk"); + expect(config.name).toBe("phantom-browser"); + expect(config.instance).toBeDefined(); + }); + + test("instance exposes the MCP connect() contract used by the Agent SDK", async () => { + const config = await createBrowserToolServer(fakeContextGetter); + const inst = config.instance as unknown as { connect: unknown; close: unknown }; + expect(typeof inst.connect).toBe("function"); + expect(typeof inst.close).toBe("function"); + }); + + test("each call returns a distinct underlying Server instance", async () => { + const a = await createBrowserToolServer(fakeContextGetter); + const b = await createBrowserToolServer(fakeContextGetter); + // Factory pattern: the phantom-browser wrapper must be fresh per query. + // If the same instance leaks across calls the SDK will throw "Already + // connected to a transport" on the second run. See src/index.ts for + // the cardinal rule citation. + expect(a.instance).not.toBe(b.instance); + }); +}); diff --git a/src/ui/__tests__/preview-correctness.test.ts b/src/ui/__tests__/preview-correctness.test.ts new file mode 100644 index 0000000..f493aa6 --- /dev/null +++ b/src/ui/__tests__/preview-correctness.test.ts @@ -0,0 +1,222 @@ +// Unit tests for the correctness paths in src/ui/preview.ts that we can +// drive without a real Chromium: cookie rotation at the 8 minute threshold, +// the shuttingDown flag, and the try/finally launch-failure recovery +// pattern. The happy end-to-end path is exercised by the opt-in integration +// test at preview.integration.test.ts which requires PHANTOM_INTEGRATION=1. +// +// We replace `chromium.launch` via spyOn so the module under test never +// actually spawns a browser process. The fake Browser and BrowserContext +// expose exactly the subset of the real API that preview.ts calls: nothing +// more, nothing less. + +import { afterEach, describe, expect, spyOn, test } from "bun:test"; +import type { Browser, BrowserContext } from "playwright"; +import { chromium } from "playwright"; +import { + __resetPreviewStateForTesting, + closePreviewResources, + getOrCreateBrowser, + getOrCreatePreviewContext, +} from "../preview.ts"; +import { getSessionCount, revokeAllSessions } from "../session.ts"; + +type Cookie = { name: string; value: string; domain: string; path: string }; +type FakeContext = BrowserContext & { __cookies: Cookie[]; __closed: boolean }; +type FakeBrowser = Browser & { __contexts: FakeContext[]; __closed: boolean }; + +function makeFakeContext(): FakeContext { + const state = { __cookies: [] as Cookie[], __closed: false }; + const ctx = { + ...state, + addCookies: async (cookies: Cookie[]) => { + // Match Playwright semantics: replace any cookie with the same + // name+domain+path in place, otherwise append. + for (const incoming of cookies) { + const existing = state.__cookies.findIndex( + (c) => c.name === incoming.name && c.domain === incoming.domain && c.path === incoming.path, + ); + if (existing >= 0) state.__cookies[existing] = incoming; + else state.__cookies.push(incoming); + } + }, + close: async () => { + state.__closed = true; + }, + } as unknown as FakeContext; + Object.defineProperty(ctx, "__cookies", { get: () => state.__cookies }); + Object.defineProperty(ctx, "__closed", { get: () => state.__closed }); + return ctx; +} + +function makeFakeBrowser(): FakeBrowser { + const contexts: FakeContext[] = []; + const state = { __closed: false }; + const b = { + newContext: async () => { + const ctx = makeFakeContext(); + contexts.push(ctx); + return ctx; + }, + close: async () => { + state.__closed = true; + }, + } as unknown as FakeBrowser; + Object.defineProperty(b, "__contexts", { get: () => contexts }); + Object.defineProperty(b, "__closed", { get: () => state.__closed }); + return b; +} + +afterEach(() => { + __resetPreviewStateForTesting(); + revokeAllSessions(); +}); + +describe("getOrCreateBrowser launch-failure recovery (Codex P2)", () => { + test("second call after a transient launch error succeeds", async () => { + const spy = spyOn(chromium, "launch"); + try { + let calls = 0; + spy.mockImplementation(async () => { + calls += 1; + if (calls === 1) throw new Error("transient resource error"); + return makeFakeBrowser(); + }); + + await expect(getOrCreateBrowser()).rejects.toThrow("transient resource error"); + // If the rejected promise were cached, this would re-reject with the + // same error instead of invoking launch a second time. + const b = await getOrCreateBrowser(); + expect(b).toBeDefined(); + expect(calls).toBe(2); + } finally { + spy.mockRestore(); + } + }); +}); + +describe("getOrCreatePreviewContext launch-failure recovery", () => { + test("clears currentContextPromise on failure so the next call retries", async () => { + const spy = spyOn(chromium, "launch"); + try { + let calls = 0; + spy.mockImplementation(async () => { + calls += 1; + if (calls === 1) throw new Error("chromium down"); + return makeFakeBrowser(); + }); + + await expect(getOrCreatePreviewContext()).rejects.toThrow("chromium down"); + const ctx = await getOrCreatePreviewContext(); + expect(ctx).toBeDefined(); + expect(calls).toBe(2); + } finally { + spy.mockRestore(); + } + }); +}); + +describe("shuttingDown flag (review F7)", () => { + test("getOrCreateBrowser throws after closePreviewResources", async () => { + await closePreviewResources(); + await expect(getOrCreateBrowser()).rejects.toThrow("preview subsystem is shutting down"); + }); + + test("getOrCreatePreviewContext throws after closePreviewResources", async () => { + await closePreviewResources(); + await expect(getOrCreatePreviewContext()).rejects.toThrow("preview subsystem is shutting down"); + }); +}); + +describe("cookie rotation (review F6, Codex P1)", () => { + test("cold create mints a fresh preview cookie on the new context", async () => { + const spy = spyOn(chromium, "launch"); + try { + const fakeBrowser = makeFakeBrowser(); + spy.mockImplementation(async () => fakeBrowser); + + const ctx = (await getOrCreatePreviewContext()) as FakeContext; + expect(ctx.__cookies).toHaveLength(1); + const cookie = ctx.__cookies[0]; + expect(cookie.name).toBe("phantom_session"); + expect(cookie.domain).toBe("localhost"); + // Fix 4: cookie path is scoped to /ui, matching the magic-link + // posture in src/ui/serve.ts. + expect(cookie.path).toBe("/ui"); + expect(typeof cookie.value).toBe("string"); + expect(cookie.value.length).toBeGreaterThan(20); + } finally { + spy.mockRestore(); + } + }); + + test("warm reuse inside the 8 minute window does not rotate", async () => { + const spy = spyOn(chromium, "launch"); + try { + spy.mockImplementation(async () => makeFakeBrowser()); + + const ctx1 = (await getOrCreatePreviewContext()) as FakeContext; + const firstToken = ctx1.__cookies[0].value; + const sessionsAfterFirst = getSessionCount(); + + const ctx2 = (await getOrCreatePreviewContext()) as FakeContext; + expect(ctx2).toBe(ctx1); + expect(ctx2.__cookies).toHaveLength(1); + expect(ctx2.__cookies[0].value).toBe(firstToken); + expect(getSessionCount()).toBe(sessionsAfterFirst); + } finally { + spy.mockRestore(); + } + }); + + test("warm reuse past the 8 minute threshold rotates the cookie in place", async () => { + const spy = spyOn(chromium, "launch"); + const originalNow = Date.now; + try { + spy.mockImplementation(async () => makeFakeBrowser()); + + const ctx1 = (await getOrCreatePreviewContext()) as FakeContext; + const firstToken = ctx1.__cookies[0].value; + expect(getSessionCount()).toBe(1); + + // Advance wall-clock past the 8 minute rotation threshold. The cached + // BrowserContext stays the same instance but the cookie value gets + // replaced in place via addCookies semantics. + Date.now = () => originalNow() + 9 * 60 * 1000; + + const ctx2 = (await getOrCreatePreviewContext()) as FakeContext; + expect(ctx2).toBe(ctx1); + expect(ctx2.__cookies).toHaveLength(1); + expect(ctx2.__cookies[0].value).not.toBe(firstToken); + // A second preview session was minted into the sessions map. + expect(getSessionCount()).toBe(2); + } finally { + Date.now = originalNow; + spy.mockRestore(); + } + }); +}); + +describe("page.close() error path clears currentContext (review F8)", () => { + // We cannot drive the real phantom_preview_page handler without a real + // browser, but we can verify the public invariant the fix establishes: + // once currentContext has been nulled (as the catch block does), the next + // getOrCreatePreviewContext() call returns a fresh context. The preview + // integration test covers the happy path end to end. + test("after currentContext is nulled, next call creates a fresh context", async () => { + const spy = spyOn(chromium, "launch"); + try { + spy.mockImplementation(async () => makeFakeBrowser()); + + const ctx1 = (await getOrCreatePreviewContext()) as FakeContext; + // Simulate the catch-block cleanup in the preview tool finally. + await closePreviewResources(); + __resetPreviewStateForTesting(); + + spy.mockImplementation(async () => makeFakeBrowser()); + const ctx2 = (await getOrCreatePreviewContext()) as FakeContext; + expect(ctx2).not.toBe(ctx1); + } finally { + spy.mockRestore(); + } + }); +}); diff --git a/src/ui/__tests__/preview.integration.test.ts b/src/ui/__tests__/preview.integration.test.ts new file mode 100644 index 0000000..17ff554 --- /dev/null +++ b/src/ui/__tests__/preview.integration.test.ts @@ -0,0 +1,170 @@ +// Integration tests for phantom_preview_page. These launch a real Chromium +// (headless shell) and talk to a local Bun.serve, so they are opt-in. Run: +// +// PHANTOM_INTEGRATION=1 bun test src/ui/__tests__/preview.integration.test.ts +// +// They are skipped by default so `bun test` stays fast and hermetic. +// +// Unlike the unit tests in preview.test.ts, these tests invoke the real +// phantom_preview_page handler through its MCP server instance via an +// InMemoryTransport pair and Client from @modelcontextprotocol/sdk. That +// means every load-bearing behavior the handler owns is exercised end to +// end: the SSE init-script stub, console-message capture, failed-request +// capture, the bundled image+text result shape, and the isError path. + +import { afterAll, beforeAll, describe, expect, test } from "bun:test"; +import { Client } from "@modelcontextprotocol/sdk/client/index.js"; +import { InMemoryTransport } from "@modelcontextprotocol/sdk/inMemory.js"; +import { + __resetPreviewStateForTesting, + closePreviewResources, + createPreviewToolServer, + getOrCreatePreviewContext, +} from "../preview.ts"; +import { revokeAllSessions } from "../session.ts"; + +const ENABLED = process.env.PHANTOM_INTEGRATION === "1"; +const suite = ENABLED ? describe : describe.skip; + +type ToolContent = + | { type: "text"; text: string } + | { type: "image"; data: string; mimeType: string } + | { type: string; [k: string]: unknown }; + +type ToolResult = { + content: ToolContent[]; + isError?: boolean; +}; + +function asText(block: ToolContent | undefined): string { + if (!block || block.type !== "text") throw new Error("expected a text content block"); + return (block as { text: string }).text; +} + +function asImage(block: ToolContent | undefined): { data: string; mimeType: string } { + if (!block || block.type !== "image") throw new Error("expected an image content block"); + return block as { data: string; mimeType: string; type: "image" }; +} + +suite("phantom_preview_page (integration)", () => { + let server: ReturnType | null = null; + let port = 0; + let client: Client | null = null; + + beforeAll(async () => { + // Reset module-level preview state so running this file after any + // other test file that called closePreviewResources() still starts + // from a pristine state. Bun shares module instances across test + // files inside the same process. + __resetPreviewStateForTesting(); + server = Bun.serve({ + port: 0, + fetch(req) { + const url = new URL(req.url); + if (url.pathname === "/ui/test.html") { + // The missing ' + + "" + + "

Hello

", + { headers: { "content-type": "text/html" } }, + ); + } + return new Response("not found", { status: 404 }); + }, + }); + port = server.port ?? 0; + + // Wire a real MCP Client to the real preview tool server via an + // in-memory transport pair. This is the exact contract the Agent SDK + // uses at runtime; if the bundled result shape drifts, this test + // catches it. + const embed = createPreviewToolServer(port); + const [serverTransport, clientTransport] = InMemoryTransport.createLinkedPair(); + const serverInstance = embed.instance as unknown as { + connect: (t: typeof serverTransport) => Promise; + }; + await serverInstance.connect(serverTransport); + client = new Client({ name: "phantom-preview-integration", version: "1.0" }, { capabilities: {} }); + await client.connect(clientTransport); + }); + + afterAll(async () => { + await client?.close(); + await closePreviewResources(); + revokeAllSessions(); + server?.stop(true); + }); + + test("tools/list exposes phantom_preview_page", async () => { + if (!client) throw new Error("client not initialized"); + const { tools } = await client.listTools(); + expect(tools.map((t) => t.name)).toContain("phantom_preview_page"); + }); + + test("successful call returns image + text blocks with full metadata", async () => { + if (!client) throw new Error("client not initialized"); + const result = (await client.callTool({ + name: "phantom_preview_page", + arguments: { path: "test.html" }, + })) as ToolResult; + + expect(result.isError).toBeFalsy(); + expect(result.content).toHaveLength(2); + + const image = asImage(result.content[0]); + expect(image.mimeType).toBe("image/png"); + expect(image.data.length).toBeGreaterThan(100); + // Base64 round-trip sanity: decodes to a non-empty byte buffer. + expect(Buffer.from(image.data, "base64").length).toBeGreaterThan(100); + + const meta = JSON.parse(asText(result.content[1])) as { + status: number; + title: string; + consoleMessages: { type: string; text: string }[]; + failedRequests: { url: string; failure: string }[]; + }; + expect(meta.status).toBe(200); + expect(meta.title).toBe("Preview Integration"); + + // The inline