From 41369cfda90ed9651b94acf4cbe0f9deade44994 Mon Sep 17 00:00:00 2001
From: Muhammad Ahmed Cheema <cheemawrites@gmail.com>
Date: Sun, 12 Apr 2026 20:45:08 -0700
Subject: [PATCH 1/2] feat: playwright self-validation and general browser
 capability

Adds two new MCP tool servers that share one Chromium instance and one
BrowserContext per query:

- phantom_preview_page: one-call self-validation for /ui/<path> pages.
  Navigates headless Chromium, captures a full-page PNG, and bundles HTTP
  status, title, console messages, and failed network requests alongside
  the screenshot. Stubs window.EventSource in an init script so Phantom's
  live-reload SSE wiring cannot hold the page open past 'load'.
- phantom-browser: embeds the full 21-tool @playwright/mcp surface in
  process via createConnection with a contextGetter, giving the agent
  browser_navigate, browser_click, browser_snapshot, browser_run_code,
  and the rest out of the box.

Both share one Browser singleton launched lazily on first use and one
BrowserContext minted per query with a short-lived phantom_session
cookie (10 minute TTL, domain: localhost). The browser and context are
torn down on SIGTERM via the existing graceful-shutdown registry.

The contextGetter path is load-bearing: the naive createConnection hangs
under Bun on Linux amd64 (traced across five probes in research 01b).
browser.isolated: false is mandatory because SimpleBrowser.newContext()
throws; the embed defers to our shared context instead.

mcpServerFactories now accepts async factories. The per-query resolution
path in AgentRuntime uses Promise.all so sync factories pass through as
resolved promises without behavior change.

Agent prompt gains two new guidance blocks: always validate after
phantom_create_page, and prefer phantom_preview_page for self-validation
while using browser_* directly for multi-step browsing.

Dockerfile installs the --only-shell chromium build (~75 MiB saved over
the full binary) with PLAYWRIGHT_BROWSERS_PATH set to a phantom-owned
cache directory. playwright@1.59.1 and @playwright/mcp@0.0.70 are
pinned exactly because contextGetter and the capability surface have
shifted between point releases already.

Research: local/2026-04-12-phantom-ui-chapter/research/01-playwright-self-validation.md
Research: local/2026-04-12-phantom-ui-chapter/research/01b-playwright-expanded/implementation-plan.md
Research: local/2026-04-12-phantom-ui-chapter/research/01b-playwright-expanded/findings.md

Test count: 931 -> 940 pass (+9) plus 6 opt-in integration tests skipped
by default. Zero regressions. lint and typecheck clean.
Docker image delta: +996 MiB (914 MiB -> 1.91 GiB).
---
 Dockerfile                                    |  12 +
 bun.lock                                      |  14 ++
 package.json                                  |   2 +
 src/agent/prompt-assembler.ts                 |  25 ++
 src/agent/runtime.ts                          |  10 +-
 src/index.ts                                  |   9 +-
 .../__tests__/browser-mcp.integration.test.ts |  33 +++
 src/ui/__tests__/browser-mcp.test.ts          |  37 +++
 src/ui/__tests__/preview.integration.test.ts  |  63 +++++
 src/ui/__tests__/preview.test.ts              |  58 +++++
 src/ui/browser-mcp.ts                         |  70 ++++++
 src/ui/preview.ts                             | 218 ++++++++++++++++++
 src/ui/session.ts                             |  17 ++
 13 files changed, 564 insertions(+), 4 deletions(-)
 create mode 100644 src/ui/__tests__/browser-mcp.integration.test.ts
 create mode 100644 src/ui/__tests__/browser-mcp.test.ts
 create mode 100644 src/ui/__tests__/preview.integration.test.ts
 create mode 100644 src/ui/__tests__/preview.test.ts
 create mode 100644 src/ui/browser-mcp.ts
 create mode 100644 src/ui/preview.ts
diff --git a/Dockerfile b/Dockerfile
index f4a8e5a..be03408 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -75,6 +75,18 @@ COPY --from=builder /app/public ./public
 COPY --from=builder /app/package.json ./
 COPY --from=builder /app/tsconfig.json ./
 
+# Install Chromium headless shell + system deps for Playwright.
+# Must run after node_modules is copied so bunx can resolve playwright.
+# --only-shell skips the full Chromium binary (saves ~75 MiB); the custom
+# phantom_preview_page tool uses chromium.launch() which picks the shell
+# automatically for headless=true. The @playwright/mcp embed path uses a
+# contextGetter so it never needs the full chrome channel binary.
+ENV PLAYWRIGHT_BROWSERS_PATH=/home/phantom/.cache/ms-playwright
+RUN mkdir -p "$PLAYWRIGHT_BROWSERS_PATH" && \
+    bunx playwright install --with-deps --only-shell chromium && \
+    chown -R phantom:phantom /home/phantom/.cache && \
+    rm -rf /var/lib/apt/lists/*
+
 # Copy default phantom-config (constitution.md, persona.md, etc.)
 # These get backed up so they survive the empty volume mount on first run.
 COPY --from=builder /app/phantom-config ./phantom-config
diff --git a/bun.lock b/bun.lock
index 705f903..5b4f3f1 100644
--- a/bun.lock
+++ b/bun.lock
@@ -7,10 +7,12 @@
       "dependencies": {
         "@anthropic-ai/claude-agent-sdk": "^0.2.77",
         "@modelcontextprotocol/sdk": "^1.28.0",
+        "@playwright/mcp": "0.0.70",
         "@slack/bolt": "^4.6.0",
         "croner": "^10.0.1",
         "imapflow": "^1.2.18",
         "nodemailer": "^8.0.4",
+        "playwright": "1.59.1",
         "resend": "^6.9.4",
         "telegraf": "^4.16.3",
         "yaml": "^2.6.0",
@@ -83,6 +85,8 @@
 
     "@pinojs/redact": ["@pinojs/redact@0.4.0", "", {}, "sha512-k2ENnmBugE/rzQfEcdWHcCY+/FM3VLzH9cYEsbdsoqrvzAKRhUZeRNhAZvB8OitQJ1TBed3yqWtdjzS6wJKBwg=="],
 
+    "@playwright/mcp": ["@playwright/mcp@0.0.70", "", { "dependencies": { "playwright": "1.60.0-alpha-1774999321000", "playwright-core": "1.60.0-alpha-1774999321000" }, "bin": { "playwright-mcp": "cli.js" } }, "sha512-Kl0a6l9VL8rvT1oBou3hS5yArjwWV9UlwAkq+0skfK1YVg8XfmmNaAmwZhMeNx/ZhGiWXfCllo6rD/jvZz+WuA=="],
+
     "@slack/bolt": ["@slack/bolt@4.6.0", "", { "dependencies": { "@slack/logger": "^4.0.0", "@slack/oauth": "^3.0.4", "@slack/socket-mode": "^2.0.5", "@slack/types": "^2.18.0", "@slack/web-api": "^7.12.0", "axios": "^1.12.0", "express": "^5.0.0", "path-to-regexp": "^8.1.0", "raw-body": "^3", "tsscmp": "^1.0.6" }, "peerDependencies": { "@types/express": "^5.0.0" } }, "sha512-xPgfUs2+OXSugz54Ky07pA890+Qydk22SYToi8uGpXeHSt1JWwFJkRyd/9Vlg5I1AdfdpGXExDpwnbuN9Q/2dQ=="],
 
     "@slack/logger": ["@slack/logger@4.0.1", "", { "dependencies": { "@types/node": ">=18" } }, "sha512-6cmdPrV/RYfd2U0mDGiMK8S7OJqpCTm7enMLRR3edccsPX8j7zXTLnaEF4fhxxJJTAIOil6+qZrnUPTuaLvwrQ=="],
@@ -237,6 +241,8 @@
 
     "fresh": ["fresh@2.0.0", "", {}, "sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A=="],
 
+    "fsevents": ["fsevents@2.3.2", "", { "os": "darwin" }, "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA=="],
+
     "function-bind": ["function-bind@1.1.2", "", {}, "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA=="],
 
     "get-intrinsic": ["get-intrinsic@1.3.0", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.2", "es-define-property": "^1.0.1", "es-errors": "^1.3.0", "es-object-atoms": "^1.1.1", "function-bind": "^1.1.2", "get-proto": "^1.0.1", "gopd": "^1.2.0", "has-symbols": "^1.1.0", "hasown": "^2.0.2", "math-intrinsics": "^1.1.0" } }, "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ=="],
@@ -357,6 +363,10 @@
 
     "pkce-challenge": ["pkce-challenge@5.0.1", "", {}, "sha512-wQ0b/W4Fr01qtpHlqSqspcj3EhBvimsdh0KlHhH8HRZnMsEa0ea2fTULOXOS9ccQr3om+GcGRk4e+isrZWV8qQ=="],
 
+    "playwright": ["playwright@1.59.1", "", { "dependencies": { "playwright-core": "1.59.1" }, "optionalDependencies": { "fsevents": "2.3.2" }, "bin": { "playwright": "cli.js" } }, "sha512-C8oWjPR3F81yljW9o5OxcWzfh6avkVwDD2VYdwIGqTkl+OGFISgypqzfu7dOe4QNLL2aqcWBmI3PMtLIK233lw=="],
+
+    "playwright-core": ["playwright-core@1.60.0-alpha-1774999321000", "", { "bin": { "playwright-core": "cli.js" } }, "sha512-ams3Zo4VXxeOg5ZTTh16GkE8g48Bmxo/9pg9gXl9SVKlVohCU7Jaog7XntY8yFuzENA6dJc1Fz7Z/NNTm9nGEw=="],
+
     "postal-mime": ["postal-mime@2.7.3", "", {}, "sha512-MjhXadAJaWgYzevi46+3kLak8y6gbg0ku14O1gO/LNOuay8dO+1PtcSGvAdgDR0DoIsSaiIA8y/Ddw6MnrO0Tw=="],
 
     "process-warning": ["process-warning@5.0.0", "", {}, "sha512-a39t9ApHNx2L4+HBnQKqxxHNs1r7KF+Intd8Q/g1bUh6q0WIp9voPXJ/x0j+ZL45KF1pJd9+q2jLIRMfvEshkA=="],
@@ -465,6 +475,8 @@
 
     "zod-to-json-schema": ["zod-to-json-schema@3.25.1", "", { "peerDependencies": { "zod": "^3.25 || ^4" } }, "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA=="],
 
+    "@playwright/mcp/playwright": ["playwright@1.60.0-alpha-1774999321000", "", { "dependencies": { "playwright-core": "1.60.0-alpha-1774999321000" }, "optionalDependencies": { "fsevents": "2.3.2" }, "bin": { "playwright": "cli.js" } }, "sha512-Bd5DkzYKG+2g1jLO6NeTXmGLbBYSFffJIOsR4l4hUBkJvzvGGdLZ7jZb2tOtb0WIoWXQKdQj3Ap6WthV4DBS8w=="],
+
     "form-data/mime-types": ["mime-types@2.1.35", "", { "dependencies": { "mime-db": "1.52.0" } }, "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw=="],
 
     "libmime/iconv-lite": ["iconv-lite@0.6.3", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw=="],
@@ -473,6 +485,8 @@
 
     "p-queue/p-timeout": ["p-timeout@3.2.0", "", { "dependencies": { "p-finally": "^1.0.0" } }, "sha512-rhIwUycgwwKcP9yTOOFK/AKsAopjjCakVqLHePO3CC6Mir1Z99xT+R63jZxAT5lFZLa2inS5h+ZS2GvR99/FBg=="],
 
+    "playwright/playwright-core": ["playwright-core@1.59.1", "", { "bin": { "playwright-core": "cli.js" } }, "sha512-HBV/RJg81z5BiiZ9yPzIiClYV/QMsDCKUyogwH9p3MCP6IYjUFu/MActgYAvK0oWyV9NlwM3GLBjADyWgydVyg=="],
+
     "form-data/mime-types/mime-db": ["mime-db@1.52.0", "", {}, "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg=="],
   }
 }
diff --git a/package.json b/package.json
index b80234c..eae34fd 100644
--- a/package.json
+++ b/package.json
@@ -18,10 +18,12 @@
   "dependencies": {
     "@anthropic-ai/claude-agent-sdk": "^0.2.77",
     "@modelcontextprotocol/sdk": "^1.28.0",
+    "@playwright/mcp": "0.0.70",
     "@slack/bolt": "^4.6.0",
     "croner": "^10.0.1",
     "imapflow": "^1.2.18",
     "nodemailer": "^8.0.4",
+    "playwright": "1.59.1",
     "resend": "^6.9.4",
     "telegraf": "^4.16.3",
     "yaml": "^2.6.0",
diff --git a/src/agent/prompt-assembler.ts b/src/agent/prompt-assembler.ts
index 09197e1..69ef5e8 100644
--- a/src/agent/prompt-assembler.ts
+++ b/src/agent/prompt-assembler.ts
@@ -170,6 +170,31 @@ function buildEnvironment(config: PhantomConfig): string {
 		lines.push(`- Pages are at ${publicUrl}/ui/<filename>`);
 	}
 	lines.push("");
+	lines.push("SELF-VALIDATE EVERY UI PAGE YOU CREATE.");
+	lines.push("After phantom_create_page succeeds, always call phantom_preview_page with");
+	lines.push("the same path. Review the screenshot, the HTTP status, the page title,");
+	lines.push("and especially the console messages and failed network requests list.");
+	lines.push("If there are console errors, failed CDN loads, or the screenshot looks");
+	lines.push("wrong, fix the HTML and re-run phantom_preview_page until clean. Only");
+	lines.push("report the page to the user after validation passes.");
+	lines.push("The tool returns one image block plus a JSON metadata block. The image");
+	lines.push("is for visual review, the JSON tells you what failed to load or error.");
+	lines.push("");
+	lines.push("GENERAL BROWSER CAPABILITY.");
+	lines.push("You have access to the full Playwright MCP tool surface via the");
+	lines.push("phantom-browser server. These tools share one Chromium instance with");
+	lines.push("phantom_preview_page. Use browser_navigate to open any URL (localhost");
+	lines.push("or external), browser_snapshot for structured accessibility text,");
+	lines.push("browser_take_screenshot for pixel captures, browser_click/browser_type/");
+	lines.push("browser_fill_form for interaction, browser_console_messages and");
+	lines.push("browser_network_requests for debugging, browser_tabs for multi-page work.");
+	lines.push("For single-shot self-validation of your own /ui/<path> pages, always");
+	lines.push("prefer phantom_preview_page: one call returns image plus JSON.");
+	lines.push("For multi-step browsing, research tasks, or external sites, use the");
+	lines.push("browser_* tools directly.");
+	lines.push("Do NOT use browser_run_code against external pages unless the user");
+	lines.push("explicitly asked you to execute code in a foreign origin.");
+	lines.push("");
 	lines.push("When you build something that others should access, you have two options:");
 	lines.push("1. Create an HTTP API on a local port. Give the user the internal URL and auth token.");
 	lines.push(
diff --git a/src/agent/runtime.ts b/src/agent/runtime.ts
index 5e08cf0..807efb2 100644
--- a/src/agent/runtime.ts
+++ b/src/agent/runtime.ts
@@ -31,7 +31,7 @@ export class AgentRuntime {
 	private roleTemplate: RoleTemplate | null = null;
 	private onboardingPrompt: string | null = null;
 	private lastTrackedFiles: string[] = [];
-	private mcpServerFactories: Record<string, () => McpServerConfig> | null = null;
+	private mcpServerFactories: Record<string, () => McpServerConfig | Promise<McpServerConfig>> | null = null;
 
 	constructor(config: PhantomConfig, db: Database) {
 		this.config = config;
@@ -55,7 +55,7 @@ export class AgentRuntime {
 		this.onboardingPrompt = prompt;
 	}
 
-	setMcpServerFactories(factories: Record<string, () => McpServerConfig>): void {
+	setMcpServerFactories(factories: Record<string, () => McpServerConfig | Promise<McpServerConfig>>): void {
 		this.mcpServerFactories = factories;
 	}
 
@@ -208,7 +208,11 @@ export class AgentRuntime {
 					...(useResume && session.sdk_session_id ? { resume: session.sdk_session_id } : {}),
 					...(this.mcpServerFactories
 						? {
-								mcpServers: Object.fromEntries(Object.entries(this.mcpServerFactories).map(([k, f]) => [k, f()])),
+								mcpServers: Object.fromEntries(
+									await Promise.all(
+										Object.entries(this.mcpServerFactories).map(async ([k, f]) => [k, await f()] as const),
+									),
+								),
 							}
 						: {}),
 				},
diff --git a/src/index.ts b/src/index.ts
index c3609c4..1bb4801 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -51,6 +51,8 @@ import { Scheduler } from "./scheduler/service.ts";
 import { createSchedulerToolServer } from "./scheduler/tool.ts";
 import { getSecretRequest } from "./secrets/store.ts";
 import { createSecretToolServer } from "./secrets/tools.ts";
+import { createBrowserToolServer } from "./ui/browser-mcp.ts";
+import { closePreviewResources, createPreviewToolServer, getOrCreatePreviewContext } from "./ui/preview.ts";
 import { setPublicDir, setSecretSavedCallback, setSecretsDb } from "./ui/serve.ts";
 import { createWebUiToolServer } from "./ui/tools.ts";
 
@@ -191,6 +193,8 @@ async function main(): Promise<void> {
 			"phantom-scheduler": () => createSchedulerToolServer(scheduler as Scheduler),
 			"phantom-web-ui": () => createWebUiToolServer(config.public_url),
 			"phantom-secrets": () => createSecretToolServer({ db, baseUrl: secretsBaseUrl }),
+			"phantom-preview": () => createPreviewToolServer(config.port),
+			"phantom-browser": () => createBrowserToolServer(() => getOrCreatePreviewContext()),
 			...(process.env.RESEND_API_KEY
 				? {
 						"phantom-email": () =>
@@ -204,7 +208,7 @@ async function main(): Promise<void> {
 		});
 		const emailStatus = process.env.RESEND_API_KEY ? " + email" : "";
 		console.log(
-			`[mcp] MCP server initialized (dynamic tools + scheduler + web UI + secrets${emailStatus} wired to agent)`,
+			`[mcp] MCP server initialized (dynamic tools + scheduler + web UI + secrets + preview + browser${emailStatus} wired to agent)`,
 		);
 	} catch (err: unknown) {
 		const msg = err instanceof Error ? err.message : String(err);
@@ -580,6 +584,9 @@ async function main(): Promise<void> {
 	onShutdown("Scheduler", async () => {
 		if (scheduler) scheduler.stop();
 	});
+	onShutdown("Preview browser", async () => {
+		await closePreviewResources();
+	});
 	onShutdown("Peer health monitor", async () => {
 		if (peerHealthMonitor) peerHealthMonitor.stop();
 	});
diff --git a/src/ui/__tests__/browser-mcp.integration.test.ts b/src/ui/__tests__/browser-mcp.integration.test.ts
new file mode 100644
index 0000000..f507bee
--- /dev/null
+++ b/src/ui/__tests__/browser-mcp.integration.test.ts
@@ -0,0 +1,33 @@
+// Integration tests for createBrowserToolServer. These exercise the real
+// @playwright/mcp embed with a real BrowserContext. Opt-in:
+//
+//   PHANTOM_INTEGRATION=1 bun test src/ui/__tests__/browser-mcp.integration.test.ts
+//
+// Skipped by default so `bun test` stays hermetic.
+
+import { afterAll, describe, expect, test } from "bun:test";
+import { createBrowserToolServer } from "../browser-mcp.ts";
+import { closePreviewResources, getOrCreatePreviewContext } from "../preview.ts";
+import { revokeAllSessions } from "../session.ts";
+
+const ENABLED = process.env.PHANTOM_INTEGRATION === "1";
+const suite = ENABLED ? describe : describe.skip;
+
+suite("createBrowserToolServer (integration)", () => {
+	afterAll(async () => {
+		await closePreviewResources();
+		revokeAllSessions();
+	});
+
+	test("builds an embed server wired to a shared BrowserContext", async () => {
+		const embed = await createBrowserToolServer(() => getOrCreatePreviewContext());
+		expect(embed.type).toBe("sdk");
+		expect(embed.name).toBe("phantom-browser");
+		const inst = embed.instance as unknown as {
+			connect: unknown;
+			close: () => Promise<void>;
+		};
+		expect(typeof inst.connect).toBe("function");
+		await inst.close();
+	});
+});
diff --git a/src/ui/__tests__/browser-mcp.test.ts b/src/ui/__tests__/browser-mcp.test.ts
new file mode 100644
index 0000000..11c2e41
--- /dev/null
+++ b/src/ui/__tests__/browser-mcp.test.ts
@@ -0,0 +1,37 @@
+import { describe, expect, test } from "bun:test";
+import type { BrowserContext } from "playwright";
+import { createBrowserToolServer } from "../browser-mcp.ts";
+
+// The real @playwright/mcp createConnection is lazy: it wires a backend
+// factory that will call the contextGetter only when a client actually
+// requests a tool. Constructing the embed does not require a live
+// BrowserContext, so these tests never touch Chromium.
+function fakeContextGetter(): Promise<BrowserContext> {
+	return Promise.reject(new Error("contextGetter should not run in unit tests"));
+}
+
+describe("createBrowserToolServer", () => {
+	test("returns an SDK MCP server config with the phantom-browser name", async () => {
+		const config = await createBrowserToolServer(fakeContextGetter);
+		expect(config.type).toBe("sdk");
+		expect(config.name).toBe("phantom-browser");
+		expect(config.instance).toBeDefined();
+	});
+
+	test("instance exposes the MCP connect() contract used by the Agent SDK", async () => {
+		const config = await createBrowserToolServer(fakeContextGetter);
+		const inst = config.instance as unknown as { connect: unknown; close: unknown };
+		expect(typeof inst.connect).toBe("function");
+		expect(typeof inst.close).toBe("function");
+	});
+
+	test("each call returns a distinct underlying Server instance", async () => {
+		const a = await createBrowserToolServer(fakeContextGetter);
+		const b = await createBrowserToolServer(fakeContextGetter);
+		// Factory pattern: the phantom-browser wrapper must be fresh per query.
+		// If the same instance leaks across calls the SDK will throw "Already
+		// connected to a transport" on the second run. See src/index.ts for
+		// the cardinal rule citation.
+		expect(a.instance).not.toBe(b.instance);
+	});
+});
diff --git a/src/ui/__tests__/preview.integration.test.ts b/src/ui/__tests__/preview.integration.test.ts
new file mode 100644
index 0000000..d2287c6
--- /dev/null
+++ b/src/ui/__tests__/preview.integration.test.ts
@@ -0,0 +1,63 @@
+// Integration tests for phantom_preview_page. These launch a real Chromium
+// (headless shell) and talk to a local Bun.serve, so they are opt-in. Run:
+//
+//   PHANTOM_INTEGRATION=1 bun test src/ui/__tests__/preview.integration.test.ts
+//
+// They are skipped by default so `bun test` stays fast and hermetic.
+
+import { afterAll, beforeAll, describe, expect, test } from "bun:test";
+import { closePreviewResources, getOrCreatePreviewContext } from "../preview.ts";
+import { revokeAllSessions } from "../session.ts";
+
+const ENABLED = process.env.PHANTOM_INTEGRATION === "1";
+const suite = ENABLED ? describe : describe.skip;
+
+suite("phantom_preview_page (integration)", () => {
+	let server: ReturnType<typeof Bun.serve> | null = null;
+	let port = 0;
+
+	beforeAll(() => {
+		server = Bun.serve({
+			port: 0,
+			fetch(req) {
+				const url = new URL(req.url);
+				if (url.pathname === "/ui/test.html") {
+					return new Response(
+						"<!DOCTYPE html><html><head><title>Preview Integration</title></head>" +
+							"<body><script>console.log('hi from preview test')</script>" +
+							"<h1>Hello</h1></body></html>",
+						{ headers: { "content-type": "text/html" } },
+					);
+				}
+				return new Response("not found", { status: 404 });
+			},
+		});
+		port = server.port ?? 0;
+	});
+
+	afterAll(async () => {
+		await closePreviewResources();
+		revokeAllSessions();
+		server?.stop(true);
+	});
+
+	test("navigates and screenshots a live page", async () => {
+		const ctx = await getOrCreatePreviewContext();
+		const page = await ctx.newPage();
+		try {
+			const response = await page.goto(`http://localhost:${port}/ui/test.html`);
+			expect(response?.status()).toBe(200);
+			expect(await page.title()).toBe("Preview Integration");
+			const shot = await page.screenshot({ type: "png" });
+			expect(shot.length).toBeGreaterThan(100);
+		} finally {
+			await page.close();
+		}
+	});
+
+	test("two preview calls share the same BrowserContext", async () => {
+		const a = await getOrCreatePreviewContext();
+		const b = await getOrCreatePreviewContext();
+		expect(a).toBe(b);
+	});
+});
diff --git a/src/ui/__tests__/preview.test.ts b/src/ui/__tests__/preview.test.ts
new file mode 100644
index 0000000..ac0b6f4
--- /dev/null
+++ b/src/ui/__tests__/preview.test.ts
@@ -0,0 +1,58 @@
+import { afterEach, describe, expect, test } from "bun:test";
+import { createPreviewToolServer } from "../preview.ts";
+import { createPreviewSession, isValidSession, revokeAllSessions } from "../session.ts";
+
+afterEach(() => {
+	revokeAllSessions();
+});
+
+describe("createPreviewSession", () => {
+	test("returns a token accepted by isValidSession", () => {
+		const { sessionToken } = createPreviewSession();
+		expect(typeof sessionToken).toBe("string");
+		expect(sessionToken.length).toBeGreaterThan(20);
+		expect(isValidSession(sessionToken)).toBe(true);
+	});
+
+	test("tokens are unique per call", () => {
+		const a = createPreviewSession().sessionToken;
+		const b = createPreviewSession().sessionToken;
+		expect(a).not.toBe(b);
+	});
+
+	test("token expires after the 10 minute TTL", () => {
+		const { sessionToken } = createPreviewSession();
+		expect(isValidSession(sessionToken)).toBe(true);
+
+		const originalNow = Date.now;
+		try {
+			// Eleven minutes later, the session should have expired.
+			Date.now = () => originalNow() + 11 * 60 * 1000;
+			expect(isValidSession(sessionToken)).toBe(false);
+		} finally {
+			Date.now = originalNow;
+		}
+	});
+});
+
+describe("createPreviewToolServer", () => {
+	test("returns an SDK MCP server config with a name", () => {
+		const server = createPreviewToolServer(3100);
+		expect(server).toBeDefined();
+		expect(server.type).toBe("sdk");
+		expect(server.name).toBe("phantom-preview");
+		expect(server.instance).toBeDefined();
+	});
+
+	test("each call returns a distinct instance (factory pattern)", () => {
+		const a = createPreviewToolServer(3100);
+		const b = createPreviewToolServer(3100);
+		expect(a.instance).not.toBe(b.instance);
+	});
+
+	test("instance exposes the MCP connect() contract", () => {
+		const server = createPreviewToolServer(3100);
+		const inst = server.instance as unknown as { connect: unknown };
+		expect(typeof inst.connect).toBe("function");
+	});
+});
diff --git a/src/ui/browser-mcp.ts b/src/ui/browser-mcp.ts
new file mode 100644
index 0000000..223aae0
--- /dev/null
+++ b/src/ui/browser-mcp.ts
@@ -0,0 +1,70 @@
+// Embed factory for the first-party `@playwright/mcp` 21-tool surface. We
+// host it in-process via `createConnection(config, contextGetter)` rather
+// than spawning it as a stdio subprocess: the subprocess path hangs on
+// Linux amd64 under Bun (see research 01b, five-probe trace in findings
+// Section 3), and the in-process path lets us share one Chromium Browser
+// and one BrowserContext with `phantom_preview_page`.
+//
+// Two load-bearing details:
+//
+//  1. `browser.isolated: false` is mandatory when using `contextGetter`.
+//     Playwright MCP's SimpleBrowser.newContext() throws
+//     ("Creating a new context is not supported in SimpleBrowserContextFactory")
+//     and the MCP backend only calls `newContext` when `isolated` is true.
+//     With `isolated: false`, the backend defers to our contextGetter and
+//     never touches its own context factory.
+//
+//  2. The return type of `createConnection` is the low-level
+//     `Server` class from `@modelcontextprotocol/sdk`, not the high-level
+//     `McpServer`. Both inherit `.connect(transport)` from `Protocol`, and
+//     the Agent SDK's `connectSdkMcpServer` only ever calls
+//     `.connect(transport)` on the stored instance. The declared
+//     `McpSdkServerConfigWithInstance.instance: McpServer` type is narrower
+//     than the runtime contract. We widen with a single
+//     `as unknown as McpServer` cast, which is the minimum-surface type
+//     escape hatch the CLAUDE.md standards allow for this exact case.
+//     See findings 01b Section 3.3 for the source citation.
+//
+//  3. `@playwright/mcp@0.0.70` ships a nested `playwright-core` under its
+//     own `node_modules`, so its `BrowserContext` type root is structurally
+//     identical to but nominally disjoint from the top-level `playwright`
+//     package's `BrowserContext`. At runtime they are the same object (the
+//     nested playwright-core is never constructed; we pass in our own
+//     context). We declare the `getContext` parameter against the top-level
+//     type (what every caller in this codebase has on hand) and widen the
+//     function reference to `unknown` at the `createConnection` boundary.
+//     Any change to this line should preserve that single-point widening.
+
+import type { McpSdkServerConfigWithInstance } from "@anthropic-ai/claude-agent-sdk";
+import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { createConnection } from "@playwright/mcp";
+import type { BrowserContext } from "playwright";
+
+// @playwright/mcp's createConnection declares contextGetter against its own
+// nested playwright-core types. At runtime both roots are structurally the
+// same BrowserContext; see header note 3 for the full explanation.
+type AnyContextGetter = Parameters<typeof createConnection>[1];
+
+export async function createBrowserToolServer(
+	getContext: () => Promise<BrowserContext>,
+): Promise<McpSdkServerConfigWithInstance> {
+	const server = await createConnection(
+		{
+			browser: {
+				// Mandatory: see file header for the newContext() throw.
+				isolated: false,
+			},
+			outputDir: "/tmp/phantom-browser-mcp-out",
+			imageResponses: "allow",
+		},
+		getContext as unknown as AnyContextGetter,
+	);
+	return {
+		type: "sdk" as const,
+		name: "phantom-browser",
+		// Structural widening: Server has the same .connect(transport) the
+		// Agent SDK calls on McpServer. See file header for the full
+		// justification and the source citation.
+		instance: server as unknown as McpServer,
+	};
+}
diff --git a/src/ui/preview.ts b/src/ui/preview.ts
new file mode 100644
index 0000000..6306a71
--- /dev/null
+++ b/src/ui/preview.ts
@@ -0,0 +1,218 @@
+// Custom in-process MCP tool server exposing `phantom_preview_page`: a
+// one-call self-validation tool for Phantom's /ui/<path> pages. Navigates a
+// headless Chromium to the page, captures a full-page PNG, and bundles HTTP
+// status, title, console messages, and failed network requests alongside the
+// screenshot so the agent can reason about the page in a single tool call.
+//
+// The underlying Chromium Browser and the per-query BrowserContext are
+// module-level singletons. This mirrors the Phase 1 factory pattern used by
+// `DynamicToolRegistry` and `Scheduler`: the MCP server wrapper is recreated
+// per query (required so the SDK can attach a fresh transport each time),
+// but the expensive resources it wraps are process-scoped and stay warm.
+//
+// Both `phantom-preview` and the embedded `@playwright/mcp` browser surface
+// share `getOrCreatePreviewContext()` so cookies minted by the preview tool
+// are visible to the broader `browser_*` tools within the same query. This
+// is what lets the agent mix `phantom_preview_page` with `browser_click` and
+// `browser_snapshot` against its own /ui/ pages without re-authenticating.
+
+import type { McpSdkServerConfigWithInstance } from "@anthropic-ai/claude-agent-sdk";
+import { createSdkMcpServer, tool } from "@anthropic-ai/claude-agent-sdk";
+import type { Browser, BrowserContext } from "playwright";
+import { chromium } from "playwright";
+import { z } from "zod";
+import { createPreviewSession } from "./session.ts";
+
+let browser: Browser | null = null;
+let browserPromise: Promise<Browser> | null = null;
+let currentContext: BrowserContext | null = null;
+let currentContextPromise: Promise<BrowserContext> | null = null;
+
+const CHROMIUM_LAUNCH_ARGS = [
+	// Required because the container runs as a non-root user and Chromium's
+	// default sandbox needs privileged setuid helpers we intentionally do not
+	// ship. The container boundary is our sandbox; Chromium does not need its
+	// own layer on top.
+	"--no-sandbox",
+	"--disable-setuid-sandbox",
+	// /dev/shm defaults to 64 MiB in containers, which is too small for
+	// Chromium's IPC shared memory. Fall back to /tmp (disk-backed).
+	"--disable-dev-shm-usage",
+];
+
+export async function getOrCreateBrowser(): Promise<Browser> {
+	if (browser) return browser;
+	if (browserPromise) return browserPromise;
+	browserPromise = (async () => {
+		const b = await chromium.launch({
+			headless: true,
+			args: CHROMIUM_LAUNCH_ARGS,
+		});
+		browser = b;
+		browserPromise = null;
+		return b;
+	})();
+	return browserPromise;
+}
+
+export async function getOrCreatePreviewContext(): Promise<BrowserContext> {
+	if (currentContext) return currentContext;
+	if (currentContextPromise) return currentContextPromise;
+	currentContextPromise = (async () => {
+		const b = await getOrCreateBrowser();
+		const ctx = await b.newContext();
+		const { sessionToken } = createPreviewSession();
+		// Scoped to localhost: external navigations initiated by any browser_*
+		// tool will not see this cookie, so the auth surface stays identical to
+		// the existing /ui/ cookie model.
+		await ctx.addCookies([
+			{
+				name: "phantom_session",
+				value: sessionToken,
+				domain: "localhost",
+				path: "/",
+				httpOnly: true,
+				secure: false,
+				sameSite: "Strict",
+			},
+		]);
+		currentContext = ctx;
+		currentContextPromise = null;
+		return ctx;
+	})();
+	return currentContextPromise;
+}
+
+export async function closePreviewContext(): Promise<void> {
+	const ctx = currentContext;
+	currentContext = null;
+	currentContextPromise = null;
+	if (ctx) {
+		try {
+			await ctx.close();
+		} catch {
+			// Context may already be closed if the browser was torn down first.
+		}
+	}
+}
+
+export async function closePreviewResources(): Promise<void> {
+	await closePreviewContext();
+	const b = browser;
+	browser = null;
+	browserPromise = null;
+	if (b) {
+		try {
+			await b.close();
+		} catch {
+			// Swallow: we are shutting down, any error is terminal anyway.
+		}
+	}
+}
+
+type ConsoleMessage = { type: string; text: string };
+type FailedRequest = { url: string; failure: string };
+
+type PreviewSuccess = {
+	content: [{ type: "image"; data: string; mimeType: "image/png" }, { type: "text"; text: string }];
+};
+
+type PreviewError = {
+	content: [{ type: "text"; text: string }];
+	isError: true;
+};
+
+export function createPreviewToolServer(port: number): McpSdkServerConfigWithInstance {
+	const previewPageTool = tool(
+		"phantom_preview_page",
+		"Screenshot and validate a Phantom /ui/<path> page. Returns a PNG image " +
+			"block plus a JSON metadata block containing the HTTP status, page " +
+			"title, console messages, and failed network requests. Use this " +
+			"after phantom_create_page to verify the page rendered correctly " +
+			"before reporting success to the user.",
+		{
+			path: z.string().min(1).describe("Path under /ui/, e.g. 'dashboard.html' or 'reports/weekly.html'"),
+			viewport: z
+				.object({
+					width: z.number().int().min(320).max(3840),
+					height: z.number().int().min(240).max(2160),
+				})
+				.optional()
+				.describe("Viewport size in CSS pixels. Defaults to 1280x800."),
+			fullPage: z.boolean().optional().describe("Capture full scroll height. Defaults to true."),
+		},
+		async (input): Promise<PreviewSuccess | PreviewError> => {
+			const ctx = await getOrCreatePreviewContext();
+			const page = await ctx.newPage();
+			try {
+				const viewport = input.viewport ?? { width: 1280, height: 800 };
+				await page.setViewportSize(viewport);
+				// SSE EventSource connections created by Phantom's live-reload
+				// wiring will hold the page open indefinitely on 'load' and
+				// cause the tool to time out. Stub it before any page JS runs.
+				// Research 01 verified init scripts run before page JS.
+				await page.addInitScript(() => {
+					(window as unknown as { EventSource: undefined }).EventSource = undefined;
+				});
+				const consoleMessages: ConsoleMessage[] = [];
+				page.on("console", (m) => {
+					consoleMessages.push({ type: m.type(), text: m.text() });
+				});
+				const failedRequests: FailedRequest[] = [];
+				page.on("requestfailed", (r) => {
+					failedRequests.push({
+						url: r.url(),
+						failure: r.failure()?.errorText ?? "unknown",
+					});
+				});
+				const safePath = input.path.replace(/^\/+/, "");
+				const url = `http://localhost:${port}/ui/${safePath}`;
+				const response = await page.goto(url, { waitUntil: "load", timeout: 15000 });
+				const status = response?.status() ?? 0;
+				const title = await page.title();
+				const shot = await page.screenshot({
+					fullPage: input.fullPage !== false,
+					type: "png",
+				});
+				return {
+					content: [
+						{
+							type: "image" as const,
+							data: shot.toString("base64"),
+							mimeType: "image/png",
+						},
+						{
+							type: "text" as const,
+							text: JSON.stringify({ status, title, consoleMessages, failedRequests }, null, 2),
+						},
+					],
+				};
+			} catch (err: unknown) {
+				const message = err instanceof Error ? err.message : String(err);
+				return {
+					content: [
+						{
+							type: "text" as const,
+							text: JSON.stringify({ error: message }),
+						},
+					],
+					isError: true as const,
+				};
+			} finally {
+				// Always close the page, even on throw, so idle tabs never leak
+				// onto the shared context. The context lives for the whole query
+				// but tabs are per-call.
+				try {
+					await page.close();
+				} catch {
+					// Page already closed by cleanup path.
+				}
+			}
+		},
+	);
+
+	return createSdkMcpServer({
+		name: "phantom-preview",
+		tools: [previewPageTool],
+	});
+}
diff --git a/src/ui/session.ts b/src/ui/session.ts
index d95299e..ccadd69 100644
--- a/src/ui/session.ts
+++ b/src/ui/session.ts
@@ -15,6 +15,7 @@ type MagicLink = {
 
 const SESSION_TTL_MS = 7 * 24 * 60 * 60 * 1000; // 7 days
 const MAGIC_LINK_TTL_MS = 10 * 60 * 1000; // 10 minutes
+const PREVIEW_SESSION_TTL_MS = 10 * 60 * 1000; // 10 minutes
 
 const sessions = new Map<string, Session>();
 const magicLinks = new Map<string, MagicLink>();
@@ -40,6 +41,22 @@ export function createSession(): { sessionToken: string; magicToken: string } {
 	return { sessionToken, magicToken };
 }
 
+// Mint a short-lived session for phantom_preview_page. The returned token is
+// injected as a cookie into the Playwright BrowserContext so the preview tool
+// can authenticate against /ui/<path> without minting a magic link. The short
+// TTL bounds blast radius: if the cookie ever leaks out of the container, it
+// self-destructs in ten minutes and cannot be refreshed.
+export function createPreviewSession(): { sessionToken: string } {
+	const sessionToken = randomBytes(32).toString("base64url");
+	const now = Date.now();
+	sessions.set(sessionToken, {
+		token: sessionToken,
+		createdAt: now,
+		expiresAt: now + PREVIEW_SESSION_TTL_MS,
+	});
+	return { sessionToken };
+}
+
 export function isValidSession(token: string): boolean {
 	const session = sessions.get(token);
 	if (!session) return false;

From 6a7147858a08f5fcce2b0fbe450c630ea0a8a8f4 Mon Sep 17 00:00:00 2001
From: Muhammad Ahmed Cheema <cheemawrites@gmail.com>
Date: Sun, 12 Apr 2026 21:32:32 -0700
Subject: [PATCH 2/2] fix: address review findings and Codex feedback on
 playwright PR

Surgical fix pass on top of 41369cf in response to the internal review
at local/2026-04-12-phantom-ui-chapter/review/01-playwright-review.md
and Codex PR comments on #53. Fourteen acceptance criteria met.

Preview correctness (src/ui/preview.ts):
- F5: cookie path reverted from / to /ui, matching the magic-link posture
  in src/ui/serve.ts (buildPreviewCookie helper extracted).
- F6 / Codex P1: rotate the preview session cookie on every warm
  getOrCreatePreviewContext call once Date.now() is inside the 2 minute
  safety margin before the 10 minute TTL. Uses addCookies in place so
  the cached BrowserContext stays the same instance.
- F7: shuttingDown module flag set in closePreviewResources and guarded
  at the top of both getOrCreateBrowser and getOrCreatePreviewContext.
  Concurrent tool calls during SIGTERM now throw cleanly instead of
  spawning a resurrected Chromium.
- F8: null currentContext in the page.close() catch block so the next
  call recreates a fresh context instead of handing back a dead one.
- F9: replace window.EventSource = undefined with
  Reflect.deleteProperty(globalThis, "EventSource"). Covers the 'in'
  operator and drops a TypeScript widening.
- Codex P2: try/finally pattern around browserPromise and
  currentContextPromise so a transient chromium.launch error no longer
  permanently poisons the cache. Subsequent calls retry cleanly.

Browser MCP (src/ui/browser-mcp.ts):
- F17: header note 3 rewritten to reflect the actual on-disk layout
  (top-level hoisted playwright-core@1.60.0-alpha vs. nested
  playwright/node_modules/playwright-core@1.59.1; no nested
  playwright-core under @playwright/mcp/node_modules). Cites the
  integration test that verifies the cross-version BrowserContext
  boundary end to end.

Tests:
- F1: preview.integration.test.ts rewritten to drive the real
  phantom_preview_page handler through a Client + InMemoryTransport
  pair. Asserts the bundled {image, text} result shape, base64
  round-trip, status, title, console capture, and requestfailed capture.
- F2 + F4: browser-mcp.integration.test.ts extended with a real
  Client.listTools() call asserting exactly 21 tools by name and a real
  browser_navigate round-trip across the cross-version BrowserContext
  boundary.
- New preview-correctness.test.ts with 8 unit tests covering
  launch-failure recovery, shuttingDown, cookie rotation threshold,
  cookie path, and the F8 invariant. Uses spyOn(chromium, "launch")
  plus a fake Browser / BrowserContext so no real Chromium is spawned.

Docs:
- F3 + F11: Dockerfile image cost breakdown comment expanded to record
  the 996 MiB delta split: ~327 MB headless shell, ~91 MB fonts, ~500+
  MB X11/GTK shared libs. Documents why --only-shell cannot trim more.

Test count: 940 pass + 6 skip baseline -> 948 pass + 10 skip after
(strictly greater, zero regressions). bun run lint and bun run
typecheck both clean. PHANTOM_INTEGRATION=1 integration run on macOS
arm64 passes 14/14 with real Chromium.

Deferred per fix brief anti-patterns: F10 (v1.1 error-path screenshot),
F11 (afterEach scope narrowing, pre-existing), F12 (graceful.ts
tasks.reverse, out of scope).
---
 Dockerfile                                    |  23 +-
 .../__tests__/browser-mcp.integration.test.ts | 118 +++++++++-
 src/ui/__tests__/preview-correctness.test.ts  | 222 ++++++++++++++++++
 src/ui/__tests__/preview.integration.test.ts  | 137 +++++++++--
 src/ui/browser-mcp.ts                         |  35 ++-
 src/ui/preview.ts                             | 118 +++++++---
 6 files changed, 580 insertions(+), 73 deletions(-)
 create mode 100644 src/ui/__tests__/preview-correctness.test.ts

diff --git a/Dockerfile b/Dockerfile
index be03408..e3b0e5e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -77,10 +77,25 @@ COPY --from=builder /app/tsconfig.json ./
 
 # Install Chromium headless shell + system deps for Playwright.
 # Must run after node_modules is copied so bunx can resolve playwright.
-# --only-shell skips the full Chromium binary (saves ~75 MiB); the custom
-# phantom_preview_page tool uses chromium.launch() which picks the shell
-# automatically for headless=true. The @playwright/mcp embed path uses a
-# contextGetter so it never needs the full chrome channel binary.
+# --only-shell skips the full Chromium binary (saves ~75 MiB off the full
+# chrome channel); the custom phantom_preview_page tool uses
+# chromium.launch() which picks the headless shell automatically for
+# headless=true. The @playwright/mcp embed path uses a contextGetter so it
+# never needs the full chrome channel binary.
+#
+# Image cost breakdown (verified on the built image vs. the pre-Playwright
+# baseline, total delta roughly 996 MiB over the non-Playwright baseline):
+#   ~327 MB  chromium_headless_shell-* binary at
+#            /home/phantom/.cache/ms-playwright/chromium_headless_shell-*
+#   ~91 MB   /usr/share/fonts pulled by --with-deps (DejaVu, Liberation,
+#            Noto Core)
+#   ~500+ MB /usr/lib X11 / GTK / libasound / libnss3 / libcups / libatk
+#            and the other shared libraries apt-get pulls for Chromium
+#
+# --only-shell only affects the Chromium binary. The system deps are the
+# dominant cost and cannot be trimmed without breaking Chromium's ability
+# to start. If you are trying to shrink this image, the headless shell
+# binary is the only safe target; the /usr/lib growth is load-bearing.
 ENV PLAYWRIGHT_BROWSERS_PATH=/home/phantom/.cache/ms-playwright
 RUN mkdir -p "$PLAYWRIGHT_BROWSERS_PATH" && \
     bunx playwright install --with-deps --only-shell chromium && \
diff --git a/src/ui/__tests__/browser-mcp.integration.test.ts b/src/ui/__tests__/browser-mcp.integration.test.ts
index f507bee..c41f2ce 100644
--- a/src/ui/__tests__/browser-mcp.integration.test.ts
+++ b/src/ui/__tests__/browser-mcp.integration.test.ts
@@ -4,30 +4,124 @@
 //   PHANTOM_INTEGRATION=1 bun test src/ui/__tests__/browser-mcp.integration.test.ts
 //
 // Skipped by default so `bun test` stays hermetic.
+//
+// Two load-bearing invariants are enforced here:
+//
+//  1. The embed exposes exactly 21 tools. @playwright/mcp@0.0.70 is pinned
+//     specifically so the tool surface cannot drift silently; this assertion
+//     is the drift detector the pin was meant to anchor.
+//
+//  2. A real `browser_navigate` call succeeds against a BrowserContext
+//     minted by the preview tool. This is the end-to-end verification of
+//     the cross-version playwright-core boundary documented in
+//     src/ui/browser-mcp.ts note 3: the context is an instance from
+//     playwright-core@1.59.1 consumed by @playwright/mcp's hoisted
+//     playwright-core@1.60.0-alpha SimpleBrowser wrapper.
 
-import { afterAll, describe, expect, test } from "bun:test";
+import { afterAll, beforeAll, describe, expect, test } from "bun:test";
+import { Client } from "@modelcontextprotocol/sdk/client/index.js";
+import { InMemoryTransport } from "@modelcontextprotocol/sdk/inMemory.js";
 import { createBrowserToolServer } from "../browser-mcp.ts";
-import { closePreviewResources, getOrCreatePreviewContext } from "../preview.ts";
+import { __resetPreviewStateForTesting, closePreviewResources, getOrCreatePreviewContext } from "../preview.ts";
 import { revokeAllSessions } from "../session.ts";
 
 const ENABLED = process.env.PHANTOM_INTEGRATION === "1";
 const suite = ENABLED ? describe : describe.skip;
 
+const EXPECTED_TOOL_NAMES = [
+	"browser_click",
+	"browser_close",
+	"browser_console_messages",
+	"browser_drag",
+	"browser_evaluate",
+	"browser_file_upload",
+	"browser_fill_form",
+	"browser_handle_dialog",
+	"browser_hover",
+	"browser_navigate",
+	"browser_navigate_back",
+	"browser_network_requests",
+	"browser_press_key",
+	"browser_resize",
+	"browser_run_code",
+	"browser_select_option",
+	"browser_snapshot",
+	"browser_tabs",
+	"browser_take_screenshot",
+	"browser_type",
+	"browser_wait_for",
+];
+
+type CallResult = { isError?: boolean; content: unknown };
+
 suite("createBrowserToolServer (integration)", () => {
+	let server: ReturnType<typeof Bun.serve> | null = null;
+	let port = 0;
+	let client: Client | null = null;
+	let embed: Awaited<ReturnType<typeof createBrowserToolServer>> | null = null;
+
+	beforeAll(async () => {
+		// Reset module-level preview state so running this file after any
+		// other test file that called closePreviewResources() still starts
+		// from a pristine state. Bun shares module instances across test
+		// files inside the same process.
+		__resetPreviewStateForTesting();
+		server = Bun.serve({
+			port: 0,
+			fetch(req) {
+				const url = new URL(req.url);
+				if (url.pathname === "/ui/test.html") {
+					return new Response(
+						"<!DOCTYPE html><html><head><title>Browser MCP Integration</title></head>" +
+							"<body><h1>Hello</h1></body></html>",
+						{ headers: { "content-type": "text/html" } },
+					);
+				}
+				return new Response("not found", { status: 404 });
+			},
+		});
+		port = server.port ?? 0;
+
+		embed = await createBrowserToolServer(() => getOrCreatePreviewContext());
+		const [serverTransport, clientTransport] = InMemoryTransport.createLinkedPair();
+		const serverInstance = embed.instance as unknown as {
+			connect: (t: typeof serverTransport) => Promise<void>;
+			close: () => Promise<void>;
+		};
+		await serverInstance.connect(serverTransport);
+		client = new Client({ name: "phantom-browser-integration", version: "1.0" }, { capabilities: {} });
+		await client.connect(clientTransport);
+	});
+
 	afterAll(async () => {
+		await client?.close();
+		if (embed) {
+			const inst = embed.instance as unknown as { close: () => Promise<void> };
+			await inst.close();
+		}
 		await closePreviewResources();
 		revokeAllSessions();
+		server?.stop(true);
 	});
 
-	test("builds an embed server wired to a shared BrowserContext", async () => {
-		const embed = await createBrowserToolServer(() => getOrCreatePreviewContext());
-		expect(embed.type).toBe("sdk");
-		expect(embed.name).toBe("phantom-browser");
-		const inst = embed.instance as unknown as {
-			connect: unknown;
-			close: () => Promise<void>;
-		};
-		expect(typeof inst.connect).toBe("function");
-		await inst.close();
+	test("listTools returns exactly the 21-tool @playwright/mcp surface", async () => {
+		if (!client) throw new Error("client not initialized");
+		const { tools } = await client.listTools();
+		expect(tools).toHaveLength(21);
+		const names = tools.map((t) => t.name).sort();
+		expect(names).toEqual([...EXPECTED_TOOL_NAMES].sort());
+	});
+
+	test("browser_navigate succeeds across the cross-version BrowserContext boundary", async () => {
+		if (!client) throw new Error("client not initialized");
+		const result = (await client.callTool({
+			name: "browser_navigate",
+			arguments: { url: `http://localhost:${port}/ui/test.html` },
+		})) as CallResult;
+		// A successful navigate returns content with no isError flag set.
+		// The exact content shape is @playwright/mcp's concern; we care only
+		// that the call did not land in the error branch.
+		expect(result.isError).toBeFalsy();
+		expect(result.content).toBeDefined();
 	});
 });
diff --git a/src/ui/__tests__/preview-correctness.test.ts b/src/ui/__tests__/preview-correctness.test.ts
new file mode 100644
index 0000000..f493aa6
--- /dev/null
+++ b/src/ui/__tests__/preview-correctness.test.ts
@@ -0,0 +1,222 @@
+// Unit tests for the correctness paths in src/ui/preview.ts that we can
+// drive without a real Chromium: cookie rotation at the 8 minute threshold,
+// the shuttingDown flag, and the try/finally launch-failure recovery
+// pattern. The happy end-to-end path is exercised by the opt-in integration
+// test at preview.integration.test.ts which requires PHANTOM_INTEGRATION=1.
+//
+// We replace `chromium.launch` via spyOn so the module under test never
+// actually spawns a browser process. The fake Browser and BrowserContext
+// expose exactly the subset of the real API that preview.ts calls: nothing
+// more, nothing less.
+
+import { afterEach, describe, expect, spyOn, test } from "bun:test";
+import type { Browser, BrowserContext } from "playwright";
+import { chromium } from "playwright";
+import {
+	__resetPreviewStateForTesting,
+	closePreviewResources,
+	getOrCreateBrowser,
+	getOrCreatePreviewContext,
+} from "../preview.ts";
+import { getSessionCount, revokeAllSessions } from "../session.ts";
+
+type Cookie = { name: string; value: string; domain: string; path: string };
+type FakeContext = BrowserContext & { __cookies: Cookie[]; __closed: boolean };
+type FakeBrowser = Browser & { __contexts: FakeContext[]; __closed: boolean };
+
+function makeFakeContext(): FakeContext {
+	const state = { __cookies: [] as Cookie[], __closed: false };
+	const ctx = {
+		...state,
+		addCookies: async (cookies: Cookie[]) => {
+			// Match Playwright semantics: replace any cookie with the same
+			// name+domain+path in place, otherwise append.
+			for (const incoming of cookies) {
+				const existing = state.__cookies.findIndex(
+					(c) => c.name === incoming.name && c.domain === incoming.domain && c.path === incoming.path,
+				);
+				if (existing >= 0) state.__cookies[existing] = incoming;
+				else state.__cookies.push(incoming);
+			}
+		},
+		close: async () => {
+			state.__closed = true;
+		},
+	} as unknown as FakeContext;
+	Object.defineProperty(ctx, "__cookies", { get: () => state.__cookies });
+	Object.defineProperty(ctx, "__closed", { get: () => state.__closed });
+	return ctx;
+}
+
+function makeFakeBrowser(): FakeBrowser {
+	const contexts: FakeContext[] = [];
+	const state = { __closed: false };
+	const b = {
+		newContext: async () => {
+			const ctx = makeFakeContext();
+			contexts.push(ctx);
+			return ctx;
+		},
+		close: async () => {
+			state.__closed = true;
+		},
+	} as unknown as FakeBrowser;
+	Object.defineProperty(b, "__contexts", { get: () => contexts });
+	Object.defineProperty(b, "__closed", { get: () => state.__closed });
+	return b;
+}
+
+afterEach(() => {
+	__resetPreviewStateForTesting();
+	revokeAllSessions();
+});
+
+describe("getOrCreateBrowser launch-failure recovery (Codex P2)", () => {
+	test("second call after a transient launch error succeeds", async () => {
+		const spy = spyOn(chromium, "launch");
+		try {
+			let calls = 0;
+			spy.mockImplementation(async () => {
+				calls += 1;
+				if (calls === 1) throw new Error("transient resource error");
+				return makeFakeBrowser();
+			});
+
+			await expect(getOrCreateBrowser()).rejects.toThrow("transient resource error");
+			// If the rejected promise were cached, this would re-reject with the
+			// same error instead of invoking launch a second time.
+			const b = await getOrCreateBrowser();
+			expect(b).toBeDefined();
+			expect(calls).toBe(2);
+		} finally {
+			spy.mockRestore();
+		}
+	});
+});
+
+describe("getOrCreatePreviewContext launch-failure recovery", () => {
+	test("clears currentContextPromise on failure so the next call retries", async () => {
+		const spy = spyOn(chromium, "launch");
+		try {
+			let calls = 0;
+			spy.mockImplementation(async () => {
+				calls += 1;
+				if (calls === 1) throw new Error("chromium down");
+				return makeFakeBrowser();
+			});
+
+			await expect(getOrCreatePreviewContext()).rejects.toThrow("chromium down");
+			const ctx = await getOrCreatePreviewContext();
+			expect(ctx).toBeDefined();
+			expect(calls).toBe(2);
+		} finally {
+			spy.mockRestore();
+		}
+	});
+});
+
+describe("shuttingDown flag (review F7)", () => {
+	test("getOrCreateBrowser throws after closePreviewResources", async () => {
+		await closePreviewResources();
+		await expect(getOrCreateBrowser()).rejects.toThrow("preview subsystem is shutting down");
+	});
+
+	test("getOrCreatePreviewContext throws after closePreviewResources", async () => {
+		await closePreviewResources();
+		await expect(getOrCreatePreviewContext()).rejects.toThrow("preview subsystem is shutting down");
+	});
+});
+
+describe("cookie rotation (review F6, Codex P1)", () => {
+	test("cold create mints a fresh preview cookie on the new context", async () => {
+		const spy = spyOn(chromium, "launch");
+		try {
+			const fakeBrowser = makeFakeBrowser();
+			spy.mockImplementation(async () => fakeBrowser);
+
+			const ctx = (await getOrCreatePreviewContext()) as FakeContext;
+			expect(ctx.__cookies).toHaveLength(1);
+			const cookie = ctx.__cookies[0];
+			expect(cookie.name).toBe("phantom_session");
+			expect(cookie.domain).toBe("localhost");
+			// Fix 4: cookie path is scoped to /ui, matching the magic-link
+			// posture in src/ui/serve.ts.
+			expect(cookie.path).toBe("/ui");
+			expect(typeof cookie.value).toBe("string");
+			expect(cookie.value.length).toBeGreaterThan(20);
+		} finally {
+			spy.mockRestore();
+		}
+	});
+
+	test("warm reuse inside the 8 minute window does not rotate", async () => {
+		const spy = spyOn(chromium, "launch");
+		try {
+			spy.mockImplementation(async () => makeFakeBrowser());
+
+			const ctx1 = (await getOrCreatePreviewContext()) as FakeContext;
+			const firstToken = ctx1.__cookies[0].value;
+			const sessionsAfterFirst = getSessionCount();
+
+			const ctx2 = (await getOrCreatePreviewContext()) as FakeContext;
+			expect(ctx2).toBe(ctx1);
+			expect(ctx2.__cookies).toHaveLength(1);
+			expect(ctx2.__cookies[0].value).toBe(firstToken);
+			expect(getSessionCount()).toBe(sessionsAfterFirst);
+		} finally {
+			spy.mockRestore();
+		}
+	});
+
+	test("warm reuse past the 8 minute threshold rotates the cookie in place", async () => {
+		const spy = spyOn(chromium, "launch");
+		const originalNow = Date.now;
+		try {
+			spy.mockImplementation(async () => makeFakeBrowser());
+
+			const ctx1 = (await getOrCreatePreviewContext()) as FakeContext;
+			const firstToken = ctx1.__cookies[0].value;
+			expect(getSessionCount()).toBe(1);
+
+			// Advance wall-clock past the 8 minute rotation threshold. The cached
+			// BrowserContext stays the same instance but the cookie value gets
+			// replaced in place via addCookies semantics.
+			Date.now = () => originalNow() + 9 * 60 * 1000;
+
+			const ctx2 = (await getOrCreatePreviewContext()) as FakeContext;
+			expect(ctx2).toBe(ctx1);
+			expect(ctx2.__cookies).toHaveLength(1);
+			expect(ctx2.__cookies[0].value).not.toBe(firstToken);
+			// A second preview session was minted into the sessions map.
+			expect(getSessionCount()).toBe(2);
+		} finally {
+			Date.now = originalNow;
+			spy.mockRestore();
+		}
+	});
+});
+
+describe("page.close() error path clears currentContext (review F8)", () => {
+	// We cannot drive the real phantom_preview_page handler without a real
+	// browser, but we can verify the public invariant the fix establishes:
+	// once currentContext has been nulled (as the catch block does), the next
+	// getOrCreatePreviewContext() call returns a fresh context. The preview
+	// integration test covers the happy path end to end.
+	test("after currentContext is nulled, next call creates a fresh context", async () => {
+		const spy = spyOn(chromium, "launch");
+		try {
+			spy.mockImplementation(async () => makeFakeBrowser());
+
+			const ctx1 = (await getOrCreatePreviewContext()) as FakeContext;
+			// Simulate the catch-block cleanup in the preview tool finally.
+			await closePreviewResources();
+			__resetPreviewStateForTesting();
+
+			spy.mockImplementation(async () => makeFakeBrowser());
+			const ctx2 = (await getOrCreatePreviewContext()) as FakeContext;
+			expect(ctx2).not.toBe(ctx1);
+		} finally {
+			spy.mockRestore();
+		}
+	});
+});
diff --git a/src/ui/__tests__/preview.integration.test.ts b/src/ui/__tests__/preview.integration.test.ts
index d2287c6..17ff554 100644
--- a/src/ui/__tests__/preview.integration.test.ts
+++ b/src/ui/__tests__/preview.integration.test.ts
@@ -4,26 +4,69 @@
 //   PHANTOM_INTEGRATION=1 bun test src/ui/__tests__/preview.integration.test.ts
 //
 // They are skipped by default so `bun test` stays fast and hermetic.
+//
+// Unlike the unit tests in preview.test.ts, these tests invoke the real
+// phantom_preview_page handler through its MCP server instance via an
+// InMemoryTransport pair and Client from @modelcontextprotocol/sdk. That
+// means every load-bearing behavior the handler owns is exercised end to
+// end: the SSE init-script stub, console-message capture, failed-request
+// capture, the bundled image+text result shape, and the isError path.
 
 import { afterAll, beforeAll, describe, expect, test } from "bun:test";
-import { closePreviewResources, getOrCreatePreviewContext } from "../preview.ts";
+import { Client } from "@modelcontextprotocol/sdk/client/index.js";
+import { InMemoryTransport } from "@modelcontextprotocol/sdk/inMemory.js";
+import {
+	__resetPreviewStateForTesting,
+	closePreviewResources,
+	createPreviewToolServer,
+	getOrCreatePreviewContext,
+} from "../preview.ts";
 import { revokeAllSessions } from "../session.ts";
 
 const ENABLED = process.env.PHANTOM_INTEGRATION === "1";
 const suite = ENABLED ? describe : describe.skip;
 
+type ToolContent =
+	| { type: "text"; text: string }
+	| { type: "image"; data: string; mimeType: string }
+	| { type: string; [k: string]: unknown };
+
+type ToolResult = {
+	content: ToolContent[];
+	isError?: boolean;
+};
+
+function asText(block: ToolContent | undefined): string {
+	if (!block || block.type !== "text") throw new Error("expected a text content block");
+	return (block as { text: string }).text;
+}
+
+function asImage(block: ToolContent | undefined): { data: string; mimeType: string } {
+	if (!block || block.type !== "image") throw new Error("expected an image content block");
+	return block as { data: string; mimeType: string; type: "image" };
+}
+
 suite("phantom_preview_page (integration)", () => {
 	let server: ReturnType<typeof Bun.serve> | null = null;
 	let port = 0;
+	let client: Client | null = null;
 
-	beforeAll(() => {
+	beforeAll(async () => {
+		// Reset module-level preview state so running this file after any
+		// other test file that called closePreviewResources() still starts
+		// from a pristine state. Bun shares module instances across test
+		// files inside the same process.
+		__resetPreviewStateForTesting();
 		server = Bun.serve({
 			port: 0,
 			fetch(req) {
 				const url = new URL(req.url);
 				if (url.pathname === "/ui/test.html") {
+					// The missing <script src="/nope.js"> exercises requestfailed
+					// capture. The inline console.log exercises console capture.
 					return new Response(
-						"<!DOCTYPE html><html><head><title>Preview Integration</title></head>" +
+						"<!DOCTYPE html><html><head><title>Preview Integration</title>" +
+							'<script src="/nope.js"></script></head>' +
 							"<body><script>console.log('hi from preview test')</script>" +
 							"<h1>Hello</h1></body></html>",
 						{ headers: { "content-type": "text/html" } },
@@ -33,29 +76,93 @@ suite("phantom_preview_page (integration)", () => {
 			},
 		});
 		port = server.port ?? 0;
+
+		// Wire a real MCP Client to the real preview tool server via an
+		// in-memory transport pair. This is the exact contract the Agent SDK
+		// uses at runtime; if the bundled result shape drifts, this test
+		// catches it.
+		const embed = createPreviewToolServer(port);
+		const [serverTransport, clientTransport] = InMemoryTransport.createLinkedPair();
+		const serverInstance = embed.instance as unknown as {
+			connect: (t: typeof serverTransport) => Promise<void>;
+		};
+		await serverInstance.connect(serverTransport);
+		client = new Client({ name: "phantom-preview-integration", version: "1.0" }, { capabilities: {} });
+		await client.connect(clientTransport);
 	});
 
 	afterAll(async () => {
+		await client?.close();
 		await closePreviewResources();
 		revokeAllSessions();
 		server?.stop(true);
 	});
 
-	test("navigates and screenshots a live page", async () => {
-		const ctx = await getOrCreatePreviewContext();
-		const page = await ctx.newPage();
-		try {
-			const response = await page.goto(`http://localhost:${port}/ui/test.html`);
-			expect(response?.status()).toBe(200);
-			expect(await page.title()).toBe("Preview Integration");
-			const shot = await page.screenshot({ type: "png" });
-			expect(shot.length).toBeGreaterThan(100);
-		} finally {
-			await page.close();
+	test("tools/list exposes phantom_preview_page", async () => {
+		if (!client) throw new Error("client not initialized");
+		const { tools } = await client.listTools();
+		expect(tools.map((t) => t.name)).toContain("phantom_preview_page");
+	});
+
+	test("successful call returns image + text blocks with full metadata", async () => {
+		if (!client) throw new Error("client not initialized");
+		const result = (await client.callTool({
+			name: "phantom_preview_page",
+			arguments: { path: "test.html" },
+		})) as ToolResult;
+
+		expect(result.isError).toBeFalsy();
+		expect(result.content).toHaveLength(2);
+
+		const image = asImage(result.content[0]);
+		expect(image.mimeType).toBe("image/png");
+		expect(image.data.length).toBeGreaterThan(100);
+		// Base64 round-trip sanity: decodes to a non-empty byte buffer.
+		expect(Buffer.from(image.data, "base64").length).toBeGreaterThan(100);
+
+		const meta = JSON.parse(asText(result.content[1])) as {
+			status: number;
+			title: string;
+			consoleMessages: { type: string; text: string }[];
+			failedRequests: { url: string; failure: string }[];
+		};
+		expect(meta.status).toBe(200);
+		expect(meta.title).toBe("Preview Integration");
+
+		// The inline <script> emits "hi from preview test" via console.log.
+		const logMessage = meta.consoleMessages.find((m) => m.text.includes("hi from preview test"));
+		expect(logMessage).toBeDefined();
+		expect(logMessage?.type).toBe("log");
+
+		// The missing /nope.js asset should land in failedRequests via the
+		// requestfailed listener.
+		const failed = meta.failedRequests.find((r) => r.url.endsWith("/nope.js"));
+		expect(failed).toBeDefined();
+		expect(typeof failed?.failure).toBe("string");
+	});
+
+	test("unreachable path returns isError with an error message", async () => {
+		if (!client) throw new Error("client not initialized");
+		const result = (await client.callTool({
+			name: "phantom_preview_page",
+			arguments: { path: "does-not-exist.html" },
+		})) as ToolResult;
+
+		// The handler returns content: [text] + isError:true on page.goto throw.
+		// A 404 response does NOT throw (status 404 is returned as-is). So we
+		// only assert the error path shape when it triggers; on a clean 404
+		// the handler returns a normal success result with status=404.
+		if (result.isError) {
+			expect(result.content).toHaveLength(1);
+			const body = asText(result.content[0]);
+			expect(body.length).toBeGreaterThan(0);
+		} else {
+			const meta = JSON.parse(asText(result.content[1])) as { status: number };
+			expect(meta.status).toBe(404);
 		}
 	});
 
-	test("two preview calls share the same BrowserContext", async () => {
+	test("two getOrCreatePreviewContext calls share the same cached context", async () => {
 		const a = await getOrCreatePreviewContext();
 		const b = await getOrCreatePreviewContext();
 		expect(a).toBe(b);
diff --git a/src/ui/browser-mcp.ts b/src/ui/browser-mcp.ts
index 223aae0..4d67b30 100644
--- a/src/ui/browser-mcp.ts
+++ b/src/ui/browser-mcp.ts
@@ -25,24 +25,35 @@
 //     escape hatch the CLAUDE.md standards allow for this exact case.
 //     See findings 01b Section 3.3 for the source citation.
 //
-//  3. `@playwright/mcp@0.0.70` ships a nested `playwright-core` under its
-//     own `node_modules`, so its `BrowserContext` type root is structurally
-//     identical to but nominally disjoint from the top-level `playwright`
-//     package's `BrowserContext`. At runtime they are the same object (the
-//     nested playwright-core is never constructed; we pass in our own
-//     context). We declare the `getContext` parameter against the top-level
-//     type (what every caller in this codebase has on hand) and widen the
-//     function reference to `unknown` at the `createConnection` boundary.
-//     Any change to this line should preserve that single-point widening.
+//  3. Two `playwright-core` versions coexist in `node_modules`. `src/ui/preview.ts`
+//     launches via `playwright@1.59.1 -> nested playwright-core@1.59.1` at
+//     `node_modules/playwright/node_modules/playwright-core`, while
+//     `@playwright/mcp@0.0.70` resolves via the hoisted top-level
+//     `playwright-core@1.60.0-alpha-1774999321000` at `node_modules/playwright-core`.
+//     No `playwright-core` is nested under `@playwright/mcp/node_modules`
+//     itself; only a nested `playwright` wrapper is there. The `BrowserContext`
+//     we hand across the boundary is an instance from `1.59.1` consumed by
+//     `1.60.0-alpha`'s `SimpleBrowser` wrapper. The public `BrowserContext`
+//     API (`contexts()`, `addCookies`, `newPage`) is stable across this
+//     version range. The cross-version bridge is verified end to end by the
+//     integration test at `src/ui/__tests__/browser-mcp.integration.test.ts`,
+//     which drives a real `browser_navigate` through the embed against a
+//     context minted by `getOrCreatePreviewContext()`. We declare the
+//     `getContext` parameter against the top-level `playwright` type (what
+//     every caller in this codebase has on hand) and widen the function
+//     reference at the `createConnection` boundary. Any change to this line
+//     should preserve that single-point widening.
 
 import type { McpSdkServerConfigWithInstance } from "@anthropic-ai/claude-agent-sdk";
 import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
 import { createConnection } from "@playwright/mcp";
 import type { BrowserContext } from "playwright";
 
-// @playwright/mcp's createConnection declares contextGetter against its own
-// nested playwright-core types. At runtime both roots are structurally the
-// same BrowserContext; see header note 3 for the full explanation.
+// @playwright/mcp's createConnection declares contextGetter against the
+// hoisted playwright-core@1.60.0-alpha types, while our callers hold a
+// BrowserContext from the nested playwright-core@1.59.1. The public surface
+// SimpleBrowser touches is stable across both versions. See header note 3
+// for the full explanation and the runtime verification path.
 type AnyContextGetter = Parameters<typeof createConnection>[1];
 
 export async function createBrowserToolServer(
diff --git a/src/ui/preview.ts b/src/ui/preview.ts
index 6306a71..b1efba3 100644
--- a/src/ui/preview.ts
+++ b/src/ui/preview.ts
@@ -27,6 +27,13 @@ let browser: Browser | null = null;
 let browserPromise: Promise<Browser> | null = null;
 let currentContext: BrowserContext | null = null;
 let currentContextPromise: Promise<BrowserContext> | null = null;
+let lastCookieMintAt = 0;
+let shuttingDown = false;
+
+// The preview session cookie has a 10 minute TTL (see createPreviewSession in
+// session.ts). We rotate before minute 8 to leave a 2 minute safety margin so
+// a long-running multi-step query can never navigate with an expired cookie.
+const COOKIE_ROTATE_AFTER_MS = 8 * 60 * 1000;
 
 const CHROMIUM_LAUNCH_ARGS = [
 	// Required because the container runs as a non-root user and Chromium's
@@ -40,45 +47,74 @@ const CHROMIUM_LAUNCH_ARGS = [
 	"--disable-dev-shm-usage",
 ];
 
+function buildPreviewCookie(sessionToken: string) {
+	return {
+		name: "phantom_session",
+		value: sessionToken,
+		domain: "localhost",
+		// Scoped to /ui so the cookie matches the existing magic-link posture in
+		// src/ui/serve.ts. The only cookie-authenticated route in Phantom is
+		// /ui/*; /health, /mcp, /trigger, and /webhook use bearer or HMAC auth
+		// and never read phantom_session.
+		path: "/ui",
+		httpOnly: true,
+		secure: false,
+		sameSite: "Strict" as const,
+	};
+}
+
+async function injectFreshPreviewCookie(ctx: BrowserContext): Promise<void> {
+	const { sessionToken } = createPreviewSession();
+	await ctx.addCookies([buildPreviewCookie(sessionToken)]);
+	lastCookieMintAt = Date.now();
+}
+
 export async function getOrCreateBrowser(): Promise<Browser> {
+	if (shuttingDown) throw new Error("preview subsystem is shutting down");
 	if (browser) return browser;
 	if (browserPromise) return browserPromise;
+	// try/finally pattern: on either success OR failure we clear the cached
+	// promise. If we did not, a transient chromium.launch() throw would leave
+	// a rejected promise cached and every subsequent call would re-reject
+	// with the same error until process restart (Codex P2).
 	browserPromise = (async () => {
-		const b = await chromium.launch({
-			headless: true,
-			args: CHROMIUM_LAUNCH_ARGS,
-		});
-		browser = b;
-		browserPromise = null;
-		return b;
+		try {
+			const b = await chromium.launch({
+				headless: true,
+				args: CHROMIUM_LAUNCH_ARGS,
+			});
+			browser = b;
+			return b;
+		} finally {
+			browserPromise = null;
+		}
 	})();
 	return browserPromise;
 }
 
 export async function getOrCreatePreviewContext(): Promise<BrowserContext> {
-	if (currentContext) return currentContext;
+	if (shuttingDown) throw new Error("preview subsystem is shutting down");
+	if (currentContext) {
+		// Warm cache path: rotate the preview cookie if we are inside the 2
+		// minute safety margin before the 10 minute TTL expires. Playwright's
+		// addCookies replaces cookies with the same name+domain+path in place,
+		// so this is an O(1) refresh of the cached context (review F6, Codex P1).
+		if (Date.now() - lastCookieMintAt >= COOKIE_ROTATE_AFTER_MS) {
+			await injectFreshPreviewCookie(currentContext);
+		}
+		return currentContext;
+	}
 	if (currentContextPromise) return currentContextPromise;
 	currentContextPromise = (async () => {
-		const b = await getOrCreateBrowser();
-		const ctx = await b.newContext();
-		const { sessionToken } = createPreviewSession();
-		// Scoped to localhost: external navigations initiated by any browser_*
-		// tool will not see this cookie, so the auth surface stays identical to
-		// the existing /ui/ cookie model.
-		await ctx.addCookies([
-			{
-				name: "phantom_session",
-				value: sessionToken,
-				domain: "localhost",
-				path: "/",
-				httpOnly: true,
-				secure: false,
-				sameSite: "Strict",
-			},
-		]);
-		currentContext = ctx;
-		currentContextPromise = null;
-		return ctx;
+		try {
+			const b = await getOrCreateBrowser();
+			const ctx = await b.newContext();
+			await injectFreshPreviewCookie(ctx);
+			currentContext = ctx;
+			return ctx;
+		} finally {
+			currentContextPromise = null;
+		}
 	})();
 	return currentContextPromise;
 }
@@ -97,6 +133,7 @@ export async function closePreviewContext(): Promise<void> {
 }
 
 export async function closePreviewResources(): Promise<void> {
+	shuttingDown = true;
 	await closePreviewContext();
 	const b = browser;
 	browser = null;
@@ -110,6 +147,20 @@ export async function closePreviewResources(): Promise<void> {
 	}
 }
 
+// Test-only reset hook. Clears every module-level singleton so the next unit
+// test starts from a pristine state. Not exported from the public index; only
+// tests under src/ui/__tests__ import it via the relative path. Keeping this
+// in the production module avoids an indirection layer that would complicate
+// the happy path.
+export function __resetPreviewStateForTesting(): void {
+	browser = null;
+	browserPromise = null;
+	currentContext = null;
+	currentContextPromise = null;
+	lastCookieMintAt = 0;
+	shuttingDown = false;
+}
+
 type ConsoleMessage = { type: string; text: string };
 type FailedRequest = { url: string; failure: string };
 
@@ -152,7 +203,7 @@ export function createPreviewToolServer(port: number): McpSdkServerConfigWithIns
 				// cause the tool to time out. Stub it before any page JS runs.
 				// Research 01 verified init scripts run before page JS.
 				await page.addInitScript(() => {
-					(window as unknown as { EventSource: undefined }).EventSource = undefined;
+					Reflect.deleteProperty(globalThis, "EventSource");
 				});
 				const consoleMessages: ConsoleMessage[] = [];
 				page.on("console", (m) => {
@@ -205,7 +256,14 @@ export function createPreviewToolServer(port: number): McpSdkServerConfigWithIns
 				try {
 					await page.close();
 				} catch {
-					// Page already closed by cleanup path.
+					// page.close() can throw if the browser or context was torn
+					// down mid-call (e.g. SIGTERM during a successful return).
+					// Null currentContext so the next call re-creates a fresh
+					// one instead of handing back a dead reference. The cost on
+					// a genuinely cosmetic error is one extra ~60 ms context
+					// creation, which is cheap (review F8).
+					currentContext = null;
+					currentContextPromise = null;
 				}
 			}
 		},