ghostwright
diff --git a/‎Dockerfile‎
Lines changed: 27 additions & 0 deletions b/‎Dockerfile‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎bun.lock‎
Lines changed: 14 additions & 0 deletions b/‎bun.lock‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎package.json‎
Lines changed: 2 additions & 0 deletions b/‎package.json‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/agent/prompt-assembler.ts‎
Lines changed: 25 additions & 0 deletions b/‎src/agent/prompt-assembler.ts‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎src/agent/runtime.ts‎
Lines changed: 7 additions & 3 deletions b/‎src/agent/runtime.ts‎
Lines changed: 7 additions & 3 deletions
diff --git a/‎src/index.ts‎
Lines changed: 8 additions & 1 deletion b/‎src/index.ts‎
Lines changed: 8 additions & 1 deletion
diff --git a/‎src/ui/__tests__/browser-mcp.integration.test.ts‎
Lines changed: 127 additions & 0 deletions b/‎src/ui/__tests__/browser-mcp.integration.test.ts‎
Lines changed: 127 additions & 0 deletions
diff --git a/‎src/ui/__tests__/browser-mcp.test.ts‎
Lines changed: 37 additions & 0 deletions b/‎src/ui/__tests__/browser-mcp.test.ts‎
Lines changed: 37 additions & 0 deletions
@@ -75,6 +75,33 @@ COPY --from=builder /app/public ./public
 COPY --from=builder /app/package.json ./
 COPY --from=builder /app/tsconfig.json ./
 
+# Install Chromium headless shell + system deps for Playwright.
+# Must run after node_modules is copied so bunx can resolve playwright.
+# --only-shell skips the full Chromium binary (saves ~75 MiB off the full
+# chrome channel); the custom phantom_preview_page tool uses
+# chromium.launch() which picks the headless shell automatically for
+# headless=true. The @playwright/mcp embed path uses a contextGetter so it
+# never needs the full chrome channel binary.
+#
+# Image cost breakdown (verified on the built image; total delta is roughly
+# 996 MiB over the pre-Playwright baseline):
+#   ~327 MB  chromium_headless_shell-* binary at
+#            /home/phantom/.cache/ms-playwright/chromium_headless_shell-*
+#   ~91 MB   /usr/share/fonts pulled by --with-deps (DejaVu, Liberation,
+#            Noto Core)
+#   ~500+ MB /usr/lib X11 / GTK / libasound / libnss3 / libcups / libatk
+#            and the other shared libraries apt-get pulls for Chromium
+#
+# --only-shell only affects the Chromium binary. The system deps are the
+# dominant cost and cannot be trimmed without breaking Chromium's ability
+# to start. If you are trying to shrink this image, the headless shell
+# binary is the only safe target; the /usr/lib growth is load-bearing.
+ENV PLAYWRIGHT_BROWSERS_PATH=/home/phantom/.cache/ms-playwright
+RUN mkdir -p "$PLAYWRIGHT_BROWSERS_PATH" && \
+    bunx playwright install --with-deps --only-shell chromium && \
+    chown -R phantom:phantom /home/phantom/.cache && \
+    rm -rf /var/lib/apt/lists/*
+
 # Copy default phantom-config (constitution.md, persona.md, etc.)
 # These get backed up so they survive the empty volume mount on first run.
 COPY --from=builder /app/phantom-config ./phantom-config
 
@@ -18,10 +18,12 @@
   "dependencies": {
     "@anthropic-ai/claude-agent-sdk": "^0.2.77",
     "@modelcontextprotocol/sdk": "^1.28.0",
+    "@playwright/mcp": "0.0.70",
     "@slack/bolt": "^4.6.0",
     "croner": "^10.0.1",
     "imapflow": "^1.2.18",
     "nodemailer": "^8.0.4",
+    "playwright": "1.59.1",
     "resend": "^6.9.4",
     "telegraf": "^4.16.3",
     "yaml": "^2.6.0",
 
@@ -170,6 +170,31 @@ function buildEnvironment(config: PhantomConfig): string {
 		lines.push(`- Pages are at ${publicUrl}/ui/<filename>`);
 	}
 	lines.push("");
+	lines.push("SELF-VALIDATE EVERY UI PAGE YOU CREATE.");
+	lines.push("After phantom_create_page succeeds, always call phantom_preview_page with");
+	lines.push("the same path. Review the screenshot, the HTTP status, the page title,");
+	lines.push("and especially the console messages and failed network requests list.");
+	lines.push("If there are console errors, failed CDN loads, or the screenshot looks");
+	lines.push("wrong, fix the HTML and re-run phantom_preview_page until clean. Only");
+	lines.push("report the page to the user after validation passes.");
+	lines.push("The tool returns one image block plus a JSON metadata block. The image");
+	lines.push("is for visual review, the JSON tells you what failed to load or error.");
+	lines.push("");
+	lines.push("GENERAL BROWSER CAPABILITY.");
+	lines.push("You have access to the full Playwright MCP tool surface via the");
+	lines.push("phantom-browser server. These tools share one Chromium instance with");
+	lines.push("phantom_preview_page. Use browser_navigate to open any URL (localhost");
+	lines.push("or external), browser_snapshot for structured accessibility text,");
+	lines.push("browser_take_screenshot for pixel captures, browser_click/browser_type/");
+	lines.push("browser_fill_form for interaction, browser_console_messages and");
+	lines.push("browser_network_requests for debugging, browser_tabs for multi-page work.");
+	lines.push("For single-shot self-validation of your own /ui/<path> pages, always");
+	lines.push("prefer phantom_preview_page: one call returns image plus JSON.");
+	lines.push("For multi-step browsing, research tasks, or external sites, use the");
+	lines.push("browser_* tools directly.");
+	lines.push("Do NOT use browser_run_code against external pages unless the user");
+	lines.push("explicitly asked you to execute code in a foreign origin.");
+	lines.push("");
 	lines.push("When you build something that others should access, you have two options:");
 	lines.push("1. Create an HTTP API on a local port. Give the user the internal URL and auth token.");
 	lines.push(
 
@@ -31,7 +31,7 @@ export class AgentRuntime {
 	private roleTemplate: RoleTemplate | null = null;
 	private onboardingPrompt: string | null = null;
 	private lastTrackedFiles: string[] = [];
-	private mcpServerFactories: Record<string, () => McpServerConfig> | null = null;
+	private mcpServerFactories: Record<string, () => McpServerConfig | Promise<McpServerConfig>> | null = null;
 
 	constructor(config: PhantomConfig, db: Database) {
 		this.config = config;
@@ -55,7 +55,7 @@ export class AgentRuntime {
 		this.onboardingPrompt = prompt;
 	}
 
-	setMcpServerFactories(factories: Record<string, () => McpServerConfig>): void {
+	setMcpServerFactories(factories: Record<string, () => McpServerConfig | Promise<McpServerConfig>>): void {
 		this.mcpServerFactories = factories;
 	}
 
@@ -208,7 +208,11 @@ export class AgentRuntime {
 					...(useResume && session.sdk_session_id ? { resume: session.sdk_session_id } : {}),
 					...(this.mcpServerFactories
 						? {
-								mcpServers: Object.fromEntries(Object.entries(this.mcpServerFactories).map(([k, f]) => [k, f()])),
+								mcpServers: Object.fromEntries(
+									await Promise.all(
+										Object.entries(this.mcpServerFactories).map(async ([k, f]) => [k, await f()] as const),
+									),
+								),
 							}
 						: {}),
 				},
 
@@ -51,6 +51,8 @@ import { Scheduler } from "./scheduler/service.ts";
 import { createSchedulerToolServer } from "./scheduler/tool.ts";
 import { getSecretRequest } from "./secrets/store.ts";
 import { createSecretToolServer } from "./secrets/tools.ts";
+import { createBrowserToolServer } from "./ui/browser-mcp.ts";
+import { closePreviewResources, createPreviewToolServer, getOrCreatePreviewContext } from "./ui/preview.ts";
 import { setPublicDir, setSecretSavedCallback, setSecretsDb } from "./ui/serve.ts";
 import { createWebUiToolServer } from "./ui/tools.ts";
 
@@ -191,6 +193,8 @@ async function main(): Promise<void> {
 			"phantom-scheduler": () => createSchedulerToolServer(scheduler as Scheduler),
 			"phantom-web-ui": () => createWebUiToolServer(config.public_url),
 			"phantom-secrets": () => createSecretToolServer({ db, baseUrl: secretsBaseUrl }),
+			"phantom-preview": () => createPreviewToolServer(config.port),
+			"phantom-browser": () => createBrowserToolServer(() => getOrCreatePreviewContext()),
 			...(process.env.RESEND_API_KEY
 				? {
 						"phantom-email": () =>
@@ -204,7 +208,7 @@ async function main(): Promise<void> {
 		});
 		const emailStatus = process.env.RESEND_API_KEY ? " + email" : "";
 		console.log(
-			`[mcp] MCP server initialized (dynamic tools + scheduler + web UI + secrets${emailStatus} wired to agent)`,
+			`[mcp] MCP server initialized (dynamic tools + scheduler + web UI + secrets + preview + browser${emailStatus} wired to agent)`,
 		);
 	} catch (err: unknown) {
 		const msg = err instanceof Error ? err.message : String(err);
@@ -580,6 +584,9 @@ async function main(): Promise<void> {
 	onShutdown("Scheduler", async () => {
 		if (scheduler) scheduler.stop();
 	});
+	onShutdown("Preview browser", async () => {
+		await closePreviewResources();
+	});
 	onShutdown("Peer health monitor", async () => {
 		if (peerHealthMonitor) peerHealthMonitor.stop();
 	});
 
@@ -0,0 +1,127 @@
+// Integration tests for createBrowserToolServer. These exercise the real
+// @playwright/mcp embed with a real BrowserContext. Opt-in:
+//
+//   PHANTOM_INTEGRATION=1 bun test src/ui/__tests__/browser-mcp.integration.test.ts
+//
+// Skipped by default so `bun test` stays hermetic.
+//
+// Two load-bearing invariants are enforced here:
+//
+//  1. The embed exposes exactly 21 tools. @playwright/mcp@0.0.70 is pinned
+//     specifically so the tool surface cannot drift silently; this assertion
+//     is the drift detector the pin was meant to anchor.
+//
+//  2. A real `browser_navigate` call succeeds against a BrowserContext
+//     minted by the preview tool. This is the end-to-end verification of
+//     the cross-version playwright-core boundary documented in
+//     src/ui/browser-mcp.ts note 3: the context is an instance from
+//     playwright-core@1.59.1 consumed by @playwright/mcp's hoisted
+//     playwright-core@1.60.0-alpha SimpleBrowser wrapper.
+
+import { afterAll, beforeAll, describe, expect, test } from "bun:test";
+import { Client } from "@modelcontextprotocol/sdk/client/index.js";
+import { InMemoryTransport } from "@modelcontextprotocol/sdk/inMemory.js";
+import { createBrowserToolServer } from "../browser-mcp.ts";
+import { __resetPreviewStateForTesting, closePreviewResources, getOrCreatePreviewContext } from "../preview.ts";
+import { revokeAllSessions } from "../session.ts";
+
+const ENABLED = process.env.PHANTOM_INTEGRATION === "1"; // opt-in switch: true only when PHANTOM_INTEGRATION is exactly "1"
+const suite = ENABLED ? describe : describe.skip; // falls back to describe.skip so the suite is skipped unless enabled
+
+const EXPECTED_TOOL_NAMES = [ // tool names the listTools test compares against (order-insensitive; both sides are sorted there)
+	"browser_click",
+	"browser_close",
+	"browser_console_messages",
+	"browser_drag",
+	"browser_evaluate",
+	"browser_file_upload",
+	"browser_fill_form",
+	"browser_handle_dialog",
+	"browser_hover",
+	"browser_navigate",
+	"browser_navigate_back",
+	"browser_network_requests",
+	"browser_press_key",
+	"browser_resize",
+	"browser_run_code",
+	"browser_select_option",
+	"browser_snapshot",
+	"browser_tabs",
+	"browser_take_screenshot",
+	"browser_type",
+	"browser_wait_for",
+];
+
+type CallResult = { isError?: boolean; content: unknown }; // the only fields of a callTool() result that the navigate test inspects
+
+suite("createBrowserToolServer (integration)", () => {
+	// Fixtures shared by both tests: populated in beforeAll, released in afterAll.
+	let server: ReturnType<typeof Bun.serve> | null = null;
+	let port = 0;
+	let client: Client | null = null;
+	let embed: Awaited<ReturnType<typeof createBrowserToolServer>> | null = null;
+
+	beforeAll(async () => {
+		// Reset module-level preview state so running this file after any
+		// other test file that called closePreviewResources() still starts
+		// from a pristine state. Bun shares module instances across test
+		// files inside the same process.
+		__resetPreviewStateForTesting();
+
+		// Throwaway HTTP server: serves the one page browser_navigate is
+		// pointed at and answers 404 for everything else. port: 0 requests
+		// an ephemeral port; the actual value is read back from server.port.
+		server = Bun.serve({
+			port: 0,
+			fetch(req) {
+				const url = new URL(req.url);
+				if (url.pathname === "/ui/test.html") {
+					return new Response(
+						"<!DOCTYPE html><html><head><title>Browser MCP Integration</title></head>" +
+							"<body><h1>Hello</h1></body></html>",
+						{ headers: { "content-type": "text/html" } },
+					);
+				}
+				return new Response("not found", { status: 404 });
+			},
+		});
+		port = server.port ?? 0;
+
+		// Connect the embed and an MCP client through a linked in-memory
+		// transport pair so the tests talk to the server in-process.
+		embed = await createBrowserToolServer(() => getOrCreatePreviewContext());
+		const [serverTransport, clientTransport] = InMemoryTransport.createLinkedPair();
+		const serverInstance = embed.instance as unknown as {
+			connect: (t: typeof serverTransport) => Promise<void>;
+			close: () => Promise<void>;
+		};
+		await serverInstance.connect(serverTransport);
+		client = new Client({ name: "phantom-browser-integration", version: "1.0" }, { capabilities: {} });
+		await client.connect(clientTransport);
+	});
+
+	afterAll(async () => {
+		// Every teardown step runs even when an earlier one throws; otherwise
+		// a failed client or embed close would skip the remaining steps and
+		// leave the preview browser and the HTTP server running. The first
+		// failure is rethrown once all steps have been attempted, so the hook
+		// still reports the problem.
+		const teardown: Array<() => unknown> = [
+			() => client?.close(),
+			async () => {
+				if (!embed) return;
+				const inst = embed.instance as unknown as { close: () => Promise<void> };
+				await inst.close();
+			},
+			() => closePreviewResources(),
+			() => {
+				revokeAllSessions();
+			},
+			() => {
+				server?.stop(true);
+			},
+		];
+		const failures: unknown[] = [];
+		for (const step of teardown) {
+			try {
+				await step();
+			} catch (err: unknown) {
+				failures.push(err);
+			}
+		}
+		if (failures.length > 0) throw failures[0];
+	});
+
+	test("listTools returns exactly the 21-tool @playwright/mcp surface", async () => {
+		if (!client) throw new Error("client not initialized");
+		const { tools } = await client.listTools();
+		expect(tools).toHaveLength(21);
+		// Compare sorted copies so the assertion does not depend on the order
+		// in which the embed happens to list its tools.
+		const names = tools.map((t) => t.name).sort();
+		expect(names).toEqual([...EXPECTED_TOOL_NAMES].sort());
+	});
+
+	test("browser_navigate succeeds across the cross-version BrowserContext boundary", async () => {
+		if (!client) throw new Error("client not initialized");
+		const result = (await client.callTool({
+			name: "browser_navigate",
+			arguments: { url: `http://localhost:${port}/ui/test.html` },
+		})) as CallResult;
+		// A successful navigate returns content with no isError flag set.
+		// The exact content shape is @playwright/mcp's concern; we care only
+		// that the call did not land in the error branch.
+		expect(result.isError).toBeFalsy();
+		expect(result.content).toBeDefined();
+	});
+});
@@ -0,0 +1,37 @@
+import { describe, expect, test } from "bun:test";
+import type { BrowserContext } from "playwright";
+import { createBrowserToolServer } from "../browser-mcp.ts";
+
+// @playwright/mcp's createConnection is lazy: the backend factory it wires
+// up invokes the contextGetter only once a client actually requests a tool.
+// Building the embed therefore needs no live BrowserContext, and these tests
+// never touch Chromium. Should the getter be invoked anyway, the returned
+// promise rejects so the unexpected call is visible.
+async function fakeContextGetter(): Promise<BrowserContext> {
+	throw new Error("contextGetter should not run in unit tests");
+}
+
+describe("createBrowserToolServer", () => {
+	test("returns an SDK MCP server config with the phantom-browser name", async () => {
+		const { type, name, instance } = await createBrowserToolServer(fakeContextGetter);
+		expect(type).toBe("sdk");
+		expect(name).toBe("phantom-browser");
+		expect(instance).toBeDefined();
+	});
+
+	test("instance exposes the MCP connect() contract used by the Agent SDK", async () => {
+		const { instance } = await createBrowserToolServer(fakeContextGetter);
+		const contract = instance as unknown as { connect: unknown; close: unknown };
+		// Both lifecycle methods must be callable.
+		for (const method of ["connect", "close"] as const) {
+			expect(typeof contract[method]).toBe("function");
+		}
+	});
+
+	test("each call returns a distinct underlying Server instance", async () => {
+		// The phantom-browser wrapper is produced by a factory and has to be
+		// fresh for every query: if one instance leaked across calls, the SDK
+		// would throw "Already connected to a transport" on the second run.
+		// See src/index.ts for the cardinal rule citation.
+		const first = await createBrowserToolServer(fakeContextGetter);
+		const second = await createBrowserToolServer(fakeContextGetter);
+		expect(first.instance).not.toBe(second.instance);
+	});
+});