Skip to content

Commit 527fa30

Browse files
authored
Merge pull request #53 from ghostwright/feat/playwright-browser-capability
feat: playwright self-validation and general browser capability
2 parents d07b739 + 6a71478 commit 527fa30

14 files changed

Lines changed: 1071 additions & 4 deletions

Dockerfile

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,33 @@ COPY --from=builder /app/public ./public
7575
COPY --from=builder /app/package.json ./
7676
COPY --from=builder /app/tsconfig.json ./
7777

78+
# Install Chromium headless shell + system deps for Playwright.
79+
# Must run after node_modules is copied so bunx can resolve playwright.
80+
# --only-shell skips the full Chromium binary (saves ~75 MiB off the full
81+
# chrome channel); the custom phantom_preview_page tool uses
82+
# chromium.launch() which picks the headless shell automatically for
83+
# headless=true. The @playwright/mcp embed path uses a contextGetter so it
84+
# never needs the full chrome channel binary.
85+
#
86+
# Image cost breakdown (verified on the built image vs. the pre-Playwright
87+
# baseline; total delta is roughly 996 MiB):
88+
# ~327 MB chromium_headless_shell-* binary at
89+
# /home/phantom/.cache/ms-playwright/chromium_headless_shell-*
90+
# ~91 MB /usr/share/fonts pulled by --with-deps (DejaVu, Liberation,
91+
# Noto Core)
92+
# ~500+ MB /usr/lib X11 / GTK / libasound / libnss3 / libcups / libatk
93+
# and the other shared libraries apt-get pulls for Chromium
94+
#
95+
# --only-shell only affects the Chromium binary. The system deps are the
96+
# dominant cost and cannot be trimmed without breaking Chromium's ability
97+
# to start. If you are trying to shrink this image, the headless shell
98+
# binary is the only safe target; the /usr/lib growth is load-bearing.
99+
ENV PLAYWRIGHT_BROWSERS_PATH=/home/phantom/.cache/ms-playwright
100+
RUN mkdir -p "$PLAYWRIGHT_BROWSERS_PATH" && \
101+
bunx playwright install --with-deps --only-shell chromium && \
102+
chown -R phantom:phantom /home/phantom/.cache && \
103+
rm -rf /var/lib/apt/lists/*
104+
78105
# Copy default phantom-config (constitution.md, persona.md, etc.)
79106
# These get backed up so they survive the empty volume mount on first run.
80107
COPY --from=builder /app/phantom-config ./phantom-config

bun.lock

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,12 @@
1818
"dependencies": {
1919
"@anthropic-ai/claude-agent-sdk": "^0.2.77",
2020
"@modelcontextprotocol/sdk": "^1.28.0",
21+
"@playwright/mcp": "0.0.70",
2122
"@slack/bolt": "^4.6.0",
2223
"croner": "^10.0.1",
2324
"imapflow": "^1.2.18",
2425
"nodemailer": "^8.0.4",
26+
"playwright": "1.59.1",
2527
"resend": "^6.9.4",
2628
"telegraf": "^4.16.3",
2729
"yaml": "^2.6.0",

src/agent/prompt-assembler.ts

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,31 @@ function buildEnvironment(config: PhantomConfig): string {
170170
lines.push(`- Pages are at ${publicUrl}/ui/<filename>`);
171171
}
172172
lines.push("");
173+
lines.push("SELF-VALIDATE EVERY UI PAGE YOU CREATE.");
174+
lines.push("After phantom_create_page succeeds, always call phantom_preview_page with");
175+
lines.push("the same path. Review the screenshot, the HTTP status, the page title,");
176+
lines.push("and especially the console messages and failed network requests list.");
177+
lines.push("If there are console errors, failed CDN loads, or the screenshot looks");
178+
lines.push("wrong, fix the HTML and re-run phantom_preview_page until clean. Only");
179+
lines.push("report the page to the user after validation passes.");
180+
lines.push("The tool returns one image block plus a JSON metadata block. The image");
181+
lines.push("is for visual review, the JSON tells you what failed to load or error.");
182+
lines.push("");
183+
lines.push("GENERAL BROWSER CAPABILITY.");
184+
lines.push("You have access to the full Playwright MCP tool surface via the");
185+
lines.push("phantom-browser server. These tools share one Chromium instance with");
186+
lines.push("phantom_preview_page. Use browser_navigate to open any URL (localhost");
187+
lines.push("or external), browser_snapshot for structured accessibility text,");
188+
lines.push("browser_take_screenshot for pixel captures, browser_click/browser_type/");
189+
lines.push("browser_fill_form for interaction, browser_console_messages and");
190+
lines.push("browser_network_requests for debugging, browser_tabs for multi-page work.");
191+
lines.push("For single-shot self-validation of your own /ui/<path> pages, always");
192+
lines.push("prefer phantom_preview_page: one call returns image plus JSON.");
193+
lines.push("For multi-step browsing, research tasks, or external sites, use the");
194+
lines.push("browser_* tools directly.");
195+
lines.push("Do NOT use browser_run_code against external pages unless the user");
196+
lines.push("explicitly asked you to execute code in a foreign origin.");
197+
lines.push("");
173198
lines.push("When you build something that others should access, you have two options:");
174199
lines.push("1. Create an HTTP API on a local port. Give the user the internal URL and auth token.");
175200
lines.push(

src/agent/runtime.ts

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ export class AgentRuntime {
3131
private roleTemplate: RoleTemplate | null = null;
3232
private onboardingPrompt: string | null = null;
3333
private lastTrackedFiles: string[] = [];
34-
private mcpServerFactories: Record<string, () => McpServerConfig> | null = null;
34+
private mcpServerFactories: Record<string, () => McpServerConfig | Promise<McpServerConfig>> | null = null;
3535

3636
constructor(config: PhantomConfig, db: Database) {
3737
this.config = config;
@@ -55,7 +55,7 @@ export class AgentRuntime {
5555
this.onboardingPrompt = prompt;
5656
}
5757

58-
setMcpServerFactories(factories: Record<string, () => McpServerConfig>): void {
58+
setMcpServerFactories(factories: Record<string, () => McpServerConfig | Promise<McpServerConfig>>): void {
5959
this.mcpServerFactories = factories;
6060
}
6161

@@ -208,7 +208,11 @@ export class AgentRuntime {
208208
...(useResume && session.sdk_session_id ? { resume: session.sdk_session_id } : {}),
209209
...(this.mcpServerFactories
210210
? {
211-
mcpServers: Object.fromEntries(Object.entries(this.mcpServerFactories).map(([k, f]) => [k, f()])),
211+
mcpServers: Object.fromEntries(
212+
await Promise.all(
213+
Object.entries(this.mcpServerFactories).map(async ([k, f]) => [k, await f()] as const),
214+
),
215+
),
212216
}
213217
: {}),
214218
},

src/index.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ import { Scheduler } from "./scheduler/service.ts";
5151
import { createSchedulerToolServer } from "./scheduler/tool.ts";
5252
import { getSecretRequest } from "./secrets/store.ts";
5353
import { createSecretToolServer } from "./secrets/tools.ts";
54+
import { createBrowserToolServer } from "./ui/browser-mcp.ts";
55+
import { closePreviewResources, createPreviewToolServer, getOrCreatePreviewContext } from "./ui/preview.ts";
5456
import { setPublicDir, setSecretSavedCallback, setSecretsDb } from "./ui/serve.ts";
5557
import { createWebUiToolServer } from "./ui/tools.ts";
5658

@@ -191,6 +193,8 @@ async function main(): Promise<void> {
191193
"phantom-scheduler": () => createSchedulerToolServer(scheduler as Scheduler),
192194
"phantom-web-ui": () => createWebUiToolServer(config.public_url),
193195
"phantom-secrets": () => createSecretToolServer({ db, baseUrl: secretsBaseUrl }),
196+
"phantom-preview": () => createPreviewToolServer(config.port),
197+
"phantom-browser": () => createBrowserToolServer(() => getOrCreatePreviewContext()),
194198
...(process.env.RESEND_API_KEY
195199
? {
196200
"phantom-email": () =>
@@ -204,7 +208,7 @@ async function main(): Promise<void> {
204208
});
205209
const emailStatus = process.env.RESEND_API_KEY ? " + email" : "";
206210
console.log(
207-
`[mcp] MCP server initialized (dynamic tools + scheduler + web UI + secrets${emailStatus} wired to agent)`,
211+
`[mcp] MCP server initialized (dynamic tools + scheduler + web UI + secrets + preview + browser${emailStatus} wired to agent)`,
208212
);
209213
} catch (err: unknown) {
210214
const msg = err instanceof Error ? err.message : String(err);
@@ -580,6 +584,9 @@ async function main(): Promise<void> {
580584
onShutdown("Scheduler", async () => {
581585
if (scheduler) scheduler.stop();
582586
});
587+
onShutdown("Preview browser", async () => {
588+
await closePreviewResources();
589+
});
583590
onShutdown("Peer health monitor", async () => {
584591
if (peerHealthMonitor) peerHealthMonitor.stop();
585592
});
src/ui/__tests__/browser-mcp.integration.test.ts

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
// Integration tests for createBrowserToolServer. These exercise the real
2+
// @playwright/mcp embed with a real BrowserContext. Opt-in:
3+
//
4+
// PHANTOM_INTEGRATION=1 bun test src/ui/__tests__/browser-mcp.integration.test.ts
5+
//
6+
// Skipped by default so `bun test` stays hermetic.
7+
//
8+
// Two load-bearing invariants are enforced here:
9+
//
10+
// 1. The embed exposes exactly 21 tools. @playwright/mcp@0.0.70 is pinned
11+
// specifically so the tool surface cannot drift silently; this assertion
12+
// is the drift detector the pin was meant to anchor.
13+
//
14+
// 2. A real `browser_navigate` call succeeds against a BrowserContext
15+
// minted by the preview tool. This is the end-to-end verification of
16+
// the cross-version playwright-core boundary documented in
17+
// src/ui/browser-mcp.ts note 3: the context is an instance from
18+
// playwright-core@1.59.1 consumed by @playwright/mcp's hoisted
19+
// playwright-core@1.60.0-alpha SimpleBrowser wrapper.
20+
21+
import { afterAll, beforeAll, describe, expect, test } from "bun:test";
22+
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
23+
import { InMemoryTransport } from "@modelcontextprotocol/sdk/inMemory.js";
24+
import { createBrowserToolServer } from "../browser-mcp.ts";
25+
import { __resetPreviewStateForTesting, closePreviewResources, getOrCreatePreviewContext } from "../preview.ts";
26+
import { revokeAllSessions } from "../session.ts";
27+
28+
const ENABLED = process.env.PHANTOM_INTEGRATION === "1";
29+
const suite = ENABLED ? describe : describe.skip;
30+
31+
const EXPECTED_TOOL_NAMES = [
32+
"browser_click",
33+
"browser_close",
34+
"browser_console_messages",
35+
"browser_drag",
36+
"browser_evaluate",
37+
"browser_file_upload",
38+
"browser_fill_form",
39+
"browser_handle_dialog",
40+
"browser_hover",
41+
"browser_navigate",
42+
"browser_navigate_back",
43+
"browser_network_requests",
44+
"browser_press_key",
45+
"browser_resize",
46+
"browser_run_code",
47+
"browser_select_option",
48+
"browser_snapshot",
49+
"browser_tabs",
50+
"browser_take_screenshot",
51+
"browser_type",
52+
"browser_wait_for",
53+
];
54+
55+
type CallResult = { isError?: boolean; content: unknown };
56+
57+
suite("createBrowserToolServer (integration)", () => {
58+
let server: ReturnType<typeof Bun.serve> | null = null;
59+
let port = 0;
60+
let client: Client | null = null;
61+
let embed: Awaited<ReturnType<typeof createBrowserToolServer>> | null = null;
62+
63+
beforeAll(async () => {
64+
// Reset module-level preview state so running this file after any
65+
// other test file that called closePreviewResources() still starts
66+
// from a pristine state. Bun shares module instances across test
67+
// files inside the same process.
68+
__resetPreviewStateForTesting();
69+
server = Bun.serve({
70+
port: 0,
71+
fetch(req) {
72+
const url = new URL(req.url);
73+
if (url.pathname === "/ui/test.html") {
74+
return new Response(
75+
"<!DOCTYPE html><html><head><title>Browser MCP Integration</title></head>" +
76+
"<body><h1>Hello</h1></body></html>",
77+
{ headers: { "content-type": "text/html" } },
78+
);
79+
}
80+
return new Response("not found", { status: 404 });
81+
},
82+
});
83+
port = server.port ?? 0;
84+
85+
embed = await createBrowserToolServer(() => getOrCreatePreviewContext());
86+
const [serverTransport, clientTransport] = InMemoryTransport.createLinkedPair();
87+
const serverInstance = embed.instance as unknown as {
88+
connect: (t: typeof serverTransport) => Promise<void>;
89+
close: () => Promise<void>;
90+
};
91+
await serverInstance.connect(serverTransport);
92+
client = new Client({ name: "phantom-browser-integration", version: "1.0" }, { capabilities: {} });
93+
await client.connect(clientTransport);
94+
});
95+
96+
afterAll(async () => {
97+
await client?.close();
98+
if (embed) {
99+
const inst = embed.instance as unknown as { close: () => Promise<void> };
100+
await inst.close();
101+
}
102+
await closePreviewResources();
103+
revokeAllSessions();
104+
server?.stop(true);
105+
});
106+
107+
test("listTools returns exactly the 21-tool @playwright/mcp surface", async () => {
108+
if (!client) throw new Error("client not initialized");
109+
const { tools } = await client.listTools();
110+
expect(tools).toHaveLength(21);
111+
const names = tools.map((t) => t.name).sort();
112+
expect(names).toEqual([...EXPECTED_TOOL_NAMES].sort());
113+
});
114+
115+
test("browser_navigate succeeds across the cross-version BrowserContext boundary", async () => {
116+
if (!client) throw new Error("client not initialized");
117+
const result = (await client.callTool({
118+
name: "browser_navigate",
119+
arguments: { url: `http://localhost:${port}/ui/test.html` },
120+
})) as CallResult;
121+
// A successful navigate returns content with no isError flag set.
122+
// The exact content shape is @playwright/mcp's concern; we care only
123+
// that the call did not land in the error branch.
124+
expect(result.isError).toBeFalsy();
125+
expect(result.content).toBeDefined();
126+
});
127+
});
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import { describe, expect, test } from "bun:test";
2+
import type { BrowserContext } from "playwright";
3+
import { createBrowserToolServer } from "../browser-mcp.ts";
4+
5+
// The real @playwright/mcp createConnection is lazy: it wires a backend
6+
// factory that will call the contextGetter only when a client actually
7+
// requests a tool. Constructing the embed does not require a live
8+
// BrowserContext, so these tests never touch Chromium.
9+
function fakeContextGetter(): Promise<BrowserContext> {
10+
return Promise.reject(new Error("contextGetter should not run in unit tests"));
11+
}
12+
13+
describe("createBrowserToolServer", () => {
14+
test("returns an SDK MCP server config with the phantom-browser name", async () => {
15+
const config = await createBrowserToolServer(fakeContextGetter);
16+
expect(config.type).toBe("sdk");
17+
expect(config.name).toBe("phantom-browser");
18+
expect(config.instance).toBeDefined();
19+
});
20+
21+
test("instance exposes the MCP connect() contract used by the Agent SDK", async () => {
22+
const config = await createBrowserToolServer(fakeContextGetter);
23+
const inst = config.instance as unknown as { connect: unknown; close: unknown };
24+
expect(typeof inst.connect).toBe("function");
25+
expect(typeof inst.close).toBe("function");
26+
});
27+
28+
test("each call returns a distinct underlying Server instance", async () => {
29+
const a = await createBrowserToolServer(fakeContextGetter);
30+
const b = await createBrowserToolServer(fakeContextGetter);
31+
// Factory pattern: the phantom-browser wrapper must be fresh per query.
32+
// If the same instance leaks across calls the SDK will throw "Already
33+
// connected to a transport" on the second run. See src/index.ts for
34+
// the cardinal rule citation.
35+
expect(a.instance).not.toBe(b.instance);
36+
});
37+
});

0 commit comments

Comments
 (0)