diff --git a/skills/autobrowse/.env.example b/skills/autobrowse/.env.example index 08aff37..b42f362 100644 --- a/skills/autobrowse/.env.example +++ b/skills/autobrowse/.env.example @@ -1,3 +1,9 @@ ANTHROPIC_API_KEY=sk-ant-... +# Optional: use OpenAI-compatible Chat Completions instead of Anthropic. +# AUTOBROWSE_PROVIDER=openai +# AUTOBROWSE_MODEL=gpt-4.1 +# OPENAI_API_KEY=sk-... +# OPENAI_BASE_URL=https://api.openai.com/v1 +# For OpenRouter/LiteLLM/etc, point OPENAI_BASE_URL at that provider's /v1 endpoint. BROWSERBASE_API_KEY=bb_live_... BROWSERBASE_PROJECT_ID=your-project-id diff --git a/skills/autobrowse/README.md b/skills/autobrowse/README.md index 8b4d044..5e35eac 100644 --- a/skills/autobrowse/README.md +++ b/skills/autobrowse/README.md @@ -13,7 +13,7 @@ The output is a `skill.md` — a site-specific playbook any agent can follow. On - Node.js 18+ - [Claude Code](https://claude.ai/code) - `browse` CLI: `npm install -g @browserbasehq/browse-cli` -- `ANTHROPIC_API_KEY` in your environment +- `ANTHROPIC_API_KEY` in your environment, or `AUTOBROWSE_PROVIDER=openai` with `OPENAI_API_KEY` - For bot-protected sites: `BROWSERBASE_API_KEY` + `BROWSERBASE_PROJECT_ID` ## Setup @@ -25,6 +25,23 @@ npm install cp .env.example .env # fill in your API keys ``` +By default, the inner agent uses Anthropic. To use an OpenAI-compatible provider instead: + +```bash +AUTOBROWSE_PROVIDER=openai \ +OPENAI_API_KEY=sk-... \ +node scripts/evaluate.mjs --task my-portal --model gpt-4.1 +``` + +For OpenRouter, LiteLLM, or another Chat Completions-compatible gateway, set `OPENAI_BASE_URL`: + +```bash +AUTOBROWSE_PROVIDER=openai \ +OPENAI_API_KEY=sk-or-... 
\ +OPENAI_BASE_URL=https://openrouter.ai/api/v1 \ +node scripts/evaluate.mjs --task my-portal --model anthropic/claude-sonnet-4.5 +``` + ## Your project structure Create this in your working directory before running `/autobrowse`: @@ -80,7 +97,7 @@ Inspired by [Karpathy's autoresearch](https://github.com/karpathy/autoresearch) outer agent (Claude Code + /autobrowse skill) └── reads trace → improves strategy.md → repeats -inner agent (scripts/evaluate.mjs → Anthropic API) +inner agent (scripts/evaluate.mjs → Anthropic API or OpenAI-compatible Chat Completions) └── browse open → snapshot → click → snapshot → ... └── writes traces/ with summary, full trace, screenshots ``` diff --git a/skills/autobrowse/REFERENCE.md b/skills/autobrowse/REFERENCE.md index 9b13d8f..b328659 100644 --- a/skills/autobrowse/REFERENCE.md +++ b/skills/autobrowse/REFERENCE.md @@ -10,14 +10,22 @@ node ${CLAUDE_SKILL_DIR}/scripts/evaluate.mjs --task [options] |------|---------|-------------| | `--task ` | required | Task name — matches `tasks//` directory | | `--env local\|remote` | `local` | Browser environment | -| `--model ` | `claude-sonnet-4-6` | Claude model for the inner agent | +| `--provider anthropic\|openai` | `anthropic` | Model provider for the inner agent | +| `--model ` | provider-specific | Model for the inner agent (`claude-sonnet-4-6` for Anthropic, `gpt-4.1` for OpenAI-compatible) | | `--run-number N` | auto-increment | Force a specific run number | ## Environment variables | Variable | Required | Description | |----------|----------|-------------| -| `ANTHROPIC_API_KEY` | Yes | Claude API key | +| `AUTOBROWSE_PROVIDER` | No | `anthropic` or `openai`; same as `--provider` | +| `AUTOBROWSE_MODEL` | No | Default model when `--model` is omitted | +| `ANTHROPIC_API_KEY` | Anthropic only | Claude API key | +| `OPENAI_API_KEY` | OpenAI-compatible only | API key for OpenAI, OpenRouter, LiteLLM, etc. 
| +| `OPENAI_BASE_URL` | No | OpenAI-compatible `/v1` base URL; defaults to `https://api.openai.com/v1` | +| `OPENAI_ORGANIZATION` | No | Optional OpenAI organization header | +| `OPENAI_SITE_URL` | No | Optional `HTTP-Referer` header for gateways such as OpenRouter | +| `OPENAI_APP_NAME` | No | Optional `X-Title` header for gateways such as OpenRouter | | `BROWSERBASE_API_KEY` | Remote only | Browserbase API key | | `BROWSERBASE_PROJECT_ID` | Remote only | Browserbase project ID | @@ -29,7 +37,7 @@ Each run writes to `traces//run-NNN/`: |------|-------------| | `summary.md` | Duration, cost, turn-by-turn decision log, final output | | `trace.json` | Full tool call log — every command and response | -| `messages.json` | Raw Anthropic API message history | +| `messages.json` | Raw normalized message history | | `screenshots/` | Visual captures saved during the run | `traces//latest` is a symlink to the most recent run. @@ -41,6 +49,8 @@ Each run writes to `traces//run-NNN/`: | `claude-sonnet-4-6` | $$ | Default — good balance of speed and accuracy | | `claude-opus-4-6` | $$$$ | Hardest tasks, complex multi-step workflows | | `claude-haiku-4-5-20251001` | $ | Simple tasks, high-volume iteration | +| `gpt-4.1` | $$ | Default for OpenAI-compatible mode | +| `gpt-4.1-mini` | $ | Lower-cost OpenAI-compatible iteration | ## Skill lifecycle diff --git a/skills/autobrowse/SKILL.md b/skills/autobrowse/SKILL.md index 39d4f44..1e1d6aa 100644 --- a/skills/autobrowse/SKILL.md +++ b/skills/autobrowse/SKILL.md @@ -2,7 +2,7 @@ name: autobrowse description: Self-improving browser automation via the auto-research loop. Iteratively runs a browsing task, reads the trace, and improves the navigation skill (strategy.md) until it reliably passes. Supports parallel runs across multiple tasks using sub-agents. Use when you want to build or improve browser automation skills for specific website tasks. 
license: See LICENSE.txt -compatibility: "Requires Node.js 18+, browse CLI, and ANTHROPIC_API_KEY. Run from the autobrowse app directory." +compatibility: "Requires Node.js 18+, browse CLI, and either ANTHROPIC_API_KEY or AUTOBROWSE_PROVIDER=openai with OPENAI_API_KEY. Run from the autobrowse app directory." allowed-tools: Bash Read Write Edit Glob Grep Agent metadata: author: browserbase @@ -96,7 +96,8 @@ Check that `./autobrowse/tasks//task.md` exists (scaffold it from the temp ### Requirements -- `ANTHROPIC_API_KEY` must be in the environment (or in a `.env` file in CWD — `evaluate.mjs` auto-loads it). If missing, the harness prints a clear error and exits; don't hunt for keys in other paths. +- By default, `ANTHROPIC_API_KEY` must be in the environment (or in a `.env` file in CWD — `evaluate.mjs` auto-loads it). If missing, the harness prints a clear error and exits; don't hunt for keys in other paths. +- To use OpenAI-compatible Chat Completions instead, set `AUTOBROWSE_PROVIDER=openai` or pass `--provider openai`, then set `OPENAI_API_KEY`. For OpenRouter, LiteLLM, or another compatible gateway, also set `OPENAI_BASE_URL` to that provider's `/v1` endpoint. ### Run the inner agent @@ -104,6 +105,8 @@ Check that `./autobrowse/tasks//task.md` exists (scaffold it from the temp node ${CLAUDE_SKILL_DIR}/scripts/evaluate.mjs --task --workspace ./autobrowse # or for bot-protected sites: node ${CLAUDE_SKILL_DIR}/scripts/evaluate.mjs --task --workspace ./autobrowse --env remote +# or with an OpenAI-compatible provider: +AUTOBROWSE_PROVIDER=openai OPENAI_API_KEY=sk-... node ${CLAUDE_SKILL_DIR}/scripts/evaluate.mjs --task --workspace ./autobrowse --model gpt-4.1 ``` This runs the browser session and writes a full trace to `./autobrowse/traces//latest/`. 
diff --git a/skills/autobrowse/package.json b/skills/autobrowse/package.json index 8571791..d70555b 100644 --- a/skills/autobrowse/package.json +++ b/skills/autobrowse/package.json @@ -1,7 +1,7 @@ { "name": "autobrowse", "version": "0.1.0", - "description": "Self-improving browser agent via skill learning — autoresearch pattern + Browse CLI + Anthropic API", + "description": "Self-improving browser agent via skill learning — autoresearch pattern + Browse CLI + Anthropic/OpenAI-compatible APIs", "type": "module", "scripts": { "evaluate": "node scripts/evaluate.mjs" diff --git a/skills/autobrowse/scripts/evaluate.mjs b/skills/autobrowse/scripts/evaluate.mjs index bf42758..d6f3522 100644 --- a/skills/autobrowse/scripts/evaluate.mjs +++ b/skills/autobrowse/scripts/evaluate.mjs @@ -3,11 +3,14 @@ /** * evaluate.mjs — Inner agent harness. * - * Runs a browsing agent using the raw Anthropic API with a single `execute` - * tool. The agent calls browse CLI commands to navigate websites. Full trace - * is captured incrementally and written to disk. + * Runs a browsing agent with a single `execute` tool. The agent calls browse CLI + * commands to navigate websites. Full trace is captured incrementally and + * written to disk. * - * Usage: node scripts/evaluate.mjs --task [--workspace ] [--env local|remote] [--model ] [--run-number N] + * Supports Anthropic by default, plus OpenAI-compatible Chat Completions + * providers via --provider openai (OpenAI, OpenRouter, LiteLLM, etc.). 
// ── Config ─────────────────────────────────────────────────────────

const DEFAULT_PROVIDER = "anthropic";
const DEFAULT_ANTHROPIC_MODEL = "claude-sonnet-4-6";
const DEFAULT_OPENAI_MODEL = "gpt-4.1";
const MAX_TURNS = 30;
const MAX_TOKENS = 4096;
const EXEC_TIMEOUT_MS = 30_000;

/**
 * Resolve which model provider the inner agent should use.
 *
 * Precedence: `--provider` CLI flag, then `AUTOBROWSE_PROVIDER`, then
 * `DEFAULT_PROVIDER`. The result is trimmed and lower-cased so values such as
 * " OpenAI " compare equal to "openai".
 *
 * @returns {string} Provider name; validated later by `ensureApiKey`.
 */
function getProvider() {
  // `||` rather than `??`: a blank `AUTOBROWSE_PROVIDER=` line in .env yields
  // an empty string, which must behave exactly like an unset variable.
  const fromEnv = process.env.AUTOBROWSE_PROVIDER?.trim();
  const chosen = getArg("provider", fromEnv || DEFAULT_PROVIDER);
  const normalized = String(chosen ?? "").trim().toLowerCase();
  return normalized || DEFAULT_PROVIDER;
}

/**
 * Resolve the model name for the given provider.
 *
 * Precedence: `--model` CLI flag, then `AUTOBROWSE_MODEL`, then the
 * provider-specific default (`DEFAULT_OPENAI_MODEL` for "openai",
 * `DEFAULT_ANTHROPIC_MODEL` for everything else).
 *
 * @param {string} provider - Provider name as returned by `getProvider`.
 * @returns {string} Model identifier to send to the provider.
 */
function getModel(provider) {
  const providerDefault =
    provider === "openai" ? DEFAULT_OPENAI_MODEL : DEFAULT_ANTHROPIC_MODEL;
  // Blank env values fall through to the provider default instead of sending
  // an empty model name to the API.
  const fromEnv = process.env.AUTOBROWSE_MODEL?.trim();
  return getArg("model", fromEnv || providerDefault);
}
/**
 * Report whether an environment variable holds a usable value.
 * Unset, empty and whitespace-only values all count as missing.
 *
 * @param {string} name - Environment variable name.
 * @returns {boolean}
 */
function hasEnvValue(name) {
  return (process.env[name] ?? "").trim() !== "";
}

/**
 * Print each line to stderr and terminate the process with exit code 1.
 *
 * @param {string[]} lines - Lines of the error message, in order.
 */
function exitWithError(lines) {
  for (const line of lines) {
    console.error(line);
  }
  process.exit(1);
}

/**
 * Verify that the API key required by `provider` is present.
 *
 * Prints setup instructions to stderr and exits with code 1 when the key is
 * missing (or blank), or when `provider` is neither "anthropic" nor "openai".
 * Returns normally otherwise.
 *
 * @param {string} provider - "anthropic" or "openai".
 * @returns {void}
 */
function ensureApiKey(provider) {
  if (provider === "anthropic") {
    if (!hasEnvValue("ANTHROPIC_API_KEY")) {
      exitWithError([
        "ERROR: ANTHROPIC_API_KEY is not set.",
        "",
        "Set it one of these ways:",
        " 1. export ANTHROPIC_API_KEY=sk-ant-...",
        " 2. Create a .env file in the current directory with:",
        " ANTHROPIC_API_KEY=sk-ant-...",
        "",
        "Get a key at https://console.anthropic.com/settings/keys",
      ]);
    }
    return;
  }

  if (provider === "openai") {
    if (!hasEnvValue("OPENAI_API_KEY")) {
      exitWithError([
        "ERROR: OPENAI_API_KEY is not set.",
        "",
        "Set it one of these ways:",
        " 1. export OPENAI_API_KEY=sk-...",
        " 2. Create a .env file in the current directory with:",
        " AUTOBROWSE_PROVIDER=openai",
        " OPENAI_API_KEY=sk-...",
        " # optional for OpenRouter, LiteLLM, etc.",
        " OPENAI_BASE_URL=https://openrouter.ai/api/v1",
      ]);
    }
    return;
  }

  exitWithError([`ERROR: unsupported provider "${provider}". Use anthropic or openai.`]);
}

// ── Provider adapters ───────────────────────────────────────────────

/**
 * Build the client handle used by `callModel`.
 *
 * For "anthropic" this is an Anthropic SDK instance. For any other provider it
 * is a plain settings object for the OpenAI-compatible HTTP endpoint:
 * `{ apiKey, baseUrl, organization }`.
 *
 * @param {string} provider - "anthropic" or "openai".
 * @returns {object} SDK client or OpenAI-compatible settings object.
 */
function createClient(provider) {
  if (provider === "anthropic") {
    return new Anthropic();
  }

  // `||` so an empty OPENAI_BASE_URL falls back to the default instead of
  // producing a relative "/chat/completions" URL.
  const baseUrl = process.env.OPENAI_BASE_URL?.trim() || "https://api.openai.com/v1";
  return {
    apiKey: process.env.OPENAI_API_KEY?.trim(),
    // Strip every trailing slash so the joined URL never contains "//".
    baseUrl: baseUrl.replace(/\/+$/, ""),
    organization: process.env.OPENAI_ORGANIZATION?.trim() || undefined,
  };
}

/**
 * Convert Anthropic-style tool definitions (`name`, `description`,
 * `input_schema`) into Chat Completions `function` tool definitions.
 *
 * @param {Array<{name: string, description: string, input_schema: object}>} tools
 * @returns {Array<object>} Tools in Chat Completions format.
 */
function toOpenAITools(tools) {
  return tools.map(({ name, description, input_schema: parameters }) => ({
    type: "function",
    function: { name, description, parameters },
  }));
}

/**
 * Flatten a `tool_result` block's content into the plain string that a
 * Chat Completions `tool` message carries.
 *
 * Accepts a string, an array of content blocks (text blocks are joined with
 * newlines; non-text blocks are skipped), or any other value (JSON-encoded).
 *
 * @param {unknown} content - `tool_result` content.
 * @returns {string}
 */
function toolResultToText(content) {
  if (content == null) {
    return "";
  }
  if (typeof content === "string") {
    return content;
  }
  if (Array.isArray(content)) {
    return content
      .map((part) => {
        if (typeof part === "string") return part;
        return part?.type === "text" ? part.text ?? "" : "";
      })
      .filter((piece) => piece !== "")
      .join("\n");
  }
  return JSON.stringify(content);
}

/**
 * Translate the harness's Anthropic-style message history into Chat
 * Completions messages.
 *
 * - The system prompt becomes a leading `system` message.
 * - Assistant turns with block arrays become one `assistant` message whose
 *   text blocks are joined with newlines and whose `tool_use` blocks become
 *   `tool_calls`.
 * - User turns with block arrays become one `tool` message per `tool_result`
 *   block, followed by a single `user` message holding any text blocks.
 * - Everything else is passed through with its content coerced to a string.
 *
 * @param {string} systemPrompt
 * @param {Array<{role: string, content: unknown}>} messages
 * @returns {Array<object>} Messages in Chat Completions format.
 */
function toOpenAIMessages(systemPrompt, messages) {
  const converted = [{ role: "system", content: systemPrompt }];

  for (const message of messages) {
    const { role, content } = message;

    if (role === "assistant" && Array.isArray(content)) {
      const textParts = [];
      const toolCalls = [];
      for (const block of content) {
        if (block.type === "text" && block.text) {
          textParts.push(block.text);
        } else if (block.type === "tool_use") {
          toolCalls.push({
            id: block.id,
            type: "function",
            function: {
              name: block.name,
              arguments: JSON.stringify(block.input ?? {}),
            },
          });
        }
      }
      const text = textParts.join("\n");
      // `content: null` is only sent alongside tool_calls; a turn with neither
      // text nor tool calls carries an empty string instead.
      converted.push(
        toolCalls.length > 0
          ? { role: "assistant", content: text || null, tool_calls: toolCalls }
          : { role: "assistant", content: text },
      );
      continue;
    }

    if (role === "user" && Array.isArray(content)) {
      const trailingText = [];
      for (const block of content) {
        if (block.type === "tool_result") {
          converted.push({
            role: "tool",
            tool_call_id: block.tool_use_id,
            content: toolResultToText(block.content),
          });
        } else if (block.type === "text" && block.text) {
          trailingText.push(block.text);
        }
      }
      // Text is emitted after the tool messages so the tool results stay
      // adjacent to the assistant turn that requested them.
      if (trailingText.length > 0) {
        converted.push({ role: "user", content: trailingText.join("\n") });
      }
      continue;
    }

    converted.push({
      role,
      content: typeof content === "string" ? content : String(content ?? ""),
    });
  }

  return converted;
}
// Upper bound on one Chat Completions request so a stalled gateway cannot
// hang the run indefinitely.
const MODEL_REQUEST_TIMEOUT_MS = 600_000;

/**
 * Decode a tool call's `arguments` payload into a plain object.
 *
 * Valid JSON objects are returned as-is. Text that is not valid JSON — or a
 * JSON string — is treated as the raw command and wrapped as `{ command }`.
 * Any other decoded value (null, number, array) yields `{}` so downstream
 * code always receives an object.
 *
 * @param {unknown} rawArgs - `tool_call.function.arguments` from the response.
 * @returns {object}
 */
function parseToolArguments(rawArgs) {
  if (rawArgs !== null && typeof rawArgs === "object" && !Array.isArray(rawArgs)) {
    // Tolerate an already-decoded object.
    return rawArgs;
  }
  if (rawArgs == null || rawArgs === "") {
    return {};
  }

  let parsed;
  try {
    parsed = JSON.parse(rawArgs);
  } catch {
    return { command: rawArgs };
  }
  if (typeof parsed === "string") {
    return { command: parsed };
  }
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    return {};
  }
  return parsed;
}

/**
 * Extract assistant text from a Chat Completions `message.content`, which may
 * be a string or an array of parts carrying a `text` field.
 *
 * @param {unknown} content
 * @returns {string}
 */
function extractMessageText(content) {
  if (typeof content === "string") {
    return content;
  }
  if (Array.isArray(content)) {
    return content
      .map((part) => (typeof part === "string" ? part : part?.text ?? ""))
      .join("");
  }
  return "";
}

/**
 * Convert a Chat Completions response body into the Anthropic-shaped object
 * the agent loop consumes: `{ content, stop_reason, usage }`.
 *
 * `stop_reason` is derived as follows:
 * - "max_tokens" when `finish_reason` is "length" (output was truncated);
 * - "tool_use" when the message carries at least one tool call, regardless of
 *   the reported `finish_reason`;
 * - "end_turn" otherwise.
 *
 * @param {object} data - Parsed JSON body of the Chat Completions response.
 * @returns {{content: object[], stop_reason: string, usage: {input_tokens: number, output_tokens: number}}}
 * @throws {Error} When the response contains no choices.
 */
function normalizeOpenAIResponse(data) {
  const choice = data.choices?.[0];
  if (!choice) {
    throw new Error(`OpenAI-compatible response had no choices: ${JSON.stringify(data).slice(0, 500)}`);
  }

  const message = choice.message ?? {};
  const content = [];

  const text = extractMessageText(message.content);
  if (text) {
    content.push({ type: "text", text });
  }

  const toolCalls = message.tool_calls ?? [];
  toolCalls.forEach((toolCall, index) => {
    content.push({
      type: "tool_use",
      // Fall back to a positional id so the tool result can still be paired
      // with its call when the id is omitted.
      id: toolCall.id ?? `call_${index}`,
      name: toolCall.function?.name,
      input: parseToolArguments(toolCall.function?.arguments),
    });
  });

  let stopReason = "end_turn";
  if (choice.finish_reason === "length") {
    stopReason = "max_tokens";
  } else if (toolCalls.length > 0) {
    stopReason = "tool_use";
  }

  const usage = data.usage ?? {};
  return {
    content,
    stop_reason: stopReason,
    usage: {
      input_tokens: usage.prompt_tokens ?? 0,
      output_tokens: usage.completion_tokens ?? 0,
    },
  };
}

/**
 * Send one model request and return an Anthropic-shaped response.
 *
 * For "anthropic" the SDK client is called directly. For any other provider a
 * POST is made to `${client.baseUrl}/chat/completions` and the result is
 * passed through `normalizeOpenAIResponse`.
 *
 * @param {object} args
 * @param {string} args.provider - "anthropic" or "openai".
 * @param {object} args.client - Value returned by `createClient`.
 * @param {string} args.model - Model identifier.
 * @param {string} args.systemPrompt - System prompt text.
 * @param {Array<object>} args.messages - Anthropic-style message history.
 * @returns {Promise<object>} `{ content, stop_reason, usage }`.
 * @throws {Error} On a non-JSON body, a non-2xx status, a response without
 *   choices, or when the request exceeds `MODEL_REQUEST_TIMEOUT_MS`.
 */
async function callModel({ provider, client, model, systemPrompt, messages }) {
  if (provider === "anthropic") {
    return client.messages.create({
      model,
      max_tokens: MAX_TOKENS,
      system: systemPrompt,
      tools: TOOLS,
      messages,
    });
  }

  const headers = {
    "Authorization": `Bearer ${client.apiKey}`,
    "Content-Type": "application/json",
  };
  if (client.organization) {
    headers["OpenAI-Organization"] = client.organization;
  }
  if (process.env.OPENAI_SITE_URL) {
    headers["HTTP-Referer"] = process.env.OPENAI_SITE_URL;
  }
  if (process.env.OPENAI_APP_NAME) {
    headers["X-Title"] = process.env.OPENAI_APP_NAME;
  }

  const response = await fetch(`${client.baseUrl}/chat/completions`, {
    method: "POST",
    headers,
    body: JSON.stringify({
      model,
      max_tokens: MAX_TOKENS,
      messages: toOpenAIMessages(systemPrompt, messages),
      tools: toOpenAITools(TOOLS),
      tool_choice: "auto",
    }),
    signal: AbortSignal.timeout(MODEL_REQUEST_TIMEOUT_MS),
  });

  // Read as text first so a non-JSON error page can be quoted in the error.
  const text = await response.text();
  let data;
  try {
    data = JSON.parse(text);
  } catch {
    throw new Error(`OpenAI-compatible API returned non-JSON (${response.status}): ${text.slice(0, 500)}`);
  }
  if (!response.ok) {
    throw new Error(`OpenAI-compatible API error ${response.status}: ${JSON.stringify(data).slice(0, 1000)}`);
  }
  return normalizeOpenAIResponse(data);
}
main() { const workspace = resolveWorkspace(); const taskName = getTaskName(workspace); - ensureApiKey(); + const provider = getProvider(); + ensureApiKey(provider); - const model = getArg("model", DEFAULT_MODEL); + const model = getModel(provider); const taskDir = path.join(workspace, "tasks", taskName); const tracesDir = path.join(workspace, "traces", taskName); @@ -391,7 +605,7 @@ async function main() { } const browseEnv = getArg("env", "local"); - const client = new Anthropic(); + const client = createClient(provider); const runNumber = getNextRunNumber(tracesDir); const runId = `run-${String(runNumber).padStart(3, "0")}`; const traceDir = path.join(tracesDir, runId); @@ -405,7 +619,7 @@ async function main() { console.error(`\n${"=".repeat(60)}`); console.error(` AUTOBROWSE — ${taskName} — Run ${runNumber}`); console.error(`${"=".repeat(60)}`); - console.error(`Model: ${model} | Env: ${browseEnv} | Max turns: ${MAX_TURNS} | Trace: ${traceDir}\n`); + console.error(`Provider: ${provider} | Model: ${model} | Env: ${browseEnv} | Max turns: ${MAX_TURNS} | Trace: ${traceDir}\n`); const trace = []; const messages = [ @@ -423,13 +637,7 @@ async function main() { while (turn < MAX_TURNS) { turn++; - const response = await client.messages.create({ - model, - max_tokens: MAX_TOKENS, - system: systemPrompt, - tools: TOOLS, - messages, - }); + const response = await callModel({ provider, client, model, systemPrompt, messages }); totalInputTokens += response.usage.input_tokens; totalOutputTokens += response.usage.output_tokens; @@ -536,6 +744,8 @@ async function main() { "claude-opus-4-6": [5, 25], "claude-sonnet-4-6": [3, 15], "claude-haiku-4-5-20251001": [1, 5], + "gpt-4.1": [2, 8], + "gpt-4.1-mini": [0.4, 1.6], }; const [inputRate, outputRate] = pricing[model] ?? 
[3, 15]; const costUsd = (totalInputTokens * inputRate + totalOutputTokens * outputRate) / 1_000_000; @@ -544,6 +754,7 @@ async function main() { `# ${taskName} — Run ${runId} Summary`, "", `**Status:** ${runStatus}${finalStopReason ? ` (${finalStopReason})` : ""}`, + `**Provider:** ${provider} | **Model:** ${model}`, `**Duration:** ${durationSec.toFixed(1)}s | **Turns:** ${turn} | **Cost:** ~$${costUsd.toFixed(2)}`, `**Tokens:** ${totalInputTokens.toLocaleString()} in / ${totalOutputTokens.toLocaleString()} out`, "", @@ -602,6 +813,8 @@ async function main() { run: runId, status: runStatus, stop_reason: finalStopReason ?? (runStatus === "max_turns" ? "max_turns" : null), + provider, + model, duration_sec: parseFloat(durationSec.toFixed(1)), cost_usd: parseFloat(costUsd.toFixed(2)), turns: turn,