diff --git a/.github/workflows/endpoint-audit.yml b/.github/workflows/endpoint-audit.yml
new file mode 100644
index 0000000..ffefa64
--- /dev/null
+++ b/.github/workflows/endpoint-audit.yml
@@ -0,0 +1,149 @@
+name: Endpoint Audit
+
+on:
+  push:
+    branches: [ '**' ]
+  pull_request:
+
+# Needed so the audit can post its report as a comment on the PR.
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  # Verify the client doesn't call endpoints the agent-server no longer exposes.
+  # The audit diffs the client's HTTP surface against the live server's OpenAPI
+  # spec. See scripts/endpoint-audit.mjs.
+  endpoint-audit:
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    env:
+      AGENT_SERVER_PORT: 8010
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Use Node.js 20.x
+        uses: actions/setup-node@v4
+        with:
+          node-version: 20.x
+          cache: 'npm'
+
+      - name: Install dependencies
+        run: npm ci
+
+      # Single source of truth: package.json's config.agentServerImage.
+      - name: Resolve agent-server image
+        run: echo "AGENT_SERVER_IMAGE=$(node -p "require('./package.json').config.agentServerImage")" >> "$GITHUB_ENV"
+
+      - name: Start agent-server (ground-truth OpenAPI spec)
+        run: |
+          docker pull "${AGENT_SERVER_IMAGE}"
+          docker run -d --name agent-server -p ${AGENT_SERVER_PORT}:8000 \
+            -e LOG_JSON=true "${AGENT_SERVER_IMAGE}"
+          for i in $(seq 1 30); do
+            curl -sf http://localhost:${AGENT_SERVER_PORT}/health && break
+            echo "waiting for agent-server ($i)..."; sleep 2
+          done
+          # The loop exits 0 even when every probe failed; fail fast (with the
+          # container logs) instead of letting the audit hit a dead server.
+          curl -sf http://localhost:${AGENT_SERVER_PORT}/health > /dev/null || { docker logs agent-server; exit 1; }
+
+      - name: Endpoint audit (gates on client/server mismatch)
+        run: npm run audit:endpoints
+
+      - name: Upload audit report
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: endpoint-audit
+          path: .audit/
+          # .audit/ is a dot-directory; upload-artifact@v4 skips hidden paths by default.
+          include-hidden-files: true
+
+      # Post the report as a single, self-updating comment on the PR. Runs even
+      # when the audit gate fails (the report is written before the gate), so
+      # reviewers see the mismatches that failed the check. Skipped for fork PRs,
+      # whose GITHUB_TOKEN is read-only and cannot create or update comments.
+      - name: Comment audit report on PR
+        if: always() && github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const fs = require('fs');
+            const reportPath = '.audit/endpoint-audit.json';
+            if (!fs.existsSync(reportPath)) {
+              core.warning(`No audit report at ${reportPath}; skipping PR comment.`);
+              return;
+            }
+            const r = JSON.parse(fs.readFileSync(reportPath, 'utf8'));
+            const list = (items) =>
+              items && items.length ? items.map((i) => `- \`${i}\``).join('\n') : '_none_';
+            // Hidden HTML marker that identifies this workflow's own comment. It must be
+            // non-empty: `includes('')` is true for every comment on the PR.
+            const marker = '<!-- endpoint-audit-report -->';
+            const NO_BACKEND = '(no known backend)';
+            const offContract = r.mismatch || [];
+            const byBackend = r.byBackend || {};
+            const status = offContract.length
+              ? `❌ ${offContract.length} off-contract call(s) — not on the agent-server`
+              : '✅ All client calls are on the agent-server';
+            const backendSections = Object.keys(byBackend)
+              .sort()
+              .map((name) => {
+                const heading =
+                  name === NO_BACKEND
+                    ? `#### ⛔ ${name} — served by no backend we can see (${byBackend[name].length})`
+                    : `#### ↗️ served by \`${name}\` (${byBackend[name].length})`;
+                return [heading, '', list(byBackend[name]), ''].join('\n');
+              })
+              .join('\n');
+            const classifiers =
+              r.classifiers && r.classifiers.length ? ` · classifiers: ${r.classifiers.join(', ')}` : '';
+            const summary = [
+              '| Category | Count |',
+              '| --- | ---: |',
+              `| ❌ Off-contract (not on agent-server) | ${offContract.length} |`,
+              ...Object.keys(byBackend)
+                .sort()
+                .map((name) =>
+                  name === NO_BACKEND
+                    ? `| &nbsp;&nbsp;⛔ no known backend | ${byBackend[name].length} |`
+                    : `| &nbsp;&nbsp;↗️ served by \`${name}\` | ${byBackend[name].length} |`
+                ),
+              `| ➕ Missing API (agent-server has, client lacks) | ${(r.missingApi || []).length} |`,
+              `| agent-server endpoints | ${r.agentServer} |`,
+              `| client endpoints | ${r.client} |`,
+            ].join('\n');
+            const body = [
+              marker,
+              '## Endpoint audit',
+              '',
+              `**${status}**${classifiers}`,
+              '',
+              summary,
+              '',
+              `### ❌ Not on agent-server (gated, ${offContract.length})`,
+              '',
+              offContract.length ? backendSections : '_none_',
+              `### ➕ Missing API — agent-server exposes it, client does not implement (${(r.missingApi || []).length})`,
+              '',
+              list(r.missingApi),
+            ].join('\n');
+            const { owner, repo } = context.repo;
+            const issue_number = context.issue.number;
+            const comments = await github.paginate(github.rest.issues.listComments, {
+              owner,
+              repo,
+              issue_number,
+            });
+            const existing = comments.find((c) => c.body && c.body.includes(marker));
+            if (existing) {
+              await github.rest.issues.updateComment({ owner, repo, comment_id: existing.id, body });
+            } else {
+              await github.rest.issues.createComment({ owner, repo, issue_number, body });
+            }
+
+      - name: Stop agent-server
+        if: always()
+        run: docker rm -f agent-server || true
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index e3b0541..e16ab31 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -16,7 +16,6 @@ env:
   AGENT_SERVER_PORT: 8010
   HOST_WORKSPACE_DIR: /tmp/agent-workspace
   AGENT_WORKSPACE_DIR: /workspace
-  AGENT_SERVER_IMAGE: ghcr.io/openhands/agent-server:1.29.0-python
 
 jobs:
   integration-test:
@@ -36,6 +35,10 @@ jobs:
       - name: Install dependencies
         run: npm ci
 
+      # Single source of truth: package.json's config.agentServerImage.
+ - name: Resolve agent-server image + run: echo "AGENT_SERVER_IMAGE=$(node -p "require('./package.json').config.agentServerImage")" >> "$GITHUB_ENV" + - name: Build package run: npm run build @@ -162,6 +165,10 @@ jobs: - name: Install dependencies run: npm ci + # Single source of truth: package.json's config.agentServerImage. + - name: Resolve agent-server image + run: echo "AGENT_SERVER_IMAGE=$(node -p "require('./package.json').config.agentServerImage")" >> "$GITHUB_ENV" + - name: Build package run: npm run build diff --git a/.gitignore b/.gitignore index cf3977d..e5e4da9 100644 --- a/.gitignore +++ b/.gitignore @@ -93,3 +93,6 @@ jspm_packages/ # TernJS port file .tern-port + +# endpoint-audit runtime coverage logs +.audit/ diff --git a/AGENTS.md b/AGENTS.md index 4321006..8b84bf8 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -304,7 +304,8 @@ Integration tests are in `src/__tests__/integration/` and require a running agen export LLM_API_KEY="your-api-key" export LLM_MODEL="anthropic/claude-sonnet-4-5-20250929" -# Start agent-server in Docker (software-agent-sdk v1.29.0) +# Start agent-server in Docker (software-agent-sdk v1.29.0; +# canonical pin: package.json -> config.agentServerImage) docker run -d --name agent-server -p 8010:8000 \ -v /tmp/agent-workspace:/workspace \ ghcr.io/openhands/agent-server:1.29.0-python @@ -336,7 +337,7 @@ Required GitHub secrets: ### CI Image Version -- The integration workflow pins `ghcr.io/openhands/agent-server:1.29.0-python`, which corresponds to the `software-agent-sdk` release `v1.29.0`. +- The agent-server image is defined **once** in `package.json` under `config.agentServerImage`. The `integration-tests.yml` and `endpoint-audit.yml` workflows read it from there at runtime, so bump the version in that single place (and the local-setup snippets above). - Keep the TypeScript client tests strict against that released server image rather than adding compatibility fallbacks for older prerelease builds. 
## Agent Behavior Guidelines diff --git a/README.md b/README.md index 17eafc6..fea9fb7 100644 --- a/README.md +++ b/README.md @@ -377,7 +377,8 @@ Integration tests require a running agent-server in Docker with a mounted worksp chmod 777 /tmp/agent-workspace ``` -2. Start the agent-server container (software-agent-sdk v1.29.0): +2. Start the agent-server container (software-agent-sdk v1.29.0; the canonical + image pin lives in `package.json` under `config.agentServerImage`): ```bash docker run -d \ diff --git a/endpoint-audit.config.json b/endpoint-audit.config.json new file mode 100644 index 0000000..e8d70e4 --- /dev/null +++ b/endpoint-audit.config.json @@ -0,0 +1,23 @@ +{ + "_comment": "Endpoint audit config. The client is GATED against the agent-server only: any client call not on the agent-server fails the gate. Additional `classify` specs are fetched live purely to label each gated call with the backend that actually serves it (cloud) vs no known backend at all; if a classify spec is unreachable its labels just degrade to 'no known backend'. 
See scripts/endpoint-audit.mjs.", + "specs": [ + { + "name": "agent-server", + "role": "gate", + "url": "http://localhost:8010/openapi.json" + }, + { + "name": "cloud", + "role": "classify", + "url": "https://app.all-hands.dev/openapi.json" + } + ], + "clientGlobs": ["src/client", "src/conversation", "src/workspace"], + + "ignoreServerOnly": ["/v1/"], + + "gate": { + "mismatch": true, + "missingApi": false + } +} diff --git a/package.json b/package.json index 1649d11..e02e4bd 100644 --- a/package.json +++ b/package.json @@ -41,6 +41,9 @@ } } }, + "config": { + "agentServerImage": "ghcr.io/openhands/agent-server:1.29.0-python" + }, "scripts": { "build": "tsc && node ./scripts/copy-json-assets.mjs && node ./scripts/rewrite-relative-imports.mjs", "lint": "eslint src/**/*.ts", @@ -56,6 +59,7 @@ "test:integration:llm": "jest --config jest.integration.config.cjs --runInBand --testPathIgnorePatterns=\"deterministic-api.integration.test.ts|bash.integration.test.ts\"", "test:git-dependency": "node ./scripts/smoke-test-git-dependency.mjs", "test:all": "npm run test && npm run test:integration && npm run test:git-dependency", + "audit:endpoints": "node ./scripts/endpoint-audit.mjs", "dev": "tsc --watch", "clean": "rimraf dist", "prepare": "npm run clean && npm run build", diff --git a/scripts/endpoint-audit.mjs b/scripts/endpoint-audit.mjs new file mode 100644 index 0000000..3823104 --- /dev/null +++ b/scripts/endpoint-audit.mjs @@ -0,0 +1,185 @@ +#!/usr/bin/env node +/** + * Endpoint audit — reconciles the client's HTTP surface against the + * agent-server OpenAPI spec: + * + * - The client is GATED against the agent-server spec only. Any endpoint the + * client calls that the agent-server does NOT expose fails the gate — this + * is an agent-server client, so off-contract calls are surfaced as errors + * rather than silently allowlisted. + * - To explain WHERE each off-contract call actually goes, extra `classify` + * specs (e.g. 
the cloud app at app.all-hands.dev) are loaded and used only
+ *    to label each gated endpoint with the backend that serves it, or to mark
+ *    it as served by NO known backend (genuinely unsupported).
+ *  - missing API — agent-server exposes an endpoint the client does NOT
+ *    implement (informational, not gated).
+ *
+ * Specs are fetched live from each backend (preferred) or read from a committed
+ * fallback file. A `gate` spec with neither is a hard error; a `classify` spec
+ * that is unavailable is skipped (its endpoints just go unlabeled).
+ *
+ * Exit code is non-zero on gating violations (see config.gate).
+ */
+import fs from 'node:fs';
+import path from 'node:path';
+
+// Config, client sources and the report are all resolved against the directory
+// the script is launched from.
+const ROOT = process.cwd();
+const cfg = JSON.parse(fs.readFileSync(path.join(ROOT, 'endpoint-audit.config.json'), 'utf8'));
+
+// HTTP methods that count as endpoints; any other key of an OpenAPI path item
+// is ignored when building the spec set.
+const VERBS = ['get', 'post', 'put', 'patch', 'delete'];
+
+/**
+ * Builds the canonical "VERB /path" key used to compare client calls with spec
+ * routes: "/api/x/${id}" and "/api/x/{name}" both become "/api/x/{}", and the
+ * query string and trailing slashes are dropped.
+ *
+ * @param {string} verb - HTTP method in any letter case.
+ * @param {string} p - Route as written in the client source or the OpenAPI spec.
+ * @returns {string} Normalized key, e.g. "GET /api/x/{}"; an empty path becomes "/".
+ */
+const norm = (verb, p) => {
+  const route = p
+    // Placeholders are collapsed BEFORE the query string is cut off: a `${...}`
+    // expression may itself contain `?` (optional chaining, a ternary), and
+    // cutting first would truncate the path in the middle of the placeholder.
+    .replace(/\$?\{[^{}]*\}/g, '{}')
+    .split('?')[0]
+    .replace(/\/+$/, '');
+  return `${verb.toUpperCase()} ${route || '/'}`;
+};
+
+// ---------------------------------------------------------------------------
+// 1. Backend specs: agent-server (gate) + others (classify)
+// ---------------------------------------------------------------------------
+/**
+ * Loads one OpenAPI document described by a `specs` entry of the config.
+ *
+ * The live `spec.url` is tried first (8 s timeout). On a non-OK status, or when
+ * the fetch or JSON parsing throws, a warning is printed and the committed
+ * `spec.file` is read instead if it exists.
+ *
+ * @param {{name: string, role?: string, url?: string, file?: string}} spec
+ * @returns {Promise<object|null>} The parsed document, or null for a `classify`
+ *   spec that could not be loaded from either source.
+ * @throws {Error} When any other spec has no reachable url and no fallback file.
+ */
+async function loadSpec(spec) {
+  if (spec.url) {
+    try {
+      const res = await fetch(spec.url, { signal: AbortSignal.timeout(8000) });
+      if (res.ok) return await res.json();
+      console.warn(`  ! ${spec.name}: ${spec.url} -> HTTP ${res.status}, falling back to file`);
+    } catch (e) {
+      console.warn(`  ! ${spec.name}: ${spec.url} unreachable (${e.message}), falling back to file`);
+    }
+  }
+  if (spec.file && fs.existsSync(path.join(ROOT, spec.file)))
+    return JSON.parse(fs.readFileSync(path.join(ROOT, spec.file), 'utf8'));
+  if (spec.role === 'classify') {
+    console.warn(`  ! ${spec.name}: no reachable url and no fallback file — skipping (classify only)`);
+    return null;
+  }
+  throw new Error(`spec "${spec.name}": no reachable url and no fallback file`);
+}
+
+/**
+ * Flattens an OpenAPI document into the set of normalized "VERB /path" keys it
+ * declares. A document without `paths` yields an empty set.
+ *
+ * @param {object} doc - Parsed OpenAPI document.
+ * @returns {Set<string>}
+ */
+const specToSet = (doc) => {
+  const set = new Set(); // normalized "VERB /path"
+  for (const [p, methods] of Object.entries(doc.paths ?? {}))
+    for (const verb of Object.keys(methods)) if (VERBS.includes(verb)) set.add(norm(verb, p));
+  return set;
+};
+
+const gate = new Set(); // union of all `gate` specs (the agent-server contract)
+const classifiers = []; // [{ name, set }] for labelling off-contract calls
+for (const spec of cfg.specs) {
+  const doc = await loadSpec(spec);
+  if (!doc) continue; // unavailable classify spec: nothing to label with
+  const set = specToSet(doc);
+  // Any role other than `classify` contributes to the gated contract.
+  if (spec.role === 'classify') classifiers.push({ name: spec.name, set });
+  else for (const k of set) gate.add(k);
+}
+
+// ---------------------------------------------------------------------------
+// 2. Client surface: static extraction of every endpoint the client can call
+// ---------------------------------------------------------------------------
+/**
+ * Recursively collects the `.ts` files under `dir`, skipping `__tests__` and
+ * `node_modules` directories.
+ *
+ * @param {string} dir - Directory to scan; `fs.readdirSync` throws if it is missing.
+ * @returns {string[]} Paths of the files found, each prefixed with `dir`.
+ */
+function walk(dir) {
+  const out = [];
+  for (const e of fs.readdirSync(dir, { withFileTypes: true })) {
+    const full = path.join(dir, e.name);
+    if (e.isDirectory()) {
+      if (e.name === '__tests__' || e.name === 'node_modules') continue;
+      out.push(...walk(full));
+    } else if (e.name.endsWith('.ts')) out.push(full);
+  }
+  return out;
+}
+
+// `.get(`, `.post<T>(`, ... — only the first such call on a line is considered.
+const VERB_CALL = /\.(get|post|put|patch|delete)\s*(<[^>]*>)?\(/;
+// A quoted or template string starting with one of the known route roots.
+const PATH_LIT = /[`'"](\/(?:api|server_info|alive|health|ready)[^`'"]*)[`'"]/;
+
+const client = new Set();
+// NOTE: despite the name, `clientGlobs` entries are used as plain directories.
+for (const glob of cfg.clientGlobs) {
+  for (const file of walk(path.join(ROOT, glob))) {
+    const lines = fs.readFileSync(file, 'utf8').split('\n');
+    lines.forEach((ln, i) => {
+      // direct `.get/post/...(` calls, plus generic `.request({ method, url })`
+      let verb = ln.match(VERB_CALL)?.[1];
+      let win = 3; // lines searched for the path literal, starting at the call
+      if (!verb && /\.request\s*\(/.test(ln)) {
+        const block = lines.slice(i, i + 6).join('\n');
+        verb = block.match(/method:\s*['"`](GET|POST|PUT|PATCH|DELETE)['"`]/i)?.[1];
+        win = 6;
+      }
+      if (!verb) return;
+      const p = lines
+        .slice(i, i + win)
+        .join('\n')
+        .match(PATH_LIT)?.[1];
+      if (p) client.add(norm(verb, p));
+    });
+  }
+}
+
+// ---------------------------------------------------------------------------
+// 3. Diff + classify
+// ---------------------------------------------------------------------------
+const sorted = (it) => [...it].sort();
+// Substring (not prefix) match: keys look like "GET /api/...", so an entry such
+// as "/v1/" hides every server-only endpoint whose path contains it.
+const ignoredApi = (k) => (cfg.ignoreServerOnly ?? []).some((needle) => k.includes(needle));
+
+const NO_BACKEND = '(no known backend)';
+// The first classifier (in config order) that declares the endpoint wins.
+const classify = (k) => classifiers.find((c) => c.set.has(k))?.name ?? NO_BACKEND;
+
+// Every client call the agent-server does NOT expose is off-contract -> gated.
+const mismatch = sorted([...client].filter((k) => !gate.has(k)));
+const byBackend = {}; // backend name -> [endpoints]
+for (const k of mismatch) (byBackend[classify(k)] ??= []).push(k);
+
+const missingApi = sorted([...gate].filter((k) => !client.has(k) && !ignoredApi(k)));
+
+// ---------------------------------------------------------------------------
+// 4. Report
+// ---------------------------------------------------------------------------
+/** Prints a titled list of endpoints, with the item count after the title. */
+const section = (title, items) => {
+  console.log(`\n${title} (${items.length})`);
+  for (const k of items) console.log(`  ${k}`);
+};
+console.log(
+  `\n=== Endpoint audit — agent-server=${gate.size} client=${client.size}` +
+    ` classifiers=[${classifiers.map((c) => c.name).join(', ') || 'none'}] ===`
+);
+console.log(`\n❌ NOT ON AGENT-SERVER — off-contract client calls (${mismatch.length})`);
+for (const name of Object.keys(byBackend).sort()) {
+  const tag = name === NO_BACKEND ? `${name} ⛔` : `served by: ${name}`;
+  console.log(`  ${tag} (${byBackend[name].length})`);
+  for (const k of byBackend[name]) console.log(`    ${k}`);
+}
+section('➕ MISSING API — agent-server has it, client does not implement', missingApi);
+
+// The JSON report is written BEFORE the gate below can exit, so it exists even
+// when the audit fails.
+fs.mkdirSync(path.join(ROOT, '.audit'), { recursive: true });
+fs.writeFileSync(
+  path.join(ROOT, '.audit/endpoint-audit.json'),
+  JSON.stringify(
+    {
+      agentServer: gate.size,
+      client: client.size,
+      classifiers: classifiers.map((c) => c.name),
+      mismatch,
+      byBackend,
+      missingApi,
+    },
+    null,
+    2
+  )
+);
+
+// ---------------------------------------------------------------------------
+// 5. Gate
+// ---------------------------------------------------------------------------
+// `mismatch` gates unless explicitly set to false; `missingApi` gates only when
+// explicitly enabled.
+const g = cfg.gate ?? {};
+const violations = [];
+if (g.mismatch !== false && mismatch.length)
+  violations.push(`${mismatch.length} off-contract (not on agent-server)`);
+if (g.missingApi && missingApi.length) violations.push(`${missingApi.length} missing API`);
+
+if (violations.length) {
+  console.error(`\n❌ endpoint audit failed: ${violations.join(', ')}`);
+  process.exit(1);
+}
+console.log('\n✅ endpoint audit passed');