Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 143 additions & 0 deletions .github/workflows/endpoint-audit.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
name: Endpoint Audit

# Triggers: every push to any branch, and every pull request.
on:
  push:
    branches:
      - '**'
  pull_request:

# Token scope for this workflow: read-only repository contents, plus write
# access to pull requests so the audit can post its report as a PR comment.
permissions:
  contents: read
  pull-requests: write

jobs:
  # Verify the client doesn't call endpoints the agent-server no longer exposes.
  # The audit diffs the client's HTTP surface against the live server's OpenAPI
  # spec. See scripts/endpoint-audit.mjs.
  #
  # Flow: start the pinned agent-server image in Docker, wait until its /health
  # endpoint answers, run the audit (which gates the job), then publish the
  # report as an artifact and - on pull requests - as a self-updating comment.
  endpoint-audit:
    runs-on: ubuntu-latest
    timeout-minutes: 15
    env:
      # Host port mapped to the container's port 8000.
      AGENT_SERVER_PORT: '8010'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Use Node.js 20.x
        uses: actions/setup-node@v4
        with:
          node-version: 20.x
          cache: 'npm'

      - name: Install dependencies
        run: npm ci

      # Single source of truth: package.json's config.agentServerImage.
      - name: Resolve agent-server image
        run: echo "AGENT_SERVER_IMAGE=$(node -p "require('./package.json').config.agentServerImage")" >> "$GITHUB_ENV"

      - name: Start agent-server (ground-truth OpenAPI spec)
        run: |
          docker pull "${AGENT_SERVER_IMAGE}"
          docker run -d --name agent-server -p "${AGENT_SERVER_PORT}:8000" \
            -e LOG_JSON=true "${AGENT_SERVER_IMAGE}"

          # Poll /health up to 30 times. --max-time bounds each attempt so a
          # socket that accepts but never answers cannot stall the loop.
          healthy=false
          for i in $(seq 1 30); do
            if curl -sf --max-time 5 "http://localhost:${AGENT_SERVER_PORT}/health"; then
              healthy=true
              break
            fi
            echo "waiting for agent-server ($i)..."
            sleep 2
          done

          # Fail here, with the container logs, instead of letting the audit
          # step fail later with an opaque connection error.
          if [ "${healthy}" != "true" ]; then
            echo "::error::agent-server did not become healthy after 30 attempts"
            docker logs agent-server || true
            exit 1
          fi

      - name: Endpoint audit (gates on client/server mismatch)
        run: npm run audit:endpoints

      - name: Upload audit report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: endpoint-audit
          path: .audit/
          # .audit/ is a dot-directory; upload-artifact@v4 skips hidden paths by default.
          include-hidden-files: true

      # Post the report as a single, self-updating comment on the PR. Runs even
      # when the audit gate fails (the report is written before the gate), so
      # reviewers see the mismatches that failed the check.
      #
      # On pull requests from forks the GITHUB_TOKEN is read-only regardless of
      # the `permissions` block, so a 403 from the comment API is downgraded to
      # a warning rather than failing an otherwise green job.
      - name: Comment audit report on PR
        if: always() && github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const reportPath = '.audit/endpoint-audit.json';
            if (!fs.existsSync(reportPath)) {
              core.warning(`No audit report at ${reportPath}; skipping PR comment.`);
              return;
            }
            const r = JSON.parse(fs.readFileSync(reportPath, 'utf8'));

            // Render an array as a Markdown bullet list of inline-code items.
            const list = (items) =>
              items && items.length ? items.map((i) => `- \`${i}\``).join('\n') : '_none_';

            // Hidden marker used to find and update this job's earlier comment.
            const marker = '<!-- endpoint-audit-report -->';
            // Key in byBackend that is rendered with the "no backend" wording.
            const NO_BACKEND = '(no known backend)';
            const offContract = r.mismatch || [];
            const byBackend = r.byBackend || {};
            const status = offContract.length
              ? `❌ ${offContract.length} off-contract call(s) — not on the agent-server`
              : '✅ All client calls are on the agent-server';

            // One sub-section per backend name, in alphabetical order.
            const backendSections = Object.keys(byBackend)
              .sort()
              .map((name) => {
                const heading =
                  name === NO_BACKEND
                    ? `#### ⛔ ${name} — served by no backend we can see (${byBackend[name].length})`
                    : `#### ↗️ served by \`${name}\` (${byBackend[name].length})`;
                return [heading, '', list(byBackend[name]), ''].join('\n');
              })
              .join('\n');
            const classifiers =
              r.classifiers && r.classifiers.length ? ` · classifiers: ${r.classifiers.join(', ')}` : '';
            const summary = [
              '| Category | Count |',
              '| --- | ---: |',
              `| ❌ Off-contract (not on agent-server) | ${offContract.length} |`,
              ...Object.keys(byBackend)
                .sort()
                .map((name) =>
                  name === NO_BACKEND
                    ? `| &nbsp;&nbsp;⛔ no known backend | ${byBackend[name].length} |`
                    : `| &nbsp;&nbsp;↗️ served by \`${name}\` | ${byBackend[name].length} |`
                ),
              `| ➕ Missing API (agent-server has, client lacks) | ${(r.missingApi || []).length} |`,
              `| agent-server endpoints | ${r.agentServer} |`,
              `| client endpoints | ${r.client} |`,
            ].join('\n');
            const body = [
              marker,
              '## Endpoint audit',
              '',
              `**${status}**${classifiers}`,
              '',
              summary,
              '',
              `### ❌ Not on agent-server (gated, ${offContract.length})`,
              '',
              offContract.length ? backendSections : '_none_',
              `### ➕ Missing API — agent-server exposes it, client does not implement (${(r.missingApi || []).length})`,
              '',
              list(r.missingApi),
            ].join('\n');

            const { owner, repo } = context.repo;
            const issue_number = context.issue.number;
            try {
              // Update the existing report comment if there is one, so the PR
              // carries a single up-to-date report instead of one per run.
              const comments = await github.paginate(github.rest.issues.listComments, {
                owner,
                repo,
                issue_number,
              });
              const existing = comments.find((c) => c.body && c.body.includes(marker));
              if (existing) {
                await github.rest.issues.updateComment({ owner, repo, comment_id: existing.id, body });
              } else {
                await github.rest.issues.createComment({ owner, repo, issue_number, body });
              }
            } catch (err) {
              // Only a permission failure is tolerated; anything else is a real error.
              if (err.status !== 403) {
                throw err;
              }
              core.warning(
                `Token is not allowed to comment on this PR (expected for fork PRs); ` +
                  `the report is still available as the endpoint-audit artifact. ${err.message}`
              );
            }

      - name: Stop agent-server
        if: always()
        run: docker rm -f agent-server || true
9 changes: 8 additions & 1 deletion .github/workflows/integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ env:
AGENT_SERVER_PORT: 8010
HOST_WORKSPACE_DIR: /tmp/agent-workspace
AGENT_WORKSPACE_DIR: /workspace
AGENT_SERVER_IMAGE: ghcr.io/openhands/agent-server:1.29.0-python

jobs:
integration-test:
Expand All @@ -36,6 +35,10 @@ jobs:
- name: Install dependencies
run: npm ci

# Single source of truth: package.json's config.agentServerImage.
- name: Resolve agent-server image
run: echo "AGENT_SERVER_IMAGE=$(node -p "require('./package.json').config.agentServerImage")" >> "$GITHUB_ENV"

- name: Build package
run: npm run build

Expand Down Expand Up @@ -162,6 +165,10 @@ jobs:
- name: Install dependencies
run: npm ci

# Single source of truth: package.json's config.agentServerImage.
- name: Resolve agent-server image
run: echo "AGENT_SERVER_IMAGE=$(node -p "require('./package.json').config.agentServerImage")" >> "$GITHUB_ENV"

- name: Build package
run: npm run build

Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,6 @@ jspm_packages/

# TernJS port file
.tern-port

# endpoint-audit output (report and runtime coverage logs)
.audit/
5 changes: 3 additions & 2 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,8 @@ Integration tests are in `src/__tests__/integration/` and require a running agen
export LLM_API_KEY="your-api-key"
export LLM_MODEL="anthropic/claude-sonnet-4-5-20250929"

# Start agent-server in Docker (software-agent-sdk v1.29.0)
# Start agent-server in Docker (software-agent-sdk v1.29.0;
# canonical pin: package.json -> config.agentServerImage)
docker run -d --name agent-server -p 8010:8000 \
-v /tmp/agent-workspace:/workspace \
ghcr.io/openhands/agent-server:1.29.0-python
Expand Down Expand Up @@ -336,7 +337,7 @@ Required GitHub secrets:

### CI Image Version

- The integration workflow pins `ghcr.io/openhands/agent-server:1.29.0-python`, which corresponds to the `software-agent-sdk` release `v1.29.0`.
- The agent-server image is defined **once** in `package.json` under `config.agentServerImage`. The `integration-tests.yml` and `endpoint-audit.yml` workflows read it from there at runtime, so CI only needs the version bumped in that one place. The local-setup snippets above (and in `README.md`) still hard-code the tag, so update them by hand whenever you bump it.
- Keep the TypeScript client tests strict against that released server image rather than adding compatibility fallbacks for older prerelease builds.

## Agent Behavior Guidelines
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,8 @@ Integration tests require a running agent-server in Docker with a mounted worksp
chmod 777 /tmp/agent-workspace
```

2. Start the agent-server container (software-agent-sdk v1.29.0):
2. Start the agent-server container (software-agent-sdk v1.29.0; the canonical
image pin lives in `package.json` under `config.agentServerImage`):

```bash
docker run -d \
Expand Down
23 changes: 23 additions & 0 deletions endpoint-audit.config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"_comment": "Endpoint audit config. The client is GATED against the agent-server only: any client call not on the agent-server fails the gate. Additional `classify` specs are fetched live purely to label each gated call with the backend that actually serves it (cloud) vs no known backend at all; if a classify spec is unreachable its labels just degrade to 'no known backend'. See scripts/endpoint-audit.mjs.",
"specs": [
{
"name": "agent-server",
"role": "gate",
"url": "http://localhost:8010/openapi.json"
},
{
"name": "cloud",
"role": "classify",
"url": "https://app.all-hands.dev/openapi.json"
}
],
"clientGlobs": ["src/client", "src/conversation", "src/workspace"],

"ignoreServerOnly": ["/v1/"],

"gate": {
"mismatch": true,
"missingApi": false
}
}
4 changes: 4 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@
}
}
},
"config": {
"agentServerImage": "ghcr.io/openhands/agent-server:1.29.0-python"
},
"scripts": {
"build": "tsc && node ./scripts/copy-json-assets.mjs && node ./scripts/rewrite-relative-imports.mjs",
"lint": "eslint src/**/*.ts",
Expand All @@ -56,6 +59,7 @@
"test:integration:llm": "jest --config jest.integration.config.cjs --runInBand --testPathIgnorePatterns=\"deterministic-api.integration.test.ts|bash.integration.test.ts\"",
"test:git-dependency": "node ./scripts/smoke-test-git-dependency.mjs",
"test:all": "npm run test && npm run test:integration && npm run test:git-dependency",
"audit:endpoints": "node ./scripts/endpoint-audit.mjs",
"dev": "tsc --watch",
"clean": "rimraf dist",
"prepare": "npm run clean && npm run build",
Expand Down
Loading
Loading