diff --git a/.github/workflows/codeql.yaml b/.github/workflows/codeql.yaml new file mode 100644 index 0000000..ae2b126 --- /dev/null +++ b/.github/workflows/codeql.yaml @@ -0,0 +1,42 @@ +name: CodeQL + +on: + push: + branches: [main] + pull_request: + branches: [main] + schedule: + - cron: '0 3 * * 0' + +concurrency: + group: codeql-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [python] + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Initialize CodeQL + uses: github/codeql-action/init@v4 + with: + languages: ${{ matrix.language }} + + - name: Autobuild + uses: github/codeql-action/autobuild@v4 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v4 \ No newline at end of file diff --git a/.github/workflows/premerge.yaml b/.github/workflows/premerge.yaml index 22a0ff2..976ffec 100644 --- a/.github/workflows/premerge.yaml +++ b/.github/workflows/premerge.yaml @@ -52,3 +52,56 @@ jobs: - name: Run ruff run: uvx ruff check . + + integration-tests: + # Runs the suite gated by `-m integration` against the Docker daemon that + # ships pre-installed on ubuntu-latest runners. Skipped if the unit tests + # didn't pass — there's no point burning daemon time on a broken branch. + runs-on: [ubuntu-latest] + name: Run integration tests + needs: [pytest-run] + timeout-minutes: 20 + # The Hub-login step below gates on `env.DOCKERHUB_TOKEN`, which means the env + # var needs to be resolved by the time the step `if:` is evaluated. Step-scoped + # `env:` is set up too late for that, so we hoist the values to job-level env + # (which IS resolved before any step `if:` runs). Referencing `secrets.*` + # directly in the step `if:` is rejected at workflow-validation time with + # "Unrecognized named-value: 'secrets'", so this hoist is the only working + # shape for an opt-in-via-secret gate. + env: + DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USER }} + DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }} + steps: + - name: Check out source repository + uses: actions/checkout@v4 + + - name: Set up Python environment + uses: actions/setup-python@v5 + with: + python-version: "3.14" + + - name: Set up uv + uses: astral-sh/setup-uv@v5 + + - name: Install dependencies + run: uv sync --all-groups + + - name: Verify Docker tooling on the runner + # ubuntu-latest ships Docker Engine + docker CLI + compose v2 + buildx. + # Scout is NOT pre-installed; tests skip cleanly via has_plugin(). + run: | + docker version + docker info | head -20 + docker compose version + docker buildx version + docker scout version 2>/dev/null || echo "scout not installed; scout integration tests will skip" + + - name: Optional Docker Hub login to dodge anonymous rate limits + # Opt-in: set DOCKERHUB_USER / DOCKERHUB_TOKEN as repo secrets to authenticate. + # Without them, the job runs anonymous and the existing skip-on-pull-failure + # behaviour in the compose lifecycle test catches throttling. + if: ${{ env.DOCKERHUB_TOKEN != '' && env.DOCKERHUB_USER != '' }} + run: echo "$DOCKERHUB_TOKEN" | docker login --username "$DOCKERHUB_USER" --password-stdin + + - name: Run integration tests + run: uv run pytest -m integration -v --tb=short diff --git a/CLAUDE.md b/CLAUDE.md index fddf7db..fedd36e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -67,7 +67,9 @@ Each file maps to one Docker SDK domain (or, for CLI-only and registry-only feat | `tools/swarm.py` | Swarm init, join, leave | docker-py | | `tools/compose.py` | Docker Compose v2 | `docker compose` CLI via `_cli.py` | | `tools/context.py` | Docker CLI contexts | `docker context` CLI via `_cli.py` | -| `tools/registry.py` | OCI v2 registries + Docker Hub | HTTPS via `httpx` (no daemon) | +| `tools/buildx.py` | Buildx / BuildKit (multi-arch builds, imagetools — supersedes `docker manifest`) | `docker buildx` CLI via `_cli.py` | +| `tools/scout.py` | Vulnerability scanning, SBOMs, base-image recommendations | `docker scout` CLI via `_cli.py` | +| `tools/registry.py` | OCI v2 registries + Docker Hub (with 429 retry policy) | HTTPS via `httpx` (no daemon) | | `tools/prompts.py` | `@mcp.prompt()` workflow templates | — | | `tools/resources.py` | `@mcp.resource()` doc endpoints | — | diff --git a/README.md b/README.md index dd0586c..3cf90e4 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,9 @@ Once loaded, the agent gets MCP tools grouped by Docker domain. A few examples: - **System** — `ping`, `info`, `version`, `df`, `events` - **Compose** — `compose_up`, `compose_down`, `compose_ps`, `compose_logs`, `compose_config`, `compose_build`, `compose_pull`, `compose_run`, `compose_exec`, `compose_ls` *(wraps the `docker compose` CLI plugin)* - **Contexts** — `context_ls`, `context_inspect`, `context_create`, `context_use`, `context_rm` *(wraps the `docker context` CLI)* -- **Registry / Hub** — `registry_list_tags`, `registry_inspect_manifest`, `hub_list_tags`, `hub_repo_info` *(HTTPS to OCI v2 registries and the Docker Hub API — no daemon required)* +- **Registry / Hub** — `registry_list_tags`, `registry_inspect_manifest`, `hub_list_tags`, `hub_repo_info` *(HTTPS to OCI v2 registries and the Docker Hub API — no daemon required; transparent retry on a brief 429)* +- **Buildx** — `buildx_build`, `buildx_bake`, `buildx_imagetools_inspect`, `buildx_imagetools_create`, `buildx_ls`, `buildx_inspect`, `buildx_du`, `buildx_prune`, `buildx_create`, `buildx_use`, `buildx_rm` *(wraps the `docker buildx` CLI plugin). Use `buildx_imagetools_*` in place of `docker manifest` — that command is in maintenance mode and lacks support for OCI image indexes and attestations.* +- **Scout** — `scout_cves`, `scout_quickview`, `scout_recommendations`, `scout_compare`, `scout_sbom` *(wraps the `docker scout` CLI plugin; most features benefit from `docker login` on the host running this server).* The SDK-backed surface mirrors the [Docker SDK reference](https://docker-py.readthedocs.io/en/stable/) — if it's documented there, it's available here. The Compose and Context surfaces follow the [Compose CLI](https://docs.docker.com/reference/cli/docker/compose/) and [docker context](https://docs.docker.com/reference/cli/docker/context/) references. @@ -97,6 +99,18 @@ Many AI clients let you invoke registered MCP prompts directly (in Claude Code, /find_latest_image_tag image=ghcr.io/org/repo ``` +**Buildx, Scout, and multi-arch manifests** + +``` +/plan_multiarch_build image=ghcr.io/org/app:v1 platforms=linux/amd64,linux/arm64 +/audit_image_cves image=alpine:3.19 +/compare_image_versions old_image=org/app:v1 new_image=org/app:v2 +/recommend_base_image image=org/app:v1 +/inspect_multiarch_manifest image=alpine:3.19 +/create_multiarch_manifest target_tag=org/app:v1 source_tags=org/app:v1-amd64,org/app:v1-arm64 +/migrate_from_docker_manifest +``` + …or in plain English: > Pull `redis:7-alpine` and run it as a container called `cache` on a new `app-net` network, exposing port 6379 only inside that network. @@ -120,7 +134,7 @@ Connecting this server to an AI agent grants it the same level of access as a lo - **HTTPS-backed registry tools** (`registry_list_tags`, `registry_inspect_manifest`, `hub_list_tags`, `hub_repo_info`) talk to the registry directly over HTTPS and do NOT read `~/.docker/config.json`. The `registry_*` tools accept `username` / `password` for private registries; the `hub_*` tools currently support public Hub repositories only. Use a per-invocation token with the minimum required scope rather than a long-lived password. - **`exec_in_container`, `compose_exec`, and `compose_run` run arbitrary commands.** When any part of the command is derived from agent-controlled input, use an exec-form argv list that does not invoke a shell (e.g. `["python", "-V"]`). A list like `["sh", "-c", template]` that invokes a shell will interpret shell metacharacters in the untrusted substrings. - **Container archive paths.** `get_container_archive` and `put_container_archive` forward the supplied path verbatim to the daemon. The container is the trust boundary — if you do not trust its filesystem, do not assume `..` traversal will be rejected. -- **Destructive operations have no built-in confirmation.** `prune_*`, `remove_*`, `kill_container`, `leave_swarm`, and `compose_down(volumes=True)` execute immediately. The shipped `clean_environment` prompt asks the agent to confirm before pruning volumes, but tool calls themselves are not gated. If you need an approval step, configure it at the MCP client (e.g. Claude Code's permission prompts) rather than relying on the server. +- **Destructive operations have no built-in confirmation.** `prune_*`, `remove_*`, `kill_container`, `leave_swarm`, `compose_down(volumes=True)`, `buildx_prune` (always runs with `--force`), and `buildx_rm` execute immediately. The shipped `clean_environment` prompt asks the agent to confirm before pruning volumes, but tool calls themselves are not gated. If you need an approval step, configure it at the MCP client (e.g. Claude Code's permission prompts) rather than relying on the server. - **CLI shell-out attack surface.** Compose and Context tools spawn `docker` subprocesses on the host running this MCP server. Every invocation passes arguments as a list (no shell, no metacharacter interpretation), resolves the binary via `shutil.which`, and runs against a scrubbed environment (DOCKER_HOST and related vars only). Filesystem paths supplied to `compose_*` (project_dir, files) are read by the docker CLI on the server host — passing an unfamiliar path can expose any compose file the server's user can read. - **Docker Context retargeting.** `context_use` only changes the CLI default for subsequent CLI-backed tools. SDK-backed tools (`list_containers`, `pull_image`, etc.) keep using whatever daemon the docker-py client connected to at server startup. Restart the server with a different `DOCKER_HOST` / `DOCKER_CONTEXT` to retarget those. `context_create(skip_tls_verify=True)` disables TLS verification for a context; use only against trusted local daemons. @@ -150,6 +164,8 @@ Contributions are welcome. The project values a tight mapping between the Docker │ ├── plugins.py │ ├── compose.py # `docker compose` CLI plugin (shells out via _cli.py) │ ├── context.py # `docker context` CLI (shells out via _cli.py) +│ ├── buildx.py # `docker buildx` CLI plugin (shells out via _cli.py) +│ ├── scout.py # `docker scout` CLI plugin (shells out via _cli.py) │ ├── registry.py # OCI v2 registries + Docker Hub HTTPS APIs (no daemon) │ ├── prompts.py # @mcp.prompt() templates for common docker workflows │ └── resources.py # @mcp.resource() endpoints exposing SDK + CLI + registry docs diff --git a/tests/integration/test_buildx.py b/tests/integration/test_buildx.py new file mode 100644 index 0000000..e4f360a --- /dev/null +++ b/tests/integration/test_buildx.py @@ -0,0 +1,62 @@ +# integration tests for buildx — require a real Docker daemon AND the `docker buildx` plugin. +# run with: uv run pytest -m integration + +from pathlib import Path + +import pytest + +from tools._cli import has_plugin +from tools.buildx import buildx_build, buildx_du, buildx_imagetools_inspect, buildx_ls + +# A minimal Dockerfile that produces a tiny image without pulling anything large. +# `scratch` is the empty base image and ships with the buildx plugin's defaults. +_DOCKERFILE = """\ +FROM scratch +COPY hello.txt /hello.txt +""" + + +@pytest.fixture(scope="module", autouse=True) +def _require_buildx_plugin(): + if not has_plugin("buildx"): + pytest.skip("docker buildx plugin not installed on this host; skipping buildx integration tests") + yield + + +@pytest.fixture +def build_context(tmp_path: Path) -> Path: + (tmp_path / "Dockerfile").write_text(_DOCKERFILE) + (tmp_path / "hello.txt").write_text("hello\n") + return tmp_path + + +def test_buildx_ls_lists_at_least_one_builder(): + builders = buildx_ls() + assert isinstance(builders, list) + assert builders, "expected at least one buildx builder to be configured" + assert all("Name" in b for b in builders) + + +def test_buildx_du_returns_records(): + records = buildx_du() + # An empty cache is allowed but the call must succeed and return a list. + assert isinstance(records, list) + + +def test_buildx_build_scratch_context_succeeds(build_context: Path): + result = buildx_build( + context=str(build_context), + tags=["docker-mcp-it-buildx-scratch:test"], + load=True, + timeout_seconds=300.0, + ) + assert result["returncode"] == 0, result["stderr"] + + +def test_buildx_imagetools_inspect_alpine_returns_manifest(): + # `alpine:3` is a multi-arch manifest list on Docker Hub. The call hits the registry + # over HTTPS via buildx; no local image is required. + result = buildx_imagetools_inspect("alpine:3", raw=True) + if result["returncode"] != 0: + pytest.skip(f"buildx imagetools inspect unreachable (registry/network?): {result['stderr'][:200]}") + assert result["stdout"].strip().startswith("{") diff --git a/tests/integration/test_scout.py b/tests/integration/test_scout.py new file mode 100644 index 0000000..f73036c --- /dev/null +++ b/tests/integration/test_scout.py @@ -0,0 +1,27 @@ +# integration tests for scout — require a real Docker daemon AND the `docker scout` plugin. +# Scout is NOT pre-installed on plain Engine hosts (only Docker Desktop), so the whole +# module skips cleanly when the plugin isn't available. +# run with: uv run pytest -m integration + +import pytest + +from tools._cli import has_plugin +from tools.scout import scout_quickview + + +@pytest.fixture(scope="module", autouse=True) +def _require_scout_plugin(): + if not has_plugin("scout"): + pytest.skip("docker scout plugin not installed on this host; skipping scout integration tests") + yield + + +def test_scout_quickview_alpine_returns_json_or_skip(): + # Scout requires network access to its CDN. If the CDN is unreachable or the host + # is offline, skip rather than fail — this test exercises the wiring, not Scout itself. + result = scout_quickview("alpine:3") + if result["raw"]["returncode"] != 0: + pytest.skip(f"scout quickview unreachable (offline or auth required?): {result['raw']['stderr'][:200]}") + assert result["format"] == "json" + # `result` should be a parsed dict or the raw text (if Scout returned non-JSON for some reason). + assert result["result"] is not None diff --git a/tests/test_buildx.py b/tests/test_buildx.py new file mode 100644 index 0000000..c55ce8b --- /dev/null +++ b/tests/test_buildx.py @@ -0,0 +1,395 @@ +from unittest.mock import patch + +import pytest + +from tools._cli import CliResult +from tools.buildx import ( + _parse_json_lines, + buildx_bake, + buildx_build, + buildx_create, + buildx_du, + buildx_imagetools_create, + buildx_imagetools_inspect, + buildx_inspect, + buildx_ls, + buildx_prune, + buildx_rm, + buildx_use, +) + + +@pytest.fixture(autouse=True) +def _stub_plugin_check(): # pyright: ignore[reportUnusedFunction] + with patch("tools.buildx.require_plugin"): + yield + + +def _ok(stdout: str = "", stderr: str = "") -> CliResult: + return CliResult(returncode=0, stdout=stdout, stderr=stderr, truncated=False) + + +def _fail(stderr: str, returncode: int = 1) -> CliResult: + return CliResult(returncode=returncode, stdout="", stderr=stderr, truncated=False) + + +# ---------- _parse_json_lines ---------- + + +def test_parse_json_lines_handles_ndjson(): + assert _parse_json_lines('{"a": 1}\n{"a": 2}\n') == [{"a": 1}, {"a": 2}] + + +def test_parse_json_lines_skips_blank_lines(): + assert _parse_json_lines('{"a": 1}\n\n{"a": 2}\n') == [{"a": 1}, {"a": 2}] + + +def test_parse_json_lines_empty_returns_empty_list(): + assert _parse_json_lines("") == [] + + +def test_parse_json_lines_drops_partial_last_line_when_truncated(): + # Last entry is cut off (no closing brace) — should be dropped when truncated=True. + body = '{"a": 1}\n{"a": 2}\n{"a": 3, "b":' + assert _parse_json_lines(body, truncated=True) == [{"a": 1}, {"a": 2}] + + +def test_parse_json_lines_raises_descriptively_on_garbage_when_not_truncated(): + body = '{"a": 1}\nnot-json-at-all' + with pytest.raises(RuntimeError, match="Could not parse .* JSON.*line 2.*truncated=False"): + _parse_json_lines(body, truncated=False, what="buildx test output") + + +def test_parse_json_lines_truncated_keeps_complete_earlier_records(): + body = '{"a": 1}\n{"a": 2}' # the second is "complete" but we still drop it when truncated + # When truncated=True, the last line is always dropped on the assumption it may be partial. + # That's a conservative call — the alternative (trying to detect completeness) is brittle. + assert _parse_json_lines(body, truncated=True) == [{"a": 1}] + + +# ---------- buildx_build ---------- + + +def test_buildx_build_minimal_context_only(): + with patch("tools.buildx.run_docker", return_value=_ok()) as run: + buildx_build(context=".") + args = run.call_args.args[0] + assert args[:3] == ["buildx", "build", "--progress=plain"] + assert args[-1] == "." # context is positional and last + + +def test_buildx_build_passes_tags_and_platforms(): + with patch("tools.buildx.run_docker", return_value=_ok()) as run: + buildx_build(context=".", tags=["org/app:v1", "org/app:latest"], platforms=["linux/amd64", "linux/arm64"]) + args = run.call_args.args[0] + assert args.count("--tag") == 2 + assert args[args.index("org/app:v1") - 1] == "--tag" + # buildx --platform takes a comma-joined list as one value (the documented convention). + assert args.count("--platform") == 1 + assert args[args.index("--platform") + 1] == "linux/amd64,linux/arm64" + + +def test_buildx_build_single_platform_passes_one_flag(): + with patch("tools.buildx.run_docker", return_value=_ok()) as run: + buildx_build(context=".", platforms=["linux/amd64"]) + args = run.call_args.args[0] + assert args.count("--platform") == 1 + assert args[args.index("--platform") + 1] == "linux/amd64" + + +def test_buildx_build_omits_platform_when_not_supplied(): + with patch("tools.buildx.run_docker", return_value=_ok()) as run: + buildx_build(context=".") + assert "--platform" not in run.call_args.args[0] + + +def test_buildx_build_dict_args_emit_repeated_flags(): + with patch("tools.buildx.run_docker", return_value=_ok()) as run: + buildx_build( + context=".", + build_args={"VERSION": "1.0", "DEBUG": "1"}, + build_contexts={"deps": "./vendor"}, + labels={"org.opencontainers.image.source": "https://example.com"}, + ) + args = run.call_args.args[0] + build_arg_values = [args[i + 1] for i, a in enumerate(args) if a == "--build-arg"] + assert set(build_arg_values) == {"VERSION=1.0", "DEBUG=1"} + assert "--build-context" in args + assert args[args.index("--build-context") + 1] == "deps=./vendor" + assert args[args.index("--label") + 1] == "org.opencontainers.image.source=https://example.com" + + +def test_buildx_build_push_and_load_flags_independent(): + with patch("tools.buildx.run_docker", return_value=_ok()) as run: + buildx_build(context=".", push=True) + assert "--push" in run.call_args.args[0] + + with patch("tools.buildx.run_docker", return_value=_ok()) as run: + buildx_build(context=".", load=True) + assert "--load" in run.call_args.args[0] + + +def test_buildx_build_cache_and_attestation_flags(): + with patch("tools.buildx.run_docker", return_value=_ok()) as run: + buildx_build( + context=".", + cache_from=["type=registry,ref=org/cache"], + cache_to=["type=registry,ref=org/cache,mode=max"], + sbom="true", + provenance="mode=max", + attest=["type=foo"], + no_cache_filter=["build", "test"], + ) + args = run.call_args.args[0] + assert "type=registry,ref=org/cache" in args + assert "type=registry,ref=org/cache,mode=max" in args + assert args[args.index("--sbom") + 1] == "true" + assert args[args.index("--provenance") + 1] == "mode=max" + assert "--attest" in args + assert args.count("--no-cache-filter") == 2 + + +def test_buildx_build_secret_and_ssh(): + with patch("tools.buildx.run_docker", return_value=_ok()) as run: + buildx_build(context=".", secret=["id=npmrc,src=/home/user/.npmrc"], ssh=["default"]) + args = run.call_args.args[0] + assert args[args.index("--secret") + 1] == "id=npmrc,src=/home/user/.npmrc" + assert args[args.index("--ssh") + 1] == "default" + + +def test_buildx_build_returns_returncode_dict(): + with patch("tools.buildx.run_docker", return_value=_fail("build failed", returncode=2)): + result = buildx_build(context=".") + assert result["returncode"] == 2 + assert result["stderr"] == "build failed" + + +def test_buildx_build_rejects_push_and_load_together(): + with pytest.raises(ValueError, match="`push` and `load` are mutually exclusive"): + buildx_build(context=".", push=True, load=True) + + +# ---------- buildx_bake ---------- + + +def test_buildx_bake_minimal_uses_progress_plain(): + with patch("tools.buildx.run_docker", return_value=_ok()) as run: + buildx_bake() + args = run.call_args.args[0] + assert args[:3] == ["buildx", "bake", "--progress=plain"] + + +def test_buildx_bake_targets_appended_last(): + with patch("tools.buildx.run_docker", return_value=_ok()) as run: + buildx_bake(targets=["app", "tests"], files=["docker-bake.hcl"], push=True) + args = run.call_args.args[0] + # Targets are positional, must come after all flags + assert args[-2:] == ["app", "tests"] + assert "--push" in args + assert args[args.index("-f") + 1] == "docker-bake.hcl" + + +def test_buildx_bake_set_overrides_repeat(): + with patch("tools.buildx.run_docker", return_value=_ok()) as run: + buildx_bake(set_overrides=["app.platform=linux/amd64", "tests.no-cache=true"]) + args = run.call_args.args[0] + assert args.count("--set") == 2 + + +# ---------- buildx_imagetools_inspect ---------- + + +def test_buildx_imagetools_inspect_default_args(): + with patch("tools.buildx.run_docker", return_value=_ok("[ ... ]")) as run: + result = buildx_imagetools_inspect("alpine:3.19") + args = run.call_args.args[0] + assert args[:3] == ["buildx", "imagetools", "inspect"] + assert args[-1] == "alpine:3.19" + assert "--raw" not in args + assert "--format" not in args + assert result["returncode"] == 0 + + +def test_buildx_imagetools_inspect_raw_and_format(): + with patch("tools.buildx.run_docker", return_value=_ok()) as run: + buildx_imagetools_inspect("alpine:3.19", raw=True) + assert "--raw" in run.call_args.args[0] + + with patch("tools.buildx.run_docker", return_value=_ok()) as run: + buildx_imagetools_inspect("alpine:3.19", format="{{json .}}") + args = run.call_args.args[0] + assert args[args.index("--format") + 1] == "{{json .}}" + + +def test_buildx_imagetools_inspect_rejects_raw_and_format_together(): + with pytest.raises(ValueError, match="`raw` and `format` are mutually exclusive"): + buildx_imagetools_inspect("alpine:3.19", raw=True, format="{{json .}}") + + +# ---------- buildx_imagetools_create ---------- + + +def test_buildx_imagetools_create_target_and_sources(): + with patch("tools.buildx.run_docker", return_value=_ok()) as run: + buildx_imagetools_create( + target="org/app:v1", + sources=["org/app:v1-amd64", "org/app:v1-arm64"], + ) + args = run.call_args.args[0] + assert args[:3] == ["buildx", "imagetools", "create"] + assert args[args.index("--tag") + 1] == "org/app:v1" + assert args[-2:] == ["org/app:v1-amd64", "org/app:v1-arm64"] + + +def test_buildx_imagetools_create_append_dry_run_annotations(): + with patch("tools.buildx.run_docker", return_value=_ok()) as run: + buildx_imagetools_create( + target="org/app:v1", + sources=["org/app:v1-amd64"], + append=True, + dry_run=True, + annotations=["manifest:com.example.k=v"], + ) + args = run.call_args.args[0] + assert "--append" in args + assert "--dry-run" in args + assert args[args.index("--annotation") + 1] == "manifest:com.example.k=v" + + +def test_buildx_imagetools_create_requires_sources_or_files(): + with pytest.raises(ValueError, match="at least one source ref or file"): + buildx_imagetools_create(target="org/app:v1", sources=[]) + + +# ---------- buildx_ls / buildx_du / buildx_inspect ---------- + + +def test_buildx_ls_parses_ndjson(): + body = '{"Name":"default","Driver":"docker","Current":true}\n{"Name":"remote","Driver":"docker-container","Current":false}\n' + with patch("tools.buildx.run_docker", return_value=_ok(body)) as run: + result = buildx_ls() + args = run.call_args.args[0] + assert args == ["buildx", "ls", "--format", "{{json .}}"] + assert result == [ + {"Name": "default", "Driver": "docker", "Current": True}, + {"Name": "remote", "Driver": "docker-container", "Current": False}, + ] + + +def test_buildx_ls_raises_on_failure(): + with patch("tools.buildx.run_docker", return_value=_fail("daemon error")): + with pytest.raises(RuntimeError, match="daemon error"): + buildx_ls() + + +def test_buildx_ls_drops_partial_record_when_truncated(): + body = '{"Name":"default","Current":true}\n{"Name":"remote",' # second record cut off + truncated_result = CliResult(returncode=0, stdout=body, stderr="", truncated=True) + with patch("tools.buildx.run_docker", return_value=truncated_result): + result = buildx_ls() + assert result == [{"Name": "default", "Current": True}] + + +def test_buildx_du_parses_ndjson(): + body = '{"ID":"abc","Size":"1MB"}\n{"ID":"def","Size":"2MB"}\n' + with patch("tools.buildx.run_docker", return_value=_ok(body)) as run: + result = buildx_du(builder="builder-x") + args = run.call_args.args[0] + assert args[:4] == ["buildx", "du", "--format", "{{json .}}"] + assert args[args.index("--builder") + 1] == "builder-x" + assert result == [{"ID": "abc", "Size": "1MB"}, {"ID": "def", "Size": "2MB"}] + + +def test_buildx_inspect_with_bootstrap(): + with patch("tools.buildx.run_docker", return_value=_ok("Name: default")) as run: + buildx_inspect(bootstrap=True) + args = run.call_args.args[0] + assert "--bootstrap" in args + + +# ---------- buildx_prune ---------- + + +def test_buildx_prune_always_passes_force(): + with patch("tools.buildx.run_docker", return_value=_ok()) as run: + buildx_prune() + args = run.call_args.args[0] + assert args[:3] == ["buildx", "prune", "--force"] + + +def test_buildx_prune_filter_and_space_flags(): + with patch("tools.buildx.run_docker", return_value=_ok()) as run: + buildx_prune( + all=True, + filter={"until": "24h", "type": "exec.cachemount"}, + reserved_space="10GB", + max_used_space="20GB", + min_free_space="5GB", + ) + args = run.call_args.args[0] + assert "--all" in args + assert args.count("--filter") == 2 + assert args[args.index("--reserved-space") + 1] == "10GB" + assert args[args.index("--max-used-space") + 1] == "20GB" + assert args[args.index("--min-free-space") + 1] == "5GB" + + +# ---------- buildx_create / buildx_use / buildx_rm ---------- + + +def test_buildx_create_driver_opts_repeat(): + with patch("tools.buildx.run_docker", return_value=_ok()) as run: + buildx_create( + name="builder-x", + driver="docker-container", + driver_opts={"image": "moby/buildkit:latest", "network": "host"}, + use=True, + bootstrap=True, + platforms=["linux/amd64", "linux/arm64"], + ) + args = run.call_args.args[0] + assert args[:2] == ["buildx", "create"] + assert args[args.index("--driver") + 1] == "docker-container" + assert args.count("--driver-opt") == 2 + assert "--use" in args + assert "--bootstrap" in args + # Comma-joined platforms (the documented buildx convention). + assert args.count("--platform") == 1 + assert args[args.index("--platform") + 1] == "linux/amd64,linux/arm64" + assert args[args.index("--name") + 1] == "builder-x" + + +def test_buildx_use_with_default_flags(): + with patch("tools.buildx.run_docker", return_value=_ok()) as run: + buildx_use("builder-x", default=True, global_default=True) + args = run.call_args.args[0] + assert "--default" in args + assert "--global" in args + assert args[-1] == "builder-x" + + +def test_buildx_rm_requires_target(): + with pytest.raises(ValueError, match="`name` or `all_inactive=True`"): + buildx_rm() + + +def test_buildx_rm_rejects_name_and_all_inactive_together(): + with pytest.raises(ValueError, match="mutually exclusive"): + buildx_rm(name="builder-x", all_inactive=True) + + +def test_buildx_rm_all_inactive(): + with patch("tools.buildx.run_docker", return_value=_ok()) as run: + buildx_rm(all_inactive=True, keep_state=True) + args = run.call_args.args[0] + assert args[:2] == ["buildx", "rm"] + assert "--all-inactive" in args + assert "--keep-state" in args + + +def test_buildx_rm_named_with_force(): + with patch("tools.buildx.run_docker", return_value=_ok()) as run: + buildx_rm(name="builder-x", force=True) + args = run.call_args.args[0] + assert "--force" in args + assert args[-1] == "builder-x" diff --git a/tests/test_prompts.py b/tests/test_prompts.py index 59f440b..48de631 100644 --- a/tests/test_prompts.py +++ b/tests/test_prompts.py @@ -1,13 +1,20 @@ from tools.prompts import ( audit_docker_contexts, + audit_image_cves, clean_environment, + compare_image_versions, + create_multiarch_manifest, deploy_compose_project, deploy_container, find_latest_image_tag, + inspect_multiarch_manifest, inspect_stack, lookup_docker_docs, migrate_container, + migrate_from_docker_manifest, plan_compose_stack, + plan_multiarch_build, + recommend_base_image, troubleshoot_compose_project, troubleshoot_container, verify_docker_method, @@ -118,3 +125,77 @@ def test_find_latest_image_tag_uses_registry_tools(): assert "registry_inspect_manifest" in out assert "hub_repo_info" in out assert "do not pull" in out.lower() + + +def test_plan_multiarch_build_uses_buildx_and_emulation_warning(): + out = plan_multiarch_build("ghcr.io/org/app:v1", platforms="linux/amd64,linux/arm64") + assert "ghcr.io/org/app:v1" in out + assert "buildx_ls" in out + assert "buildx_imagetools_inspect" in out + assert "buildx_build" in out + assert "linux/amd64" in out and "linux/arm64" in out + assert "emulation" in out.lower() + + +def test_plan_multiarch_build_creates_docker_container_when_no_buildx_driver(): + out = plan_multiarch_build("ghcr.io/org/app:v1") + assert "buildx_create" in out + assert "docker-container" in out + + +def test_audit_image_cves_walks_quickview_then_cves(): + out = audit_image_cves("alpine:3.19") + assert "alpine:3.19" in out + # quickview first, then drill in + assert out.index("scout_quickview") < out.index("scout_cves") + # Should mention severity filtering AND base separation + assert "critical" in out.lower() + assert "ignore_base" in out + + +def test_compare_image_versions_uses_scout_compare(): + out = compare_image_versions("org/app:v1", "org/app:v2") + assert "org/app:v1" in out + assert "org/app:v2" in out + assert "scout_compare" in out + assert "ignore_unchanged" in out + assert "regression" in out.lower() + + +def test_recommend_base_image_uses_recommendations_and_verifies_with_compare(): + out = recommend_base_image("org/app:v1") + assert "scout_recommendations" in out + assert "scout_compare" in out + # Manifest verification step must use buildx_imagetools_inspect (accepts a full ref), + # not registry_inspect_manifest (which strips tag/digest from `image`). + assert "buildx_imagetools_inspect" in out + + +def test_inspect_multiarch_manifest_uses_buildx_imagetools_and_explains_replacement(): + out = inspect_multiarch_manifest("alpine:3.19") + assert "alpine:3.19" in out + assert "buildx_imagetools_inspect" in out + # The prompt must explain it replaces docker manifest inspect for discovery. + assert "docker manifest inspect" in out + # Should mention both image index and manifest list media types. + assert "image.index" in out or "manifest.list" in out + + +def test_create_multiarch_manifest_dry_run_first(): + out = create_multiarch_manifest("org/app:v1", "org/app:v1-amd64,org/app:v1-arm64") + assert "org/app:v1" in out + assert "buildx_imagetools_create" in out + assert "dry_run" in out + # Dry-run before live push + assert out.index("dry_run=True") < out.lower().rindex("approves") + + +def test_migrate_from_docker_manifest_returns_mapping_table(): + out = migrate_from_docker_manifest() + # Must mention each docker manifest subcommand and its replacement. + for cmd in ("inspect REF", "create NEW SRC", "annotate", "push NEW", "rm NEW"): + assert cmd in out + assert "buildx_imagetools_inspect" in out + assert "buildx_imagetools_create" in out + # And explain the why + assert "maintenance mode" in out.lower() diff --git a/tests/test_registry.py b/tests/test_registry.py index 6e1ef51..2eac59a 100644 --- a/tests/test_registry.py +++ b/tests/test_registry.py @@ -312,15 +312,145 @@ def test_hub_repo_info_returns_body(): "pull_count": 999999999, "is_private": False, } - with patch("tools.registry.httpx.get") as fake_get: - fake_get.return_value = httpx.Response( + seen_urls: list[str] = [] + + def handler(request: httpx.Request) -> httpx.Response: + seen_urls.append(str(request.url)) + return httpx.Response( 200, content=json.dumps(expected).encode(), - request=httpx.Request("GET", "https://hub.docker.com/v2/repositories/library/alpine/"), headers={"Content-Type": "application/json"}, ) + + with _mock_client(handler): result = hub_repo_info("alpine") assert result == expected - fake_get.assert_called_once() - called_url = fake_get.call_args.args[0] - assert called_url == "https://hub.docker.com/v2/repositories/library/alpine/" + assert seen_urls == ["https://hub.docker.com/v2/repositories/library/alpine/"] + + +# ---------- 429 rate-limit policy ---------- + + +def test_registry_list_tags_retries_once_on_short_retry_after(): + calls: list[int] = [] + + def handler(request: httpx.Request) -> httpx.Response: + calls.append(1) + if len(calls) == 1: + return httpx.Response(429, headers={"Retry-After": "0"}) + return httpx.Response(200, json={"tags": ["v1", "v2"]}) + + with _mock_client(handler): + result = registry_list_tags("alpine") + + assert result["tags"] == ["v1", "v2"] + assert len(calls) == 2 + + +def test_registry_list_tags_raises_when_retry_after_is_long(): + def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response(429, headers={"Retry-After": "3600"}) + + with _mock_client(handler): + with pytest.raises(RuntimeError, match="rate-limited.*retry after ~3600s") as excinfo: + registry_list_tags("alpine") + # Default registry is Docker Hub — message should mention the Hub-specific cap. + assert "Docker Hub" in str(excinfo.value) + + +def test_registry_list_tags_message_is_generic_for_non_hub_registry(): + def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response(429, headers={"Retry-After": "3600"}) + + with _mock_client(handler): + with pytest.raises(RuntimeError, match="rate-limited") as excinfo: + registry_list_tags("ghcr.io/org/repo") + # GHCR is not Docker Hub — the Hub-specific guidance must not appear; the + # registry-agnostic hint about authenticating should. + msg = str(excinfo.value) + assert "Docker Hub" not in msg + assert "authenticate" in msg.lower() + + +def test_registry_list_tags_raises_when_retry_after_missing(): + def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response(429) + + with _mock_client(handler): + with pytest.raises(RuntimeError, match="rate-limited"): + registry_list_tags("alpine") + + +def test_registry_list_tags_raises_on_second_429_after_retry(): + def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response(429, headers={"Retry-After": "0"}) + + with _mock_client(handler): + with pytest.raises(RuntimeError, match="rate-limited"): + registry_list_tags("alpine") + + +def test_hub_list_tags_applies_429_policy(): + calls: list[int] = [] + + def handler(request: httpx.Request) -> httpx.Response: + calls.append(1) + if len(calls) == 1: + return httpx.Response(429, headers={"Retry-After": "0"}) + return httpx.Response(200, json={"next": None, "results": [{"name": "v1"}]}) + + with _mock_client(handler): + result = hub_list_tags("alpine") + + assert [t["name"] for t in result["tags"]] == ["v1"] + assert len(calls) == 2 + + +def test_hub_repo_info_applies_429_policy(): + def handler(request: httpx.Request) -> httpx.Response: + return httpx.Response(429, headers={"Retry-After": "3600"}) + + with _mock_client(handler): + with pytest.raises(RuntimeError, match="rate-limited"): + hub_repo_info("alpine") + + +def test_parse_retry_after_seconds(): + from tools.registry import _parse_retry_after + + assert _parse_retry_after("0") == 0.0 + assert _parse_retry_after("30") == 30.0 + assert _parse_retry_after(" 5 ") == 5.0 + + +def test_parse_retry_after_http_date_in_future(): + from tools.registry import _parse_retry_after + + # An HTTP date far in the future should produce a positive value (the absolute number + # depends on the wall clock, so only assert ordering). + result = _parse_retry_after("Wed, 21 Oct 2099 07:28:00 GMT") + assert result is not None + assert result > 1000 + + +def test_parse_retry_after_treats_naive_date_as_utc(): + """RFC 7231 says HTTP-dates are UTC. `-0000` parses to a naive datetime; we must + treat it as UTC rather than letting `.timestamp()` re-interpret in local time.""" + from tools.registry import _parse_retry_after + + # `-0000` is the only HTTP-date timezone notation that produces a naive datetime + # out of email.utils.parsedate_to_datetime. The same wall-clock moment expressed + # as `-0000` and `+0000` must yield the same delay value. + naive = _parse_retry_after("Wed, 21 Oct 2099 07:28:00 -0000") + aware = _parse_retry_after("Wed, 21 Oct 2099 07:28:00 +0000") + assert naive is not None and aware is not None + # Allow a 1s slack because two calls to time.time() bracket the math. + assert abs(naive - aware) < 1.0 + + +def test_parse_retry_after_invalid_returns_none(): + from tools.registry import _parse_retry_after + + assert _parse_retry_after(None) is None + assert _parse_retry_after("") is None + assert _parse_retry_after("not a date or number") is None diff --git a/tests/test_scout.py b/tests/test_scout.py new file mode 100644 index 0000000..73d08f2 --- /dev/null +++ b/tests/test_scout.py @@ -0,0 +1,191 @@ +from unittest.mock import patch + +import pytest + +from tools._cli import CliResult +from tools.scout import ( + _maybe_parse_json, + scout_compare, + scout_cves, + scout_quickview, + scout_recommendations, + scout_sbom, +) + + +@pytest.fixture(autouse=True) +def _stub_plugin_check(): # pyright: ignore[reportUnusedFunction] + with patch("tools.scout.require_plugin"): + yield + + +def _ok(stdout: str = "", stderr: str = "") -> CliResult: + return CliResult(returncode=0, stdout=stdout, stderr=stderr, truncated=False) + + +# ---------- _maybe_parse_json ---------- + + +def test_maybe_parse_json_returns_dict_when_format_is_json(): + assert _maybe_parse_json('{"a": 1}', "json") == {"a": 1} + + +def test_maybe_parse_json_returns_raw_text_for_non_json_format(): + assert _maybe_parse_json("plain text", "text") == "plain text" + + +def test_maybe_parse_json_returns_none_for_empty_input(): + assert _maybe_parse_json("", "json") is None + + +def test_maybe_parse_json_returns_raw_when_json_invalid(): + # Bad JSON when format=json — return raw text rather than raise so the agent can debug. + assert _maybe_parse_json("not-json", "json") == "not-json" + + +# ---------- scout_cves ---------- + + +def test_scout_cves_minimal_args_and_default_json_format(): + with patch("tools.scout.run_docker", return_value=_ok('{"vulnerabilities": []}')) as run: + result = scout_cves("alpine:3.19") + args = run.call_args.args[0] + assert args[:2] == ["scout", "cves"] + assert args[args.index("--format") + 1] == "json" + assert args[-1] == "alpine:3.19" + assert result["format"] == "json" + assert result["result"] == {"vulnerabilities": []} + assert result["raw"]["returncode"] == 0 + + +def test_scout_cves_only_severity_joins_with_commas(): + with patch("tools.scout.run_docker", return_value=_ok("{}")) as run: + scout_cves("alpine:3.19", only_severity=["critical", "high"]) + args = run.call_args.args[0] + assert args[args.index("--only-severity") + 1] == "critical,high" + + +def test_scout_cves_flags_set_correctly(): + with patch("tools.scout.run_docker", return_value=_ok("{}")) as run: + scout_cves("alpine:3.19", only_fixed=True, ignore_base=True, platform="linux/amd64") + args = run.call_args.args[0] + assert "--only-fixed" in args + assert "--ignore-base" in args + assert args[args.index("--platform") + 1] == "linux/amd64" + + +def test_scout_cves_sarif_format_returned_as_text(): + sarif_text = '{"$schema":"https://example.com/sarif"}' + with patch("tools.scout.run_docker", return_value=_ok(sarif_text)) as run: + result = scout_cves("alpine:3.19", format="sarif") + args = run.call_args.args[0] + assert args[args.index("--format") + 1] == "sarif" + assert result["format"] == "sarif" + assert result["result"] == sarif_text + + +# ---------- scout_quickview ---------- + + +def test_scout_quickview_parses_json(): + body = '{"critical": 0, "high": 2}' + with patch("tools.scout.run_docker", return_value=_ok(body)): + result = scout_quickview("alpine:3.19") + assert result["result"] == {"critical": 0, "high": 2} + + +def test_scout_quickview_text_format_unparsed(): + with patch("tools.scout.run_docker", return_value=_ok("Image: alpine:3.19\nCritical: 0")) as run: + result = scout_quickview("alpine:3.19", format="text") + args = run.call_args.args[0] + assert args[args.index("--format") + 1] == "text" + assert "Critical: 0" in result["result"] + + +# ---------- scout_recommendations ---------- + + +def test_scout_recommendations_passes_only_flags(): + with patch("tools.scout.run_docker", return_value=_ok("[]")) as run: + scout_recommendations("alpine:3.19", only_refresh=True, only_update=True, tag="3.*") + args = run.call_args.args[0] + assert "--only-refresh" in args + assert "--only-update" in args + assert args[args.index("--tag") + 1] == "3.*" + + +# ---------- scout_compare ---------- + + +def test_scout_compare_to_ref_target(): + with patch("tools.scout.run_docker", return_value=_ok('{"delta": []}')) as run: + scout_compare("org/app:v2", to="org/app:v1") + args = run.call_args.args[0] + assert args[:2] == ["scout", "compare"] + assert args[args.index("--to") + 1] == "org/app:v1" + assert args[-1] == "org/app:v2" + # `--to-latest` is a separate flag and must not be set when `--to` is. + assert "--to-latest" not in args + + +def test_scout_compare_to_latest_target(): + with patch("tools.scout.run_docker", return_value=_ok("{}")) as run: + scout_compare("org/app:v2", to_latest=True) + args = run.call_args.args[0] + assert "--to-latest" in args + assert "--to" not in args + + +def test_scout_compare_to_env_target(): + with patch("tools.scout.run_docker", return_value=_ok("{}")) as run: + scout_compare("org/app:v2", to_env="prod") + args = run.call_args.args[0] + assert args[args.index("--to-env") + 1] == "prod" + + +def test_scout_compare_requires_exactly_one_target(): + with pytest.raises(ValueError, match="exactly one of"): + scout_compare("org/app:v2") + with pytest.raises(ValueError, match="exactly one of"): + scout_compare("org/app:v2", to="org/app:v1", to_latest=True) + + +def test_scout_compare_ignore_unchanged_and_severity(): + with patch("tools.scout.run_docker", return_value=_ok("{}")) as run: + scout_compare("org/app:v2", to="org/app:v1", ignore_unchanged=True, only_severity=["critical"]) + args = run.call_args.args[0] + assert "--ignore-unchanged" in args + assert args[args.index("--only-severity") + 1] == "critical" + + +# ---------- scout_sbom ---------- + + +def test_scout_sbom_default_spdx_format_parses_json(): + body = '{"spdxVersion": "SPDX-2.3"}' + with patch("tools.scout.run_docker", return_value=_ok(body)) as run: + result = scout_sbom("alpine:3.19") + args = run.call_args.args[0] + assert args[args.index("--format") + 1] == "spdx" + assert result["format"] == "spdx" + assert result["result"] == {"spdxVersion": "SPDX-2.3"} + + +def test_scout_sbom_cyclonedx_format_parses_json(): + body = '{"bomFormat": "CycloneDX"}' + with patch("tools.scout.run_docker", return_value=_ok(body)): + result = scout_sbom("alpine:3.19", format="cyclonedx") + assert result["result"] == {"bomFormat": "CycloneDX"} + + +def test_scout_sbom_list_format_returned_as_text(): + with patch("tools.scout.run_docker", return_value=_ok("alpine 3.19\nlibc 2.39")): + result = scout_sbom("alpine:3.19", format="list") + assert "libc 2.39" in result["result"] + + +def test_scout_sbom_with_platform(): + with patch("tools.scout.run_docker", return_value=_ok("{}")) as run: + scout_sbom("alpine:3.19", platform="linux/arm64") + args = run.call_args.args[0] + assert args[args.index("--platform") + 1] == "linux/arm64" diff --git a/tools/__init__.py b/tools/__init__.py index 0116626..cb0e2a8 100644 --- a/tools/__init__.py +++ b/tools/__init__.py @@ -1,5 +1,6 @@ # library of MCP tools for managing docker +from tools.buildx import * from tools.client import * from tools.compose import * from tools.configs import * @@ -12,6 +13,7 @@ from tools.prompts import * from tools.registry import * from tools.resources import * +from tools.scout import * from tools.secrets import * from tools.services import * from tools.swarm import * diff --git a/tools/buildx.py b/tools/buildx.py new file mode 100644 index 0000000..bc469a8 --- /dev/null +++ b/tools/buildx.py @@ -0,0 +1,542 @@ +# library of mcp tools for `docker buildx`. +# +# Buildx is a CLI plugin layered on BuildKit; it covers multi-platform builds, modern +# cache export/import, attestations (SBOM/provenance), and manifest-list operations. +# These tools wrap the CLI via tools/_cli.py for cross-platform safety. + +import json + +from server import mcp +from tools._cli import CliResult, require_plugin, run_docker + +# Per-operation timeout ceilings (seconds). Builds and pulls against slow registries or +# large contexts routinely run for many minutes, so they get longer ceilings than queries. +_TIMEOUT_QUERY = 60.0 +_TIMEOUT_BUILD = 1800.0 +_TIMEOUT_BAKE = 1800.0 +_TIMEOUT_IMAGETOOLS_CREATE = 600.0 +_TIMEOUT_PRUNE = 600.0 + + +def _run_buildx(args: list[str], *, cwd: str | None = None, timeout: float) -> CliResult: + require_plugin("buildx") + return run_docker(["buildx", *args], cwd=cwd, timeout=timeout) + + +def _raise_on_failure(result: CliResult, action: str) -> None: + if result.returncode != 0: + raise RuntimeError( + f"`docker buildx {action}` failed with exit code {result.returncode}: " + f"{result.stderr.strip() or result.stdout.strip() or ''}" + ) + + +def _parse_json_lines(text: str, *, truncated: bool = False, what: str = "buildx output") -> list[dict]: + """ + Parse one JSON object per non-blank line of `text`. + + args: + text: NDJSON to parse + truncated: True if the underlying stdout was capped by run_docker's byte limit. + When set, the final non-blank line is assumed to be a partial record + and is dropped before parsing rather than crashing on a half-record. + what: short label used in error messages, e.g. "buildx du output". + """ + lines = [line.strip() for line in text.splitlines() if line.strip()] + if truncated and lines: + lines = lines[:-1] + items: list[dict] = [] + for line_number, line in enumerate(lines, start=1): + try: + items.append(json.loads(line)) + except json.JSONDecodeError as exc: + raise RuntimeError( + f"Could not parse {what} as JSON (line {line_number}, truncated={truncated}): {exc}. " + f"Snippet: {line[:200]!r}" + ) from exc + return items + + +@mcp.tool() +def buildx_build( + context: str, + tags: list[str] | None = None, + platforms: list[str] | None = None, + file: str | None = None, + build_args: dict | None = None, + build_contexts: dict | None = None, + labels: dict | None = None, + annotations: list[str] | None = None, + target: str | None = None, + push: bool = False, + load: bool = False, + output: list[str] | None = None, + no_cache: bool = False, + no_cache_filter: list[str] | None = None, + pull: bool = False, + cache_from: list[str] | None = None, + cache_to: list[str] | None = None, + builder: str | None = None, + sbom: str | None = None, + provenance: str | None = None, + attest: list[str] | None = None, + secret: list[str] | None = None, + ssh: list[str] | None = None, + timeout_seconds: float = _TIMEOUT_BUILD, +) -> dict: + """ + Build an image with BuildKit via `docker buildx build`. + + Replaces the legacy `build_image` tool when you need any of: multi-platform output + (`platforms`), modern cache export (`cache_from`/`cache_to`), SBOM or provenance + attestations, build secrets, or multi-stage builds with `target`. Always runs with + `--progress=plain` so output is captured rather than redrawn on a TTY. + + args: + context: str - Build context: a filesystem path or a Git/HTTP URL. Passed verbatim + to docker; no shell expansion of `~` or globs. The `-` form (tarball + on stdin) is NOT supported — this tool doesn't forward stdin to the + subprocess, so `-` would block on the MCP server's own stdin. Pre-pack + a tarball and serve it via an HTTP URL if you need that workflow. + tags: list[str] - Image references to apply (`-t`, repeatable) + platforms: list[str] - Target platforms, e.g. ["linux/amd64", "linux/arm64"] + file: str - Dockerfile path (relative to context unless absolute) + build_args: dict - Build-time variables (each becomes `--build-arg KEY=VALUE`) + build_contexts: dict - Additional named build contexts (e.g. {"deps": "./vendor"}) + labels: dict - Image labels (each becomes `--label KEY=VALUE`) + annotations: list[str] - OCI manifest annotations (passed verbatim, repeatable) + target: str - Target build stage to stop at + push: bool - Push the result to the registry (mutually exclusive with `load`) + load: bool - Load the result into the local image store (single-platform builds only) + output: list[str] - Custom `--output` specs (e.g. ["type=tar,dest=out.tar"]) + no_cache: bool - Do not use cache when building + no_cache_filter: list[str] - Stage names to exclude from caching + pull: bool - Always attempt to pull a newer version of each base image + cache_from: list[str] - Cache import specs, e.g. ["type=registry,ref=user/img:cache"] + cache_to: list[str] - Cache export specs + builder: str - Override the active builder + sbom: str - Shorthand for `--attest=type=sbom`; pass "true" or a config string + provenance: str - Shorthand for `--attest=type=provenance`; pass "true", "false", or a config string + attest: list[str] - Custom attestation specs (repeatable) + secret: list[str] - Secret specs (e.g. ["id=npmrc,src=/home/user/.npmrc"] or + ["id=npmrc,env=NPM_TOKEN"]). Neither this tool nor the docker CLI + expands `~` in `src=…`; use an absolute path or pre-expand via + `pathlib.Path("~/.npmrc").expanduser()` before passing the spec. + ssh: list[str] - SSH agent socket / key specs (e.g. ["default"], which uses + $SSH_AUTH_SOCK from the environment) + timeout_seconds: float - Subprocess timeout (default 1800s) + returns: dict - {"returncode": int, "stdout": str, "stderr": str, "truncated": bool} + """ + if context == "-": + raise ValueError( + "buildx_build: context='-' (read a tarball from stdin) is not supported by this " + "tool because we don't forward stdin to the buildx subprocess — `-` would block " + "on the MCP server's own stdin. Use a filesystem path or an HTTP/Git URL instead, " + "or pre-stage the context on disk." + ) + if push and load: + raise ValueError( + "buildx_build: `push` and `load` are mutually exclusive; --load only works for " + "single-platform builds loaded into the local image store, --push uploads to a " + "registry. Pick one (or use `output=` for a custom output spec)." + ) + args: list[str] = ["build", "--progress=plain"] + for tag in tags or []: + args.extend(["--tag", tag]) + # buildx documents `--platform` as a comma-separated list (e.g. `linux/amd64,linux/arm64`). + # The underlying flag is a stringArray, so repeating it would also work, but the comma + # form is the canonical invocation shown in all upstream docs. + if platforms: + args.extend(["--platform", ",".join(platforms)]) + if file is not None: + args.extend(["--file", file]) + for key, value in (build_args or {}).items(): + args.extend(["--build-arg", f"{key}={value}"]) + for key, value in (build_contexts or {}).items(): + args.extend(["--build-context", f"{key}={value}"]) + for key, value in (labels or {}).items(): + args.extend(["--label", f"{key}={value}"]) + for annotation in annotations or []: + args.extend(["--annotation", annotation]) + if target is not None: + args.extend(["--target", target]) + if push: + args.append("--push") + if load: + args.append("--load") + for spec in output or []: + args.extend(["--output", spec]) + if no_cache: + args.append("--no-cache") + for stage in no_cache_filter or []: + args.extend(["--no-cache-filter", stage]) + if pull: + args.append("--pull") + for spec in cache_from or []: + args.extend(["--cache-from", spec]) + for spec in cache_to or []: + args.extend(["--cache-to", spec]) + if builder is not None: + args.extend(["--builder", builder]) + if sbom is not None: + args.extend(["--sbom", sbom]) + if provenance is not None: + args.extend(["--provenance", provenance]) + for spec in attest or []: + args.extend(["--attest", spec]) + for spec in secret or []: + args.extend(["--secret", spec]) + for spec in ssh or []: + args.extend(["--ssh", spec]) + args.append(context) + return _run_buildx(args, timeout=timeout_seconds).to_dict() + + +@mcp.tool() +def buildx_bake( + targets: list[str] | None = None, + files: list[str] | None = None, + set_overrides: list[str] | None = None, + push: bool = False, + load: bool = False, + no_cache: bool = False, + pull: bool = False, + builder: str | None = None, + cwd: str | None = None, + timeout_seconds: float = _TIMEOUT_BAKE, +) -> dict: + """ + Build multiple targets defined in a bake file (HCL, JSON, or compose). + + args: + targets: list[str] - Bake targets to build (default: the `default` group) + files: list[str] - Bake file paths (`-f`, repeatable) + set_overrides: list[str] - Per-target overrides, e.g. ["app.platform=linux/amd64"] + push: bool - Push results to the registry + load: bool - Load results into the local image store + no_cache: bool - Do not use cache when building + pull: bool - Always pull a newer base image + builder: str - Override the active builder + cwd: str - Working directory containing the bake file (defaults to the server's cwd) + timeout_seconds: float - Subprocess timeout (default 1800s) + returns: dict - {"returncode": int, "stdout": str, "stderr": str, "truncated": bool} + """ + args: list[str] = ["bake", "--progress=plain"] + for f in files or []: + args.extend(["-f", f]) + for override in set_overrides or []: + args.extend(["--set", override]) + if push: + args.append("--push") + if load: + args.append("--load") + if no_cache: + args.append("--no-cache") + if pull: + args.append("--pull") + if builder is not None: + args.extend(["--builder", builder]) + if targets: + args.extend(targets) + return _run_buildx(args, cwd=cwd, timeout=timeout_seconds).to_dict() + + +@mcp.tool() +def buildx_imagetools_inspect( + image: str, + raw: bool = False, + format: str | None = None, + builder: str | None = None, +) -> dict: + """ + Inspect a manifest in a registry without pulling. + + Replaces `docker manifest inspect`. The standalone `docker manifest` command is in + maintenance mode and lacks support for OCI image indexes, attestations, and + annotations — `buildx imagetools inspect` is the path forward and handles both + single-platform manifests and multi-platform manifest lists / OCI indexes. + + args: + image: str - Image reference, e.g. "alpine:3.19" or "ghcr.io/org/repo@sha256:..." + raw: bool - Return the raw manifest bytes (a JSON document) instead of the + human-rendered tree + format: str - Go template format string (mutually exclusive with `raw`) + builder: str - Override the active builder + returns: dict - {"returncode": int, "stdout": str, "stderr": str, "truncated": bool}. + When `raw=True` or `format="{{json .}}"`, `stdout` is a JSON document + the caller can parse. + """ + if raw and format is not None: + raise ValueError( + "buildx_imagetools_inspect: `raw` and `format` are mutually exclusive — `raw` " + "always emits the unmodified manifest JSON, while `format` runs a Go template " + "against a rendered view. Pick one." + ) + args: list[str] = ["imagetools", "inspect"] + if raw: + args.append("--raw") + if format is not None: + args.extend(["--format", format]) + if builder is not None: + args.extend(["--builder", builder]) + args.append(image) + return _run_buildx(args, timeout=_TIMEOUT_QUERY).to_dict() + + +@mcp.tool() +def buildx_imagetools_create( + target: str, + sources: list[str], + append: bool = False, + dry_run: bool = False, + annotations: list[str] | None = None, + platforms: list[str] | None = None, + files: list[str] | None = None, + builder: str | None = None, + timeout_seconds: float = _TIMEOUT_IMAGETOOLS_CREATE, +) -> dict: + """ + Create a manifest list / OCI image index from existing per-platform tags. + + Replaces `docker manifest create` + `docker manifest push` — `imagetools create` + builds the index and pushes it in one operation. The source tags must already be + pushed to the registry; this command only stitches them together. + + args: + target: str - Tag for the new manifest list (`-t`) + sources: list[str] - Source image references to combine + append: bool - Append to the existing manifest at `target` rather than replacing + dry_run: bool - Print the resulting manifest without pushing + annotations: list[str] - OCI annotations (repeatable; passed verbatim) + platforms: list[str] - Filter source platforms when combining + files: list[str] - Read source descriptors from files instead of refs + builder: str - Override the active builder + timeout_seconds: float - Subprocess timeout (default 600s) + returns: dict - {"returncode": int, "stdout": str, "stderr": str, "truncated": bool} + """ + if not sources and not files: + raise ValueError("buildx_imagetools_create requires at least one source ref or file") + args: list[str] = ["imagetools", "create", "--tag", target] + if append: + args.append("--append") + if dry_run: + args.append("--dry-run") + for annotation in annotations or []: + args.extend(["--annotation", annotation]) + if platforms: + args.extend(["--platform", ",".join(platforms)]) + for f in files or []: + args.extend(["--file", f]) + if builder is not None: + args.extend(["--builder", builder]) + args.extend(sources) + return _run_buildx(args, timeout=timeout_seconds).to_dict() + + +@mcp.tool() +def buildx_ls() -> list: + """ + List builder instances. + + returns: list - One dict per builder (parsed from `--format '{{json .}}'`). + If the captured stdout was truncated by MAX_CLI_OUTPUT_BYTES the + last (likely partial) record is dropped before parsing. + """ + result = _run_buildx(["ls", "--format", "{{json .}}"], timeout=_TIMEOUT_QUERY) + _raise_on_failure(result, "ls") + return _parse_json_lines(result.stdout, truncated=result.truncated, what="buildx ls output") + + +@mcp.tool() +def buildx_inspect(name: str | None = None, bootstrap: bool = False) -> dict: + """ + Inspect a builder instance. + + args: + name: str - Builder name (defaults to the active builder) + bootstrap: bool - Boot the builder if it isn't already running + returns: dict - {"returncode": int, "stdout": str, "stderr": str, "truncated": bool}. + stdout is human-readable; parse with the agent or call buildx_ls for JSON. + """ + args: list[str] = ["inspect"] + if bootstrap: + args.append("--bootstrap") + if name is not None: + args.append(name) + return _run_buildx(args, timeout=_TIMEOUT_QUERY).to_dict() + + +@mcp.tool() +def buildx_du(builder: str | None = None) -> list: + """ + Report BuildKit cache disk usage as a list of records. + + A large cache can easily generate more output than MAX_CLI_OUTPUT_BYTES; if that + happens the captured stdout is truncated and this tool drops the final (partial) + record before parsing. For an exhaustive accounting on a busy builder, run + `docker buildx du --format '{{json .}}'` on the host directly. + + args: builder: str - Override the active builder + returns: list - One dict per cache record (parsed from `--format '{{json .}}'`) + """ + args: list[str] = ["du", "--format", "{{json .}}"] + if builder is not None: + args.extend(["--builder", builder]) + result = _run_buildx(args, timeout=_TIMEOUT_QUERY) + _raise_on_failure(result, "du") + return _parse_json_lines(result.stdout, truncated=result.truncated, what="buildx du output") + + +@mcp.tool() +def buildx_prune( + all: bool = False, + filter: dict | None = None, + keep_storage: str | None = None, + reserved_space: str | None = None, + max_used_space: str | None = None, + min_free_space: str | None = None, + builder: str | None = None, + timeout_seconds: float = _TIMEOUT_PRUNE, +) -> dict: + """ + Remove BuildKit cache entries. + + Destructive: this tool always passes `--force` because no interactive prompt is + available under MCP. Pair with `buildx_du` first to inventory what would be removed. + + args: + all: bool - Include internal/frontend images + filter: dict - Filter values (e.g. {"until": "24h", "type": "exec.cachemount"}) + keep_storage: str - DEPRECATED; older buildx flag. Use `reserved_space` instead. + reserved_space: str - Amount of disk to always keep (e.g. "10GB") + max_used_space: str - Maximum disk space the cache may use (e.g. "20GB") + min_free_space: str - Target amount of free disk after pruning (e.g. "5GB") + builder: str - Override the active builder + timeout_seconds: float - Subprocess timeout (default 600s) + returns: dict - {"returncode": int, "stdout": str, "stderr": str, "truncated": bool} + """ + args: list[str] = ["prune", "--force"] + if all: + args.append("--all") + for key, value in (filter or {}).items(): + args.extend(["--filter", f"{key}={value}"]) + if keep_storage is not None: + args.extend(["--keep-storage", keep_storage]) + if reserved_space is not None: + args.extend(["--reserved-space", reserved_space]) + if max_used_space is not None: + args.extend(["--max-used-space", max_used_space]) + if min_free_space is not None: + args.extend(["--min-free-space", min_free_space]) + if builder is not None: + args.extend(["--builder", builder]) + return _run_buildx(args, timeout=timeout_seconds).to_dict() + + +@mcp.tool() +def buildx_create( + name: str | None = None, + driver: str | None = None, + driver_opts: dict | None = None, + use: bool = False, + bootstrap: bool = False, + platforms: list[str] | None = None, + config: str | None = None, + node_name: str | None = None, + append: bool = False, +) -> dict: + """ + Create a new builder instance. + + args: + name: str - Name for the new builder (defaults to a generated name) + driver: str - BuildKit driver (e.g. "docker-container", "kubernetes", "remote") + driver_opts: dict - Driver-specific options (each becomes `--driver-opt KEY=VALUE`) + use: bool - Set the new builder as the current one + bootstrap: bool - Boot the builder immediately + platforms: list[str] - Platforms the builder advertises + config: str - Path to a buildkitd config file + node_name: str - Node name within the builder (for multi-node builders) + append: bool - Append a node to an existing builder named `name` + returns: dict - {"returncode": int, "stdout": str, "stderr": str, "truncated": bool} + """ + args: list[str] = ["create"] + if driver is not None: + args.extend(["--driver", driver]) + for key, value in (driver_opts or {}).items(): + args.extend(["--driver-opt", f"{key}={value}"]) + if use: + args.append("--use") + if bootstrap: + args.append("--bootstrap") + if platforms: + args.extend(["--platform", ",".join(platforms)]) + if config is not None: + args.extend(["--config", config]) + if node_name is not None: + args.extend(["--node", node_name]) + if append: + args.append("--append") + if name is not None: + args.extend(["--name", name]) + return _run_buildx(args, timeout=_TIMEOUT_QUERY).to_dict() + + +@mcp.tool() +def buildx_use(name: str, default: bool = False, global_default: bool = False) -> dict: + """ + Switch the current builder. + + args: + name: str - Builder name to activate + default: bool - Set as default for this context + global_default: bool - Set as default across all contexts + returns: dict - {"returncode": int, "stdout": str, "stderr": str, "truncated": bool} + """ + args: list[str] = ["use"] + if default: + args.append("--default") + if global_default: + args.append("--global") + args.append(name) + return _run_buildx(args, timeout=_TIMEOUT_QUERY).to_dict() + + +@mcp.tool() +def buildx_rm( + name: str | None = None, + all_inactive: bool = False, + keep_state: bool = False, + keep_daemon: bool = False, + force: bool = False, +) -> dict: + """ + Remove a builder instance. + + args: + name: str - Builder name to remove (mutually exclusive with `all_inactive`) + all_inactive: bool - Remove every inactive builder + keep_state: bool - Keep the BuildKit state volume + keep_daemon: bool - Keep the BuildKit daemon process running + force: bool - Force removal even if the builder is in use + returns: dict - {"returncode": int, "stdout": str, "stderr": str, "truncated": bool} + """ + if not name and not all_inactive: + raise ValueError("buildx_rm requires either `name` or `all_inactive=True`") + if name and all_inactive: + raise ValueError( + "buildx_rm: `name` and `all_inactive=True` are mutually exclusive — pass `name` to " + "remove a specific builder, or `all_inactive=True` to sweep every inactive one." + ) + args: list[str] = ["rm"] + if all_inactive: + args.append("--all-inactive") + if keep_state: + args.append("--keep-state") + if keep_daemon: + args.append("--keep-daemon") + if force: + args.append("--force") + if name is not None: + args.append(name) + return _run_buildx(args, timeout=_TIMEOUT_QUERY).to_dict() diff --git a/tools/prompts.py b/tools/prompts.py index aeac33e..45deb63 100644 --- a/tools/prompts.py +++ b/tools/prompts.py @@ -254,3 +254,192 @@ def find_latest_image_tag(image: str) -> str: f"the user can sanity-check the image's provenance.\n" f"Report the recommended tag, its digest, and the supported platforms. Do not pull the image." ) + + +@mcp.prompt(description="Plan and run a multi-platform image build with buildx.") +def plan_multiarch_build(image: str, platforms: str = "linux/amd64,linux/arm64", context: str = ".") -> str: + """ + Generate a plan for building and pushing a multi-platform image with buildx. + + args: image: str - Target image reference, e.g. "ghcr.io/org/app:v1" + args: platforms: str - Comma-separated platform list (default "linux/amd64,linux/arm64") + args: context: str - Build context path (default ".") + returns: str - A prompt instructing the agent to plan, build, and verify a multi-arch image + """ + platforms_list = ", ".join(f'"{p.strip()}"' for p in platforms.split(",") if p.strip()) + return ( + f"Build and push `{image}` for multiple platforms ({platforms}) using buildx:\n" + f"1. Call `buildx_ls` and confirm a non-`docker` driver is active (the default `docker` driver " + f"cannot do multi-platform; you need `docker-container` or another buildx driver). If only `docker` " + f"is available, call `buildx_create(name='multi', driver='docker-container', use=True, bootstrap=True)`.\n" + f'2. Call `buildx_imagetools_inspect(image="", raw=True)` on each `FROM` reference to ' + f"confirm every base image actually publishes the requested platforms — multi-arch builds silently " + f"fall back to slow QEMU emulation when a platform is missing.\n" + f'3. Call `buildx_build(context="{context}", tags=["{image}"], platforms=[{platforms_list}], ' + f'push=True, provenance="mode=max", sbom="true")` to build, attest, and push in one step. The ' + f"`--load` flag cannot be combined with multi-platform; results live only in the registry.\n" + f'4. After the build, call `buildx_imagetools_inspect(image="{image}", raw=True)` and confirm the ' + f"published manifest list contains every requested platform.\n" + f"Surface any platform that was skipped or built via emulation before declaring success." + ) + + +@mcp.prompt(description="Audit an image's CVE posture with Docker Scout.") +def audit_image_cves(image: str) -> str: + """ + Generate a plan for walking through Scout's CVE reporting for an image. + + args: image: str - Image reference to scan + returns: str - A prompt instructing the agent to scan, prioritize, and report + """ + return ( + f"Audit `{image}` for known vulnerabilities using Docker Scout:\n" + f'1. Call `scout_quickview(image="{image}")` first to get a one-screen summary of total CVE counts ' + f"by severity. Stop here if everything is `0` and the user just needs reassurance.\n" + f'2. Call `scout_cves(image="{image}", only_severity=["critical", "high"], only_fixed=True)` to ' + f"list actionable CVEs (high+critical with a fix available). Ignore lower-severity findings unless " + f"the user asks for them.\n" + f'3. Call `scout_cves(image="{image}", only_severity=["critical", "high"], ignore_base=True)` to ' + f"separate CVEs introduced by the application image from those inherited from the base. CVEs that " + f"only appear in the unfiltered call are base-image issues — the right fix is a base bump, not a " + f"package patch.\n" + f"4. For each remaining CVE, report the package, installed version, fixed version, and CVE ID. " + f"Recommend the smallest patch that addresses the high-priority findings.\n" + f"Note: Scout's most useful data requires `docker login` on the host running this MCP server. If the " + f"output looks sparse, ask the user whether the host is authenticated." + ) + + +@mcp.prompt(description="Compare two image versions and report the CVE delta.") +def compare_image_versions(old_image: str, new_image: str) -> str: + """ + Generate a plan for comparing two image references via Scout. + + args: old_image: str - The baseline image reference + args: new_image: str - The candidate image reference + returns: str - A prompt instructing the agent to compare and report + """ + return ( + f"Compare `{old_image}` against `{new_image}` and report the security delta:\n" + f'1. Call `scout_compare(image="{new_image}", to="{old_image}", ignore_unchanged=True, ' + f'only_severity=["critical", "high"])` to get the CVE diff filtered to actionable severities.\n' + f"2. Categorize the diff into:\n" + f" - Resolved CVEs (present in old, absent in new)\n" + f" - New CVEs (absent in old, present in new) — these are regressions worth flagging\n" + f" - Carried-forward CVEs (unchanged)\n" + f"3. If there are new high/critical CVEs in the candidate, recommend whether to proceed, hold, " + f"or wait for a base-image refresh. Use `scout_recommendations` to check whether a different " + f"base tag would resolve them.\n" + f"Render the result as a short table; stop and ask before any rebuild or rollback." + ) + + +@mcp.prompt(description="Recommend a safer base image via Docker Scout.") +def recommend_base_image(image: str) -> str: + """ + Generate a plan for picking a better base image using Scout. + + args: image: str - Image reference whose base should be reviewed + returns: str - A prompt instructing the agent to fetch and present recommendations + """ + return ( + f"Recommend a safer base image for `{image}`:\n" + f'1. Call `scout_recommendations(image="{image}")` to fetch Scout\'s base-image suggestions. ' + f"Distinguish `refresh` recommendations (same major/minor, newer patches) from `update` " + f"recommendations (a different major/minor release).\n" + f'2. For each viable candidate base, call `scout_compare(image=, to="{image}", ' + f'only_severity=["critical", "high"])` to confirm it actually resolves more CVEs than it ' + f"introduces. A refresh that fixes 3 highs and introduces 4 is not progress.\n" + f"3. Verify the candidate exists on the registry and supports the platforms you build for: call " + f"`buildx_imagetools_inspect(image=, raw=True)` (which accepts a full ref like " + f"`python:3.13-slim`) and check the platforms list in the returned manifest. Avoid " + f"`registry_inspect_manifest` here — its `image` argument strips any `:tag`/`@digest`, so a full " + f"candidate ref would need to be split into separate `image` and `reference` arguments.\n" + f"Report the recommended base, the CVEs it resolves, the CVEs it introduces (if any), and the " + f"single-line Dockerfile change required. Do not modify any Dockerfile." + ) + + +@mcp.prompt(description="Inspect a multi-arch manifest list / OCI image index without pulling.") +def inspect_multiarch_manifest(image: str) -> str: + """ + Generate a plan for inspecting an image's manifest list. + + Use this when reaching for `docker manifest inspect` — that command is in maintenance mode + and lacks support for OCI image indexes and attestations. `buildx_imagetools_inspect` is + the path forward. + + args: image: str - Image reference (tag or digest), e.g. "alpine:3.19" + returns: str - A prompt instructing the agent to inspect and interpret the manifest + """ + return ( + f"Inspect the manifest for `{image}` without pulling it:\n" + f'1. Call `buildx_imagetools_inspect(image="{image}", raw=True)` to fetch the raw manifest JSON. ' + f"This replaces `docker manifest inspect` and handles both single-platform manifests and " + f"multi-platform manifest lists / OCI image indexes.\n" + f"2. Identify the response shape:\n" + f" - `application/vnd.oci.image.manifest.v1+json` or `…/docker.distribution.manifest.v2+json` " + f"=> single-platform image; report the architecture, OS, and layer count.\n" + f" - `application/vnd.oci.image.index.v1+json` or `…/docker.distribution.manifest.list.v2+json` " + f"=> multi-platform index; report each entry's platform and digest.\n" + f"3. If the index also lists `attestation-manifest` entries (provenance / SBOM), call " + f"`buildx_imagetools_inspect` again on each attestation digest to surface those payloads.\n" + f"Render the result as a single table; do not pull or modify the image." + ) + + +@mcp.prompt(description="Create a multi-arch manifest list from existing per-platform tags.") +def create_multiarch_manifest(target_tag: str, source_tags: str) -> str: + """ + Generate a plan for stitching per-platform tags into a manifest list. + + Use this when reaching for `docker manifest create` + `docker manifest push` — + `buildx_imagetools_create` does both in one step and handles OCI image indexes. + + args: target_tag: str - The new combined tag, e.g. "org/app:v1" + args: source_tags: str - Comma-separated source tags (each must already be pushed), + e.g. "org/app:v1-amd64,org/app:v1-arm64" + returns: str - A prompt instructing the agent to create and verify the manifest list + """ + source_list = ", ".join(f'"{s.strip()}"' for s in source_tags.split(",") if s.strip()) + return ( + f"Create the manifest list `{target_tag}` from {source_tags}:\n" + f"1. Confirm each source tag is already pushed to the registry by calling " + f"`buildx_imagetools_inspect` on each one — `imagetools create` only stitches; it cannot upload " + f"missing image layers.\n" + f'2. Call `buildx_imagetools_create(target="{target_tag}", sources=[{source_list}], dry_run=True)` ' + f"first to print the resulting manifest without pushing. Show the user which platforms will be " + f"published under the combined tag.\n" + f"3. After the user approves, repeat without `dry_run` to actually push. This replaces the " + f"`docker manifest create && docker manifest push` pair in one operation.\n" + f'4. Verify with `buildx_imagetools_inspect(image="{target_tag}", raw=True)` that the published ' + f"index contains every expected platform.\n" + f"Report the digest of the combined manifest at the end." + ) + + +@mcp.prompt(description="Translate `docker manifest …` commands into buildx imagetools equivalents.") +def migrate_from_docker_manifest() -> str: + """ + Generate a reference table mapping each `docker manifest` subcommand to its + buildx imagetools replacement. The standalone `docker manifest` command is in + maintenance mode and lacks support for OCI image indexes, attestations, and + annotations. + + returns: str - A prompt the agent can hand to the user as a migration cheat-sheet + """ + return ( + "`docker manifest` is in maintenance mode. Use `buildx imagetools` for new work — it supports OCI " + "image indexes, attestations, and richer annotations.\n\n" + "Mapping:\n\n" + "| `docker manifest …` | This MCP server |\n" + "|--------------------------------------|------------------------------------------|\n" + "| `inspect REF` | `buildx_imagetools_inspect(image=REF)` |\n" + "| `inspect --verbose REF` | `buildx_imagetools_inspect(image=REF, raw=True)` |\n" + "| `create NEW SRC…` + `push NEW` | `buildx_imagetools_create(target=NEW, sources=[SRC…])` (push is implicit) |\n" + "| `create --amend NEW SRC…` | `buildx_imagetools_create(target=NEW, sources=[SRC…], append=True)` |\n" + "| `annotate NEW SRC --os/--arch/--variant` | `buildx_imagetools_create(target=NEW, sources=[SRC…], annotations=[…])` (re-create from sources) |\n" + "| `push NEW` | Not needed — `buildx_imagetools_create` pushes |\n" + "| `rm NEW` | Not needed — `buildx_imagetools_create` overwrites |\n" + "\nWhen in doubt, run `buildx_imagetools_inspect(image=REF, raw=True)` first to see the current shape." + ) diff --git a/tools/registry.py b/tools/registry.py index df3e4ff..19912e1 100644 --- a/tools/registry.py +++ b/tools/registry.py @@ -4,8 +4,11 @@ # they work without a running daemon and without the docker CLI. Anonymous # (unauthenticated) access is used unless a username/password is supplied. +import datetime +import email.utils import re -from typing import Any +import time +from typing import Any, NoReturn import httpx @@ -17,6 +20,16 @@ _DEFAULT_REGISTRY = "registry-1.docker.io" _MAX_TAG_PAGES = 50 # cap on registry/Hub pagination follow-through +# 429 rate-limit policy: if the registry tells us to wait this many seconds or less, +# we sleep and transparently retry once. Anything longer is surfaced to the caller +# so an agent / human can decide whether to back off rather than blocking inside a tool. +_RATE_LIMIT_RETRY_THRESHOLD_SECONDS = 10.0 + +# Errors emitted by email.utils.parsedate_to_datetime for non-date input. Bound to a +# module-level tuple so ruff format leaves the `except` form alone — PEP 758 makes the +# parentheses optional on Python 3.14, but we keep them for clarity to review bots. +_RETRY_AFTER_PARSE_ERRORS: tuple[type[BaseException], ...] = (TypeError, ValueError) + # Manifest media types we accept when inspecting a reference. The order matters: # clients with no preference get a manifest list first when one exists. _MANIFEST_ACCEPT = ", ".join( @@ -105,6 +118,88 @@ def _get_bearer_token( return token +def _parse_retry_after(value: str | None) -> float | None: + """ + Decode a `Retry-After` header (RFC 7231). + + The value is either an integer number of seconds (``"30"``) or an HTTP-date + (``"Wed, 21 Oct 2026 07:28:00 GMT"``). Returns the delay in seconds, or None + if the header is missing or unparseable. + """ + if not value: + return None + value = value.strip() + try: + return max(0.0, float(value)) + except ValueError: + pass + try: + parsed = email.utils.parsedate_to_datetime(value) + except _RETRY_AFTER_PARSE_ERRORS: + return None + if parsed is None: + return None + # RFC 7231 / 9110 mandates that HTTP-dates are UTC. `parsedate_to_datetime` returns + # a naive datetime when the source said `-0000` (and only then); naive `.timestamp()` + # would re-interpret in local time and skew the delay. Force UTC explicitly. + if parsed.tzinfo is None: + parsed = parsed.replace(tzinfo=datetime.timezone.utc) + delta = parsed.timestamp() - time.time() + return max(0.0, delta) + + +# Hosts where Docker Hub's documented anonymous-pull cap applies. Used to tailor the +# 429 error message — every other registry (GHCR, ECR, GAR, Quay, self-hosted, …) +# enforces its own limits with different remedies, so a Hub-specific hint there is +# more misleading than helpful. +_DOCKER_HUB_HOSTS = frozenset({"registry-1.docker.io", "index.docker.io", "hub.docker.com"}) + + +def _raise_rate_limited(resp: httpx.Response, url: str) -> NoReturn: + retry_after = _parse_retry_after(resp.headers.get("Retry-After")) + suffix = f"; retry after ~{retry_after:.0f}s" if retry_after is not None else "" + parsed_host = httpx.URL(url).host + if parsed_host in _DOCKER_HUB_HOSTS: + guidance = ( + " Docker Hub caps anonymous pulls at ~100 requests / 6h per IP — " + "authenticate with `docker login` (for SDK-backed tools) or pass " + "`username`/`password` to `registry_list_tags` to raise the limit." + ) + else: + guidance = ( + " Consult the target registry's rate-limit policy; most registries raise the " + "limit substantially once you authenticate with `username`/`password`." + ) + raise RuntimeError(f"Registry rate-limited (HTTP 429) for {url}{suffix}.{guidance}") + + +def _get_with_429_policy( + client: httpx.Client, + url: str, + *, + headers: dict[str, str], + params: dict[str, str] | None = None, +) -> httpx.Response: + """ + Single GET that applies the project's 429 retry policy. + + - On HTTP 429 with `Retry-After <= 10s`: sleep + retry once. + - On HTTP 429 with no Retry-After, or a longer delay, or a second 429: raise. + - Other status codes are returned as-is for the caller to handle. + """ + resp = client.get(url, headers=headers, params=params) + if resp.status_code != 429: + return resp + retry_after = _parse_retry_after(resp.headers.get("Retry-After")) + if retry_after is None or retry_after > _RATE_LIMIT_RETRY_THRESHOLD_SECONDS: + _raise_rate_limited(resp, url) + time.sleep(retry_after) + resp = client.get(url, headers=headers, params=params) + if resp.status_code == 429: + _raise_rate_limited(resp, url) + return resp + + def _registry_get( registry: str, path: str, @@ -114,20 +209,20 @@ def _registry_get( accept: str | None = None, timeout: float, ) -> httpx.Response: - """GET https:///, transparently handling a Bearer 401 challenge.""" + """GET https:///, transparently handling a Bearer 401 challenge and 429 rate limits.""" url = f"https://{registry}{path}" headers: dict[str, str] = {"User-Agent": _USER_AGENT} if accept: headers["Accept"] = accept with httpx.Client(timeout=timeout, follow_redirects=True) as client: - resp = client.get(url, headers=headers) + resp = _get_with_429_policy(client, url, headers=headers) if resp.status_code == 401: challenge = _parse_bearer_challenge(resp.headers.get("WWW-Authenticate", "")) if not challenge: resp.raise_for_status() token = _get_bearer_token(client, challenge, username=username, password=password) headers["Authorization"] = f"Bearer {token}" - resp = client.get(url, headers=headers) + resp = _get_with_429_policy(client, url, headers=headers) resp.raise_for_status() return resp @@ -286,7 +381,7 @@ def hub_list_tags(repository: str, limit: int = 100) -> dict: pages = 0 with httpx.Client(timeout=_DEFAULT_TIMEOUT, follow_redirects=True) as client: while url and pages < _MAX_TAG_PAGES: - resp = client.get(url, headers={"User-Agent": _USER_AGENT}) + resp = _get_with_429_policy(client, url, headers={"User-Agent": _USER_AGENT}) resp.raise_for_status() body = resp.json() for entry in body.get("results", []) or []: @@ -321,11 +416,8 @@ def hub_repo_info(repository: str) -> dict: pull_count, last_updated, is_private, etc.) """ repo = _hub_normalize(repository) - resp = httpx.get( - f"{_HUB_API_BASE}/repositories/{repo}/", - timeout=_DEFAULT_TIMEOUT, - headers={"User-Agent": _USER_AGENT}, - follow_redirects=True, - ) + url = f"{_HUB_API_BASE}/repositories/{repo}/" + with httpx.Client(timeout=_DEFAULT_TIMEOUT, follow_redirects=True) as client: + resp = _get_with_429_policy(client, url, headers={"User-Agent": _USER_AGENT}) resp.raise_for_status() return resp.json() diff --git a/tools/resources.py b/tools/resources.py index 7da7eae..dbebf97 100644 --- a/tools/resources.py +++ b/tools/resources.py @@ -36,6 +36,11 @@ "registry-api": "https://distribution.github.io/distribution/spec/api/", "oci-distribution-spec": "https://github.com/opencontainers/distribution-spec/blob/main/spec.md", "hub-api": "https://docs.docker.com/reference/api/hub/latest/", + "buildx": "https://docs.docker.com/build/builders/", + "buildx-cli": "https://docs.docker.com/reference/cli/docker/buildx/", + "buildx-bake": "https://docs.docker.com/build/bake/reference/", + "scout": "https://docs.docker.com/scout/", + "scout-cli": "https://docs.docker.com/reference/cli/docker/scout/", } diff --git a/tools/scout.py b/tools/scout.py new file mode 100644 index 0000000..6d6a94f --- /dev/null +++ b/tools/scout.py @@ -0,0 +1,221 @@ +# library of mcp tools for `docker scout`. +# +# Scout is a CLI plugin that talks to Docker's vulnerability database. Most operations +# require `docker login` against Docker Hub to fetch policy data and per-image scans; +# anonymous calls work for basic CVE listing on public images but degrade for the +# `recommendations` and policy-related subcommands. + +import json + +from server import mcp +from tools._cli import CliResult, require_plugin, run_docker + +# Scout calls are CDN-backed network queries; 5 minutes is plenty for any one image. +_TIMEOUT_SCOUT = 300.0 + + +def _run_scout(args: list[str], *, timeout: float = _TIMEOUT_SCOUT) -> CliResult: + require_plugin("scout") + return run_docker(["scout", *args], timeout=timeout) + + +def _maybe_parse_json(text: str, format: str) -> dict | list | str | None: + """Parse `text` as JSON when `format=='json'`, otherwise return the raw text.""" + if format != "json": + return text + stripped = text.strip() + if not stripped: + return None + try: + return json.loads(stripped) + except json.JSONDecodeError: + return text + + +@mcp.tool() +def scout_cves( + image: str, + only_fixed: bool = False, + only_severity: list[str] | None = None, + ignore_base: bool = False, + format: str = "json", + platform: str | None = None, +) -> dict: + """ + List vulnerabilities (CVEs) in an image via Docker Scout. + + Anonymous scans work for public images, but Hub policy enforcement and richer + recommendations require `docker login` on the host running this MCP server. + + args: + image: str - Image reference (a tag or a digest) + only_fixed: bool - Only report CVEs with a fixed version available + only_severity: list[str] - Filter to these severities. Accepted values: + "critical", "high", "medium", "low", "unspecified" + ignore_base: bool - Exclude CVEs introduced by the base image + format: str - Output format: "json" (default; parsed into the return dict), + "sarif", "spdx", "list", "markdown", or "text" + platform: str - Platform of the image to analyze, e.g. "linux/amd64" + returns: dict - {"format": , "result": , + "raw": } + """ + args: list[str] = ["cves", "--format", format] + if only_fixed: + args.append("--only-fixed") + if only_severity: + args.extend(["--only-severity", ",".join(only_severity)]) + if ignore_base: + args.append("--ignore-base") + if platform is not None: + args.extend(["--platform", platform]) + args.append(image) + result = _run_scout(args) + return {"format": format, "result": _maybe_parse_json(result.stdout, format), "raw": result.to_dict()} + + +@mcp.tool() +def scout_quickview(image: str, format: str = "json", platform: str | None = None) -> dict: + """ + Render a compact summary of an image's CVE posture. + + args: + image: str - Image reference + format: str - Output format: "json" (default) or "text" + platform: str - Platform of the image to analyze, e.g. "linux/amd64" + returns: dict - {"format": , "result": , + "raw": } + """ + args: list[str] = ["quickview", "--format", format] + if platform is not None: + args.extend(["--platform", platform]) + args.append(image) + result = _run_scout(args) + return {"format": format, "result": _maybe_parse_json(result.stdout, format), "raw": result.to_dict()} + + +@mcp.tool() +def scout_recommendations( + image: str, + only_refresh: bool = False, + only_update: bool = False, + tag: str | None = None, + format: str = "json", + platform: str | None = None, +) -> dict: + """ + Suggest base-image upgrades for an image. + + Recommendations are computed against Docker Scout's catalog and generally require + `docker login` on the host running this MCP server to return useful results for + private or rarely-scanned base images. + + args: + image: str - Image reference + only_refresh: bool - Only show "refresh" recommendations (same major/minor) + only_update: bool - Only show "update" recommendations (newer minor/major) + tag: str - Restrict to suggestions matching this tag pattern + format: str - Output format: "json" (default) or "text" + platform: str - Platform of the image to analyze + returns: dict - {"format": , "result": , + "raw": } + """ + args: list[str] = ["recommendations", "--format", format] + if only_refresh: + args.append("--only-refresh") + if only_update: + args.append("--only-update") + if tag is not None: + args.extend(["--tag", tag]) + if platform is not None: + args.extend(["--platform", platform]) + args.append(image) + result = _run_scout(args) + return {"format": format, "result": _maybe_parse_json(result.stdout, format), "raw": result.to_dict()} + + +@mcp.tool() +def scout_compare( + image: str, + to: str | None = None, + to_env: str | None = None, + to_latest: bool = False, + only_severity: list[str] | None = None, + ignore_unchanged: bool = False, + format: str = "json", + platform: str | None = None, +) -> dict: + """ + Compare two image references and report the CVE delta. + + Exactly one of `to`, `to_env`, or `to_latest=True` must be supplied to identify + the comparison target. + + args: + image: str - The new / candidate image reference + to: str - Compare against this image reference, directory, or archive + to_env: str - Compare against an image associated with this Scout environment + to_latest: bool - Compare against the latest scan of `image` + only_severity: list[str] - Filter to these severities + ("critical", "high", "medium", "low", "unspecified") + ignore_unchanged: bool - Exclude unchanged packages from the diff + format: str - Output format: "json" (default), "markdown", or "text" + platform: str - Platform of the image to analyze + returns: dict - {"format": , "result": , + "raw": } + """ + targets = [bool(to), bool(to_env), bool(to_latest)] + if sum(targets) != 1: + raise ValueError("scout_compare requires exactly one of `to`, `to_env`, or `to_latest=True`") + args: list[str] = ["compare", "--format", format] + if to is not None: + args.extend(["--to", to]) + if to_env is not None: + args.extend(["--to-env", to_env]) + if to_latest: + args.append("--to-latest") + if only_severity: + args.extend(["--only-severity", ",".join(only_severity)]) + if ignore_unchanged: + args.append("--ignore-unchanged") + if platform is not None: + args.extend(["--platform", platform]) + args.append(image) + result = _run_scout(args) + return {"format": format, "result": _maybe_parse_json(result.stdout, format), "raw": result.to_dict()} + + +@mcp.tool() +def scout_sbom( + image: str, + format: str = "spdx", + platform: str | None = None, +) -> dict: + """ + Generate a Software Bill of Materials (SBOM) for an image. + + SBOMs can be large; the captured stdout is subject to the standard MAX_CLI_OUTPUT_BYTES + cap and may be flagged as truncated for very large images. If that's a concern, run + `docker scout sbom -o file.json …` on the host directly and load the file separately. + + args: + image: str - Image reference + format: str - SBOM format. Accepted values (per `docker scout sbom --format`): + - "spdx" (the default for this tool) — SPDX JSON + - "cyclonedx" — CycloneDX JSON + - "json" — Scout's native SBOM JSON (the CLI's own default) + - "list" — a plain-text list of packages, no schema + platform: str - Platform of the image to analyze + returns: dict - {"format": , "result": <…>, "raw": }. + `result` is a parsed dict when `format` is one of "spdx", "cyclonedx", + or "json" (all JSON serializations) and the stdout parses cleanly; + when `format="list"` or the JSON fails to parse, `result` is the raw text. + """ + args: list[str] = ["sbom", "--format", format] + if platform is not None: + args.extend(["--platform", platform]) + args.append(image) + result = _run_scout(args) + # SPDX and CycloneDX are both JSON; the cyclonedx-xml variant returns XML. + parse_as_json = format in {"spdx", "cyclonedx", "json"} + parsed = _maybe_parse_json(result.stdout, "json") if parse_as_json else result.stdout + return {"format": format, "result": parsed, "raw": result.to_dict()}