Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions src/mcp/client/experimental/ai_catalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""Ingest AI Catalogs.

WARNING: These APIs are experimental and may change without notice.

A client discovers the AI artifacts a host advertises by fetching its catalog
from the well-known location::

    from mcp.client.experimental.ai_catalog import fetch_ai_catalog, well_known_ai_catalog_url

    catalog = await fetch_ai_catalog(well_known_ai_catalog_url("https://dice.example.com"))
    for entry in catalog.entries:
        print(entry.identifier, entry.media_type, entry.url)

For the MCP-specific flow — fetch the catalog and the Server Cards it
advertises in one call — see
``mcp.client.experimental.server_card.discover_server_cards``.
"""

from __future__ import annotations

from urllib.parse import urljoin, urlsplit

import httpx

from mcp.shared._httpx_utils import create_mcp_http_client
from mcp.shared.experimental.ai_catalog.types import (
AI_CATALOG_MEDIA_TYPE,
AI_CATALOG_WELL_KNOWN_PATH,
AICatalog,
)

__all__ = ["well_known_ai_catalog_url", "fetch_ai_catalog"]


def well_known_ai_catalog_url(url: str, *, well_known_path: str = AI_CATALOG_WELL_KNOWN_PATH) -> str:
    """Return the well-known AI Catalog URL for the host serving ``url``.

    ``url`` may be a bare origin (``https://example.com``) or any other URL
    on the server, such as its ``/mcp`` endpoint. Only the scheme and network
    location of ``url`` are kept; ``well_known_path`` is then joined onto
    that origin, so the catalog is looked up at the host root.

    Args:
        url: Absolute http(s) URL identifying the server.
        well_known_path: Path to join onto the origin.

    Returns:
        The catalog URL on the same scheme and network location as ``url``.

    Raises:
        ValueError: If ``url`` is not an absolute http(s) URL.
    """
    split = urlsplit(url)
    is_http = split.scheme in ("http", "https")
    # A missing network location means the URL is relative or malformed.
    if not (is_http and split.netloc):
        raise ValueError(f"Expected an absolute http(s) URL, got {url!r}")
    origin = f"{split.scheme}://{split.netloc}"
    return urljoin(origin, well_known_path)


async def fetch_ai_catalog(url: str, *, http_client: httpx.AsyncClient | None = None) -> AICatalog:
    """Fetch and validate the AI Catalog at ``url``.

    ``url`` is fetched as-is — catalogs are location-independent; use
    :func:`well_known_ai_catalog_url` to resolve a host's conventional
    location. Pass an existing ``http_client`` to reuse connection pooling /
    auth, otherwise a short-lived client with MCP defaults is used.

    Args:
        url: Location of the catalog document.
        http_client: Client used to issue the request. When omitted, a client
            from ``create_mcp_http_client`` is opened for this call only.

    Returns:
        The validated catalog.

    Raises:
        httpx.HTTPError: If the request fails or returns a non-2xx status.
        json.JSONDecodeError: If a successful response carries a body that is
            not valid JSON (for example an HTML page served for an unknown
            path).
        pydantic.ValidationError: If the document is not a valid AI Catalog.
    """
    if http_client is None:
        # No client supplied: repeat the call inside a short-lived client so
        # it is closed on every exit path, including errors.
        async with create_mcp_http_client() as client:
            return await fetch_ai_catalog(url, http_client=client)
    # Prefer the catalog media type but accept plain JSON as well.
    accept = f"{AI_CATALOG_MEDIA_TYPE}, application/json"
    response = await http_client.get(url, headers={"Accept": accept})
    response.raise_for_status()
    # The body is decoded before validation and decode failures are not
    # wrapped, so a non-JSON 2xx body surfaces as json.JSONDecodeError.
    return AICatalog.model_validate(response.json())

Check warning on line 67 in src/mcp/client/experimental/ai_catalog.py

View check run for this annotation

Claude / Claude Code Review

fetch_* Raises sections omit json.JSONDecodeError from response.json()

The `Raises:` sections of `fetch_ai_catalog` (and `fetch_server_card` / `discover_server_cards` in `server_card.py`) only list `httpx.HTTPError` and `pydantic.ValidationError`, but `response.json()` raises `json.JSONDecodeError` when a 2xx response carries a non-JSON body — a realistic outcome when probing well-known paths on arbitrary hosts (e.g. a 200 + HTML index page). Consider adding `json.JSONDecodeError` to the `Raises:` sections (as `load_server_card` already does) or wrapping it.
Comment on lines +50 to +67
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 The Raises: sections of fetch_ai_catalog (and fetch_server_card / discover_server_cards in server_card.py) only list httpx.HTTPError and pydantic.ValidationError, but response.json() raises json.JSONDecodeError when a 2xx response carries a non-JSON body — a realistic outcome when probing well-known paths on arbitrary hosts (e.g. a 200 + HTML index page). Consider adding json.JSONDecodeError to the Raises: sections (as load_server_card already does) or wrapping it.

Extended reasoning...

What the gap is. fetch_ai_catalog ends with return AICatalog.model_validate(response.json()). httpx.Response.json() delegates to json.loads and does not wrap decode failures, so if the response body is not valid JSON the call raises json.JSONDecodeError straight out of the function. The docstring's Raises: section only documents httpx.HTTPError and pydantic.ValidationError. The same pattern (and the same omission) exists in fetch_server_card and, transitively, discover_server_cards in src/mcp/client/experimental/server_card.py.

Why this path is realistic. These functions are explicitly designed for probing well-known paths on arbitrary hosts (/.well-known/ai-catalog.json, falling back to /.well-known/mcp/catalog.json). A very common failure mode for such probes is a server that answers unknown paths with 200 + an HTML page — SPAs with catch-all routes, reverse proxies serving an index page, captive portals, misconfigured CDNs. In that case raise_for_status() passes (it's a 2xx), and the next line, response.json(), raises json.JSONDecodeError, which the caller has no documented reason to expect.

Step-by-step example.

  1. A client calls await discover_server_cards("https://intranet.example.com").
  2. well_known_ai_catalog_url resolves to https://intranet.example.com/.well-known/ai-catalog.json.
  3. The host is fronted by an SPA / reverse proxy that returns 200 OK with <!DOCTYPE html>... for any unknown path.
  4. response.raise_for_status() succeeds (status is 200), so the documented httpx.HTTPError path is not taken.
  5. response.json() calls json.loads("<!DOCTYPE html>...") → json.JSONDecodeError: Expecting value: line 1 column 1 (char 0) propagates out of fetch_ai_catalog / discover_server_cards.
  6. A caller that wrapped the call in except (httpx.HTTPError, pydantic.ValidationError) — exactly what the docstring tells them to expect — does not catch it and crashes.

Why nothing else prevents it. Nothing between raise_for_status() and model_validate() checks the Content-Type or guards the decode, and pydantic.ValidationError is only reached after a successful JSON parse. This is also internally inconsistent within the PR: load_server_card in the same module does document json.JSONDecodeError: If the file is not valid JSON in its Raises: section, and the repo's AGENTS.md asks that public APIs document exceptions a caller would reasonably catch — a non-JSON body from an untrusted remote host is at least as likely as a non-JSON local file.

Impact and fix. This is a documentation-completeness issue, not a runtime bug — the exception still propagates and is debuggable — so it shouldn't block the PR. The smallest fix is to add json.JSONDecodeError: If the response body is not valid JSON. to the Raises: sections of fetch_ai_catalog, fetch_server_card, and discover_server_cards. Alternatively, wrap the decode (e.g. catch json.JSONDecodeError and re-raise as a ValidationError-style "document is not a valid AI Catalog / Server Card" error), which would make the existing two documented exceptions exhaustive.

122 changes: 122 additions & 0 deletions src/mcp/client/experimental/server_card.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
"""Ingest MCP Server Cards (SEP-2127).

WARNING: These APIs are experimental and may change without notice.

A client discovers how to connect to the servers a host advertises by
fetching its AI Catalog and the Server Cards the catalog references::

    from mcp.client.experimental.server_card import discover_server_cards

    for card in await discover_server_cards("https://dice.example.com"):
        for remote in card.remotes or []:
            print(remote.type, remote.url, remote.supported_protocol_versions)

Returned :class:`ServerCard` objects are validated; malformed documents raise
``pydantic.ValidationError``. Ingestion is deliberately lenient about a
missing ``$schema`` key — see ``ServerCard.schema_uri``.
"""

from __future__ import annotations

import json
from pathlib import Path
from urllib.parse import urljoin, urlsplit

import httpx

from mcp.client.experimental.ai_catalog import fetch_ai_catalog, well_known_ai_catalog_url
from mcp.shared._httpx_utils import create_mcp_http_client
from mcp.shared.experimental.ai_catalog.types import (
MCP_CATALOG_WELL_KNOWN_PATH,
MCP_SERVER_CARD_MEDIA_TYPE,
)
from mcp.shared.experimental.server_card.types import ServerCard

__all__ = ["fetch_server_card", "load_server_card", "discover_server_cards"]

# The MCP discovery extension and the AI Catalog specification currently name
# the Server Card media type differently; accept either when filtering.
# ``discover_server_cards`` skips catalog entries whose ``media_type`` is not
# a member of this set.
_SERVER_CARD_MEDIA_TYPES = frozenset({MCP_SERVER_CARD_MEDIA_TYPE, "application/mcp-server-card+json"})


async def fetch_server_card(url: str, *, http_client: httpx.AsyncClient | None = None) -> ServerCard:
    """Fetch and validate the Server Card at ``url``.

    ``url`` is the card's location, typically taken from an AI Catalog
    entry's ``url``. Pass an existing ``http_client`` to reuse connection
    pooling / auth, otherwise a short-lived client with MCP defaults is used.

    Args:
        url: Location of the Server Card document.
        http_client: Client used to issue the request. When omitted, a client
            from ``create_mcp_http_client`` is opened for this call only.

    Returns:
        The validated Server Card.

    Raises:
        httpx.HTTPError: If the request fails or returns a non-2xx status.
        json.JSONDecodeError: If a successful response carries a body that is
            not valid JSON (for example an HTML page served for an unknown
            path).
        pydantic.ValidationError: If the document is not a valid Server Card.
    """
    if http_client is None:
        # No client supplied: repeat the call inside a short-lived client so
        # it is closed on every exit path, including errors.
        async with create_mcp_http_client() as client:
            return await fetch_server_card(url, http_client=client)
    # Prefer the Server Card media type but accept plain JSON as well.
    accept = f"{MCP_SERVER_CARD_MEDIA_TYPE}, application/json"
    response = await http_client.get(url, headers={"Accept": accept})
    response.raise_for_status()
    # The body is decoded before validation and decode failures are not
    # wrapped, so a non-JSON 2xx body surfaces as json.JSONDecodeError.
    return ServerCard.model_validate(response.json())


async def discover_server_cards(url: str, *, http_client: httpx.AsyncClient | None = None) -> list[ServerCard]:
    """Discover the MCP servers advertised by the host of ``url``.

    Fetches the host's AI Catalog from ``/.well-known/ai-catalog.json``
    (falling back to the transitional ``/.well-known/mcp/catalog.json`` on a
    404), then validates the Server Card of every MCP server entry — fetched
    from the entry's ``url`` or read from its inline ``data``. Entries with
    other media types are ignored.

    Card URLs are taken from the fetched catalog and may point anywhere,
    including other domains. Non-http(s) card URLs are rejected; beyond that,
    applications discovering hosts they don't trust should pass an
    ``http_client`` that enforces their network policy (e.g. rejecting
    private address ranges or capping redirects) — the SDK imposes none
    because loopback and intranet servers are legitimate discovery targets.

    Args:
        url: Any absolute http(s) URL on the host to discover.
        http_client: Client used for the catalog and every card request. When
            omitted, one client from ``create_mcp_http_client`` is opened for
            the whole discovery.

    Returns:
        The validated cards, in catalog entry order. Cards are fetched one at
        a time, and the first failing entry aborts discovery without
        returning a partial result.

    Raises:
        ValueError: If ``url`` is not an absolute http(s) URL, or the catalog
            references a card at a non-http(s) URL.
        httpx.HTTPError: If a request fails or returns a non-2xx status.
        json.JSONDecodeError: If the catalog or a referenced card is served
            with a successful status but a body that is not valid JSON.
        pydantic.ValidationError: If the catalog or a referenced card is invalid.
    """
    if http_client is None:
        # Share one short-lived client across the catalog and card requests.
        async with create_mcp_http_client() as client:
            return await discover_server_cards(url, http_client=client)

    catalog_url = well_known_ai_catalog_url(url)
    try:
        catalog = await fetch_ai_catalog(catalog_url, http_client=http_client)
    except httpx.HTTPStatusError as exc:
        # Only "not found" triggers the fallback; any other status is a real
        # failure and is re-raised unchanged.
        if exc.response.status_code != 404:
            raise
        # Rebind catalog_url so relative entry URLs below resolve against the
        # location the catalog was actually served from.
        catalog_url = well_known_ai_catalog_url(url, well_known_path=MCP_CATALOG_WELL_KNOWN_PATH)
        catalog = await fetch_ai_catalog(catalog_url, http_client=http_client)

    cards: list[ServerCard] = []
    for entry in catalog.entries:
        if entry.media_type not in _SERVER_CARD_MEDIA_TYPES:
            continue
        if entry.url is None:
            # No location given: validate the card carried inline in ``data``.
            cards.append(ServerCard.model_validate(entry.data))
            continue
        # Entry URLs are usually absolute; resolve relative ones against
        # the catalog's location. The catalog is remote input — never
        # follow it to a non-http(s) scheme.
        card_url = urljoin(catalog_url, entry.url)
        if urlsplit(card_url).scheme not in ("http", "https"):
            raise ValueError(f"catalog entry {entry.identifier!r} has a non-http(s) card URL: {card_url!r}")
        cards.append(await fetch_server_card(card_url, http_client=http_client))
    return cards


def load_server_card(path: str | Path) -> ServerCard:
    """Read the JSON file at ``path`` and validate it as a Server Card.

    The file is read as UTF-8 text, parsed as JSON, and the parsed document
    is validated against the ``ServerCard`` model.

    Raises:
        OSError: If the file cannot be read.
        json.JSONDecodeError: If the file is not valid JSON.
        pydantic.ValidationError: If the document is not a valid Server Card.
    """
    raw = Path(path).read_text(encoding="utf-8")
    document = json.loads(raw)
    return ServerCard.model_validate(document)
92 changes: 92 additions & 0 deletions src/mcp/server/experimental/ai_catalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
"""Generate and serve AI Catalogs.

WARNING: These APIs are experimental and may change without notice.

A server author advertises their MCP server by serving an AI Catalog from the
well-known path, with an entry pointing at the server's Server Card::

    from mcp.server.experimental.ai_catalog import mount_ai_catalog, server_card_entry
    from mcp.server.experimental.server_card import build_server_card, mount_server_card
    from mcp.shared.experimental.ai_catalog import AICatalog

    card = build_server_card(server, name="io.modelcontextprotocol.examples/dice-roller")

    app = server.streamable_http_app()
    mount_server_card(app, card, path="/server-card.json")
    catalog = AICatalog(entries=[server_card_entry(card, "https://dice.example.com/server-card.json")])
    mount_ai_catalog(app, catalog)  # GET /.well-known/ai-catalog.json

To write a catalog to a file instead, serialize it with
``catalog.model_dump_json(by_alias=True, exclude_none=True)``.
"""

from __future__ import annotations

from starlette.applications import Starlette
from starlette.requests import Request
from starlette.responses import Response
from starlette.routing import Route

from mcp.shared.experimental.ai_catalog.types import (
AI_CATALOG_MEDIA_TYPE,
AI_CATALOG_WELL_KNOWN_PATH,
MCP_SERVER_CARD_MEDIA_TYPE,
MCP_SERVER_URN_PREFIX,
AICatalog,
CatalogEntry,
)
from mcp.shared.experimental.server_card.types import ServerCard

__all__ = ["DISCOVERY_HEADERS", "server_card_entry", "ai_catalog_route", "mount_ai_catalog"]

#: Response headers for discovery endpoints (catalogs and the artifacts they
#: reference). Browser-based clients must be able to read them: the discovery
#: spec makes the CORS headers a MUST and the caching header a SHOULD.
#:
#: The CORS headers allow any origin to issue ``GET`` requests carrying a
#: ``Content-Type`` header; the caching header marks responses as publicly
#: cacheable for 3600 seconds (one hour).
DISCOVERY_HEADERS = {
    "Access-Control-Allow-Origin": "*",
    "Access-Control-Allow-Methods": "GET",
    "Access-Control-Allow-Headers": "Content-Type",
    "Cache-Control": "public, max-age=3600",
}


def server_card_entry(card: ServerCard, url: str) -> CatalogEntry:
    """Create the catalog entry that advertises ``card`` at ``url``.

    Per the MCP discovery extension the identifier is the card's ``name``
    behind the server URN prefix (``urn:mcp:server:<name>``). The display
    name is the card's ``title``, falling back to ``name`` when the title is
    empty or unset; description and version are copied from the card.

    ``url`` should be the absolute URL the card is retrievable from, since
    catalogs may be fetched cross-domain.
    """
    urn = f"{MCP_SERVER_URN_PREFIX}{card.name}"
    label = card.title or card.name
    return CatalogEntry(
        identifier=urn,
        display_name=label,
        media_type=MCP_SERVER_CARD_MEDIA_TYPE,
        url=url,
        description=card.description,
        version=card.version,
    )


def ai_catalog_route(catalog: AICatalog, *, path: str = AI_CATALOG_WELL_KNOWN_PATH) -> Route:
    """Create a GET-only Starlette route serving ``catalog`` at ``path``.

    Use it when constructing an app —
    ``Starlette(routes=[ai_catalog_route(catalog)])`` — or attach it to an
    existing one with :func:`mount_ai_catalog`. The catalog is serialized a
    single time, when the route is built, and every response reuses those
    bytes together with the CORS and caching headers discovery requires.
    """
    # Serialize up front: changes made to ``catalog`` after this call are not
    # reflected in what the route serves.
    serialized = catalog.model_dump_json(by_alias=True, exclude_none=True)
    payload = serialized.encode()

    async def serve_catalog(_request: Request) -> Response:
        return Response(payload, media_type=AI_CATALOG_MEDIA_TYPE, headers=DISCOVERY_HEADERS)

    return Route(path, endpoint=serve_catalog, methods=["GET"], name="ai_catalog")


def mount_ai_catalog(app: Starlette, catalog: AICatalog, *, path: str = AI_CATALOG_WELL_KNOWN_PATH) -> None:
    """Add a route serving ``catalog`` at ``path`` to an existing Starlette app.

    The route is built with :func:`ai_catalog_route` and appended to the end
    of ``app.router.routes``.

    Discovery expects the catalog to be reachable without authentication;
    mount it outside any auth middleware.
    """
    route = ai_catalog_route(catalog, path=path)
    app.router.routes.append(route)
Loading
Loading