From 3ec4518c1bae41fe9e024d9fc55c146ebeaf3587 Mon Sep 17 00:00:00 2001 From: beyhangl Date: Sat, 30 May 2026 13:01:32 +0300 Subject: [PATCH] fix: scrub the unregistered evalcraft.dev domain; release 0.2.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit evalcraft.dev was never registered (NXDOMAIN), but the cloud feature defaulted to a non-existent api.evalcraft.dev endpoint and the landing page linked hello@evalcraft.dev. - cloud client: no default endpoint; resolve base_url from arg / EVALCRAFT_BASE_URL / ~/.evalcraft/config.json, and raise a clear 'no public service — self-host' error when unset (new test). save_config only writes base_url when provided. - 'evalcraft cloud --url': no default; init-template example -> localhost self-host. - landing page + demo scripts: dead-domain emails -> real/placeholder addresses. - bump 0.2.0 -> 0.2.1 + CHANGELOG. 804 tests, ruff 0, mypy 0; built + twine-checked locally. --- CHANGELOG.md | 5 +++ evalcraft/__init__.py | 2 +- evalcraft/cli/main.py | 6 ++-- evalcraft/cli/templates/evalcraft.toml | 2 +- evalcraft/cloud/client.py | 45 ++++++++++++++++++-------- evalcraft/core/models.py | 2 +- pyproject.toml | 2 +- scripts/seed_demo.py | 6 ++-- scripts/smoke_test.py | 2 +- site/CNAME | 2 +- site/index.html | 6 ++-- tests/test_cloud.py | 25 ++++++++++---- tests/test_e2e_pipeline.py | 2 +- 13 files changed, 70 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 88688f6a..1117a900 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.2.1] — 2026-05-30 + +### Fixed +- **Removed references to the unregistered `evalcraft.dev` domain.** The cloud client and the `evalcraft cloud` CLI no longer default to a non-existent `api.evalcraft.dev` endpoint. There is **no public hosted service** — configure a self-hosted dashboard URL explicitly via `base_url=`, the `EVALCRAFT_BASE_URL` env var, or `~/.evalcraft/config.json`. A cloud call with no URL configured now raises a clear, self-host-pointing error instead of failing against a dead host. Also scrubbed the dead domain from the `evalcraft init` config template and the landing-page contact links. + ## [0.2.0] — 2026-05-30 Ships everything developed since the initial `0.1.0` PyPI upload — a much larger diff --git a/evalcraft/__init__.py b/evalcraft/__init__.py index 53ede5e7..d012406c 100644 --- a/evalcraft/__init__.py +++ b/evalcraft/__init__.py @@ -4,7 +4,7 @@ mock LLMs/tools, score runs, and catch real model drift with live-eval. """ -__version__ = "0.2.0" +__version__ = "0.2.1" from evalcraft.capture.recorder import CaptureContext, capture from evalcraft.cloud.client import EvalcraftCloud diff --git a/evalcraft/cli/main.py b/evalcraft/cli/main.py index 1a12e0f0..eeddb540 100644 --- a/evalcraft/cli/main.py +++ b/evalcraft/cli/main.py @@ -58,7 +58,7 @@ def _fmt_cost(usd: float) -> str: # ─── CLI root ───────────────────────────────────────────────────────────────── @click.group() -@click.version_option(version="0.2.0", prog_name="evalcraft") +@click.version_option(version="0.2.1", prog_name="evalcraft") def cli() -> None: """evalcraft — capture, replay, and evaluate AI agent runs.""" @@ -1111,8 +1111,8 @@ def cloud() -> None: @cloud.command("login") @click.option("--api-key", prompt="API key", hide_input=True, help="Your Evalcraft API key (ec_...)") -@click.option("--url", default="https://api.evalcraft.dev/v1", - help="Override API base URL") +@click.option("--url", default="", + help="Your self-hosted dashboard URL (optional; there is no public hosted service)") def cloud_login(api_key: str, url: str) -> None: """Save your API key to ~/.evalcraft/config.json. diff --git a/evalcraft/cli/templates/evalcraft.toml b/evalcraft/cli/templates/evalcraft.toml index f697ce50..34aedf18 100644 --- a/evalcraft/cli/templates/evalcraft.toml +++ b/evalcraft/cli/templates/evalcraft.toml @@ -37,4 +37,4 @@ auto_upload = false [cloud] # Override the Evalcraft cloud API endpoint. -# base_url = "https://api.evalcraft.dev/v1" +# base_url = "http://localhost:8000/v1" # your self-hosted dashboard (no public service) diff --git a/evalcraft/cloud/client.py b/evalcraft/cloud/client.py index c7ee0748..3e5cf0b6 100644 --- a/evalcraft/cloud/client.py +++ b/evalcraft/cloud/client.py @@ -34,13 +34,11 @@ logger = logging.getLogger(__name__) -# NOTE: The hosted Evalcraft dashboard/API is not yet publicly available. -# This default points at the *planned* hosted endpoint; until it ships, set -# ``base_url`` (or the ``base_url`` field in ~/.evalcraft/config.json) to your -# own self-hosted dashboard — see the ``dashboard/`` directory. All cloud -# features are optional: the core capture / replay / eval workflow runs fully -# offline and never contacts this endpoint. -_DEFAULT_BASE_URL = "https://api.evalcraft.dev/v1" +# There is no public hosted Evalcraft API. Cloud features are optional and target +# a *self-hosted* dashboard (see the ``dashboard/`` directory); configure the +# endpoint explicitly via the ``base_url`` argument, the ``EVALCRAFT_BASE_URL`` +# environment variable, or ``~/.evalcraft/config.json``. The core capture / +# replay / eval workflow runs fully offline and never contacts any endpoint. _CONFIG_DIR = Path.home() / ".evalcraft" _CONFIG_FILE = _CONFIG_DIR / "config.json" _QUEUE_DIR = _CONFIG_DIR / "queue" @@ -104,7 +102,9 @@ class EvalcraftCloud: api_key: Bearer token (``ec_...``). If None, reads from ``~/.evalcraft/config.json`` or the ``EVALCRAFT_API_KEY`` environment variable. - base_url: Override the default API endpoint. + base_url: URL of your self-hosted Evalcraft dashboard. Required for any + cloud call — there is no public hosted service. Falls back to the + ``EVALCRAFT_BASE_URL`` env var, then ``~/.evalcraft/config.json``. timeout: Request timeout in seconds (default 30). max_retries: Maximum number of retry attempts for transient errors (default 3). Uses exponential backoff with jitter. @@ -115,13 +115,13 @@ class EvalcraftCloud: def __init__( self, api_key: str | None = None, - base_url: str = _DEFAULT_BASE_URL, + base_url: str | None = None, timeout: int = 30, max_retries: int = 3, queue_dir: Path | None = None, ): self.api_key = api_key or self._load_api_key() - self.base_url = base_url.rstrip("/") + self.base_url = (base_url or self._load_base_url()).rstrip("/") self.timeout = timeout self.max_retries = max_retries self.queue_dir = queue_dir or _QUEUE_DIR @@ -237,8 +237,8 @@ def queue_size(self) -> int: # ────────────────────────────────────────── @staticmethod - def save_config(api_key: str, base_url: str = _DEFAULT_BASE_URL) -> None: - """Persist API key and base URL to ``~/.evalcraft/config.json``.""" + def save_config(api_key: str, base_url: str = "") -> None: + """Persist the API key (and optional dashboard URL) to ``~/.evalcraft/config.json``.""" _CONFIG_DIR.mkdir(parents=True, exist_ok=True) config: dict = {} if _CONFIG_FILE.exists(): @@ -247,7 +247,8 @@ def save_config(api_key: str, base_url: str = _DEFAULT_BASE_URL) -> None: except Exception: pass config["api_key"] = api_key - config["base_url"] = base_url + if base_url: + config["base_url"] = base_url _CONFIG_FILE.write_text(json.dumps(config, indent=2)) _CONFIG_FILE.chmod(0o600) @@ -288,6 +289,15 @@ def _load_api_key(self) -> str: config = self.load_config() return str(config.get("api_key", "")) + def _load_base_url(self) -> str: + """Resolve the dashboard base URL from env or config (empty if unset).""" + import os + env_url = os.environ.get("EVALCRAFT_BASE_URL", "") + if env_url: + return env_url + config = self.load_config() + return str(config.get("base_url", "")) + def _request( self, method: str, @@ -307,11 +317,18 @@ def _request( Raises: CloudUploadError: After max_retries exhausted or on 4xx errors. """ + if not self.base_url: + raise CloudUploadError( + "No Evalcraft dashboard URL is configured. There is no public " + "hosted service — point the client at your own self-hosted " + "dashboard (see the dashboard/ directory) via base_url=..., the " + "EVALCRAFT_BASE_URL env var, or ~/.evalcraft/config.json." + ) url = f"{self.base_url}{path}" body: bytes | None = None headers: dict[str, str] = { "Accept": "application/json", - "User-Agent": "evalcraft-sdk/0.2.0", + "User-Agent": "evalcraft-sdk/0.2.1", } if self.api_key: headers["Authorization"] = f"Bearer {self.api_key}" diff --git a/evalcraft/core/models.py b/evalcraft/core/models.py index 08a245d0..75592488 100644 --- a/evalcraft/core/models.py +++ b/evalcraft/core/models.py @@ -288,7 +288,7 @@ def to_dict(self) -> dict: self.compute_metrics() self.compute_fingerprint() return { - "evalcraft_version": "0.2.0", + "evalcraft_version": "0.2.1", "cassette": { "id": self.id, "name": self.name, diff --git a/pyproject.toml b/pyproject.toml index 9c1a7d12..32e1d4db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "evalcraft" -version = "0.2.0" +version = "0.2.1" description = "VCR for AI agents — record agent runs as cassettes and replay them deterministically in CI for $0." readme = "README.md" license = "MIT" diff --git a/scripts/seed_demo.py b/scripts/seed_demo.py index 925e6534..c8632205 100644 --- a/scripts/seed_demo.py +++ b/scripts/seed_demo.py @@ -258,7 +258,7 @@ def main(): # 2. Sign up demo user print("Creating demo user...", end=" ") r = client.post("/auth/signup", json={ - "email": "demo@evalcraft.dev", + "email": "demo@example.com", "password": "demodemo123", "full_name": "Demo User", "team_name": "Demo Team", @@ -266,7 +266,7 @@ def main(): if r.status_code == 409: print("already exists, logging in.") r = client.post("/auth/login", json={ - "email": "demo@evalcraft.dev", + "email": "demo@example.com", "password": "demodemo123", }) r.raise_for_status() @@ -382,7 +382,7 @@ def main(): print() print("Seed complete!") - print(f" Login: demo@evalcraft.dev / demodemo123") + print(f" Login: demo@example.com / demodemo123") print(f" Frontend: http://localhost:3000") print(f" Backend: http://localhost:8000") print(f" API docs: http://localhost:8000/docs") diff --git a/scripts/smoke_test.py b/scripts/smoke_test.py index 30705dd4..ecb4dcda 100755 --- a/scripts/smoke_test.py +++ b/scripts/smoke_test.py @@ -140,7 +140,7 @@ def test_auth(client: httpx.Client) -> str | None: print(f"\n{BOLD}Auth{RESET}") unique = uuid.uuid4().hex[:8] - email = f"smoke-{unique}@evalcraft.dev" + email = f"smoke-{unique}@example.com" password = "smoketest123" # Signup diff --git a/site/CNAME b/site/CNAME index 3b602217..81e71b46 100644 --- a/site/CNAME +++ b/site/CNAME @@ -1 +1 @@ -# Add custom domain here: evalcraft.dev +# No custom domain configured. diff --git a/site/index.html b/site/index.html index 9ad077a7..cf034d0a 100644 --- a/site/index.html +++ b/site/index.html @@ -940,7 +940,7 @@

Design Partner Pilot

  • Weekly check-in & roadmap input
  • Migration & setup help
  • - Apply as Design Partner + Apply as Design Partner

    Team

    @@ -966,7 +966,7 @@

    Enterprise

  • Self-hosted option
  • Dedicated support
  • - Contact us + Contact us
    @@ -976,7 +976,7 @@

    Enterprise

    Looking for 10 design partners

    Get hands-on onboarding, direct Slack access, and shape the roadmap. Limited to teams building AI agents in production.

    - Apply Now + Apply Now
    diff --git a/tests/test_cloud.py b/tests/test_cloud.py index 722432a9..2ef0a2d7 100644 --- a/tests/test_cloud.py +++ b/tests/test_cloud.py @@ -52,7 +52,7 @@ def golden_set(cassette): def client(tmp_path): return EvalcraftCloud( api_key="ec_test_key", - base_url="https://api.evalcraft.dev/v1", + base_url="https://dash.example.com/v1", timeout=5, max_retries=2, queue_dir=tmp_path / "queue", @@ -97,12 +97,23 @@ def test_api_key_from_config(tmp_path, monkeypatch): assert c.api_key == "ec_from_config" +def test_no_base_url_configured_raises_clear_error(tmp_path, monkeypatch): + """With no base_url (arg / env / config), a cloud request fails with a clear, + self-host-pointing error instead of hitting a non-existent default host.""" + monkeypatch.delenv("EVALCRAFT_BASE_URL", raising=False) + with patch("evalcraft.cloud.client._CONFIG_FILE", tmp_path / "missing.json"): + c = EvalcraftCloud(api_key="ec_x", queue_dir=tmp_path / "queue") + assert c.base_url == "" + with pytest.raises(CloudUploadError, match="self-hosted"): + c.list_cassettes("proj") + + # ────────────────────────────────────────────── # upload() # ────────────────────────────────────────────── def test_upload_cassette_success(client, cassette): - server_resp = {"id": "cas_abc123", "url": "https://app.evalcraft.dev/cassettes/cas_abc123"} + server_resp = {"id": "cas_abc123", "url": "https://app.example.com/cassettes/cas_abc123"} mock_resp = _make_mock_response(server_resp) with patch("urllib.request.urlopen", return_value=mock_resp) as mock_open: @@ -113,7 +124,7 @@ def test_upload_cassette_success(client, cassette): # Verify correct URL and method req = mock_open.call_args[0][0] - assert req.full_url == "https://api.evalcraft.dev/v1/cassettes" + assert req.full_url == "https://dash.example.com/v1/cassettes" assert req.method == "POST" assert req.headers.get("Authorization") == "Bearer ec_test_key" assert req.headers.get("Content-type") == "application/json" @@ -142,7 +153,7 @@ def test_upload_cassette_queued_on_failure(client, cassette, tmp_path): # ────────────────────────────────────────────── def test_upload_golden_success(client, golden_set): - server_resp = {"id": "gs_xyz", "url": "https://app.evalcraft.dev/golden/gs_xyz"} + server_resp = {"id": "gs_xyz", "url": "https://app.example.com/golden/gs_xyz"} mock_resp = _make_mock_response(server_resp) with patch("urllib.request.urlopen", return_value=mock_resp) as mock_open: @@ -150,7 +161,7 @@ def test_upload_golden_success(client, golden_set): assert result["id"] == "gs_xyz" req = mock_open.call_args[0][0] - assert req.full_url == "https://api.evalcraft.dev/v1/golden-sets" + assert req.full_url == "https://dash.example.com/v1/golden-sets" payload = json.loads(req.data.decode("utf-8")) assert payload["name"] == "weather_golden" @@ -203,7 +214,7 @@ def test_get_regressions(client): def test_retry_on_5xx_then_success(client, cassette): """Should retry on 5xx and succeed on the next attempt.""" server_err = urllib.error.HTTPError( - url="https://api.evalcraft.dev/v1/cassettes", + url="https://dash.example.com/v1/cassettes", code=503, msg="Service Unavailable", hdrs=MagicMock(), # type: ignore[arg-type] @@ -221,7 +232,7 @@ def test_retry_on_5xx_then_success(client, cassette): def test_no_retry_on_4xx(client, cassette): """4xx errors should not be retried.""" err = urllib.error.HTTPError( - url="https://api.evalcraft.dev/v1/cassettes", + url="https://dash.example.com/v1/cassettes", code=401, msg="Unauthorized", hdrs=MagicMock(), # type: ignore[arg-type] diff --git a/tests/test_e2e_pipeline.py b/tests/test_e2e_pipeline.py index 2429716d..3d58b376 100644 --- a/tests/test_e2e_pipeline.py +++ b/tests/test_e2e_pipeline.py @@ -76,7 +76,7 @@ def test_capture_full_agent_run(self, tmp_path): # Verify JSON is valid and contains expected data data = json.loads(cassette_path.read_text()) - assert data.get("evalcraft_version") == "0.2.0" + assert data.get("evalcraft_version") == "0.2.1" assert data["cassette"]["name"] == "weather_agent_run" assert data["cassette"]["agent_name"] == "weather_bot" assert data["cassette"]["framework"] == "openai"