Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.2.1] — 2026-05-30

### Fixed
- **Removed references to the unregistered `evalcraft.dev` domain.** The cloud client and the `evalcraft cloud` CLI no longer default to a non-existent `api.evalcraft.dev` endpoint. There is **no public hosted service** — configure a self-hosted dashboard URL explicitly via `base_url=`, the `EVALCRAFT_BASE_URL` env var, or `~/.evalcraft/config.json`. A cloud call with no URL configured now raises a clear, self-host-pointing error instead of failing against a dead host. Also scrubbed the dead domain from the `evalcraft init` config template and the landing-page contact links.

## [0.2.0] — 2026-05-30

Ships everything developed since the initial `0.1.0` PyPI upload — a much larger
Expand Down
2 changes: 1 addition & 1 deletion evalcraft/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
mock LLMs/tools, score runs, and catch real model drift with live-eval.
"""

__version__ = "0.2.0"
__version__ = "0.2.1"

from evalcraft.capture.recorder import CaptureContext, capture
from evalcraft.cloud.client import EvalcraftCloud
Expand Down
6 changes: 3 additions & 3 deletions evalcraft/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def _fmt_cost(usd: float) -> str:
# ─── CLI root ─────────────────────────────────────────────────────────────────

@click.group()
@click.version_option(version="0.2.0", prog_name="evalcraft")
@click.version_option(version="0.2.1", prog_name="evalcraft")
def cli() -> None:
"""evalcraft — capture, replay, and evaluate AI agent runs."""

Expand Down Expand Up @@ -1111,8 +1111,8 @@ def cloud() -> None:
@cloud.command("login")
@click.option("--api-key", prompt="API key", hide_input=True,
help="Your Evalcraft API key (ec_...)")
@click.option("--url", default="https://api.evalcraft.dev/v1",
help="Override API base URL")
@click.option("--url", default="",
help="Your self-hosted dashboard URL (optional; there is no public hosted service)")
def cloud_login(api_key: str, url: str) -> None:
"""Save your API key to ~/.evalcraft/config.json.

Expand Down
2 changes: 1 addition & 1 deletion evalcraft/cli/templates/evalcraft.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,4 @@ auto_upload = false

[cloud]
# URL of your self-hosted Evalcraft dashboard API. There is no default endpoint; cloud features stay disabled until this is set.
# base_url = "https://api.evalcraft.dev/v1"
# base_url = "http://localhost:8000/v1" # your self-hosted dashboard (no public service)
45 changes: 31 additions & 14 deletions evalcraft/cloud/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,11 @@

logger = logging.getLogger(__name__)

# NOTE: The hosted Evalcraft dashboard/API is not yet publicly available.
# This default points at the *planned* hosted endpoint; until it ships, set
# ``base_url`` (or the ``base_url`` field in ~/.evalcraft/config.json) to your
# own self-hosted dashboard — see the ``dashboard/`` directory. All cloud
# features are optional: the core capture / replay / eval workflow runs fully
# offline and never contacts this endpoint.
_DEFAULT_BASE_URL = "https://api.evalcraft.dev/v1"
# There is no public hosted Evalcraft API. Cloud features are optional and target
# a *self-hosted* dashboard (see the ``dashboard/`` directory); configure the
# endpoint explicitly via the ``base_url`` argument, the ``EVALCRAFT_BASE_URL``
# environment variable, or ``~/.evalcraft/config.json``. The core capture /
# replay / eval workflow runs fully offline and never contacts any endpoint.
_CONFIG_DIR = Path.home() / ".evalcraft"
_CONFIG_FILE = _CONFIG_DIR / "config.json"
_QUEUE_DIR = _CONFIG_DIR / "queue"
Expand Down Expand Up @@ -104,7 +102,9 @@ class EvalcraftCloud:
api_key: Bearer token (``ec_...``). If None, reads from
``~/.evalcraft/config.json`` or the ``EVALCRAFT_API_KEY``
environment variable.
base_url: Override the default API endpoint.
base_url: URL of your self-hosted Evalcraft dashboard. Required for any
cloud call — there is no public hosted service. Falls back to the
``EVALCRAFT_BASE_URL`` env var, then ``~/.evalcraft/config.json``.
timeout: Request timeout in seconds (default 30).
max_retries: Maximum number of retry attempts for transient errors
(default 3). Uses exponential backoff with jitter.
Expand All @@ -115,13 +115,13 @@ class EvalcraftCloud:
def __init__(
self,
api_key: str | None = None,
base_url: str = _DEFAULT_BASE_URL,
base_url: str | None = None,
timeout: int = 30,
max_retries: int = 3,
queue_dir: Path | None = None,
):
self.api_key = api_key or self._load_api_key()
self.base_url = base_url.rstrip("/")
self.base_url = (base_url or self._load_base_url()).rstrip("/")
self.timeout = timeout
self.max_retries = max_retries
self.queue_dir = queue_dir or _QUEUE_DIR
Expand Down Expand Up @@ -237,8 +237,8 @@ def queue_size(self) -> int:
# ──────────────────────────────────────────

@staticmethod
def save_config(api_key: str, base_url: str = _DEFAULT_BASE_URL) -> None:
"""Persist API key and base URL to ``~/.evalcraft/config.json``."""
def save_config(api_key: str, base_url: str = "") -> None:
"""Persist the API key (and optional dashboard URL) to ``~/.evalcraft/config.json``."""
_CONFIG_DIR.mkdir(parents=True, exist_ok=True)
config: dict = {}
if _CONFIG_FILE.exists():
Expand All @@ -247,7 +247,8 @@ def save_config(api_key: str, base_url: str = _DEFAULT_BASE_URL) -> None:
except Exception:
pass
config["api_key"] = api_key
config["base_url"] = base_url
if base_url:
config["base_url"] = base_url
_CONFIG_FILE.write_text(json.dumps(config, indent=2))
_CONFIG_FILE.chmod(0o600)

Expand Down Expand Up @@ -288,6 +289,15 @@ def _load_api_key(self) -> str:
config = self.load_config()
return str(config.get("api_key", ""))

def _load_base_url(self) -> str:
    """Resolve the dashboard base URL from the environment or saved config.

    Lookup order: the ``EVALCRAFT_BASE_URL`` environment variable first, then
    the ``base_url`` entry of the mapping returned by ``self.load_config()``.

    Returns:
        The configured URL with surrounding whitespace removed, or ``""``
        when neither source provides a usable string value.
    """
    import os

    # Whitespace-only values are treated as "not set" so they fall through
    # to the config file instead of being used as a URL.
    env_url = os.environ.get("EVALCRAFT_BASE_URL", "").strip()
    if env_url:
        return env_url

    # A JSON ``null`` (decoded as ``None``) or any other non-string value must
    # count as unset: wrapping it in ``str()`` would yield a truthy "None"
    # that slips past empty-URL checks and ends up in the request URL.
    configured = self.load_config().get("base_url")
    return configured.strip() if isinstance(configured, str) else ""

def _request(
self,
method: str,
Expand All @@ -307,11 +317,18 @@ def _request(
Raises:
CloudUploadError: After max_retries exhausted or on 4xx errors.
"""
if not self.base_url:
raise CloudUploadError(
"No Evalcraft dashboard URL is configured. There is no public "
"hosted service — point the client at your own self-hosted "
"dashboard (see the dashboard/ directory) via base_url=..., the "
"EVALCRAFT_BASE_URL env var, or ~/.evalcraft/config.json."
)
url = f"{self.base_url}{path}"
body: bytes | None = None
headers: dict[str, str] = {
"Accept": "application/json",
"User-Agent": "evalcraft-sdk/0.2.0",
"User-Agent": "evalcraft-sdk/0.2.1",
}
if self.api_key:
headers["Authorization"] = f"Bearer {self.api_key}"
Expand Down
2 changes: 1 addition & 1 deletion evalcraft/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ def to_dict(self) -> dict:
self.compute_metrics()
self.compute_fingerprint()
return {
"evalcraft_version": "0.2.0",
"evalcraft_version": "0.2.1",
"cassette": {
"id": self.id,
"name": self.name,
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "evalcraft"
version = "0.2.0"
version = "0.2.1"
description = "VCR for AI agents — record agent runs as cassettes and replay them deterministically in CI for $0."
readme = "README.md"
license = "MIT"
Expand Down
6 changes: 3 additions & 3 deletions scripts/seed_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,15 +258,15 @@ def main():
# 2. Sign up demo user
print("Creating demo user...", end=" ")
r = client.post("/auth/signup", json={
"email": "demo@evalcraft.dev",
"email": "demo@example.com",
"password": "demodemo123",
"full_name": "Demo User",
"team_name": "Demo Team",
})
if r.status_code == 409:
print("already exists, logging in.")
r = client.post("/auth/login", json={
"email": "demo@evalcraft.dev",
"email": "demo@example.com",
"password": "demodemo123",
})
r.raise_for_status()
Expand Down Expand Up @@ -382,7 +382,7 @@ def main():

print()
print("Seed complete!")
print(f" Login: demo@evalcraft.dev / demodemo123")
print(f" Login: demo@example.com / demodemo123")
print(f" Frontend: http://localhost:3000")
print(f" Backend: http://localhost:8000")
print(f" API docs: http://localhost:8000/docs")
Expand Down
2 changes: 1 addition & 1 deletion scripts/smoke_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def test_auth(client: httpx.Client) -> str | None:
print(f"\n{BOLD}Auth{RESET}")

unique = uuid.uuid4().hex[:8]
email = f"smoke-{unique}@evalcraft.dev"
email = f"smoke-{unique}@example.com"
password = "smoketest123"

# Signup
Expand Down
2 changes: 1 addition & 1 deletion site/CNAME
Original file line number Diff line number Diff line change
@@ -1 +1 @@
# Add custom domain here: evalcraft.dev
# No custom domain configured.
6 changes: 3 additions & 3 deletions site/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -940,7 +940,7 @@ <h3>Design Partner Pilot</h3>
<li>Weekly check-in &amp; roadmap input</li>
<li>Migration &amp; setup help</li>
</ul>
<a href="mailto:hello@evalcraft.dev?subject=Design Partner Application&body=Hi, I'm interested in the design partner pilot.%0A%0ATeam:%0AWhat we build:%0AFramework (OpenAI/LangGraph/CrewAI/etc):%0A" class="btn btn-primary">Apply as Design Partner</a>
<a href="mailto:beyhangl@gmail.com?subject=Design Partner Application&body=Hi, I'm interested in the design partner pilot.%0A%0ATeam:%0AWhat we build:%0AFramework (OpenAI/LangGraph/CrewAI/etc):%0A" class="btn btn-primary">Apply as Design Partner</a>
</div>
<div class="price-card">
<h3>Team</h3>
Expand All @@ -966,7 +966,7 @@ <h3>Enterprise</h3>
<li>Self-hosted option</li>
<li>Dedicated support</li>
</ul>
<a href="mailto:hello@evalcraft.dev" class="btn btn-secondary">Contact us</a>
<a href="mailto:beyhangl@gmail.com" class="btn btn-secondary">Contact us</a>
</div>
</div>
</div>
Expand All @@ -976,7 +976,7 @@ <h3>Enterprise</h3>
<section style="text-align:center;padding:56px 24px;background:var(--accent-glow);border-top:1px solid rgba(37,99,235,0.12);border-bottom:1px solid rgba(37,99,235,0.12);">
<h3 style="font-size:22px;font-weight:700;margin-bottom:8px;">Looking for 10 design partners</h3>
<p style="font-size:16px;color:var(--text-2);max-width:560px;margin:0 auto 24px;line-height:1.7;">Get hands-on onboarding, direct Slack access, and shape the roadmap. Limited to teams building AI agents in production.</p>
<a href="mailto:hello@evalcraft.dev?subject=Design Partner Application&body=Hi, I'm interested in the design partner pilot.%0A%0ATeam:%0AWhat we build:%0AFramework (OpenAI/LangGraph/CrewAI/etc):%0A" class="btn btn-primary" style="font-size:16px;">Apply Now</a>
<a href="mailto:beyhangl@gmail.com?subject=Design Partner Application&body=Hi, I'm interested in the design partner pilot.%0A%0ATeam:%0AWhat we build:%0AFramework (OpenAI/LangGraph/CrewAI/etc):%0A" class="btn btn-primary" style="font-size:16px;">Apply Now</a>
</section>

<!-- ─── Star CTA ──────────────────────────────────────── -->
Expand Down
25 changes: 18 additions & 7 deletions tests/test_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def golden_set(cassette):
def client(tmp_path):
return EvalcraftCloud(
api_key="ec_test_key",
base_url="https://api.evalcraft.dev/v1",
base_url="https://dash.example.com/v1",
timeout=5,
max_retries=2,
queue_dir=tmp_path / "queue",
Expand Down Expand Up @@ -97,12 +97,23 @@ def test_api_key_from_config(tmp_path, monkeypatch):
assert c.api_key == "ec_from_config"


def test_no_base_url_configured_raises_clear_error(tmp_path, monkeypatch):
    """A client with no URL from argument, env var, or config must fail clearly.

    The env var is removed and the config path is pointed at a file that does
    not exist, so the client resolves an empty ``base_url``. A cloud call must
    then raise ``CloudUploadError`` with a message mentioning "self-hosted".
    """
    absent_config = tmp_path / "missing.json"
    monkeypatch.delenv("EVALCRAFT_BASE_URL", raising=False)

    with patch("evalcraft.cloud.client._CONFIG_FILE", absent_config):
        cloud = EvalcraftCloud(api_key="ec_x", queue_dir=tmp_path / "queue")
        assert cloud.base_url == ""

        with pytest.raises(CloudUploadError, match="self-hosted"):
            cloud.list_cassettes("proj")


# ──────────────────────────────────────────────
# upload()
# ──────────────────────────────────────────────

def test_upload_cassette_success(client, cassette):
server_resp = {"id": "cas_abc123", "url": "https://app.evalcraft.dev/cassettes/cas_abc123"}
server_resp = {"id": "cas_abc123", "url": "https://app.example.com/cassettes/cas_abc123"}
mock_resp = _make_mock_response(server_resp)

with patch("urllib.request.urlopen", return_value=mock_resp) as mock_open:
Expand All @@ -113,7 +124,7 @@ def test_upload_cassette_success(client, cassette):

# Verify correct URL and method
req = mock_open.call_args[0][0]
assert req.full_url == "https://api.evalcraft.dev/v1/cassettes"
assert req.full_url == "https://dash.example.com/v1/cassettes"
assert req.method == "POST"
assert req.headers.get("Authorization") == "Bearer ec_test_key"
assert req.headers.get("Content-type") == "application/json"
Expand Down Expand Up @@ -142,15 +153,15 @@ def test_upload_cassette_queued_on_failure(client, cassette, tmp_path):
# ──────────────────────────────────────────────

def test_upload_golden_success(client, golden_set):
server_resp = {"id": "gs_xyz", "url": "https://app.evalcraft.dev/golden/gs_xyz"}
server_resp = {"id": "gs_xyz", "url": "https://app.example.com/golden/gs_xyz"}
mock_resp = _make_mock_response(server_resp)

with patch("urllib.request.urlopen", return_value=mock_resp) as mock_open:
result = client.upload_golden(golden_set)

assert result["id"] == "gs_xyz"
req = mock_open.call_args[0][0]
assert req.full_url == "https://api.evalcraft.dev/v1/golden-sets"
assert req.full_url == "https://dash.example.com/v1/golden-sets"
payload = json.loads(req.data.decode("utf-8"))
assert payload["name"] == "weather_golden"

Expand Down Expand Up @@ -203,7 +214,7 @@ def test_get_regressions(client):
def test_retry_on_5xx_then_success(client, cassette):
"""Should retry on 5xx and succeed on the next attempt."""
server_err = urllib.error.HTTPError(
url="https://api.evalcraft.dev/v1/cassettes",
url="https://dash.example.com/v1/cassettes",
code=503,
msg="Service Unavailable",
hdrs=MagicMock(), # type: ignore[arg-type]
Expand All @@ -221,7 +232,7 @@ def test_retry_on_5xx_then_success(client, cassette):
def test_no_retry_on_4xx(client, cassette):
"""4xx errors should not be retried."""
err = urllib.error.HTTPError(
url="https://api.evalcraft.dev/v1/cassettes",
url="https://dash.example.com/v1/cassettes",
code=401,
msg="Unauthorized",
hdrs=MagicMock(), # type: ignore[arg-type]
Expand Down
2 changes: 1 addition & 1 deletion tests/test_e2e_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def test_capture_full_agent_run(self, tmp_path):

# Verify JSON is valid and contains expected data
data = json.loads(cassette_path.read_text())
assert data.get("evalcraft_version") == "0.2.0"
assert data.get("evalcraft_version") == "0.2.1"
assert data["cassette"]["name"] == "weather_agent_run"
assert data["cassette"]["agent_name"] == "weather_bot"
assert data["cassette"]["framework"] == "openai"
Expand Down
Loading