diff --git a/.env.local.example b/.env.local.example index 1b877ec..db6e62f 100644 --- a/.env.local.example +++ b/.env.local.example @@ -1,8 +1,3 @@ -# URL of the NexusRAG API (server-side proxy target) -NEXUSRAG_API_URL=http://localhost:8000 - -# API key passed as Bearer token from the browser (public — no secrets here) -NEXT_PUBLIC_API_KEY=your-api-key-here - -# Corpus ID to use in the /run page -NEXT_PUBLIC_DEFAULT_CORPUS_ID=c1 +# This dashboard requires no environment variables to build or run. +# All configuration is sourced from src/lib/project.ts and API responses. +# When adding public vars in the future, use the NEXT_PUBLIC_ prefix. diff --git a/.eslintrc.json b/.eslintrc.json new file mode 100644 index 0000000..7c1a3ad --- /dev/null +++ b/.eslintrc.json @@ -0,0 +1,4 @@ +{ + "extends": "next/core-web-vitals", + "root": true +} diff --git a/.github/workflows/nextjs-ci.yml b/.github/workflows/nextjs-ci.yml new file mode 100644 index 0000000..7cfaac5 --- /dev/null +++ b/.github/workflows/nextjs-ci.yml @@ -0,0 +1,18 @@ +name: Next.js CI +on: + push: + branches: [main] + pull_request: +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: "20" + - run: npm install --no-audit --no-fund + - run: npm run lint + - run: npm run type-check + - run: npm run test:coverage + - run: npm run build diff --git a/.gitignore b/.gitignore index f920d81..ea35bef 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,9 @@ __pycache__/ dist/ build/ +# Test coverage +coverage/ + # OS .DS_Store diff --git a/api/_telemetry_static.json b/api/_telemetry_static.json index 8963517..909dccd 100644 --- a/api/_telemetry_static.json +++ b/api/_telemetry_static.json @@ -1,4 +1,4 @@ { - "lines_of_code": 1177, - "built_at": "2026-04-27T18:41:57Z" + "lines_of_code": 7937, + "built_at": "2026-06-09T18:16:02Z" } diff --git a/package.json b/package.json index d693205..7c9e2d5 100644 --- a/package.json +++ b/package.json @@ -2,6 +2,9 @@ "name": "evalops-dashboard", "version": "1.0.0", "private": true, + "engines": { + "node": ">=20.0.0" + }, "scripts": { "dev": "next dev", "build": "next build", @@ -9,7 +12,8 @@ "lint": "next lint", "type-check": "tsc --noEmit", "test": "vitest run", - "test:watch": "vitest" + "test:watch": "vitest", + "test:coverage": "vitest run --coverage" }, "dependencies": { "next": "14.2.5", @@ -55,6 +59,7 @@ "eslint-config-next": "14.2.5", "vitest": "^2.0.5", "@vitest/ui": "^2.0.5", + "@vitest/coverage-v8": "^2.0.5", "@testing-library/react": "^16.0.1", "@testing-library/jest-dom": "^6.5.0", "@testing-library/user-event": "^14.5.2", diff --git a/scripts/compute_telemetry_static.py b/scripts/compute_telemetry_static.py index bfaa7f6..792b1d9 100644 --- a/scripts/compute_telemetry_static.py +++ b/scripts/compute_telemetry_static.py @@ -35,6 +35,18 @@ "dist", "build", ".idea", + ".next", + "coverage", + } +) + +EXCLUDE_FILES = frozenset( + { + "package-lock.json", + "yarn.lock", + "pnpm-lock.yaml", + "tsconfig.tsbuildinfo", + "_telemetry_static.json", } ) @@ -46,6 +58,8 @@ def count_lines(root: Path) -> int: continue if any(part in EXCLUDE_DIRS for part in path.parts): continue + if path.name in EXCLUDE_FILES: + continue if path.suffix not in SOURCE_EXTS: continue # Exclude the build artifact itself so each run is stable. diff --git a/src/lib/api.test.ts b/src/lib/api.test.ts new file mode 100644 index 0000000..25016eb --- /dev/null +++ b/src/lib/api.test.ts @@ -0,0 +1,215 @@ +import { describe, expect, it, vi, afterEach } from "vitest"; +import { + fetchPublicStats, + fetchBenchmarkLatest, + PublicStats, + PublicBenchmark, +} from "./api"; + +describe("api.ts", () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + describe("fetchPublicStats", () => { + it("fetches and parses stats from /api/stats", async () => { + const mockStats: PublicStats = { + system: "evalops", + mode: "showcase", + status: "operational", + last_deployed_at: "2026-04-28T12:00:00Z", + last_active_at: "2026-04-28T12:00:00Z", + metrics: { + eval_runs_total: 100, + eval_runs_24h: 10, + }, + schema_version: 1, + generated_at: "2026-04-28T12:00:00Z", + }; + + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValueOnce( + new Response(JSON.stringify(mockStats), { status: 200 }) + ) + ); + + const result = await fetchPublicStats(); + expect(result).toEqual(mockStats); + expect(fetch).toHaveBeenCalledWith("/api/stats", { + headers: { "Content-Type": "application/json" }, + }); + }); + + it("throws on non-ok response", async () => { + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValueOnce( + new Response(JSON.stringify({ error: "Not found" }), { + status: 404, + statusText: "Not Found", + }) + ) + ); + + await expect(fetchPublicStats()).rejects.toThrow( + "Public API 404: Not Found" + ); + }); + + it("propagates network errors", async () => { + const networkError = new Error("Network failed"); + vi.stubGlobal("fetch", vi.fn().mockRejectedValueOnce(networkError)); + + await expect(fetchPublicStats()).rejects.toThrow("Network failed"); + }); + + it("handles 500 server error", async () => { + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValueOnce( + new Response(JSON.stringify({ error: "Internal error" }), { + status: 500, + statusText: "Internal Server Error", + }) + ) + ); + + await expect(fetchPublicStats()).rejects.toThrow( + "Public API 500: Internal Server Error" + ); + }); + }); + + describe("fetchBenchmarkLatest", () => { + it("fetches and parses benchmark from /api/benchmark-latest", async () => { + const mockBenchmark: PublicBenchmark = { + system: "evalops", + benchmark_type: "standard", + run_id: "run_001", + metrics: { + n_cases: 10, + baseline_variant: "v1", + candidate_variant: "v2", + baseline_pass_rate: 0.5, + candidate_pass_rate: 1.0, + baseline_avg_score: 0.5, + candidate_avg_score: 1.0, + pass_rate_delta: 0.5, + avg_score_delta: 0.5, + regressions: 0, + improvements: 5, + gate_verdict: "pass", + }, + schema_version: 1, + generated_at: "2026-04-28T12:00:00Z", + }; + + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValueOnce( + new Response(JSON.stringify(mockBenchmark), { status: 200 }) + ) + ); + + const result = await fetchBenchmarkLatest(); + expect(result).toEqual(mockBenchmark); + expect(fetch).toHaveBeenCalledWith("/api/benchmark-latest", { + headers: { "Content-Type": "application/json" }, + }); + }); + + it("throws on non-ok response", async () => { + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValueOnce( + new Response(JSON.stringify({ error: "Not found" }), { + status: 404, + statusText: "Not Found", + }) + ) + ); + + await expect(fetchBenchmarkLatest()).rejects.toThrow( + "Public API 404: Not Found" + ); + }); + + it("propagates network errors", async () => { + const networkError = new Error("Network timeout"); + vi.stubGlobal("fetch", vi.fn().mockRejectedValueOnce(networkError)); + + await expect(fetchBenchmarkLatest()).rejects.toThrow( + "Network timeout" + ); + }); + + it("handles 503 service unavailable", async () => { + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValueOnce( + new Response(JSON.stringify({ error: "Service unavailable" }), { + status: 503, + statusText: "Service Unavailable", + }) + ) + ); + + await expect(fetchBenchmarkLatest()).rejects.toThrow( + "Public API 503: Service Unavailable" + ); + }); + + it("handles null run_id gracefully", async () => { + const mockBenchmark: PublicBenchmark = { + system: "evalops", + benchmark_type: "standard", + run_id: null, + metrics: null, + schema_version: 1, + generated_at: "2026-04-28T12:00:00Z", + }; + + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValueOnce( + new Response(JSON.stringify(mockBenchmark), { status: 200 }) + ) + ); + + const result = await fetchBenchmarkLatest(); + expect(result.run_id).toBeNull(); + expect(result.metrics).toBeNull(); + }); + }); + + describe("fetch error edge cases", () => { + it("preserves custom headers alongside Content-Type", async () => { + const mockStats: PublicStats = { + system: "evalops", + status: "operational", + last_deployed_at: null, + metrics: {}, + schema_version: 1, + generated_at: "2026-04-28T12:00:00Z", + }; + + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValueOnce( + new Response(JSON.stringify(mockStats), { status: 200 }) + ) + ); + + await fetchPublicStats(); + expect(fetch).toHaveBeenCalledWith( + "/api/stats", + expect.objectContaining({ + headers: expect.objectContaining({ + "Content-Type": "application/json", + }), + }) + ); + }); + }); +}); diff --git a/src/lib/hooks.test.ts b/src/lib/hooks.test.ts index 80273ea..5248a48 100644 --- a/src/lib/hooks.test.ts +++ b/src/lib/hooks.test.ts @@ -1,6 +1,12 @@ import { describe, expect, it, vi, afterEach, beforeEach } from "vitest"; import { renderHook, act } from "@testing-library/react"; -import { useDebounce, useMounted, useAnimatedNumber } from "./hooks"; +import { + useDebounce, + useMounted, + useAnimatedNumber, + useHotkey, + usePolling, +} from "./hooks"; describe("useDebounce", () => { beforeEach(() => { @@ -42,12 +48,220 @@ describe("useMounted", () => { }); }); +describe("useHotkey", () => { + it("fires handler when key matches", () => { + const handler = vi.fn(); + const { unmount } = renderHook(() => useHotkey("k", handler)); + + const event = new KeyboardEvent("keydown", { key: "k" }); + window.dispatchEvent(event); + + expect(handler).toHaveBeenCalledOnce(); + unmount(); + }); + + it("ignores non-matching keys", () => { + const handler = vi.fn(); + const { unmount } = renderHook(() => useHotkey("k", handler)); + + const event = new KeyboardEvent("keydown", { key: "j" }); + window.dispatchEvent(event); + + expect(handler).not.toHaveBeenCalled(); + unmount(); + }); + + it("is case-insensitive", () => { + const handler = vi.fn(); + const { unmount } = renderHook(() => useHotkey("K", handler)); + + const event = new KeyboardEvent("keydown", { key: "k" }); + window.dispatchEvent(event); + + expect(handler).toHaveBeenCalledOnce(); + unmount(); + }); + + it("respects meta modifier", () => { + const handler = vi.fn(); + const { unmount } = renderHook(() => useHotkey("k", handler, { meta: true })); + + const eventWithoutMeta = new KeyboardEvent("keydown", { + key: "k", + metaKey: false, + }); + window.dispatchEvent(eventWithoutMeta); + expect(handler).not.toHaveBeenCalled(); + + const eventWithMeta = new KeyboardEvent("keydown", { + key: "k", + metaKey: true, + }); + window.dispatchEvent(eventWithMeta); + expect(handler).toHaveBeenCalledOnce(); + unmount(); + }); + + it("respects ctrl modifier", () => { + const handler = vi.fn(); + const { unmount } = renderHook(() => useHotkey("k", handler, { ctrl: true })); + + const eventWithoutCtrl = new KeyboardEvent("keydown", { + key: "k", + ctrlKey: false, + }); + window.dispatchEvent(eventWithoutCtrl); + expect(handler).not.toHaveBeenCalled(); + + const eventWithCtrl = new KeyboardEvent("keydown", { + key: "k", + ctrlKey: true, + }); + window.dispatchEvent(eventWithCtrl); + expect(handler).toHaveBeenCalledOnce(); + unmount(); + }); + + it("respects shift modifier", () => { + const handler = vi.fn(); + const { unmount } = renderHook(() => useHotkey("k", handler, { shift: true })); + + const eventWithoutShift = new KeyboardEvent("keydown", { + key: "k", + shiftKey: false, + }); + window.dispatchEvent(eventWithoutShift); + expect(handler).not.toHaveBeenCalled(); + + const eventWithShift = new KeyboardEvent("keydown", { + key: "k", + shiftKey: true, + }); + window.dispatchEvent(eventWithShift); + expect(handler).toHaveBeenCalledOnce(); + unmount(); + }); + + it("detaches listener on unmount", () => { + const handler = vi.fn(); + const { unmount } = renderHook(() => useHotkey("k", handler)); + + unmount(); + + const event = new KeyboardEvent("keydown", { key: "k" }); + window.dispatchEvent(event); + + expect(handler).not.toHaveBeenCalled(); + }); +}); + describe("useAnimatedNumber", () => { - it("eventually reaches the target", async () => { - const raf = vi.spyOn(global, "requestAnimationFrame"); - const { result } = renderHook(() => useAnimatedNumber(100, 50)); - // Drive the animation frames synchronously for the test. + it("starts at 0", () => { + vi.useFakeTimers(); + const { result } = renderHook(() => useAnimatedNumber(100, 600)); expect(result.current).toBe(0); - raf.mockRestore(); + vi.useRealTimers(); + }); + + it("animates towards target", () => { + vi.useFakeTimers(); + const { result } = renderHook(() => useAnimatedNumber(100, 600)); + + act(() => { + vi.advanceTimersByTime(300); + }); + + expect(result.current).toBeGreaterThan(0); + expect(result.current).toBeLessThan(100); + vi.useRealTimers(); + }); + + it("handles value changes during animation", () => { + vi.useFakeTimers(); + const { result, rerender } = renderHook( + ({ target }) => useAnimatedNumber(target, 600), + { initialProps: { target: 100 } } + ); + + act(() => { + vi.advanceTimersByTime(300); + }); + + const midValue = result.current; + expect(midValue).toBeGreaterThan(0); + expect(midValue).toBeLessThan(100); + + rerender({ target: 200 }); + + act(() => { + vi.advanceTimersByTime(100); + }); + + expect(result.current).toBeDefined(); + vi.useRealTimers(); + }); + + it("cleans up animation frame on unmount", () => { + const cancelAnimationFrameSpy = vi.spyOn( + global, + "cancelAnimationFrame" + ); + vi.useFakeTimers(); + const { unmount } = renderHook(() => useAnimatedNumber(100, 600)); + + act(() => { + vi.advanceTimersByTime(100); + }); + + unmount(); + + expect(cancelAnimationFrameSpy).toHaveBeenCalled(); + cancelAnimationFrameSpy.mockRestore(); + vi.useRealTimers(); + }); +}); + +describe("usePolling", () => { + it("returns object with data, error, loading, refetch", () => { + const fetcher = vi.fn().mockResolvedValue({ data: "test" }); + const { result } = renderHook(() => usePolling(fetcher, 1000, false)); + + expect(result.current).toHaveProperty("data"); + expect(result.current).toHaveProperty("error"); + expect(result.current).toHaveProperty("loading"); + expect(result.current).toHaveProperty("refetch"); + }); + + it("initializes with null data and true loading when enabled", () => { + const fetcher = vi.fn(); + const { result } = renderHook(() => usePolling(fetcher, 1000, true)); + + expect(result.current.data).toBeNull(); + expect(result.current.loading).toBe(true); + expect(result.current.error).toBeNull(); + }); + + it("initializes with true loading when disabled", () => { + const fetcher = vi.fn(); + const { result } = renderHook(() => usePolling(fetcher, 1000, false)); + + expect(result.current.loading).toBe(true); + expect(result.current.data).toBeNull(); + }); + + it("exposes refetch function", () => { + const fetcher = vi.fn(); + const { result } = renderHook(() => usePolling(fetcher, 1000, false)); + + expect(typeof result.current.refetch).toBe("function"); + expect(() => result.current.refetch()).not.toThrow(); + }); + + it("cleans up on unmount", () => { + const fetcher = vi.fn(); + const { unmount } = renderHook(() => usePolling(fetcher, 1000, false)); + + // Verify hook mounts without errors + expect(() => unmount()).not.toThrow(); }); }); diff --git a/src/lib/project.test.ts b/src/lib/project.test.ts new file mode 100644 index 0000000..3afa577 --- /dev/null +++ b/src/lib/project.test.ts @@ -0,0 +1,91 @@ +import { describe, expect, it } from "vitest"; +import { PROJECT, ProjectSpec } from "./project"; + +describe("project.ts", () => { + describe("PROJECT", () => { + it("has all required fields", () => { + expect(PROJECT).toHaveProperty("slug"); + expect(PROJECT).toHaveProperty("name"); + expect(PROJECT).toHaveProperty("category"); + expect(PROJECT).toHaveProperty("track"); + expect(PROJECT).toHaveProperty("stage"); + expect(PROJECT).toHaveProperty("summary"); + expect(PROJECT).toHaveProperty("problem"); + expect(PROJECT).toHaveProperty("users"); + expect(PROJECT).toHaveProperty("stack"); + expect(PROJECT).toHaveProperty("why_now"); + expect(PROJECT).toHaveProperty("mvp"); + expect(PROJECT).toHaveProperty("github_url"); + expect(PROJECT).toHaveProperty("system_slug"); + expect(PROJECT).toHaveProperty("eleventh_url"); + expect(PROJECT).toHaveProperty("live_url"); + expect(PROJECT).toHaveProperty("fleet_url"); + expect(PROJECT).toHaveProperty("builder"); + }); + + it("has correct slug and name values", () => { + expect(PROJECT.slug).toBe("evalops-workbench"); + expect(PROJECT.name).toBe("EvalOps Workbench"); + }); + + it("has well-formed URLs", () => { + expect(PROJECT.github_url).toMatch(/^https:\/\//); + expect(PROJECT.eleventh_url).toMatch(/^https:\/\//); + expect(PROJECT.live_url).toMatch(/^https:\/\//); + expect(PROJECT.fleet_url).toMatch(/^https:\/\//); + }); + + it("has non-empty stack array", () => { + expect(Array.isArray(PROJECT.stack)).toBe(true); + expect(PROJECT.stack.length).toBeGreaterThan(0); + expect(PROJECT.stack).toContain("Python"); + }); + + it("has non-empty mvp array", () => { + expect(Array.isArray(PROJECT.mvp)).toBe(true); + expect(PROJECT.mvp.length).toBeGreaterThan(0); + }); + + it("has valid category and track", () => { + expect(PROJECT.category).toBe("Developer Tool"); + expect(PROJECT.track).toBe("LLM"); + }); + + it("has system_slug matching API response field", () => { + expect(PROJECT.system_slug).toBe("evalops"); + }); + + it("has builder attribution", () => { + expect(PROJECT.builder).toBeTruthy(); + expect(typeof PROJECT.builder).toBe("string"); + }); + }); + + describe("ProjectSpec interface", () => { + it("enforces required string fields", () => { + const testProject: ProjectSpec = { + slug: "test-project", + name: "Test Project", + category: "Category", + track: "Track", + stage: "Stage", + summary: "A summary", + problem: "A problem", + users: "Users", + stack: ["Stack"], + why_now: "Why now", + mvp: ["MVP 1"], + github_url: "https://github.com/test", + system_slug: "test", + eleventh_url: "https://eleventh.dev", + live_url: "https://test.eleventh.dev", + fleet_url: "https://eleventh.dev/work", + builder: "Test Builder", + }; + + expect(testProject.slug).toBeDefined(); + expect(testProject.name).toBeDefined(); + expect(testProject.system_slug).toBeDefined(); + }); + }); +}); diff --git a/src/lib/prototype.test.ts b/src/lib/prototype.test.ts new file mode 100644 index 0000000..e0a4ceb --- /dev/null +++ b/src/lib/prototype.test.ts @@ -0,0 +1,153 @@ +import { describe, expect, it } from "vitest"; +import { + PROTOTYPE_METRICS, + PROTOTYPE_CASES, + PROTOTYPE_COMMANDS, + PROTOTYPE_REPORT, + PrototypeCase, + PrototypeMetric, +} from "./prototype"; + +describe("prototype.ts", () => { + describe("PROTOTYPE_METRICS", () => { + it("exports a non-empty array", () => { + expect(Array.isArray(PROTOTYPE_METRICS)).toBe(true); + expect(PROTOTYPE_METRICS.length).toBeGreaterThan(0); + }); + + it("contains well-formed metric objects", () => { + PROTOTYPE_METRICS.forEach((metric: PrototypeMetric) => { + expect(metric).toHaveProperty("label"); + expect(metric).toHaveProperty("baseline"); + expect(metric).toHaveProperty("candidate"); + expect(metric).toHaveProperty("delta"); + expect(typeof metric.label).toBe("string"); + expect(typeof metric.baseline).toBe("string"); + expect(typeof metric.candidate).toBe("string"); + expect(typeof metric.delta).toBe("string"); + }); + }); + + it("has expected metrics", () => { + const labels = PROTOTYPE_METRICS.map((m) => m.label); + expect(labels).toContain("Average score"); + expect(labels).toContain("Pass rate"); + expect(labels).toContain("Regressions"); + expect(labels).toContain("Gate verdict"); + }); + }); + + describe("PROTOTYPE_CASES", () => { + it("exports a non-empty array", () => { + expect(Array.isArray(PROTOTYPE_CASES)).toBe(true); + expect(PROTOTYPE_CASES.length).toBeGreaterThan(0); + }); + + it("contains well-formed case objects", () => { + PROTOTYPE_CASES.forEach((testCase: PrototypeCase) => { + expect(testCase).toHaveProperty("caseId"); + expect(testCase).toHaveProperty("category"); + expect(testCase).toHaveProperty("baselineScore"); + expect(testCase).toHaveProperty("candidateScore"); + expect(testCase).toHaveProperty("delta"); + expect(testCase).toHaveProperty("outcome"); + expect(testCase).toHaveProperty("baselineMissing"); + expect(testCase).toHaveProperty("candidateMissing"); + expect(testCase).toHaveProperty("note"); + + expect(typeof testCase.caseId).toBe("string"); + expect(typeof testCase.category).toBe("string"); + expect(typeof testCase.baselineScore).toBe("number"); + expect(typeof testCase.candidateScore).toBe("number"); + expect(typeof testCase.delta).toBe("number"); + expect(["regression", "improvement", "stable"]).toContain( + testCase.outcome + ); + expect(Array.isArray(testCase.baselineMissing)).toBe(true); + expect(Array.isArray(testCase.candidateMissing)).toBe(true); + expect(typeof testCase.note).toBe("string"); + }); + }); + + it("has cases with valid delta calculations", () => { + PROTOTYPE_CASES.forEach((testCase) => { + const expectedDelta = testCase.candidateScore - testCase.baselineScore; + expect(Math.abs(testCase.delta - expectedDelta)).toBeLessThan(0.001); + }); + }); + + it("has cases with correct outcome classification", () => { + PROTOTYPE_CASES.forEach((testCase) => { + if (testCase.delta > 0.001) { + expect(testCase.outcome).toBe("improvement"); + } else if (testCase.delta < -0.001) { + expect(testCase.outcome).toBe("regression"); + } else { + expect(testCase.outcome).toBe("stable"); + } + }); + }); + + it("has at least one case of each category", () => { + const categories = new Set(PROTOTYPE_CASES.map((c) => c.category)); + expect(categories.size).toBeGreaterThan(0); + }); + }); + + describe("PROTOTYPE_COMMANDS", () => { + it("exports an object with expected command keys", () => { + expect(PROTOTYPE_COMMANDS).toHaveProperty("baseline"); + expect(PROTOTYPE_COMMANDS).toHaveProperty("candidate"); + expect(PROTOTYPE_COMMANDS).toHaveProperty("compare"); + expect(PROTOTYPE_COMMANDS).toHaveProperty("gate"); + expect(PROTOTYPE_COMMANDS).toHaveProperty("inspect"); + }); + + it("contains non-empty command strings", () => { + Object.values(PROTOTYPE_COMMANDS).forEach((cmd) => { + expect(typeof cmd).toBe("string"); + expect(cmd.length).toBeGreaterThan(0); + }); + }); + + it("has commands that reference evalops-workbench", () => { + Object.values(PROTOTYPE_COMMANDS).forEach((cmd) => { + expect(cmd).toContain("evalops-workbench"); + }); + }); + + it("baseline and candidate run different variants", () => { + expect(PROTOTYPE_COMMANDS.baseline).toContain("prompt_v1"); + expect(PROTOTYPE_COMMANDS.candidate).toContain("prompt_v2"); + }); + }); + + describe("PROTOTYPE_REPORT", () => { + it("is a non-empty string", () => { + expect(typeof PROTOTYPE_REPORT).toBe("string"); + expect(PROTOTYPE_REPORT.length).toBeGreaterThan(0); + }); + + it("contains gate verdict", () => { + expect(PROTOTYPE_REPORT).toContain("PASS"); + }); + + it("contains threshold information", () => { + expect(PROTOTYPE_REPORT).toContain("Max regressions"); + expect(PROTOTYPE_REPORT).toContain("Max score drop"); + expect(PROTOTYPE_REPORT).toContain("Max pass-rate drop"); + }); + + it("contains comparison details", () => { + expect(PROTOTYPE_REPORT).toContain("Base run"); + expect(PROTOTYPE_REPORT).toContain("Candidate run"); + expect(PROTOTYPE_REPORT).toContain("Average score delta"); + expect(PROTOTYPE_REPORT).toContain("Pass-rate delta"); + }); + + it("has markdown formatting", () => { + expect(PROTOTYPE_REPORT).toContain("#"); + expect(PROTOTYPE_REPORT).toContain("-"); + }); + }); +}); diff --git a/vitest.config.ts b/vitest.config.ts index f0630e2..6bd036a 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -11,6 +11,20 @@ export default defineConfig({ globals: true, include: ["src/**/*.test.{ts,tsx}"], exclude: ["node_modules", ".next"], + coverage: { + provider: "v8", + reporter: ["text", "text-summary", "json-summary", "html"], + include: ["src/lib/**/*.ts", "src/components/**/*.tsx"], + exclude: [ + "src/**/*.test.{ts,tsx}", + "src/test/**", + "src/**/*.d.ts", + "src/components/ui/**", + ], + thresholds: { + "src/lib/**": { statements: 80, branches: 75, functions: 80, lines: 80 }, + }, + }, }, resolve: { alias: {