From 4af5c97a8fa63f1d8da473eb89e152b811acf585 Mon Sep 17 00:00:00 2001 From: Nathan Toups Date: Thu, 21 May 2026 07:08:30 -0600 Subject: [PATCH] feat(e2e): in-process fake blob server so tests exercise the real paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously the CI dev server logged 'BLOB_READ_WRITE_TOKEN is not set' on every tripwire-touching test because there's no token in the CI environment. The tests passed only because errors landed inside an error boundary or a fire-and-forget catch — the blob code paths weren't actually exercised. This adds a tiny in-memory HTTP server (e2e/fake-blob.ts) that Playwright runs as a second webServer alongside `next dev`. The fake honors the subset of the Vercel Blob surface we actually use: PUT a pathname, GET it back, and a GET /api/blob/ list endpoint for ingest. It seeds stats/tripwire-aggregates.json from a fixture so /x/tripwire renders real numbers. Production code stays on @vercel/blob. A new src/lib/blob.ts wrapper routes put() to the SDK in prod and to the fake's HTTP PUT when BLOB_BASE_URL is set. aggregates.ts and ingest.ts gain one-line overrides on the URL builders. A new e2e/x/tripwire/page.spec.ts loads /x/tripwire and asserts the hero numbers match the seeded fixture, proving the read path end-to-end. 
--- e2e/fake-blob.ts | 115 +++++++++++++++++++++++++++++++++ e2e/fixtures/aggregates.ts | 47 ++++++++++++++ e2e/x/tripwire/page.spec.ts | 17 +++++ playwright.config.ts | 33 +++++++--- src/lib/blob.ts | 52 +++++++++++++++ src/lib/tripwire/aggregates.ts | 4 ++ src/lib/tripwire/ingest.ts | 5 +- src/lib/tripwire/stats.ts | 4 +- src/lib/tripwire/sync-geoip.ts | 4 +- src/proxy.ts | 4 +- 10 files changed, 269 insertions(+), 16 deletions(-) create mode 100644 e2e/fake-blob.ts create mode 100644 e2e/fixtures/aggregates.ts create mode 100644 e2e/x/tripwire/page.spec.ts create mode 100644 src/lib/blob.ts diff --git a/e2e/fake-blob.ts b/e2e/fake-blob.ts new file mode 100644 index 0000000..2346ea5 --- /dev/null +++ b/e2e/fake-blob.ts @@ -0,0 +1,115 @@ +// e2e/fake-blob.ts +// +// In-memory stand-in for Vercel Blob. Run as a Playwright webServer +// alongside `next dev`. Honors the subset of the blob HTTP surface that +// proxy.ts (PUT events), aggregates.ts (GET stats), and ingest.ts +// (LIST events) actually use. +// +// Seeds `stats/tripwire-aggregates.json` from the fixture so the +// tripwire page renders real numbers instead of an error boundary. +// +// Not a fidelity emulator. If a test needs a behavior the real +// platform has (signed URLs, multi-part uploads, store-id auth), add +// it here explicitly. + +import http from "node:http" +import { STATS_BLOB_KEY } from "../src/lib/tripwire/aggregate-shape" +import { FIXTURE_AGGREGATES } from "./fixtures/aggregates" + +interface StoredBlob { + body: Buffer + contentType: string + uploadedAt: string + size: number +} + +const PORT = Number(process.env.FAKE_BLOB_PORT ?? 
7777) +const store = new Map<string, StoredBlob>() + +function setBlob(pathname: string, body: Buffer, contentType: string): void { + store.set(pathname, { + body, + contentType, + uploadedAt: new Date().toISOString(), + size: body.length, + }) +} + +setBlob( + STATS_BLOB_KEY, + Buffer.from(JSON.stringify(FIXTURE_AGGREGATES)), + "application/json", +) + +function readBody(req: http.IncomingMessage): Promise<Buffer> { + return new Promise((resolve, reject) => { + const chunks: Buffer[] = [] + req.on("data", (c) => chunks.push(Buffer.isBuffer(c) ? c : Buffer.from(c))) + req.on("end", () => resolve(Buffer.concat(chunks))) + req.on("error", reject) + }) +} + +function listResponse(prefix: string): { blobs: Array<{ pathname: string; url: string; size: number; uploadedAt: string }>; hasMore: false } { + const blobs = [...store.entries()] + .filter(([pathname]) => pathname.startsWith(prefix)) + .map(([pathname, blob]) => ({ + pathname, + url: `http://localhost:${PORT}/${pathname}`, + size: blob.size, + uploadedAt: blob.uploadedAt, + })) + return { blobs, hasMore: false } +} + +const server = http.createServer(async (req, res) => { + try { + const url = new URL(req.url ?? "/", `http://localhost:${PORT}`) + + if (req.method === "GET" && url.pathname === "/_health") { + res.writeHead(200, { "content-type": "text/plain" }) + res.end("ok") + return + } + + if (req.method === "GET" && url.pathname === "/api/blob/") { + const prefix = url.searchParams.get("prefix") ?? "" + res.writeHead(200, { "content-type": "application/json" }) + res.end(JSON.stringify(listResponse(prefix))) + return + } + + const pathname = url.pathname.replace(/^\/+/, "") + + if (req.method === "PUT") { + const body = await readBody(req) + const contentType = req.headers["content-type"]?.toString() ?? 
"application/octet-stream" + setBlob(pathname, body, contentType) + res.writeHead(200, { "content-type": "application/json" }) + res.end(JSON.stringify({ url: `http://localhost:${PORT}/${pathname}`, pathname })) + return + } + + if (req.method === "GET") { + const blob = store.get(pathname) + if (!blob) { + res.writeHead(404, { "content-type": "text/plain" }) + res.end("not found") + return + } + res.writeHead(200, { "content-type": blob.contentType }) + res.end(blob.body) + return + } + + res.writeHead(405, { "content-type": "text/plain" }) + res.end("method not allowed") + } catch (err) { + res.writeHead(500, { "content-type": "text/plain" }) + res.end(`fake blob error: ${err instanceof Error ? err.message : String(err)}`) + } +}) + +server.listen(PORT, () => { + console.log(`[fake-blob] listening on http://localhost:${PORT}`) +}) diff --git a/e2e/fixtures/aggregates.ts b/e2e/fixtures/aggregates.ts new file mode 100644 index 0000000..97b3fd0 --- /dev/null +++ b/e2e/fixtures/aggregates.ts @@ -0,0 +1,47 @@ +// e2e/fixtures/aggregates.ts +// +// Realistic but synthetic Aggregates payload. Seeded into the fake blob +// server on startup so `/x/tripwire` renders real numbers in E2E +// instead of an error boundary. Numbers are deliberately distinctive +// (12345, 678 ASNs, etc.) so test assertions can spot them. 
+ +import type { Aggregates } from "../../src/lib/tripwire/aggregate-shape" + +export const FIXTURE_AGGREGATES: Aggregates = { + generatedAt: "2026-05-21T00:00:00.000Z", + lifetime: { + totalEvents: 12345, + earliestTs: "2026-04-25T00:00:00.000Z", + latestTs: "2026-05-21T00:00:00.000Z", + daysSinceFirst: 26, + distinctIps: 4321, + distinctPaths: 89, + distinctAsns: 678, + }, + byCategory: [ + { category: "wp-admin", count: 5000 }, + { category: "env-files", count: 3200 }, + { category: "phpmyadmin", count: 2100 }, + { category: "actuator", count: 1045 }, + ], + byUaFamily: [ + { ua: "curl", count: 4200 }, + { ua: "Mozilla", count: 3800 }, + { ua: "Go-http-client", count: 2100 }, + ], + byDay: [ + { date: "2026-05-19", count: 800 }, + { date: "2026-05-20", count: 950 }, + { date: "2026-05-21", count: 1100 }, + ], + topPaths: [ + { path: "/wp-login.php", count: 3200, category: "wp-admin" }, + { path: "/.env", count: 2100, category: "env-files" }, + { path: "/phpmyadmin/", count: 1500, category: "phpmyadmin" }, + ], + byAsn: [ + { asn: "AS14061", name: "DigitalOcean", count: 2200 }, + { asn: "AS16509", name: "Amazon", count: 1800 }, + { asn: "AS24940", name: "Hetzner", count: 1100 }, + ], +} diff --git a/e2e/x/tripwire/page.spec.ts b/e2e/x/tripwire/page.spec.ts new file mode 100644 index 0000000..8ce009b --- /dev/null +++ b/e2e/x/tripwire/page.spec.ts @@ -0,0 +1,17 @@ +import { test, expect } from "@playwright/test" + +import { FIXTURE_AGGREGATES } from "../../fixtures/aggregates" + +// Loads /x/tripwire and asserts that the hero numbers come from the fake +// blob server seeded with FIXTURE_AGGREGATES. If this fails, either the +// blob fetch path is broken or the fake didn't seed. 
+test("tripwire page renders hero numbers from the seeded aggregates", async ({ page }) => { + await page.goto("/x/tripwire") + await expect(page.locator("h1")).toContainText("tripwire") + + const { lifetime } = FIXTURE_AGGREGATES + await expect(page.getByText(String(lifetime.totalEvents), { exact: true })).toBeVisible() + await expect(page.getByText(String(lifetime.distinctIps), { exact: true })).toBeVisible() + await expect(page.getByText(String(lifetime.distinctPaths), { exact: true })).toBeVisible() + await expect(page.getByText(String(lifetime.distinctAsns), { exact: true })).toBeVisible() +}) diff --git a/playwright.config.ts b/playwright.config.ts index 7b432da..12b1af6 100644 --- a/playwright.config.ts +++ b/playwright.config.ts @@ -18,14 +18,29 @@ export default defineConfig({ use: { ...devices["Desktop Chrome"] }, }, ], - webServer: { - command: "bun run dev", - url: "http://localhost:3000", - reuseExistingServer: !process.env.CI, - // Tripwire proxy is prod-gated by default (so local dev doesn't self-bomb). - // TRIPWIRE_FORCE=1 overrides the gate for E2E tests. - env: { - TRIPWIRE_FORCE: "1", + webServer: [ + { + // In-memory stand-in for Vercel Blob (see e2e/fake-blob.ts). Lets + // proxy.ts (PUT events) and the tripwire page (GET stats) exercise + // their real fetch paths without a token or network round-trip. + command: "bun run e2e/fake-blob.ts", + url: "http://localhost:7777/_health", + reuseExistingServer: !process.env.CI, }, - }, + { + command: "bun run dev", + url: "http://localhost:3000", + reuseExistingServer: !process.env.CI, + // Tripwire proxy is prod-gated by default (so local dev doesn't self-bomb). + // TRIPWIRE_FORCE=1 overrides the gate for E2E tests. BLOB_BASE_URL + // points the blob client at the fake; the token's value doesn't + // matter to the fake, but the format has to satisfy the storeId + // parser in aggregates.ts (token.split('_')[3]). 
+ env: { + TRIPWIRE_FORCE: "1", + BLOB_BASE_URL: "http://localhost:7777", + BLOB_READ_WRITE_TOKEN: "vercel_blob_rw_faketestbloblbs_dummy", + }, + }, + ], }); diff --git a/src/lib/blob.ts b/src/lib/blob.ts new file mode 100644 index 0000000..77b3fc6 --- /dev/null +++ b/src/lib/blob.ts @@ -0,0 +1,52 @@ +// src/lib/blob.ts +// +// Thin wrapper over @vercel/blob.put(). Production calls pass straight +// through. When BLOB_BASE_URL is set the call becomes an HTTP PUT to a +// local fake server, which is what e2e/fake-blob.ts uses so Playwright +// can exercise the write path without a real blob store. + +import { put as vercelPut, type PutBlobResult } from "@vercel/blob" + +type PutBlobBody = Parameters<typeof vercelPut>[1] +type PutBlobOptions = Parameters<typeof vercelPut>[2] + +export async function putBlob( + pathname: string, + body: PutBlobBody, + opts: PutBlobOptions, +): Promise<PutBlobResult> { + const fakeBaseUrl = process.env.BLOB_BASE_URL + if (fakeBaseUrl) return fakePut(fakeBaseUrl, pathname, body, opts) + return vercelPut(pathname, body, opts) +} + +async function fakePut( + baseUrl: string, + pathname: string, + body: PutBlobBody, + opts: PutBlobOptions, +): Promise<PutBlobResult> { + const contentType = + "contentType" in opts && typeof opts.contentType === "string" + ? opts.contentType + : undefined + const headers: Record<string, string> = {} + if (contentType) headers["content-type"] = contentType + const res = await fetch(`${baseUrl}/${pathname}`, { + method: "PUT", + headers, + body: body as BodyInit, + }) + if (!res.ok) { + throw new Error(`fake blob PUT failed: ${res.status} ${res.statusText}`) + } + const url = `${baseUrl}/${pathname}` + return { + url, + pathname, + contentType: contentType ?? "application/octet-stream", + contentDisposition: `attachment; filename="${pathname.split("/").pop() ?? 
pathname}"`, + downloadUrl: url, + etag: "fake-etag", + } +} diff --git a/src/lib/tripwire/aggregates.ts b/src/lib/tripwire/aggregates.ts index 39f2688..f2a5872 100644 --- a/src/lib/tripwire/aggregates.ts +++ b/src/lib/tripwire/aggregates.ts @@ -34,7 +34,11 @@ let cached: { data: Aggregates; fetchedAt: number } | null = null // Token format is `vercel_blob_rw__`. The SDK does the // same split internally to construct private blob URLs. +// BLOB_BASE_URL overrides the host so E2E tests can point reads at the +// local fake server (see e2e/fake-blob.ts). function privateBlobUrl(pathname: string, token: string): string { + const fake = process.env.BLOB_BASE_URL + if (fake) return `${fake}/${pathname}` const storeId = token.split("_")[3] if (!storeId) { throw new Error("could not extract store id from BLOB_READ_WRITE_TOKEN") diff --git a/src/lib/tripwire/ingest.ts b/src/lib/tripwire/ingest.ts index f309a5c..1dd1938 100644 --- a/src/lib/tripwire/ingest.ts +++ b/src/lib/tripwire/ingest.ts @@ -71,12 +71,15 @@ interface BlobListPage { // after the Response object goes out of scope, which under Bun on Vercel // can leave the body stream stuck waiting for EOF. By keeping our own // Response in scope across the .json() drain, the request completes. +// BLOB_BASE_URL overrides the host so E2E can point the list call at the +// local fake server (see e2e/fake-blob.ts); an empty value means unset. async function listBlobsPage(prefix: string, cursor: string | undefined): Promise { const token = process.env.BLOB_READ_WRITE_TOKEN if (!token) throw new Error("BLOB_READ_WRITE_TOKEN is not set") const params = new URLSearchParams({ prefix }) if (cursor) params.set("cursor", cursor) - const res = await fetch(`https://vercel.com/api/blob/?${params}`, { + const base = process.env.BLOB_BASE_URL || 
"https://vercel.com" + const res = await fetch(`${base}/api/blob/?${params}`, { headers: { authorization: `Bearer ${token}`, "x-api-version": "12", diff --git a/src/lib/tripwire/stats.ts b/src/lib/tripwire/stats.ts index 424b4ff..34de4a4 100644 --- a/src/lib/tripwire/stats.ts +++ b/src/lib/tripwire/stats.ts @@ -11,7 +11,7 @@ // it across cron invocations and only the first cold instance pays the // ~10MB blob fetch. -import { put } from "@vercel/blob" +import { putBlob } from "@/lib/blob" import { Reader, type Asn, type ReaderModel } from "@maxmind/geoip2-node" import { sql } from "drizzle-orm" import { getDb } from "@/db" @@ -254,7 +254,7 @@ export async function publishAggregates(agg: Aggregates): Promise { const body = JSON.stringify(agg, null, 2) const t0 = Date.now() slog.debug({ step: "publish.put_start", key: STATS_BLOB_KEY, bytes: body.length }) - await put(STATS_BLOB_KEY, body, { + await putBlob(STATS_BLOB_KEY, body, { access: "private", contentType: "application/json", addRandomSuffix: false, diff --git a/src/lib/tripwire/sync-geoip.ts b/src/lib/tripwire/sync-geoip.ts index b5c9c0f..f3a8a8e 100644 --- a/src/lib/tripwire/sync-geoip.ts +++ b/src/lib/tripwire/sync-geoip.ts @@ -9,7 +9,7 @@ // so the tarball is decompressed and parsed in memory: no shelling out // to tar, no temp files. 
-import { put } from "@vercel/blob" +import { putBlob } from "@/lib/blob" import { gunzipSync } from "node:zlib" import { log } from "@/lib/log" @@ -128,7 +128,7 @@ export async function syncGeoipToBlob(): Promise { const tPut = Date.now() glog.debug({ step: "blob.put_start", key: ASN_BLOB_KEY, bytes: mmdb.length }) - await put(ASN_BLOB_KEY, mmdb, { + await putBlob(ASN_BLOB_KEY, mmdb, { access: "private", contentType: "application/octet-stream", addRandomSuffix: false, diff --git a/src/proxy.ts b/src/proxy.ts index 17c5d3d..ced9125 100644 --- a/src/proxy.ts +++ b/src/proxy.ts @@ -1,7 +1,7 @@ // src/proxy.ts import { NextResponse, after, type NextRequest } from "next/server" -import { put } from "@vercel/blob" import { createId } from "@paralleldrive/cuid2" +import { putBlob } from "@/lib/blob" import { matchBait, categoryToBomb, @@ -25,7 +25,7 @@ function archiveEvent(event: TripwireEvent): void { const id = event.req_id ?? createId() const pathname = `events/${date}/${ms}-${id}.json` after(() => - put(pathname, JSON.stringify(event), { + putBlob(pathname, JSON.stringify(event), { access: "private", contentType: "application/json", addRandomSuffix: false,