diff --git a/biome.json b/biome.json index a5ef691..d0693f1 100644 --- a/biome.json +++ b/biome.json @@ -3,7 +3,7 @@ "files": { "includes": [ "**", - "!mockups", + "!**/mockups", "!.internal", "!**/.cache", "!**/dist", diff --git a/package.json b/package.json index 584935a..8b34191 100644 --- a/package.json +++ b/package.json @@ -14,6 +14,9 @@ "test": "bun test packages/fallbacks tools/corpus", "corpus:acquire": "bun run tools/corpus/acquire.ts", "corpus:compare": "bun run tools/corpus/compare.ts", + "corpus:bakeoff": "bun run tools/corpus/bakeoff.ts", + "corpus:app": "bun run tools/corpus/server.ts", + "corpus:visual": "bun run tools/corpus/visual-review.ts", "check": "bun run typecheck && bun run test && bun run lint && bun run build", "check:fast": "bun run typecheck && bun run lint", "prepare": "if [ -z \"$CI\" ]; then bunx lefthook install; fi" diff --git a/tools/corpus/README.md b/tools/corpus/README.md index 8c9e4ff..c5cdbe5 100644 --- a/tools/corpus/README.md +++ b/tools/corpus/README.md @@ -9,6 +9,8 @@ They download fonts into an ignored cache, compare a licensed local reference ag ```sh bun run corpus:acquire bun run corpus:compare -- --reference /path/to/reference.ttf --family "Verdana" +bun run corpus:bakeoff -- --reference /path/to/reference.ttf --candidate "Inter=/path/to/Inter.ttf" +bun run corpus:app ``` ## Acquire @@ -35,4 +37,47 @@ bun run corpus:compare -- \ - `--model latin` is the default. Proportional Latin ranking uses text-carrying codepoints for tier, mean, and max while still reporting full Latin outliers. - `--model monospace` reports matching mono cells as `cell_width_only`, not `metric_safe`. -Comparison output is a lead finder. A public fallback row still needs review, provenance, face-scope checks, and visual sanity. +Rows are ranked by advance tier, coverage, feature coverage (`fcov`), feature distance (`fscore`), then mean advance delta. `flags` marks strong advance matches whose font metadata disagrees enough to need review. 
+ +Comparison output is a lead finder, not a fallback decision. + +## App + +```sh +bun run corpus:app +``` + +The local app compares real reference faces against the corpus and shows the top candidates with overlays. It runs on localhost and stores temporary font files in `.cache/corpus-app`. + +## Bake-off + +```sh +bun run corpus:bakeoff -- \ + --reference /path/to/reference.ttf \ + --family "Arial Rounded MT Bold" \ + --candidate "Inter=/path/to/Inter.ttf" \ + --candidate "Nunito=/path/to/Nunito.ttf" \ + --visual +``` + +Bake-off compares a reference against a handful of manually chosen candidates side by side, printing the same advance and feature metrics per candidate. Pass `--visual` to add an experimental rendered-glyph difference column, which needs ImageMagick 7 (`magick`) on PATH; without it, no rendering is done. It calibrates the metrics against human judgment and writes nothing to the repo. + +## Visual review + +```sh +bun run corpus:visual -- \ + --reference /path/to/reference.ttf \ + --family "Verdana" \ + --candidate "Inter=/path/to/Inter.ttf" \ + --candidate "Nunito=/path/to/Nunito.ttf" +``` + +Visual review writes a small HTML page for a known shortlist of candidates. + +- `--reference` and at least one `--candidate "Label=/path"` are required. +- `--reference` and `--candidate` are regular-face shorthands. +- Use `--reference-face face=/path` and `--candidate-face "Label:face=/path"` for `regular`, `bold`, `italic`, and `boldItalic`. +- `--family` is the report heading. +- `--out` sets the output path. A `.html` path is the file; any other path is a directory that receives `review.html`. The default is `.cache/corpus-visual/review.html`. + +Visual review also copies the selected font files into the ignored output directory so the browser can load them locally. 
diff --git a/tools/corpus/app/app.js b/tools/corpus/app/app.js new file mode 100644 index 0000000..7f5f1f7 --- /dev/null +++ b/tools/corpus/app/app.js @@ -0,0 +1,758 @@ +// docfonts Corpus Review - local workbench UI. +// Numbers come from the compare engine. The overlay is a visual aid, not the measurement. + +const FACE_SLOTS = [ + { id: "regular", label: "Regular", short: "R" }, + { id: "bold", label: "Bold", short: "B" }, + { id: "italic", label: "Italic", short: "I" }, + { id: "boldItalic", label: "Bold Italic", short: "BI" }, +]; +const GLYPHS = [ + "a", + "g", + "e", + "s", + "t", + "R", + "Q", + "M", + "&", + "@", + "0", + "1", + "i", + "l", + "y", + "?", +]; +const SPECIMEN = "Hamburgefonstiv 0123"; +const BODY = "The quick brown fox jumps over the lazy dog."; + +const VERDICT = { + metric_safe: { cls: "v-safe", label: "metric-safe" }, + near_metric: { cls: "v-near", label: "near-metric" }, + cell_width_only: { cls: "v-cell", label: "cell-width-only" }, + visual_only: { cls: "v-visual", label: "visual-only" }, +}; +const verdictOf = (tier) => VERDICT[tier] ?? 
{ cls: "v-visual", label: tier }; + +const el = (id) => document.getElementById(id); +const fileInput = el("reference"); +const fnameLabel = el("fname"); +const detectedSel = el("detected"); +const detectedList = el("detected-list"); +const scopeSel = el("scope"); +const modelSeg = el("model"); +const faceSources = el("face-sources"); +const limitInput = el("limit"); +const sampleInput = el("sample"); +const runButton = el("run"); +const toastEl = el("toast"); +const main = el("main"); + +let model = "latin"; +let activeFace = "regular"; +let families = []; +let corpusFonts = []; +const referenceByFace = Object.fromEntries( + FACE_SLOTS.map((slot) => [slot.id, null]), +); +const resultsByFace = Object.fromEntries( + FACE_SLOTS.map((slot) => [slot.id, null]), +); +const selectedIndexByFace = Object.fromEntries( + FACE_SLOTS.map((slot) => [slot.id, -1]), +); +// Transient per-face run state ("measuring" | "failed"), surfaced on the face pills. +const faceStatus = Object.fromEntries( + FACE_SLOTS.map((slot) => [slot.id, null]), +); +const faceError = Object.fromEntries(FACE_SLOTS.map((slot) => [slot.id, null])); + +function node(tag, attrs = {}, children = []) { + const n = document.createElement(tag); + for (const [k, v] of Object.entries(attrs)) { + if (v === undefined || v === null || v === false) continue; + if (k === "class") n.className = v; + else if (k === "html") n.innerHTML = v; + else n.setAttribute(k, v); + } + for (const c of [].concat(children)) n.append(c); + return n; +} + +function faceSlot(id) { + return FACE_SLOTS.find((slot) => slot.id === id) ?? FACE_SLOTS[0]; +} + +let toastTimer = null; +/** Transient corner toast for one-off feedback (pins, errors). Run progress lives on the face pills. */ +function setStatus(message, kind = "info") { + if (!toastEl) return; + toastEl.textContent = message; + toastEl.className = `toast show${kind === "error" ? 
" err" : ""}`; + if (toastTimer) clearTimeout(toastTimer); + toastTimer = setTimeout(() => { + toastEl.className = "toast"; + }, 3600); +} + +function codepoint(glyph) { + return `U+${glyph.codePointAt(0).toString(16).toUpperCase().padStart(4, "0")}`; +} + +function referenceLabel(ref) { + if (!ref) return "missing"; + return ref.name; +} + +function fontDescriptors(ref) { + return { + style: ref?.style || "normal", + weight: ref?.weight || "400", + }; +} + +function fontStyle(family, ref) { + const desc = fontDescriptors(ref); + return `font-family:"${family}";font-style:${desc.style};font-weight:${desc.weight}`; +} + +async function loadFont(family, url, ref = null) { + const face = new FontFace(family, `url("${url}")`, fontDescriptors(ref)); + await face.load(); + document.fonts.add(face); +} + +async function loadSources() { + try { + const res = await fetch("/api/sources"); + const data = await res.json(); + if (!res.ok) throw new Error(data.error || "could not load sources"); + for (const source of data.sources) + scopeSel.append( + node("option", { value: source.sourceId }, [source.family]), + ); + } catch { + // Keep the default "All acquired" scope when the snapshot is not available. + } +} + +/** Load the full corpus catalog once so the review can pin and compare any specific font. */ +async function loadCorpusFonts() { + try { + const res = await fetch("/api/corpus-fonts"); + const data = await res.json(); + corpusFonts = Array.isArray(data.fonts) ? data.fonts : []; + } catch { + corpusFonts = []; + } +} + +async function loadDetected() { + try { + const res = await fetch("/api/local-fonts"); + const data = await res.json(); + families = res.ok && Array.isArray(data.families) ? 
data.families : []; + detectedList.innerHTML = ""; + detectedSel.placeholder = "Type a detected family..."; + if (!families.length) { + detectedSel.placeholder = "No local fonts detected"; + return; + } + families.forEach((family, index) => { + const count = Object.keys(family.faces).length; + detectedList.append( + node("option", { + value: family.family, + label: `${count} face${count === 1 ? "" : "s"}`, + "data-index": String(index), + }), + ); + }); + } catch { + detectedList.innerHTML = ""; + detectedSel.placeholder = "Detection unavailable"; + } +} + +function updateFaceControls() { + const regular = referenceByFace.regular; + fnameLabel.textContent = + regular?.kind === "file" ? regular.name : "No file loaded"; +} + +function faceState(slot) { + if (faceStatus[slot.id] === "measuring") return "measuring"; + if (faceStatus[slot.id] === "failed") return "failed"; + if (resultsByFace[slot.id]) return "measured"; + if (referenceByFace[slot.id]) return "ready"; + return "missing"; +} + +function renderFaceSources() { + faceSources.innerHTML = ""; + for (const slot of FACE_SLOTS) { + const ref = referenceByFace[slot.id]; + const state = faceState(slot); + const classes = ["face-source", `face-${slot.id}`, `state-${state}`]; + if (slot.id === activeFace) classes.push("active"); + if (ref) classes.push("loaded"); + faceSources.append( + node( + "button", + { + class: classes.join(" "), + type: "button", + "data-face": slot.id, + title: + state === "failed" + ? faceError[slot.id] || "compare failed" + : undefined, + }, + [ + node("span", { class: "face-short" }, [slot.short]), + node("span", { class: "face-name" }, [ + ref ? 
referenceLabel(ref) : `${slot.label} missing`, + ]), + node("span", { class: "face-state" }, [state]), + ], + ), + ); + } +} + +function setActiveFace(face) { + activeFace = face; + updateFaceControls(); + renderFaceSources(); + renderActiveFace(); +} + +function clearFaces() { + for (const slot of FACE_SLOTS) { + referenceByFace[slot.id] = null; + resultsByFace[slot.id] = null; + selectedIndexByFace[slot.id] = -1; + } +} + +/** Detected family pick: auto-fill every face the family actually has. */ +function loadFamily(family) { + clearFaces(); + for (const slot of FACE_SLOTS) { + const face = family.faces[slot.id]; + if (face) + referenceByFace[slot.id] = { + kind: "path", + path: face.path, + fontIndex: face.fontIndex, + style: face.style, + synthetic: face.synthetic, + weight: face.weight, + name: face.name, + }; + } + activeFace = ( + FACE_SLOTS.find((slot) => referenceByFace[slot.id]) ?? FACE_SLOTS[0] + ).id; + updateFaceControls(); + renderFaceSources(); + renderActiveFace(); +} + +/** A one-off uploaded file compares as a single Regular face. */ +function loadSingleFile(file) { + clearFaces(); + referenceByFace.regular = { kind: "file", file, name: file.name }; + activeFace = "regular"; + detectedSel.value = ""; + updateFaceControls(); + renderFaceSources(); + renderActiveFace(); +} + +function candidateFamily(data, candidate) { + return `docfonts-${data.faceId}-candidate-${candidate.index}`; +} + +function referenceFamily(data) { + return `docfonts-${data.faceId}-reference-${data.runId}`; +} + +/** Compact candidate switcher shown in the review header. Pinned fonts lead, then the ranked corpus matches. 
*/ +function candidatePicker(data, currentIndex) { + const select = node("select", { + class: "field cand-picker", + "aria-label": "Candidate", + }); + const entries = data.candidates.map((candidate, index) => ({ + candidate, + index, + })); + const ordered = [ + ...entries.filter((entry) => entry.candidate.pinned), + ...entries.filter((entry) => !entry.candidate.pinned), + ]; + for (const { candidate, index } of ordered) { + const verdict = verdictOf(candidate.tier); + const rank = candidate.pinned ? "Pinned" : `${index + 1}.`; + const option = node("option", { value: String(index) }, [ + `${rank} ${candidate.file} - ${verdict.label} (${candidate.mean} / ${candidate.max})`, + ]); + if (index === currentIndex) option.setAttribute("selected", ""); + select.append(option); + } + select.addEventListener("change", () => { + selectCandidate(Number(select.value)); + }); + return select; +} + +/** Typeahead over the full corpus so any specific font can be pinned and compared on demand. */ +function pinSearch() { + const input = node("input", { + class: "field pin-input", + type: "text", + placeholder: "Compare a specific font...", + "aria-label": "Compare a specific font", + autocomplete: "off", + }); + const results = node("div", { class: "pin-results", hidden: "" }); + + const render = (matches) => { + results.innerHTML = ""; + if (!matches.length) { + results.hidden = true; + return; + } + for (const match of matches) { + const option = node("button", { type: "button", class: "pin-opt" }, [ + node("span", { class: "pf" }, [match.file]), + node("span", { class: "ps" }, [match.sourceId]), + ]); + option.addEventListener("mousedown", (event) => { + event.preventDefault(); + results.hidden = true; + input.value = ""; + pinTarget(match); + }); + results.append(option); + } + results.hidden = false; + }; + + input.addEventListener("input", () => { + const query = input.value.trim().toLowerCase(); + if (query.length < 2) { + render([]); + return; + } + const tokens = 
query.split(/\s+/).filter(Boolean); + const matches = corpusFonts + .filter((font) => { + const haystack = `${font.file} ${font.sourceId}`.toLowerCase(); + return tokens.every((token) => haystack.includes(token)); + }) + .slice(0, 25); + render(matches); + }); + input.addEventListener("blur", () => { + setTimeout(() => { + results.hidden = true; + }, 120); + }); + + return node("div", { class: "pin" }, [input, results]); +} + +/** Score the reference against one chosen corpus font, then add it to the review and select it. */ +async function pinTarget(match) { + const data = resultsByFace[activeFace]; + if (!data) return; + const existing = data.candidates.findIndex( + (candidate) => + candidate.sourceId === match.sourceId && candidate.file === match.file, + ); + if (existing >= 0) { + selectCandidate(existing); + return; + } + const ref = referenceByFace[activeFace]; + if (!ref) return; + + setStatus(`Comparing ${match.file}...`); + try { + const form = new FormData(); + if (ref.kind === "file") form.set("reference", ref.file); + else form.set("referencePath", ref.path); + if (ref.fontIndex !== undefined) + form.set("referenceIndex", String(ref.fontIndex)); + form.set("model", data.model || model); + form.set("runId", data.runId); + form.set("sourceId", match.sourceId); + form.set("file", match.file); + const res = await fetch("/api/compare-target", { + method: "POST", + body: form, + }); + const out = await res.json(); + if (!res.ok) throw new Error(out.error || "compare failed"); + + const candidate = out.candidate; + candidate.index = data.candidates.length; + candidate.pinned = true; + data.candidates.push(candidate); + await loadFont(candidateFamily(data, candidate), candidate.url); + selectCandidate(candidate.index); + setStatus( + `Pinned ${match.file} (${verdictOf(candidate.tier).label}). ${data.candidates.length} candidates in review.`, + ); + } catch (error) { + setStatus(error instanceof Error ? 
error.message : String(error), "error"); + } +} + +function overlay(data, candidate, text) { + const refFamily = referenceFamily(data); + const candFamily = candidateFamily(data, candidate); + const ref = referenceByFace[data.faceId]; + return node("div", { class: "overlay" }, [ + node("span", { class: "ref", style: fontStyle(refFamily, ref) }, [text]), + node("span", { class: "cand", style: `font-family:"${candFamily}"` }, [ + text, + ]), + ]); +} + +function glyphGrid(data, candidate) { + const refFamily = referenceFamily(data); + const candFamily = candidateFamily(data, candidate); + const ref = referenceByFace[data.faceId]; + const grid = node("div", { class: "glyphgrid" }); + for (const glyph of GLYPHS) { + grid.append( + node("div", { class: "g" }, [ + node("span", { class: "ref", style: fontStyle(refFamily, ref) }, [ + glyph, + ]), + node("span", { class: "cand", style: `font-family:"${candFamily}"` }, [ + glyph, + ]), + node("label", {}, [codepoint(glyph)]), + ]), + ); + } + return grid; +} + +function renderFacebar() { + return node("div", { class: "facebar" }, [ + node( + "div", + { class: "seg" }, + FACE_SLOTS.map((slot) => + node( + "button", + { + type: "button", + disabled: resultsByFace[slot.id] ? undefined : "", + "aria-selected": slot.id === activeFace ? 
"true" : "false", + "data-face": slot.id, + }, + [slot.label], + ), + ), + ), + ]); +} + +function selectCandidate(index) { + const data = resultsByFace[activeFace]; + if (!data) return; + const candidate = data.candidates[index]; + if (!candidate) return; + + selectedIndexByFace[activeFace] = index; + const verdict = verdictOf(candidate.tier); + const specimen = sampleInput.value.trim() || SPECIMEN; + const refFamily = referenceFamily(data); + const candFamily = candidateFamily(data, candidate); + const ref = referenceByFace[data.faceId]; + + const valueRow = (key, value) => + node("div", { class: "dr" }, [ + node("span", { class: "k" }, [key]), + node("span", { class: "v" }, [value]), + ]); + + const worst = candidate.worst.length + ? candidate.worst.map((worstGlyph) => + node("span", { class: "w" }, [worstGlyph]), + ) + : [node("span", { class: "fine" }, ["none over threshold"])]; + + const flags = candidate.flags.length + ? candidate.flags.map((flag) => node("span", { class: "flag" }, [flag])) + : [node("span", { class: "fine" }, ["no feature gaps flagged"])]; + + main.innerHTML = ""; + main.append( + node("div", { class: "review-head" }, [ + node("div", { class: "review-pick" }, [ + node("div", { class: "section-label", style: "margin:0" }, [ + `${data.reference.name} · ${faceSlot(activeFace).label} · ${data.candidates.length} candidates`, + ]), + candidatePicker(data, index), + pinSearch(), + ]), + node("span", { class: `badge ${verdict.cls}` }, [verdict.label]), + ]), + node("div", { class: "subline" }, [ + `${faceSlot(activeFace).label}. ${candidate.sourceId}. Closest by current metrics. 
Confirm by eye before trusting.`, + ]), + renderFacebar(), + + node("div", { class: "block" }, [ + node("p", { class: "section-label" }, ["Overlay"]), + overlay(data, candidate, "Rg"), + node("div", { class: "legend" }, [ + node("span", { + html: 'reference', + }), + node("span", { + html: 'candidate', + }), + node("span", { html: 'overlap' }), + node("span", { + class: "grow", + html: `advance mean ${candidate.mean} / max ${candidate.max}`, + }), + ]), + node("div", { class: "samples" }, [ + node("div", { class: "s" }, [ + node("div", { class: "tag" }, ["reference"]), + node("div", { class: "txt", style: fontStyle(refFamily, ref) }, [ + specimen, + ]), + ]), + node("div", { class: "s" }, [ + node("div", { class: "tag" }, ["candidate"]), + node("div", { class: "txt", style: `font-family:"${candFamily}"` }, [ + specimen, + ]), + ]), + ]), + ]), + + node("div", { class: "cols" }, [ + node("div", {}, [ + node("p", { class: "section-label" }, ["Glyph overlay"]), + glyphGrid(data, candidate), + node("p", { class: "section-label", style: "margin:18px 0 8px" }, [ + "Body sample - candidate", + ]), + node( + "div", + { + class: "s", + style: `font-family:"${candFamily}";font-size:19px;padding:12px 14px`, + }, + [BODY], + ), + ]), + node("div", {}, [ + node("p", { class: "section-label" }, ["Measurement"]), + node("div", { class: "datarows" }, [ + valueRow("face", faceSlot(activeFace).label), + valueRow("verdict", verdict.label), + valueRow("advance mean", candidate.mean), + valueRow("advance max", candidate.max), + valueRow("advance coverage", candidate.coverage), + valueRow("feature score", candidate.fscore), + valueRow("feature coverage", candidate.fcov), + ]), + node("p", { class: "section-label", style: "margin:16px 0 8px" }, [ + "Feature flags", + ]), + node("div", { class: "worst" }, flags), + node("p", { class: "section-label", style: "margin:16px 0 8px" }, [ + "Worst glyphs (full sample)", + ]), + node("div", { class: "worst" }, worst), + node("p", { class: 
"section-label", style: "margin:16px 0 8px" }, [ + "Provenance", + ]), + node("div", { class: "datarows" }, [ + valueRow("source", candidate.sourceId), + valueRow("method / date", "analytic_advance / today"), + ]), + node( + "button", + { class: "btn full", style: "margin-top:16px", type: "button" }, + ["Mark as selected fallback"], + ), + node("p", { class: "fine", style: "margin-top:9px" }, [ + "Needs visual review before it becomes a published row.", + ]), + ]), + ]), + ); + + main.querySelectorAll(".facebar button").forEach((button) => { + button.addEventListener("click", () => setActiveFace(button.dataset.face)); + }); + main.querySelector(".btn.full").addEventListener("click", () => { + setStatus( + `Selected ${faceSlot(activeFace).label}: ${candidate.file} (${verdict.label}). Recorded for review, not published.`, + ); + }); +} + +function renderEmpty() { + const ref = referenceByFace[activeFace]; + const message = ref + ? "Run a comparison to populate this face." + : "Load a real reference file for this face."; + main.innerHTML = ""; + main.append( + node("div", { class: "empty" }, [ + node("div", { class: "big" }, [ + `${faceSlot(activeFace).label} not measured`, + ]), + node("div", {}, [message]), + ]), + ); +} + +function renderActiveFace() { + const data = resultsByFace[activeFace]; + if (!data?.candidates?.length) { + renderEmpty(); + return; + } + const index = + selectedIndexByFace[activeFace] >= 0 ? 
selectedIndexByFace[activeFace] : 0; + selectCandidate(index); +} + +async function loadResultFonts(faceId, data) { + await loadFont( + referenceFamily(data), + data.reference.url, + referenceByFace[faceId], + ); + await Promise.all( + data.candidates.map((candidate) => + loadFont(candidateFamily({ faceId }, candidate), candidate.url), + ), + ); +} + +async function compareFace(slot) { + const ref = referenceByFace[slot.id]; + if (!ref) return null; + + const form = new FormData(); + if (ref.kind === "file") form.set("reference", ref.file); + else form.set("referencePath", ref.path); + if (ref.fontIndex !== undefined) + form.set("referenceIndex", String(ref.fontIndex)); + form.set("limit", limitInput.value); + form.set("model", model); + form.set("sources", scopeSel.value); + + const res = await fetch("/api/compare", { method: "POST", body: form }); + const data = await res.json(); + if (!res.ok) throw new Error(data.error || "compare failed"); + data.faceId = slot.id; + data.model = model; + await loadResultFonts(slot.id, data); + return data; +} + +async function runCompare() { + const loaded = FACE_SLOTS.filter((slot) => referenceByFace[slot.id]); + if (loaded.length === 0) { + setStatus("Load at least one real reference face."); + return; + } + + runButton.disabled = true; + for (const slot of loaded) { + faceStatus[slot.id] = "measuring"; + faceError[slot.id] = null; + renderFaceSources(); + try { + resultsByFace[slot.id] = await compareFace(slot); + selectedIndexByFace[slot.id] = 0; + faceStatus[slot.id] = null; + } catch (error) { + faceStatus[slot.id] = "failed"; + faceError[slot.id] = + error instanceof Error ? 
error.message : String(error); + } + updateFaceControls(); + renderFaceSources(); + } + if (!resultsByFace[activeFace]) { + const done = loaded.find((slot) => resultsByFace[slot.id]); + if (done) activeFace = done.id; + } + updateFaceControls(); + renderFaceSources(); + renderActiveFace(); + runButton.disabled = false; +} + +el("choose").addEventListener("click", () => { + fileInput.value = ""; + fileInput.click(); +}); +fileInput.addEventListener("change", () => { + const file = fileInput.files?.[0]; + if (!file) return; + loadSingleFile(file); +}); + +function selectDetectedFamily() { + const query = detectedSel.value.trim().toLowerCase(); + if (!query) return; + const family = families.find((entry) => entry.family.toLowerCase() === query); + if (family) { + loadFamily(family); + return; + } + setStatus("Choose a family from the detected list."); +} + +detectedSel.addEventListener("change", selectDetectedFamily); +detectedSel.addEventListener("keydown", (event) => { + if (event.key === "Enter") selectDetectedFamily(); +}); + +modelSeg.querySelectorAll("button").forEach((button) => { + button.addEventListener("click", () => { + model = button.dataset.model; + modelSeg.querySelectorAll("button").forEach((item) => { + item.setAttribute("aria-selected", item === button ? "true" : "false"); + }); + }); +}); + +faceSources.addEventListener("click", (event) => { + const button = event.target.closest("[data-face]"); + if (button) setActiveFace(button.dataset.face); +}); + +runButton.addEventListener("click", runCompare); +sampleInput.addEventListener("input", () => { + if (resultsByFace[activeFace]) renderActiveFace(); +}); + +renderFaceSources(); +updateFaceControls(); +renderActiveFace(); +loadSources(); +loadDetected(); +loadCorpusFonts(); diff --git a/tools/corpus/app/index.html b/tools/corpus/app/index.html new file mode 100644 index 0000000..ffb3bc4 --- /dev/null +++ b/tools/corpus/app/index.html @@ -0,0 +1,86 @@ + + + + + + docfonts - Corpus Review + + + +
+
+ + docfonts + Corpus Review +
+
+
+ +
+ + + + +
+
+
No comparison yet
+
Choose a licensed reference font and run a comparison.
The closest open candidates load into the review, switchable from the header.
+
+
+
+ + + +
+ + + + diff --git a/tools/corpus/app/style.css b/tools/corpus/app/style.css new file mode 100644 index 0000000..964fd82 --- /dev/null +++ b/tools/corpus/app/style.css @@ -0,0 +1,715 @@ +/* docfonts Corpus Review - local workbench. */ + +:root { + --paper: #fbfaf7; + --surface: #fff; + --ink: #191b20; + --ink2: #5b6169; + --ink3: #8a8f97; + --grid: #ede9e0; + --line: #e7e3da; + --hair: #f0ede6; + --accent: #0f766e; + --accent-d: #115e59; + --v-safe: #15803d; + --v-near: #3f6212; + --v-cell: #b45309; + --v-visual: #92400e; + --v-preserve: #475569; + --v-customer: #574b90; + --v-clay: #a8392b; + --diff-ref: #0f766e; /* overlay reference = teal accent */ + --diff-cand: #39414e; /* overlay candidate = neutral slate (not a verdict color) */ + --mono: ui-monospace, SFMono-Regular, Menlo, monospace; + --ui: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; + --r: 8px; +} +* { + box-sizing: border-box; +} +html, +body { + height: 100%; + margin: 0; +} +body { + font-family: var(--ui); + color: var(--ink); + background: var(--paper); + font-size: 15px; + line-height: 1.45; + display: flex; + flex-direction: column; +} +.num { + font-family: var(--mono); + font-variant-numeric: tabular-nums; + letter-spacing: 0; +} +h1, +h2, +h3 { + margin: 0; + font-weight: 600; + letter-spacing: 0; +} +.muted { + color: var(--ink2); +} +.fine { + color: var(--ink3); + font-size: 12px; +} +button { + font-family: var(--ui); + cursor: pointer; +} +.section-label { + font-size: 11px; + text-transform: uppercase; + letter-spacing: 0; + color: var(--ink2); + font-weight: 500; + margin: 0 0 8px; +} + +/* top bar */ +.topbar { + display: flex; + align-items: center; + gap: 12px; + height: 54px; + padding: 0 18px; + background: var(--surface); + border-bottom: 1px solid var(--line); + flex: 0 0 auto; +} +.brand { + display: flex; + align-items: center; + gap: 9px; +} +.logo { + width: 20px; + height: 20px; + border: 2px solid var(--accent); + border-radius: 5px; + display: flex; + 
align-items: center; + justify-content: center; +} +.logo svg { + width: 11px; + height: 11px; + stroke: var(--accent); + stroke-width: 2.4; + fill: none; +} +.word { + font-weight: 600; + letter-spacing: 0; + font-size: 16px; +} +.crumb { + color: var(--ink2); + font-size: 14px; + padding-left: 8px; + border-left: 1px solid var(--line); +} +.spacer { + flex: 1; +} +.status { + font-family: var(--mono); + font-size: 12px; + color: var(--ink2); + display: flex; + align-items: center; + gap: 7px; + background: color-mix(in srgb, var(--v-safe) 7%, transparent); + border: 1px solid color-mix(in srgb, var(--v-safe) 25%, transparent); + padding: 4px 9px; + border-radius: 20px; +} +.status .dot { + width: 7px; + height: 7px; + border-radius: 50%; + background: var(--v-safe); +} + +/* layout: left rail (controls + candidate list) + main review */ +.app { + display: grid; + grid-template-columns: 344px 1fr; + flex: 1 1 auto; + min-height: 0; +} +.rail { + border-right: 1px solid var(--line); + display: flex; + flex-direction: column; + min-height: 0; + background: color-mix(in srgb, var(--paper) 55%, var(--surface)); +} +.controls { + padding: 15px 16px; + border-bottom: 1px solid var(--line); + flex: 0 0 auto; +} +.main { + overflow: auto; + min-height: 0; + padding: 22px 26px; +} + +/* controls */ +label.ctl { + display: block; + margin-bottom: 11px; +} +label.ctl > span { + display: block; + font-size: 12px; + color: var(--ink2); + margin-bottom: 4px; + font-weight: 500; +} +.field { + width: 100%; + padding: 7px 9px; + border: 1px solid var(--line); + border-radius: 6px; + background: var(--surface); + font-family: var(--ui); + font-size: 14px; + color: var(--ink); +} +.field:focus { + outline: 2px solid color-mix(in srgb, var(--accent) 40%, transparent); + border-color: var(--accent); +} +.filepick { + display: flex; + gap: 8px; + align-items: center; + padding: 7px 9px; + border: 1px dashed var(--line); + border-radius: 6px; + background: var(--surface); +} +.filepick 
input[type="file"] { + display: none; +} +.filepick .fname { + font-family: var(--mono); + font-size: 12px; + color: var(--ink); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} +.row2 { + display: flex; + gap: 10px; +} +.row2 > * { + flex: 1; +} +.btn { + padding: 8px 12px; + border: 1px solid var(--accent); + background: var(--accent); + color: #fff; + border-radius: 6px; + font-size: 14px; + font-weight: 500; +} +.btn:disabled { + opacity: 0.55; + cursor: default; +} +.btn.ghost { + background: var(--surface); + color: var(--accent); +} +.btn.full { + width: 100%; +} +.seg { + display: inline-flex; + border: 1px solid var(--line); + border-radius: 7px; + overflow: hidden; + background: var(--surface); + width: 100%; +} +.seg button { + flex: 1; + padding: 6px 11px; + border: 0; + background: transparent; + font-size: 13px; + color: var(--ink2); + border-right: 1px solid var(--line); +} +.seg button:last-child { + border-right: 0; +} +.seg button[aria-selected="true"] { + background: color-mix(in srgb, var(--accent) 10%, transparent); + color: var(--accent); + font-weight: 500; +} +.seg button.loaded::after, +.seg button.measured::after { + content: ""; + display: inline-block; + width: 5px; + height: 5px; + margin-left: 5px; + border-radius: 50%; + background: currentColor; + vertical-align: 2px; +} +.seg button.measured::after { + background: var(--v-safe); +} +.seg button:disabled { + color: var(--ink3); + cursor: not-allowed; + background: repeating-linear-gradient( + 45deg, + transparent, + transparent 4px, + var(--hair) 4px, + var(--hair) 5px + ); +} + +/* verdict badge */ +.badge { + display: inline-flex; + align-items: center; + gap: 5px; + font-family: var(--mono); + font-size: 11px; + font-weight: 500; + padding: 2px 7px 2px 6px; + border-radius: 5px; + border: 1px solid color-mix(in srgb, currentColor 35%, transparent); + background: color-mix(in srgb, currentColor 9%, transparent); + white-space: nowrap; +} +.badge::before { + 
content: ""; + width: 6px; + height: 6px; + border-radius: 2px; + background: currentColor; +} +.v-safe { + color: var(--v-safe); +} +.v-near { + color: var(--v-near); +} +.v-cell { + color: var(--v-cell); +} +.v-visual { + color: var(--v-visual); +} + +/* face source slots */ +.face-sources { + display: grid; + gap: 6px; + margin: -2px 0 12px; +} +.face-source { + display: grid; + grid-template-columns: 26px 1fr auto; + gap: 7px; + align-items: center; + width: 100%; + padding: 6px 8px; + border: 1px solid var(--line); + border-radius: 6px; + background: var(--surface); + color: var(--ink2); + text-align: left; +} +.face-source.active { + border-color: var(--accent); + box-shadow: inset 2px 0 0 var(--accent); +} +.face-source .face-short { + display: inline-flex; + align-items: center; + justify-content: center; + min-width: 22px; + height: 20px; + border: 1px solid var(--line); + border-radius: 4px; + color: var(--ink3); + font-family: var(--mono); + font-size: 10px; +} +.face-source.loaded .face-short { + color: var(--accent); + border-color: color-mix(in srgb, var(--accent) 38%, transparent); + background: color-mix(in srgb, var(--accent) 10%, transparent); +} +.face-source .face-name { + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + color: var(--ink); + font-family: var(--mono); + font-size: 11px; +} +.face-source .face-state { + color: var(--ink3); + font-family: var(--mono); + font-size: 10px; +} + +/* render each face name in its own style so the pill reads as that face */ +.face-source.face-bold .face-name, +.face-source.face-boldItalic .face-name { + font-weight: 700; +} +.face-source.face-italic .face-name, +.face-source.face-boldItalic .face-name { + font-style: italic; +} + +/* quiet per-face status: the state word carries it */ +.face-source.state-measuring .face-state, +.face-source.state-measured .face-state { + color: var(--accent); +} +.face-source.state-measuring .face-state::after { + content: ""; + animation: face-dots 
1.4s steps(1, end) infinite; +} +.face-source.state-failed .face-state { + color: var(--v-clay, #a8392b); +} +@keyframes face-dots { + 0% { + content: ""; + } + 25% { + content: "."; + } + 50% { + content: ".."; + } + 75% { + content: "..."; + } +} + +/* transient corner toast (replaces the old sidebar status line) */ +.toast { + position: fixed; + right: 18px; + bottom: 18px; + z-index: 50; + max-width: 360px; + padding: 10px 14px; + border: 1px solid var(--line); + border-left: 3px solid var(--accent); + border-radius: 8px; + background: var(--surface); + box-shadow: 0 10px 30px rgba(25, 27, 32, 0.16); + color: var(--ink); + font-size: 13px; + opacity: 0; + transform: translateY(8px); + pointer-events: none; + transition: + opacity 0.2s ease, + transform 0.2s ease; +} +.toast.show { + opacity: 1; + transform: translateY(0); +} +.toast.err { + border-left-color: var(--v-clay, #a8392b); + color: var(--v-clay, #a8392b); +} + +/* flags */ +.flag { + font-family: var(--mono); + font-size: 11px; + color: var(--v-cell); + background: color-mix(in srgb, var(--v-cell) 9%, transparent); + padding: 1px 6px; + border-radius: 4px; +} + +/* main review */ +.review-head { + display: flex; + align-items: center; + justify-content: space-between; + margin-bottom: 6px; + gap: 12px; +} +.review-pick { + display: flex; + flex-direction: column; + gap: 7px; + min-width: 0; + flex: 1 1 auto; +} +.cand-picker { + max-width: 540px; + font-family: var(--mono); + font-variant-numeric: tabular-nums; + font-size: 13px; +} +.pin { + position: relative; + max-width: 540px; +} +.pin-input { + font-size: 13px; +} +.pin-results { + position: absolute; + z-index: 20; + top: calc(100% + 4px); + left: 0; + right: 0; + max-height: 320px; + overflow: auto; + padding: 5px; + background: var(--surface); + border: 1px solid var(--line); + border-radius: 8px; + box-shadow: 0 8px 24px rgba(25, 27, 32, 0.12); +} +.pin-results[hidden] { + display: none; +} +.pin-opt { + display: flex; + align-items: 
baseline; + justify-content: space-between; + gap: 10px; + width: 100%; + padding: 7px 9px; + border: 0; + border-radius: 6px; + background: transparent; + text-align: left; + cursor: pointer; +} +.pin-opt:hover { + background: color-mix(in srgb, var(--accent) 10%, transparent); +} +.pin-opt .pf { + overflow: hidden; + font-family: var(--mono); + font-size: 12px; + color: var(--ink); + text-overflow: ellipsis; + white-space: nowrap; +} +.pin-opt .ps { + flex: 0 0 auto; + font-size: 11px; + color: var(--ink2); +} +.subline { + color: var(--ink2); + font-size: 13px; + margin-bottom: 16px; +} +.facebar { + margin: 0 0 18px; +} +.block { + margin-bottom: 20px; +} +.overlay { + position: relative; + height: 200px; + background: var(--surface); + border: 1px solid var(--line); + border-radius: var(--r); + overflow: hidden; + display: flex; + align-items: center; + justify-content: center; +} +.overlay .ref, +.overlay .cand { + position: absolute; + font-size: 150px; + line-height: 1; + mix-blend-mode: multiply; + user-select: none; +} +.overlay .ref { + color: var(--diff-ref); +} +.overlay .cand { + color: var(--diff-cand); +} +.legend { + display: flex; + gap: 16px; + align-items: center; + font-family: var(--mono); + font-size: 12px; + color: var(--ink2); + margin-top: 9px; +} +.legend i { + width: 10px; + height: 10px; + border-radius: 2px; + display: inline-block; + margin-right: 5px; + vertical-align: -1px; +} +.legend .grow { + margin-left: auto; + color: var(--ink); +} +.samples { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 12px; + margin-top: 12px; +} +.samples .s { + border: 1px solid var(--line); + border-radius: 6px; + padding: 12px 14px; + background: var(--surface); + overflow: hidden; +} +.samples .s .tag { + font-family: var(--mono); + font-size: 11px; + color: var(--ink2); + margin-bottom: 6px; +} +.samples .s .txt { + font-size: 26px; + line-height: 1.25; + white-space: nowrap; +} +.glyphgrid { + display: grid; + grid-template-columns: 
repeat(auto-fill, minmax(64px, 1fr)); + gap: 7px; +} +.glyphgrid .g { + position: relative; + height: 66px; + border: 1px solid var(--line); + border-radius: 6px; + background: var(--surface); + display: flex; + align-items: center; + justify-content: center; + overflow: hidden; +} +.glyphgrid .g span { + position: absolute; + font-size: 46px; + line-height: 1; + mix-blend-mode: multiply; +} +.glyphgrid .g .ref { + color: var(--diff-ref); +} +.glyphgrid .g .cand { + color: var(--diff-cand); +} +.glyphgrid .g label { + position: absolute; + bottom: 2px; + right: 4px; + font-family: var(--mono); + font-size: 8px; + color: var(--ink3); +} + +.cols { + display: grid; + grid-template-columns: 1fr 300px; + gap: 22px; + align-items: start; +} +.datarows { + font-family: var(--mono); + font-size: 13px; +} +.datarows .dr { + display: flex; + justify-content: space-between; + gap: 12px; + padding: 7px 0; + border-bottom: 1px solid var(--hair); +} +.datarows .dr:last-child { + border-bottom: 0; +} +.datarows .dr .k { + color: var(--ink2); +} +.datarows .dr .v { + color: var(--ink); + text-align: right; +} +.worst { + display: flex; + gap: 8px; + flex-wrap: wrap; +} +.worst .w { + font-family: var(--mono); + font-size: 12px; + border: 1px solid var(--line); + border-radius: 5px; + padding: 3px 7px; + background: var(--surface); +} + +/* empty / status state */ +.empty { + height: 100%; + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + text-align: center; + color: var(--ink2); + padding: 40px; +} +.empty .big { + font-size: 18px; + color: var(--ink); + margin-bottom: 6px; +} + +/* footer */ +.foot { + flex: 0 0 auto; + display: flex; + justify-content: flex-start; + align-items: center; + padding: 9px 18px; + border-top: 1px solid var(--line); + background: var(--surface); +} diff --git a/tools/corpus/bakeoff.test.ts b/tools/corpus/bakeoff.test.ts new file mode 100644 index 0000000..8836833 --- /dev/null +++ 
/**
 * Build a table row from the real scoring helpers so the rendered output reflects genuine
 * non-visual results rather than hand-written numbers.
 *
 * The three sampled codepoints (A, B, C) all receive the same advance on each side, and both
 * fonts are described with identical weight/width classes.
 *
 * @param label - Candidate label placed on the row.
 * @param referenceAdvance - Advance given to every sampled codepoint of the reference.
 * @param candidateAdvance - Advance given to every sampled codepoint of the candidate.
 * @param extra - Optional row fields (for example `visual` or `rank`) spread over the computed ones.
 * @returns A complete `BakeoffRow`.
 */
function rowFor(
  label: string,
  referenceAdvance: number,
  candidateAdvance: number,
  extra: Partial<BakeoffRow> = {},
): BakeoffRow {
  const sample = [0x41, 0x42, 0x43];
  // One uniform advance map per side, keyed by codepoint.
  const advancesAt = (advance: number): Map<number, number> =>
    new Map(sample.map((cp) => [cp, advance] as const));
  return {
    label,
    score: scoreAdvances(
      advancesAt(referenceAdvance),
      advancesAt(candidateAdvance),
      sample,
    ),
    feature: featureDistance(
      { weightClass: 400, widthClass: 5 },
      { weightClass: 400, widthClass: 5 },
    ),
    ...extra,
  };
}
expect(args.candidates).toEqual([]); + }); + + test("accepts --model monospace and the --visual flag", () => { + const args = parseArgs(["--model", "monospace", "--visual"]); + expect(args.model).toBe("monospace"); + expect(args.visual).toBe(true); + }); + + test("parses ranks keyed by label and keeps the note verbatim", () => { + const args = parseArgs([ + "--candidate", + "A=/a.ttf", + "--rank", + "A=good|review", + ]); + expect(args.ranks.get("A")).toBe("good|review"); + }); + + test("splits a labeled value at the first '=' only", () => { + const args = parseArgs(["--candidate", "A=/path/with=equals.ttf"]); + expect(args.candidates[0]).toEqual({ + label: "A", + path: "/path/with=equals.ttf", + }); + }); + + test("rejects a candidate without a Label=value shape", () => { + expect(() => parseArgs(["--candidate", "/no/label.ttf"])).toThrow( + /Label=value/, + ); + expect(() => parseArgs(["--candidate", "OnlyLabel="])).toThrow( + /Label=value/, + ); + }); + + test("rejects duplicate candidate labels", () => { + expect(() => + parseArgs(["--candidate", "A=/a.ttf", "--candidate", "A=/b.ttf"]), + ).toThrow(/duplicate candidate label/); + }); + + test("rejects an unknown model and a missing value", () => { + expect(() => parseArgs(["--model", "serif"])).toThrow(/--model requires/); + expect(() => parseArgs(["--reference"])).toThrow(/requires a value/); + }); + + test("rejects unknown arguments", () => { + expect(() => parseArgs(["--bogus"])).toThrow(/unknown argument/); + }); +}); + +// --- Non-visual table output ------------------------------------------------ + +describe("renderBakeoff", () => { + test("prints the metric columns and preserves candidate order", () => { + const report = renderBakeoff( + [rowFor("Match", 0.5, 0.5), rowFor("Off", 0.5, 0.9)], + { visual: false }, + ); + const lines = report.split("\n"); + expect(lines[0]).toContain("candidate"); + expect(lines[0]).toContain("tier"); + expect(lines[0]).toContain("mean"); + 
expect(lines[0]).toContain("fscore"); + expect(lines[0]).toContain("fcov"); + expect(lines[0]).toContain("flags"); + // Input order is preserved; the bake-off does not re-rank the chosen set. + expect(lines[1]).toContain("Match"); + expect(lines[1]).toContain("metric_safe"); + expect(lines[2]).toContain("Off"); + expect(lines[2]).toContain("visual_only"); + }); + + test("omits the vdiff column unless the visual probe ran", () => { + const rows = [rowFor("Match", 0.5, 0.5)]; + expect(renderBakeoff(rows, { visual: false })).not.toContain("vdiff"); + const withVisual = renderBakeoff( + [rowFor("Match", 0.5, 0.5, { visual: 0 })], + { + visual: true, + }, + ); + expect(withVisual.split("\n")[0]).toContain("vdiff"); + expect(withVisual.split("\n")[1]).toContain("0.0000"); + }); + + test("shows the rank column only when a note is present", () => { + expect( + renderBakeoff([rowFor("A", 0.5, 0.5)], { visual: false }), + ).not.toContain("rank"); + const ranked = renderBakeoff( + [rowFor("A", 0.5, 0.5, { rank: "good" }), rowFor("B", 0.5, 0.5)], + { visual: false }, + ); + expect(ranked.split("\n")[0]).toContain("rank"); + expect(ranked.split("\n")[1]).toContain("good"); + // A candidate without a note fills the column with a placeholder. 
/**
 * Launch the bake-off CLI under `bun run` with the given arguments.
 *
 * @returns `{ status: 0, stderr: "" }` when the process exits cleanly; otherwise the failure's
 * exit status (1 when none is reported) and whatever it wrote to stderr, as text.
 */
function run(argv: string[]): { status: number; stderr: string } {
  const command = ["run", BAKEOFF_CLI, ...argv];
  try {
    execFileSync("bun", command, {
      encoding: "utf8",
      stdio: ["ignore", "pipe", "pipe"],
    });
  } catch (err) {
    const failure = err as { status?: number; stderr?: Buffer | string };
    // `??` rather than a destructuring default: a failed spawn reports null, not undefined.
    const stderrText = (failure.stderr ?? "").toString();
    const status = failure.status ?? 1;
    return { status, stderr: stderrText };
  }
  return { status: 0, stderr: "" };
}
/** One manually chosen candidate: the label shown in the table and the path of its font file. */
export interface BakeoffCandidate {
  label: string;
  path: string;
}

/** Options collected from the bake-off command line. */
export interface ParsedArgs {
  /** `--reference` value; undefined when the flag is absent. */
  reference?: string;
  /** `--family` value; undefined when the flag is absent. */
  family?: string;
  /** `--candidate` entries, in the order they were given. */
  candidates: BakeoffCandidate[];
  /** `--rank` notes keyed by candidate label; a repeated label keeps the last note. */
  ranks: Map<string, string>;
  /** `--model` value; starts as "latin". */
  model: CompareModel;
  /** True once `--visual` has been seen. */
  visual: boolean;
}

/**
 * Split a `Label=value` argument at the first "=" and trim both halves.
 *
 * @param flag - Flag name, used only in the error message.
 * @param raw - The raw argument text.
 * @returns The `[label, value]` pair.
 * @throws Error when there is no "=", when it is the first character, or when either half is
 * empty after trimming.
 */
function parseLabeledValue(flag: string, raw: string): [string, string] {
  const malformed = (): Error =>
    new Error(`${flag} expects "Label=value", got "${raw}"`);
  const eq = raw.indexOf("=");
  if (eq <= 0) throw malformed();
  const label = raw.slice(0, eq).trim();
  // Everything after the first "=" belongs to the value, so paths may themselves contain "=".
  const value = raw.slice(eq + 1).trim();
  if (!label || !value) throw malformed();
  return [label, value];
}

/**
 * Parse the bake-off command line.
 *
 * Recognised flags: `--reference <path>`, `--family <name>`, repeatable
 * `--candidate "Label=/path"`, repeatable `--rank "Label=note"`, `--model latin|monospace`,
 * and the valueless `--visual`.
 *
 * @param argv - Arguments after the script name.
 * @returns The collected options; `model` defaults to "latin" and `visual` to false.
 * @throws Error on an unknown flag, a flag missing its value (a following token that starts
 * with "--" counts as missing), a malformed `Label=value`, a duplicate candidate label, or an
 * unsupported `--model`.
 */
export function parseArgs(argv: string[]): ParsedArgs {
  const args: ParsedArgs = {
    candidates: [],
    ranks: new Map<string, string>(),
    model: "latin",
    visual: false,
  };
  const readValue = (flag: string, index: number): string => {
    const value = argv[index + 1];
    if (!value || value.startsWith("--"))
      throw new Error(`${flag} requires a value`);
    return value;
  };
  for (let i = 0; i < argv.length; i++) {
    const flag = argv[i];
    switch (flag) {
      case "--reference":
        args.reference = readValue(flag, i);
        i++;
        break;
      case "--family":
        args.family = readValue(flag, i);
        i++;
        break;
      case "--candidate": {
        const [label, path] = parseLabeledValue(flag, readValue(flag, i));
        if (args.candidates.some((c) => c.label === label))
          throw new Error(`duplicate candidate label: ${label}`);
        args.candidates.push({ label, path });
        i++;
        break;
      }
      case "--rank": {
        const [label, note] = parseLabeledValue(flag, readValue(flag, i));
        args.ranks.set(label, note);
        i++;
        break;
      }
      case "--model": {
        const value = readValue(flag, i);
        if (value !== "latin" && value !== "monospace")
          throw new Error("--model requires 'latin' or 'monospace'");
        args.model = value;
        i++;
        break;
      }
      case "--visual":
        args.visual = true;
        break;
      default:
        throw new Error(`unknown argument: ${flag}`);
    }
  }
  return args;
}

/** One rendered table row: a candidate's advance score and feature distance, plus optional extras. */
export interface BakeoffRow {
  label: string;
  score: CompareScore;
  feature: FeatureDistance;
  /** Rendered-glyph difference; present only after the visual probe has run. */
  visual?: number;
  /** Maintainer's `--rank` note for this candidate, when one was given. */
  rank?: string;
}

/**
 * Score one candidate font against the reference.
 *
 * The advance score always reports over `LATIN_SAMPLE`; the tier sample is `LATIN_TEXT_SAMPLE`
 * for the "latin" model and `LATIN_SAMPLE` otherwise.
 *
 * @param reference - Sampled reference metrics, as produced by `sampleMetrics`.
 *   NOTE(review): typed here as codepoint -> advance (`ReadonlyMap<number, number>`); confirm
 *   against the declared return type of `sampleMetrics`.
 * @param referenceFeatures - Features parsed from the reference font.
 * @param bytes - Raw bytes of the candidate font file.
 * @param model - Comparison model forwarded to `scoreAdvances`.
 * @returns The advance score and the feature distance for this candidate.
 */
export function scoreCandidate(
  reference: ReadonlyMap<number, number>,
  referenceFeatures: FontFeatures,
  bytes: Uint8Array,
  model: CompareModel,
): { score: CompareScore; feature: FeatureDistance } {
  const score = scoreAdvances(reference, sampleMetrics(parseFont(bytes)), {
    reportSample: LATIN_SAMPLE,
    tierSample: model === "latin" ? LATIN_TEXT_SAMPLE : LATIN_SAMPLE,
    model,
  });
  const feature = featureDistance(referenceFeatures, parseFeatures(bytes));
  return { score, feature };
}
/**
 * Render the bake-off table with candidates in caller order.
 *
 * The rows are never re-sorted: the maintainer picked this set by hand, often already ordered
 * by eye, and the table is meant to be read against that ordering.
 *
 * @param rows - One row per candidate.
 * @param options - `visual` adds the `vdiff` column. The `rank` column appears only when at
 * least one row carries a rank note; rows without one show "-".
 * @returns The formatted table text.
 */
export function renderBakeoff(
  rows: BakeoffRow[],
  options: { visual: boolean },
): string {
  const hasRanks = rows.some((row) => row.rank !== undefined);
  const header = [
    "candidate",
    "tier",
    "mean",
    "max",
    "coverage",
    "fscore",
    "fcov",
    "flags",
  ];
  if (options.visual) header.push("vdiff");
  if (hasRanks) header.push("rank");

  const body = rows.map((row) => {
    const cells = [
      row.label,
      row.score.tier,
      formatDelta(row.score.meanDelta),
      formatDelta(row.score.maxDelta),
      `${row.score.compared}/${row.score.total}`,
      formatFeatureScore(row.feature),
      formatFeatureCoverage(row.feature),
      formatFlags(row.score.tier, row.feature),
    ];
    // Optional columns are appended in the same order as their headers above.
    if (options.visual) cells.push(formatVisualDiff(row.visual));
    if (hasRanks) cells.push(row.rank ?? "-");
    return cells;
  });

  return formatTable(header, body);
}

/**
 * Copy `--rank` notes onto the rows with matching labels.
 *
 * @param rows - Rows to annotate in place.
 * @param ranks - Notes keyed by candidate label.
 * @throws Error when a rank label matches no row; no row is modified in that case.
 */
function attachRanks(rows: BakeoffRow[], ranks: Map<string, string>): void {
  const labels = new Set(rows.map((row) => row.label));
  // Validate every label first so a typo fails loudly instead of being silently dropped.
  for (const label of ranks.keys())
    if (!labels.has(label))
      throw new Error(
        `--rank label "${label}" does not match any --candidate label`,
      );
  for (const row of rows) {
    const note = ranks.get(row.label);
    if (note !== undefined) row.rank = note;
  }
}

/**
 * Run the visual probe for every row, filling in `row.visual`. Renders into a unique child directory of
 * the cache root and removes only that child, so a misconfigured `DOCFONTS_BAKEOFF_CACHE` can never make
 * cleanup delete unrelated files: the root is left untouched.
 *
 * @param referencePath - Path of the reference font file.
 * @param candidates - Candidates in the same order as `rows`.
 * @param rows - Rows to annotate in place; `rows[i]` must belong to `candidates[i]`.
 * @throws Error when `rows` and `candidates` differ in length, checked before anything is
 * written to disk.
 */
function runVisualProbe(
  referencePath: string,
  candidates: BakeoffCandidate[],
  rows: BakeoffRow[],
): void {
  // Rows and candidates are paired by index below, so a mismatch would mislabel results.
  if (rows.length !== candidates.length)
    throw new Error(
      `visual probe needs one row per candidate: got ${rows.length} row(s) for ${candidates.length} candidate(s)`,
    );
  const cacheRoot = process.env.DOCFONTS_BAKEOFF_CACHE ?? DEFAULT_PROBE_DIR;
  mkdirSync(cacheRoot, { recursive: true });
  const probeDir = mkdtempSync(join(cacheRoot, "probe-"));
  try {
    const referencePng = join(probeDir, "reference.png");
    renderGlyphGrid(referencePath, referencePng);
    candidates.forEach((candidate, index) => {
      const candidatePng = join(probeDir, `candidate-${index}.png`);
      renderGlyphGrid(candidate.path, candidatePng);
      rows[index].visual = compareImages(referencePng, candidatePng);
    });
  } finally {
    // Remove the probe directory even when rendering or comparison throws.
    rmSync(probeDir, { recursive: true, force: true });
  }
}

/**
 * CLI entry point: validate the arguments, score every candidate against the reference,
 * optionally run the visual probe, and print a summary line followed by the table.
 *
 * @throws Error when `--reference` is missing or not on disk, when no `--candidate` was given,
 * or when a candidate path does not exist. `requireMagick()` is called before any font is read
 * when `--visual` is set.
 */
function main(): void {
  const args = parseArgs(process.argv.slice(2));

  if (!args.reference)
    throw new Error(
      "missing --reference: pass the path to the reference font file.",
    );
  if (!existsSync(args.reference))
    throw new Error(`reference font not found: ${args.reference}`);
  if (args.candidates.length === 0)
    throw new Error(
      'missing --candidate: pass at least one "Label=/path/to/font.ttf".',
    );
  for (const candidate of args.candidates)
    if (!existsSync(candidate.path))
      throw new Error(
        `candidate font not found for "${candidate.label}": ${candidate.path}`,
      );
  if (args.visual) requireMagick();

  const referenceBytes = readFileSync(args.reference);
  const reference = sampleMetrics(parseFont(referenceBytes));
  const referenceFeatures = parseFeatures(referenceBytes);

  // Rows are built in candidate order; runVisualProbe relies on that alignment.
  const rows: BakeoffRow[] = args.candidates.map((candidate) => {
    const { score, feature } = scoreCandidate(
      reference,
      referenceFeatures,
      readFileSync(candidate.path),
      args.model,
    );
    return { label: candidate.label, score, feature };
  });
  attachRanks(rows, args.ranks);
  if (args.visual) runVisualProbe(args.reference, args.candidates, rows);

  const label = args.family ?? "(family not specified)";
  console.log(
    `reference ${basename(args.reference)} as "${label}" vs ${rows.length} candidate(s); model ${args.model}; visual ${args.visual ? "on" : "off"}\n`,
  );
  console.log(renderBakeoff(rows, { visual: args.visual }));
  if (args.visual)
    console.log(
      "\nvdiff is an experimental rendered-glyph difference (0 = identical), not a verdict. Advance tier and fscore stay the primary signals.",
    );
}
headerSize; + const offsets = fonts.map((font) => { + offset = (offset + 3) & ~3; + const at = offset; + offset += font.byteLength; + return at; + }); + const out = new Uint8Array((offset + 3) & ~3); + out.set([...tag("ttcf"), ...u32(0x00010000), ...u32(fonts.length)], 0); + offsets.forEach((at, index) => { + out.set(u32(at), 12 + index * 4); + }); + fonts.forEach((font, index) => { + const at = offsets[index]; + const shifted = new Uint8Array(font); + const view = new DataView(shifted.buffer); + const numTables = view.getUint16(4); + for (let i = 0; i < numTables; i++) { + const recordOffset = 12 + i * 16; + view.setUint32(recordOffset + 8, view.getUint32(recordOffset + 8) + at); + } + out.set(shifted, at); + }); + return out; +} + // --- Latin sample ----------------------------------------------------------- describe("LATIN_SAMPLE", () => { @@ -399,9 +431,31 @@ describe("parseFont", () => { expect(() => parseFont(noCmap)).toThrow(/missing required table/); }); - test("throws on a font collection container", () => { - const ttcf = new Uint8Array([...u32(0x74746366), ...u32(0), ...u32(0)]); - expect(() => parseFont(ttcf)).toThrow(/collection/); + test("reads a selected font from a collection container", () => { + const collection = buildCollection([ + syntheticFont([500, 600, 300, 750]), + syntheticFont([500, 900, 300, 750]), + ]); + expect(parseFont(collection).normalizedAdvance(0x41)).toBeCloseTo(0.6, 10); + expect(parseFont(collection, 1).normalizedAdvance(0x41)).toBeCloseTo( + 0.9, + 10, + ); + }); + + test("extracts a collection member into a standalone SFNT", () => { + const collection = buildCollection([ + syntheticFont([500, 600, 300, 750]), + syntheticFont([500, 900, 300, 750]), + ]); + const extracted = extractFont(collection, 1); + expect(extracted[0]).not.toBe("t".charCodeAt(0)); + expect(parseFont(extracted).normalizedAdvance(0x41)).toBeCloseTo(0.9, 10); + }); + + test("rejects an out-of-range collection index", () => { + const collection = 
/**
 * Lay down a fake `github-tree` source in the cache: one synthetic font file per name under
 * `<cacheDir>/<sourceId>/`, plus the snapshot entry describing those files.
 *
 * @returns The snapshot entry, with `family` equal to `sourceId` and each file's `path` written
 * as `<sourceId>/<name>`.
 */
function writeGithubTreeSource(
  cacheDir: string,
  sourceId: string,
  names: string[],
): SnapshotSource {
  mkdirSync(join(cacheDir, sourceId), { recursive: true });
  // Write each font and record its snapshot entry in the same pass, in input order.
  const files = names.map((name) => {
    writeFileSync(join(cacheDir, sourceId, name), syntheticFont());
    return { name, path: `${sourceId}/${name}` };
  });
  return {
    sourceId,
    family: sourceId,
    targetFamilies: ["Some Proprietary"],
    kind: "github-tree",
    files,
  };
}

/** Serialize the given sources as `{ snapshots: [...] }` into `<cacheDir>/source-snapshot.json`. */
function writeSnapshot(cacheDir: string, sources: SnapshotSource[]): void {
  const snapshotPath = join(cacheDir, "source-snapshot.json");
  const payload = JSON.stringify({ snapshots: sources });
  writeFileSync(snapshotPath, payload);
}
}); + + test("skips sources whose cache files are missing rather than failing", () => { + const cacheDir = mkdtempSync(join(tmpdir(), "docfonts-corpus-skip-")); + try { + const present = writeGithubTreeSource(cacheDir, "google-present", [ + "Present-Regular.ttf", + ]); + // An archive source with no cached archive: listing it throws, so it is skipped, not fatal. + const missing: SnapshotSource = { + sourceId: "missing-archive", + family: "Missing", + targetFamilies: ["X"], + kind: "archive", + archiveFormat: "tar.gz", + }; + writeSnapshot(cacheDir, [present, missing]); + expect(listCorpusFonts(cacheDir)).toEqual([ + { sourceId: "google-present", file: "Present-Regular.ttf" }, + ]); + } finally { + rmSync(cacheDir, { recursive: true, force: true }); + } + }); +}); + +describe("compareReferenceToTarget", () => { + test("scores the reference against one named corpus font", () => { + const cacheDir = mkdtempSync(join(tmpdir(), "docfonts-target-")); + try { + const source = writeGithubTreeSource(cacheDir, "google-target", [ + "Wanted-Regular.ttf", + "Other-Regular.ttf", + ]); + writeSnapshot(cacheDir, [source]); + const row = compareReferenceToTarget(syntheticFont(), { + cacheDir, + sourceId: "google-target", + file: "Wanted-Regular.ttf", + model: "latin", + }); + expect(row.sourceId).toBe("google-target"); + expect(row.file).toBe("Wanted-Regular.ttf"); + // The font scored against itself has zero advance deltas over the shared glyphs. 
+ expect(row.score.meanDelta).toBe(0); + expect(row.score.maxDelta).toBe(0); + expect(row.bytes.length).toBeGreaterThan(0); + } finally { + rmSync(cacheDir, { recursive: true, force: true }); + } + }); + + test("throws for an unknown source or font", () => { + const cacheDir = mkdtempSync(join(tmpdir(), "docfonts-target-miss-")); + try { + const source = writeGithubTreeSource(cacheDir, "google-target", [ + "Wanted-Regular.ttf", + ]); + writeSnapshot(cacheDir, [source]); + expect(() => + compareReferenceToTarget(syntheticFont(), { + cacheDir, + sourceId: "nope", + file: "Wanted-Regular.ttf", + }), + ).toThrow(/source not in cache/); + expect(() => + compareReferenceToTarget(syntheticFont(), { + cacheDir, + sourceId: "google-target", + file: "missing.ttf", + }), + ).toThrow(/font not found/); + } finally { + rmSync(cacheDir, { recursive: true, force: true }); + } + }); +}); diff --git a/tools/corpus/compare.ts b/tools/corpus/compare.ts index c5a4b30..b4e6424 100644 --- a/tools/corpus/compare.ts +++ b/tools/corpus/compare.ts @@ -4,27 +4,42 @@ */ import { existsSync, readFileSync } from "node:fs"; import { basename, join } from "node:path"; -import { - archiveFormatOf, - collectCandidates, - loadSnapshot, - requireArchiveTool, - type SnapshotSource, -} from "./src/cache"; -import { parseFont, sampleMetrics } from "./src/font"; +import { compareReferenceToCorpus } from "./src/compare-engine"; import { renderReport } from "./src/report"; import { LATIN_SAMPLE, LATIN_TEXT_SAMPLE } from "./src/samples"; -import { type CompareScore, scoreAdvances } from "./src/score"; import type { CompareModel } from "./src/tiers"; export { - archiveFormatOf, collectCandidates, + listCandidateFiles, loadSnapshot, requireArchiveTool, type SnapshotSource, } from "./src/cache"; -export { type FontMetrics, parseFont, sampleMetrics } from "./src/font"; +export { + type CorpusFont, + compareReferenceToCorpus, + compareReferenceToTarget, + listCorpusFonts, + requireArchiveTools, + 
scoreCandidateBytes, + selectSources, +} from "./src/compare-engine"; +export { + DEFAULT_FEATURE_WEIGHTS, + FEATURE_COUNT, + type FeatureDistance, + type FeatureWeights, + type FontFeatures, + featureDistance, + parseFeatures, +} from "./src/features"; +export { + extractFont, + type FontMetrics, + parseFont, + sampleMetrics, +} from "./src/font"; export { renderReport } from "./src/report"; export { LATIN_SAMPLE, LATIN_TEXT_SAMPLE } from "./src/samples"; export { @@ -42,12 +57,6 @@ export { const REPO_DIR = join(import.meta.dir, "..", ".."); const DEFAULT_CACHE_DIR = join(REPO_DIR, ".cache", "corpus"); -interface CompareRow { - sourceId: string; - file: string; - score: CompareScore; -} - export interface ParsedArgs { reference?: string; family?: string; @@ -111,55 +120,6 @@ export function parseArgs(argv: string[]): ParsedArgs { return args; } -function selectSources( - snapshot: SnapshotSource[], - requestedIds: string[], -): SnapshotSource[] { - if (requestedIds.length === 0) return snapshot; - - const byId = new Map(snapshot.map((source) => [source.sourceId, source])); - const unknown = requestedIds.filter((id) => !byId.has(id)); - if (unknown.length > 0) - throw new Error( - `source(s) not in cache: ${unknown.join(", ")}. Acquired: ${[...byId.keys()].join(", ")}`, - ); - return requestedIds.map((id) => byId.get(id) as SnapshotSource); -} - -function scoreSources( - reference: ReadonlyMap, - selected: SnapshotSource[], - cacheDir: string, - model: CompareModel, -): { rows: CompareRow[]; skipped: number } { - const rows: CompareRow[] = []; - let skipped = 0; - for (const source of selected) { - for (const candidate of collectCandidates(source, cacheDir)) { - try { - const font = parseFont(candidate.bytes); - const score = scoreAdvances(reference, sampleMetrics(font), { - reportSample: LATIN_SAMPLE, - tierSample: model === "latin" ? 
LATIN_TEXT_SAMPLE : LATIN_SAMPLE, - model, - }); - rows.push({ sourceId: source.sourceId, file: candidate.file, score }); - } catch { - skipped++; - } - } - } - return { rows, skipped }; -} - -function requireArchiveTools(selected: SnapshotSource[]): void { - const archiveSources = selected.filter( - (source) => source.kind !== "github-tree", - ); - for (const format of new Set(archiveSources.map(archiveFormatOf))) - requireArchiveTool(format); -} - function main(): void { const args = parseArgs(process.argv.slice(2)); @@ -171,29 +131,28 @@ function main(): void { throw new Error(`reference font not found: ${args.reference}`); const cacheDir = process.env.DOCFONTS_SOURCE_CACHE ?? DEFAULT_CACHE_DIR; - const selected = selectSources(loadSnapshot(cacheDir), args.sources); - requireArchiveTools(selected); - - const reference = sampleMetrics(parseFont(readFileSync(args.reference))); - const { rows, skipped } = scoreSources( - reference, - selected, - cacheDir, - args.model, + const referenceBytes = readFileSync(args.reference); + const { rows, totalRows, skipped } = compareReferenceToCorpus( + referenceBytes, + { + cacheDir, + sources: args.sources, + model: args.model, + limit: args.limit, + }, ); const label = args.family ?? "(family not specified)"; - const shown = - args.limit === null ? rows.length : Math.min(args.limit, rows.length); + const shown = rows.length; const skippedText = skipped === 0 ? "" : `; skipped ${skipped} unsupported`; const modelText = args.model === "latin" ? 
`; tier/mean/max ${LATIN_TEXT_SAMPLE.length} text codepoints` : `; model ${args.model}`; console.log( - `reference ${basename(args.reference)} as "${label}" vs ${rows.length} candidate(s) over ${LATIN_SAMPLE.length} Latin codepoints${modelText}; showing ${shown}${skippedText}\n`, + `reference ${basename(args.reference)} as "${label}" vs ${totalRows} candidate(s) over ${LATIN_SAMPLE.length} Latin codepoints${modelText}; showing ${shown}${skippedText}\n`, ); - console.log(renderReport(rows, { limit: args.limit })); + console.log(renderReport(rows, { limit: null })); } if (import.meta.main) { diff --git a/tools/corpus/features.test.ts b/tools/corpus/features.test.ts new file mode 100644 index 0000000..a2d77ce --- /dev/null +++ b/tools/corpus/features.test.ts @@ -0,0 +1,447 @@ +import { describe, expect, test } from "bun:test"; +import { + type FontFeatures, + featureDistance, + parseFeatures, + renderReport, + scoreAdvances, +} from "./compare"; + +// --- Synthetic SFNT builder ------------------------------------------------- +// +// `parseFeatures` only needs `openFont` to validate the container, which reads head.unitsPerEm and +// checks that the five required tables exist. It never parses cmap/hmtx here, so those tables can be +// empty stubs. We attach configurable OS/2 and post tables to exercise the feature reader. 
+ +const UNITS_PER_EM = 1000; + +function u16(value: number): number[] { + return [(value >> 8) & 0xff, value & 0xff]; +} +function i16(value: number): number[] { + return u16(value & 0xffff); +} +function u32(value: number): number[] { + return [ + (value >>> 24) & 0xff, + (value >>> 16) & 0xff, + (value >>> 8) & 0xff, + value & 0xff, + ]; +} +function tag(name: string): number[] { + return [...name].map((c) => c.charCodeAt(0)); +} +function writeI16(bytes: number[], at: number, value: number): void { + const [hi, lo] = i16(value); + bytes[at] = hi; + bytes[at + 1] = lo; +} + +function headTable(): number[] { + const bytes = new Array(54).fill(0); + bytes.splice(0, 4, ...u32(0x00010000)); // version + bytes.splice(18, 2, ...u16(UNITS_PER_EM)); // unitsPerEm @ offset 18 + return bytes; +} + +interface Os2Spec { + version: number; + weightClass?: number; + widthClass?: number; + panose?: number[]; + sxHeight?: number; + sCapHeight?: number; +} + +/** Build an OS/2 table whose length matches the requested version, with the given fields set. */ +function os2Table(spec: Os2Spec): number[] { + const length = spec.version >= 2 ? 96 : spec.version === 1 ? 86 : 78; + const bytes = new Array(length).fill(0); + writeI16(bytes, 0, spec.version); + if (spec.weightClass !== undefined) writeI16(bytes, 4, spec.weightClass); + if (spec.widthClass !== undefined) writeI16(bytes, 6, spec.widthClass); + if (spec.panose) for (let i = 0; i < 10; i++) bytes[32 + i] = spec.panose[i]; + if (spec.version >= 2) { + if (spec.sxHeight !== undefined) writeI16(bytes, 86, spec.sxHeight); + if (spec.sCapHeight !== undefined) writeI16(bytes, 88, spec.sCapHeight); + } + return bytes; +} + +/** Build a post table (version 3.0) with the given italicAngle in degrees (16.16 fixed). 
*/ +function postTable(italicAngleDegrees: number): number[] { + const bytes = new Array(32).fill(0); + bytes.splice(0, 4, ...u32(0x00030000)); // version 3.0 + bytes.splice(4, 4, ...u32((italicAngleDegrees * 0x10000) & 0xffffffff)); // italicAngle @ 4 + return bytes; +} + +function buildFont(tables: { name: string; data: number[] }[]): Uint8Array { + const numTables = tables.length; + const headerSize = 12 + numTables * 16; + let offset = headerSize; + const placed = tables.map((t) => { + const at = offset; + offset += t.data.length; + offset = (offset + 3) & ~3; // 4-byte align + return { ...t, offset: at }; + }); + + const header = [ + ...u32(0x00010000), // sfntVersion + ...u16(numTables), + ...u16(0), + ...u16(0), + ...u16(0), + ]; + const directory = placed.flatMap((t) => [ + ...tag(t.name), + ...u32(0), // checksum (ignored) + ...u32(t.offset), + ...u32(t.data.length), + ]); + + const bytes = new Uint8Array(offset); + bytes.set([...header, ...directory], 0); + for (const t of placed) bytes.set(t.data, t.offset); + return bytes; +} + +/** A minimal valid SFNT plus the optional tables a caller wants to test. 
*/ +function fontWith(extra: { name: string; data: number[] }[]): Uint8Array { + return buildFont([ + { name: "cmap", data: [0, 0] }, + { name: "head", data: headTable() }, + { name: "hhea", data: new Array(36).fill(0) }, + { name: "hmtx", data: [0, 0, 0, 0] }, + { name: "maxp", data: [...u32(0x00005000), ...u16(1)] }, + ...extra, + ]); +} + +// --- OS/2 parsing ----------------------------------------------------------- + +describe("parseFeatures OS/2", () => { + test("reads weight class and width class", () => { + const features = parseFeatures( + fontWith([ + { + name: "OS/2", + data: os2Table({ version: 4, weightClass: 700, widthClass: 5 }), + }, + ]), + ); + expect(features.weightClass).toBe(700); + expect(features.widthClass).toBe(5); + }); + + test("treats an all-zero PANOSE as unset", () => { + const features = parseFeatures( + fontWith([ + { + name: "OS/2", + data: os2Table({ + version: 4, + panose: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + }), + }, + ]), + ); + expect(features.panose).toBeUndefined(); + }); + + test("treats a family-byte-only PANOSE [2,0,...] 
as unset", () => { + const features = parseFeatures( + fontWith([ + { + name: "OS/2", + data: os2Table({ + version: 4, + panose: [2, 0, 0, 0, 0, 0, 0, 0, 0, 0], + }), + }, + ]), + ); + expect(features.panose).toBeUndefined(); + }); + + test("keeps a PANOSE with real style data", () => { + const panose = [2, 11, 6, 4, 2, 2, 2, 2, 2, 4]; + const features = parseFeatures( + fontWith([{ name: "OS/2", data: os2Table({ version: 4, panose }) }]), + ); + expect(features.panose).toEqual(panose); + }); + + test("normalizes positive sxHeight and sCapHeight by unitsPerEm", () => { + const features = parseFeatures( + fontWith([ + { + name: "OS/2", + data: os2Table({ version: 2, sxHeight: 520, sCapHeight: 700 }), + }, + ]), + ); + expect(features.xHeight).toBeCloseTo(520 / UNITS_PER_EM, 10); + expect(features.capHeight).toBeCloseTo(700 / UNITS_PER_EM, 10); + }); + + test("treats zero heights as unset", () => { + const features = parseFeatures( + fontWith([ + { + name: "OS/2", + data: os2Table({ version: 2, sxHeight: 0, sCapHeight: 0 }), + }, + ]), + ); + expect(features.xHeight).toBeUndefined(); + expect(features.capHeight).toBeUndefined(); + }); + + test("treats an old OS/2 without height fields as missing heights", () => { + // Version 0 has no sxHeight/sCapHeight fields at all. 
+ const features = parseFeatures( + fontWith([ + { name: "OS/2", data: os2Table({ version: 0, weightClass: 400 }) }, + ]), + ); + expect(features.weightClass).toBe(400); + expect(features.xHeight).toBeUndefined(); + expect(features.capHeight).toBeUndefined(); + }); + + test("leaves every OS/2 feature unset when the table is absent", () => { + const features = parseFeatures(fontWith([])); + expect(features.weightClass).toBeUndefined(); + expect(features.widthClass).toBeUndefined(); + expect(features.xHeight).toBeUndefined(); + expect(features.capHeight).toBeUndefined(); + expect(features.panose).toBeUndefined(); + }); +}); + +// --- post parsing ----------------------------------------------------------- + +describe("parseFeatures post", () => { + test("reads a slanted italic angle", () => { + const features = parseFeatures( + fontWith([{ name: "post", data: postTable(-12) }]), + ); + expect(features.italicAngle).toBeCloseTo(-12, 10); + }); + + test("reads an upright zero angle as real data, not missing", () => { + const features = parseFeatures( + fontWith([{ name: "post", data: postTable(0) }]), + ); + expect(features.italicAngle).toBe(0); + }); + + test("leaves italic angle unset when post is absent", () => { + const features = parseFeatures(fontWith([])); + expect(features.italicAngle).toBeUndefined(); + }); +}); + +// --- Feature distance ------------------------------------------------------- + +describe("featureDistance", () => { + const reference: FontFeatures = { + weightClass: 400, + widthClass: 5, + xHeight: 0.5, + capHeight: 0.7, + italicAngle: 0, + panose: [2, 11, 6, 4, 2, 2, 2, 2, 2, 4], + }; + + test("is zero for identical features and counts all of them", () => { + const result = featureDistance(reference, reference); + expect(result.score).toBe(0); + expect(result.compared).toBe(6); + expect(result.missing).toBe(0); + expect(result.total).toBe(6); + expect(result.gaps).toEqual([]); + }); + + test("skips features the candidate does not declare", () 
=> { + const candidate: FontFeatures = { weightClass: 400, xHeight: 0.5 }; + const result = featureDistance(reference, candidate); + expect(result.compared).toBe(2); + expect(result.missing).toBe(4); + expect(result.score).toBe(0); // both compared features match exactly + expect(result.gaps).toEqual([]); + }); + + test("returns NaN and zero coverage when nothing overlaps", () => { + const result = featureDistance(reference, {}); + expect(Number.isNaN(result.score)).toBe(true); + expect(result.compared).toBe(0); + expect(result.missing).toBe(6); + expect(result.gaps).toEqual([]); + }); + + test("grows with divergence and stays in [0, 1]", () => { + const far: FontFeatures = { + weightClass: 900, + widthClass: 9, + xHeight: 0.3, + capHeight: 0.9, + italicAngle: -30, + panose: [5, 2, 1, 1, 1, 1, 1, 1, 1, 1], + }; + const result = featureDistance(reference, far); + expect(result.compared).toBe(6); + expect(result.score).toBeGreaterThan(0); + expect(result.score).toBeLessThanOrEqual(1); + expect(result.gaps.length).toBeGreaterThan(0); + }); + + test("reports large per-feature gaps for manual review", () => { + const result = featureDistance(reference, { + ...reference, + weightClass: 500, + }); + expect(result.gaps).toEqual([{ feature: "weight", distance: 0.1 }]); + }); +}); + +// --- Report ordering on feature distance ------------------------------------ + +describe("renderReport feature ordering", () => { + test("orders fuller feature evidence before thinner feature evidence", () => { + const reference = new Map([[0x41, 0.5]]); + const score = scoreAdvances(reference, new Map([[0x41, 0.5]]), [0x41]); + const thinExactFeature = { + score: 0, + compared: 1, + missing: 5, + total: 6, + gaps: [], + }; + const fullerFeature = { + score: 0.2, + compared: 5, + missing: 1, + total: 6, + gaps: [], + }; + + const report = renderReport([ + { + sourceId: "thin-exact", + file: "thin.otf", + score, + feature: thinExactFeature, + }, + { + sourceId: "fuller", + file: "fuller.otf", 
+ score, + feature: fullerFeature, + }, + ]); + const lines = report.split("\n"); + expect(lines[1]).toContain("fuller"); + expect(lines[2]).toContain("thin-exact"); + }); + + test("orders by feature distance within the same advance tier and coverage", () => { + // Identical advance scores (same tier, coverage, mean), so feature distance is the only tiebreaker. + const reference = new Map([[0x41, 0.5]]); + const score = scoreAdvances(reference, new Map([[0x41, 0.5]]), [0x41]); + expect(score.tier).toBe("metric_safe"); + + const closeFeature = { + score: 0.1, + compared: 6, + missing: 0, + total: 6, + gaps: [], + }; + const farFeature = { + score: 0.4, + compared: 6, + missing: 0, + total: 6, + gaps: [], + }; + + const report = renderReport([ + { sourceId: "far-src", file: "far.otf", score, feature: farFeature }, + { + sourceId: "close-src", + file: "close.otf", + score, + feature: closeFeature, + }, + ]); + const lines = report.split("\n"); + expect(lines[0]).toContain("fscore"); + expect(lines[0]).toContain("fcov"); + expect(lines[0]).toContain("flags"); + expect(lines[1]).toContain("close-src"); + expect(lines[2]).toContain("far-src"); + }); + + test("sinks rows with no comparable features below rows that have them", () => { + const reference = new Map([[0x41, 0.5]]); + const score = scoreAdvances(reference, new Map([[0x41, 0.5]]), [0x41]); + const withFeature = { + score: 0.4, + compared: 3, + missing: 3, + total: 6, + gaps: [], + }; + + const report = renderReport([ + { sourceId: "no-feature", file: "a.otf", score }, + { sourceId: "has-feature", file: "b.otf", score, feature: withFeature }, + ]); + const lines = report.split("\n"); + expect(lines[1]).toContain("has-feature"); + expect(lines[2]).toContain("no-feature"); + }); + + test("flags strong advance rows whose features disagree", () => { + const reference = new Map([[0x41, 0.5]]); + const score = scoreAdvances(reference, new Map([[0x41, 0.5]]), [0x41]); + const feature = { + score: 0.2, + compared: 6, + 
missing: 0, + total: 6, + gaps: [{ feature: "weight" as const, distance: 0.1 }], + }; + + const report = renderReport([ + { sourceId: "weight-gap", file: "a.otf", score, feature }, + ]); + const lines = report.split("\n"); + expect(lines[0]).toContain("flags"); + expect(lines[1]).toContain("weight_gap"); + }); + + test("does not flag weak visual-only rows", () => { + const reference = new Map([[0x41, 0.5]]); + const score = scoreAdvances(reference, new Map([[0x41, 0.9]]), [0x41]); + expect(score.tier).toBe("visual_only"); + const feature = { + score: 0.2, + compared: 6, + missing: 0, + total: 6, + gaps: [{ feature: "weight" as const, distance: 0.1 }], + }; + + const report = renderReport([ + { sourceId: "visual-row", file: "a.otf", score, feature }, + ]); + const lines = report.split("\n"); + expect(lines[1]).toContain(" - "); + expect(lines[1]).not.toContain("weight_gap"); + }); +}); diff --git a/tools/corpus/server.test.ts b/tools/corpus/server.test.ts new file mode 100644 index 0000000..4ff88de --- /dev/null +++ b/tools/corpus/server.test.ts @@ -0,0 +1,145 @@ +import { describe, expect, test } from "bun:test"; +import { + completeSyntheticFaces, + type FontFamily, + parseArgs, + resolveLocalFont, + runDirFor, + summarizeCandidate, +} from "./server"; +import { featureDistance } from "./src/features"; +import { scoreAdvances } from "./src/score"; + +describe("parseArgs", () => { + test("defaults to the local app port", () => { + expect(parseArgs([])).toEqual({ port: 5177 }); + }); + + test("accepts --port", () => { + expect(parseArgs(["--port", "5180"])).toEqual({ port: 5180 }); + }); + + test("rejects invalid input", () => { + expect(() => parseArgs(["--port", "0"])).toThrow( + "--port requires a positive integer", + ); + expect(() => parseArgs(["--bogus"])).toThrow("unknown argument: --bogus"); + }); +}); + +describe("resolveLocalFont", () => { + test("rejects paths outside known font directories", () => { + expect(() => resolveLocalFont("/etc/hosts")).toThrow( 
+ "reference path is outside the known font directories", + ); + }); +}); + +describe("completeSyntheticFaces", () => { + test("fills missing styles from a regular face", () => { + const family: FontFamily = { + family: "Demo", + faces: { + regular: { + name: "Demo.ttf", + path: "/fonts/Demo.ttf", + style: "normal", + weight: "400", + }, + }, + }; + + completeSyntheticFaces(family); + + expect(family.faces.bold).toMatchObject({ + path: "/fonts/Demo.ttf", + synthetic: true, + weight: "700", + style: "normal", + }); + expect(family.faces.italic).toMatchObject({ + path: "/fonts/Demo.ttf", + synthetic: true, + weight: "400", + style: "italic", + }); + expect(family.faces.boldItalic).toMatchObject({ + path: "/fonts/Demo.ttf", + synthetic: true, + weight: "700", + style: "italic", + }); + }); + + test("keeps a real bold face and derives bold italic from it", () => { + const family: FontFamily = { + family: "Demo", + faces: { + regular: { + name: "Demo.ttf", + path: "/fonts/Demo.ttf", + style: "normal", + weight: "400", + }, + bold: { + name: "Demo Bold.ttf", + path: "/fonts/Demo Bold.ttf", + style: "normal", + weight: "700", + }, + }, + }; + + completeSyntheticFaces(family); + + expect(family.faces.bold?.name).toBe("Demo Bold.ttf"); + expect(family.faces.bold?.synthetic).toBeUndefined(); + expect(family.faces.boldItalic).toMatchObject({ + path: "/fonts/Demo Bold.ttf", + synthetic: true, + weight: "700", + style: "italic", + }); + }); +}); + +describe("runDirFor", () => { + test("accepts a server-minted run id", () => { + expect(() => runDirFor("mq6no3ov-4srqme")).not.toThrow(); + }); + + test("rejects ids that could escape the cache dir", () => { + for (const bad of ["../etc", "a/b", "..", "abc", "A-B", "a-b-c", "a_b", ""]) + expect(() => runDirFor(bad)).toThrow("invalid run id"); + }); +}); + +describe("summarizeCandidate", () => { + test("serializes scores and feature flags for the app", () => { + const sample = [0x41, 0x42, 0x43]; + const reference = new 
Map(sample.map((cp) => [cp, 0.5] as const)); + const candidate = new Map(sample.map((cp) => [cp, 0.5] as const)); + const summary = summarizeCandidate( + { + sourceId: "test-source", + file: "Candidate.ttf", + bytes: new Uint8Array(), + score: scoreAdvances(reference, candidate, sample), + feature: featureDistance( + { weightClass: 400, widthClass: 5 }, + { weightClass: 700, widthClass: 5 }, + ), + }, + 2, + "/runs/r/candidate.ttf", + ); + + expect(summary.index).toBe(2); + expect(summary.sourceId).toBe("test-source"); + expect(summary.file).toBe("Candidate.ttf"); + expect(summary.url).toBe("/runs/r/candidate.ttf"); + expect(summary.tier).toBe("metric_safe"); + expect(summary.coverage).toBe("3/3"); + expect(summary.flags).toEqual(["weight_gap"]); + }); +}); diff --git a/tools/corpus/server.ts b/tools/corpus/server.ts new file mode 100644 index 0000000..da35751 --- /dev/null +++ b/tools/corpus/server.ts @@ -0,0 +1,779 @@ +import { + closeSync, + existsSync, + mkdirSync, + openSync, + readdirSync, + readFileSync, + readSync, + rmSync, + statSync, + writeFileSync, +} from "node:fs"; +import { homedir } from "node:os"; +import { + basename, + extname, + isAbsolute, + join, + normalize, + relative, + resolve, +} from "node:path"; +import { + type CorpusFont, + compareReferenceToCorpus, + compareReferenceToTarget, + listCorpusFonts, + type ScoredCandidate, +} from "./src/compare-engine"; +import { extractFont } from "./src/font"; +import { + formatDelta, + formatFeatureCoverage, + formatFeatureScore, +} from "./src/report"; +import type { CompareModel } from "./src/tiers"; + +const REPO_DIR = join(import.meta.dir, "..", ".."); +const APP_DIR = join(import.meta.dir, "app"); +const DEFAULT_CACHE_DIR = join(REPO_DIR, ".cache", "corpus"); +const DEFAULT_APP_CACHE_DIR = join(REPO_DIR, ".cache", "corpus-app"); +const DEFAULT_REFERENCE_FONT_DIR = join(REPO_DIR, ".cache", "reference-fonts"); +const DEFAULT_PORT = 5177; + +export interface ServerArgs { + port: number; +} + 
+export interface CandidateSummary { + index: number; + sourceId: string; + file: string; + url: string; + tier: string; + mean: string; + max: string; + coverage: string; + fscore: string; + fcov: string; + flags: string[]; + worst: string[]; +} + +export function parseArgs(argv: string[]): ServerArgs { + const args: ServerArgs = { port: DEFAULT_PORT }; + const readValue = (flag: string, index: number): string => { + const value = argv[index + 1]; + if (!value || value.startsWith("--")) + throw new Error(`${flag} requires a value`); + return value; + }; + for (let i = 0; i < argv.length; i++) { + const flag = argv[i]; + switch (flag) { + case "--port": { + const port = Number(readValue(flag, i)); + if (!Number.isInteger(port) || port <= 0) + throw new Error("--port requires a positive integer"); + args.port = port; + i++; + break; + } + default: + throw new Error(`unknown argument: ${flag}`); + } + } + return args; +} + +function mimeFor(path: string): string { + if (path.endsWith(".html")) return "text/html; charset=utf-8"; + if (path.endsWith(".css")) return "text/css; charset=utf-8"; + if (path.endsWith(".js")) return "text/javascript; charset=utf-8"; + if (path.endsWith(".json")) return "application/json; charset=utf-8"; + if (path.endsWith(".ttf")) return "font/ttf"; + if (path.endsWith(".otf")) return "font/otf"; + if (path.endsWith(".woff")) return "font/woff"; + if (path.endsWith(".woff2")) return "font/woff2"; + return "application/octet-stream"; +} + +function safeExt(name: string): string { + const ext = extname(name).toLowerCase(); + return [".ttf", ".otf", ".woff", ".woff2"].includes(ext) ? ext : ".ttf"; +} + +function safeSourceName(name: string): string { + return basename(name).replace(/[^\w.-]+/g, "_"); +} + +/** Known OS font directories. The reference can only be read from inside these. 
*/ +function fontDirs(): string[] { + const home = homedir(); + if (process.platform === "darwin") + return [ + "/System/Library/Fonts", + "/Library/Fonts", + join(home, "Library", "Fonts"), + "/Applications/Microsoft Word.app/Contents/Resources/DFonts", + DEFAULT_REFERENCE_FONT_DIR, + ]; + if (process.platform === "win32") + return [ + "C:\\Windows\\Fonts", + join(home, "AppData", "Local", "Microsoft", "Windows", "Fonts"), + DEFAULT_REFERENCE_FONT_DIR, + ]; + return [ + "/usr/share/fonts", + "/usr/local/share/fonts", + join(home, ".fonts"), + join(home, ".local", "share", "fonts"), + DEFAULT_REFERENCE_FONT_DIR, + ]; +} + +/** The compare engine can read static TrueType/OpenType and selected TTC members. */ +function isLocalFont(name: string): boolean { + return [".ttf", ".otf", ".ttc", ".otc"].includes(extname(name).toLowerCase()); +} + +/** Depth-limited, count-capped scan so a huge font tree never stalls the request. */ +function walkFonts( + dir: string, + depth: number, + out: string[], + cap = 4000, +): void { + if (depth < 0 || out.length >= cap) return; + let names: string[]; + try { + names = readdirSync(dir); + } catch { + return; + } + for (const name of names) { + if (out.length >= cap) return; + const full = join(dir, name); + let stat: ReturnType; + try { + stat = statSync(full); + } catch { + continue; + } + if (stat.isDirectory()) walkFonts(full, depth - 1, out, cap); + else if (stat.isFile() && isLocalFont(name)) out.push(full); + } +} + +type FaceKey = "regular" | "bold" | "italic" | "boldItalic"; +export interface Face { + name: string; + path: string; + fontIndex?: number; + weight: string; + style: string; + synthetic?: boolean; +} +export interface FontFamily { + family: string; + faces: Partial>; +} + +function decodeUtf16BE(buf: Buffer): string { + let out = ""; + for (let i = 0; i + 1 < buf.length; i += 2) + out += String.fromCharCode((buf[i] << 8) | buf[i + 1]); + return out; +} + +/** Strip an extension and trailing style words so a 
filename can stand in for a family name. */ +function familyFromFilename(name: string): string { + const base = name + .replace(/\.(ttf|otf|ttc|otc)$/i, "") + .replace(/[-_ ]?(bold ?italic|italic|oblique|bold|regular|book)$/i, "") + .replace(/[-_ ]+$/, "") + .trim(); + return base || name; +} + +function styleFromFilename(name: string): { bold: boolean; italic: boolean } { + const lower = name.toLowerCase(); + return { + bold: /bold/.test(lower), + italic: /italic|oblique/.test(lower), + }; +} + +interface FontIdentity { + family: string; + bold: boolean; + italic: boolean; + fontIndex?: number; +} + +interface NameChoice { + value: string; + score: number; +} + +const TTCF = 0x74746366; + +function englishNameScore(platformID: number, languageID: number): number { + if (platformID === 3 && languageID === 0x0409) return 5; + if (platformID === 1 && languageID === 0) return 4; + if (platformID === 0) return 3; + if (platformID === 3) return 2; + return 1; +} + +function betterName( + current: NameChoice | null, + value: string, + score: number, +): NameChoice { + if (!current || score > current.score) return { value, score }; + return current; +} + +function styleFromName(value: string): { bold: boolean; italic: boolean } { + const lower = value.toLowerCase(); + return { + bold: /bold|black|heavy|semibold|demibold/.test(lower), + italic: /italic|oblique/.test(lower), + }; +} + +/** + * Read every font identity from a single font or collection. Uses positioned reads so one large TTC + * does not need to be loaded during the local-family scan. + */ +function readFontIdentities(path: string): FontIdentity[] { + const fallback = (): FontIdentity[] => [ + { + family: familyFromFilename(basename(path)), + ...styleFromFilename(basename(path)), + }, + ]; + const at = (fd: number, position: number, length: number): Buffer | null => { + const buf = Buffer.alloc(length); + try { + return readSync(fd, buf, 0, length, position) === length ? 
buf : null; + } catch { + return null; + } + }; + let fd: number; + try { + fd = openSync(path, "r"); + } catch { + return fallback(); + } + try { + const header = at(fd, 0, 12); + if (!header) return fallback(); + const sfntVersion = header.readUInt32BE(0); + let offsets: number[]; + if (sfntVersion === TTCF) { + const count = header.readUInt32BE(8); + const table = at(fd, 12, count * 4); + if (!table) return fallback(); + offsets = Array.from({ length: count }, (_, i) => + table.readUInt32BE(i * 4), + ); + } else { + offsets = [0]; + } + + const identities: FontIdentity[] = []; + offsets.forEach((sfntOffset, fontIndex) => { + const sfntHeader = at(fd, sfntOffset, 12); + if (!sfntHeader) return; + const numTables = sfntHeader.readUInt16BE(4); + const dir = at(fd, sfntOffset + 12, numTables * 16); + if (!dir) return; + let nameOff = 0; + let nameLen = 0; + let os2Off = 0; + for (let i = 0; i < numTables; i++) { + const rec = i * 16; + const tag = dir.toString("latin1", rec, rec + 4); + if (tag === "name") { + nameOff = dir.readUInt32BE(rec + 8); + nameLen = dir.readUInt32BE(rec + 12); + } else if (tag === "OS/2") { + os2Off = dir.readUInt32BE(rec + 8); + } + } + + let bold = false; + let italic = false; + let haveStyle = false; + if (os2Off) { + const fsSel = at(fd, os2Off + 62, 2); + if (fsSel) { + const sel = fsSel.readUInt16BE(0); + italic = (sel & 0x01) !== 0; + bold = (sel & 0x20) !== 0; + haveStyle = true; + } + } + + let id1: NameChoice | null = null; + let id2: NameChoice | null = null; + let id16: NameChoice | null = null; + let id17: NameChoice | null = null; + const table = + nameOff && nameLen ? 
at(fd, nameOff, Math.min(nameLen, 65536)) : null; + if (table && table.length >= 6) { + const count = table.readUInt16BE(2); + const storage = table.readUInt16BE(4); + for (let i = 0; i < count; i++) { + const rec = 6 + i * 12; + if (rec + 12 > table.length) break; + const platformID = table.readUInt16BE(rec); + const languageID = table.readUInt16BE(rec + 4); + const nameID = table.readUInt16BE(rec + 6); + const len = table.readUInt16BE(rec + 8); + const off = storage + table.readUInt16BE(rec + 10); + if ( + (nameID !== 1 && nameID !== 2 && nameID !== 16 && nameID !== 17) || + off + len > table.length + ) + continue; + const raw = table.subarray(off, off + len); + const value = ( + platformID === 1 ? raw.toString("latin1") : decodeUtf16BE(raw) + ).trim(); + if (!value) continue; + const score = englishNameScore(platformID, languageID); + if (nameID === 1) id1 = betterName(id1, value, score); + else if (nameID === 2) id2 = betterName(id2, value, score); + else if (nameID === 16) id16 = betterName(id16, value, score); + else if (nameID === 17) id17 = betterName(id17, value, score); + } + } + + if (!haveStyle) { + const namedStyle = styleFromName(id17?.value || id2?.value || ""); + const fileStyle = styleFromFilename(basename(path)); + bold = namedStyle.bold || fileStyle.bold; + italic = namedStyle.italic || fileStyle.italic; + } + + const family = + id1?.value || id16?.value || familyFromFilename(basename(path)); + identities.push({ + family, + bold, + italic, + fontIndex: sfntVersion === TTCF ? fontIndex : undefined, + }); + }); + return identities.length ? 
identities : fallback(); + } catch { + return fallback(); + } finally { + try { + closeSync(fd); + } catch { + /* already closed */ + } + } +} + +function faceKeyOf(id: { bold: boolean; italic: boolean }): FaceKey { + if (id.bold && id.italic) return "boldItalic"; + if (id.bold) return "bold"; + if (id.italic) return "italic"; + return "regular"; +} + +function faceStyle(face: FaceKey): Pick { + return { + style: face === "italic" || face === "boldItalic" ? "italic" : "normal", + weight: face === "bold" || face === "boldItalic" ? "700" : "400", + }; +} + +const FACE_LABELS: Record = { + regular: "Regular", + bold: "Bold", + italic: "Italic", + boldItalic: "Bold Italic", +}; + +function syntheticFace(source: Face, face: FaceKey): Face { + const baseName = source.name.replace(/(?: \(synthetic [^)]+\))+$/g, ""); + return { + ...source, + ...faceStyle(face), + name: `${baseName} (synthetic ${FACE_LABELS[face]})`, + synthetic: true, + }; +} + +export function completeSyntheticFaces(family: FontFamily): void { + const { faces } = family; + if (!faces.regular) return; + if (!faces.bold) faces.bold = syntheticFace(faces.regular, "bold"); + if (!faces.italic) faces.italic = syntheticFace(faces.regular, "italic"); + if (!faces.boldItalic) + faces.boldItalic = syntheticFace( + faces.bold && !faces.bold.synthetic + ? faces.bold + : (faces.italic ?? faces.regular), + "boldItalic", + ); +} + +/** Scan the OS font directories and group the static fonts into families with R/B/I/BI faces. 
*/ +function scanLocalFamilies(): FontFamily[] { + const seenPaths = new Set(); + const families = new Map(); + for (const dir of fontDirs()) { + if (!existsSync(dir)) continue; + const found: string[] = []; + walkFonts(dir, 3, found); + for (const path of found) { + if (seenPaths.has(path)) continue; + seenPaths.add(path); + for (const id of readFontIdentities(path)) { + const key = id.family || basename(path); + let family = families.get(key); + if (!family) { + family = { family: key, faces: {} }; + families.set(key, family); + } + const face = faceKeyOf(id); + if (!family.faces[face]) + family.faces[face] = { + name: + id.fontIndex === undefined + ? basename(path) + : `${basename(path)} #${id.fontIndex + 1}`, + path, + fontIndex: id.fontIndex, + ...faceStyle(face), + }; + } + } + } + for (const family of families.values()) completeSyntheticFaces(family); + return [...families.values()].sort((a, b) => + a.family.localeCompare(b.family), + ); +} + +/** Resolve a detected-font path, refusing anything outside the known font directories. */ +export function resolveLocalFont(raw: string): string { + const target = resolve(raw); + const inside = fontDirs().some((dir) => { + const rel = relative(resolve(dir), target); + return rel !== "" && !rel.startsWith("..") && !isAbsolute(rel); + }); + if (!inside) + throw new Error("reference path is outside the known font directories"); + if (!existsSync(target) || !isLocalFont(target)) + throw new Error("reference font not found"); + return target; +} + +function json(data: unknown, status = 200): Response { + return new Response(JSON.stringify(data), { + status, + headers: { "content-type": "application/json; charset=utf-8" }, + }); +} + +function errorResponse(error: unknown, status = 400): Response { + return json( + { error: error instanceof Error ? error.message : String(error) }, + status, + ); +} + +function serveFile(root: string, pathname: string): Response { + const rel = pathname === "/" ? 
/**
 * Build the URL path under `/runs/` for a file in a run directory.
 * Both segments are percent-encoded so neither can introduce extra path separators.
 *
 * @param runId - Identifier of the run directory.
 * @param file - File name inside that run directory.
 * @returns A path of the form `/runs/<runId>/<file>`.
 */
function sourceUrl(runId: string, file: string): string {
  const segments = [runId, file].map((segment) => encodeURIComponent(segment));
  return ["", "runs", ...segments].join("/");
}
/**
 * Parse the `limit` form field.
 *
 * @param raw - The form value; a missing, non-string, or blank value selects the default.
 * @returns The requested limit, or 10 when none was supplied.
 * @throws Error when the value is not an integer in the range 1..50.
 */
function parseLimit(raw: FormDataEntryValue | null): number {
  const text = typeof raw === "string" ? raw.trim() : "";
  if (text === "") return 10;

  const limit = Number(text);
  const valid = Number.isInteger(limit) && limit >= 1 && limit <= 50;
  if (!valid) throw new Error("limit must be a positive integer up to 50");
  return limit;
}
/**
 * Handle `POST /api/compare`: rank the corpus against the submitted reference font.
 *
 * The reference and every returned candidate are written into a fresh run directory under the app
 * cache, and each is reported with a `/runs/<runId>/<file>` URL. The JSON response carries the run
 * id, the reference descriptor, the total and skipped counts, and the candidate summaries.
 * Failures are thrown rather than converted to a response here.
 */
async function handleCompare(req: Request): Promise<Response> {
  const form = await req.formData();
  const reference = await resolveReference(form);

  const limit = parseLimit(form.get("limit"));
  const model = parseModel(form.get("model"));
  const cacheDir = process.env.DOCFONTS_SOURCE_CACHE ?? DEFAULT_CACHE_DIR;
  const appCacheDir = process.env.DOCFONTS_APP_CACHE ?? DEFAULT_APP_CACHE_DIR;

  // Run ids are `<base36 time>-<base36 random>`.
  const stamp = Date.now().toString(36);
  const nonce = Math.random().toString(36).slice(2, 8);
  const runId = `${stamp}-${nonce}`;
  const runDir = join(appCacheDir, runId);
  mkdirSync(runDir, { recursive: true });

  const referenceFile = `reference${safeExt(reference.name)}`;
  writeFileSync(join(runDir, referenceFile), reference.bytes);

  const sources = parseSourceList(form.get("sources"));
  const result = compareReferenceToCorpus(reference.bytes, {
    cacheDir,
    sources,
    model,
    limit,
  });

  // Persist each visible candidate so the browser can load it by URL.
  const candidates: CandidateSummary[] = [];
  result.rows.forEach((row, index) => {
    const file = `candidate-${index}${safeExt(row.file)}`;
    writeFileSync(join(runDir, file), row.bytes);
    candidates.push(summarizeCandidate(row, index, sourceUrl(runId, file)));
  });

  return json({
    runId,
    reference: {
      name: safeSourceName(reference.name),
      url: sourceUrl(runId, referenceFile),
    },
    totalRows: result.totalRows,
    skipped: result.skipped,
    candidates,
  });
}
/**
 * Map a run id to its directory under the app cache.
 *
 * Only ids of the form `<lowercase alphanumerics>-<lowercase alphanumerics>` are accepted, so a
 * client-supplied id cannot contain separators or dot segments that would escape the cache dir.
 *
 * @param runId - Run identifier, normally echoed back by the client.
 * @returns The run directory path (`DOCFONTS_APP_CACHE` overrides the default cache root).
 * @throws Error `"invalid run id"` when the id does not match the expected shape.
 */
export function runDirFor(runId: string): string {
  const wellFormed = /^[a-z0-9]+-[a-z0-9]+$/.test(runId);
  if (!wellFormed) throw new Error("invalid run id");
  return join(process.env.DOCFONTS_APP_CACHE ?? DEFAULT_APP_CACHE_DIR, runId);
}
/**
 * Serve one file from a run directory for `GET /runs/<runId>/<file>`.
 *
 * Both segments come from the request, so they are validated before touching the filesystem:
 * the run id must have the shape `runDirFor` accepts, and the file must be a single, non-empty
 * path segment. Anything else, or a file that does not exist, is answered with 404.
 *
 * @param pathname - The request path, starting with `/runs/`.
 * @returns The file response, or a 404 response.
 */
function serveRunFile(pathname: string): Response {
  const notFound = (): Response => new Response("not found", { status: 404 });

  const parts = pathname
    .replace(/^\/runs\//, "")
    .split("/")
    .map(decodeURIComponent);
  if (parts.length !== 2) return notFound();
  const runId = parts[0] ?? "";
  const file = parts[1] ?? "";

  // Decoding can put a separator back into the name (e.g. "%2F"), and an empty name would resolve
  // to the run directory itself, which exists but is not a servable file.
  if (file === "" || file === "." || file === ".." || basename(file) !== file)
    return notFound();

  let runDir: string;
  try {
    runDir = runDirFor(runId);
  } catch {
    // A malformed run id can never name a run this server created.
    return notFound();
  }

  const target = join(runDir, file);
  if (!existsSync(target)) return notFound();
  return new Response(Bun.file(target), {
    headers: { "content-type": mimeFor(target) },
  });
}
/**
 * Collect the basenames that occur for more than one archive member, so those members can fall
 * back to a path-qualified display name.
 *
 * @param members - Archive member paths.
 * @returns The repeated basenames, in order of first appearance.
 */
function duplicateBasenamesOf(members: string[]): Set<string> {
  const tally = members.reduce((counts, member) => {
    const file = basename(member);
    return counts.set(file, (counts.get(file) ?? 0) + 1);
  }, new Map<string, number>());

  const repeated = new Set<string>();
  tally.forEach((count, file) => {
    if (count > 1) repeated.add(file);
  });
  return repeated;
}
/**
 * Pick the snapshot sources to compare against.
 *
 * @param snapshot - Every source in the cache snapshot.
 * @param requestedIds - Source ids to keep; empty means the whole snapshot.
 * @returns The snapshot itself when nothing is requested; otherwise the requested sources in
 *   first-requested order, each at most once.
 * @throws Error naming the unknown ids, and the ids that are available, when a request does not
 *   match the snapshot.
 */
export function selectSources(
  snapshot: SnapshotSource[],
  requestedIds: string[] = [],
): SnapshotSource[] {
  if (requestedIds.length === 0) return snapshot;

  const byId = new Map<string, SnapshotSource>();
  for (const source of snapshot) byId.set(source.sourceId, source);

  const selected: SnapshotSource[] = [];
  const unknown: string[] = [];
  // A repeated id (e.g. "a,a" typed into the sources field) must not score a source twice.
  for (const id of new Set(requestedIds)) {
    const source = byId.get(id);
    if (source) selected.push(source);
    else unknown.push(id);
  }

  if (unknown.length > 0)
    throw new Error(
      `source(s) not in cache: ${unknown.join(", ")}. Acquired: ${[...byId.keys()].join(", ")}`,
    );
  return selected;
}
/**
 * Score a reference font against the selected corpus sources and return the ranked rows.
 *
 * @param referenceBytes - The reference font file contents.
 * @param options - Cache location plus optional source filter, model, and row limit. The model
 *   defaults to `"latin"`; `limit: null` returns every row and an omitted limit returns 50.
 * @returns The visible rows, the number of rows scored before limiting, and the number of
 *   candidates that were skipped.
 */
export function compareReferenceToCorpus(
  referenceBytes: Uint8Array,
  options: CompareCorpusOptions,
): CompareCorpusResult {
  const snapshot = loadSnapshot(options.cacheDir);
  const selected = selectSources(snapshot, options.sources ?? []);
  requireArchiveTools(selected);

  const outcome = compareReferenceToSources(
    referenceBytes,
    selected,
    options.cacheDir,
    options.model ?? "latin",
  );

  const ranked = rankRows(outcome.rows);
  let visible = ranked;
  if (options.limit !== null) visible = ranked.slice(0, options.limit ?? 50);

  return {
    rows: visible,
    totalRows: outcome.rows.length,
    skipped: outcome.skipped,
  };
}
/**
 * Score the reference against a single corpus font, identified by source id and display name.
 * This reaches fonts that the ranked output of `compareReferenceToCorpus` would cut off.
 *
 * @param referenceBytes - The reference font file contents.
 * @param options - Cache location, the target's source id and file name, and an optional model
 *   (defaults to `"latin"`).
 * @returns The scored candidate, including its font bytes.
 * @throws Error when the source is not in the snapshot or the file is not among its candidates.
 */
export function compareReferenceToTarget(
  referenceBytes: Uint8Array,
  options: CompareTargetOptions,
): ScoredCandidate {
  const { cacheDir, sourceId, file } = options;

  const source = loadSnapshot(cacheDir).find(
    (entry) => entry.sourceId === sourceId,
  );
  if (!source) throw new Error(`source not in cache: ${options.sourceId}`);
  requireArchiveTools([source]);

  const candidate = collectCandidates(source, cacheDir).find(
    (entry) => entry.file === file,
  );
  if (!candidate)
    throw new Error(`font not found in ${options.sourceId}: ${options.file}`);

  // The reference is parsed only once the target is known to exist.
  const scored = scoreCandidateBytes(
    sampleMetrics(parseFont(referenceBytes)),
    parseFeatures(referenceBytes),
    candidate.bytes,
    options.model ?? "latin",
  );
  return {
    sourceId: source.sourceId,
    file: candidate.file,
    ...scored,
    bytes: candidate.bytes,
  };
}
/**
 * Type guard for face slot names.
 *
 * @param value - Any string, typically parsed from a command-line argument.
 * @returns True when `value` is one of the entries in `FACE_SLOTS`.
 */
export function isFaceSlot(value: string): value is FaceSlot {
  return FACE_SLOTS.some((slot) => slot === value);
}
/**
 * Escape text for safe interpolation into HTML element content or a quoted attribute value.
 *
 * The ampersand is replaced first so the entities produced by the later replacements are not
 * escaped a second time.
 *
 * @param value - Raw text.
 * @returns The text with `&`, `<`, `>`, `"`, and `'` replaced by character references.
 */
export function escapeHtml(value: string): string {
  return value
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}

${escapeHtml(`Visual review: ${family}`)}

+

Loaded ${app.candidates.length} candidate(s). Cyan is reference, magenta is candidate, blue overlap means shared ink.

+
+
/**
 * Report whether a PANOSE record carries no style data.
 *
 * The family byte at index 0 is ignored, so both an all-zero record and one with only that byte
 * set (e.g. `[2, 0, 0, ...]`) count as unset.
 *
 * @param panose - The PANOSE digits.
 * @returns True when every digit after the first is zero.
 */
function panoseIsUnset(panose: readonly number[]): boolean {
  const hasStyleDigit = panose.some((digit, index) => index > 0 && digit !== 0);
  return !hasStyleDigit;
}
/**
 * Read OS/2 weight, width, PANOSE, and (version >= 2) the em-normalized x-height and cap-height
 * into `out`.
 *
 * Every read is guarded by the table length, so a truncated or empty table leaves the affected
 * fields unset instead of throwing.
 *
 * @param view - View over the whole font file.
 * @param table - Offset and length of the OS/2 table within `view`.
 * @param unitsPerEm - Em size used to normalize the two heights.
 * @param out - Feature record to fill; fields are only ever added, never cleared.
 */
function parseOs2(
  view: DataView,
  table: SfntTable,
  unitsPerEm: number,
  out: FontFeatures,
): void {
  const { offset, length } = table;

  // usWeightClass and usWidthClass sit at offsets 4 and 6; zero means "not declared".
  if (length >= 8) {
    const weight = view.getUint16(offset + 4);
    if (weight > 0) out.weightClass = weight;
    const width = view.getUint16(offset + 6);
    if (width > 0) out.widthClass = width;
  }

  // PANOSE is 10 bytes at offset 32 in every OS/2 version.
  if (length >= 42) {
    const panose: number[] = [];
    for (let i = 0; i < 10; i++) panose.push(view.getUint8(offset + 32 + i));
    if (!panoseIsUnset(panose)) out.panose = panose;
  }

  // sxHeight and sCapHeight only exist from OS/2 version 2 onward (fields at 86 and 88). The
  // version is read after the length check so a table shorter than 2 bytes is never dereferenced.
  if (length >= 90 && view.getUint16(offset) >= 2) {
    const sx = view.getInt16(offset + 86);
    if (sx > 0) out.xHeight = sx / unitsPerEm;
    const cap = view.getInt16(offset + 88);
    if (cap > 0) out.capHeight = cap / unitsPerEm;
  }
}
*/ +export interface FeatureWeights { + xHeight: number; + capHeight: number; + weight: number; + width: number; + italic: number; + panose: number; +} + +/** + * Default feature weights. X-height carries the most visual signal for a fallback, so it leads; width + * class is the noisiest, so it trails. Kept as plain numbers so they are easy to retune. + */ +export const DEFAULT_FEATURE_WEIGHTS: FeatureWeights = { + xHeight: 1.5, + capHeight: 1.0, + weight: 1.0, + width: 0.75, + italic: 1.0, + panose: 1.0, +}; + +/** The number of features the distance can compare when both fonts declare everything. */ +export const FEATURE_COUNT = 6; + +export type FeatureName = keyof FeatureWeights; + +export interface FeatureGap { + feature: FeatureName; + distance: number; +} + +/** A feature-distance result: the blended score plus how many features actually overlapped. */ +export interface FeatureDistance { + /** Weighted mean of the per-feature distances, 0 = identical. NaN when nothing overlapped. */ + score: number; + /** Features both fonts declared and that were compared. */ + compared: number; + /** Features skipped because one or both fonts did not declare them. */ + missing: number; + /** Total comparable features (compared + missing). */ + total: number; + /** Compared features whose normalized distance is large enough to deserve manual review. */ + gaps: readonly FeatureGap[]; +} + +const clamp01 = (value: number): number => Math.min(1, Math.max(0, value)); + +// Each helper maps a raw difference onto roughly [0, 1] so the weights, not the units, set the blend. 
/**
 * Distance between two PANOSE records: the fraction of the 10 digit positions whose values differ.
 * The digits are compared for equality only, never by magnitude.
 */
const panoseDistance = (a: readonly number[], b: readonly number[]): number => {
  const mismatches = Array.from({ length: 10 }, (_, index) => a[index] !== b[index]);
  return mismatches.filter(Boolean).length / 10;
};
/**
 * Deterministic feature distance between a reference and a candidate.
 *
 * A feature takes part only when both fonts declare it; otherwise it is skipped and shows up in
 * `missing`. The score is the weighted mean of the per-feature distances (lower is more similar),
 * or NaN when no feature overlapped. Compared features whose distance reaches their
 * `REVIEW_GAP_THRESHOLDS` entry are listed in `gaps`.
 *
 * @param reference - Features of the reference font.
 * @param candidate - Features of the candidate font.
 * @param weights - Per-feature weights for the blend; defaults to `DEFAULT_FEATURE_WEIGHTS`.
 * @returns The blended score with the compared/missing/total counts and the review gaps.
 */
export function featureDistance(
  reference: FontFeatures,
  candidate: FontFeatures,
  weights: FeatureWeights = DEFAULT_FEATURE_WEIGHTS,
): FeatureDistance {
  // Distance for one feature, or undefined when either font does not declare it.
  const measure = <T>(
    a: T | undefined,
    b: T | undefined,
    distance: (left: T, right: T) => number,
  ): number | undefined =>
    a === undefined || b === undefined ? undefined : distance(a, b);

  // Order matters: it fixes the order of `gaps` and of the floating-point accumulation.
  const measured: ReadonlyArray<readonly [FeatureName, number | undefined]> = [
    ["xHeight", measure(reference.xHeight, candidate.xHeight, heightDistance)],
    [
      "capHeight",
      measure(reference.capHeight, candidate.capHeight, heightDistance),
    ],
    [
      "weight",
      measure(reference.weightClass, candidate.weightClass, weightDistance),
    ],
    [
      "width",
      measure(reference.widthClass, candidate.widthClass, widthDistance),
    ],
    [
      "italic",
      measure(reference.italicAngle, candidate.italicAngle, italicDistance),
    ],
    ["panose", measure(reference.panose, candidate.panose, panoseDistance)],
  ];

  let weightedSum = 0;
  let weightTotal = 0;
  let compared = 0;
  const gaps: FeatureGap[] = [];
  for (const [feature, distance] of measured) {
    if (distance === undefined) continue;
    const weight = weights[feature];
    weightedSum += weight * distance;
    weightTotal += weight;
    compared++;
    if (distance >= REVIEW_GAP_THRESHOLDS[feature])
      gaps.push({ feature, distance });
  }

  return {
    score: compared === 0 ? Number.NaN : weightedSum / weightTotal,
    compared,
    missing: FEATURE_COUNT - compared,
    total: FEATURE_COUNT,
    gaps,
  };
}
fontIndex * 4); + if (offset + 12 > view.byteLength) + throw new Error("unsupported font: collection member is truncated"); + return offset; +} + /** Resolve a codepoint to a glyph id within one `cmap` subtable, for the formats we support (4, 6, 12). */ function makeCmapLookup( view: DataView, @@ -129,17 +170,18 @@ function cmapPreference(platformId: number, encodingId: number): number | null { } /** - * Parse just enough of an SFNT font (TrueType or CFF/OTF) to read normalized advance widths by - * codepoint. Throws an explicit error when the container is a collection or a required table is missing. + * Read and validate the SFNT container: confirm it is a supported SFNT, index its table directory, + * and read the em size from `head`. Throws an explicit error when + * the container is unusable or a required table is missing. Feature parsing reuses this so both paths + * share one validation step. */ -export function parseFont(bytes: Uint8Array): FontMetrics { +export function openFont(bytes: Uint8Array, fontIndex = 0): Sfnt { const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength); if (bytes.byteLength < 12) throw new Error("unsupported font: file is too small to be an SFNT"); - const sfntVersion = view.getUint32(0); - if (sfntVersion === 0x74746366) - throw new Error("unsupported font: TrueType/OpenType collections (ttcf)"); + const sfntOffset = sfntOffsetFor(view, fontIndex); + const sfntVersion = view.getUint32(sfntOffset); const isSfnt = sfntVersion === 0x00010000 || sfntVersion === 0x4f54544f || @@ -149,11 +191,18 @@ export function parseFont(bytes: Uint8Array): FontMetrics { `unsupported font: not an SFNT (sfntVersion 0x${sfntVersion.toString(16)})`, ); - const numTables = view.getUint16(4); - const tables = new Map(); + const numTables = view.getUint16(sfntOffset + 4); + const directoryEnd = sfntOffset + 12 + numTables * 16; + if (directoryEnd > view.byteLength) + throw new Error("unsupported font: truncated table directory"); + const 
tables = new Map(); for (let i = 0; i < numTables; i++) { - const recordOffset = 12 + i * 16; - tables.set(tagAt(view, recordOffset), view.getUint32(recordOffset + 8)); + const recordOffset = sfntOffset + 12 + i * 16; + const offset = view.getUint32(recordOffset + 8); + const length = view.getUint32(recordOffset + 12); + if (offset + length > view.byteLength) + throw new Error("unsupported font: table extends past end of file"); + tables.set(tagAt(view, recordOffset), { offset, length }); } const missing = REQUIRED_TABLES.filter((tag) => !tables.has(tag)); @@ -162,22 +211,91 @@ export function parseFont(bytes: Uint8Array): FontMetrics { `unsupported font: missing required table(s): ${missing.join(", ")}`, ); - const headOffset = tables.get("head") as number; + const headOffset = (tables.get("head") as SfntTable).offset; const unitsPerEm = view.getUint16(headOffset + 18); if (unitsPerEm === 0) throw new Error("unsupported font: head.unitsPerEm is zero"); - const numberOfHMetrics = view.getUint16((tables.get("hhea") as number) + 34); + return { view, tables, unitsPerEm }; +} + +/** + * Extract one collection member into a standalone SFNT. Browsers can be ambiguous about which face in + * a TTC they load, so the local app serves the selected member as a single font file. 
+ */ +export function extractFont(bytes: Uint8Array, fontIndex = 0): Uint8Array { + const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength); + if (bytes.byteLength < 12) + throw new Error("unsupported font: file is too small to be an SFNT"); + const sfntOffset = sfntOffsetFor(view, fontIndex); + if (sfntOffset === 0) return bytes; + + const sfntVersion = view.getUint32(sfntOffset); + const numTables = view.getUint16(sfntOffset + 4); + const searchRange = view.getUint16(sfntOffset + 6); + const entrySelector = view.getUint16(sfntOffset + 8); + const rangeShift = view.getUint16(sfntOffset + 10); + const records = []; + let outputOffset = 12 + numTables * 16; + for (let i = 0; i < numTables; i++) { + const recordOffset = sfntOffset + 12 + i * 16; + const tag = [ + view.getUint8(recordOffset), + view.getUint8(recordOffset + 1), + view.getUint8(recordOffset + 2), + view.getUint8(recordOffset + 3), + ] as const; + const checksum = view.getUint32(recordOffset + 4); + const sourceOffset = view.getUint32(recordOffset + 8); + const length = view.getUint32(recordOffset + 12); + if (sourceOffset + length > view.byteLength) + throw new Error("unsupported font: table extends past end of file"); + outputOffset = align4(outputOffset); + records.push({ tag, checksum, sourceOffset, length, outputOffset }); + outputOffset += length; + } + + const out = new Uint8Array(align4(outputOffset)); + const outView = new DataView(out.buffer); + outView.setUint32(0, sfntVersion); + outView.setUint16(4, numTables); + outView.setUint16(6, searchRange); + outView.setUint16(8, entrySelector); + outView.setUint16(10, rangeShift); + records.forEach((record, index) => { + const recordOffset = 12 + index * 16; + out.set(record.tag, recordOffset); + outView.setUint32(recordOffset + 4, record.checksum); + outView.setUint32(recordOffset + 8, record.outputOffset); + outView.setUint32(recordOffset + 12, record.length); + out.set( + bytes.subarray(record.sourceOffset, record.sourceOffset 
+ record.length), + record.outputOffset, + ); + }); + return out; +} + +/** + * Parse just enough of an SFNT font (TrueType or CFF/OTF) to read normalized advance widths by + * codepoint. Throws an explicit error when the container is unusable or a required table is missing. + */ +export function parseFont(bytes: Uint8Array, fontIndex = 0): FontMetrics { + const { view, tables, unitsPerEm } = openFont(bytes, fontIndex); + + const numberOfHMetrics = view.getUint16( + (tables.get("hhea") as SfntTable).offset + 34, + ); if (numberOfHMetrics === 0) throw new Error("unsupported font: hhea.numberOfHMetrics is zero"); - const hmtxOffset = tables.get("hmtx") as number; + const hmtxOffset = (tables.get("hmtx") as SfntTable).offset; const advanceOfGlyph = (glyphId: number): number => { const index = glyphId < numberOfHMetrics ? glyphId : numberOfHMetrics - 1; return view.getUint16(hmtxOffset + index * 4); }; - const lookup = readCmap(view, tables.get("cmap") as number); + const lookup = readCmap(view, (tables.get("cmap") as SfntTable).offset); return { unitsPerEm, diff --git a/tools/corpus/src/report.ts b/tools/corpus/src/report.ts index 004bcf0..ed8f182 100644 --- a/tools/corpus/src/report.ts +++ b/tools/corpus/src/report.ts @@ -1,10 +1,14 @@ +import type { FeatureDistance } from "./features"; import type { CompareScore, GlyphDelta } from "./score"; +import type { CompareTier } from "./tiers"; import { TIER_RANK } from "./tiers"; -interface CompareRow { +export interface CompareRow { sourceId: string; file: string; score: CompareScore; + /** Typographic feature distance, when computed. Used as a within-tier ranking signal. */ + feature?: FeatureDistance; } interface RenderOptions { @@ -15,7 +19,8 @@ function formatCodepoint(cp: number): string { return `U+${cp.toString(16).toUpperCase().padStart(4, "0")}`; } -function formatDelta(value: number): string { +/** Format an advance delta (fraction of the em) for a table cell, or "n/a" when not measured. 
*/ +export function formatDelta(value: number): string { return Number.isNaN(value) ? "n/a" : value.toFixed(4); } @@ -26,12 +31,73 @@ function formatWorst(worst: GlyphDelta[]): string { .join("; "); } -/** Render the ranked table. Returned as a string so it can be tested without capturing stdout. */ -export function renderReport( - rows: CompareRow[], - options: RenderOptions = {}, +function featureCoverageOf(feature: FeatureDistance | undefined): number { + if (!feature || feature.total === 0) return 0; + return feature.compared / feature.total; +} + +/** Feature score for sorting: a real score, or +Infinity when nothing was compared. */ +function featureScoreOf(feature: FeatureDistance | undefined): number { + if (!feature || Number.isNaN(feature.score)) return Infinity; + return feature.score; +} + +/** Format the blended feature distance for a table cell, or "n/a" when nothing overlapped. */ +export function formatFeatureScore( + feature: FeatureDistance | undefined, +): string { + if (!feature || Number.isNaN(feature.score)) return "n/a"; + return feature.score.toFixed(4); +} + +/** Format how many features both fonts declared as a `compared/total` cell. */ +export function formatFeatureCoverage( + feature: FeatureDistance | undefined, ): string { - const ranked = [...rows].sort((a, b) => { + if (!feature) return "-"; + return `${feature.compared}/${feature.total}`; +} + +function carriesStrongAdvanceSignal(tier: CompareTier): boolean { + return ( + tier === "metric_safe" || + tier === "near_metric" || + tier === "cell_width_only" + ); +} + +/** + * Flag a strong advance match whose declared features disagree enough to need review. Only meaningful + * when the advance tier already vouches for line metrics; otherwise there is nothing to flag. 
+ */ +export function formatFlags( + tier: CompareTier, + feature: FeatureDistance | undefined, +): string { + if (!carriesStrongAdvanceSignal(tier) || !feature) return "-"; + if (feature.gaps.length === 0) return "-"; + return feature.gaps.map((gap) => `${gap.feature}_gap`).join(","); +} + +/** + * Render a fixed-column text table: a header row plus body rows, each cell left-aligned and padded to its column + * width. Shared by the corpus report and the bake-off so both print aligned columns the same way. + */ +export function formatTable(header: string[], body: string[][]): string { + const widths = header.map((h, col) => + Math.max(h.length, ...body.map((r) => r[col].length)), + ); + const line = (cells: string[]) => + cells + .map((cell, col) => cell.padEnd(widths[col])) + .join(" ") + .trimEnd(); + return [line(header), ...body.map(line)].join("\n"); +} + +/** Rank rows by the same order the text report prints. */ +export function rankRows<T extends CompareRow>(rows: T[]): T[] { + return [...rows].sort((a, b) => { const tierDiff = TIER_RANK[a.score.tier] - TIER_RANK[b.score.tier]; if (tierDiff !== 0) return tierDiff; const aCoverage = @@ -40,6 +106,12 @@ export function renderReport( b.score.total === 0 ? 0 : b.score.compared / b.score.total; const coverageDiff = bCoverage - aCoverage; if (coverageDiff !== 0) return coverageDiff; + const featureCoverageDiff = + featureCoverageOf(b.feature) - featureCoverageOf(a.feature); + if (featureCoverageDiff !== 0) return featureCoverageDiff; + const aFeature = featureScoreOf(a.feature); + const bFeature = featureScoreOf(b.feature); + if (aFeature !== bFeature) return aFeature - bFeature; const aMean = Number.isNaN(a.score.meanDelta) ? Infinity : a.score.meanDelta; @@ -48,6 +120,14 @@ export function renderReport( : b.score.meanDelta; return aMean - bMean; }); +} + +/** Render the ranked table. Returned as a string so it can be tested without capturing stdout.
*/ +export function renderReport( + rows: CompareRow[], + options: RenderOptions = {}, +): string { + const ranked = rankRows(rows); const visible = options.limit === null ? ranked : ranked.slice(0, options.limit); @@ -60,6 +140,9 @@ export function renderReport( "tier", "coverage", "missing", + "fscore", + "fcov", + "flags", "over1", "over2.5", "worst", @@ -72,18 +155,13 @@ export function renderReport( row.score.tier, `${row.score.compared}/${row.score.total}`, String(row.score.missing), + formatFeatureScore(row.feature), + formatFeatureCoverage(row.feature), + formatFlags(row.score.tier, row.feature), String(row.score.over1Percent), String(row.score.over2_5Percent), formatWorst(row.score.worstGlyphs), ]); - const widths = header.map((h, col) => - Math.max(h.length, ...body.map((r) => r[col].length)), - ); - const line = (cells: string[]) => - cells - .map((cell, col) => cell.padEnd(widths[col])) - .join(" ") - .trimEnd(); - return [line(header), ...body.map(line)].join("\n"); + return formatTable(header, body); } diff --git a/tools/corpus/src/visual.ts b/tools/corpus/src/visual.ts new file mode 100644 index 0000000..a0e2297 --- /dev/null +++ b/tools/corpus/src/visual.ts @@ -0,0 +1,87 @@ +import { execFileSync, spawnSync } from "node:child_process"; + +/** + * Optional rendered-glyph probe for the bake-off. ImageMagick is required only when `--visual` is used. + */ + +export const VISUAL_GLYPH_GRID = "aeg RQ& @ 0123456789 g a y j ?!"; + +const CANVAS_SIZE = "1100x140"; +const POINTSIZE = "64"; + +export function magickAvailable(): boolean { + try { + execFileSync("magick", ["-version"], { stdio: "ignore" }); + return true; + } catch { + return false; + } +} + +export function requireMagick(): void { + if (!magickAvailable()) + throw new Error( + "--visual needs ImageMagick 7 (`magick`) on PATH. 
Install it, or drop --visual to compare without rendered metrics.", + ); +} + +export function renderGlyphGrid(fontPath: string, outPath: string): void { + execFileSync( + "magick", + [ + "-size", + CANVAS_SIZE, + "canvas:white", + "-font", + fontPath, + "-pointsize", + POINTSIZE, + "-fill", + "black", + "-gravity", + "NorthWest", + "-annotate", + "+10+10", + VISUAL_GLYPH_GRID, + "-flatten", + outPath, + ], + { stdio: "ignore" }, + ); +} + +/** + * `magick compare -metric RMSE` prints a raw value plus a normalized value in parentheses. + */ +export function parseCompareMetric(output: string): number { + const parenthesized = output.match(/\(([\d.eE+-]+)\)/); + if (parenthesized) return Number(parenthesized[1]); + const leading = output.trim().match(/^[\d.eE+-]+/); + if (leading) return Number(leading[0]); + throw new Error( + `could not read a metric from \`magick compare\` output: ${output.trim()}`, + ); +} + +/** + * `magick compare` exits 1 for normal image differences and 2+ for real errors. + */ +export function compareImages( + referencePng: string, + candidatePng: string, +): number { + const result = spawnSync( + "magick", + ["compare", "-metric", "RMSE", referencePng, candidatePng, "null:"], + { encoding: "utf8" }, + ); + if (result.error) throw result.error; + const output = `${result.stderr ?? ""}${result.stdout ?? ""}`; + if (result.status !== 0 && result.status !== 1) + throw new Error(`\`magick compare\` failed: ${output.trim()}`); + return parseCompareMetric(output); +} + +export function formatVisualDiff(diff: number | undefined): string { + return diff === undefined || Number.isNaN(diff) ? 
"n/a" : diff.toFixed(4); +} diff --git a/tools/corpus/visual-review.test.ts b/tools/corpus/visual-review.test.ts new file mode 100644 index 0000000..041a38f --- /dev/null +++ b/tools/corpus/visual-review.test.ts @@ -0,0 +1,261 @@ +import { describe, expect, test } from "bun:test"; +import { isAbsolute } from "node:path"; +import { + DEFAULT_OUT, + escapeHtml, + FACE_SLOTS, + fontAssetName, + formatCodepoint, + isFaceSlot, + OVERLAY_GLYPHS, + renderVisualReviewApp, + resolveOutPath, + type VisualReviewApp, +} from "./src/contact-sheet"; +import { + parseArgs, + parseCandidateFaceValue, + parseFaceValue, + parseLabeledValue, +} from "./visual-review"; + +describe("parseArgs", () => { + test("keeps --reference and --candidate as regular-face shorthands", () => { + const args = parseArgs([ + "--reference", + "ref.ttf", + "--candidate", + "Inter=/fonts/inter.ttf", + "--candidate", + "Roboto=/fonts/roboto.ttf", + ]); + expect(args.referenceFaces).toEqual({ regular: "ref.ttf" }); + expect(args.candidates).toEqual([ + { label: "Inter", faces: { regular: "/fonts/inter.ttf" } }, + { label: "Roboto", faces: { regular: "/fonts/roboto.ttf" } }, + ]); + }); + + test("collects four-face reference and candidate values", () => { + const args = parseArgs([ + "--reference-face", + "regular=/ref-r.ttf", + "--reference-face", + "bold=/ref-b.ttf", + "--reference-face", + "italic=/ref-i.ttf", + "--reference-face", + "boldItalic=/ref-bi.ttf", + "--candidate-face", + "A:regular=/a-r.ttf", + "--candidate-face", + "A:bold=/a-b.ttf", + "--candidate-face", + "A:italic=/a-i.ttf", + "--candidate-face", + "A:boldItalic=/a-bi.ttf", + ]); + expect(args.referenceFaces).toEqual({ + regular: "/ref-r.ttf", + bold: "/ref-b.ttf", + italic: "/ref-i.ttf", + boldItalic: "/ref-bi.ttf", + }); + expect(args.candidates).toEqual([ + { + label: "A", + faces: { + regular: "/a-r.ttf", + bold: "/a-b.ttf", + italic: "/a-i.ttf", + boldItalic: "/a-bi.ttf", + }, + }, + ]); + }); + + test("captures --family and --out", 
() => { + const args = parseArgs([ + "--reference", + "ref.ttf", + "--family", + "Verdana", + "--out", + "/tmp/review.html", + ]); + expect(args.family).toBe("Verdana"); + expect(args.out).toBe("/tmp/review.html"); + }); + + test("rejects duplicate faces", () => { + expect(() => + parseArgs(["--reference", "a.ttf", "--reference-face", "regular=b.ttf"]), + ).toThrow("duplicate reference face: regular"); + expect(() => + parseArgs([ + "--candidate", + "A=/a.ttf", + "--candidate-face", + "A:regular=/b.ttf", + ]), + ).toThrow('duplicate candidate "A" face: regular'); + }); + + test("rejects unknown arguments and missing values", () => { + expect(() => parseArgs(["--bogus"])).toThrow("unknown argument: --bogus"); + expect(() => parseArgs(["--reference"])).toThrow( + "--reference requires a value", + ); + }); +}); + +describe("labeled values", () => { + test("splits on the first equals and trims", () => { + expect(parseLabeledValue("--candidate", " Inter = /a=b.ttf ")).toEqual([ + "Inter", + "/a=b.ttf", + ]); + }); + + test("parses reference face values", () => { + expect(parseFaceValue("--reference-face", "bold=/b.ttf")).toEqual([ + "bold", + "/b.ttf", + ]); + }); + + test("parses candidate face values", () => { + expect( + parseCandidateFaceValue("--candidate-face", "A:bold=/a.ttf"), + ).toEqual(["A", "bold", "/a.ttf"]); + }); + + test("rejects invalid face names and shapes", () => { + expect(() => parseFaceValue("--reference-face", "black=/b.ttf")).toThrow( + "face must be one of", + ); + expect(() => + parseCandidateFaceValue("--candidate-face", "A=/a.ttf"), + ).toThrow('expects "Label:face=value"'); + }); +}); + +describe("output helpers", () => { + test("defaults to the ignored cache path", () => { + expect(resolveOutPath(undefined)).toBe(DEFAULT_OUT); + expect(DEFAULT_OUT.endsWith("/.cache/corpus-visual/review.html")).toBe( + true, + ); + }); + + test("uses an explicit .html path as the file", () => { + 
expect(resolveOutPath("/tmp/out/review.html")).toBe("/tmp/out/review.html"); + }); + + test("treats a non-html path as a directory", () => { + expect(resolveOutPath("/tmp/out")).toBe("/tmp/out/review.html"); + }); + + test("resolves a relative path to absolute", () => { + const resolved = resolveOutPath("out/review.html"); + expect(isAbsolute(resolved)).toBe(true); + expect(resolved.endsWith("/out/review.html")).toBe(true); + }); + + test("names copied font assets by prefix and face", () => { + expect(fontAssetName("c0", "boldItalic", "/fonts/A.ttf")).toBe( + "c0-boldItalic.ttf", + ); + expect(fontAssetName("ref", "regular", "/fonts/A")).toBe("ref-regular.ttf"); + }); +}); + +describe("face and glyph helpers", () => { + test("recognizes the four face slots", () => { + for (const slot of FACE_SLOTS) expect(isFaceSlot(slot)).toBe(true); + expect(isFaceSlot("black")).toBe(false); + }); + + test("formatCodepoint pads to four hex digits", () => { + expect(formatCodepoint(0x61)).toBe("U+0061"); + expect(formatCodepoint(0x26)).toBe("U+0026"); + }); + + test("the overlay glyph set is non-empty and unique", () => { + expect(OVERLAY_GLYPHS.length).toBeGreaterThan(0); + expect(new Set(OVERLAY_GLYPHS).size).toBe(OVERLAY_GLYPHS.length); + }); +}); + +describe("escapeHtml", () => { + test("escapes the five HTML-significant characters", () => { + expect(escapeHtml(`&`)).toBe( + "<a href="x" class='y'>&</a>", + ); + }); +}); + +function sampleApp(): VisualReviewApp { + return { + family: "Verdana", + reference: { + label: "Reference", + faces: { + regular: { + asset: "assets/fonts/ref-regular.ttf", + sourceName: "Reference.ttf", + }, + bold: { + asset: "assets/fonts/ref-bold.ttf", + sourceName: "Reference Bold.ttf", + }, + }, + }, + candidates: [ + { + label: "Inter ", + faces: { + regular: { + asset: "assets/fonts/c0-regular.ttf", + sourceName: "Inter.ttf", + }, + bold: { + asset: "assets/fonts/c0-bold.ttf", + sourceName: "Inter Bold.ttf", + }, + }, + }, + ], + }; +} + 
+describe("renderVisualReviewApp", () => { + test("produces a complete HTML document with font-face rules", () => { + const html = renderVisualReviewApp(sampleApp()); + expect(html.startsWith("")).toBe(true); + expect(html).toContain("Visual review: Verdana"); + expect(html).toContain('@font-face { font-family: "docfonts-ref-regular"'); + expect(html).toContain('src: url("assets/fonts/c0-regular.ttf")'); + expect(html.trimEnd().endsWith("")).toBe(true); + }); + + test("escapes visible labels and serializes safe JSON", () => { + const html = renderVisualReviewApp(sampleApp()); + expect(html).not.toContain(""); + expect(html).toContain('"label":"Inter \\u003cb>"'); + }); + + test("embeds the face selector and overlay app script", () => { + const html = renderVisualReviewApp(sampleApp()); + expect(html).toContain('id="face-toolbar"'); + expect(html).toContain("let selectedFace = firstAvailableFace"); + expect(html).toContain("Cyan is reference, magenta is candidate"); + }); + + test("falls back to a placeholder family", () => { + const app = sampleApp(); + app.family = undefined; + const html = renderVisualReviewApp(app); + expect(html).toContain("Visual review: (family not specified)"); + }); +}); diff --git a/tools/corpus/visual-review.ts b/tools/corpus/visual-review.ts new file mode 100644 index 0000000..f097bfe --- /dev/null +++ b/tools/corpus/visual-review.ts @@ -0,0 +1,250 @@ +import { + copyFileSync, + existsSync, + mkdirSync, + rmSync, + writeFileSync, +} from "node:fs"; +import { basename, dirname, join } from "node:path"; +import { + FACE_SLOTS, + type FaceAssets, + type FaceSlot, + FONT_ASSETS_DIR, + type FontFaceAsset, + type FontSetView, + fontAssetName, + isFaceSlot, + renderVisualReviewApp, + resolveOutPath, + type VisualReviewApp, +} from "./src/contact-sheet"; + +export type FacePaths = Partial<Record<FaceSlot, string>>; + +export interface VisualCandidate { + label: string; + faces: FacePaths; +} + +export interface ParsedArgs { + family?: string; + referenceFaces:
FacePaths; + candidates: VisualCandidate[]; + out?: string; +} + +function readValue(flag: string, argv: string[], index: number): string { + const value = argv[index + 1]; + if (!value || value.startsWith("--")) + throw new Error(`${flag} requires a value`); + return value; +} + +export function parseLabeledValue(flag: string, raw: string): [string, string] { + const eq = raw.indexOf("="); + if (eq <= 0) throw new Error(`${flag} expects "Label=value", got "${raw}"`); + const label = raw.slice(0, eq).trim(); + const value = raw.slice(eq + 1).trim(); + if (!label || !value) + throw new Error(`${flag} expects "Label=value", got "${raw}"`); + return [label, value]; +} + +export function parseFaceValue(flag: string, raw: string): [FaceSlot, string] { + const [face, path] = parseLabeledValue(flag, raw); + if (!isFaceSlot(face)) + throw new Error(`${flag} face must be one of: ${FACE_SLOTS.join(", ")}`); + return [face, path]; +} + +export function parseCandidateFaceValue( + flag: string, + raw: string, +): [string, FaceSlot, string] { + const [left, path] = parseLabeledValue(flag, raw); + const sep = left.lastIndexOf(":"); + if (sep <= 0 || sep === left.length - 1) + throw new Error(`${flag} expects "Label:face=value", got "${raw}"`); + const label = left.slice(0, sep).trim(); + const face = left.slice(sep + 1).trim(); + if (!label) throw new Error(`${flag} expects a non-empty label`); + if (!isFaceSlot(face)) + throw new Error(`${flag} face must be one of: ${FACE_SLOTS.join(", ")}`); + return [label, face, path]; +} + +function setFace( + faces: FacePaths, + slot: FaceSlot, + path: string, + source: string, +): void { + if (faces[slot]) throw new Error(`duplicate ${source} face: ${slot}`); + faces[slot] = path; +} + +function candidateFor( + candidates: VisualCandidate[], + label: string, +): VisualCandidate { + const existing = candidates.find((candidate) => candidate.label === label); + if (existing) return existing; + const candidate = { label, faces: {} }; + 
candidates.push(candidate); + return candidate; +} + +export function parseArgs(argv: string[]): ParsedArgs { + const args: ParsedArgs = { referenceFaces: {}, candidates: [] }; + for (let i = 0; i < argv.length; i++) { + const flag = argv[i]; + switch (flag) { + case "--reference": + setFace( + args.referenceFaces, + "regular", + readValue(flag, argv, i), + "reference", + ); + i++; + break; + case "--reference-face": { + const [slot, path] = parseFaceValue(flag, readValue(flag, argv, i)); + setFace(args.referenceFaces, slot, path, "reference"); + i++; + break; + } + case "--family": + args.family = readValue(flag, argv, i); + i++; + break; + case "--candidate": { + const [label, path] = parseLabeledValue(flag, readValue(flag, argv, i)); + const candidate = candidateFor(args.candidates, label); + setFace(candidate.faces, "regular", path, `candidate "${label}"`); + i++; + break; + } + case "--candidate-face": { + const [label, slot, path] = parseCandidateFaceValue( + flag, + readValue(flag, argv, i), + ); + const candidate = candidateFor(args.candidates, label); + setFace(candidate.faces, slot, path, `candidate "${label}"`); + i++; + break; + } + case "--out": + args.out = readValue(flag, argv, i); + i++; + break; + default: + throw new Error(`unknown argument: ${flag}`); + } + } + return args; +} + +function assertFontPaths(args: ParsedArgs): void { + if (Object.keys(args.referenceFaces).length === 0) + throw new Error( + "missing --reference: pass a regular reference, or use --reference-face face=/path.", + ); + if (args.candidates.length === 0) + throw new Error( + 'missing --candidate: pass at least one "Label=/path/to/font.ttf".', + ); + for (const [slot, path] of Object.entries(args.referenceFaces)) { + if (!existsSync(path)) + throw new Error(`reference ${slot} font not found: ${path}`); + } + for (const candidate of args.candidates) { + for (const [slot, path] of Object.entries(candidate.faces)) { + if (!existsSync(path)) + throw new Error( + `candidate 
"${candidate.label}" ${slot} font not found: ${path}`, + ); + } + } +} + +function copyFace( + sourcePath: string, + prefix: string, + slot: FaceSlot, + fontsDir: string, +): FontFaceAsset { + const file = fontAssetName(prefix, slot, sourcePath); + const dest = join(fontsDir, file); + copyFileSync(sourcePath, dest); + return { + asset: `${FONT_ASSETS_DIR}/${file}`, + sourceName: basename(sourcePath), + }; +} + +function copyFaces( + faces: FacePaths, + prefix: string, + fontsDir: string, +): FaceAssets { + const out: FaceAssets = {}; + for (const slot of FACE_SLOTS) { + const sourcePath = faces[slot]; + if (!sourcePath) continue; + out[slot] = copyFace(sourcePath, prefix, slot, fontsDir); + } + return out; +} + +function fontSet( + label: string, + faces: FacePaths, + prefix: string, + fontsDir: string, +): FontSetView { + return { + label, + faces: copyFaces(faces, prefix, fontsDir), + }; +} + +export function buildVisualReviewApp( + args: ParsedArgs, + htmlPath: string, +): VisualReviewApp { + assertFontPaths(args); + const outDir = dirname(htmlPath); + const fontsDir = join(outDir, FONT_ASSETS_DIR); + rmSync(fontsDir, { recursive: true, force: true }); + mkdirSync(fontsDir, { recursive: true }); + + return { + family: args.family, + reference: fontSet("Reference", args.referenceFaces, "ref", fontsDir), + candidates: args.candidates.map((candidate, index) => + fontSet(candidate.label, candidate.faces, `c${index}`, fontsDir), + ), + }; +} + +function main(): void { + const args = parseArgs(process.argv.slice(2)); + const htmlPath = resolveOutPath(args.out); + const app = buildVisualReviewApp(args, htmlPath); + writeFileSync(htmlPath, renderVisualReviewApp(app)); + console.log( + `wrote visual review app for ${app.candidates.length} candidate(s) to ${htmlPath}`, + ); +} + +if (import.meta.main) { + try { + main(); + } catch (err) { + console.error(err instanceof Error ? err.message : err); + process.exit(1); + } +}