From e613c3fed9175b21ae45ad15a8f15fe8c2442186 Mon Sep 17 00:00:00 2001 From: adibarra <93070681+adibarra@users.noreply.github.com> Date: Tue, 31 Mar 2026 17:21:44 -0500 Subject: [PATCH 01/10] add publishDate frontmatter field for scheduled blog publishing --- packages/app/src/lib/blog.ts | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/app/src/lib/blog.ts b/packages/app/src/lib/blog.ts index 8a846f6..4fb4c5b 100644 --- a/packages/app/src/lib/blog.ts +++ b/packages/app/src/lib/blog.ts @@ -8,6 +8,7 @@ export interface BlogFrontmatter { date: string; subtitle: string; modifiedDate?: string; + publishDate?: string; tags?: string[]; } @@ -50,7 +51,13 @@ export function getAllPosts(): BlogPostMeta[] { }; }); - return posts.sort((a, b) => new Date(b.date).getTime() - new Date(a.date).getTime()); + const now = new Date(); + const visible = + process.env.NODE_ENV === 'production' + ? posts.filter((p) => !p.publishDate || new Date(p.publishDate + 'T00:00:00Z') <= now) + : posts; + + return visible.sort((a, b) => new Date(b.date).getTime() - new Date(a.date).getTime()); } export interface AdjacentPosts { From 8558414279873a70dda111a62fbd40dae04d81bd Mon Sep 17 00:00:00 2001 From: adibarra <93070681+adibarra@users.noreply.github.com> Date: Tue, 31 Mar 2026 17:21:53 -0500 Subject: [PATCH 02/10] add JsonLd MDX component for structured data in blog posts --- packages/app/src/components/blog/mdx-components.tsx | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/packages/app/src/components/blog/mdx-components.tsx b/packages/app/src/components/blog/mdx-components.tsx index da4b1ef..b9e4d8d 100644 --- a/packages/app/src/components/blog/mdx-components.tsx +++ b/packages/app/src/components/blog/mdx-components.tsx @@ -118,5 +118,15 @@ export function createMdxComponents(): Record> ); }, Blur, + JsonLd: (props: { children?: ReactNode }) => { + const raw = childrenToText(props.children).trim(); + if (!raw) return null; + try { + JSON.parse(raw); 
+ } catch { + return null; + } + return ; + }, }; } From 4e4441d8494265a6119cdea0232c2f8a3a4aa85c Mon Sep 17 00:00:00 2001 From: adibarra <93070681+adibarra@users.noreply.github.com> Date: Tue, 31 Mar 2026 23:48:05 -0500 Subject: [PATCH 03/10] add scrollable table wrapper for blog prose --- packages/app/src/app/globals.css | 10 ++++++++++ packages/app/src/components/blog/mdx-components.tsx | 5 +++++ 2 files changed, 15 insertions(+) diff --git a/packages/app/src/app/globals.css b/packages/app/src/app/globals.css index 27c29fe..32aa678 100644 --- a/packages/app/src/app/globals.css +++ b/packages/app/src/app/globals.css @@ -17,6 +17,16 @@ word-break: break-all; } +/* Allow wide tables to scroll horizontally on narrow viewports */ +.blog-prose .table-scroll { + overflow-x: auto; + -webkit-overflow-scrolling: touch; +} + +.blog-prose .table-scroll table { + min-width: 100%; +} + /* Remove auto-inserted curly quotes from blockquotes in blog prose */ .blog-prose blockquote p:first-of-type::before, .blog-prose blockquote p:last-of-type::after { diff --git a/packages/app/src/components/blog/mdx-components.tsx b/packages/app/src/components/blog/mdx-components.tsx index b9e4d8d..ca4151d 100644 --- a/packages/app/src/components/blog/mdx-components.tsx +++ b/packages/app/src/components/blog/mdx-components.tsx @@ -117,6 +117,11 @@ export function createMdxComponents(): Record> ); }, + table: (props: React.TableHTMLAttributes) => ( +
+ + + ), Blur, JsonLd: (props: { children?: ReactNode }) => { const raw = childrenToText(props.children).trim(); From c50904ba71c7ddf177be79fdc8f7f650741f8c50 Mon Sep 17 00:00:00 2001 From: adibarra <93070681+adibarra@users.noreply.github.com> Date: Wed, 1 Apr 2026 11:08:44 -0500 Subject: [PATCH 04/10] add SEO article generation infrastructure --- .github/workflows/seo-articles.yml | 100 ++++++++++ .gitignore | 1 + package.json | 3 +- packages/app/package.json | 3 +- packages/app/scripts/generate-seo-articles.ts | 149 ++++++++++++++ packages/app/scripts/seo/article-template.md | 187 ++++++++++++++++++ packages/app/scripts/seo/data.ts | 109 ++++++++++ packages/app/scripts/seo/types.ts | 36 ++++ 8 files changed, 586 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/seo-articles.yml create mode 100644 packages/app/scripts/generate-seo-articles.ts create mode 100644 packages/app/scripts/seo/article-template.md create mode 100644 packages/app/scripts/seo/data.ts create mode 100644 packages/app/scripts/seo/types.ts diff --git a/.github/workflows/seo-articles.yml b/.github/workflows/seo-articles.yml new file mode 100644 index 0000000..f979710 --- /dev/null +++ b/.github/workflows/seo-articles.yml @@ -0,0 +1,100 @@ +name: SEO Articles + +on: + schedule: + # Every Monday at 08:00 UTC + - cron: '0 8 * * 1' + workflow_dispatch: + +jobs: + generate: + timeout-minutes: 30 + runs-on: ubuntu-latest + permissions: + contents: write + pull-requests: write + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 1 + token: ${{ secrets.PAT }} + + - uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v4.4.0 + + - uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0 + with: + node-version: '24' + cache: pnpm + + - name: Install dependencies + run: pnpm install --filter @semianalysisai/inferencex-app... 
        env:
          CYPRESS_INSTALL_BINARY: '0'

      - name: Fetch benchmark data
        run: pnpm admin:seo:data

      - name: Generate articles with Claude
        uses: anthropics/claude-code-action@88c168b39e7e64da0286d812b6e9fbebb6708185 # v1.0.82
        with:
          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
          github_token: ${{ secrets.PAT }}
          direct_prompt: |
            You are generating SEO blog articles for InferenceX from benchmark data.

            ## Instructions
            1. Read the article template at `packages/app/scripts/seo/article-template.md`
            2. Read the benchmark data at `packages/app/tmp/seo-data.json`
            3. For each model in the data, create or update an MDX file at
               `packages/app/content/blog/best-gpu-for-<modelKey>-inference.mdx`
            4. Create/update the rollup article at
               `packages/app/content/blog/inference-benchmark-roundup.mdx`

            Follow the template structure EXACTLY — same sections, same order, same table format.
            Write natural, varied prose for each model (not copy-paste between articles).

            ## Critical Rules
            - ALL numbers MUST come from the data file. Never invent or estimate numbers.
            - TTFT/TPOT/E2EL values in the data are in SECONDS — multiply by 1000 for display in ms.
            - Use gpuDisplayName from the data (e.g. "NVIDIA B200", "AMD MI 355X").
            - Preserve the `date` frontmatter from existing files (check if file exists first).
            - The runner-up in Key Findings must be a DIFFERENT GPU than the winner.
            - Skip the 1k/8k sequence entirely — it is deprecated.
            - Format large numbers with commas (e.g. 18,131.6).
+ allowed_tools: 'Read,Write,Edit,Glob,Grep,Bash' + claude_args: '--model claude-sonnet-4-5-20250929' + + - name: Check for changes + id: changes + run: | + if git diff --quiet -- 'packages/app/content/blog/'; then + echo "changed=false" >> "$GITHUB_OUTPUT" + else + echo "changed=true" >> "$GITHUB_OUTPUT" + fi + + - name: Create PR + if: steps.changes.outputs.changed == 'true' + env: + GH_TOKEN: ${{ secrets.PAT }} + run: | + DATE=$(date +%Y-%m-%d) + BRANCH="seo/update-articles-${DATE}" + git checkout -b "$BRANCH" + git add 'packages/app/content/blog/' + git commit -m "update SEO benchmark articles (${DATE})" + git push origin "$BRANCH" + gh pr create \ + --title "update SEO benchmark articles (${DATE})" \ + --body "$(cat <<'EOF' + ## Summary + - Auto-generated SEO blog articles from latest benchmark data + - Articles written by Claude using real data from `seo-data.json` + + ## Review checklist + - [ ] Spot-check numbers against live dashboard + - [ ] Verify JSON-LD in page source + - [ ] Read prose for quality and accuracy + EOF + )" \ + --base master diff --git a/.gitignore b/.gitignore index b9f92a6..e86e3d4 100644 --- a/.gitignore +++ b/.gitignore @@ -46,6 +46,7 @@ **/inferencex-backup-*.dump # local data +**/tmp **/gcs **/logs **/public/data/* diff --git a/package.json b/package.json index cfab976..f70eb1e 100644 --- a/package.json +++ b/package.json @@ -38,7 +38,8 @@ "admin:db:migrate": "pnpm --filter *db db:migrate", "admin:db:apply-overrides": "pnpm --filter *db db:apply-overrides", "admin:db:reset": "pnpm --filter *db db:reset", - "admin:db:verify": "pnpm --filter *db db:verify" + "admin:db:verify": "pnpm --filter *db db:verify", + "admin:seo:data": "pnpm --filter *app seo:data" }, "devDependencies": { "audit-ci": "^7.1.0", diff --git a/packages/app/package.json b/packages/app/package.json index 722ab67..1d95354 100644 --- a/packages/app/package.json +++ b/packages/app/package.json @@ -21,7 +21,8 @@ "clean": "rimraf .next out", "clean:all": "rimraf .next 
out cypress/videos cypress/screenshots coverage", "cache:invalidate": "dotenv -e ../../.env -- tsx scripts/invalidate-cache.ts", - "cache:warmup": "dotenv -e ../../.env -- tsx scripts/warmup-cache.ts" + "cache:warmup": "dotenv -e ../../.env -- tsx scripts/warmup-cache.ts", + "seo:data": "tsx scripts/generate-seo-articles.ts" }, "dependencies": { "@jpinsonneau/html-to-image": "^1.11.13", diff --git a/packages/app/scripts/generate-seo-articles.ts b/packages/app/scripts/generate-seo-articles.ts new file mode 100644 index 0000000..7ae8390 --- /dev/null +++ b/packages/app/scripts/generate-seo-articles.ts @@ -0,0 +1,149 @@ +export {}; + +/** + * Fetch and aggregate benchmark data for SEO article generation. + * Outputs a JSON file that Claude uses to write natural-language articles. + * + * Usage: + * pnpm admin:seo:data # production API + * pnpm admin:seo:data --base-url http://localhost:3000 # local dev + * pnpm admin:seo:data --output /tmp/seo-data.json # custom output path + */ + +import fs from 'node:fs'; +import path from 'node:path'; + +import { GPU_VENDORS } from '@semianalysisai/inferencex-constants'; + +import { aggregateModelData, allModels, distinctGpus, fetchBenchmarks } from './seo/data'; +import type { BestConfig } from './seo/types'; + +const PRIMARY_SEQ = '8k/1k'; +const MIN_GPUS = 2; + +// --------------------------------------------------------------------------- +// CLI arg parsing +// --------------------------------------------------------------------------- + +function parseArgs(): { baseUrl: string; output: string } { + const args = process.argv.slice(2); + let baseUrl = 'https://inferencex.semianalysis.com'; + let output = path.join(process.cwd(), 'tmp', 'seo-data.json'); + + for (let i = 0; i < args.length; i++) { + if (args[i] === '--base-url' && args[i + 1]) { + baseUrl = args[++i]; + } else if (args[i] === '--output' && args[i + 1]) { + output = args[++i]; + } + } + + return { baseUrl: baseUrl.replace(/\/$/, ''), output }; +} + +// 
--------------------------------------------------------------------------- +// Serializable output types +// --------------------------------------------------------------------------- + +interface SerializableBestConfig extends BestConfig { + gpuDisplayName: string; + vendor: string; +} + +interface SerializableModelData { + modelKey: string; + displayName: string; + slug: string; + totalRows: number; + sequences: Record; + primarySequence: string; + gpuCount: number; + precisionCount: number; + frameworkCount: number; +} + +interface SeoDataOutput { + generatedAt: string; + baseUrl: string; + primarySequence: string; + models: SerializableModelData[]; +} + +// --------------------------------------------------------------------------- +// Main +// --------------------------------------------------------------------------- + +function gpuDisplay(hw: string): string { + const vendor = GPU_VENDORS[hw]; + const upper = hw.toUpperCase().replace('MI', 'MI '); + return vendor ? `${vendor} ${upper}` : upper; +} + +function enrichConfig(c: BestConfig): SerializableBestConfig { + return { + ...c, + gpuDisplayName: gpuDisplay(c.hardware), + vendor: GPU_VENDORS[c.hardware] ?? 
'Unknown', + }; +} + +async function main() { + const { baseUrl, output } = parseArgs(); + console.log(`Fetching benchmark data from: ${baseUrl}`); + + const models = allModels(); + const result: SeoDataOutput = { + generatedAt: new Date().toISOString(), + baseUrl, + primarySequence: PRIMARY_SEQ, + models: [], + }; + + for (const [modelKey, displayName] of models) { + console.log(` Fetching: ${displayName} (${modelKey})`); + + const rows = await fetchBenchmarks(baseUrl, displayName); + if (rows.length === 0) { + console.log(' Skipped: no benchmark data'); + continue; + } + + const data = aggregateModelData(modelKey, displayName, rows); + const gpus = distinctGpus(data, PRIMARY_SEQ); + + if (gpus.size < MIN_GPUS) { + console.log(` Skipped: only ${gpus.size} GPU(s) at ${PRIMARY_SEQ} (need ${MIN_GPUS}+)`); + continue; + } + + const primaryConfigs = data.bestBySequence.get(PRIMARY_SEQ) ?? []; + + // Convert Map to plain object for JSON serialization + const sequences: Record = {}; + for (const [seq, configs] of data.bestBySequence) { + sequences[seq] = configs.map(enrichConfig); + } + + result.models.push({ + modelKey, + displayName, + slug: `best-gpu-for-${modelKey}-inference`, + totalRows: rows.length, + sequences, + primarySequence: PRIMARY_SEQ, + gpuCount: new Set(primaryConfigs.map((c) => c.hardware)).size, + precisionCount: new Set(primaryConfigs.map((c) => c.precision)).size, + frameworkCount: new Set(primaryConfigs.map((c) => c.framework)).size, + }); + } + + // Write output + fs.mkdirSync(path.dirname(output), { recursive: true }); + fs.writeFileSync(output, JSON.stringify(result, null, 2), 'utf-8'); + console.log(`\nWrote ${result.models.length} models to: ${output}`); +} + +main().catch((err) => { + console.error('generate-seo-data failed:', err); + process.exitCode = 1; +}); diff --git a/packages/app/scripts/seo/article-template.md b/packages/app/scripts/seo/article-template.md new file mode 100644 index 0000000..660298d --- /dev/null +++ 
b/packages/app/scripts/seo/article-template.md @@ -0,0 +1,187 @@ +# SEO Article Template + +Claude uses this template to generate per-model benchmark articles. +Every article MUST follow this section order. Claude's job is to fill in the data +and write natural prose that reads like a SemiAnalysis engineer wrote it — not a content farm. + +**Key principle:** Adapt depth and tone to the data. A model with 9 GPUs, 6 frameworks, +and disaggregated data should produce a rich, detailed article. A model with 3 GPUs and +1 framework should be shorter and honest about limited coverage. + +--- + +## Frontmatter + +```yaml +--- +title: 'Best GPU for Inference — Benchmarks & Comparison ()' +subtitle: '' +date: '' +modifiedDate: '' +tags: + - benchmark + - inference + - gpu + - +--- +``` + +**Subtitle examples (good):** + +- "GB300 takes the crown, but AMD's MI 355X closes the gap on this 671B MoE beast" +- "A dense 70B model where framework choice matters as much as GPU generation" +- "Limited data, clear winner — B200 dominates with only 3 GPUs tested so far" + +**Subtitle examples (bad — too formulaic):** + +- "We benchmarked X across Y GPUs, Z precisions, and N frameworks" +- "Comprehensive benchmarks across N GPUs with real throughput data" + +## Section 1: Intro (1 paragraph) + +2-3 sentences. Describe the model (size, architecture if known). Explain what makes +GPU selection interesting _for this specific model_ — don't just repeat generic "GPU choice matters." +Link to the dashboard: `[InferenceX](https://inferencex.semianalysis.com)` + +**Cross-link:** If there are related models (same vendor, similar size, or interesting to compare), +mention 1-2 with links: `[see our DeepSeek-R1 benchmarks](/blog/best-gpu-for-dsr1-inference)` + +## Section 2: Key Findings (h2) + +3-5 bullet points. 
Rules: + +- First bullet: the overall winner GPU, with bold throughput number +- Second bullet: runner-up — MUST be a different GPU than the winner +- Remaining bullets: highlight what's INTERESTING about this model's data specifically. + Don't just list numbers — surface insights (e.g. "AMD closes the gap to within 10%", + "FP4 provides a 2x uplift", "disagg actually hurts on this model", "the H100 holds up + surprisingly well", "only 1 framework was tested so results may improve") +- Every bullet must contain a concrete number from the data + +## Section 3: GPU Comparison Table (h2) + +Title: `## GPU Comparison — at 8k/1k` + +Markdown table with ONE row per GPU (best config for that GPU), sorted by throughput descending. + +| GPU | Precision | Framework | Throughput/GPU (tok/s) | Median TTFT (ms) | Median TPOT (ms) | Concurrency | Date | +| --- | --------- | --------- | ---------------------: | ---------------: | ---------------: | ----------: | ---- | + +- Use gpuDisplayName (e.g. "NVIDIA B200", "AMD MI 355X") +- Precision in UPPERCASE (FP4, FP8, BF16, INT4) +- TTFT and TPOT: convert from seconds to milliseconds (multiply by 1000) +- Format large numbers with commas (e.g. 18,131.6) + +Footer: `*One row per GPU showing the highest-throughput configuration. All data from automated [InferenceX](https://inferencex.semianalysis.com) benchmarks.*` + +## Section 4: FAQ Sections (h2 each) + +Each FAQ is an h2 heading with 1 paragraph answer. Cover these topics IN ORDER, +skipping any that don't apply to this model's data: + +### 4a: Best GPU (always include) + +Write a natural heading — doesn't have to be "What is the best GPU for X?" +Could be "Which GPU should you pick for X?" or "The fastest GPU for X" etc. +Mention the winner, a practical alternative, and AMD option if available. + +### 4b: Best Precision (include if >1 precision in data) + +Heading should name the specific precisions being compared (e.g. "FP4 vs FP8", "FP8 vs BF16"). 
+Compare throughput at same GPU where possible. State the winner clearly. + +### 4c: Best Framework (include if >1 framework in data) + +Compare frameworks, noting which GPU each excels on. + +### 4d: Disaggregated Prefill (include if both disagg=true and disagg=false exist) + +Compare best disagg vs best non-disagg result with percentage difference. + +### 4e: GPU Head-to-Head (always include) + +Top 2 GPUs by throughput. Compare throughput, TTFT, TPOT. Note tradeoffs. +Phrase the heading naturally for the specific matchup. + +### 4f: Lowest TTFT (always include) + +Find the config with the ACTUAL lowest medianTtft across ALL configs at 8k/1k +(not just best-per-GPU rows). Note the throughput tradeoff. + +**IMPORTANT:** Write each answer differently. Vary sentence length, structure, and opening. +Don't start every answer with "The NVIDIA B200..." — mix it up with context, tradeoffs, +or the surprising finding first. + +## Section 5: Additional Sequence Table (h2) + +`## Additional Results — 1k/1k Sequence Length` + +Same table format as Section 3, one row per GPU, sorted by throughput descending. +Skip this section if no 1k/1k data exists. +NEVER include 1k/8k data. + +## Section 6: Cross-Links (h2) + +`## Related Benchmarks` + +Link to 2-3 other model articles that readers might find useful. +Group by relevance: similar model size, same vendor, or commonly compared. +Example: "If you're evaluating MoE models, see our [DeepSeek-R1 benchmarks](/blog/best-gpu-for-dsr1-inference) and [Qwen-3.5 results](/blog/best-gpu-for-qwen3.5-inference)." 
+ +Use these slugs for cross-links: + +- `/blog/best-gpu-for-dsr1-inference` (DeepSeek-R1-0528) +- `/blog/best-gpu-for-gptoss120b-inference` (gpt-oss-120b) +- `/blog/best-gpu-for-llama70b-inference` (Llama-3.3-70B) +- `/blog/best-gpu-for-qwen3.5-inference` (Qwen-3.5-397B) +- `/blog/best-gpu-for-kimik2.5-inference` (Kimi-K2.5) +- `/blog/best-gpu-for-minimaxm2.5-inference` (MiniMax-M2.5) +- `/blog/best-gpu-for-glm5-inference` (GLM-5) + +## Section 7: ClusterMax CTA (h2) + +`## Where to Run Inference` + +One line: `Looking for API providers? See real-time provider rankings on [ClusterMax](https://www.clustermax.ai/).` + +## Section 8: Methodology (h2) + +`## Methodology` + +2-3 sentences about automated nightly benchmarking. Link to dashboard. +End with: `*Last updated: .*` + +Do NOT say "automatically generated" — just give the date. + +## Section 9: JsonLd (no heading) + +```mdx +{``} +``` + +The JSON-LD must include every FAQ from Section 4 as a Question/Answer pair. + +--- + +## Rollup Article Template + +Slug: `inference-benchmark-roundup` + +### Frontmatter + +```yaml +title: 'ML Inference Benchmark Roundup — GPU Comparison ()' +subtitle: '' +date: '' +modifiedDate: '' +tags: [benchmark, inference, gpu, roundup] +``` + +### Sections (in order): + +1. **Overview** (h2) — 1 paragraph explaining this is a cross-model summary, link to dashboard +2. **Best GPU Per Model** (h2) — markdown table: Model (linked to article), Best GPU, Precision, Throughput/GPU +3. **Per-Model Details** (h2) — one h3 per model with best result, interesting insight, and link to full article +4. **ClusterMax CTA** (h2) +5. **Methodology** (h2) — end with `*Last updated: .*` +6. 
**JsonLd** — Article schema (not FAQPage) diff --git a/packages/app/scripts/seo/data.ts b/packages/app/scripts/seo/data.ts new file mode 100644 index 0000000..d70b807 --- /dev/null +++ b/packages/app/scripts/seo/data.ts @@ -0,0 +1,109 @@ +import { DB_MODEL_TO_DISPLAY, GPU_VENDORS } from '@semianalysisai/inferencex-constants'; + +import type { BenchmarkRow } from '../../src/lib/api'; +import type { BestConfig, ModelData } from './types'; + +/** Human-readable GPU name (e.g. "NVIDIA B200"). */ +export function gpuDisplayName(hw: string): string { + const vendor = GPU_VENDORS[hw]; + const upper = hw.toUpperCase().replace('MI', 'MI '); + return vendor ? `${vendor} ${upper}` : upper; +} + +/** Human-friendly model slug for article filenames. */ +export function modelSlug(displayName: string): string { + return displayName + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-+|-+$/g, ''); +} + +/** Sequence key from ISL/OSL. */ +function seqKey(isl: number, osl: number): string { + const fmt = (n: number) => (n >= 1024 ? `${Math.round(n / 1024)}k` : String(n)); + return `${fmt(isl)}/${fmt(osl)}`; +} + +/** Fetch benchmark data for a model from the API. */ +export async function fetchBenchmarks( + baseUrl: string, + displayName: string, +): Promise { + const url = `${baseUrl}/api/v1/benchmarks?model=${encodeURIComponent(displayName)}`; + const res = await fetch(url); + if (!res.ok) { + console.warn(` Failed to fetch ${displayName}: ${res.status} ${res.statusText}`); + return []; + } + return (await res.json()) as BenchmarkRow[]; +} + +/** + * Group benchmark rows by sequence length and find the best config + * (highest tput_per_gpu) for each GPU+precision+framework combo. 
+ */ +export function aggregateModelData( + modelKey: string, + displayName: string, + rows: BenchmarkRow[], +): ModelData { + const bySeq = new Map(); + + for (const row of rows) { + const key = seqKey(row.isl, row.osl); + if (key === '1k/8k') continue; // deprecated sequence, skip + const arr = bySeq.get(key); + if (arr) { + arr.push(row); + } else { + bySeq.set(key, [row]); + } + } + + const bestBySequence = new Map(); + + for (const [seq, seqRows] of bySeq) { + // Group by hardware+precision+framework+disagg, keep best tput_per_gpu + const configMap = new Map(); + + for (const row of seqRows) { + const tput = row.metrics.tput_per_gpu ?? 0; + if (tput <= 0) continue; + + const configKey = `${row.hardware}|${row.precision}|${row.framework}|${row.disagg}`; + const existing = configMap.get(configKey); + + if (!existing || tput > existing.tputPerGpu) { + configMap.set(configKey, { + hardware: row.hardware, + precision: row.precision, + framework: row.framework, + disagg: row.disagg, + tputPerGpu: tput, + medianTtft: row.metrics.median_ttft ?? 0, + medianTpot: row.metrics.median_tpot ?? 0, + medianE2el: row.metrics.median_e2el ?? 0, + conc: row.conc, + tp: row.disagg ? row.num_prefill_gpu + row.num_decode_gpu : row.decode_tp, + date: row.date, + }); + } + } + + const configs = [...configMap.values()].sort((a, b) => b.tputPerGpu - a.tputPerGpu); + bestBySequence.set(seq, configs); + } + + return { modelKey, displayName, rows, bestBySequence }; +} + +/** Get the distinct GPU keys that have data for a model at the primary sequence. */ +export function distinctGpus(data: ModelData, primarySeq: string): Set { + const configs = data.bestBySequence.get(primarySeq) ?? []; + return new Set(configs.map((c) => c.hardware)); +} + +/** All available model entries as [dbKey, displayName] pairs. 
*/ +export function allModels(): [string, string][] { + return Object.entries(DB_MODEL_TO_DISPLAY); +} diff --git a/packages/app/scripts/seo/types.ts b/packages/app/scripts/seo/types.ts new file mode 100644 index 0000000..7792dd5 --- /dev/null +++ b/packages/app/scripts/seo/types.ts @@ -0,0 +1,36 @@ +import type { BenchmarkRow } from '../../src/lib/api'; + +/** Aggregated best result for a GPU+precision+framework combination. */ +export interface BestConfig { + hardware: string; + precision: string; + framework: string; + disagg: boolean; + tputPerGpu: number; + medianTtft: number; + medianTpot: number; + medianE2el: number; + conc: number; + tp: number; + date: string; +} + +/** Per-model data grouped by sequence length. */ +export interface ModelData { + modelKey: string; + displayName: string; + rows: BenchmarkRow[]; + bestBySequence: Map; +} + +/** FAQ question/answer pair for JSON-LD. */ +export interface FaqEntry { + question: string; + answer: string; +} + +/** CLI options for the generation script. 
*/ +export interface GenerateOptions { + baseUrl: string; + dryRun: boolean; +} From bcb6779443d8e9d4243eb8f4a9998909da951bd4 Mon Sep 17 00:00:00 2001 From: adibarra <93070681+adibarra@users.noreply.github.com> Date: Wed, 1 Apr 2026 11:16:57 -0500 Subject: [PATCH 05/10] add tests for publishDate filtering and SEO data aggregation --- packages/app/scripts/seo/data.test.ts | 357 ++++++++++++++++++++++++++ packages/app/src/lib/blog.test.ts | 164 +++++++++++- packages/app/vitest.config.ts | 2 +- 3 files changed, 521 insertions(+), 2 deletions(-) create mode 100644 packages/app/scripts/seo/data.test.ts diff --git a/packages/app/scripts/seo/data.test.ts b/packages/app/scripts/seo/data.test.ts new file mode 100644 index 0000000..86ef5aa --- /dev/null +++ b/packages/app/scripts/seo/data.test.ts @@ -0,0 +1,357 @@ +import { describe, it, expect } from 'vitest'; + +import type { BenchmarkRow } from '../../src/lib/api'; +import type { BestConfig } from './types'; +import { gpuDisplayName, modelSlug, aggregateModelData, distinctGpus, allModels } from './data'; + +// --------------------------------------------------------------------------- +// helpers +// --------------------------------------------------------------------------- + +/** Minimal BenchmarkRow factory — only the fields aggregateModelData reads. 
*/ +function makeRow(overrides: Partial = {}): BenchmarkRow { + return { + hardware: 'h100', + framework: 'vllm', + model: 'llama70b', + precision: 'fp8', + spec_method: 'fixed', + disagg: false, + is_multinode: false, + prefill_tp: 1, + prefill_ep: 1, + prefill_dp_attention: false, + prefill_num_workers: 1, + decode_tp: 4, + decode_ep: 1, + decode_dp_attention: false, + decode_num_workers: 1, + num_prefill_gpu: 0, + num_decode_gpu: 0, + isl: 1024, + osl: 1024, + conc: 64, + image: null, + metrics: { + tput_per_gpu: 500, + median_ttft: 100, + median_tpot: 10, + median_e2el: 2000, + }, + date: '2026-03-01', + run_url: null, + ...overrides, + }; +} + +// =========================================================================== +// gpuDisplayName +// =========================================================================== +describe('gpuDisplayName', () => { + it('prepends NVIDIA vendor for known nvidia key', () => { + expect(gpuDisplayName('h100')).toBe('NVIDIA H100'); + }); + + it('prepends NVIDIA vendor for b200', () => { + expect(gpuDisplayName('b200')).toBe('NVIDIA B200'); + }); + + it('prepends AMD vendor for mi300x', () => { + expect(gpuDisplayName('mi300x')).toBe('AMD MI 300X'); + }); + + it('inserts space after MI for AMD keys', () => { + expect(gpuDisplayName('mi325x')).toBe('AMD MI 325X'); + }); + + it('uppercases unknown key with no vendor prefix', () => { + expect(gpuDisplayName('tpu-v5')).toBe('TPU-V5'); + }); + + it('handles gb200 (multi-letter prefix)', () => { + expect(gpuDisplayName('gb200')).toBe('NVIDIA GB200'); + }); + + it('handles mi355x with space insertion', () => { + expect(gpuDisplayName('mi355x')).toBe('AMD MI 355X'); + }); +}); + +// =========================================================================== +// modelSlug +// =========================================================================== +describe('modelSlug', () => { + it('lowercases and replaces spaces with hyphens', () => { + expect(modelSlug('Llama 3 
70B')).toBe('llama-3-70b'); + }); + + it('replaces dots and special chars with hyphens', () => { + expect(modelSlug('Qwen-3.5-397B-A17B')).toBe('qwen-3-5-397b-a17b'); + }); + + it('collapses multiple non-alphanumeric chars into one hyphen', () => { + expect(modelSlug('foo---bar')).toBe('foo-bar'); + }); + + it('strips leading and trailing hyphens', () => { + expect(modelSlug('--hello--')).toBe('hello'); + }); + + it('returns empty string for all-special-char input', () => { + expect(modelSlug('...')).toBe(''); + }); + + it('handles FP8 suffix correctly', () => { + expect(modelSlug('Llama-3.3-70B-Instruct-FP8')).toBe('llama-3-3-70b-instruct-fp8'); + }); + + it('handles parentheses in display name', () => { + expect(modelSlug('GPT-4 (oss)')).toBe('gpt-4-oss'); + }); +}); + +// =========================================================================== +// aggregateModelData +// =========================================================================== +describe('aggregateModelData', () => { + it('returns empty bestBySequence for empty rows', () => { + const result = aggregateModelData('llama70b', 'Llama-3.3-70B', []); + expect(result.modelKey).toBe('llama70b'); + expect(result.displayName).toBe('Llama-3.3-70B'); + expect(result.rows).toHaveLength(0); + expect(result.bestBySequence.size).toBe(0); + }); + + it('groups rows by sequence key', () => { + const rows = [makeRow({ isl: 1024, osl: 1024 }), makeRow({ isl: 8192, osl: 1024 })]; + const result = aggregateModelData('m', 'M', rows); + expect(result.bestBySequence.has('1k/1k')).toBe(true); + expect(result.bestBySequence.has('8k/1k')).toBe(true); + }); + + it('formats sub-1024 ISL/OSL as plain numbers', () => { + const rows = [makeRow({ isl: 128, osl: 512 })]; + const result = aggregateModelData('m', 'M', rows); + expect(result.bestBySequence.has('128/512')).toBe(true); + }); + + it('skips 1k/8k sequence', () => { + const rows = [makeRow({ isl: 1024, osl: 8192 })]; + const result = aggregateModelData('m', 'M', rows); 
+ expect(result.bestBySequence.has('1k/8k')).toBe(false); + expect(result.bestBySequence.size).toBe(0); + }); + + it('keeps best tput_per_gpu per config key', () => { + const rows = [ + makeRow({ + hardware: 'h100', + precision: 'fp8', + framework: 'vllm', + disagg: false, + conc: 32, + metrics: { tput_per_gpu: 300, median_ttft: 80, median_tpot: 12, median_e2el: 1500 }, + }), + makeRow({ + hardware: 'h100', + precision: 'fp8', + framework: 'vllm', + disagg: false, + conc: 64, + metrics: { tput_per_gpu: 700, median_ttft: 120, median_tpot: 8, median_e2el: 2500 }, + }), + ]; + const result = aggregateModelData('m', 'M', rows); + const configs = result.bestBySequence.get('1k/1k')!; + expect(configs).toHaveLength(1); + expect(configs[0].tputPerGpu).toBe(700); + expect(configs[0].conc).toBe(64); + }); + + it('keeps separate entries for different config keys', () => { + const rows = [ + makeRow({ hardware: 'h100', framework: 'vllm', precision: 'fp8', disagg: false }), + makeRow({ hardware: 'b200', framework: 'sglang', precision: 'bf16', disagg: false }), + ]; + const result = aggregateModelData('m', 'M', rows); + const configs = result.bestBySequence.get('1k/1k')!; + expect(configs).toHaveLength(2); + }); + + it('treats disagg=true and disagg=false as separate configs for the same hardware', () => { + const rows = [ + makeRow({ + hardware: 'h100', + precision: 'fp8', + framework: 'vllm', + disagg: false, + metrics: { tput_per_gpu: 500 }, + }), + makeRow({ + hardware: 'h100', + precision: 'fp8', + framework: 'vllm', + disagg: true, + num_prefill_gpu: 2, + num_decode_gpu: 6, + metrics: { tput_per_gpu: 600 }, + }), + ]; + const result = aggregateModelData('m', 'M', rows); + const configs = result.bestBySequence.get('1k/1k')!; + expect(configs).toHaveLength(2); + }); + + it('sorts configs descending by tputPerGpu', () => { + const rows = [ + makeRow({ + hardware: 'h100', + framework: 'vllm', + metrics: { tput_per_gpu: 200 }, + }), + makeRow({ + hardware: 'b200', + framework: 
'sglang', + metrics: { tput_per_gpu: 900 }, + }), + makeRow({ + hardware: 'mi300x', + framework: 'vllm', + metrics: { tput_per_gpu: 450 }, + }), + ]; + const result = aggregateModelData('m', 'M', rows); + const configs = result.bestBySequence.get('1k/1k')!; + expect(configs.map((c) => c.tputPerGpu)).toEqual([900, 450, 200]); + }); + + it('skips rows with tput_per_gpu <= 0', () => { + const rows = [ + makeRow({ metrics: { tput_per_gpu: 0 } }), + makeRow({ metrics: { tput_per_gpu: -1 } }), + makeRow({ metrics: {} }), + ]; + const result = aggregateModelData('m', 'M', rows); + // The sequence key still exists but no configs survive the tput > 0 filter + const configs = result.bestBySequence.get('1k/1k')!; + expect(configs).toHaveLength(0); + }); + + it('uses decode_tp for non-disagg rows', () => { + const rows = [makeRow({ disagg: false, decode_tp: 8 })]; + const result = aggregateModelData('m', 'M', rows); + const config = result.bestBySequence.get('1k/1k')![0]; + expect(config.tp).toBe(8); + }); + + it('sums prefill + decode GPUs for disagg rows', () => { + const rows = [ + makeRow({ + disagg: true, + num_prefill_gpu: 2, + num_decode_gpu: 6, + metrics: { tput_per_gpu: 500 }, + }), + ]; + const result = aggregateModelData('m', 'M', rows); + const config = result.bestBySequence.get('1k/1k')![0]; + expect(config.tp).toBe(8); + }); + + it('maps all metric fields onto BestConfig correctly', () => { + const rows = [ + makeRow({ + metrics: { + tput_per_gpu: 999, + median_ttft: 42, + median_tpot: 7, + median_e2el: 3000, + }, + conc: 128, + date: '2026-02-15', + }), + ]; + const result = aggregateModelData('m', 'M', rows); + const config = result.bestBySequence.get('1k/1k')![0]; + expect(config).toMatchObject>({ + tputPerGpu: 999, + medianTtft: 42, + medianTpot: 7, + medianE2el: 3000, + conc: 128, + date: '2026-02-15', + }); + }); + + it('defaults missing metric values to 0', () => { + const rows = [makeRow({ metrics: { tput_per_gpu: 100 } })]; + const result = 
aggregateModelData('m', 'M', rows); + const config = result.bestBySequence.get('1k/1k')![0]; + expect(config.medianTtft).toBe(0); + expect(config.medianTpot).toBe(0); + expect(config.medianE2el).toBe(0); + }); + + it('preserves the original rows array', () => { + const rows = [makeRow(), makeRow()]; + const result = aggregateModelData('m', 'M', rows); + expect(result.rows).toBe(rows); + }); + + it('handles mixed sequences with 1k/8k filtered out', () => { + const rows = [ + makeRow({ isl: 1024, osl: 1024 }), // 1k/1k — kept + makeRow({ isl: 1024, osl: 8192 }), // 1k/8k — skipped + makeRow({ isl: 8192, osl: 1024 }), // 8k/1k — kept + ]; + const result = aggregateModelData('m', 'M', rows); + expect(result.bestBySequence.size).toBe(2); + expect(result.bestBySequence.has('1k/1k')).toBe(true); + expect(result.bestBySequence.has('8k/1k')).toBe(true); + }); +}); + +// =========================================================================== +// distinctGpus +// =========================================================================== +describe('distinctGpus', () => { + it('returns unique hardware keys for the given sequence', () => { + const rows = [ + makeRow({ hardware: 'h100', framework: 'vllm' }), + makeRow({ hardware: 'b200', framework: 'sglang' }), + makeRow({ hardware: 'h100', framework: 'sglang' }), + ]; + const data = aggregateModelData('m', 'M', rows); + const gpus = distinctGpus(data, '1k/1k'); + expect(gpus).toEqual(new Set(['h100', 'b200'])); + }); + + it('returns empty set for non-existent sequence', () => { + const data = aggregateModelData('m', 'M', [makeRow()]); + const gpus = distinctGpus(data, '999/999'); + expect(gpus.size).toBe(0); + }); +}); + +// =========================================================================== +// allModels +// =========================================================================== +describe('allModels', () => { + it('returns [dbKey, displayName] pairs', () => { + const models = allModels(); + 
expect(models.length).toBeGreaterThan(0); + for (const [key, display] of models) { + expect(typeof key).toBe('string'); + expect(typeof display).toBe('string'); + expect(key.length).toBeGreaterThan(0); + expect(display.length).toBeGreaterThan(0); + } + }); + + it('includes llama70b entry', () => { + const models = allModels(); + const llama = models.find(([k]) => k === 'llama70b'); + expect(llama).toBeDefined(); + expect(llama![1]).toContain('70B'); + }); +}); diff --git a/packages/app/src/lib/blog.test.ts b/packages/app/src/lib/blog.test.ts index 6f28bf7..677645a 100644 --- a/packages/app/src/lib/blog.test.ts +++ b/packages/app/src/lib/blog.test.ts @@ -1,4 +1,4 @@ -import { describe, expect, it, vi, beforeEach } from 'vitest'; +import { describe, expect, it, vi, beforeEach, afterEach } from 'vitest'; import fs from 'node:fs'; import { @@ -45,6 +45,41 @@ date: '2026-01-01' Some middle content. `; +const FAKE_MDX_FUTURE = `--- +title: 'Future Post' +subtitle: 'A future subtitle' +date: '2099-06-01' +publishDate: '2099-06-01' +--- + +# Future + +This post is scheduled for the far future. +`; + +const FAKE_MDX_PAST_PUBLISH = `--- +title: 'Past Publish Post' +subtitle: 'Already published' +date: '2025-06-01' +publishDate: '2025-01-01' +--- + +# Past Publish + +This post has a publishDate in the past. +`; + +const FAKE_MDX_NO_PUBLISH = `--- +title: 'No Publish Date Post' +subtitle: 'No publishDate set' +date: '2025-08-01' +--- + +# No Publish + +This post has no publishDate field at all. 
+`; + vi.mock('node:fs', async (importOriginal) => { const actual = await importOriginal(); return { ...actual, default: { ...actual } }; @@ -136,6 +171,119 @@ describe('getAllPosts', () => { }); }); +describe('getAllPosts — publishDate filtering', () => { + afterEach(() => { + vi.unstubAllEnvs(); + }); + + function mockPostFiles(files: Record) { + vi.spyOn(fs, 'existsSync').mockReturnValue(true); + vi.spyOn(fs, 'readdirSync').mockReturnValue( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + Object.keys(files).map((name) => name) as any, + ); + vi.spyOn(fs, 'readFileSync').mockImplementation((filePath) => { + const p = String(filePath); + for (const [name, content] of Object.entries(files)) { + if (p.includes(name.replace('.mdx', ''))) return content; + } + return ''; + }); + } + + it('filters out future-publishDate posts in production', () => { + vi.stubEnv('NODE_ENV', 'production'); + mockPostFiles({ + 'past-publish.mdx': FAKE_MDX_PAST_PUBLISH, + 'future-post.mdx': FAKE_MDX_FUTURE, + 'no-publish.mdx': FAKE_MDX_NO_PUBLISH, + }); + + const posts = getAllPosts(); + const slugs = posts.map((p) => p.slug); + expect(slugs).toContain('past-publish'); + expect(slugs).toContain('no-publish'); + expect(slugs).not.toContain('future-post'); + }); + + it('keeps posts without publishDate in production', () => { + vi.stubEnv('NODE_ENV', 'production'); + mockPostFiles({ + 'no-publish.mdx': FAKE_MDX_NO_PUBLISH, + }); + + const posts = getAllPosts(); + expect(posts).toHaveLength(1); + expect(posts[0].slug).toBe('no-publish'); + }); + + it('keeps posts with past publishDate in production', () => { + vi.stubEnv('NODE_ENV', 'production'); + mockPostFiles({ + 'past-publish.mdx': FAKE_MDX_PAST_PUBLISH, + }); + + const posts = getAllPosts(); + expect(posts).toHaveLength(1); + expect(posts[0].slug).toBe('past-publish'); + }); + + it('shows all posts including future-dated in development', () => { + vi.stubEnv('NODE_ENV', 'development'); + mockPostFiles({ + 
'past-publish.mdx': FAKE_MDX_PAST_PUBLISH, + 'future-post.mdx': FAKE_MDX_FUTURE, + 'no-publish.mdx': FAKE_MDX_NO_PUBLISH, + }); + + const posts = getAllPosts(); + const slugs = posts.map((p) => p.slug); + expect(slugs).toContain('past-publish'); + expect(slugs).toContain('future-post'); + expect(slugs).toContain('no-publish'); + expect(posts).toHaveLength(3); + }); + + it('shows all posts including future-dated in test env', () => { + vi.stubEnv('NODE_ENV', 'test'); + mockPostFiles({ + 'future-post.mdx': FAKE_MDX_FUTURE, + 'no-publish.mdx': FAKE_MDX_NO_PUBLISH, + }); + + const posts = getAllPosts(); + expect(posts).toHaveLength(2); + const slugs = posts.map((p) => p.slug); + expect(slugs).toContain('future-post'); + expect(slugs).toContain('no-publish'); + }); + + it('returns empty array when all posts are future-dated in production', () => { + vi.stubEnv('NODE_ENV', 'production'); + mockPostFiles({ + 'future-post.mdx': FAKE_MDX_FUTURE, + }); + + const posts = getAllPosts(); + expect(posts).toHaveLength(0); + }); + + it('still sorts filtered results by date descending in production', () => { + vi.stubEnv('NODE_ENV', 'production'); + mockPostFiles({ + 'past-publish.mdx': FAKE_MDX_PAST_PUBLISH, + 'no-publish.mdx': FAKE_MDX_NO_PUBLISH, + 'future-post.mdx': FAKE_MDX_FUTURE, + }); + + const posts = getAllPosts(); + expect(posts).toHaveLength(2); + // no-publish date: 2025-08-01, past-publish date: 2025-06-01 + expect(posts[0].slug).toBe('no-publish'); + expect(posts[1].slug).toBe('past-publish'); + }); +}); + describe('getPostBySlug', () => { it('returns null for non-existent slug', () => { vi.spyOn(fs, 'existsSync').mockReturnValue(false); @@ -153,6 +301,20 @@ describe('getPostBySlug', () => { expect(result!.meta.slug).toBe('test-post'); expect(result!.raw).toContain('# Test Heading'); }); + + it('returns a post with future publishDate regardless of NODE_ENV', () => { + vi.stubEnv('NODE_ENV', 'production'); + + vi.spyOn(fs, 'existsSync').mockReturnValue(true); + 
vi.spyOn(fs, 'readFileSync').mockReturnValue(FAKE_MDX_FUTURE); + + const result = getPostBySlug('future-post'); + expect(result).not.toBeNull(); + expect(result!.meta.title).toBe('Future Post'); + expect(result!.meta.publishDate).toBe('2099-06-01'); + + vi.unstubAllEnvs(); + }); }); describe('getAdjacentPosts', () => { diff --git a/packages/app/vitest.config.ts b/packages/app/vitest.config.ts index 374c66e..ebdf0d3 100644 --- a/packages/app/vitest.config.ts +++ b/packages/app/vitest.config.ts @@ -4,7 +4,7 @@ import path from 'path'; export default defineConfig({ test: { environment: 'node', - include: ['src/**/*.test.ts'], + include: ['src/**/*.test.ts', 'scripts/**/*.test.ts'], coverage: { provider: 'v8', include: ['src/lib/**/*.ts', 'src/scripts/**/*.ts', 'src/app/api/**/*.ts'], From 3187505180341a9dbcc27f467cdc7de9816e867c Mon Sep 17 00:00:00 2001 From: adibarra <93070681+adibarra@users.noreply.github.com> Date: Wed, 1 Apr 2026 11:17:09 -0500 Subject: [PATCH 06/10] harden SEO data script with parallel fetches and retry logic --- packages/app/scripts/generate-seo-articles.ts | 133 +++++++++++++----- packages/app/scripts/seo/data.ts | 61 +++++++- 2 files changed, 156 insertions(+), 38 deletions(-) diff --git a/packages/app/scripts/generate-seo-articles.ts b/packages/app/scripts/generate-seo-articles.ts index 7ae8390..f4ae527 100644 --- a/packages/app/scripts/generate-seo-articles.ts +++ b/packages/app/scripts/generate-seo-articles.ts @@ -87,11 +87,71 @@ function enrichConfig(c: BestConfig): SerializableBestConfig { }; } +interface FetchResult { + modelKey: string; + displayName: string; + status: 'included' | 'skipped_no_data' | 'skipped_few_gpus' | 'error'; + reason?: string; +} + +async function processModel( + baseUrl: string, + modelKey: string, + displayName: string, +): Promise<{ result: FetchResult; entry?: SerializableModelData }> { + const rows = await fetchBenchmarks(baseUrl, displayName); + if (rows.length === 0) { + return { + result: { modelKey, 
displayName, status: 'skipped_no_data', reason: 'no benchmark data' }, + }; + } + + const data = aggregateModelData(modelKey, displayName, rows); + const gpus = distinctGpus(data, PRIMARY_SEQ); + + if (gpus.size < MIN_GPUS) { + return { + result: { + modelKey, + displayName, + status: 'skipped_few_gpus', + reason: `only ${gpus.size} GPU(s) at ${PRIMARY_SEQ} (need ${MIN_GPUS}+)`, + }, + }; + } + + const primaryConfigs = data.bestBySequence.get(PRIMARY_SEQ) ?? []; + + // Convert Map to plain object for JSON serialization + const sequences: Record = {}; + for (const [seq, configs] of data.bestBySequence) { + sequences[seq] = configs.map(enrichConfig); + } + + const entry: SerializableModelData = { + modelKey, + displayName, + slug: `best-gpu-for-${modelKey}-inference`, + totalRows: rows.length, + sequences, + primarySequence: PRIMARY_SEQ, + gpuCount: new Set(primaryConfigs.map((c) => c.hardware)).size, + precisionCount: new Set(primaryConfigs.map((c) => c.precision)).size, + frameworkCount: new Set(primaryConfigs.map((c) => c.framework)).size, + }; + + return { result: { modelKey, displayName, status: 'included' }, entry }; +} + async function main() { const { baseUrl, output } = parseArgs(); - console.log(`Fetching benchmark data from: ${baseUrl}`); - const models = allModels(); + console.log(`Fetching benchmark data from: ${baseUrl} (${models.length} models in parallel)\n`); + + const settled = await Promise.allSettled( + models.map(([modelKey, displayName]) => processModel(baseUrl, modelKey, displayName)), + ); + const result: SeoDataOutput = { generatedAt: new Date().toISOString(), baseUrl, @@ -99,47 +159,56 @@ async function main() { models: [], }; - for (const [modelKey, displayName] of models) { - console.log(` Fetching: ${displayName} (${modelKey})`); - - const rows = await fetchBenchmarks(baseUrl, displayName); - if (rows.length === 0) { - console.log(' Skipped: no benchmark data'); - continue; - } + const fetchResults: FetchResult[] = []; - const data = 
aggregateModelData(modelKey, displayName, rows); - const gpus = distinctGpus(data, PRIMARY_SEQ); + for (let i = 0; i < settled.length; i++) { + const outcome = settled[i]; + const [modelKey, displayName] = models[i]; - if (gpus.size < MIN_GPUS) { - console.log(` Skipped: only ${gpus.size} GPU(s) at ${PRIMARY_SEQ} (need ${MIN_GPUS}+)`); + if (outcome.status === 'rejected') { + const reason = + outcome.reason instanceof Error ? outcome.reason.message : String(outcome.reason); + fetchResults.push({ modelKey, displayName, status: 'error', reason }); continue; } - const primaryConfigs = data.bestBySequence.get(PRIMARY_SEQ) ?? []; - - // Convert Map to plain object for JSON serialization - const sequences: Record = {}; - for (const [seq, configs] of data.bestBySequence) { - sequences[seq] = configs.map(enrichConfig); + fetchResults.push(outcome.value.result); + if (outcome.value.entry) { + result.models.push(outcome.value.entry); } - - result.models.push({ - modelKey, - displayName, - slug: `best-gpu-for-${modelKey}-inference`, - totalRows: rows.length, - sequences, - primarySequence: PRIMARY_SEQ, - gpuCount: new Set(primaryConfigs.map((c) => c.hardware)).size, - precisionCount: new Set(primaryConfigs.map((c) => c.precision)).size, - frameworkCount: new Set(primaryConfigs.map((c) => c.framework)).size, - }); } // Write output fs.mkdirSync(path.dirname(output), { recursive: true }); fs.writeFileSync(output, JSON.stringify(result, null, 2), 'utf-8'); + + // Print summary + const included = fetchResults.filter((r) => r.status === 'included'); + const skipped = fetchResults.filter( + (r) => r.status === 'skipped_no_data' || r.status === 'skipped_few_gpus', + ); + const errors = fetchResults.filter((r) => r.status === 'error'); + + console.log('\n--- Summary ---'); + console.log(`Total models: ${fetchResults.length}`); + console.log(`Included: ${included.length}`); + console.log(`Skipped: ${skipped.length}`); + console.log(`Errors: ${errors.length}`); + + if (skipped.length > 
0) { + console.log('\nSkipped models:'); + for (const r of skipped) { + console.log(` ${r.displayName}: ${r.reason}`); + } + } + + if (errors.length > 0) { + console.log('\nFailed models:'); + for (const r of errors) { + console.log(` ${r.displayName}: ${r.reason}`); + } + } + console.log(`\nWrote ${result.models.length} models to: ${output}`); } diff --git a/packages/app/scripts/seo/data.ts b/packages/app/scripts/seo/data.ts index d70b807..695a256 100644 --- a/packages/app/scripts/seo/data.ts +++ b/packages/app/scripts/seo/data.ts @@ -24,18 +24,67 @@ function seqKey(isl: number, osl: number): string { return `${fmt(isl)}/${fmt(osl)}`; } -/** Fetch benchmark data for a model from the API. */ +const MAX_RETRIES = 3; +const RETRY_DELAY_MS = 1000; + +function isRetryable(error: unknown): boolean { + // Network errors (fetch throws) are always retryable + if (error instanceof TypeError) return true; + return false; +} + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +/** Fetch benchmark data for a model from the API (retries on network errors / 5xx). 
*/ export async function fetchBenchmarks( baseUrl: string, displayName: string, ): Promise { const url = `${baseUrl}/api/v1/benchmarks?model=${encodeURIComponent(displayName)}`; - const res = await fetch(url); - if (!res.ok) { - console.warn(` Failed to fetch ${displayName}: ${res.status} ${res.statusText}`); - return []; + + for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) { + try { + const res = await fetch(url); + + // 4xx — not retryable, return immediately + if (res.status >= 400 && res.status < 500) { + console.warn(` Failed to fetch ${displayName}: ${res.status} ${res.statusText}`); + return []; + } + + // 5xx — retryable + if (!res.ok) { + const msg = `${res.status} ${res.statusText}`; + if (attempt < MAX_RETRIES) { + console.warn( + ` Fetch ${displayName} failed (${msg}), retrying (${attempt}/${MAX_RETRIES})...`, + ); + await sleep(RETRY_DELAY_MS); + continue; + } + console.warn(` Failed to fetch ${displayName} after ${MAX_RETRIES} attempts: ${msg}`); + return []; + } + + return (await res.json()) as BenchmarkRow[]; + } catch (error: unknown) { + if (isRetryable(error) && attempt < MAX_RETRIES) { + const reason = error instanceof Error ? error.message : String(error); + console.warn( + ` Fetch ${displayName} error (${reason}), retrying (${attempt}/${MAX_RETRIES})...`, + ); + await sleep(RETRY_DELAY_MS); + continue; + } + const reason = error instanceof Error ? 
error.message : String(error); + console.warn(` Failed to fetch ${displayName} after ${MAX_RETRIES} attempts: ${reason}`); + return []; + } } - return (await res.json()) as BenchmarkRow[]; + + return []; } /** From ec5daa3baa8c8781cf7e683008aa5f4394520701 Mon Sep 17 00:00:00 2001 From: adibarra <93070681+adibarra@users.noreply.github.com> Date: Wed, 1 Apr 2026 11:17:21 -0500 Subject: [PATCH 07/10] document publishDate, JsonLd component, and SEO article generation --- docs/blog.md | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/docs/blog.md b/docs/blog.md index 8ff8f3a..a85be5f 100644 --- a/docs/blog.md +++ b/docs/blog.md @@ -18,9 +18,14 @@ title: string subtitle: string date: YYYY-MM-DD modifiedDate?: YYYY-MM-DD # Used in sitemap and JSON-LD +publishDate?: YYYY-MM-DD # Scheduled publishing, hidden in production until this date tags?: string[] # Used for filtering on /blog and in RSS categories ``` +### Scheduled Publishing (`publishDate`) + +If `publishDate` is set to a future date, the post is hidden from `getAllPosts()` in production but visible in development (for preview). This allows articles to be merged to `master` via PR and go live automatically when the date arrives. All downstream consumers (sitemap, RSS, llms.txt) automatically respect the filter since they call `getAllPosts()`. `getPostBySlug()` still returns the post regardless of `publishDate` (for direct URL preview). + Slug is derived from the filename (e.g., `my-post.mdx` -> `my-post`), not from frontmatter. Reading time is calculated at 265 WPM. ## MDX Components Available to Authors @@ -32,6 +37,7 @@ Slug is derived from the filename (e.g., `my-post.mdx` -> `my-post`), not from f | `![alt](src)` | Images | Rendered via `next/image` with lazy loading (first image is eager) | | `
` | Captioned figures | Uses `` (not `next/image`) for external URLs | | `...` | Paywall teaser blur overlay | Content is blurred, unselectable, and not clickable | +| `{...}` | Structured data (JSON-LD) | Renders `