diff --git a/packages/app/src/app/api/v1/framework-releases/route.ts b/packages/app/src/app/api/v1/framework-releases/route.ts
new file mode 100644
index 0000000..efb36cd
--- /dev/null
+++ b/packages/app/src/app/api/v1/framework-releases/route.ts
@@ -0,0 +1,76 @@
+import { NextResponse } from 'next/server';
+
+/** Subset of the GitHub release payload read by this route. */
+interface GitHubRelease {
+  tag_name: string;
+  prerelease: boolean;
+  draft: boolean;
+}
+
+/** GitHub repository coordinates for each framework reported by this route. */
+const FRAMEWORK_REPOS: Record<string, { owner: string; repo: string }> = {
+  vllm: { owner: 'vllm-project', repo: 'vllm' },
+  sglang: { owner: 'sgl-project', repo: 'sglang' },
+};
+
+/**
+ * Look up the latest stable release tag of a GitHub repository.
+ *
+ * Requests up to 20 releases and returns the tag of the first listed one that is
+ * neither a prerelease nor a draft.
+ *
+ * @returns The tag name, or `null` when the request throws, the response status is
+ *   not OK, the payload is not an array, or no stable release is listed.
+ */
+async function fetchLatestRelease(owner: string, repo: string): Promise<string | null> {
+  const url = `https://api.github.com/repos/${owner}/${repo}/releases?per_page=20`;
+
+  try {
+    const res = await fetch(url, {
+      headers: {
+        Accept: 'application/vnd.github.v3+json',
+        ...(process.env.GITHUB_TOKEN && { Authorization: `token ${process.env.GITHUB_TOKEN}` }),
+      },
+      next: { revalidate: 60 * 60 }, // 1 hour
+    });
+
+    if (!res.ok) return null;
+
+    // Guard against a non-array body before calling `.find` on it.
+    const payload: unknown = await res.json();
+    if (!Array.isArray(payload)) return null;
+
+    const latest = (payload as GitHubRelease[]).find((r) => !r.prerelease && !r.draft);
+    return latest?.tag_name ?? null;
+  } catch (error) {
+    // One unreachable repository must not fail the lookup for the others.
+    console.error(`Error fetching latest release for ${owner}/${repo}:`, error);
+    return null;
+  }
+}
+
+/**
+ * GET handler: responds with a JSON object mapping each `FRAMEWORK_REPOS` key to its
+ * latest stable release tag, or `null` when that tag could not be determined.
+ */
+export async function GET() {
+  try {
+    const entries = await Promise.all(
+      Object.entries(FRAMEWORK_REPOS).map(async ([framework, { owner, repo }]) => {
+        const tag = await fetchLatestRelease(owner, repo);
+        return [framework, tag] as const;
+      }),
+    );
+
+    const result: Record<string, string | null> = Object.fromEntries(entries);
+
+    return NextResponse.json(result, {
+      headers: {
+        'Cache-Control': 'public, max-age=0, s-maxage=3600, stale-while-revalidate=7200',
+      },
+    });
+  } catch (error) {
+    console.error('Error fetching framework releases:', error);
+    return NextResponse.json({ error: 'Internal server error' }, { status: 500 });
+  }
+}
diff --git a/packages/app/src/app/api/v1/latest-images/route.ts b/packages/app/src/app/api/v1/latest-images/route.ts
new file mode 100644
index 0000000..3b41c1b
--- /dev/null
+++ b/packages/app/src/app/api/v1/latest-images/route.ts
@@ -0,0 +1,27 @@
+import { NextResponse } from 'next/server';
+
+import { getDb } from '@semianalysisai/inferencex-db/connection';
+import { getLatestImages } from '@semianalysisai/inferencex-db/queries/latest-images';
+
+import { cachedJson, cachedQuery } from '@/lib/api-cache';
+
+export const dynamic = 'force-dynamic';
+
+const getCachedLatestImages = cachedQuery(
+  async () => {
+    const sql = getDb();
+    return getLatestImages(sql);
+  },
+  'latest-images',
+  { blobOnly: true },
+);
+
+export async function GET() {
+  try {
+    const rows = await getCachedLatestImages();
+    return cachedJson(rows);
+  } catch (error) {
+    console.error('Error fetching latest images:', error);
+    return NextResponse.json({ error: 'Internal server error' }, { status: 500 });
+  }
+}
diff --git a/packages/app/src/app/current-inferencex-image/page.tsx b/packages/app/src/app/current-inferencex-image/page.tsx
new file mode 100644
index 0000000..860cbba
--- /dev/null
+++ b/packages/app/src/app/current-inferencex-image/page.tsx
@@ -0,0 +1,19 @@
+import type { Metadata } from 'next';
+
+import { SITE_URL } from
'@semianalysisai/inferencex-constants';
+import { CurrentImageContent } from '@/components/latest-image/latest-image-content';
+
+export const metadata: Metadata = {
+  title: 'Current InferenceX Image',
+  description: 'Current InferenceX Docker image tags for each model, GPU SKU, and configuration.',
+  alternates: { canonical: `${SITE_URL}/current-inferencex-image` },
+  openGraph: {
+    title: 'Current InferenceX Image | InferenceX by SemiAnalysis',
+    description: 'Current InferenceX Docker image tags for each model, GPU SKU, and configuration.',
+    url: `${SITE_URL}/current-inferencex-image`,
+  },
+};
+
+export default function CurrentInferenceXImagePage() {
+  return <CurrentImageContent />;
+}
diff --git a/packages/app/src/components/latest-image/latest-image-content.tsx b/packages/app/src/components/latest-image/latest-image-content.tsx
new file mode 100644
index 0000000..5200101
--- /dev/null
+++ b/packages/app/src/components/latest-image/latest-image-content.tsx
@@ -0,0 +1,326 @@
+'use client';
+
+import { useMemo, useState } from 'react';
+
+import { DB_MODEL_TO_DISPLAY, islOslToSequence } from '@semianalysisai/inferencex-constants';
+
+import { LabelWithTooltip } from '@/components/ui/label-with-tooltip';
+import {
+  Select,
+  SelectContent,
+  SelectItem,
+  SelectTrigger,
+  SelectValue,
+} from '@/components/ui/select';
+import { TooltipProvider } from '@/components/ui/tooltip';
+import { useFrameworkReleases } from '@/hooks/api/use-framework-releases';
+import { useLatestImages } from '@/hooks/api/use-latest-images';
+import type { FrameworkReleases, LatestImageRow } from '@/lib/api';
+import { track } from '@/lib/analytics';
+
+/** Map framework variants to their base framework for release lookup. */
+const FRAMEWORK_TO_BASE: Record<string, string> = {
+  vllm: 'vllm',
+  sglang: 'sglang',
+  'dynamo-sglang': 'sglang',
+  'mori-sglang': 'sglang',
+};
+
+/**
+ * Collect the distinct filter values present in `data`; every list uses the default
+ * string sort order.
+ *
+ * Models are listed under their `DB_MODEL_TO_DISPLAY` name when one exists, and the
+ * `1k/8k` sequence is left out of the sequence options.
+ */
+function deriveOptions(data: LatestImageRow[]) {
+  const models = new Set<string>();
+  const precisions = new Set<string>();
+  const sequences = new Set<string>();
+  const specMethods = new Set<string>();
+
+  for (const row of data) {
+    const displayModel = DB_MODEL_TO_DISPLAY[row.model] ?? row.model;
+    models.add(displayModel);
+    precisions.add(row.precision);
+    const seq = islOslToSequence(row.isl, row.osl) ?? `${row.isl}/${row.osl}`;
+    sequences.add(seq);
+    specMethods.add(row.spec_method);
+  }
+
+  return {
+    models: [...models].sort(),
+    precisions: [...precisions].sort(),
+    sequences: [...sequences].filter((s) => s !== '1k/8k').sort(),
+    specMethods: [...specMethods].sort(),
+  };
+}
+
+/** Display label for a spec method: `Off` for `none`, otherwise the upper-cased value. */
+function formatSpecMethod(method: string) {
+  return method === 'none' ? 'Off' : method.toUpperCase();
+}
+
+/**
+ * Resolve the latest upstream release tag for `framework` through `FRAMEWORK_TO_BASE`.
+ * Returns `null` when releases are not loaded or no tag is known for the framework.
+ */
+function getActualLatestTag(framework: string, releases: FrameworkReleases | undefined) {
+  if (!releases) return null;
+  const base = FRAMEWORK_TO_BASE[framework];
+  if (!base) return null;
+  return releases[base] ?? null;
+}
+
+/** Lower-case substrings that mark an image as an unstable/dev build. */
+const UNSTABLE_PATTERNS = ['nightly', 'rocm/sgl-dev', 'sglang-rocm'];
+
+/**
+ * Report whether `tag` occurs in `image` as a complete version token, ignoring case.
+ *
+ * A match is rejected when a digit directly precedes or follows it, so `v0.6.3` does
+ * not match inside `v0.6.30`, nor `0.6.3` inside `10.6.3`.
+ */
+function containsVersionTag(image: string, tag: string): boolean {
+  const haystack = image.toLowerCase();
+  const needle = tag.toLowerCase();
+  if (needle === '') return false;
+
+  const isDigit = (ch: string | undefined) => ch !== undefined && ch >= '0' && ch <= '9';
+
+  for (let at = haystack.indexOf(needle); at !== -1; at = haystack.indexOf(needle, at + 1)) {
+    const before = haystack[at - 1];
+    const after = haystack[at + needle.length];
+    if (!isDigit(before) && !isDigit(after)) return true;
+  }
+  return false;
+}
+
+/** Check if the image tag is outdated or uses an unstable/dev image. */
+function isOutdated(image: string, actualLatest: string | null): boolean {
+  const lower = image.toLowerCase();
+  if (UNSTABLE_PATTERNS.some((p) => lower.includes(p))) return true;
+  if (!actualLatest) return false;
+  return !containsVersionTag(image, actualLatest);
+}
+
+export function CurrentImageContent() {
+  const { data, isLoading, error } = useLatestImages();
+  const { data: releases } = useFrameworkReleases();
+
+  const [selectedModel, setSelectedModel] = useState('all');
+  const [selectedPrecision, setSelectedPrecision] = useState('all');
+  const [selectedSequence, setSelectedSequence] = useState('1k/1k');
+  const [selectedSpecMethod, setSelectedSpecMethod] = useState('none');
+
+  const options = useMemo(() => (data ? deriveOptions(data) : null), [data]);
+
+  const filtered = useMemo(() => {
+    if (!data) return [];
+    return data.filter((row) => {
+      if (selectedModel !== 'all') {
+        const displayModel = DB_MODEL_TO_DISPLAY[row.model] ?? row.model;
+        if (displayModel !== selectedModel) return false;
+      }
+      if (selectedPrecision !== 'all') {
+        if (row.precision !== selectedPrecision) return false;
+      }
+      const seq = islOslToSequence(row.isl, row.osl) ?? `${row.isl}/${row.osl}`;
+      if (seq !== selectedSequence) return false;
+      if (row.spec_method !== selectedSpecMethod) return false;
+      return true;
+    });
+  }, [data, selectedModel, selectedPrecision, selectedSequence, selectedSpecMethod]);
+
+  return (
+
+
+

Current InferenceX Image

+

+ Docker image tags for each model and GPU configuration. +

+
+ + {isLoading &&
Loading...
} + + {error && ( +
Failed to load image data.
+ )} + + {options && ( + +
+
+ + +
+ +
+ + +
+ +
+ + +
+ +
+ + +
+
+
+ )} + + {data && filtered.length === 0 && ( +
+ No image data matches the selected filters. +
+ )} + + {filtered.length > 0 && ( +
+ + + + + + + + + + + + {filtered.map((row, i) => { + const displayModel = DB_MODEL_TO_DISPLAY[row.model] ?? row.model; + const gpuLabel = row.hardware.toUpperCase(); + const actualLatest = getActualLatestTag(row.framework, releases); + const outdated = isOutdated(row.image, actualLatest); + + return ( + + + + + + + + ); + })} + +
ModelPrecisionGPU SKU + Current InferenceX Image Tag + Actual Latest Tag
{displayModel}{row.precision}{gpuLabel} + + {row.image} + + + {actualLatest ? ( + + {actualLatest} + + ) : ( + - + )} +
+
+ )} +
+  );
+}
diff --git a/packages/app/src/hooks/api/use-framework-releases.ts b/packages/app/src/hooks/api/use-framework-releases.ts
new file mode 100644
index 0000000..cbc5c55
--- /dev/null
+++ b/packages/app/src/hooks/api/use-framework-releases.ts
@@ -0,0 +1,11 @@
+import { useQuery } from '@tanstack/react-query';
+
+import { fetchFrameworkReleases } from '@/lib/api';
+
+export function useFrameworkReleases() {
+  return useQuery({
+    queryKey: ['framework-releases'],
+    queryFn: fetchFrameworkReleases,
+    staleTime: 60 * 60 * 1000, // 1 hour
+  });
+}
diff --git a/packages/app/src/hooks/api/use-latest-images.ts b/packages/app/src/hooks/api/use-latest-images.ts
new file mode 100644
index 0000000..5bc896a
--- /dev/null
+++ b/packages/app/src/hooks/api/use-latest-images.ts
@@ -0,0 +1,10 @@
+import { useQuery } from '@tanstack/react-query';
+
+import { fetchLatestImages } from '@/lib/api';
+
+export function useLatestImages() {
+  return useQuery({
+    queryKey: ['latest-images'],
+    queryFn: fetchLatestImages,
+  });
+}
diff --git a/packages/app/src/lib/api.ts b/packages/app/src/lib/api.ts
index 0646bb7..ea88889 100644
--- a/packages/app/src/lib/api.ts
+++ b/packages/app/src/lib/api.ts
@@ -145,3 +145,27 @@ export interface GitHubStarsResponse {
 export function fetchGitHubStars() {
   return fetchJson<GitHubStarsResponse>('/api/v1/github-stars');
 }
+
+/** Row shape requested from `/api/v1/latest-images`. */
+export interface LatestImageRow {
+  model: string;
+  hardware: string;
+  framework: string;
+  precision: string;
+  spec_method: string;
+  isl: number;
+  osl: number;
+  image: string;
+  date: string;
+}
+
+export function fetchLatestImages() {
+  return fetchJson<LatestImageRow[]>('/api/v1/latest-images');
+}
+
+/** Latest release tag per framework key; `null` when no tag was found. */
+export type FrameworkReleases = Record<string, string | null>;
+
+export function fetchFrameworkReleases() {
+  return fetchJson<FrameworkReleases>('/api/v1/framework-releases');
+}
diff --git a/packages/db/src/queries/latest-images.ts b/packages/db/src/queries/latest-images.ts
new file mode 100644
index 0000000..1ba7150
--- /dev/null
+++ b/packages/db/src/queries/latest-images.ts
@@ -0,0 +1,38 @@
+import type { NeonClient } from '../connection.js';
+
+/** Row shape produced by the `getLatestImages` query; `date` is selected as text. */
+export interface LatestImageRow {
+  model: string;
+  hardware: string;
+  framework: string;
+  precision: string;
+  spec_method: string;
+  isl: number;
+  osl: number;
+  image: string;
+  date: string;
+}
+
+/**
+ * Fetch the latest non-null image tag per unique (model, hardware, framework, precision, spec_method, isl, osl).
+ * Uses the latest_benchmarks materialized view for fast lookups.
+ */
+export async function getLatestImages(sql: NeonClient): Promise<LatestImageRow[]> {
+  const rows = await sql`
+    SELECT DISTINCT ON (c.model, c.hardware, c.framework, c.precision, c.spec_method, lb.isl, lb.osl)
+      c.model,
+      c.hardware,
+      c.framework,
+      c.precision,
+      c.spec_method,
+      lb.isl,
+      lb.osl,
+      lb.image,
+      lb.date::text
+    FROM latest_benchmarks lb
+    JOIN configs c ON c.id = lb.config_id
+    WHERE lb.image IS NOT NULL
+    ORDER BY c.model, c.hardware, c.framework, c.precision, c.spec_method, lb.isl, lb.osl, lb.date DESC
+  `;
+  return rows as unknown as LatestImageRow[];
+}