From ec56b6349a526cd850d711ee457e2aa3a5b37ffd Mon Sep 17 00:00:00 2001 From: adibarra <93070681+adibarra@users.noreply.github.com> Date: Fri, 3 Apr 2026 12:19:43 -0500 Subject: [PATCH] refactor: extract shared GitHub artifacts fetching logic --- packages/app/src/app/api/gpu-metrics/route.ts | 87 +++-------- .../app/src/app/api/unofficial-run/route.ts | 91 +++-------- packages/app/src/lib/github-artifacts.test.ts | 142 +++++++++++++++++ packages/app/src/lib/github-artifacts.ts | 146 ++++++++++++++++++ 4 files changed, 333 insertions(+), 133 deletions(-) create mode 100644 packages/app/src/lib/github-artifacts.test.ts create mode 100644 packages/app/src/lib/github-artifacts.ts diff --git a/packages/app/src/app/api/gpu-metrics/route.ts b/packages/app/src/app/api/gpu-metrics/route.ts index f37c9ab..17d5bc5 100644 --- a/packages/app/src/app/api/gpu-metrics/route.ts +++ b/packages/app/src/app/api/gpu-metrics/route.ts @@ -2,67 +2,37 @@ * DO NOT ADD CACHING (blob, CDN, or unstable_cache) to this route. * It fetches live GitHub Actions artifacts which change while a run is in progress. */ -import AdmZip from 'adm-zip'; import { NextRequest, NextResponse } from 'next/server'; -import { GITHUB_API_BASE, GITHUB_OWNER, GITHUB_REPO } from '@semianalysisai/inferencex-constants'; - import { parseCsvData } from '@/components/gpu-power/types'; - -interface GithubArtifact { - id: number; - name: string; - archive_download_url: string; -} - -/** Paginated GitHub API fetch (same pattern as unofficial-run route). */ -async function githubFetchAll(url: string, token: string): Promise { - const items: unknown[] = []; - let page = 1; - while (true) { - const sep = url.includes('?') ? '&' : '?'; - const resp = await fetch(`${url}${sep}per_page=100&page=${page}`, { - headers: { Authorization: `Bearer ${token}`, Accept: 'application/vnd.github.v3+json' }, - }); - if (!resp.ok) break; - const data = await resp.json(); - const arr = Array.isArray(data) ? data : (data.artifacts ?? data.workflow_runs ?? []); - if (arr.length === 0) break; - items.push(...arr); - if (arr.length < 100) break; - page++; - } - return items; -} +import { + downloadGithubArtifact, + extractZipEntries, + fetchGithubRunArtifacts, + fetchGithubWorkflowRun, + getGithubToken, + normalizeGithubRunInfo, + type GithubWorkflowRun, +} from '@/lib/github-artifacts'; const MAX_ARTIFACT_BYTES = 50 * 1024 * 1024; async function fetchGpuMetrics(runId: string) { - const githubToken = process.env.GITHUB_TOKEN; + const githubToken = getGithubToken(); if (!githubToken) throw new Error('GitHub token not configured'); - const runResp = await fetch( - `${GITHUB_API_BASE}/repos/${GITHUB_OWNER}/${GITHUB_REPO}/actions/runs/${runId}`, - { - headers: { Authorization: `Bearer ${githubToken}`, Accept: 'application/vnd.github.v3+json' }, - }, - ); + const runResp = await fetchGithubWorkflowRun(runId, githubToken); if (!runResp.ok) throw new Error(`Failed to fetch workflow run: ${runResp.status}`); - const run = await runResp.json(); + const run = (await runResp.json()) as GithubWorkflowRun; - const artifacts = (await githubFetchAll( - `${GITHUB_API_BASE}/repos/${GITHUB_OWNER}/${GITHUB_REPO}/actions/runs/${runId}/artifacts`, - githubToken, - )) as GithubArtifact[]; + const artifacts = await fetchGithubRunArtifacts(runId, githubToken); const gpuArtifacts = artifacts.filter((a) => a.name.startsWith('gpu_metrics')); if (gpuArtifacts.length === 0) throw new Error('No gpu_metrics artifacts found for this run'); const parsedArtifacts: { name: string; data: ReturnType }[] = []; for (const artifact of gpuArtifacts) { - const dlResp = await fetch(artifact.archive_download_url, { - headers: { Authorization: `Bearer ${githubToken}`, Accept: 'application/vnd.github.v3+json' }, - }); + const dlResp = await downloadGithubArtifact(artifact.archive_download_url, githubToken); if (!dlResp.ok) { console.warn(`Failed to download artifact ${artifact.name}: ${dlResp.statusText}`); continue; @@ -74,32 +44,21 @@ async function fetchGpuMetrics(runId: string) { continue; } - const zip = new AdmZip(Buffer.from(await dlResp.arrayBuffer())); - const rows = []; - for (const entry of zip.getEntries()) { - if (!entry.entryName.endsWith('.csv')) continue; - try { - rows.push(...parseCsvData(zip.readAsText(entry))); - } catch (e) { - console.warn(`Failed to parse CSV ${entry.entryName} from ${artifact.name}:`, e); - } - } + const rows = extractZipEntries( + Buffer.from(await dlResp.arrayBuffer()), + '.csv', + (_entryName, contents) => parseCsvData(contents), + (entryName, error) => { + console.warn(`Failed to parse CSV ${entryName} from ${artifact.name}:`, error); + }, + ); if (rows.length > 0) parsedArtifacts.push({ name: artifact.name, data: rows }); } if (parsedArtifacts.length === 0) throw new Error('No GPU metrics data found in artifacts'); return { - runInfo: { - id: run.id, - name: run.name, - branch: run.head_branch, - sha: run.head_sha, - createdAt: run.created_at, - url: run.html_url, - conclusion: run.conclusion, - status: run.status, - }, + runInfo: normalizeGithubRunInfo(run), artifacts: parsedArtifacts, }; } diff --git a/packages/app/src/app/api/unofficial-run/route.ts b/packages/app/src/app/api/unofficial-run/route.ts index e94c2f1..b63745d 100644 --- a/packages/app/src/app/api/unofficial-run/route.ts +++ b/packages/app/src/app/api/unofficial-run/route.ts @@ -2,38 +2,20 @@ * DO NOT ADD CACHING (blob, CDN, or unstable_cache) to this route. * It fetches live GitHub Actions artifacts which change while a run is in progress. */ -import AdmZip from 'adm-zip'; import { NextRequest, NextResponse } from 'next/server'; -import { GITHUB_API_BASE, GITHUB_OWNER, GITHUB_REPO } from '@semianalysisai/inferencex-constants'; import { mapBenchmarkRow } from '@semianalysisai/inferencex-db/etl/benchmark-mapper'; import { createSkipTracker } from '@semianalysisai/inferencex-db/etl/skip-tracker'; - -interface GithubArtifact { - id: number; - name: string; - archive_download_url: string; -} - -/** Paginated GitHub API fetch. */ -async function githubFetchAll(url: string, token: string): Promise { - const items: unknown[] = []; - let page = 1; - while (true) { - const sep = url.includes('?') ? '&' : '?'; - const resp = await fetch(`${url}${sep}per_page=100&page=${page}`, { - headers: { Authorization: `Bearer ${token}`, Accept: 'application/vnd.github.v3+json' }, - }); - if (!resp.ok) break; - const data = await resp.json(); - const arr = Array.isArray(data) ? data : (data.artifacts ?? data.workflow_runs ?? []); - if (arr.length === 0) break; - items.push(...arr); - if (arr.length < 100) break; - page++; - } - return items; -} +import { + downloadGithubArtifact, + extractZipEntries, + fetchGithubRunArtifacts, + fetchGithubWorkflowRun, + getGithubToken, + getRunDate, + normalizeGithubRunInfo, + type GithubWorkflowRun, +} from '@/lib/github-artifacts'; /** Normalize raw artifact rows into the BenchmarkRow shape the frontend expects. */ export function normalizeArtifactRows(rawRows: Record[], date: string) { @@ -72,19 +54,12 @@ export function normalizeArtifactRows(rawRows: Record[], date: return results; } -/** Extract and parse all JSON files from a ZIP buffer. */ +/** Extract all valid JSON files from a ZIP buffer; malformed JSON entries are skipped. */ function extractJsonFromZip(buffer: Buffer): Record[] { - const zip = new AdmZip(buffer); - const results: Record[] = []; - for (const entry of zip.getEntries()) { - if (!entry.entryName.endsWith('.json')) continue; - try { - const data = JSON.parse(zip.readAsText(entry)); - if (Array.isArray(data)) results.push(...data); - else results.push(data); - } catch {} - } - return results; + return extractZipEntries(buffer, '.json', (_entryName, contents) => { + const data = JSON.parse(contents) as Record | Record[]; + return Array.isArray(data) ? data : [data]; + }); } export async function GET(request: NextRequest) { @@ -93,35 +68,24 @@ export async function GET(request: NextRequest) { return NextResponse.json({ error: 'runId must be a numeric value' }, { status: 400 }); } - const githubToken = process.env.GITHUB_TOKEN; + const githubToken = getGithubToken(); if (!githubToken) { return NextResponse.json({ error: 'GitHub token not configured' }, { status: 500 }); } try { // Fetch workflow run metadata - const runResp = await fetch( - `${GITHUB_API_BASE}/repos/${GITHUB_OWNER}/${GITHUB_REPO}/actions/runs/${runId}`, - { - headers: { - Authorization: `Bearer ${githubToken}`, - Accept: 'application/vnd.github.v3+json', - }, - }, - ); + const runResp = await fetchGithubWorkflowRun(runId, githubToken); if (!runResp.ok) { return NextResponse.json( { error: `GitHub API: ${runResp.statusText}` }, { status: runResp.status }, ); } - const run = await runResp.json(); + const run = (await runResp.json()) as GithubWorkflowRun; // Fetch artifacts, find latest results_bmk - const artifacts = (await githubFetchAll( - `${GITHUB_API_BASE}/repos/${GITHUB_OWNER}/${GITHUB_REPO}/actions/runs/${runId}/artifacts`, - githubToken, - )) as GithubArtifact[]; + const artifacts = await fetchGithubRunArtifacts(runId, githubToken); const bmkArtifact = artifacts .filter((a) => a.name === 'results_bmk') @@ -132,9 +96,7 @@ export async function GET(request: NextRequest) { } // Download and extract benchmark data - const dlResp = await fetch(bmkArtifact.archive_download_url, { - headers: { Authorization: `Bearer ${githubToken}`, Accept: 'application/vnd.github.v3+json' }, - }); + const dlResp = await downloadGithubArtifact(bmkArtifact.archive_download_url, githubToken); if (!dlResp.ok) { return NextResponse.json( { error: `Artifact download failed: ${dlResp.statusText}` }, @@ -143,21 +105,12 @@ export async function GET(request: NextRequest) { } const rawRows = extractJsonFromZip(Buffer.from(await dlResp.arrayBuffer())); - const date = run.created_at - ? run.created_at.split('T')[0] - : new Date().toISOString().split('T')[0]; + const date = getRunDate(run); const benchmarks = normalizeArtifactRows(rawRows, date); return NextResponse.json({ runInfo: { - id: run.id, - name: run.name, - branch: run.head_branch, - sha: run.head_sha, - createdAt: run.created_at, - url: run.html_url, - conclusion: run.conclusion, - status: run.status, + ...normalizeGithubRunInfo(run), isNonMainBranch: run.head_branch !== 'main', }, benchmarks, diff --git a/packages/app/src/lib/github-artifacts.test.ts b/packages/app/src/lib/github-artifacts.test.ts new file mode 100644 index 0000000..707e061 --- /dev/null +++ b/packages/app/src/lib/github-artifacts.test.ts @@ -0,0 +1,142 @@ +import AdmZip from 'adm-zip'; +import { afterEach, describe, expect, it, vi } from 'vitest'; + +import { GITHUB_API_BASE, GITHUB_OWNER, GITHUB_REPO } from '@semianalysisai/inferencex-constants'; + +import { + extractZipEntries, + fetchGithubRunArtifacts, + getRunDate, + normalizeGithubRunInfo, + type GithubArtifact, + type GithubWorkflowRun, +} from './github-artifacts'; + +const mockFetch = vi.fn(); +vi.stubGlobal('fetch', mockFetch); + +afterEach(() => { + mockFetch.mockReset(); + vi.useRealTimers(); +}); + +function workflowRun(overrides: Partial = {}): GithubWorkflowRun { + return { + id: 123, + name: 'nightly', + head_branch: 'main', + head_sha: 'abc123', + created_at: '2026-03-01T05:06:07Z', + html_url: 'https://github.com/runs/123', + conclusion: 'success', + status: 'completed', + ...overrides, + }; +} + +function artifact(id: number, name = 'gpu_metrics'): GithubArtifact { + return { + id, + name, + archive_download_url: `https://github.com/artifacts/${id}.zip`, + }; +} + +describe('normalizeGithubRunInfo', () => { + it('preserves nullable branch and run state metadata', () => { + expect( + normalizeGithubRunInfo( + workflowRun({ + head_branch: null, + conclusion: null, + status: null, + }), + ), + ).toEqual({ + id: 123, + name: 'nightly', + branch: null, + sha: 'abc123', + createdAt: '2026-03-01T05:06:07Z', + url: 'https://github.com/runs/123', + conclusion: null, + status: null, + }); + }); +}); + +describe('getRunDate', () => { + it('uses the run timestamp date when created_at is present', () => { + expect(getRunDate(workflowRun({ created_at: '2026-02-14T23:59:59Z' }))).toBe('2026-02-14'); + }); + + it('falls back to the current UTC date when created_at is missing', () => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2026-04-03T12:34:56Z')); + + expect(getRunDate(workflowRun({ created_at: '' }))).toBe('2026-04-03'); + }); +}); + +describe('fetchGithubRunArtifacts', () => { + it('paginates artifacts and returns accumulated results when a later page fails', async () => { + const page1Artifacts = Array.from({ length: 100 }, (_, index) => artifact(index + 1)); + + mockFetch + .mockResolvedValueOnce({ + ok: true, + json: async () => ({ artifacts: page1Artifacts }), + }) + .mockResolvedValueOnce({ + ok: false, + status: 502, + statusText: 'Bad Gateway', + }); + + const artifacts = await fetchGithubRunArtifacts('456', 'token-123'); + + expect(artifacts).toEqual(page1Artifacts); + expect(mockFetch).toHaveBeenNthCalledWith( + 1, + `${GITHUB_API_BASE}/repos/${GITHUB_OWNER}/${GITHUB_REPO}/actions/runs/456/artifacts?per_page=100&page=1`, + { + headers: { + Accept: 'application/vnd.github.v3+json', + Authorization: 'Bearer token-123', + }, + }, + ); + expect(mockFetch).toHaveBeenNthCalledWith( + 2, + `${GITHUB_API_BASE}/repos/${GITHUB_OWNER}/${GITHUB_REPO}/actions/runs/456/artifacts?per_page=100&page=2`, + { + headers: { + Accept: 'application/vnd.github.v3+json', + Authorization: 'Bearer token-123', + }, + }, + ); + }); +}); + +describe('extractZipEntries', () => { + it('skips non-matching files and continues after parse errors', () => { + const zip = new AdmZip(); + zip.addFile('good.json', Buffer.from('{"id":1}', 'utf8')); + zip.addFile('bad.json', Buffer.from('not json', 'utf8')); + zip.addFile('notes.txt', Buffer.from('ignore me', 'utf8')); + + const parseErrors: string[] = []; + const rows = extractZipEntries( + zip.toBuffer(), + '.json', + (entryName, contents) => [{ entryName, payload: JSON.parse(contents) as { id: number } }], + (entryName) => { + parseErrors.push(entryName); + }, + ); + + expect(rows).toEqual([{ entryName: 'good.json', payload: { id: 1 } }]); + expect(parseErrors).toEqual(['bad.json']); + }); +}); diff --git a/packages/app/src/lib/github-artifacts.ts b/packages/app/src/lib/github-artifacts.ts new file mode 100644 index 0000000..0d22f7e --- /dev/null +++ b/packages/app/src/lib/github-artifacts.ts @@ -0,0 +1,146 @@ +import AdmZip from 'adm-zip'; + +import { GITHUB_API_BASE, GITHUB_OWNER, GITHUB_REPO } from '@semianalysisai/inferencex-constants'; + +/** + * DO NOT ADD CACHING around these GitHub artifact fetches. + * Workflow run metadata/artifacts can change while a run is still in progress. + */ +const GITHUB_HEADERS = { + Accept: 'application/vnd.github.v3+json', +} as const; + +export interface GithubArtifact { + id: number; + name: string; + archive_download_url: string; +} + +export interface GithubWorkflowRun { + id: number; + name: string; + // GitHub can return null here for detached refs or in-progress runs. + head_branch: string | null; + head_sha: string; + created_at: string; + html_url: string; + // conclusion/status may be null while a workflow run is still active. + conclusion: string | null; + status: string | null; +} + +export interface GithubRunInfo { + id: number; + name: string; + branch: string | null; + sha: string; + createdAt: string; + url: string; + conclusion: string | null; + status: string | null; +} + +export function getGithubToken(): string | undefined { + return process.env.GITHUB_TOKEN; +} + +export function normalizeGithubRunInfo(run: GithubWorkflowRun): GithubRunInfo { + return { + id: run.id, + name: run.name, + branch: run.head_branch, + sha: run.head_sha, + createdAt: run.created_at, + url: run.html_url, + conclusion: run.conclusion, + status: run.status, + }; +} + +export function getRunDate(run: GithubWorkflowRun): string { + return run.created_at ? run.created_at.split('T')[0] : new Date().toISOString().split('T')[0]; +} + +function appendPaginationParams(url: string, page: number): string { + const separator = url.includes('?') ? '&' : '?'; + return `${url}${separator}per_page=100&page=${page}`; +} + +export async function fetchGithubWorkflowRun(runId: string, token: string): Promise { + return fetch(`${GITHUB_API_BASE}/repos/${GITHUB_OWNER}/${GITHUB_REPO}/actions/runs/${runId}`, { + headers: { + ...GITHUB_HEADERS, + Authorization: `Bearer ${token}`, + }, + }); +} + +export async function fetchGithubRunArtifacts( + runId: string, + token: string, +): Promise { + const url = `${GITHUB_API_BASE}/repos/${GITHUB_OWNER}/${GITHUB_REPO}/actions/runs/${runId}/artifacts`; + const artifacts: GithubArtifact[] = []; + let page = 1; + + while (true) { + const response = await fetch(appendPaginationParams(url, page), { + headers: { + ...GITHUB_HEADERS, + Authorization: `Bearer ${token}`, + }, + }); + if (!response.ok) { + // Preserve old route behavior: stop pagination on API failure and return what we have. + break; + } + + const data = (await response.json()) as { artifacts?: GithubArtifact[] }; + const pageArtifacts = data.artifacts ?? []; + if (pageArtifacts.length === 0) { + break; + } + + artifacts.push(...pageArtifacts); + if (pageArtifacts.length < 100) { + break; + } + page++; + } + + return artifacts; +} + +export async function downloadGithubArtifact(url: string, token: string): Promise { + return fetch(url, { + headers: { + ...GITHUB_HEADERS, + Authorization: `Bearer ${token}`, + }, + }); +} + +export function extractZipEntries( + buffer: Buffer, + extension: string, + parseEntry: (entryName: string, contents: string) => T[], + onParseError?: (entryName: string, error: unknown) => void, +): T[] { + // Preserve partial-success behavior: malformed matching files are skipped after optional reporting. + const zip = new AdmZip(buffer); + const rows: T[] = []; + + for (const entry of zip.getEntries()) { + if (!entry.entryName.endsWith(extension)) { + continue; + } + + try { + rows.push(...parseEntry(entry.entryName, zip.readAsText(entry))); + } catch (error) { + onParseError?.(entry.entryName, error); + } + } + + return rows; +}