diff --git a/apps/web/src/lib/ai-gateway/providers/moonshotai.ts b/apps/web/src/lib/ai-gateway/providers/moonshotai.ts index f4edbd920..91280e6d2 100644 --- a/apps/web/src/lib/ai-gateway/providers/moonshotai.ts +++ b/apps/web/src/lib/ai-gateway/providers/moonshotai.ts @@ -11,6 +11,6 @@ export function applyMoonshotModelSettings(requestToMutate: GatewayRequest) { delete requestToMutate.body.top_p; } -export const KIMI_CURRENT_MODEL_ID = 'moonshotai/kimi-k2.6'; +export const KIMI_CURRENT_MODEL_ID = 'moonshotai/kimi-k2.7-code'; export const KIMI_CURRENT_VERCEL_MODEL_ID = KIMI_CURRENT_MODEL_ID; diff --git a/apps/web/src/lib/model-stats/sync-openrouter.ts b/apps/web/src/lib/model-stats/sync-openrouter.ts index a6e286468..3742a8c7e 100644 --- a/apps/web/src/lib/model-stats/sync-openrouter.ts +++ b/apps/web/src/lib/model-stats/sync-openrouter.ts @@ -3,6 +3,7 @@ import { modelStats } from '@kilocode/db/schema'; import { eq, sql } from 'drizzle-orm'; import type { OpenRouterModel } from '@/lib/organizations/organization-types'; import type { OpenRouterModel as OpenRouterApiModel } from '@/lib/ai-gateway/providers/openrouter/openrouter-types'; +import { deriveModelStatsIdentity } from '@kilocode/worker-utils/kilo-model-id'; /** * Convert per-token price to per-million-tokens price @@ -66,6 +67,7 @@ export async function syncOpenRouterModels( data: { isActive: true, isRecommended, + ...deriveModelStatsIdentity(model.id), name: model.name, description: model.description, priceInput: toPricePerMillion(model.pricing?.prompt), @@ -83,11 +85,9 @@ export async function syncOpenRouterModels( isActive: true, isRecommended, openrouterId: model.id, - slug: generateSlug(model.id), + ...deriveModelStatsIdentity(model.id), name: model.name, description: model.description, - modelCreator: extractCreator(model.id), - creatorSlug: extractCreatorSlug(model.id), priceInput: toPricePerMillion(model.pricing?.prompt), priceOutput: toPricePerMillion(model.pricing?.completion), contextLength: model.context_length ?? null, @@ -114,6 +114,7 @@ export async function syncOpenRouterModels( data: { // Note: NOT updating isActive - preserve user's setting isRecommended, + ...deriveModelStatsIdentity(updatedModelData.id), name: updatedModelData.name, description: updatedModelData.description, priceInput: toPricePerMillion(updatedModelData.pricing?.prompt), @@ -154,31 +155,3 @@ export async function syncOpenRouterModels( totalProcessed: newModels.length + updatedModels.length, }; } - -/** - * Generate a URL-friendly slug from the model ID - */ -function generateSlug(modelId: string): string { - return modelId - .toLowerCase() - .replace(/[^a-z0-9]+/g, '-') - .replace(/^-|-$/g, ''); -} - -/** - * Extract the creator/provider name from the model ID - * e.g., "anthropic/claude-sonnet-4.5" -> "anthropic" - */ -function extractCreator(modelId: string): string { - const parts = modelId.split('/'); - return parts.length > 1 ? parts[0] : 'unknown'; -} - -/** - * Extract and format the creator slug from the model ID - */ -function extractCreatorSlug(modelId: string): string { - return extractCreator(modelId) - .toLowerCase() - .replace(/[^a-z0-9]+/g, '-'); -} diff --git a/packages/worker-utils/src/kilo-model-id.test.ts b/packages/worker-utils/src/kilo-model-id.test.ts index 96b66f069..cd2c00d11 100644 --- a/packages/worker-utils/src/kilo-model-id.test.ts +++ b/packages/worker-utils/src/kilo-model-id.test.ts @@ -1,5 +1,9 @@ import { describe, expect, it } from 'vitest'; -import { KILO_MODEL_PREFIX, unprefixKiloGatewayModelId } from './kilo-model-id.js'; +import { + deriveModelStatsIdentity, + KILO_MODEL_PREFIX, + unprefixKiloGatewayModelId, +} from './kilo-model-id.js'; describe('kilo model ids', () => { it('exposes the shared Kilo model prefix', () => { @@ -12,4 +16,17 @@ describe('kilo model ids', () => { expect(unprefixKiloGatewayModelId('kilo/kilo/special-model')).toBe('kilo/special-model'); expect(unprefixKiloGatewayModelId('kilo/special-model')).toBeUndefined(); }); + + it('derives model stats identity from provider-shaped model ids', () => { + expect(deriveModelStatsIdentity('MoonshotAI/Kimi-K2.7-Code')).toEqual({ + slug: 'moonshotai-kimi-k2-7-code', + modelCreator: 'MoonshotAI', + creatorSlug: 'moonshotai', + }); + expect(deriveModelStatsIdentity('special-model')).toEqual({ + slug: 'special-model', + modelCreator: 'unknown', + creatorSlug: 'unknown', + }); + }); }); diff --git a/packages/worker-utils/src/kilo-model-id.ts b/packages/worker-utils/src/kilo-model-id.ts index a7752ecf1..331ccf4ee 100644 --- a/packages/worker-utils/src/kilo-model-id.ts +++ b/packages/worker-utils/src/kilo-model-id.ts @@ -6,3 +6,15 @@ export function unprefixKiloGatewayModelId(model: string): string | undefined { const unprefixedModel = model.slice(KILO_MODEL_PREFIX.length); return unprefixedModel.includes('/') ? unprefixedModel : undefined; } + +export function deriveModelStatsIdentity(model: string) { + const modelCreator = model.includes('/') ? model.split('/')[0] : 'unknown'; + return { + slug: model + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-|-$/g, ''), + modelCreator, + creatorSlug: modelCreator.toLowerCase().replace(/[^a-z0-9]+/g, '-'), + }; +} diff --git a/services/model-eval-ingest/src/sync.test.ts b/services/model-eval-ingest/src/sync.test.ts index 3bdcef24a..59ad4e73d 100644 --- a/services/model-eval-ingest/src/sync.test.ts +++ b/services/model-eval-ingest/src/sync.test.ts @@ -34,6 +34,28 @@ class MemoryPromotionStore implements PromotionStore { return latest; } + async listOrphanedTerminalBenchModels(): Promise { + return [ + ...new Set( + [...this.rows.values()] + .filter( + row => row.promotion.task_source === 'terminal-bench' && row.modelStatsId === null + ) + .map(row => row.promotion.model) + ), + ]; + } + + async ensureModelStatsTargets(models: string[]): Promise { + const existing = await this.findModelStatsTargets(models); + for (const model of models) { + if (existing.has(model)) continue; + const canonical = unprefixKiloGatewayModelId(model) ?? model; + if (this.modelStats.has(canonical)) continue; + this.modelStats.set(canonical, { id: `created:${canonical}`, model: canonical }); + } + } + async findModelStatsTargets(models: string[]): Promise> { return new Map( models.flatMap(model => { @@ -47,6 +69,26 @@ class MemoryPromotionStore implements PromotionStore { ); } + async linkOrphanedTerminalBenchPromotions( + targets: Map + ): Promise { + const tuples = new Map(); + for (const row of this.rows.values()) { + if (row.promotion.task_source !== 'terminal-bench' || row.modelStatsId !== null) continue; + const target = targets.get(row.promotion.model); + if (!target) continue; + row.modelStatsId = target.id; + const tuple = { + provider: row.promotion.provider, + model: row.promotion.model, + variant: row.promotion.variant, + modelStatsId: target.id, + }; + tuples.set(JSON.stringify(tuple), tuple); + } + return [...tuples.values()]; + } + async insertPromotions( promotions: Array<{ promotion: PromotionRecord; modelStatsId: string | null }> ): Promise> { @@ -211,6 +253,23 @@ describe('syncPromotionsFromBench', () => { expect(store.cache.get('model-stats-kilo-provider')?.evals['terminal-bench']).toBeDefined(); }); + it('creates a canonical model stats target for an unknown terminal-bench model', async () => { + const store = new MemoryPromotionStore([]); + const bench = new MemoryBenchDashboard([ + promotion({ bench_eval_name: 'new-model', model: 'kilo/moonshotai/kimi-k2.7-code' }), + ]); + + const result = await syncPromotionsFromBench(bench, store); + + expect(result).toEqual({ inserted: 1, alreadyHad: 0, cacheRecomputes: 1, fetched: 1 }); + expect(store.modelStats.has('moonshotai/kimi-k2.7-code')).toBe(true); + expect(store.modelStats.has('kilo/moonshotai/kimi-k2.7-code')).toBe(false); + expect(store.rows.get('new-model')?.modelStatsId).toBe('created:moonshotai/kimi-k2.7-code'); + expect( + store.cache.get('created:moonshotai/kimi-k2.7-code')?.evals['terminal-bench'] + ).toBeDefined(); + }); + it('does not strip a single Kilo provider prefix into a bare model id', async () => { const store = new MemoryPromotionStore([{ id: 'model-stats-bare', model: 'special-model' }]); const bench = new MemoryBenchDashboard([ @@ -219,9 +278,48 @@ describe('syncPromotionsFromBench', () => { const result = await syncPromotionsFromBench(bench, store); + expect(result).toEqual({ inserted: 1, alreadyHad: 0, cacheRecomputes: 1, fetched: 1 }); + expect(store.modelStats.has('kilo/special-model')).toBe(true); + expect(store.rows.get('single-kilo-provider-model')?.modelStatsId).toBe( + 'created:kilo/special-model' + ); + expect(store.cache.has('created:kilo/special-model')).toBe(true); + }); + + it('does not create a model stats target for other benchmark tasks', async () => { + const store = new MemoryPromotionStore([]); + const bench = new MemoryBenchDashboard([ + promotion({ + bench_eval_name: 'unknown-swebench-model', + model: 'kilo/openai/unknown-model', + task_source: 'swebench-verified', + }), + ]); + + const result = await syncPromotionsFromBench(bench, store); + expect(result).toEqual({ inserted: 1, alreadyHad: 0, cacheRecomputes: 0, fetched: 1 }); - expect(store.rows.get('single-kilo-provider-model')?.modelStatsId).toBeNull(); - expect(store.cache.size).toBe(0); + expect(store.modelStats.size).toBe(0); + expect(store.rows.get('unknown-swebench-model')?.modelStatsId).toBeNull(); + }); + + it('repairs an orphaned terminal-bench promotion on a later sync', async () => { + const store = new MemoryPromotionStore([]); + const orphan = promotion({ + bench_eval_name: 'orphaned-model', + model: 'kilo/moonshotai/kimi-k2.7-code', + }); + store.rows.set(orphan.bench_eval_name, { promotion: orphan, modelStatsId: null }); + + const result = await syncPromotionsFromBench(new MemoryBenchDashboard([]), store); + + expect(result).toEqual({ inserted: 0, alreadyHad: 0, cacheRecomputes: 1, fetched: 0 }); + expect(store.rows.get('orphaned-model')?.modelStatsId).toBe( + 'created:moonshotai/kimi-k2.7-code' + ); + expect( + store.cache.get('created:moonshotai/kimi-k2.7-code')?.evals['terminal-bench'] + ).toBeDefined(); }); it('does not duplicate rows on an idempotent rerun', async () => { diff --git a/services/model-eval-ingest/src/sync.ts b/services/model-eval-ingest/src/sync.ts index d4d345fcd..d2e1fcf1a 100644 --- a/services/model-eval-ingest/src/sync.ts +++ b/services/model-eval-ingest/src/sync.ts @@ -1,6 +1,9 @@ import type { WorkerDb } from '@kilocode/db/client'; import { model_eval_ingestions, modelStats } from '@kilocode/db/schema'; -import { unprefixKiloGatewayModelId } from '@kilocode/worker-utils/kilo-model-id'; +import { + deriveModelStatsIdentity, + unprefixKiloGatewayModelId, +} from '@kilocode/worker-utils/kilo-model-id'; import { and, desc, eq, inArray, isNull, sql } from 'drizzle-orm'; import { PromotionRecordSchema, @@ -71,7 +74,12 @@ function storedPromotionValues({ promotion, modelStatsId }: PromotionInsert) { export type PromotionStore = { getLatestPromotedAtMs(): Promise; + listOrphanedTerminalBenchModels(): Promise; + ensureModelStatsTargets(models: string[]): Promise; findModelStatsTargets(models: string[]): Promise>; + linkOrphanedTerminalBenchPromotions( + targets: Map + ): Promise; insertPromotions(promotions: PromotionInsert[]): Promise>; refreshPromotion(promotion: PromotionInsert): Promise; listLatestPromotions(tuple: Omit): Promise; @@ -95,6 +103,46 @@ export function createPromotionStore(db: WorkerDb): PromotionStore { return latest ? Date.parse(latest.promotedAt) : 0; }, + async listOrphanedTerminalBenchModels(): Promise { + const rows = await db + .selectDistinct({ model: model_eval_ingestions.model }) + .from(model_eval_ingestions) + .where( + and( + eq(model_eval_ingestions.task_source, 'terminal-bench'), + isNull(model_eval_ingestions.model_stats_id) + ) + ); + return rows.map(row => row.model); + }, + + async ensureModelStatsTargets(models: string[]): Promise { + const candidates = [...new Set(models)]; + if (candidates.length === 0) return; + + const existing = await this.findModelStatsTargets(candidates); + const missing = [ + ...new Set( + candidates + .filter(model => !existing.has(model)) + .map(model => unprefixKiloGatewayModelId(model) ?? model) + ), + ]; + if (missing.length === 0) return; + + await db + .insert(modelStats) + .values( + missing.map(model => ({ + openrouterId: model, + ...deriveModelStatsIdentity(model), + name: model, + openrouterData: sql`'{}'::jsonb`, + })) + ) + .onConflictDoNothing(); + }, + async findModelStatsTargets(models: string[]): Promise> { const promotionModels = [...new Set(models)]; if (promotionModels.length === 0) return new Map(); @@ -123,6 +171,34 @@ export function createPromotionStore(db: WorkerDb): PromotionStore { return resolvedTargets; }, + async linkOrphanedTerminalBenchPromotions( + targets: Map + ): Promise { + const tuples = new Map(); + for (const [model, target] of targets) { + const rows = await db + .update(model_eval_ingestions) + .set({ model_stats_id: target.id }) + .where( + and( + eq(model_eval_ingestions.model, model), + eq(model_eval_ingestions.task_source, 'terminal-bench'), + isNull(model_eval_ingestions.model_stats_id) + ) + ) + .returning({ + provider: model_eval_ingestions.provider, + model: model_eval_ingestions.model, + variant: model_eval_ingestions.variant, + }); + for (const row of rows) { + const tuple = { ...row, modelStatsId: target.id } satisfies PromotionTuple; + tuples.set(tupleKey(tuple), tuple); + } + } + return [...tuples.values()]; + }, + async insertPromotions(promotions: PromotionInsert[]): Promise> { if (promotions.length === 0) return new Set(); @@ -274,9 +350,22 @@ export async function syncPromotionsFromBench( let inserted = 0; let alreadyHad = 0; const tuplesToRecompute = new Map(); - const modelStatsTargets = await store.findModelStatsTargets([ - ...new Set(promotions.map(promotion => promotion.model)), - ]); + const fetchedModels = [...new Set(promotions.map(promotion => promotion.model))]; + const terminalModels = promotions + .filter(promotion => promotion.task_source === 'terminal-bench') + .map(promotion => promotion.model); + const orphanedModels = await store.listOrphanedTerminalBenchModels(); + const models = [...new Set([...fetchedModels, ...orphanedModels])]; + await store.ensureModelStatsTargets([...terminalModels, ...orphanedModels]); + const modelStatsTargets = await store.findModelStatsTargets(models); + const orphanedTargets = new Map( + orphanedModels.flatMap(model => { + const target = modelStatsTargets.get(model); + return target ? [[model, target]] : []; + }) + ); + const repaired = await store.linkOrphanedTerminalBenchPromotions(orphanedTargets); + for (const tuple of repaired) tuplesToRecompute.set(tupleKey(tuple), tuple); const promotionsToInsert = promotions.map(promotion => { const modelStatsTarget = modelStatsTargets.get(promotion.model); return {