Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apps/web/src/lib/ai-gateway/providers/moonshotai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ export function applyMoonshotModelSettings(requestToMutate: GatewayRequest) {
delete requestToMutate.body.top_p;
}

/**
 * Model ID of the current Kimi model.
 *
 * Declared exactly once: a second `export const` with the same name is a
 * redeclaration error, so only the newer `kimi-k2.7-code` value is kept.
 */
export const KIMI_CURRENT_MODEL_ID = 'moonshotai/kimi-k2.7-code';

/** Same value as `KIMI_CURRENT_MODEL_ID`, exported under the Vercel-specific name. */
export const KIMI_CURRENT_VERCEL_MODEL_ID = KIMI_CURRENT_MODEL_ID;
35 changes: 4 additions & 31 deletions apps/web/src/lib/model-stats/sync-openrouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { modelStats } from '@kilocode/db/schema';
import { eq, sql } from 'drizzle-orm';
import type { OpenRouterModel } from '@/lib/organizations/organization-types';
import type { OpenRouterModel as OpenRouterApiModel } from '@/lib/ai-gateway/providers/openrouter/openrouter-types';
import { deriveModelStatsIdentity } from '@kilocode/worker-utils/kilo-model-id';

/**
* Convert per-token price to per-million-tokens price
Expand Down Expand Up @@ -66,6 +67,7 @@ export async function syncOpenRouterModels(
data: {
isActive: true,
isRecommended,
...deriveModelStatsIdentity(model.id),
name: model.name,
description: model.description,
priceInput: toPricePerMillion(model.pricing?.prompt),
Expand All @@ -83,11 +85,9 @@ export async function syncOpenRouterModels(
isActive: true,
isRecommended,
openrouterId: model.id,
slug: generateSlug(model.id),
...deriveModelStatsIdentity(model.id),
name: model.name,
description: model.description,
modelCreator: extractCreator(model.id),
creatorSlug: extractCreatorSlug(model.id),
priceInput: toPricePerMillion(model.pricing?.prompt),
priceOutput: toPricePerMillion(model.pricing?.completion),
contextLength: model.context_length ?? null,
Expand All @@ -114,6 +114,7 @@ export async function syncOpenRouterModels(
data: {
// Note: NOT updating isActive - preserve user's setting
isRecommended,
...deriveModelStatsIdentity(updatedModelData.id),
name: updatedModelData.name,
description: updatedModelData.description,
priceInput: toPricePerMillion(updatedModelData.pricing?.prompt),
Expand Down Expand Up @@ -154,31 +155,3 @@ export async function syncOpenRouterModels(
totalProcessed: newModels.length + updatedModels.length,
};
}

/**
 * Build a URL-friendly slug from a model ID.
 *
 * The ID is lower-cased, every run of characters outside `a-z0-9` collapses
 * into a single `-`, and a leading or trailing `-` is dropped.
 */
function generateSlug(modelId: string): string {
  const lowered = modelId.toLowerCase();
  const dashed = lowered.replace(/[^a-z0-9]+/g, '-');
  return dashed.replace(/^-|-$/g, '');
}

/**
 * Return the creator/provider segment of a model ID, i.e. everything before
 * the first `/`.
 * e.g., "anthropic/claude-sonnet-4.5" -> "anthropic"
 *
 * IDs that contain no `/` yield the literal 'unknown'.
 */
function extractCreator(modelId: string): string {
  const slashAt = modelId.indexOf('/');
  if (slashAt < 0) {
    return 'unknown';
  }
  return modelId.slice(0, slashAt);
}

/**
 * Slug form of the creator segment of a model ID: the value returned by
 * `extractCreator`, lower-cased, with each run of characters outside
 * `a-z0-9` replaced by a single `-`.
 */
function extractCreatorSlug(modelId: string): string {
  const creator = extractCreator(modelId);
  const lowered = creator.toLowerCase();
  return lowered.replace(/[^a-z0-9]+/g, '-');
}
19 changes: 18 additions & 1 deletion packages/worker-utils/src/kilo-model-id.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import { describe, expect, it } from 'vitest';
import { KILO_MODEL_PREFIX, unprefixKiloGatewayModelId } from './kilo-model-id.js';
import {
deriveModelStatsIdentity,
KILO_MODEL_PREFIX,
unprefixKiloGatewayModelId,
} from './kilo-model-id.js';

describe('kilo model ids', () => {
it('exposes the shared Kilo model prefix', () => {
Expand All @@ -12,4 +16,17 @@ describe('kilo model ids', () => {
expect(unprefixKiloGatewayModelId('kilo/kilo/special-model')).toBe('kilo/special-model');
expect(unprefixKiloGatewayModelId('kilo/special-model')).toBeUndefined();
});

// Pins the three identity fields produced by deriveModelStatsIdentity.
it('derives model stats identity from provider-shaped model ids', () => {
// Mixed-case "creator/model" id: both slugs are lower-cased and '/' and '.'
// become '-', while modelCreator keeps its original casing.
expect(deriveModelStatsIdentity('MoonshotAI/Kimi-K2.7-Code')).toEqual({
slug: 'moonshotai-kimi-k2-7-code',
modelCreator: 'MoonshotAI',
creatorSlug: 'moonshotai',
});
// Id without a '/' separator: the creator fields fall back to 'unknown'.
expect(deriveModelStatsIdentity('special-model')).toEqual({
slug: 'special-model',
modelCreator: 'unknown',
creatorSlug: 'unknown',
});
});
});
12 changes: 12 additions & 0 deletions packages/worker-utils/src/kilo-model-id.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,15 @@ export function unprefixKiloGatewayModelId(model: string): string | undefined {
const unprefixedModel = model.slice(KILO_MODEL_PREFIX.length);
return unprefixedModel.includes('/') ? unprefixedModel : undefined;
}

/**
 * Derive the identity fields stored on a model-stats row from a model ID.
 *
 * @param model - Model ID, typically shaped like `creator/model-name`.
 * @returns
 *  - `slug`: the whole ID lower-cased, with runs of characters outside
 *    `a-z0-9` collapsed to `-` and a leading/trailing `-` removed.
 *  - `modelCreator`: the text before the first `/`, casing preserved, or
 *    'unknown' when the ID contains no `/`.
 *  - `creatorSlug`: `modelCreator` lower-cased, with runs of characters
 *    outside `a-z0-9` collapsed to `-` (edges are not trimmed).
 */
export function deriveModelStatsIdentity(model: string) {
  const toDashed = (value: string) => value.toLowerCase().replace(/[^a-z0-9]+/g, '-');

  const separatorAt = model.indexOf('/');
  const modelCreator = separatorAt === -1 ? 'unknown' : model.slice(0, separatorAt);

  const slug = toDashed(model).replace(/^-|-$/g, '');
  const creatorSlug = toDashed(modelCreator);

  return { slug, modelCreator, creatorSlug };
}
102 changes: 100 additions & 2 deletions services/model-eval-ingest/src/sync.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,28 @@ class MemoryPromotionStore implements PromotionStore {
return latest;
}

/**
 * Distinct model ids of stored rows whose promotion has task_source
 * 'terminal-bench' and whose modelStatsId is still null, in first-seen order.
 */
async listOrphanedTerminalBenchModels(): Promise<string[]> {
  const orphanedModels = new Set<string>();
  for (const { promotion, modelStatsId } of this.rows.values()) {
    if (promotion.task_source !== 'terminal-bench') continue;
    if (modelStatsId !== null) continue;
    orphanedModels.add(promotion.model);
  }
  return Array.from(orphanedModels);
}

/**
 * Register an in-memory model-stats entry for every given model that
 * findModelStatsTargets does not already resolve.
 *
 * The entry is keyed by the un-prefixed id when unprefixKiloGatewayModelId
 * returns one, otherwise by the model id as given; ids already present in
 * `modelStats` are left untouched.
 */
async ensureModelStatsTargets(models: string[]): Promise<void> {
  const alreadyResolved = await this.findModelStatsTargets(models);
  const unresolved = models.filter(model => !alreadyResolved.has(model));
  for (const model of unresolved) {
    const canonicalId = unprefixKiloGatewayModelId(model) ?? model;
    if (!this.modelStats.has(canonicalId)) {
      this.modelStats.set(canonicalId, { id: `created:${canonicalId}`, model: canonicalId });
    }
  }
}

async findModelStatsTargets(models: string[]): Promise<Map<string, ModelStatsTarget>> {
return new Map(
models.flatMap(model => {
Expand All @@ -47,6 +69,26 @@ class MemoryPromotionStore implements PromotionStore {
);
}

/**
 * Attach a model-stats id to every stored 'terminal-bench' row that has none
 * and whose model appears in `targets`.
 *
 * @returns one tuple per distinct (provider, model, variant, modelStatsId)
 * combination that was linked.
 */
async linkOrphanedTerminalBenchPromotions(
  targets: Map<string, ModelStatsTarget>
): Promise<PromotionTuple[]> {
  const linked = new Map<string, PromotionTuple>();
  for (const row of this.rows.values()) {
    const { promotion } = row;
    const isOrphanedTerminalBench =
      promotion.task_source === 'terminal-bench' && row.modelStatsId === null;
    if (!isOrphanedTerminalBench) continue;

    const target = targets.get(promotion.model);
    if (!target) continue;

    row.modelStatsId = target.id;
    // Property order matters: the JSON string below is the de-duplication key.
    const tuple = {
      provider: promotion.provider,
      model: promotion.model,
      variant: promotion.variant,
      modelStatsId: target.id,
    };
    linked.set(JSON.stringify(tuple), tuple);
  }
  return Array.from(linked.values());
}

async insertPromotions(
promotions: Array<{ promotion: PromotionRecord; modelStatsId: string | null }>
): Promise<Set<string>> {
Expand Down Expand Up @@ -211,6 +253,23 @@ describe('syncPromotionsFromBench', () => {
expect(store.cache.get('model-stats-kilo-provider')?.evals['terminal-bench']).toBeDefined();
});

// A promotion for a model the store has no model-stats entry for should get
// an entry created under the un-prefixed id and be linked to it.
it('creates a canonical model stats target for an unknown terminal-bench model', async () => {
// Presumably the constructor argument is the initial list of model-stats
// targets (empty here) — confirm against MemoryPromotionStore.
const store = new MemoryPromotionStore([]);
// NOTE(review): task_source is not set here; the test title implies the
// promotion() helper defaults it to 'terminal-bench' — confirm.
const bench = new MemoryBenchDashboard([
promotion({ bench_eval_name: 'new-model', model: 'kilo/moonshotai/kimi-k2.7-code' }),
]);

const result = await syncPromotionsFromBench(bench, store);

expect(result).toEqual({ inserted: 1, alreadyHad: 0, cacheRecomputes: 1, fetched: 1 });
// The created entry is keyed by the id without the leading 'kilo/' prefix,
// never by the prefixed id.
expect(store.modelStats.has('moonshotai/kimi-k2.7-code')).toBe(true);
expect(store.modelStats.has('kilo/moonshotai/kimi-k2.7-code')).toBe(false);
// The stored promotion row points at the newly created entry.
expect(store.rows.get('new-model')?.modelStatsId).toBe('created:moonshotai/kimi-k2.7-code');
// The cache for that entry now holds a 'terminal-bench' eval.
expect(
store.cache.get('created:moonshotai/kimi-k2.7-code')?.evals['terminal-bench']
).toBeDefined();
});

it('does not strip a single Kilo provider prefix into a bare model id', async () => {
const store = new MemoryPromotionStore([{ id: 'model-stats-bare', model: 'special-model' }]);
const bench = new MemoryBenchDashboard([
Expand All @@ -219,9 +278,48 @@ describe('syncPromotionsFromBench', () => {

const result = await syncPromotionsFromBench(bench, store);

expect(result).toEqual({ inserted: 1, alreadyHad: 0, cacheRecomputes: 1, fetched: 1 });
expect(store.modelStats.has('kilo/special-model')).toBe(true);
expect(store.rows.get('single-kilo-provider-model')?.modelStatsId).toBe(
'created:kilo/special-model'
);
expect(store.cache.has('created:kilo/special-model')).toBe(true);
});

// Promotions whose task_source is not 'terminal-bench' are still ingested,
// but must not create a model-stats entry or trigger a cache recompute.
it('does not create a model stats target for other benchmark tasks', async () => {
  const store = new MemoryPromotionStore([]);
  const bench = new MemoryBenchDashboard([
    promotion({
      bench_eval_name: 'unknown-swebench-model',
      model: 'kilo/openai/unknown-model',
      task_source: 'swebench-verified',
    }),
  ]);

  const result = await syncPromotionsFromBench(bench, store);

  expect(result).toEqual({ inserted: 1, alreadyHad: 0, cacheRecomputes: 0, fetched: 1 });
  expect(store.cache.size).toBe(0);
  expect(store.modelStats.size).toBe(0);
  // Only the row this test inserted is checked. Looking up a row key that was
  // never inserted yields undefined, which toBeNull() rejects.
  expect(store.rows.get('unknown-swebench-model')?.modelStatsId).toBeNull();
});

// A row stored earlier without a model-stats link should be linked by a
// later sync, even when that sync fetches no new promotions.
it('repairs an orphaned terminal-bench promotion on a later sync', async () => {
const store = new MemoryPromotionStore([]);
// NOTE(review): task_source is not set here; the test title implies the
// promotion() helper defaults it to 'terminal-bench' — confirm.
const orphan = promotion({
bench_eval_name: 'orphaned-model',
model: 'kilo/moonshotai/kimi-k2.7-code',
});
// Seed the store directly with an unlinked row (modelStatsId: null).
store.rows.set(orphan.bench_eval_name, { promotion: orphan, modelStatsId: null });

// The dashboard returns nothing, so any change must come from the repair path.
const result = await syncPromotionsFromBench(new MemoryBenchDashboard([]), store);

// Nothing fetched or inserted, but one cache recompute for the repaired row.
expect(result).toEqual({ inserted: 0, alreadyHad: 0, cacheRecomputes: 1, fetched: 0 });
expect(store.rows.get('orphaned-model')?.modelStatsId).toBe(
'created:moonshotai/kimi-k2.7-code'
);
expect(
store.cache.get('created:moonshotai/kimi-k2.7-code')?.evals['terminal-bench']
).toBeDefined();
});

it('does not duplicate rows on an idempotent rerun', async () => {
Expand Down
97 changes: 93 additions & 4 deletions services/model-eval-ingest/src/sync.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import type { WorkerDb } from '@kilocode/db/client';
import { model_eval_ingestions, modelStats } from '@kilocode/db/schema';
import { unprefixKiloGatewayModelId } from '@kilocode/worker-utils/kilo-model-id';
import {
deriveModelStatsIdentity,
unprefixKiloGatewayModelId,
} from '@kilocode/worker-utils/kilo-model-id';
import { and, desc, eq, inArray, isNull, sql } from 'drizzle-orm';
import {
PromotionRecordSchema,
Expand Down Expand Up @@ -71,7 +74,12 @@ function storedPromotionValues({ promotion, modelStatsId }: PromotionInsert) {

export type PromotionStore = {
getLatestPromotedAtMs(): Promise<number>;
listOrphanedTerminalBenchModels(): Promise<string[]>;
ensureModelStatsTargets(models: string[]): Promise<void>;
findModelStatsTargets(models: string[]): Promise<Map<string, ModelStatsTarget>>;
linkOrphanedTerminalBenchPromotions(
targets: Map<string, ModelStatsTarget>
): Promise<PromotionTuple[]>;
insertPromotions(promotions: PromotionInsert[]): Promise<Set<string>>;
refreshPromotion(promotion: PromotionInsert): Promise<void>;
listLatestPromotions(tuple: Omit<PromotionTuple, 'modelStatsId'>): Promise<LatestPromotion[]>;
Expand All @@ -95,6 +103,46 @@ export function createPromotionStore(db: WorkerDb): PromotionStore {
return latest ? Date.parse(latest.promotedAt) : 0;
},

/**
 * Distinct `model` values of model_eval_ingestions rows whose task_source is
 * 'terminal-bench' and whose model_stats_id is NULL.
 */
async listOrphanedTerminalBenchModels(): Promise<string[]> {
  const isUnlinkedTerminalBench = and(
    eq(model_eval_ingestions.task_source, 'terminal-bench'),
    isNull(model_eval_ingestions.model_stats_id)
  );
  const orphans = await db
    .selectDistinct({ model: model_eval_ingestions.model })
    .from(model_eval_ingestions)
    .where(isUnlinkedTerminalBench);
  return orphans.map(({ model }) => model);
},

/**
 * Insert a modelStats row for each given model that findModelStatsTargets
 * cannot resolve yet.
 *
 * Rows are keyed by the un-prefixed id when unprefixKiloGatewayModelId
 * returns one, otherwise by the id as given. The insert uses
 * onConflictDoNothing, so conflicting rows are skipped rather than raised.
 * Returns early, without touching the database, when there is nothing to
 * check or nothing missing.
 */
async ensureModelStatsTargets(models: string[]): Promise<void> {
  const uniqueModels = Array.from(new Set(models));
  if (uniqueModels.length === 0) return;

  const resolved = await this.findModelStatsTargets(uniqueModels);

  // Several prefixed ids may canonicalize to the same id; the Set
  // de-duplicates after canonicalization.
  const canonicalMissing = new Set<string>();
  for (const model of uniqueModels) {
    if (resolved.has(model)) continue;
    canonicalMissing.add(unprefixKiloGatewayModelId(model) ?? model);
  }
  if (canonicalMissing.size === 0) return;

  await db
    .insert(modelStats)
    .values(
      [...canonicalMissing].map(canonicalId => ({
        openrouterId: canonicalId,
        ...deriveModelStatsIdentity(canonicalId),
        name: canonicalId,
        openrouterData: sql`'{}'::jsonb`,
      }))
    )
    .onConflictDoNothing();
},

async findModelStatsTargets(models: string[]): Promise<Map<string, ModelStatsTarget>> {
const promotionModels = [...new Set(models)];
if (promotionModels.length === 0) return new Map();
Expand Down Expand Up @@ -123,6 +171,34 @@ export function createPromotionStore(db: WorkerDb): PromotionStore {
return resolvedTargets;
},

/**
 * For each (model, target) pair, set model_stats_id on the 'terminal-bench'
 * ingestion rows of that model whose model_stats_id is still NULL.
 *
 * Updates are issued one target at a time, in the iteration order of `targets`.
 *
 * @returns the distinct promotion tuples (as keyed by tupleKey) of the rows
 * that were updated.
 */
async linkOrphanedTerminalBenchPromotions(
  targets: Map<string, ModelStatsTarget>
): Promise<PromotionTuple[]> {
  const relinked = new Map<string, PromotionTuple>();
  for (const [model, target] of targets) {
    const stillOrphaned = and(
      eq(model_eval_ingestions.model, model),
      eq(model_eval_ingestions.task_source, 'terminal-bench'),
      isNull(model_eval_ingestions.model_stats_id)
    );
    const updatedRows = await db
      .update(model_eval_ingestions)
      .set({ model_stats_id: target.id })
      .where(stillOrphaned)
      .returning({
        provider: model_eval_ingestions.provider,
        model: model_eval_ingestions.model,
        variant: model_eval_ingestions.variant,
      });
    for (const updatedRow of updatedRows) {
      const tuple = { ...updatedRow, modelStatsId: target.id } satisfies PromotionTuple;
      relinked.set(tupleKey(tuple), tuple);
    }
  }
  return Array.from(relinked.values());
},

async insertPromotions(promotions: PromotionInsert[]): Promise<Set<string>> {
if (promotions.length === 0) return new Set();

Expand Down Expand Up @@ -274,9 +350,22 @@ export async function syncPromotionsFromBench(
let inserted = 0;
let alreadyHad = 0;
const tuplesToRecompute = new Map<string, PromotionTuple>();
const modelStatsTargets = await store.findModelStatsTargets([
...new Set(promotions.map(promotion => promotion.model)),
]);
const fetchedModels = [...new Set(promotions.map(promotion => promotion.model))];
const terminalModels = promotions
.filter(promotion => promotion.task_source === 'terminal-bench')
.map(promotion => promotion.model);
const orphanedModels = await store.listOrphanedTerminalBenchModels();
const models = [...new Set([...fetchedModels, ...orphanedModels])];
await store.ensureModelStatsTargets([...terminalModels, ...orphanedModels]);
const modelStatsTargets = await store.findModelStatsTargets(models);
const orphanedTargets = new Map(
orphanedModels.flatMap(model => {
const target = modelStatsTargets.get(model);
return target ? [[model, target]] : [];
})
);
const repaired = await store.linkOrphanedTerminalBenchPromotions(orphanedTargets);
for (const tuple of repaired) tuplesToRecompute.set(tupleKey(tuple), tuple);
const promotionsToInsert = promotions.map(promotion => {
const modelStatsTarget = modelStatsTargets.get(promotion.model);
return {
Expand Down