From 5dd959ac6b5622124e868c32dcca15b8c81e424e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Igor=20=C5=A0=C4=87eki=C4=87?= <iscekic@protonmail.com>
Date: Wed, 17 Jun 2026 18:36:50 +0200
Subject: [PATCH 1/5] feat(auto-routing): auto-sync decider benchmark models

---
 .../auto-routing/BenchmarksSection.test.ts    |  44 +-
 .../admin/auto-routing/BenchmarksSection.tsx  | 137 +++-
 .../decider-candidates/route.test.ts          |  50 ++
 .../decider-candidates/route.ts               |  31 +
 .../auto-routing-decider-candidates.test.ts   |  60 ++
 .../auto-routing-decider-candidates.ts        |  69 ++
 .../auto-routing-contracts/src/benchmark.ts   |  43 +
 .../src/contracts.test.ts                     |  23 +
 .../migrations/0002_magical_wendell_rand.sql  |  10 +
 .../migrations/meta/0002_snapshot.json        | 734 ++++++++++++++++++
 .../migrations/meta/_journal.json             |   7 +
 .../auto-routing-benchmark/src/admin.test.ts  |  18 +-
 .../src/auto-decider-sync.test.ts             | 142 ++++
 .../src/auto-decider-sync.ts                  | 130 ++++
 .../auto-routing-benchmark/src/config.test.ts |  36 +-
 services/auto-routing-benchmark/src/config.ts |  58 +-
 .../auto-routing-benchmark/src/db-schema.ts   |  11 +
 services/auto-routing-benchmark/src/db.ts     |  45 +-
 services/auto-routing-benchmark/src/index.ts  |  11 +
 .../auto-routing-benchmark/wrangler.jsonc     |   1 +
 20 files changed, 1626 insertions(+), 34 deletions(-)
 create mode 100644 apps/web/src/app/api/internal/auto-routing-benchmark/decider-candidates/route.test.ts
 create mode 100644 apps/web/src/app/api/internal/auto-routing-benchmark/decider-candidates/route.ts
 create mode 100644 apps/web/src/lib/model-stats/auto-routing-decider-candidates.test.ts
 create mode 100644 apps/web/src/lib/model-stats/auto-routing-decider-candidates.ts
 create mode 100644 services/auto-routing-benchmark/migrations/0002_magical_wendell_rand.sql
 create mode 100644 services/auto-routing-benchmark/migrations/meta/0002_snapshot.json
 create mode 100644 services/auto-routing-benchmark/src/auto-decider-sync.test.ts
 create mode 100644 services/auto-routing-benchmark/src/auto-decider-sync.ts

diff --git a/apps/web/src/app/admin/auto-routing/BenchmarksSection.test.ts b/apps/web/src/app/admin/auto-routing/BenchmarksSection.test.ts
index 8796256337..059275664d 100644
--- a/apps/web/src/app/admin/auto-routing/BenchmarksSection.test.ts
+++ b/apps/web/src/app/admin/auto-routing/BenchmarksSection.test.ts
@@ -1,9 +1,11 @@
 import { describe, expect, it } from '@jest/globals';
+import type { BenchmarkConfig } from '@kilocode/auto-routing-contracts';
 import React from 'react';
 import { renderToStaticMarkup } from 'react-dom/server';
 import {
   configToFormState,
   costPerAccuracy,
+  effectiveDeciderModels,
   formatCostPerAccuracy,
   formatAccuracy,
   formatUsd,
@@ -121,6 +123,8 @@ describe('configToFormState', () => {
     expect(state.classifierMaxP95LatencyMs).toBe('1000');
     expect(state.classifierModels).toBe('');
     expect(state.deciderModels).toEqual([]);
+    expect(state.autoDeciderModels).toEqual([]);
+    expect(state.excludedAutoDeciderModels).toBe('');
     expect(state.maxConcurrency).toBe(100);
     expect(state.benchmarkUserId).toBe('ce12ef3d-ae95-4d77-b4f0-23735f0a0591');
     expect(state.benchmarkOrgId).toBe('9d278969-5453-4ae3-a51f-a8d2274a7b56');
@@ -128,9 +132,15 @@ describe('configToFormState', () => {
 });
 
 describe('formStateToConfig round-trip', () => {
-  const baseConfig = {
+  const baseConfig: BenchmarkConfig = {
     classifierModels: ['model-a', 'model-b'],
     deciderModels: [{ id: 'model-c', reasoningEffort: null }],
+    manualDeciderModels: [{ id: 'manual-model', reasoningEffort: 'low' }],
+    autoDeciderModels: [
+      { id: 'auto-model', reasoningEffort: null, avgAttemptCostUsd: 21.25 },
+      { id: 'excluded-auto-model', reasoningEffort: 'high', avgAttemptCostUsd: 18 },
+    ],
+    excludedAutoDeciderModels: ['excluded-auto-model'],
     minAccuracy: 0.8,
     switchCostFactor: 3,
     maxConcurrency: 4,
@@ -149,12 +159,21 @@ describe('formStateToConfig round-trip', () => {
     expect(state.deciderRepetitions).toBe(2);
     expect(state.classifierMaxP95LatencyMs).toBe('500');
     expect(state.benchmarkOrgId).toBe('org-123');
+    expect(state.deciderModels).toEqual([{ id: 'manual-model', reasoningEffort: 'low' }]);
+    expect(state.autoDeciderModels).toEqual(baseConfig.autoDeciderModels);
+    expect(state.excludedAutoDeciderModels).toBe('excluded-auto-model');
 
     const result = formStateToConfig(state, baseConfig);
     expect(result.classifierRepetitions).toBe(3);
     expect(result.deciderRepetitions).toBe(2);
     expect(result.classifierMaxP95LatencyMs).toBe(500);
     expect(result.benchmarkOrgId).toBe('org-123');
+    expect(result.manualDeciderModels).toEqual([{ id: 'manual-model', reasoningEffort: 'low' }]);
+    expect(result.excludedAutoDeciderModels).toEqual(['excluded-auto-model']);
+    expect(result.deciderModels).toEqual([
+      { id: 'manual-model', reasoningEffort: 'low' },
+      { id: 'auto-model', reasoningEffort: null },
+    ]);
   });
 
   it('converts empty-string classifierMaxP95LatencyMs form value to null in config', () => {
@@ -164,3 +183,26 @@ describe('formStateToConfig round-trip', () => {
     expect(result.classifierMaxP95LatencyMs).toBeNull();
   });
 });
+
+describe('effectiveDeciderModels', () => {
+  // Manual rows come first; an auto row is dropped when it is excluded or shadowed by a manual id.
+  it('combines manual models with non-excluded auto models and lets manual override an auto duplicate', () => {
+    const effective = effectiveDeciderModels({
+      manualDeciderModels: [
+        { id: 'manual/model', reasoningEffort: null },
+        { id: 'auto/duplicate', reasoningEffort: 'high' },
+      ],
+      autoDeciderModels: [
+        { id: 'auto/duplicate', reasoningEffort: null, avgAttemptCostUsd: 20 },
+        { id: 'auto/included', reasoningEffort: 'low', avgAttemptCostUsd: 22 },
+        { id: 'auto/excluded', reasoningEffort: null, avgAttemptCostUsd: 23 },
+      ],
+      excludedAutoDeciderModels: ['auto/excluded'],
+    });
+    expect(effective).toEqual([
+      { id: 'manual/model', reasoningEffort: null },
+      { id: 'auto/duplicate', reasoningEffort: 'high' },
+      { id: 'auto/included', reasoningEffort: 'low' },
+    ]);
+  });
+});
diff --git a/apps/web/src/app/admin/auto-routing/BenchmarksSection.tsx b/apps/web/src/app/admin/auto-routing/BenchmarksSection.tsx
index 94096fbd1e..bf73a21be8 100644
--- a/apps/web/src/app/admin/auto-routing/BenchmarksSection.tsx
+++ b/apps/web/src/app/admin/auto-routing/BenchmarksSection.tsx
@@ -12,6 +12,7 @@ import {
   type BenchmarkModelSummary,
   type RankedCandidate,
   type ReasoningEffort,
+  type AutoBenchmarkDeciderModel,
 } from '@kilocode/auto-routing-contracts';
 import React, { useCallback, useEffect, useRef, useState } from 'react';
 import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query';
@@ -119,12 +120,16 @@ type DeciderModelRow = {
   reasoningEffort: ReasoningEffort | null;
 };
 
+type AutoDeciderModelRow = AutoBenchmarkDeciderModel;
+
 const DEFAULT_BENCHMARK_USER_ID = 'ce12ef3d-ae95-4d77-b4f0-23735f0a0591';
 const DEFAULT_BENCHMARK_ORG_ID = '9d278969-5453-4ae3-a51f-a8d2274a7b56';
 
 export function configToFormState(config: BenchmarkConfig | null): {
   classifierModels: string;
   deciderModels: DeciderModelRow[];
+  autoDeciderModels: AutoDeciderModelRow[];
+  excludedAutoDeciderModels: string;
   minAccuracy: number;
   switchCostFactor: number;
   maxConcurrency: number;
@@ -140,6 +145,8 @@ export function configToFormState(config: BenchmarkConfig | null): {
     return {
       classifierModels: '',
       deciderModels: [],
+      autoDeciderModels: [],
+      excludedAutoDeciderModels: '',
       minAccuracy: 0.7,
       switchCostFactor: 3,
       maxConcurrency: 100,
@@ -152,10 +159,12 @@ export function configToFormState(config: BenchmarkConfig | null): {
   }
   return {
     classifierModels: config.classifierModels.join('\n'),
-    deciderModels: config.deciderModels.map(m => ({
+    deciderModels: (config.manualDeciderModels ?? config.deciderModels).map(m => ({
       id: m.id,
       reasoningEffort: m.reasoningEffort ?? null,
     })),
+    autoDeciderModels: config.autoDeciderModels ?? [],
+    excludedAutoDeciderModels: (config.excludedAutoDeciderModels ?? []).join('\n'),
     minAccuracy: config.minAccuracy,
     switchCostFactor: config.switchCostFactor,
     maxConcurrency: config.maxConcurrency,
@@ -168,20 +177,56 @@ export function configToFormState(config: BenchmarkConfig | null): {
   };
 }
 
-export function formStateToConfig(
-  state: ReturnType<typeof configToFormState>,
-  base: BenchmarkConfig | null
-): BenchmarkConfig {
-  const classifierModels = state.classifierModels
+function parseModelLines(value: string): string[] { // one trimmed, non-empty entry per input line
+  return value
     .split('\n')
     .map(s => s.trim())
     .filter(s => s.length > 0);
-  const deciderModels = state.deciderModels
+}
+
+export function effectiveDeciderModels({
+  manualDeciderModels,
+  autoDeciderModels,
+  excludedAutoDeciderModels,
+}: {
+  manualDeciderModels: DeciderModelRow[];
+  autoDeciderModels: AutoDeciderModelRow[];
+  excludedAutoDeciderModels: string[];
+}): DeciderModelRow[] { // manual rows first, then the auto rows that survive both filters below
+  const manual = manualDeciderModels
     .filter(row => row.id.trim().length > 0)
     .map(row => ({
       id: row.id.trim(),
       reasoningEffort: row.reasoningEffort ?? null,
     }));
+  const manualIds = new Set(manual.map(model => model.id)); // ids are already trimmed here
+  const excludedAuto = new Set(excludedAutoDeciderModels);
+  return [
+    ...manual,
+    ...autoDeciderModels
+      .filter(model => !excludedAuto.has(model.id))
+      .filter(model => !manualIds.has(model.id)) // a manual row with the same id takes precedence
+      .map(model => ({
+        id: model.id, // only id and reasoningEffort are kept; avgAttemptCostUsd is dropped
+        reasoningEffort: model.reasoningEffort ?? null,
+      })),
+  ];
+}
+
+export function formStateToConfig(
+  state: ReturnType<typeof configToFormState>,
+  base: BenchmarkConfig | null
+): BenchmarkConfig {
+  const classifierModels = parseModelLines(state.classifierModels);
+  const excludedAutoDeciderModels = parseModelLines(state.excludedAutoDeciderModels);
+  const manualDeciderModels = state.deciderModels
+    .filter(row => row.id.trim().length > 0)
+    .map(row => ({ id: row.id.trim(), reasoningEffort: row.reasoningEffort ?? null }));
+  const deciderModels = effectiveDeciderModels({
+    manualDeciderModels,
+    autoDeciderModels: state.autoDeciderModels,
+    excludedAutoDeciderModels,
+  });
   const benchmarkUserId = state.benchmarkUserId.trim();
   const benchmarkOrgId = state.benchmarkOrgId.trim();
   const rawLatency = state.classifierMaxP95LatencyMs.trim();
@@ -189,6 +234,9 @@ export function formStateToConfig(
   return {
     classifierModels,
     deciderModels,
+    manualDeciderModels,
+    autoDeciderModels: state.autoDeciderModels,
+    excludedAutoDeciderModels,
     minAccuracy: state.minAccuracy,
     switchCostFactor: state.switchCostFactor,
     maxConcurrency: state.maxConcurrency,
@@ -287,6 +335,24 @@ function BenchmarkConfigEditor({
     [updateForm]
   );
 
+  // Flips one synced model between included and excluded. The exclusion list
+  // lives in form state as de-duplicated, sorted, newline-separated text.
+  const handleToggleAutoDeciderModel = useCallback(
+    (modelId: string, included: boolean) => {
+      updateForm(prev => {
+        const current = parseModelLines(prev.excludedAutoDeciderModels);
+        const next = new Set(
+          included ? current.filter(id => id !== modelId) : [...current, modelId]
+        );
+        return {
+          ...prev,
+          excludedAutoDeciderModels: [...next].sort().join('\n'),
+        };
+      });
+    },
+    [updateForm]
+  );
+
   const handleSave = useCallback(() => {
     saveMutation.mutate(formStateToConfig(form, config));
   }, [form, config, saveMutation]);
@@ -312,9 +378,9 @@ function BenchmarkConfigEditor({
           />
         </div>
 
-        {/* Decider models table */}
+        {/* Manual decider models table */}
         <div className="flex flex-col gap-1.5">
-          <Label className="text-sm font-medium">Decider models</Label>
+          <Label className="text-sm font-medium">Manual decider models</Label>
           <div className="rounded-md border">
             <Table>
               <TableHeader>
@@ -389,6 +455,59 @@ function BenchmarkConfigEditor({
           </Button>
         </div>
 
+        {/* Auto decider models */}
+        <div className="flex flex-col gap-1.5">
+          <div className="flex items-center justify-between gap-3">
+            <Label className="text-sm font-medium">Auto decider models</Label>
+            <Badge variant="secondary">{form.autoDeciderModels.length} synced</Badge>
+          </div>
+          {form.autoDeciderModels.length > 0 ? (
+            <div className="rounded-md border">
+              <Table>
+                <TableHeader>
+                  <TableRow>
+                    <TableHead>Model ID</TableHead>
+                    <TableHead className="w-32">Avg run</TableHead>
+                    <TableHead className="w-36">Reasoning effort</TableHead>
+                    <TableHead className="w-24">Included</TableHead>
+                  </TableRow>
+                </TableHeader>
+                <TableBody>
+                  {form.autoDeciderModels.map(model => {
+                    const excluded = parseModelLines(form.excludedAutoDeciderModels).includes(
+                      model.id
+                    );
+                    return (
+                      <TableRow key={model.id}>
+                        <TableCell className="font-mono text-xs">{model.id}</TableCell>
+                        <TableCell className="tabular-nums">
+                          {formatUsd(model.avgAttemptCostUsd)}
+                        </TableCell>
+                        <TableCell className="text-muted-foreground text-xs">
+                          {model.reasoningEffort ?? 'default'}
+                        </TableCell>
+                        <TableCell>
+                          <Checkbox
+                            checked={!excluded}
+                            onCheckedChange={checked =>
+                              handleToggleAutoDeciderModel(model.id, checked === true)
+                            }
+                            aria-label={`${excluded ? 'Include' : 'Exclude'} ${model.id}`}
+                          />
+                        </TableCell>
+                      </TableRow>
+                    );
+                  })}
+                </TableBody>
+              </Table>
+            </div>
+          ) : (
+            <div className="text-muted-foreground rounded-md border px-3 py-2 text-sm">
+              No auto decider models synced yet.
+            </div>
+          )}
+        </div>
+
         {/* Numeric inputs */}
         <div className="grid gap-4 sm:grid-cols-2">
           <div className="flex flex-col gap-1.5">
diff --git a/apps/web/src/app/api/internal/auto-routing-benchmark/decider-candidates/route.test.ts b/apps/web/src/app/api/internal/auto-routing-benchmark/decider-candidates/route.test.ts
new file mode 100644
index 0000000000..2afdd598c3
--- /dev/null
+++ b/apps/web/src/app/api/internal/auto-routing-benchmark/decider-candidates/route.test.ts
@@ -0,0 +1,50 @@
+import { NextRequest } from 'next/server';
+import { listAutoRoutingDeciderCandidates } from '@/lib/model-stats/auto-routing-decider-candidates';
+
+jest.mock('@/lib/config.server', () => ({
+  INTERNAL_API_SECRET: 'internal-secret',
+}));
+
+jest.mock('@/lib/model-stats/auto-routing-decider-candidates', () => ({
+  AUTO_DECIDER_MIN_COST_USD: 15,
+  AUTO_DECIDER_MAX_COST_USD: 25,
+  listAutoRoutingDeciderCandidates: jest.fn(),
+}));
+
+import { GET } from './route';
+
+const mockListAutoRoutingDeciderCandidates = jest.mocked(listAutoRoutingDeciderCandidates);
+
+// Builds a request for the route under test, optionally carrying the given headers.
+function createRequest(headers: Record<string, string> = {}) {
+  const url = 'http://localhost:3000/api/internal/auto-routing-benchmark/decider-candidates';
+  const init = { headers };
+  return new NextRequest(url, init);
+}
+
+describe('GET /api/internal/auto-routing-benchmark/decider-candidates', () => {
+  const syncedCandidates = [{ id: 'model/a', avgAttemptCostUsd: 20.5 }];
+  // Reset call history and re-stub the lister so every test starts clean.
+  beforeEach(() => {
+    jest.clearAllMocks();
+    mockListAutoRoutingDeciderCandidates.mockResolvedValue(syncedCandidates);
+  });
+
+  it('returns 401 without the bearer secret', async () => {
+    const response = await GET(createRequest());
+    expect(response.status).toBe(401);
+    // The auth check must short-circuit before any candidate lookup happens.
+    expect(mockListAutoRoutingDeciderCandidates).not.toHaveBeenCalled();
+  });
+
+  it('returns synced auto decider candidates for authenticated worker callers', async () => {
+    const response = await GET(createRequest({ authorization: 'Bearer internal-secret' }));
+    expect(response.status).toBe(200);
+    // 15 and 25 are the cost bounds supplied by the module mock in this file.
+    await expect(response.json()).resolves.toMatchObject({
+      candidates: syncedCandidates,
+      minCostUsd: 15,
+      maxCostUsd: 25,
+    });
+  });
+});
diff --git a/apps/web/src/app/api/internal/auto-routing-benchmark/decider-candidates/route.ts b/apps/web/src/app/api/internal/auto-routing-benchmark/decider-candidates/route.ts
new file mode 100644
index 0000000000..e7338a8485
--- /dev/null
+++ b/apps/web/src/app/api/internal/auto-routing-benchmark/decider-candidates/route.ts
@@ -0,0 +1,31 @@
+import type { NextRequest } from 'next/server';
+import { NextResponse } from 'next/server';
+import { timingSafeEqual } from '@kilocode/encryption';
+import {
+  AUTO_DECIDER_MAX_COST_USD,
+  AUTO_DECIDER_MIN_COST_USD,
+  listAutoRoutingDeciderCandidates,
+} from '@/lib/model-stats/auto-routing-decider-candidates';
+import { INTERNAL_API_SECRET } from '@/lib/config.server';
+
+/** Extracts the token from a case-insensitive `Bearer <token>` header; null if absent or empty. */
+function extractBearerToken(authHeader: string | null): string | null {
+  const header = (authHeader ?? '').trim();
+  const token = header.slice(7).trim();
+  return header.slice(0, 7).toLowerCase() === 'bearer ' && token.length > 0 ? token : null;
+}
+
+/** Lists auto decider candidates and the cost band; 401 unless the internal secret matches. */
+export async function GET(req: NextRequest) {
+  const bearer = extractBearerToken(req.headers.get('authorization'));
+  if (!INTERNAL_API_SECRET || !bearer || !timingSafeEqual(bearer, INTERNAL_API_SECRET)) {
+    return NextResponse.json({ error: 'Unauthorized' }, { status: 401 });
+  }
+  const payload = {
+    candidates: await listAutoRoutingDeciderCandidates(),
+    minCostUsd: AUTO_DECIDER_MIN_COST_USD,
+    maxCostUsd: AUTO_DECIDER_MAX_COST_USD,
+    generatedAt: new Date().toISOString(),
+  };
+  return NextResponse.json(payload);
+}
diff --git a/apps/web/src/lib/model-stats/auto-routing-decider-candidates.test.ts b/apps/web/src/lib/model-stats/auto-routing-decider-candidates.test.ts
new file mode 100644
index 0000000000..4d45fd87e6
--- /dev/null
+++ b/apps/web/src/lib/model-stats/auto-routing-decider-candidates.test.ts
@@ -0,0 +1,60 @@
+import { describe, expect, it } from '@jest/globals';
+import {
+  AUTO_DECIDER_MAX_COST_USD,
+  AUTO_DECIDER_MIN_COST_USD,
+  summarizeAutoRoutingDeciderCandidates,
+} from './auto-routing-decider-candidates';
+
+// Fixture: a model-stats row with one terminal-bench eval. Only the id, the
+// average attempt cost, and the active flag differ between fixtures.
+function row(
+  openrouterId: string,
+  avgAttemptCostUsd: number,
+  overrides: { active?: boolean } = {}
+) {
+  const terminalBench = {
+    taskSource: 'terminal-bench',
+    overallScore: 0.5,
+    totalScore: 3,
+    avgCostUsd: 1,
+    avgInputTokens: 1,
+    avgOutputTokens: 1,
+    avgCacheReadTokens: 1,
+    avgExecutionMs: 1,
+    nTotalTrials: 6,
+    nAttempts: 6,
+    avgAttemptCostUsd,
+    avgAttemptInputTokens: 1,
+    avgAttemptOutputTokens: 1,
+    avgAttemptCacheReadTokens: 1,
+    nErrored: 0,
+    lastPromotedAt: '2026-06-01T00:00:00.000Z',
+  };
+  const kiloBench = {
+    overallScore: 0.5,
+    evals: { 'terminal-bench': terminalBench },
+  };
+  return {
+    openrouterId,
+    isActive: overrides.active ?? true,
+    benchmarks: { kiloBench },
+  };
+}
+
+describe('summarizeAutoRoutingDeciderCandidates', () => {
+  it('keeps active terminal-bench models whose floored average attempt cost is in the auto range', () => {
+    // Rows straddle both band edges; the inactive and kilo-internal rows must be dropped.
+    const rows = [
+      row('model/too-cheap', AUTO_DECIDER_MIN_COST_USD - 0.01),
+      row('model/minimum', AUTO_DECIDER_MIN_COST_USD),
+      row('model/floored-maximum', AUTO_DECIDER_MAX_COST_USD + 0.99),
+      row('model/too-expensive', AUTO_DECIDER_MAX_COST_USD + 1),
+      row('model/inactive', 20, { active: false }),
+      row('kilo-internal/custom', 20),
+    ];
+    expect(summarizeAutoRoutingDeciderCandidates(rows)).toEqual([
+      { id: 'model/floored-maximum', avgAttemptCostUsd: 25.99 },
+      { id: 'model/minimum', avgAttemptCostUsd: 15 },
+    ]);
+  });
+});
diff --git a/apps/web/src/lib/model-stats/auto-routing-decider-candidates.ts b/apps/web/src/lib/model-stats/auto-routing-decider-candidates.ts
new file mode 100644
index 0000000000..0fc4b48b47
--- /dev/null
+++ b/apps/web/src/lib/model-stats/auto-routing-decider-candidates.ts
@@ -0,0 +1,69 @@
+import { CUSTOM_LLM_PREFIX } from '@/lib/ai-gateway/model-utils';
+import { readDb } from '@/lib/drizzle';
+import { ModelStatsBenchmarksSchema, modelStats } from '@kilocode/db/schema';
+import { and, eq, notLike } from 'drizzle-orm';
+
+const TerminalBenchSchema = ModelStatsBenchmarksSchema.unwrap()
+  .pick({ kiloBench: true })
+  .optional();
+// Inclusive bounds, in USD, compared against the floored average attempt cost.
+export const AUTO_DECIDER_MIN_COST_USD = 15;
+export const AUTO_DECIDER_MAX_COST_USD = 25;
+/** An auto decider candidate: a model id paired with its average attempt cost in USD. */
+export type AutoRoutingDeciderCandidate = {
+  id: string;
+  avgAttemptCostUsd: number;
+};
+/** The columns this module reads from a model-stats row; `benchmarks` is validated before use. */
+type Row = {
+  openrouterId: string;
+  isActive: boolean | null;
+  benchmarks: unknown;
+};
+
+// Inclusive on whole dollars: the cost is floored first, so MAX + 0.99 is still in band.
+const isInAutoCostBand = (avgAttemptCostUsd: number): boolean =>
+  Math.floor(avgAttemptCostUsd) >= AUTO_DECIDER_MIN_COST_USD &&
+  Math.floor(avgAttemptCostUsd) <= AUTO_DECIDER_MAX_COST_USD;
+
+/**
+ * Reduces model-stats rows to auto decider candidates. A row is kept when it is
+ * active, its id does not start with CUSTOM_LLM_PREFIX, and its terminal-bench
+ * eval has at least 5 attempts plus an in-band average attempt cost. The result
+ * is ordered by descending cost, with ties broken by id.
+ */
+export function summarizeAutoRoutingDeciderCandidates(
+  rows: readonly Row[]
+): AutoRoutingDeciderCandidate[] {
+  const eligible = rows.flatMap((row): AutoRoutingDeciderCandidate[] => {
+    if (!row.isActive || row.openrouterId.startsWith(CUSTOM_LLM_PREFIX)) return [];
+    // A benchmarks value that fails schema validation is skipped, not thrown on.
+    const parsed = TerminalBenchSchema.safeParse(row.benchmarks);
+    const bench = parsed.success ? parsed.data?.kiloBench?.evals['terminal-bench'] : undefined;
+    const cost = bench?.avgAttemptCostUsd;
+    if (!bench || (bench.nAttempts ?? 0) < 5) return [];
+    if (cost === null || cost === undefined || !isInAutoCostBand(cost)) return [];
+    return [{ id: row.openrouterId, avgAttemptCostUsd: cost }];
+  });
+
+  const byCostDescThenId = (a: AutoRoutingDeciderCandidate, b: AutoRoutingDeciderCandidate) => {
+    const costGap = b.avgAttemptCostUsd - a.avgAttemptCostUsd;
+    return costGap === 0 ? a.id.localeCompare(b.id) : costGap;
+  };
+  // Sorting in place is safe: `eligible` is a fresh array owned by this call.
+  return eligible.sort(byCostDescThenId);
+}
+
+/** Queries active, non-custom model stats through `readDb` and summarizes them into candidates. */
+export async function listAutoRoutingDeciderCandidates(): Promise<AutoRoutingDeciderCandidate[]> {
+  const activeAndNotCustom = and(
+    eq(modelStats.isActive, true),
+    notLike(modelStats.openrouterId, `${CUSTOM_LLM_PREFIX}%`)
+  );
+  const { openrouterId, isActive, benchmarks } = modelStats;
+  const rows = await readDb
+    .select({ openrouterId, isActive, benchmarks })
+    .from(modelStats)
+    .where(activeAndNotCustom);
+  return summarizeAutoRoutingDeciderCandidates(rows);
+}
diff --git a/packages/auto-routing-contracts/src/benchmark.ts b/packages/auto-routing-contracts/src/benchmark.ts
index 57fb7b11e6..76e12fc663 100644
--- a/packages/auto-routing-contracts/src/benchmark.ts
+++ b/packages/auto-routing-contracts/src/benchmark.ts
@@ -18,6 +18,11 @@ export const BenchmarkDeciderModelSchema = z.object({
 });
 export type BenchmarkDeciderModel = z.infer<typeof BenchmarkDeciderModelSchema>;
 
+export const AutoBenchmarkDeciderModelSchema = BenchmarkDeciderModelSchema.extend({
+  avgAttemptCostUsd: z.number().nonnegative(), // extra field on top of the decider model; >= 0
+});
+export type AutoBenchmarkDeciderModel = z.infer<typeof AutoBenchmarkDeciderModelSchema>;
+
 // Flags each list entry whose (trimmed) id already appeared earlier in the
 // array. Model ids are the D1 primary keys for config_classifier_models /
 // config_decider_models, so duplicates would otherwise reach the DB as an
@@ -40,6 +45,14 @@ export const BenchmarkConfigSchema = z
   .object({
     classifierModels: z.array(z.string().trim().min(1)).min(1),
     deciderModels: z.array(BenchmarkDeciderModelSchema).min(1),
+    // Manual additions are operator-pinned decider candidates. When omitted by
+    // older clients, the worker treats deciderModels as the manual list.
+    manualDeciderModels: z.array(BenchmarkDeciderModelSchema).optional(),
+    // Auto additions are refreshed from Kilo Bench cost data by the benchmark
+    // worker's scheduled sync. The effective deciderModels list is manual +
+    // non-excluded auto models.
+    autoDeciderModels: z.array(AutoBenchmarkDeciderModelSchema).optional(),
+    excludedAutoDeciderModels: z.array(z.string().trim().min(1)).optional(),
     // Accuracy threshold for "gets the job done" (per taxonomy route).
     minAccuracy: z.number().min(0).max(1),
     // Benchmark-wide parallelism budget. Decider runs use it as a live
@@ -77,9 +90,39 @@ export const BenchmarkConfigSchema = z
       'deciderModels',
       ctx
     );
+    addDuplicateModelIssues(
+      (config.manualDeciderModels ?? []).map(m => m.id),
+      'manualDeciderModels',
+      ctx
+    );
+    addDuplicateModelIssues(
+      (config.autoDeciderModels ?? []).map(m => m.id),
+      'autoDeciderModels',
+      ctx
+    );
+    addDuplicateModelIssues(
+      config.excludedAutoDeciderModels ?? [],
+      'excludedAutoDeciderModels',
+      ctx
+    );
   });
 export type BenchmarkConfig = z.infer<typeof BenchmarkConfigSchema>;
 
+export const AutoBenchmarkDeciderCandidatesResponseSchema = z.object({
+  candidates: z.array(
+    z.object({
+      id: z.string().trim().min(1), // trimmed, must be non-empty
+      avgAttemptCostUsd: z.number().nonnegative(),
+    })
+  ),
+  minCostUsd: z.number().nonnegative().optional(), // the three fields below may be omitted
+  maxCostUsd: z.number().nonnegative().optional(),
+  generatedAt: z.string().optional(), // any string is accepted; the format is not validated here
+});
+export type AutoBenchmarkDeciderCandidatesResponse = z.infer<
+  typeof AutoBenchmarkDeciderCandidatesResponseSchema
+>;
+
 export const BenchmarkRunStatusSchema = z.enum(['running', 'completed', 'failed']);
 export type BenchmarkRunStatus = z.infer<typeof BenchmarkRunStatusSchema>;
 
diff --git a/packages/auto-routing-contracts/src/contracts.test.ts b/packages/auto-routing-contracts/src/contracts.test.ts
index 42e0379fc6..292fbdadad 100644
--- a/packages/auto-routing-contracts/src/contracts.test.ts
+++ b/packages/auto-routing-contracts/src/contracts.test.ts
@@ -163,6 +163,29 @@ describe('BenchmarkConfigSchema defaults', () => {
     });
     expect(result.success).toBe(true);
   });
+
+  it('accepts explicit manual and excluded auto decider model lists', () => {
+    const result = BenchmarkConfigSchema.parse({
+      classifierModels: ['model/a'],
+      deciderModels: [{ id: 'model/b' }], // model/b is repeated in autoDeciderModels below
+      manualDeciderModels: [{ id: 'model/c', reasoningEffort: 'high' }],
+      autoDeciderModels: [{ id: 'model/b', reasoningEffort: null, avgAttemptCostUsd: 21.1 }],
+      excludedAutoDeciderModels: ['model/d'],
+      minAccuracy: 0.7,
+      switchCostFactor: 3,
+      maxConcurrency: 10,
+      benchmarkUserId: null,
+      benchmarkOrgId: null,
+      updatedAt: null,
+      updatedBy: null,
+    });
+    // parse() throws on invalid input, so reaching the assertions already proves acceptance.
+    expect(result.manualDeciderModels).toEqual([{ id: 'model/c', reasoningEffort: 'high' }]);
+    expect(result.autoDeciderModels).toEqual([
+      { id: 'model/b', reasoningEffort: null, avgAttemptCostUsd: 21.1 },
+    ]);
+    expect(result.excludedAutoDeciderModels).toEqual(['model/d']);
+  });
 });
 
 describe('BenchmarkConfigSchema duplicate model ids', () => {
diff --git a/services/auto-routing-benchmark/migrations/0002_magical_wendell_rand.sql b/services/auto-routing-benchmark/migrations/0002_magical_wendell_rand.sql
new file mode 100644
index 0000000000..31cc8b7a7c
--- /dev/null
+++ b/services/auto-routing-benchmark/migrations/0002_magical_wendell_rand.sql
@@ -0,0 +1,10 @@
+CREATE TABLE `config_auto_decider_exclusions` (
+	`model` text PRIMARY KEY NOT NULL
+);
+--> statement-breakpoint
+CREATE TABLE `config_auto_decider_models` (
+	`model` text PRIMARY KEY NOT NULL,
+	`reasoning_effort` text,
+	`avg_attempt_cost_usd` real NOT NULL,
+	`synced_at` text NOT NULL
+);
diff --git a/services/auto-routing-benchmark/migrations/meta/0002_snapshot.json b/services/auto-routing-benchmark/migrations/meta/0002_snapshot.json
new file mode 100644
index 0000000000..7156487709
--- /dev/null
+++ b/services/auto-routing-benchmark/migrations/meta/0002_snapshot.json
@@ -0,0 +1,734 @@
+{
+  "version": "6",
+  "dialect": "sqlite",
+  "id": "3c258229-2360-4f73-bc7e-807239a3336d",
+  "prevId": "b717d9d9-78c9-43eb-99fa-b0a1db80b78e",
+  "tables": {
+    "benchmark_config": {
+      "name": "benchmark_config",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "integer",
+          "primaryKey": true,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "min_accuracy": {
+          "name": "min_accuracy",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "switch_cost_factor": {
+          "name": "switch_cost_factor",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "max_concurrency": {
+          "name": "max_concurrency",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "benchmark_user_id": {
+          "name": "benchmark_user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "benchmark_org_id": {
+          "name": "benchmark_org_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "classifier_repetitions": {
+          "name": "classifier_repetitions",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false,
+          "default": 1
+        },
+        "decider_repetitions": {
+          "name": "decider_repetitions",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false,
+          "default": 1
+        },
+        "classifier_max_p95_latency_ms": {
+          "name": "classifier_max_p95_latency_ms",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "updated_by": {
+          "name": "updated_by",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "checkConstraints": {}
+    },
+    "benchmark_runs": {
+      "name": "benchmark_runs",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "kind": {
+          "name": "kind",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "status": {
+          "name": "status",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "started_at": {
+          "name": "started_at",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "completed_at": {
+          "name": "completed_at",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "error": {
+          "name": "error",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "min_accuracy": {
+          "name": "min_accuracy",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "switch_cost_factor": {
+          "name": "switch_cost_factor",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "max_concurrency": {
+          "name": "max_concurrency",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "benchmark_user_id": {
+          "name": "benchmark_user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "benchmark_org_id": {
+          "name": "benchmark_org_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "repetitions": {
+          "name": "repetitions",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false,
+          "default": 1
+        },
+        "classifier_max_p95_latency_ms": {
+          "name": "classifier_max_p95_latency_ms",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "engine_identity": {
+          "name": "engine_identity",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false,
+          "default": "''"
+        }
+      },
+      "indexes": {
+        "UQ_benchmark_runs_one_running_per_kind": {
+          "name": "UQ_benchmark_runs_one_running_per_kind",
+          "columns": [
+            "kind"
+          ],
+          "isUnique": true,
+          "where": "\"benchmark_runs\".\"status\" = 'running'"
+        }
+      },
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "checkConstraints": {}
+    },
+    "case_results": {
+      "name": "case_results",
+      "columns": {
+        "run_id": {
+          "name": "run_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "model": {
+          "name": "model",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "case_id": {
+          "name": "case_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "route_key": {
+          "name": "route_key",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "score": {
+          "name": "score",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "latency_ms": {
+          "name": "latency_ms",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "cost_usd": {
+          "name": "cost_usd",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "error": {
+          "name": "error",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "fallback_reason": {
+          "name": "fallback_reason",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "retried": {
+          "name": "retried",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "exit_code": {
+          "name": "exit_code",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "output_prefix": {
+          "name": "output_prefix",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "event_count": {
+          "name": "event_count",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "last_event_types": {
+          "name": "last_event_types",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "rep": {
+          "name": "rep",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false,
+          "default": 0
+        },
+        "timed_out": {
+          "name": "timed_out",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false,
+          "default": 0
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {
+        "case_results_run_id_model_case_id_rep_pk": {
+          "columns": [
+            "run_id",
+            "model",
+            "case_id",
+            "rep"
+          ],
+          "name": "case_results_run_id_model_case_id_rep_pk"
+        }
+      },
+      "uniqueConstraints": {},
+      "checkConstraints": {}
+    },
+    "config_auto_decider_exclusions": {
+      "name": "config_auto_decider_exclusions",
+      "columns": {
+        "model": {
+          "name": "model",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true,
+          "autoincrement": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "checkConstraints": {}
+    },
+    "config_auto_decider_models": {
+      "name": "config_auto_decider_models",
+      "columns": {
+        "model": {
+          "name": "model",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "reasoning_effort": {
+          "name": "reasoning_effort",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "avg_attempt_cost_usd": {
+          "name": "avg_attempt_cost_usd",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "synced_at": {
+          "name": "synced_at",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "checkConstraints": {}
+    },
+    "config_classifier_models": {
+      "name": "config_classifier_models",
+      "columns": {
+        "model": {
+          "name": "model",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true,
+          "autoincrement": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "checkConstraints": {}
+    },
+    "config_decider_models": {
+      "name": "config_decider_models",
+      "columns": {
+        "model": {
+          "name": "model",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "reasoning_effort": {
+          "name": "reasoning_effort",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "checkConstraints": {}
+    },
+    "model_summaries": {
+      "name": "model_summaries",
+      "columns": {
+        "run_id": {
+          "name": "run_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "model": {
+          "name": "model",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "route_key": {
+          "name": "route_key",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "accuracy": {
+          "name": "accuracy",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "avg_cost_usd": {
+          "name": "avg_cost_usd",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "avg_latency_ms": {
+          "name": "avg_latency_ms",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "p50_latency_ms": {
+          "name": "p50_latency_ms",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "cases": {
+          "name": "cases",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "errors": {
+          "name": "errors",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "p95_latency_ms": {
+          "name": "p95_latency_ms",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "timeouts": {
+          "name": "timeouts",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false,
+          "default": 0
+        },
+        "carried": {
+          "name": "carried",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false,
+          "default": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {
+        "model_summaries_run_id_model_route_key_pk": {
+          "columns": [
+            "run_id",
+            "model",
+            "route_key"
+          ],
+          "name": "model_summaries_run_id_model_route_key_pk"
+        }
+      },
+      "uniqueConstraints": {},
+      "checkConstraints": {}
+    },
+    "routing_table_candidates": {
+      "name": "routing_table_candidates",
+      "columns": {
+        "run_id": {
+          "name": "run_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "route_key": {
+          "name": "route_key",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "rank": {
+          "name": "rank",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "model": {
+          "name": "model",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "accuracy": {
+          "name": "accuracy",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "avg_cost_usd": {
+          "name": "avg_cost_usd",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "meets_threshold": {
+          "name": "meets_threshold",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "reasoning_effort": {
+          "name": "reasoning_effort",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {
+        "routing_table_candidates_run_id_route_key_rank_pk": {
+          "columns": [
+            "run_id",
+            "route_key",
+            "rank"
+          ],
+          "name": "routing_table_candidates_run_id_route_key_rank_pk"
+        }
+      },
+      "uniqueConstraints": {},
+      "checkConstraints": {}
+    },
+    "routing_tables": {
+      "name": "routing_tables",
+      "columns": {
+        "run_id": {
+          "name": "run_id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "published_at": {
+          "name": "published_at",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "generated_at": {
+          "name": "generated_at",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "min_accuracy": {
+          "name": "min_accuracy",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "switch_cost_factor": {
+          "name": "switch_cost_factor",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "source": {
+          "name": "source",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "checkConstraints": {}
+    },
+    "run_models": {
+      "name": "run_models",
+      "columns": {
+        "run_id": {
+          "name": "run_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "model": {
+          "name": "model",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "enqueued": {
+          "name": "enqueued",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "reasoning_effort": {
+          "name": "reasoning_effort",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {
+        "run_models_run_id_model_pk": {
+          "columns": [
+            "run_id",
+            "model"
+          ],
+          "name": "run_models_run_id_model_pk"
+        }
+      },
+      "uniqueConstraints": {},
+      "checkConstraints": {}
+    }
+  },
+  "views": {},
+  "enums": {},
+  "_meta": {
+    "schemas": {},
+    "tables": {},
+    "columns": {}
+  },
+  "internal": {
+    "indexes": {}
+  }
+}
\ No newline at end of file
diff --git a/services/auto-routing-benchmark/migrations/meta/_journal.json b/services/auto-routing-benchmark/migrations/meta/_journal.json
index da28b77f72..d6cb5b17a7 100644
--- a/services/auto-routing-benchmark/migrations/meta/_journal.json
+++ b/services/auto-routing-benchmark/migrations/meta/_journal.json
@@ -15,6 +15,13 @@
       "when": 1781696079415,
       "tag": "0001_special_yellow_claw",
       "breakpoints": true
+    },
+    {
+      "idx": 2,
+      "version": "6",
+      "when": 1781713850969,
+      "tag": "0002_magical_wendell_rand",
+      "breakpoints": true
     }
   ]
 }
\ No newline at end of file
diff --git a/services/auto-routing-benchmark/src/admin.test.ts b/services/auto-routing-benchmark/src/admin.test.ts
index 162d727b05..2c34b0bcec 100644
--- a/services/auto-routing-benchmark/src/admin.test.ts
+++ b/services/auto-routing-benchmark/src/admin.test.ts
@@ -62,6 +62,8 @@ const TEST_CONFIG_ROWS = {
     model: m.id,
     reasoning_effort: m.reasoningEffort ?? null,
   })),
+  autoDeciderModels: [],
+  excludedAutoDeciderModels: [],
 };
 
 // ---------------------------------------------------------------------------
@@ -153,6 +155,8 @@ beforeEach(() => {
     config: null,
     classifierModels: [],
     deciderModels: [],
+    autoDeciderModels: [],
+    excludedAutoDeciderModels: [],
   });
   vi.mocked(replaceConfig).mockResolvedValue(undefined);
   vi.mocked(listRuns).mockResolvedValue([]);
@@ -219,6 +223,8 @@ describe('GET /admin/config', () => {
       },
       classifierModels,
       deciderModels,
+      autoDeciderModels: [],
+      excludedAutoDeciderModels: [],
     });
 
     const res = await authedGet('/admin/config');
@@ -278,6 +284,13 @@ describe('PUT /admin/config', () => {
     const validConfig = {
       ...TEST_CONFIG,
       minAccuracy: 0.85,
+      deciderModels: [
+        { id: 'manual/model', reasoningEffort: 'low' },
+        { id: 'auto/model', reasoningEffort: null },
+      ],
+      manualDeciderModels: [{ id: 'manual/model', reasoningEffort: 'low' }],
+      autoDeciderModels: [{ id: 'auto/model', reasoningEffort: null, avgAttemptCostUsd: 20 }],
+      excludedAutoDeciderModels: ['auto/excluded'],
       updatedAt: null,
       updatedBy: null,
     };
@@ -295,10 +308,13 @@ describe('PUT /admin/config', () => {
     expect(typeof body.config.updatedAt).toBe('string');
 
     expect(replaceConfig).toHaveBeenCalledOnce();
-    const [, configArg] = vi.mocked(replaceConfig).mock.calls[0];
+    const [, configArg, , deciderModelRows, excludedAutoDeciderModels] =
+      vi.mocked(replaceConfig).mock.calls[0];
     expect(configArg.min_accuracy).toBe(0.85);
     expect(typeof configArg.updated_at).toBe('string');
     expect(configArg.updated_by).toBe('igor@kilocode.ai');
+    expect(deciderModelRows).toEqual([{ model: 'manual/model', reasoning_effort: 'low' }]);
+    expect(excludedAutoDeciderModels).toEqual(['auto/excluded']);
   });
 });
 
diff --git a/services/auto-routing-benchmark/src/auto-decider-sync.test.ts b/services/auto-routing-benchmark/src/auto-decider-sync.test.ts
new file mode 100644
index 0000000000..cf7e64e33d
--- /dev/null
+++ b/services/auto-routing-benchmark/src/auto-decider-sync.test.ts
@@ -0,0 +1,142 @@
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+import type * as DbModule from './db';
+import { syncAutoDeciderModels } from './auto-decider-sync';
+
+vi.mock('./db', async importOriginal => {
+  const actual = await importOriginal<typeof DbModule>();
+  return {
+    ...actual,
+    getConfigRows: vi.fn(),
+    replaceAutoDeciderModels: vi.fn(),
+    getRunningRun: vi.fn(),
+    getLatestSummariesByModel: vi.fn(),
+    insertRun: vi.fn(),
+    markStaleRunsFailed: vi.fn(),
+  };
+});
+
+import {
+  getConfigRows,
+  getLatestSummariesByModel,
+  getRunningRun,
+  insertRun,
+  markStaleRunsFailed,
+  replaceAutoDeciderModels,
+} from './db';
+
+const tokenGet = vi.fn<() => Promise<string>>();
+const queueSendBatch = vi.fn();
+const fetchImpl = vi.fn<typeof fetch>();
+
+const env = {
+  INTERNAL_API_SECRET_PROD: { get: tokenGet },
+  BENCH_DB: {} as D1Database,
+  BENCH_QUEUE: { sendBatch: queueSendBatch },
+  AUTO_ROUTING_CONFIG: { delete: vi.fn() },
+  KILO_WEB_API_BASE_URL: 'https://app.test',
+  KILO_CLI_API_URL: 'https://api.test',
+} as unknown as Env;
+
+const config = {
+  id: 1 as const,
+  min_accuracy: 0.7,
+  switch_cost_factor: 3,
+  max_concurrency: 100,
+  benchmark_user_id: 'user-123',
+  benchmark_org_id: null,
+  classifier_repetitions: 1,
+  decider_repetitions: 1,
+  classifier_max_p95_latency_ms: 1000,
+  updated_at: '2026-06-01T00:00:00.000Z',
+  updated_by: null,
+};
+
+describe('syncAutoDeciderModels', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+    tokenGet.mockResolvedValue('secret');
+    fetchImpl.mockResolvedValue(
+      new Response(
+        JSON.stringify({
+          candidates: [
+            { id: 'auto/existing', avgAttemptCostUsd: 18 },
+            { id: 'auto/new', avgAttemptCostUsd: 21.75 },
+          ],
+        }),
+        { status: 200, headers: { 'content-type': 'application/json' } }
+      )
+    );
+    vi.mocked(getConfigRows).mockResolvedValue({
+      config,
+      classifierModels: ['classifier/model'],
+      deciderModels: [{ model: 'manual/model', reasoning_effort: null }],
+      autoDeciderModels: [
+        {
+          model: 'auto/existing',
+          reasoning_effort: 'high',
+          avg_attempt_cost_usd: 18,
+          synced_at: '2026-06-01T00:00:00.000Z',
+        },
+      ],
+      excludedAutoDeciderModels: [],
+    });
+    vi.mocked(replaceAutoDeciderModels).mockResolvedValue(undefined);
+    vi.mocked(markStaleRunsFailed).mockResolvedValue(undefined);
+    vi.mocked(getRunningRun).mockResolvedValue(undefined);
+    vi.mocked(getLatestSummariesByModel).mockResolvedValue(new Map());
+    vi.mocked(insertRun).mockResolvedValue(undefined);
+    queueSendBatch.mockResolvedValue(undefined);
+  });
+
+  it('persists auto candidates, preserves existing reasoning effort, and starts a decider run for new effective models', async () => {
+    const result = await syncAutoDeciderModels(env, { fetchImpl });
+
+    expect(fetchImpl).toHaveBeenCalledWith(
+      'https://app.test/api/internal/auto-routing-benchmark/decider-candidates',
+      expect.objectContaining({
+        headers: expect.objectContaining({ authorization: 'Bearer secret' }),
+      })
+    );
+    expect(replaceAutoDeciderModels).toHaveBeenCalledWith(env.BENCH_DB, [
+      expect.objectContaining({ model: 'auto/existing', reasoning_effort: 'high' }),
+      expect.objectContaining({ model: 'auto/new', reasoning_effort: null }),
+    ]);
+    expect(insertRun).toHaveBeenCalledOnce();
+    expect(result).toMatchObject({
+      addedModels: ['auto/new'],
+      removedModels: [],
+      startedRun: true,
+    });
+  });
+
+  it('does not fail the sync when a decider run is already active', async () => {
+    vi.mocked(getRunningRun).mockResolvedValue({
+      id: 'decider-active',
+      kind: 'decider',
+      status: 'running',
+      started_at: '2026-06-01T00:00:00.000Z',
+      completed_at: null,
+      error: null,
+      min_accuracy: 0.7,
+      switch_cost_factor: 3,
+      max_concurrency: 100,
+      benchmark_user_id: 'user-123',
+      benchmark_org_id: null,
+      repetitions: 1,
+      classifier_max_p95_latency_ms: null,
+      engine_identity: 'v1:test',
+    });
+
+    const result = await syncAutoDeciderModels(env, { fetchImpl });
+
+    expect(result).toMatchObject({
+      addedModels: ['auto/new'],
+      removedModels: [],
+      startedRun: false,
+      runId: null,
+      skippedReason: 'active-run',
+      activeRunId: 'decider-active',
+    });
+    expect(insertRun).not.toHaveBeenCalled();
+  });
+});
diff --git a/services/auto-routing-benchmark/src/auto-decider-sync.ts b/services/auto-routing-benchmark/src/auto-decider-sync.ts
new file mode 100644
index 0000000000..5880bc39c8
--- /dev/null
+++ b/services/auto-routing-benchmark/src/auto-decider-sync.ts
@@ -0,0 +1,184 @@
+import {
+  AutoBenchmarkDeciderCandidatesResponseSchema,
+  type BenchmarkDeciderModel,
+} from '@kilocode/auto-routing-contracts';
+import { getBenchmarkConfig, mapConfigRows } from './config';
+import { getConfigRows, replaceAutoDeciderModels, type ConfigAutoDeciderModelRow } from './db';
+import { RunAlreadyActiveError, startRun } from './run';
+
+/** Optional overrides for {@link syncAutoDeciderModels}; the tests inject `fetchImpl`. */
+type SyncOptions = {
+  /** Used instead of the global `fetch` when provided. */
+  fetchImpl?: typeof fetch;
+  /** Used instead of `new Date()` for the `synced_at` stamp when provided. */
+  now?: Date;
+};
+
+/** Outcome of one {@link syncAutoDeciderModels} pass. */
+export type AutoDeciderSyncResult = {
+  /** Ids of decider models in the mapped config after the sync but not before. */
+  addedModels: string[];
+  /** Ids of decider models in the mapped config before the sync but not after. */
+  removedModels: string[];
+  /** True only when this pass started a decider run. */
+  startedRun: boolean;
+  /** Id of the run started by this pass, otherwise `null`. */
+  runId: string | null;
+  /** Set when starting a run failed with `RunAlreadyActiveError`. */
+  skippedReason?: 'active-run';
+  /** `activeRunId` of that `RunAlreadyActiveError`. */
+  activeRunId?: string;
+};
+
+/** Diff key: model id and reasoning effort (empty when unset) joined by a NUL. */
+function modelKey(model: BenchmarkDeciderModel): string {
+  return `${model.id}\0${model.reasoningEffort ?? ''}`;
+}
+
+/**
+ * Compares two decider model lists by {@link modelKey}.
+ *
+ * Because the key includes the reasoning effort, a model whose effort changed is
+ * reported in both `added` and `removed`.
+ *
+ * @returns ids of entries only in `after` (`added`) and only in `before` (`removed`).
+ */
+function diffModels(
+  before: readonly BenchmarkDeciderModel[],
+  after: readonly BenchmarkDeciderModel[]
+): { added: string[]; removed: string[] } {
+  const beforeKeys = new Set(before.map(modelKey));
+  const afterKeys = new Set(after.map(modelKey));
+  return {
+    added: after.filter(model => !beforeKeys.has(modelKey(model))).map(model => model.id),
+    removed: before.filter(model => !afterKeys.has(modelKey(model))).map(model => model.id),
+  };
+}
+
+/**
+ * Fetches the auto decider candidates from the web app's internal endpoint,
+ * authenticating with the `INTERNAL_API_SECRET_PROD` secret as a bearer token.
+ *
+ * @throws Error when the response status is not OK (message carries the status and
+ *   up to 200 characters of the body), when the body is not valid JSON, or when it
+ *   does not match `AutoBenchmarkDeciderCandidatesResponseSchema`.
+ */
+async function fetchAutoDeciderCandidates(
+  env: Env,
+  fetchImpl: typeof fetch
+): Promise<{ id: string; avgAttemptCostUsd: number }[]> {
+  const secret = await env.INTERNAL_API_SECRET_PROD.get();
+  const response = await fetchImpl(
+    `${env.KILO_WEB_API_BASE_URL}/api/internal/auto-routing-benchmark/decider-candidates`,
+    {
+      headers: {
+        authorization: `Bearer ${secret}`,
+      },
+    }
+  );
+  if (!response.ok) {
+    const detail = (await response.text().catch(() => '')).slice(0, 200);
+    throw new Error(`auto decider candidate sync failed: HTTP ${response.status} ${detail}`);
+  }
+  // `json()` rejects on a non-JSON body; report that with the same message as a
+  // schema mismatch instead of leaking a bare parse error to the caller.
+  let payload: unknown;
+  try {
+    payload = await response.json();
+  } catch {
+    throw new Error('auto decider candidate sync returned unexpected response');
+  }
+  const parsed = AutoBenchmarkDeciderCandidatesResponseSchema.safeParse(payload);
+  if (!parsed.success) throw new Error('auto decider candidate sync returned unexpected response');
+  return parsed.data.candidates;
+}
+
+/**
+ * Replaces the stored auto decider models with the currently fetched candidates
+ * and starts a decider run when that changes the mapped decider model list.
+ *
+ * Each candidate keeps the reasoning effort already stored for its model id; an
+ * entry in `deciderModels` wins over one in `autoDeciderModels`, and a model with
+ * no stored effort gets `null`. The rows are written before the run is started,
+ * so they stay persisted even when no run starts.
+ *
+ * @param env Environment bindings; `BENCH_DB` is read here and `env` is passed on to `startRun`.
+ * @param options Optional `fetch` and clock overrides.
+ * @returns The added/removed model ids and whether a run was started. A
+ *   `RunAlreadyActiveError` from `startRun` is reported via `skippedReason`
+ *   rather than thrown.
+ * @throws Error when fetching candidates fails, or any other error from `startRun`.
+ */
+export async function syncAutoDeciderModels(
+  env: Env,
+  options: SyncOptions = {}
+): Promise<AutoDeciderSyncResult> {
+  const fetchImpl = options.fetchImpl ?? fetch;
+  const syncedAt = (options.now ?? new Date()).toISOString();
+  const beforeRows = await getConfigRows(env.BENCH_DB);
+  const beforeConfig = mapConfigRows(
+    beforeRows.config,
+    beforeRows.classifierModels,
+    beforeRows.deciderModels,
+    beforeRows.autoDeciderModels,
+    beforeRows.excludedAutoDeciderModels
+  );
+
+  const candidates = await fetchAutoDeciderCandidates(env, fetchImpl);
+  // Later `set` calls overwrite earlier ones, so `deciderModels` takes precedence.
+  const previousReasoningEffort = new Map<string, string | null>();
+  for (const row of beforeRows.autoDeciderModels) {
+    previousReasoningEffort.set(row.model, row.reasoning_effort);
+  }
+  for (const row of beforeRows.deciderModels) {
+    previousReasoningEffort.set(row.model, row.reasoning_effort);
+  }
+
+  const nextAutoRows: ConfigAutoDeciderModelRow[] = candidates.map(candidate => ({
+    model: candidate.id,
+    reasoning_effort: previousReasoningEffort.get(candidate.id) ?? null,
+    avg_attempt_cost_usd: candidate.avgAttemptCostUsd,
+    synced_at: syncedAt,
+  }));
+
+  await replaceAutoDeciderModels(env.BENCH_DB, nextAutoRows);
+
+  // Re-map with the new auto rows to see what the sync changed.
+  const afterConfig = mapConfigRows(
+    beforeRows.config,
+    beforeRows.classifierModels,
+    beforeRows.deciderModels,
+    nextAutoRows,
+    beforeRows.excludedAutoDeciderModels
+  );
+  const diff = diffModels(beforeConfig?.deciderModels ?? [], afterConfig?.deciderModels ?? []);
+  const changed = diff.added.length > 0 || diff.removed.length > 0;
+
+  // No run when the list is unchanged or `getBenchmarkConfig` yields nothing.
+  if (!changed || !(await getBenchmarkConfig(env.BENCH_DB))) {
+    return { addedModels: diff.added, removedModels: diff.removed, startedRun: false, runId: null };
+  }
+
+  let run: Awaited<ReturnType<typeof startRun>>;
+  try {
+    run = await startRun(env, 'decider');
+  } catch (error) {
+    if (error instanceof RunAlreadyActiveError) {
+      return {
+        addedModels: diff.added,
+        removedModels: diff.removed,
+        startedRun: false,
+        runId: null,
+        skippedReason: 'active-run',
+        activeRunId: error.activeRunId,
+      };
+    }
+    throw error;
+  }
+  return {
+    addedModels: diff.added,
+    removedModels: diff.removed,
+    startedRun: true,
+    runId: run.runId,
+  };
+}
diff --git a/services/auto-routing-benchmark/src/config.test.ts b/services/auto-routing-benchmark/src/config.test.ts
index 3e80a08259..55566dc7a0 100644
--- a/services/auto-routing-benchmark/src/config.test.ts
+++ b/services/auto-routing-benchmark/src/config.test.ts
@@ -23,23 +23,32 @@ const deciderRows: ConfigDeciderModelRow[] = [
   },
 ];
 
+const autoRows = [
+  {
+    model: 'auto/model',
+    reasoning_effort: null,
+    avg_attempt_cost_usd: 19.75,
+    synced_at: '2026-06-01T01:00:00.000Z',
+  },
+];
+
 describe('mapConfigRows', () => {
   it('returns null when config row is null', () => {
-    expect(mapConfigRows(null, ['some/model'], deciderRows)).toBeNull();
+    expect(mapConfigRows(null, ['some/model'], deciderRows, autoRows, [])).toBeNull();
   });
 
   it('returns null when classifierModels array is empty', () => {
-    expect(mapConfigRows(configRow, [], deciderRows)).toBeNull();
+    expect(mapConfigRows(configRow, [], deciderRows, autoRows, [])).toBeNull();
   });
 
   it('returns null when deciderModels array is empty', () => {
-    expect(mapConfigRows(configRow, ['some/model'], [])).toBeNull();
+    expect(mapConfigRows(configRow, ['some/model'], [], [], [])).toBeNull();
   });
 
   it('maps a full config row set to BenchmarkConfig', () => {
     const classifierModels = ['some/model-a', 'some/model-b'];
 
-    const result = mapConfigRows(configRow, classifierModels, deciderRows);
+    const result = mapConfigRows(configRow, classifierModels, deciderRows, autoRows, []);
 
     expect(result).not.toBeNull();
     expect(result?.minAccuracy).toBe(0.85);
@@ -50,11 +59,28 @@ describe('mapConfigRows', () => {
     expect(result?.updatedAt).toBe('2026-06-01T00:00:00.000Z');
     expect(result?.updatedBy).toBe('admin@example.com');
     expect(result?.classifierModels).toEqual(classifierModels);
-    expect(result?.deciderModels).toHaveLength(1);
+    expect(result?.deciderModels).toHaveLength(2);
     expect(result?.deciderModels[0].id).toBe('some/decider');
     expect(result?.deciderModels[0].reasoningEffort).toBe('high');
+    expect(result?.manualDeciderModels).toEqual([{ id: 'some/decider', reasoningEffort: 'high' }]);
+    expect(result?.autoDeciderModels).toEqual([
+      { id: 'auto/model', reasoningEffort: null, avgAttemptCostUsd: 19.75 },
+    ]);
     expect(result?.classifierRepetitions).toBe(1);
     expect(result?.deciderRepetitions).toBe(1);
     expect(result?.classifierMaxP95LatencyMs).toBeNull();
   });
+
+  it('excludes only auto decider models, leaving a manual model with the same id included', () => {
+    const result = mapConfigRows(
+      configRow,
+      ['some/model'],
+      [{ model: 'auto/model', reasoning_effort: 'medium' }],
+      autoRows,
+      ['auto/model']
+    );
+
+    expect(result?.deciderModels).toEqual([{ id: 'auto/model', reasoningEffort: 'medium' }]);
+    expect(result?.excludedAutoDeciderModels).toEqual(['auto/model']);
+  });
 });
diff --git a/services/auto-routing-benchmark/src/config.ts b/services/auto-routing-benchmark/src/config.ts
index e4091ac0dd..7e99fcd47b 100644
--- a/services/auto-routing-benchmark/src/config.ts
+++ b/services/auto-routing-benchmark/src/config.ts
@@ -1,5 +1,10 @@
 import type { BenchmarkConfig } from '@kilocode/auto-routing-contracts';
-import { getConfigRows, replaceConfig, type ConfigDeciderModelRow } from './db';
+import {
+  getConfigRows,
+  replaceConfig,
+  type ConfigAutoDeciderModelRow,
+  type ConfigDeciderModelRow,
+} from './db';
 
 // Maps the three normalized config tables to the BenchmarkConfig contract.
 // Null when no admin has saved a config yet — the worker never fabricates
@@ -18,19 +23,39 @@ export function mapConfigRows(
     updated_by: string | null;
   } | null,
   classifierModels: string[],
-  deciderModelRows: ConfigDeciderModelRow[]
+  deciderModelRows: ConfigDeciderModelRow[],
+  autoDeciderModelRows: ConfigAutoDeciderModelRow[] = [],
+  excludedAutoDeciderModels: string[] = []
 ): BenchmarkConfig | null {
-  if (configRow === null || classifierModels.length === 0 || deciderModelRows.length === 0) {
+  const excludedAuto = new Set(excludedAutoDeciderModels);
+  const manualDeciderModels = deciderModelRows.map(r => ({
+    id: r.model,
+    reasoningEffort:
+      r.reasoning_effort as BenchmarkConfig['deciderModels'][number]['reasoningEffort'],
+  }));
+  const manualIds = new Set(manualDeciderModels.map(model => model.id));
+  const autoDeciderModels = autoDeciderModelRows.map(r => ({
+    id: r.model,
+    reasoningEffort:
+      r.reasoning_effort as BenchmarkConfig['deciderModels'][number]['reasoningEffort'],
+    avgAttemptCostUsd: r.avg_attempt_cost_usd,
+  }));
+  const effectiveAutoDeciderModels = autoDeciderModels
+    .filter(model => !excludedAuto.has(model.id))
+    .filter(model => !manualIds.has(model.id))
+    .map(model => ({ id: model.id, reasoningEffort: model.reasoningEffort }));
+  const deciderModels = [...manualDeciderModels, ...effectiveAutoDeciderModels];
+
+  if (configRow === null || classifierModels.length === 0 || deciderModels.length === 0) {
     return null;
   }
 
   return {
     classifierModels,
-    deciderModels: deciderModelRows.map(r => ({
-      id: r.model,
-      reasoningEffort:
-        r.reasoning_effort as BenchmarkConfig['deciderModels'][number]['reasoningEffort'],
-    })),
+    deciderModels,
+    manualDeciderModels,
+    autoDeciderModels,
+    excludedAutoDeciderModels,
     minAccuracy: configRow.min_accuracy,
     switchCostFactor: configRow.switch_cost_factor,
     maxConcurrency: configRow.max_concurrency,
@@ -45,8 +70,15 @@ export function mapConfigRows(
 }
 
 export async function getBenchmarkConfig(db: D1Database): Promise<BenchmarkConfig | null> {
-  const { config, classifierModels, deciderModels } = await getConfigRows(db);
-  return mapConfigRows(config, classifierModels, deciderModels);
+  const { config, classifierModels, deciderModels, autoDeciderModels, excludedAutoDeciderModels } =
+    await getConfigRows(db);
+  return mapConfigRows(
+    config,
+    classifierModels,
+    deciderModels,
+    autoDeciderModels,
+    excludedAutoDeciderModels
+  );
 }
 
 export async function saveBenchmarkConfig(
@@ -57,7 +89,8 @@ export async function saveBenchmarkConfig(
   const updatedAt = new Date().toISOString();
   const stamped: BenchmarkConfig = { ...config, updatedAt, updatedBy };
 
-  const deciderModelRows: ConfigDeciderModelRow[] = config.deciderModels.map(m => ({
+  const manualDeciderModels = config.manualDeciderModels ?? config.deciderModels;
+  const deciderModelRows: ConfigDeciderModelRow[] = manualDeciderModels.map(m => ({
     model: m.id,
     reasoning_effort: m.reasoningEffort ?? null,
   }));
@@ -77,7 +110,8 @@ export async function saveBenchmarkConfig(
       updated_by: updatedBy,
     },
     config.classifierModels,
-    deciderModelRows
+    deciderModelRows,
+    config.excludedAutoDeciderModels ?? []
   );
 
   return stamped;
diff --git a/services/auto-routing-benchmark/src/db-schema.ts b/services/auto-routing-benchmark/src/db-schema.ts
index 6b400cefa3..b191bff2e0 100644
--- a/services/auto-routing-benchmark/src/db-schema.ts
+++ b/services/auto-routing-benchmark/src/db-schema.ts
@@ -28,6 +28,17 @@ export const configDeciderModels = sqliteTable('config_decider_models', {
   reasoning_effort: text('reasoning_effort'),
 });
 
+export const configAutoDeciderModels = sqliteTable('config_auto_decider_models', {
+  model: text('model').primaryKey(),
+  reasoning_effort: text('reasoning_effort'),
+  avg_attempt_cost_usd: real('avg_attempt_cost_usd').notNull(),
+  synced_at: text('synced_at').notNull(),
+});
+
+export const configAutoDeciderExclusions = sqliteTable('config_auto_decider_exclusions', {
+  model: text('model').primaryKey(),
+});
+
 export const benchmarkRuns = sqliteTable(
   'benchmark_runs',
   {
diff --git a/services/auto-routing-benchmark/src/db.ts b/services/auto-routing-benchmark/src/db.ts
index 7f21ed2526..f4bbac38a3 100644
--- a/services/auto-routing-benchmark/src/db.ts
+++ b/services/auto-routing-benchmark/src/db.ts
@@ -14,6 +14,8 @@ import {
   benchmarkConfig,
   benchmarkRuns,
   caseResults,
+  configAutoDeciderExclusions,
+  configAutoDeciderModels,
   configClassifierModels,
   configDeciderModels,
   modelSummaries,
@@ -27,6 +29,7 @@ export type CaseResultRow = typeof caseResults.$inferSelect;
 export type RunRow = typeof benchmarkRuns.$inferSelect;
 export type RunModelRow = typeof runModels.$inferSelect;
 export type ConfigDeciderModelRow = typeof configDeciderModels.$inferSelect;
+export type ConfigAutoDeciderModelRow = typeof configAutoDeciderModels.$inferSelect;
 type ModelSummaryRow = typeof modelSummaries.$inferSelect;
 
 // D1 rejects statements with too many bound variables. A model summary insert
@@ -78,17 +81,24 @@ export async function getConfigRows(db: D1Database): Promise<{
   config: typeof benchmarkConfig.$inferSelect | null;
   classifierModels: string[];
   deciderModels: ConfigDeciderModelRow[];
+  autoDeciderModels: ConfigAutoDeciderModelRow[];
+  excludedAutoDeciderModels: string[];
 }> {
   const orm = drizzle(db);
-  const [configRows, classifierRows, deciderRows] = await Promise.all([
-    orm.select().from(benchmarkConfig).where(eq(benchmarkConfig.id, 1)).limit(1),
-    orm.select().from(configClassifierModels),
-    orm.select().from(configDeciderModels),
-  ]);
+  const [configRows, classifierRows, deciderRows, autoDeciderRows, exclusionRows] =
+    await Promise.all([
+      orm.select().from(benchmarkConfig).where(eq(benchmarkConfig.id, 1)).limit(1),
+      orm.select().from(configClassifierModels),
+      orm.select().from(configDeciderModels),
+      orm.select().from(configAutoDeciderModels),
+      orm.select().from(configAutoDeciderExclusions),
+    ]);
   return {
     config: configRows[0] ?? null,
     classifierModels: classifierRows.map(r => r.model),
     deciderModels: deciderRows,
+    autoDeciderModels: autoDeciderRows,
+    excludedAutoDeciderModels: exclusionRows.map(r => r.model),
   };
 }
 
@@ -107,7 +117,8 @@ export async function replaceConfig(
     updated_by: string | null;
   },
   classifierModels: string[],
-  deciderModels: ConfigDeciderModelRow[]
+  deciderModels: ConfigDeciderModelRow[],
+  excludedAutoDeciderModels: string[] = []
 ): Promise<void> {
   const orm = drizzle(db);
   const stmts: [BatchItem<'sqlite'>, ...BatchItem<'sqlite'>[]] = [
@@ -120,6 +131,7 @@ export async function replaceConfig(
       }),
     orm.delete(configClassifierModels),
     orm.delete(configDeciderModels),
+    orm.delete(configAutoDeciderExclusions),
   ];
   if (classifierModels.length > 0) {
     stmts.push(
@@ -129,6 +141,27 @@ export async function replaceConfig(
   if (deciderModels.length > 0) {
     stmts.push(orm.insert(configDeciderModels).values(deciderModels));
   }
+  if (excludedAutoDeciderModels.length > 0) {
+    stmts.push(
+      orm
+        .insert(configAutoDeciderExclusions)
+        .values(excludedAutoDeciderModels.map(model => ({ model })))
+    );
+  }
+  await orm.batch(stmts);
+}
+
+export async function replaceAutoDeciderModels(
+  db: D1Database,
+  autoDeciderModels: ConfigAutoDeciderModelRow[]
+): Promise<void> {
+  const orm = drizzle(db);
+  const stmts: [BatchItem<'sqlite'>, ...BatchItem<'sqlite'>[]] = [
+    orm.delete(configAutoDeciderModels),
+  ];
+  if (autoDeciderModels.length > 0) {
+    stmts.push(orm.insert(configAutoDeciderModels).values(autoDeciderModels));
+  }
   await orm.batch(stmts);
 }
 
diff --git a/services/auto-routing-benchmark/src/index.ts b/services/auto-routing-benchmark/src/index.ts
index 75cacb902c..7efbe7b296 100644
--- a/services/auto-routing-benchmark/src/index.ts
+++ b/services/auto-routing-benchmark/src/index.ts
@@ -2,6 +2,7 @@ import { Hono } from 'hono';
 import { createErrorHandler, createNotFoundHandler } from '@kilocode/worker-utils';
 import { registerAdminRoutes } from './admin';
 import { authMiddleware } from './auth';
+import { syncAutoDeciderModels } from './auto-decider-sync';
 import type { HonoEnv } from './hono-env';
 import { processJob, type BenchmarkJobMessage } from './run';
 
@@ -19,6 +20,16 @@ app.onError(createErrorHandler());
 
 export default {
   fetch: app.fetch,
+  async scheduled(controller: ScheduledController, env: Env): Promise<void> {
+    const result = await syncAutoDeciderModels(env);
+    console.log(
+      JSON.stringify({
+        event: 'auto_decider_model_sync_completed',
+        cron: controller.cron,
+        ...result,
+      })
+    );
+  },
   async queue(batch: MessageBatch<BenchmarkJobMessage>, env: Env): Promise<void> {
     for (const message of batch.messages) {
       // Deliberately no try/catch: a throw from processJob (transient token,
diff --git a/services/auto-routing-benchmark/wrangler.jsonc b/services/auto-routing-benchmark/wrangler.jsonc
index c0433b1073..fa08071eaa 100644
--- a/services/auto-routing-benchmark/wrangler.jsonc
+++ b/services/auto-routing-benchmark/wrangler.jsonc
@@ -15,6 +15,7 @@
       "custom_domain": true,
     },
   ],
+  "triggers": { "crons": ["0 9 * * *"] },
   "dev": { "port": 8814, "local_protocol": "http", "ip": "0.0.0.0" },
   "observability": { "enabled": true },
   "vars": {

From 4b5212f125b66137e238aabef1c1c1dfc6870fef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Igor=20=C5=A0=C4=87eki=C4=87?= <iscekic@protonmail.com>
Date: Wed, 17 Jun 2026 19:35:10 +0200
Subject: [PATCH 2/5] feat(auto-routing): configure auto decider cost bounds

---
 .../benchmark-config/route.test.ts            |   2 +
 .../auto-routing/BenchmarksSection.test.ts    |  10 +-
 .../admin/auto-routing/BenchmarksSection.tsx  |  48 ++
 .../decider-candidates/route.test.ts          |  21 +
 .../decider-candidates/route.ts               |  24 +-
 ...uto-routing-benchmark-admin-client.test.ts |   2 +
 .../auto-routing-decider-candidates.test.ts   |  31 +-
 .../auto-routing-decider-candidates.ts        |  35 +-
 .../auto-routing-contracts/src/benchmark.ts   |  14 +
 .../src/contracts.test.ts                     |  27 +-
 .../migrations/0003_chunky_ogun.sql           |   2 +
 .../migrations/meta/0003_snapshot.json        | 750 ++++++++++++++++++
 .../migrations/meta/_journal.json             |   7 +
 .../auto-routing-benchmark/src/admin.test.ts  |   8 +
 .../src/auto-decider-sync.test.ts             |   4 +-
 .../src/auto-decider-sync.ts                  |  28 +-
 .../auto-routing-benchmark/src/config.test.ts |   4 +
 services/auto-routing-benchmark/src/config.ts |   6 +
 .../src/db-replace-summaries.test.ts          |  45 +-
 .../auto-routing-benchmark/src/db-schema.ts   |   2 +
 services/auto-routing-benchmark/src/db.ts     |   7 +-
 21 files changed, 1042 insertions(+), 35 deletions(-)
 create mode 100644 services/auto-routing-benchmark/migrations/0003_chunky_ogun.sql
 create mode 100644 services/auto-routing-benchmark/migrations/meta/0003_snapshot.json

diff --git a/apps/web/src/app/admin/api/auto-routing/benchmark-config/route.test.ts b/apps/web/src/app/admin/api/auto-routing/benchmark-config/route.test.ts
index e708bd01b9..ee6ca77392 100644
--- a/apps/web/src/app/admin/api/auto-routing/benchmark-config/route.test.ts
+++ b/apps/web/src/app/admin/api/auto-routing/benchmark-config/route.test.ts
@@ -78,6 +78,8 @@ const validConfig = {
   classifierRepetitions: 1,
   deciderRepetitions: 1,
   classifierMaxP95LatencyMs: 1000,
+  autoDeciderMinCostUsd: 15,
+  autoDeciderMaxCostUsd: 25,
   updatedAt: null,
   updatedBy: null,
 };
diff --git a/apps/web/src/app/admin/auto-routing/BenchmarksSection.test.ts b/apps/web/src/app/admin/auto-routing/BenchmarksSection.test.ts
index 059275664d..6f0e74bf6a 100644
--- a/apps/web/src/app/admin/auto-routing/BenchmarksSection.test.ts
+++ b/apps/web/src/app/admin/auto-routing/BenchmarksSection.test.ts
@@ -121,6 +121,8 @@ describe('configToFormState', () => {
     expect(state.classifierRepetitions).toBe(1);
     expect(state.deciderRepetitions).toBe(1);
     expect(state.classifierMaxP95LatencyMs).toBe('1000');
+    expect(state.autoDeciderMinCostUsd).toBe(15);
+    expect(state.autoDeciderMaxCostUsd).toBe(25);
     expect(state.classifierModels).toBe('');
     expect(state.deciderModels).toEqual([]);
     expect(state.autoDeciderModels).toEqual([]);
@@ -149,15 +151,19 @@ describe('formStateToConfig round-trip', () => {
     classifierRepetitions: 3,
     deciderRepetitions: 2,
     classifierMaxP95LatencyMs: 500,
+    autoDeciderMinCostUsd: 12,
+    autoDeciderMaxCostUsd: 24,
     updatedAt: null,
     updatedBy: null,
   };
 
-  it('preserves classifierRepetitions, deciderRepetitions, and classifierMaxP95LatencyMs', () => {
+  it('preserves repetitions, classifierMaxP95LatencyMs, and auto decider cost bounds', () => {
     const state = configToFormState(baseConfig);
     expect(state.classifierRepetitions).toBe(3);
     expect(state.deciderRepetitions).toBe(2);
     expect(state.classifierMaxP95LatencyMs).toBe('500');
+    expect(state.autoDeciderMinCostUsd).toBe(12);
+    expect(state.autoDeciderMaxCostUsd).toBe(24);
     expect(state.benchmarkOrgId).toBe('org-123');
     expect(state.deciderModels).toEqual([{ id: 'manual-model', reasoningEffort: 'low' }]);
     expect(state.autoDeciderModels).toEqual(baseConfig.autoDeciderModels);
@@ -167,6 +173,8 @@ describe('formStateToConfig round-trip', () => {
     expect(result.classifierRepetitions).toBe(3);
     expect(result.deciderRepetitions).toBe(2);
     expect(result.classifierMaxP95LatencyMs).toBe(500);
+    expect(result.autoDeciderMinCostUsd).toBe(12);
+    expect(result.autoDeciderMaxCostUsd).toBe(24);
     expect(result.benchmarkOrgId).toBe('org-123');
     expect(result.manualDeciderModels).toEqual([{ id: 'manual-model', reasoningEffort: 'low' }]);
     expect(result.excludedAutoDeciderModels).toEqual(['excluded-auto-model']);
diff --git a/apps/web/src/app/admin/auto-routing/BenchmarksSection.tsx b/apps/web/src/app/admin/auto-routing/BenchmarksSection.tsx
index bf73a21be8..2e79f3f6a6 100644
--- a/apps/web/src/app/admin/auto-routing/BenchmarksSection.tsx
+++ b/apps/web/src/app/admin/auto-routing/BenchmarksSection.tsx
@@ -1,6 +1,8 @@
 'use client';
 
 import {
+  AUTO_DECIDER_DEFAULT_MAX_COST_USD,
+  AUTO_DECIDER_DEFAULT_MIN_COST_USD,
   BenchmarkConfigResponseSchema,
   BenchmarkRoutingTableResponseSchema,
   BenchmarkRunsResponseSchema,
@@ -138,6 +140,8 @@ export function configToFormState(config: BenchmarkConfig | null): {
   classifierRepetitions: number;
   deciderRepetitions: number;
   classifierMaxP95LatencyMs: string;
+  autoDeciderMinCostUsd: number;
+  autoDeciderMaxCostUsd: number;
 } {
   if (config === null) {
     // No config saved yet: the worker fabricates nothing, so the form starts
@@ -155,6 +159,8 @@ export function configToFormState(config: BenchmarkConfig | null): {
       classifierRepetitions: 1,
       deciderRepetitions: 1,
       classifierMaxP95LatencyMs: '1000',
+      autoDeciderMinCostUsd: AUTO_DECIDER_DEFAULT_MIN_COST_USD,
+      autoDeciderMaxCostUsd: AUTO_DECIDER_DEFAULT_MAX_COST_USD,
     };
   }
   return {
@@ -174,6 +180,8 @@ export function configToFormState(config: BenchmarkConfig | null): {
     deciderRepetitions: config.deciderRepetitions,
     classifierMaxP95LatencyMs:
       config.classifierMaxP95LatencyMs !== null ? String(config.classifierMaxP95LatencyMs) : '',
+    autoDeciderMinCostUsd: config.autoDeciderMinCostUsd,
+    autoDeciderMaxCostUsd: config.autoDeciderMaxCostUsd,
   };
 }
 
@@ -245,6 +253,8 @@ export function formStateToConfig(
     classifierRepetitions: state.classifierRepetitions,
     deciderRepetitions: state.deciderRepetitions,
     classifierMaxP95LatencyMs,
+    autoDeciderMinCostUsd: state.autoDeciderMinCostUsd,
+    autoDeciderMaxCostUsd: state.autoDeciderMaxCostUsd,
     updatedAt: base?.updatedAt ?? null,
     updatedBy: base?.updatedBy ?? null,
   };
@@ -561,6 +571,44 @@ function BenchmarkConfigEditor({
               className="h-8 w-40 tabular-nums"
             />
           </div>
+          <div className="flex flex-col gap-1.5">
+            <Label htmlFor="benchmark-auto-decider-min-cost" className="text-sm font-medium">
+              Auto min run cost
+            </Label>
+            <Input
+              id="benchmark-auto-decider-min-cost"
+              type="number"
+              min={0}
+              step={1}
+              value={form.autoDeciderMinCostUsd}
+              onChange={e =>
+                updateForm(prev => ({
+                  ...prev,
+                  autoDeciderMinCostUsd: parseFloat(e.target.value) || 0,
+                }))
+              }
+              className="h-8 w-40 tabular-nums"
+            />
+          </div>
+          <div className="flex flex-col gap-1.5">
+            <Label htmlFor="benchmark-auto-decider-max-cost" className="text-sm font-medium">
+              Auto max run cost
+            </Label>
+            <Input
+              id="benchmark-auto-decider-max-cost"
+              type="number"
+              min={0}
+              step={1}
+              value={form.autoDeciderMaxCostUsd}
+              onChange={e =>
+                updateForm(prev => ({
+                  ...prev,
+                  autoDeciderMaxCostUsd: parseFloat(e.target.value) || 0,
+                }))
+              }
+              className="h-8 w-40 tabular-nums"
+            />
+          </div>
           <div className="flex flex-col gap-1.5">
             <Label htmlFor="benchmark-classifier-repetitions" className="text-sm font-medium">
               Classifier repetitions (1–5)
diff --git a/apps/web/src/app/api/internal/auto-routing-benchmark/decider-candidates/route.test.ts b/apps/web/src/app/api/internal/auto-routing-benchmark/decider-candidates/route.test.ts
index 2afdd598c3..e53b88fe50 100644
--- a/apps/web/src/app/api/internal/auto-routing-benchmark/decider-candidates/route.test.ts
+++ b/apps/web/src/app/api/internal/auto-routing-benchmark/decider-candidates/route.test.ts
@@ -22,6 +22,13 @@ function createRequest(headers: Record<string, string> = {}) {
   );
 }
 
+function createRequestWithBounds(headers: Record<string, string> = {}) {
+  return new NextRequest(
+    'http://localhost:3000/api/internal/auto-routing-benchmark/decider-candidates?minCostUsd=12&maxCostUsd=24',
+    { headers }
+  );
+}
+
 describe('GET /api/internal/auto-routing-benchmark/decider-candidates', () => {
   beforeEach(() => {
     jest.clearAllMocks();
@@ -47,4 +54,18 @@ describe('GET /api/internal/auto-routing-benchmark/decider-candidates', () => {
       maxCostUsd: 25,
     });
   });
+
+  it('uses requested cost bounds for authenticated worker callers', async () => {
+    const res = await GET(createRequestWithBounds({ authorization: 'Bearer internal-secret' }));
+
+    expect(res.status).toBe(200);
+    expect(mockListAutoRoutingDeciderCandidates).toHaveBeenCalledWith({
+      minCostUsd: 12,
+      maxCostUsd: 24,
+    });
+    await expect(res.json()).resolves.toMatchObject({
+      minCostUsd: 12,
+      maxCostUsd: 24,
+    });
+  });
 });
diff --git a/apps/web/src/app/api/internal/auto-routing-benchmark/decider-candidates/route.ts b/apps/web/src/app/api/internal/auto-routing-benchmark/decider-candidates/route.ts
index e7338a8485..50837abe86 100644
--- a/apps/web/src/app/api/internal/auto-routing-benchmark/decider-candidates/route.ts
+++ b/apps/web/src/app/api/internal/auto-routing-benchmark/decider-candidates/route.ts
@@ -15,17 +15,35 @@ function extractBearerToken(authHeader: string | null): string | null {
   return trimmed.slice(7).trim() || null;
 }
 
+// Absent, blank, non-finite, or negative values yield `fallback`; Number('') is 0, not NaN.
+function parseCostBound(value: string | null, fallback: number): number {
+  const parsed = value === null || value.trim() === '' ? NaN : Number(value);
+  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
+}
+
 export async function GET(req: NextRequest) {
   const token = extractBearerToken(req.headers.get('authorization'));
   if (!INTERNAL_API_SECRET || !token || !timingSafeEqual(token, INTERNAL_API_SECRET)) {
     return NextResponse.json({ error: 'Unauthorized' }, { status: 401 });
   }
 
-  const candidates = await listAutoRoutingDeciderCandidates();
+  const minCostUsd = parseCostBound(
+    req.nextUrl.searchParams.get('minCostUsd'),
+    AUTO_DECIDER_MIN_COST_USD
+  );
+  const maxCostUsd = parseCostBound(
+    req.nextUrl.searchParams.get('maxCostUsd'),
+    AUTO_DECIDER_MAX_COST_USD
+  );
+  if (minCostUsd > maxCostUsd) {
+    return NextResponse.json({ error: 'Invalid cost bounds' }, { status: 400 });
+  }
+
+  const candidates = await listAutoRoutingDeciderCandidates({ minCostUsd, maxCostUsd });
   return NextResponse.json({
     candidates,
-    minCostUsd: AUTO_DECIDER_MIN_COST_USD,
-    maxCostUsd: AUTO_DECIDER_MAX_COST_USD,
+    minCostUsd,
+    maxCostUsd,
     generatedAt: new Date().toISOString(),
   });
 }
diff --git a/apps/web/src/lib/ai-gateway/auto-routing-benchmark-admin-client.test.ts b/apps/web/src/lib/ai-gateway/auto-routing-benchmark-admin-client.test.ts
index a6b0f62c75..b34f77a898 100644
--- a/apps/web/src/lib/ai-gateway/auto-routing-benchmark-admin-client.test.ts
+++ b/apps/web/src/lib/ai-gateway/auto-routing-benchmark-admin-client.test.ts
@@ -26,6 +26,8 @@ const configResponse = {
     classifierRepetitions: 1,
     deciderRepetitions: 1,
     classifierMaxP95LatencyMs: 1000,
+    autoDeciderMinCostUsd: 15,
+    autoDeciderMaxCostUsd: 25,
     updatedAt: null,
     updatedBy: null,
   },
diff --git a/apps/web/src/lib/model-stats/auto-routing-decider-candidates.test.ts b/apps/web/src/lib/model-stats/auto-routing-decider-candidates.test.ts
index 4d45fd87e6..524131fcf9 100644
--- a/apps/web/src/lib/model-stats/auto-routing-decider-candidates.test.ts
+++ b/apps/web/src/lib/model-stats/auto-routing-decider-candidates.test.ts
@@ -43,18 +43,33 @@ function row(
 
 describe('summarizeAutoRoutingDeciderCandidates', () => {
   it('keeps active terminal-bench models whose floored average attempt cost is in the auto range', () => {
-    const candidates = summarizeAutoRoutingDeciderCandidates([
-      row('model/too-cheap', AUTO_DECIDER_MIN_COST_USD - 0.01),
-      row('model/minimum', AUTO_DECIDER_MIN_COST_USD),
-      row('model/floored-maximum', AUTO_DECIDER_MAX_COST_USD + 0.99),
-      row('model/too-expensive', AUTO_DECIDER_MAX_COST_USD + 1),
-      row('model/inactive', 20, { active: false }),
-      row('kilo-internal/custom', 20),
-    ]);
+    const candidates = summarizeAutoRoutingDeciderCandidates(
+      [
+        row('model/too-cheap', AUTO_DECIDER_MIN_COST_USD - 0.01),
+        row('model/minimum', AUTO_DECIDER_MIN_COST_USD),
+        row('model/floored-maximum', AUTO_DECIDER_MAX_COST_USD + 0.99),
+        row('model/too-expensive', AUTO_DECIDER_MAX_COST_USD + 1),
+        row('model/inactive', 20, { active: false }),
+        row('kilo-internal/custom', 20),
+      ],
+      { minCostUsd: AUTO_DECIDER_MIN_COST_USD, maxCostUsd: AUTO_DECIDER_MAX_COST_USD }
+    );
 
     expect(candidates).toEqual([
       { id: 'model/floored-maximum', avgAttemptCostUsd: 25.99 },
       { id: 'model/minimum', avgAttemptCostUsd: 15 },
     ]);
   });
+
+  it('uses caller-provided cost bounds', () => {
+    const candidates = summarizeAutoRoutingDeciderCandidates(
+      [row('model/low', 12.1), row('model/in-band', 13.9), row('model/high', 15)],
+      { minCostUsd: 12, maxCostUsd: 13 }
+    );
+
+    expect(candidates).toEqual([
+      { id: 'model/in-band', avgAttemptCostUsd: 13.9 },
+      { id: 'model/low', avgAttemptCostUsd: 12.1 },
+    ]);
+  });
 });
diff --git a/apps/web/src/lib/model-stats/auto-routing-decider-candidates.ts b/apps/web/src/lib/model-stats/auto-routing-decider-candidates.ts
index 0fc4b48b47..8990ff0404 100644
--- a/apps/web/src/lib/model-stats/auto-routing-decider-candidates.ts
+++ b/apps/web/src/lib/model-stats/auto-routing-decider-candidates.ts
@@ -1,5 +1,9 @@
 import { CUSTOM_LLM_PREFIX } from '@/lib/ai-gateway/model-utils';
 import { readDb } from '@/lib/drizzle';
+import {
+  AUTO_DECIDER_DEFAULT_MAX_COST_USD,
+  AUTO_DECIDER_DEFAULT_MIN_COST_USD,
+} from '@kilocode/auto-routing-contracts';
 import { ModelStatsBenchmarksSchema, modelStats } from '@kilocode/db/schema';
 import { and, eq, notLike } from 'drizzle-orm';
 
@@ -7,29 +11,42 @@ const TerminalBenchSchema = ModelStatsBenchmarksSchema.unwrap()
   .pick({ kiloBench: true })
   .optional();
 
-export const AUTO_DECIDER_MIN_COST_USD = 15;
-export const AUTO_DECIDER_MAX_COST_USD = 25;
+export const AUTO_DECIDER_MIN_COST_USD = AUTO_DECIDER_DEFAULT_MIN_COST_USD;
+export const AUTO_DECIDER_MAX_COST_USD = AUTO_DECIDER_DEFAULT_MAX_COST_USD;
 
 export type AutoRoutingDeciderCandidate = {
   id: string;
   avgAttemptCostUsd: number;
 };
 
+export type AutoRoutingDeciderCandidateOptions = {
+  minCostUsd?: number;
+  maxCostUsd?: number;
+};
+
 type Row = {
   openrouterId: string;
   isActive: boolean | null;
   benchmarks: unknown;
 };
 
-function isInAutoCostBand(avgAttemptCostUsd: number): boolean {
+function isInAutoCostBand(
+  avgAttemptCostUsd: number,
+  { minCostUsd, maxCostUsd }: Required<AutoRoutingDeciderCandidateOptions>
+): boolean {
   const floored = Math.floor(avgAttemptCostUsd);
-  return floored >= AUTO_DECIDER_MIN_COST_USD && floored <= AUTO_DECIDER_MAX_COST_USD;
+  return floored >= minCostUsd && floored <= maxCostUsd;
 }
 
 export function summarizeAutoRoutingDeciderCandidates(
-  rows: readonly Row[]
+  rows: readonly Row[],
+  options: AutoRoutingDeciderCandidateOptions = {}
 ): AutoRoutingDeciderCandidate[] {
   const candidates: AutoRoutingDeciderCandidate[] = [];
+  const costBounds = {
+    minCostUsd: options.minCostUsd ?? AUTO_DECIDER_MIN_COST_USD,
+    maxCostUsd: options.maxCostUsd ?? AUTO_DECIDER_MAX_COST_USD,
+  };
 
   for (const row of rows) {
     if (!row.isActive || row.openrouterId.startsWith(CUSTOM_LLM_PREFIX)) continue;
@@ -41,7 +58,7 @@ export function summarizeAutoRoutingDeciderCandidates(
       (bench.nAttempts ?? 0) < 5 ||
       bench.avgAttemptCostUsd === null ||
       bench.avgAttemptCostUsd === undefined ||
-      !isInAutoCostBand(bench.avgAttemptCostUsd)
+      !isInAutoCostBand(bench.avgAttemptCostUsd, costBounds)
     ) {
       continue;
     }
@@ -54,7 +71,9 @@ export function summarizeAutoRoutingDeciderCandidates(
   });
 }
 
-export async function listAutoRoutingDeciderCandidates(): Promise<AutoRoutingDeciderCandidate[]> {
+export async function listAutoRoutingDeciderCandidates(
+  options: AutoRoutingDeciderCandidateOptions = {}
+): Promise<AutoRoutingDeciderCandidate[]> {
   const rows = await readDb
     .select({
       openrouterId: modelStats.openrouterId,
@@ -65,5 +84,5 @@ export async function listAutoRoutingDeciderCandidates(): Promise<AutoRoutingDec
     .where(
       and(eq(modelStats.isActive, true), notLike(modelStats.openrouterId, `${CUSTOM_LLM_PREFIX}%`))
     );
-  return summarizeAutoRoutingDeciderCandidates(rows);
+  return summarizeAutoRoutingDeciderCandidates(rows, options);
 }
diff --git a/packages/auto-routing-contracts/src/benchmark.ts b/packages/auto-routing-contracts/src/benchmark.ts
index 76e12fc663..4867254ddb 100644
--- a/packages/auto-routing-contracts/src/benchmark.ts
+++ b/packages/auto-routing-contracts/src/benchmark.ts
@@ -18,6 +18,9 @@ export const BenchmarkDeciderModelSchema = z.object({
 });
 export type BenchmarkDeciderModel = z.infer<typeof BenchmarkDeciderModelSchema>;
 
+export const AUTO_DECIDER_DEFAULT_MIN_COST_USD = 15;
+export const AUTO_DECIDER_DEFAULT_MAX_COST_USD = 25;
+
 export const AutoBenchmarkDeciderModelSchema = BenchmarkDeciderModelSchema.extend({
   avgAttemptCostUsd: z.number().nonnegative(),
 });
@@ -80,6 +83,10 @@ export const BenchmarkConfigSchema = z
     // Maximum acceptable p95 latency for the classifier winner; null means no
     // constraint (cost-only selection).
     classifierMaxP95LatencyMs: z.number().int().positive().nullable().default(1000),
+    // Auto decider model selection includes terminal-bench models whose
+    // floored average run cost falls within this inclusive range.
+    autoDeciderMinCostUsd: z.number().nonnegative().default(AUTO_DECIDER_DEFAULT_MIN_COST_USD),
+    autoDeciderMaxCostUsd: z.number().nonnegative().default(AUTO_DECIDER_DEFAULT_MAX_COST_USD),
     updatedAt: z.string().nullable(),
     updatedBy: z.string().nullable(),
   })
@@ -105,6 +112,13 @@ export const BenchmarkConfigSchema = z
       'excludedAutoDeciderModels',
       ctx
     );
+    if (config.autoDeciderMinCostUsd > config.autoDeciderMaxCostUsd) {
+      ctx.addIssue({
+        code: 'custom',
+        path: ['autoDeciderMaxCostUsd'],
+        message: 'Auto decider max cost must be greater than or equal to min cost',
+      });
+    }
   });
 export type BenchmarkConfig = z.infer<typeof BenchmarkConfigSchema>;
 
diff --git a/packages/auto-routing-contracts/src/contracts.test.ts b/packages/auto-routing-contracts/src/contracts.test.ts
index 292fbdadad..4b97897900 100644
--- a/packages/auto-routing-contracts/src/contracts.test.ts
+++ b/packages/auto-routing-contracts/src/contracts.test.ts
@@ -131,7 +131,7 @@ describe('auto routing contracts', () => {
 });
 
 describe('BenchmarkConfigSchema defaults', () => {
-  it('applies defaults of 1/1/1000 for classifierRepetitions, deciderRepetitions, classifierMaxP95LatencyMs', () => {
+  it('applies config defaults for repetitions, classifier latency, and auto decider cost bounds', () => {
     const result = BenchmarkConfigSchema.parse({
       classifierModels: ['model/a'],
       deciderModels: [{ id: 'model/b' }],
@@ -147,6 +147,8 @@ describe('BenchmarkConfigSchema defaults', () => {
     expect(result.classifierRepetitions).toBe(1);
     expect(result.deciderRepetitions).toBe(1);
     expect(result.classifierMaxP95LatencyMs).toBe(1000);
+    expect(result.autoDeciderMinCostUsd).toBe(15);
+    expect(result.autoDeciderMaxCostUsd).toBe(25);
   });
 
   it('accepts the benchmark maximum concurrency cap of 100', () => {
@@ -186,6 +188,29 @@ describe('BenchmarkConfigSchema defaults', () => {
     ]);
     expect(result.excludedAutoDeciderModels).toEqual(['model/d']);
   });
+
+  it('rejects auto decider cost bounds where min is greater than max', () => {
+    const result = BenchmarkConfigSchema.safeParse({
+      classifierModels: ['model/a'],
+      deciderModels: [{ id: 'model/b' }],
+      minAccuracy: 0.7,
+      switchCostFactor: 3,
+      maxConcurrency: 10,
+      benchmarkUserId: null,
+      benchmarkOrgId: null,
+      autoDeciderMinCostUsd: 30,
+      autoDeciderMaxCostUsd: 20,
+      updatedAt: null,
+      updatedBy: null,
+    });
+
+    expect(result.success).toBe(false);
+    if (!result.success) {
+      expect(result.error.issues.some(issue => issue.path[0] === 'autoDeciderMaxCostUsd')).toBe(
+        true
+      );
+    }
+  });
 });
 
 describe('BenchmarkConfigSchema duplicate model ids', () => {
diff --git a/services/auto-routing-benchmark/migrations/0003_chunky_ogun.sql b/services/auto-routing-benchmark/migrations/0003_chunky_ogun.sql
new file mode 100644
index 0000000000..d7448828fa
--- /dev/null
+++ b/services/auto-routing-benchmark/migrations/0003_chunky_ogun.sql
@@ -0,0 +1,2 @@
+ALTER TABLE `benchmark_config` ADD `auto_decider_min_cost_usd` real DEFAULT 15 NOT NULL;--> statement-breakpoint
+ALTER TABLE `benchmark_config` ADD `auto_decider_max_cost_usd` real DEFAULT 25 NOT NULL;
\ No newline at end of file
diff --git a/services/auto-routing-benchmark/migrations/meta/0003_snapshot.json b/services/auto-routing-benchmark/migrations/meta/0003_snapshot.json
new file mode 100644
index 0000000000..2721f3e0c1
--- /dev/null
+++ b/services/auto-routing-benchmark/migrations/meta/0003_snapshot.json
@@ -0,0 +1,750 @@
+{
+  "version": "6",
+  "dialect": "sqlite",
+  "id": "a7348ac7-ad74-4ab4-89c4-534ae4b73500",
+  "prevId": "3c258229-2360-4f73-bc7e-807239a3336d",
+  "tables": {
+    "benchmark_config": {
+      "name": "benchmark_config",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "integer",
+          "primaryKey": true,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "min_accuracy": {
+          "name": "min_accuracy",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "switch_cost_factor": {
+          "name": "switch_cost_factor",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "max_concurrency": {
+          "name": "max_concurrency",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "benchmark_user_id": {
+          "name": "benchmark_user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "benchmark_org_id": {
+          "name": "benchmark_org_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "classifier_repetitions": {
+          "name": "classifier_repetitions",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false,
+          "default": 1
+        },
+        "decider_repetitions": {
+          "name": "decider_repetitions",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false,
+          "default": 1
+        },
+        "classifier_max_p95_latency_ms": {
+          "name": "classifier_max_p95_latency_ms",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "auto_decider_min_cost_usd": {
+          "name": "auto_decider_min_cost_usd",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false,
+          "default": 15
+        },
+        "auto_decider_max_cost_usd": {
+          "name": "auto_decider_max_cost_usd",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false,
+          "default": 25
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "updated_by": {
+          "name": "updated_by",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "checkConstraints": {}
+    },
+    "benchmark_runs": {
+      "name": "benchmark_runs",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "kind": {
+          "name": "kind",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "status": {
+          "name": "status",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "started_at": {
+          "name": "started_at",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "completed_at": {
+          "name": "completed_at",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "error": {
+          "name": "error",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "min_accuracy": {
+          "name": "min_accuracy",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "switch_cost_factor": {
+          "name": "switch_cost_factor",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "max_concurrency": {
+          "name": "max_concurrency",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "benchmark_user_id": {
+          "name": "benchmark_user_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "benchmark_org_id": {
+          "name": "benchmark_org_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "repetitions": {
+          "name": "repetitions",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false,
+          "default": 1
+        },
+        "classifier_max_p95_latency_ms": {
+          "name": "classifier_max_p95_latency_ms",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "engine_identity": {
+          "name": "engine_identity",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false,
+          "default": "''"
+        }
+      },
+      "indexes": {
+        "UQ_benchmark_runs_one_running_per_kind": {
+          "name": "UQ_benchmark_runs_one_running_per_kind",
+          "columns": [
+            "kind"
+          ],
+          "isUnique": true,
+          "where": "\"benchmark_runs\".\"status\" = 'running'"
+        }
+      },
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "checkConstraints": {}
+    },
+    "case_results": {
+      "name": "case_results",
+      "columns": {
+        "run_id": {
+          "name": "run_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "model": {
+          "name": "model",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "case_id": {
+          "name": "case_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "route_key": {
+          "name": "route_key",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "score": {
+          "name": "score",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "latency_ms": {
+          "name": "latency_ms",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "cost_usd": {
+          "name": "cost_usd",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "error": {
+          "name": "error",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "fallback_reason": {
+          "name": "fallback_reason",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "retried": {
+          "name": "retried",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "exit_code": {
+          "name": "exit_code",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "output_prefix": {
+          "name": "output_prefix",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "event_count": {
+          "name": "event_count",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "last_event_types": {
+          "name": "last_event_types",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "rep": {
+          "name": "rep",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false,
+          "default": 0
+        },
+        "timed_out": {
+          "name": "timed_out",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false,
+          "default": 0
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {
+        "case_results_run_id_model_case_id_rep_pk": {
+          "columns": [
+            "run_id",
+            "model",
+            "case_id",
+            "rep"
+          ],
+          "name": "case_results_run_id_model_case_id_rep_pk"
+        }
+      },
+      "uniqueConstraints": {},
+      "checkConstraints": {}
+    },
+    "config_auto_decider_exclusions": {
+      "name": "config_auto_decider_exclusions",
+      "columns": {
+        "model": {
+          "name": "model",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true,
+          "autoincrement": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "checkConstraints": {}
+    },
+    "config_auto_decider_models": {
+      "name": "config_auto_decider_models",
+      "columns": {
+        "model": {
+          "name": "model",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "reasoning_effort": {
+          "name": "reasoning_effort",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "avg_attempt_cost_usd": {
+          "name": "avg_attempt_cost_usd",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "synced_at": {
+          "name": "synced_at",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "checkConstraints": {}
+    },
+    "config_classifier_models": {
+      "name": "config_classifier_models",
+      "columns": {
+        "model": {
+          "name": "model",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true,
+          "autoincrement": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "checkConstraints": {}
+    },
+    "config_decider_models": {
+      "name": "config_decider_models",
+      "columns": {
+        "model": {
+          "name": "model",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "reasoning_effort": {
+          "name": "reasoning_effort",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "checkConstraints": {}
+    },
+    "model_summaries": {
+      "name": "model_summaries",
+      "columns": {
+        "run_id": {
+          "name": "run_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "model": {
+          "name": "model",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "route_key": {
+          "name": "route_key",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "accuracy": {
+          "name": "accuracy",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "avg_cost_usd": {
+          "name": "avg_cost_usd",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "avg_latency_ms": {
+          "name": "avg_latency_ms",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "p50_latency_ms": {
+          "name": "p50_latency_ms",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "cases": {
+          "name": "cases",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "errors": {
+          "name": "errors",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "p95_latency_ms": {
+          "name": "p95_latency_ms",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        },
+        "timeouts": {
+          "name": "timeouts",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false,
+          "default": 0
+        },
+        "carried": {
+          "name": "carried",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false,
+          "default": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {
+        "model_summaries_run_id_model_route_key_pk": {
+          "columns": [
+            "run_id",
+            "model",
+            "route_key"
+          ],
+          "name": "model_summaries_run_id_model_route_key_pk"
+        }
+      },
+      "uniqueConstraints": {},
+      "checkConstraints": {}
+    },
+    "routing_table_candidates": {
+      "name": "routing_table_candidates",
+      "columns": {
+        "run_id": {
+          "name": "run_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "route_key": {
+          "name": "route_key",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "rank": {
+          "name": "rank",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "model": {
+          "name": "model",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "accuracy": {
+          "name": "accuracy",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "avg_cost_usd": {
+          "name": "avg_cost_usd",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "meets_threshold": {
+          "name": "meets_threshold",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "reasoning_effort": {
+          "name": "reasoning_effort",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {
+        "routing_table_candidates_run_id_route_key_rank_pk": {
+          "columns": [
+            "run_id",
+            "route_key",
+            "rank"
+          ],
+          "name": "routing_table_candidates_run_id_route_key_rank_pk"
+        }
+      },
+      "uniqueConstraints": {},
+      "checkConstraints": {}
+    },
+    "routing_tables": {
+      "name": "routing_tables",
+      "columns": {
+        "run_id": {
+          "name": "run_id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "published_at": {
+          "name": "published_at",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "generated_at": {
+          "name": "generated_at",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "min_accuracy": {
+          "name": "min_accuracy",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "switch_cost_factor": {
+          "name": "switch_cost_factor",
+          "type": "real",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "source": {
+          "name": "source",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {},
+      "checkConstraints": {}
+    },
+    "run_models": {
+      "name": "run_models",
+      "columns": {
+        "run_id": {
+          "name": "run_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "model": {
+          "name": "model",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "enqueued": {
+          "name": "enqueued",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true,
+          "autoincrement": false
+        },
+        "reasoning_effort": {
+          "name": "reasoning_effort",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": false,
+          "autoincrement": false
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {
+        "run_models_run_id_model_pk": {
+          "columns": [
+            "run_id",
+            "model"
+          ],
+          "name": "run_models_run_id_model_pk"
+        }
+      },
+      "uniqueConstraints": {},
+      "checkConstraints": {}
+    }
+  },
+  "views": {},
+  "enums": {},
+  "_meta": {
+    "schemas": {},
+    "tables": {},
+    "columns": {}
+  },
+  "internal": {
+    "indexes": {}
+  }
+}
\ No newline at end of file
diff --git a/services/auto-routing-benchmark/migrations/meta/_journal.json b/services/auto-routing-benchmark/migrations/meta/_journal.json
index d6cb5b17a7..b38e8d0cfa 100644
--- a/services/auto-routing-benchmark/migrations/meta/_journal.json
+++ b/services/auto-routing-benchmark/migrations/meta/_journal.json
@@ -22,6 +22,13 @@
       "when": 1781713850969,
       "tag": "0002_magical_wendell_rand",
       "breakpoints": true
+    },
+    {
+      "idx": 3,
+      "version": "6",
+      "when": 1781717243859,
+      "tag": "0003_chunky_ogun",
+      "breakpoints": true
     }
   ]
 }
\ No newline at end of file
diff --git a/services/auto-routing-benchmark/src/admin.test.ts b/services/auto-routing-benchmark/src/admin.test.ts
index 2c34b0bcec..32598065ed 100644
--- a/services/auto-routing-benchmark/src/admin.test.ts
+++ b/services/auto-routing-benchmark/src/admin.test.ts
@@ -38,6 +38,8 @@ const TEST_CONFIG: BenchmarkConfig = {
   classifierRepetitions: 1,
   deciderRepetitions: 1,
   classifierMaxP95LatencyMs: 1000,
+  autoDeciderMinCostUsd: 15,
+  autoDeciderMaxCostUsd: 25,
   updatedAt: null,
   updatedBy: null,
 };
@@ -54,6 +56,8 @@ const TEST_CONFIG_ROWS = {
     classifier_repetitions: TEST_CONFIG.classifierRepetitions,
     decider_repetitions: TEST_CONFIG.deciderRepetitions,
     classifier_max_p95_latency_ms: TEST_CONFIG.classifierMaxP95LatencyMs,
+    auto_decider_min_cost_usd: TEST_CONFIG.autoDeciderMinCostUsd,
+    auto_decider_max_cost_usd: TEST_CONFIG.autoDeciderMaxCostUsd,
     updated_at: '2026-06-01T00:00:00.000Z',
     updated_by: null,
   },
@@ -218,6 +222,8 @@ describe('GET /admin/config', () => {
         classifier_repetitions: 1,
         decider_repetitions: 1,
         classifier_max_p95_latency_ms: null,
+        auto_decider_min_cost_usd: 12,
+        auto_decider_max_cost_usd: 24,
         updated_at: '2026-06-01T00:00:00.000Z',
         updated_by: 'admin@example.com',
       },
@@ -311,6 +317,8 @@ describe('PUT /admin/config', () => {
     const [, configArg, , deciderModelRows, excludedAutoDeciderModels] =
       vi.mocked(replaceConfig).mock.calls[0];
     expect(configArg.min_accuracy).toBe(0.85);
+    expect(configArg.auto_decider_min_cost_usd).toBe(15);
+    expect(configArg.auto_decider_max_cost_usd).toBe(25);
     expect(typeof configArg.updated_at).toBe('string');
     expect(configArg.updated_by).toBe('igor@kilocode.ai');
     expect(deciderModelRows).toEqual([{ model: 'manual/model', reasoning_effort: 'low' }]);
diff --git a/services/auto-routing-benchmark/src/auto-decider-sync.test.ts b/services/auto-routing-benchmark/src/auto-decider-sync.test.ts
index cf7e64e33d..c5325df468 100644
--- a/services/auto-routing-benchmark/src/auto-decider-sync.test.ts
+++ b/services/auto-routing-benchmark/src/auto-decider-sync.test.ts
@@ -47,6 +47,8 @@ const config = {
   classifier_repetitions: 1,
   decider_repetitions: 1,
   classifier_max_p95_latency_ms: 1000,
+  auto_decider_min_cost_usd: 12,
+  auto_decider_max_cost_usd: 24,
   updated_at: '2026-06-01T00:00:00.000Z',
   updated_by: null,
 };
@@ -92,7 +94,7 @@ describe('syncAutoDeciderModels', () => {
     const result = await syncAutoDeciderModels(env, { fetchImpl });
 
     expect(fetchImpl).toHaveBeenCalledWith(
-      'https://app.test/api/internal/auto-routing-benchmark/decider-candidates',
+      'https://app.test/api/internal/auto-routing-benchmark/decider-candidates?minCostUsd=12&maxCostUsd=24',
       expect.objectContaining({
         headers: expect.objectContaining({ authorization: 'Bearer secret' }),
       })
diff --git a/services/auto-routing-benchmark/src/auto-decider-sync.ts b/services/auto-routing-benchmark/src/auto-decider-sync.ts
index 5880bc39c8..972b6284f5 100644
--- a/services/auto-routing-benchmark/src/auto-decider-sync.ts
+++ b/services/auto-routing-benchmark/src/auto-decider-sync.ts
@@ -1,4 +1,6 @@
 import {
+  AUTO_DECIDER_DEFAULT_MAX_COST_USD,
+  AUTO_DECIDER_DEFAULT_MIN_COST_USD,
   AutoBenchmarkDeciderCandidatesResponseSchema,
   type BenchmarkDeciderModel,
 } from '@kilocode/auto-routing-contracts';
@@ -38,17 +40,21 @@ function diffModels(
 
 async function fetchAutoDeciderCandidates(
   env: Env,
-  fetchImpl: typeof fetch
+  fetchImpl: typeof fetch,
+  costBounds: { minCostUsd: number; maxCostUsd: number }
 ): Promise<{ id: string; avgAttemptCostUsd: number }[]> {
   const secret = await env.INTERNAL_API_SECRET_PROD.get();
-  const response = await fetchImpl(
-    `${env.KILO_WEB_API_BASE_URL}/api/internal/auto-routing-benchmark/decider-candidates`,
-    {
-      headers: {
-        authorization: `Bearer ${secret}`,
-      },
-    }
+  const url = new URL(
+    '/api/internal/auto-routing-benchmark/decider-candidates',
+    env.KILO_WEB_API_BASE_URL
   );
+  url.searchParams.set('minCostUsd', String(costBounds.minCostUsd));
+  url.searchParams.set('maxCostUsd', String(costBounds.maxCostUsd));
+  const response = await fetchImpl(url.toString(), {
+    headers: {
+      authorization: `Bearer ${secret}`,
+    },
+  });
   if (!response.ok) {
     const detail = (await response.text().catch(() => '')).slice(0, 200);
     throw new Error(`auto decider candidate sync failed: HTTP ${response.status} ${detail}`);
@@ -73,7 +79,11 @@ export async function syncAutoDeciderModels(
     beforeRows.excludedAutoDeciderModels
   );
 
-  const candidates = await fetchAutoDeciderCandidates(env, fetchImpl);
+  const costBounds = {
+    minCostUsd: beforeConfig?.autoDeciderMinCostUsd ?? AUTO_DECIDER_DEFAULT_MIN_COST_USD,
+    maxCostUsd: beforeConfig?.autoDeciderMaxCostUsd ?? AUTO_DECIDER_DEFAULT_MAX_COST_USD,
+  };
+  const candidates = await fetchAutoDeciderCandidates(env, fetchImpl, costBounds);
   const previousReasoningEffort = new Map<string, string | null>();
   for (const row of beforeRows.autoDeciderModels) {
     previousReasoningEffort.set(row.model, row.reasoning_effort);
diff --git a/services/auto-routing-benchmark/src/config.test.ts b/services/auto-routing-benchmark/src/config.test.ts
index 55566dc7a0..7008af8421 100644
--- a/services/auto-routing-benchmark/src/config.test.ts
+++ b/services/auto-routing-benchmark/src/config.test.ts
@@ -12,6 +12,8 @@ const configRow = {
   classifier_repetitions: 1,
   decider_repetitions: 1,
   classifier_max_p95_latency_ms: null,
+  auto_decider_min_cost_usd: 12,
+  auto_decider_max_cost_usd: 24,
   updated_at: '2026-06-01T00:00:00.000Z',
   updated_by: 'admin@example.com',
 };
@@ -69,6 +71,8 @@ describe('mapConfigRows', () => {
     expect(result?.classifierRepetitions).toBe(1);
     expect(result?.deciderRepetitions).toBe(1);
     expect(result?.classifierMaxP95LatencyMs).toBeNull();
+    expect(result?.autoDeciderMinCostUsd).toBe(12);
+    expect(result?.autoDeciderMaxCostUsd).toBe(24);
   });
 
   it('excludes only auto decider models, leaving a manual model with the same id included', () => {
diff --git a/services/auto-routing-benchmark/src/config.ts b/services/auto-routing-benchmark/src/config.ts
index 7e99fcd47b..a41fe60c12 100644
--- a/services/auto-routing-benchmark/src/config.ts
+++ b/services/auto-routing-benchmark/src/config.ts
@@ -19,6 +19,8 @@ export function mapConfigRows(
     classifier_repetitions: number;
     decider_repetitions: number;
     classifier_max_p95_latency_ms: number | null;
+    auto_decider_min_cost_usd: number;
+    auto_decider_max_cost_usd: number;
     updated_at: string;
     updated_by: string | null;
   } | null,
@@ -64,6 +66,8 @@ export function mapConfigRows(
     classifierRepetitions: configRow.classifier_repetitions,
     deciderRepetitions: configRow.decider_repetitions,
     classifierMaxP95LatencyMs: configRow.classifier_max_p95_latency_ms,
+    autoDeciderMinCostUsd: configRow.auto_decider_min_cost_usd,
+    autoDeciderMaxCostUsd: configRow.auto_decider_max_cost_usd,
     updatedAt: configRow.updated_at,
     updatedBy: configRow.updated_by,
   };
@@ -106,6 +110,8 @@ export async function saveBenchmarkConfig(
       classifier_repetitions: config.classifierRepetitions,
       decider_repetitions: config.deciderRepetitions,
       classifier_max_p95_latency_ms: config.classifierMaxP95LatencyMs,
+      auto_decider_min_cost_usd: config.autoDeciderMinCostUsd,
+      auto_decider_max_cost_usd: config.autoDeciderMaxCostUsd,
       updated_at: updatedAt,
       updated_by: updatedBy,
     },
diff --git a/services/auto-routing-benchmark/src/db-replace-summaries.test.ts b/services/auto-routing-benchmark/src/db-replace-summaries.test.ts
index d77974387a..be18363e35 100644
--- a/services/auto-routing-benchmark/src/db-replace-summaries.test.ts
+++ b/services/auto-routing-benchmark/src/db-replace-summaries.test.ts
@@ -5,7 +5,7 @@ const mocks = vi.hoisted(() => {
   const deleteStatement = { kind: 'delete' };
   const where = vi.fn(() => deleteStatement);
   const deleteFrom = vi.fn(() => ({ where }));
-  const insertValues = vi.fn((rows: unknown[]) => ({ kind: 'insert', rows }));
+  const insertValues = vi.fn((rows: unknown) => ({ kind: 'insert', rows }));
   const insertInto = vi.fn(() => ({ values: insertValues }));
   const batch = vi.fn(async (_stmts: unknown[]) => []);
 
@@ -20,7 +20,7 @@ vi.mock('drizzle-orm/d1', () => ({
   })),
 }));
 
-import { replaceModelSummaries } from './db';
+import { insertRun, replaceModelSummaries } from './db';
 
 function makeSummary(model: string): BenchmarkModelSummary {
   return {
@@ -59,3 +59,44 @@ describe('replaceModelSummaries', () => {
     expect(mocks.batch.mock.calls[0]?.[0]).toHaveLength(3);
   });
 });
+
+describe('insertRun', () => { // Verifies that insertRun splits carried summaries into several insert statements.
+  beforeEach(() => { // Clear call history on the shared hoisted mocks so counts from other suites do not leak in.
+    mocks.batch.mockClear();
+    mocks.deleteFrom.mockClear();
+    mocks.insertInto.mockClear();
+    mocks.insertValues.mockClear();
+    mocks.where.mockClear();
+  });
+
+  it('chunks carried summary inserts to stay below D1 SQL variable limits', async () => {
+    const summaries = Array.from({ length: 10 }, (_, i) => makeSummary(`model/${i}`)); // Ten summaries; the [8, 2] expectation below implies a chunk size of 8 — confirm against MODEL_SUMMARY_INSERT_BATCH_SIZE.
+
+    await insertRun(
+      {} as D1Database, // Placeholder binding; drizzle-orm/d1 is mocked, so no real database is touched.
+      {
+        id: 'run-1',
+        kind: 'decider',
+        startedAt: '2026-06-17T00:00:00.000Z',
+        min_accuracy: 0.7,
+        switch_cost_factor: 3,
+        max_concurrency: 100,
+        benchmark_user_id: 'user-123',
+        benchmark_org_id: null,
+        repetitions: 1,
+        classifier_max_p95_latency_ms: null,
+        engine_identity: 'v1:test',
+      },
+      [], // No run models are passed, so only the carried summaries contribute array-valued inserts here.
+      summaries
+    );
+
+    const carriedInsertSizes = mocks.insertValues.mock.calls
+      .map(([rows]) => rows) // First argument of every `.values(...)` call.
+      .filter(Array.isArray) // Keep only array payloads; presumably the run row itself is inserted as a single object — confirm.
+      .map(rows => rows.length);
+    expect(carriedInsertSizes).toEqual([8, 2]); // Ten summaries must arrive as one chunk of 8 followed by one of 2.
+    expect(mocks.batch).toHaveBeenCalledTimes(1); // Chunking must not split the write across several batch calls.
+    expect(mocks.batch.mock.calls[0]?.[0]).toHaveLength(3); // Presumably the run insert plus the two summary chunks — confirm against insertRun.
+  });
+});
diff --git a/services/auto-routing-benchmark/src/db-schema.ts b/services/auto-routing-benchmark/src/db-schema.ts
index b191bff2e0..59a7ab19dd 100644
--- a/services/auto-routing-benchmark/src/db-schema.ts
+++ b/services/auto-routing-benchmark/src/db-schema.ts
@@ -15,6 +15,8 @@ export const benchmarkConfig = sqliteTable('benchmark_config', {
   classifier_repetitions: integer('classifier_repetitions').notNull().default(1),
   decider_repetitions: integer('decider_repetitions').notNull().default(1),
   classifier_max_p95_latency_ms: integer('classifier_max_p95_latency_ms'),
+  auto_decider_min_cost_usd: real('auto_decider_min_cost_usd').notNull().default(15),
+  auto_decider_max_cost_usd: real('auto_decider_max_cost_usd').notNull().default(25),
   updated_at: text('updated_at').notNull(),
   updated_by: text('updated_by'),
 });
diff --git a/services/auto-routing-benchmark/src/db.ts b/services/auto-routing-benchmark/src/db.ts
index f4bbac38a3..ae42077dac 100644
--- a/services/auto-routing-benchmark/src/db.ts
+++ b/services/auto-routing-benchmark/src/db.ts
@@ -113,6 +113,8 @@ export async function replaceConfig(
     classifier_repetitions: number;
     decider_repetitions: number;
     classifier_max_p95_latency_ms: number | null;
+    auto_decider_min_cost_usd: number;
+    auto_decider_max_cost_usd: number;
     updated_at: string;
     updated_by: string | null;
   },
@@ -214,10 +216,11 @@ export async function insertRun(
     stmts.push(orm.insert(runModels).values(models));
   }
 
-  if (carriedSummaries.length > 0) {
+  for (let i = 0; i < carriedSummaries.length; i += MODEL_SUMMARY_INSERT_BATCH_SIZE) {
+    const summaryChunk = carriedSummaries.slice(i, i + MODEL_SUMMARY_INSERT_BATCH_SIZE);
     stmts.push(
       orm.insert(modelSummaries).values(
-        carriedSummaries.map(s => ({
+        summaryChunk.map(s => ({
           run_id: run.id,
           model: s.model,
           route_key: s.routeKey,

From 9a3bf341fb717194c15a7aff34a2fa7db50b7664 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Igor=20=C5=A0=C4=87eki=C4=87?= <iscekic@protonmail.com>
Date: Wed, 17 Jun 2026 19:53:52 +0200
Subject: [PATCH 3/5] fix(auto-routing): allow one-attempt auto decider
 candidates

---
 .../lib/model-stats/auto-routing-decider-candidates.test.ts | 6 ++++--
 .../src/lib/model-stats/auto-routing-decider-candidates.ts  | 1 -
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/apps/web/src/lib/model-stats/auto-routing-decider-candidates.test.ts b/apps/web/src/lib/model-stats/auto-routing-decider-candidates.test.ts
index 524131fcf9..bf008fe75f 100644
--- a/apps/web/src/lib/model-stats/auto-routing-decider-candidates.test.ts
+++ b/apps/web/src/lib/model-stats/auto-routing-decider-candidates.test.ts
@@ -8,7 +8,7 @@ import {
 function row(
   openrouterId: string,
   avgAttemptCostUsd: number,
-  overrides: { active?: boolean } = {}
+  overrides: { active?: boolean; nAttempts?: number } = {}
 ) {
   return {
     openrouterId,
@@ -27,7 +27,7 @@ function row(
             avgCacheReadTokens: 1,
             avgExecutionMs: 1,
             nTotalTrials: 6,
-            nAttempts: 6,
+            nAttempts: overrides.nAttempts ?? 6,
             avgAttemptCostUsd,
             avgAttemptInputTokens: 1,
             avgAttemptOutputTokens: 1,
@@ -47,6 +47,7 @@ describe('summarizeAutoRoutingDeciderCandidates', () => {
       [
         row('model/too-cheap', AUTO_DECIDER_MIN_COST_USD - 0.01),
         row('model/minimum', AUTO_DECIDER_MIN_COST_USD),
+        row('model/one-attempt', AUTO_DECIDER_MIN_COST_USD + 1, { nAttempts: 1 }),
         row('model/floored-maximum', AUTO_DECIDER_MAX_COST_USD + 0.99),
         row('model/too-expensive', AUTO_DECIDER_MAX_COST_USD + 1),
         row('model/inactive', 20, { active: false }),
@@ -57,6 +58,7 @@ describe('summarizeAutoRoutingDeciderCandidates', () => {
 
     expect(candidates).toEqual([
       { id: 'model/floored-maximum', avgAttemptCostUsd: 25.99 },
+      { id: 'model/one-attempt', avgAttemptCostUsd: 16 },
       { id: 'model/minimum', avgAttemptCostUsd: 15 },
     ]);
   });
diff --git a/apps/web/src/lib/model-stats/auto-routing-decider-candidates.ts b/apps/web/src/lib/model-stats/auto-routing-decider-candidates.ts
index 8990ff0404..2f8540dae3 100644
--- a/apps/web/src/lib/model-stats/auto-routing-decider-candidates.ts
+++ b/apps/web/src/lib/model-stats/auto-routing-decider-candidates.ts
@@ -55,7 +55,6 @@ export function summarizeAutoRoutingDeciderCandidates(
     const bench = result.data?.kiloBench?.evals['terminal-bench'];
     if (
       !bench ||
-      (bench.nAttempts ?? 0) < 5 ||
       bench.avgAttemptCostUsd === null ||
       bench.avgAttemptCostUsd === undefined ||
       !isInAutoCostBand(bench.avgAttemptCostUsd, costBounds)

From 76f7b69aa7367550d33320a08e82563cd17a63a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Igor=20=C5=A0=C4=87eki=C4=87?= <iscekic@protonmail.com>
Date: Wed, 17 Jun 2026 20:00:22 +0200
Subject: [PATCH 4/5] fix(auto-routing): validate persisted reasoning effort

---
 .../auto-routing-benchmark/src/config.test.ts | 26 +++++++++++++++++++
 services/auto-routing-benchmark/src/config.ts |  7 +++--
 services/auto-routing-benchmark/src/db.ts     |  3 ++-
 .../src/reasoning-effort.ts                   | 10 +++++++
 services/auto-routing-benchmark/src/run.ts    |  3 ++-
 5 files changed, 43 insertions(+), 6 deletions(-)
 create mode 100644 services/auto-routing-benchmark/src/reasoning-effort.ts

diff --git a/services/auto-routing-benchmark/src/config.test.ts b/services/auto-routing-benchmark/src/config.test.ts
index 7008af8421..1f1f6a02ba 100644
--- a/services/auto-routing-benchmark/src/config.test.ts
+++ b/services/auto-routing-benchmark/src/config.test.ts
@@ -87,4 +87,30 @@ describe('mapConfigRows', () => {
     expect(result?.deciderModels).toEqual([{ id: 'auto/model', reasoningEffort: 'medium' }]);
     expect(result?.excludedAutoDeciderModels).toEqual(['auto/model']);
   });
+
+  it('normalizes unsupported persisted reasoning effort values to null', () => { // Stored effort strings the contract does not accept must surface as null, not be cast through.
+    const result = mapConfigRows(
+      configRow,
+      ['some/model'],
+      [{ model: 'manual/thinking', reasoning_effort: 'thinking' }], // Manual decider row carrying an effort value this test expects to be rejected.
+      [
+        {
+          model: 'auto/none',
+          reasoning_effort: 'none', // Auto decider row carrying an effort value this test expects to be rejected.
+          avg_attempt_cost_usd: 20,
+          synced_at: '2026-06-01T01:00:00.000Z',
+        },
+      ],
+      [] // Presumably the excluded auto decider model list, left empty so the auto row stays effective — confirm against mapConfigRows.
+    );
+
+    expect(result?.manualDeciderModels).toEqual([{ id: 'manual/thinking', reasoningEffort: null }]); // 'thinking' is normalized to null.
+    expect(result?.autoDeciderModels).toEqual([
+      { id: 'auto/none', reasoningEffort: null, avgAttemptCostUsd: 20 }, // 'none' is normalized to null; the cost is carried through unchanged.
+    ]);
+    expect(result?.deciderModels).toEqual([ // The combined list is expected to hold the manual entry first, then the auto entry.
+      { id: 'manual/thinking', reasoningEffort: null },
+      { id: 'auto/none', reasoningEffort: null },
+    ]);
+  });
 });
diff --git a/services/auto-routing-benchmark/src/config.ts b/services/auto-routing-benchmark/src/config.ts
index a41fe60c12..88f2836564 100644
--- a/services/auto-routing-benchmark/src/config.ts
+++ b/services/auto-routing-benchmark/src/config.ts
@@ -5,6 +5,7 @@ import {
   type ConfigAutoDeciderModelRow,
   type ConfigDeciderModelRow,
 } from './db';
+import { parsePersistedReasoningEffort } from './reasoning-effort';
 
 // Maps the three normalized config tables to the BenchmarkConfig contract.
 // Null when no admin has saved a config yet — the worker never fabricates
@@ -32,14 +33,12 @@ export function mapConfigRows(
   const excludedAuto = new Set(excludedAutoDeciderModels);
   const manualDeciderModels = deciderModelRows.map(r => ({
     id: r.model,
-    reasoningEffort:
-      r.reasoning_effort as BenchmarkConfig['deciderModels'][number]['reasoningEffort'],
+    reasoningEffort: parsePersistedReasoningEffort(r.reasoning_effort),
   }));
   const manualIds = new Set(manualDeciderModels.map(model => model.id));
   const autoDeciderModels = autoDeciderModelRows.map(r => ({
     id: r.model,
-    reasoningEffort:
-      r.reasoning_effort as BenchmarkConfig['deciderModels'][number]['reasoningEffort'],
+    reasoningEffort: parsePersistedReasoningEffort(r.reasoning_effort),
     avgAttemptCostUsd: r.avg_attempt_cost_usd,
   }));
   const effectiveAutoDeciderModels = autoDeciderModels
diff --git a/services/auto-routing-benchmark/src/db.ts b/services/auto-routing-benchmark/src/db.ts
index ae42077dac..79dc06ccaf 100644
--- a/services/auto-routing-benchmark/src/db.ts
+++ b/services/auto-routing-benchmark/src/db.ts
@@ -24,6 +24,7 @@ import {
   runModels,
 } from './db-schema';
 import { pickClassifierWinner } from './winner';
+import { parsePersistedReasoningEffort } from './reasoning-effort';
 
 export type CaseResultRow = typeof caseResults.$inferSelect;
 export type RunRow = typeof benchmarkRuns.$inferSelect;
@@ -589,7 +590,7 @@ export function rowsToRoutingTable(
       accuracy: row.accuracy,
       avgCostUsd: row.avg_cost_usd,
       meetsThreshold: row.meets_threshold,
-      reasoningEffort: row.reasoning_effort as RankedCandidate['reasoningEffort'],
+      reasoningEffort: parsePersistedReasoningEffort(row.reasoning_effort),
     });
   }
   return {
diff --git a/services/auto-routing-benchmark/src/reasoning-effort.ts b/services/auto-routing-benchmark/src/reasoning-effort.ts
new file mode 100644
index 0000000000..05cee319e3
--- /dev/null
+++ b/services/auto-routing-benchmark/src/reasoning-effort.ts
@@ -0,0 +1,10 @@
+import { ReasoningEffortSchema, type ReasoningEffort } from '@kilocode/auto-routing-contracts';
+
+export function parsePersistedReasoningEffort(value: string | null): ReasoningEffort | null { // Converts a stored reasoning-effort string into a ReasoningEffort, or null when it is absent or fails ReasoningEffortSchema.
+  if (value === null) { // Nothing was stored: return null without consulting the schema.
+    return null;
+  }
+
+  const parsed = ReasoningEffortSchema.safeParse(value); // The outcome is read from `success` below; presumably safeParse does not throw on invalid input (zod-style) — confirm.
+  return parsed.success ? parsed.data : null; // Values the schema rejects degrade to null instead of being returned as-is.
+}
diff --git a/services/auto-routing-benchmark/src/run.ts b/services/auto-routing-benchmark/src/run.ts
index 041ed902a0..6cd8fb3327 100644
--- a/services/auto-routing-benchmark/src/run.ts
+++ b/services/auto-routing-benchmark/src/run.ts
@@ -41,6 +41,7 @@ import {
   runDeciderCaseViaCli,
   warmUpCliContainer,
 } from './cli-runner';
+import { parsePersistedReasoningEffort } from './reasoning-effort';
 import { pickClassifierWinner } from './winner';
 
 export type BenchmarkJobMessage = {
@@ -910,7 +911,7 @@ async function finalizeRunIfComplete(
       // admin edit can't skew the published table.
       const deciderModels: BenchmarkDeciderModel[] = state.models.map(m => ({
         id: m.model,
-        reasoningEffort: m.reasoning_effort as BenchmarkDeciderModel['reasoningEffort'],
+        reasoningEffort: parsePersistedReasoningEffort(m.reasoning_effort),
       }));
       const table = buildRoutingTable({
         runId,

From 2c8d3932db71c200f7f3f8b944f3ed3604efdd12 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Igor=20=C5=A0=C4=87eki=C4=87?= <iscekic@protonmail.com>
Date: Wed, 17 Jun 2026 20:02:46 +0200
Subject: [PATCH 5/5] fix(auto-routing): normalize kilo bench model ids

---
 .../lib/model-stats/auto-routing-decider-candidates.test.ts | 2 ++
 .../src/lib/model-stats/auto-routing-decider-candidates.ts  | 6 +++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/apps/web/src/lib/model-stats/auto-routing-decider-candidates.test.ts b/apps/web/src/lib/model-stats/auto-routing-decider-candidates.test.ts
index bf008fe75f..d8ee82cf71 100644
--- a/apps/web/src/lib/model-stats/auto-routing-decider-candidates.test.ts
+++ b/apps/web/src/lib/model-stats/auto-routing-decider-candidates.test.ts
@@ -49,6 +49,7 @@ describe('summarizeAutoRoutingDeciderCandidates', () => {
         row('model/minimum', AUTO_DECIDER_MIN_COST_USD),
         row('model/one-attempt', AUTO_DECIDER_MIN_COST_USD + 1, { nAttempts: 1 }),
         row('model/floored-maximum', AUTO_DECIDER_MAX_COST_USD + 0.99),
+        row('kilo/openai/gpt-5.5', 24),
         row('model/too-expensive', AUTO_DECIDER_MAX_COST_USD + 1),
         row('model/inactive', 20, { active: false }),
         row('kilo-internal/custom', 20),
@@ -58,6 +59,7 @@ describe('summarizeAutoRoutingDeciderCandidates', () => {
 
     expect(candidates).toEqual([
       { id: 'model/floored-maximum', avgAttemptCostUsd: 25.99 },
+      { id: 'openai/gpt-5.5', avgAttemptCostUsd: 24 },
       { id: 'model/one-attempt', avgAttemptCostUsd: 16 },
       { id: 'model/minimum', avgAttemptCostUsd: 15 },
     ]);
diff --git a/apps/web/src/lib/model-stats/auto-routing-decider-candidates.ts b/apps/web/src/lib/model-stats/auto-routing-decider-candidates.ts
index 2f8540dae3..6218b5ebd6 100644
--- a/apps/web/src/lib/model-stats/auto-routing-decider-candidates.ts
+++ b/apps/web/src/lib/model-stats/auto-routing-decider-candidates.ts
@@ -5,6 +5,7 @@ import {
   AUTO_DECIDER_DEFAULT_MIN_COST_USD,
 } from '@kilocode/auto-routing-contracts';
 import { ModelStatsBenchmarksSchema, modelStats } from '@kilocode/db/schema';
+import { unprefixKiloGatewayModelId } from '@kilocode/worker-utils/kilo-model-id';
 import { and, eq, notLike } from 'drizzle-orm';
 
 const TerminalBenchSchema = ModelStatsBenchmarksSchema.unwrap()
@@ -61,7 +62,10 @@ export function summarizeAutoRoutingDeciderCandidates(
     ) {
       continue;
     }
-    candidates.push({ id: row.openrouterId, avgAttemptCostUsd: bench.avgAttemptCostUsd });
+    candidates.push({
+      id: unprefixKiloGatewayModelId(row.openrouterId) ?? row.openrouterId,
+      avgAttemptCostUsd: bench.avgAttemptCostUsd,
+    });
   }
 
   return candidates.sort((left, right) => {