diff --git a/CLAUDE.md b/CLAUDE.md index 2a9e6ddc5..7680af94b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -575,15 +575,17 @@ backlog → in_progress → review → done ### Model Hierarchy for Auto-Mode -Auto-mode uses a tiered model selection based on feature complexity: +Auto-mode uses a tiered model selection based on feature complexity. Defaults route through the protoLabs gateway (`api.proto-labs.ai`) so the gateway-issued API key is the only credential needed out of the box. Override per-tier in **Settings → AI Models → Model Defaults**. -| Model | Use Case | Triggered By | -| ---------- | -------------------------------------------------------- | -------------------------------------------------- | -| **Opus** | Orchestration, architectural decisions, challenging work | `complexity: 'architectural'` or after 2+ failures | -| **Sonnet** | Standard feature implementation (default) | `complexity: 'medium'` or `'large'` | -| **Haiku** | Trivial/quick tasks | `complexity: 'small'` | +| Tier | Default | Triggered By | +| ------------- | --------------------- | -------------------------------------------------- | +| **Reasoning** | `protolabs/reasoning` | `complexity: 'architectural'` or after 2+ failures | +| **Smart** | `protolabs/smart` | `complexity: 'medium'` or `'large'` | +| **Fast** | `protolabs/fast` | `complexity: 'small'` | -**Auto-escalation:** Features that fail 2+ times automatically escalate to opus on retry. +The reasoning tier is for system-design, spec generation, and deep-thinking work. Smart is the workhorse for ticket-level feature implementation. Fast is for trivial / quick tasks (commits, branch names, file descriptions). + +**Auto-escalation:** Features that fail 2+ times automatically escalate to `DEFAULT_MODELS.claude` (`protolabs/reasoning`) on retry. 
**Setting complexity via MCP:** @@ -592,7 +594,7 @@ mcp__protolabs__create_feature({ projectPath: '/path/to/project', title: 'Core Infrastructure Setup', description: '...', - complexity: 'architectural', // Uses opus + complexity: 'architectural', // Routes to protolabs/reasoning }); ``` diff --git a/apps/server/src/services/auto-mode-service.ts b/apps/server/src/services/auto-mode-service.ts index 8ca9ffca7..268705dcc 100644 --- a/apps/server/src/services/auto-mode-service.ts +++ b/apps/server/src/services/auto-mode-service.ts @@ -528,16 +528,16 @@ export class AutoModeService { return { model: resolveModelString(feature.model, DEFAULT_MODELS.autoMode) }; } - // 2. Escalate to opus after multiple failures (safety net) + // 2. Escalate to the strongest available model after multiple failures if (feature.failureCount && feature.failureCount >= 2) { - logger.info(`Escalating to opus after ${feature.failureCount} failures`); - return { model: DEFAULT_MODELS.claude }; // opus + logger.info(`Escalating to strongest model after ${feature.failureCount} failures`); + return { model: DEFAULT_MODELS.claude }; } - // 3. Architectural complexity always gets opus + // 3. Architectural complexity always gets the strongest available model if (feature.complexity === 'architectural') { - logger.info('Using opus for architectural feature'); - return { model: DEFAULT_MODELS.claude }; // opus + logger.info('Using strongest model for architectural feature'); + return { model: DEFAULT_MODELS.claude }; } // 4. AssignedRole model override (manifest takes precedence over settings) @@ -594,11 +594,11 @@ export class AutoModeService { // 6. 
Fallback: complexity-based (only if no setting configured) if (feature.complexity === 'small') { - logger.info('Using haiku for small feature'); - return { model: DEFAULT_MODELS.trivial }; // haiku + logger.info('Using fast model for small feature'); + return { model: DEFAULT_MODELS.trivial }; } - return { model: DEFAULT_MODELS.autoMode }; // sonnet + return { model: DEFAULT_MODELS.autoMode }; } /** diff --git a/apps/server/src/services/lead-engineer-processors.ts b/apps/server/src/services/lead-engineer-processors.ts index f4a30d6cd..79707fb6b 100644 --- a/apps/server/src/services/lead-engineer-processors.ts +++ b/apps/server/src/services/lead-engineer-processors.ts @@ -113,16 +113,16 @@ export class IntakeProcessor implements StateProcessor { return resolveModelString(feature.model, DEFAULT_MODELS.autoMode); } - // 2. Escalate to opus after multiple failures (safety net) + // 2. Escalate to the strongest available model after multiple failures if (feature.failureCount && feature.failureCount >= 2) { - logger.info(`[INTAKE] Escalating to opus after ${feature.failureCount} failures`); - return DEFAULT_MODELS.claude; // opus + logger.info(`[INTAKE] Escalating to strongest model after ${feature.failureCount} failures`); + return DEFAULT_MODELS.claude; } - // 3. Architectural complexity always gets opus + // 3. Architectural complexity always gets the strongest available model if (feature.complexity === 'architectural') { - logger.info('[INTAKE] Using opus for architectural feature'); - return DEFAULT_MODELS.claude; // opus + logger.info('[INTAKE] Using strongest model for architectural feature'); + return DEFAULT_MODELS.claude; } // 4. 
Read user's configured agent execution model from settings diff --git a/apps/server/tests/unit/lib/model-resolver.test.ts b/apps/server/tests/unit/lib/model-resolver.test.ts index 300b231cc..263d06168 100644 --- a/apps/server/tests/unit/lib/model-resolver.test.ts +++ b/apps/server/tests/unit/lib/model-resolver.test.ts @@ -155,10 +155,11 @@ describe('model-resolver.ts', () => { }); it('should have valid default model', () => { - // DEFAULT_MODELS.claude resolves through the gateway via @protolabsai/sdk. - // The field name still says "claude" for callsite stability; PR 3 renames - // it to `proto` when ClaudeProvider is removed. - expect(DEFAULT_MODELS.claude).toBe('protolabs/smart'); + // DEFAULT_MODELS.claude is the "strongest available" — used for + // architectural complexity and the 2+ failures escalation path. It + // resolves through the gateway via @protolabsai/sdk. Field name kept + // as `claude` for caller stability through the SDK cutover. + expect(DEFAULT_MODELS.claude).toBe('protolabs/reasoning'); }); }); }); diff --git a/apps/server/tests/unit/services/settings-service.test.ts b/apps/server/tests/unit/services/settings-service.test.ts index e645d7ccf..c3572e86d 100644 --- a/apps/server/tests/unit/services/settings-service.test.ts +++ b/apps/server/tests/unit/services/settings-service.test.ts @@ -741,8 +741,8 @@ describe('settings-service.ts', () => { // Legacy fields should be migrated to phaseModels with canonical IDs expect(settings.phaseModels.enhancementModel).toEqual({ model: 'claude-haiku' }); expect(settings.phaseModels.validationModel).toEqual({ model: 'claude-opus' }); - // Other fields should use defaults (canonical IDs) - expect(settings.phaseModels.specGenerationModel).toEqual({ model: 'claude-opus' }); + // Other fields should use defaults (DEFAULT_PHASE_MODELS, gateway-routed) + expect(settings.phaseModels.specGenerationModel).toEqual({ model: 'protolabs/reasoning' }); }); it('should use default phase models when none are configured', 
async () => { @@ -756,10 +756,10 @@ describe('settings-service.ts', () => { const settings = await settingsService.getGlobalSettings(); - // Should use DEFAULT_PHASE_MODELS (with canonical IDs) - expect(settings.phaseModels.enhancementModel).toEqual({ model: 'claude-sonnet' }); - expect(settings.phaseModels.fileDescriptionModel).toEqual({ model: 'claude-haiku' }); - expect(settings.phaseModels.specGenerationModel).toEqual({ model: 'claude-opus' }); + // Should use DEFAULT_PHASE_MODELS (gateway-routed) + expect(settings.phaseModels.enhancementModel).toEqual({ model: 'protolabs/smart' }); + expect(settings.phaseModels.fileDescriptionModel).toEqual({ model: 'protolabs/fast' }); + expect(settings.phaseModels.specGenerationModel).toEqual({ model: 'protolabs/reasoning' }); }); it('should deep merge phaseModels on update', async () => { diff --git a/docs/integrations/ai-providers.md b/docs/integrations/ai-providers.md index f4db49b8e..81eda8a04 100644 --- a/docs/integrations/ai-providers.md +++ b/docs/integrations/ai-providers.md @@ -169,16 +169,16 @@ Once providers are configured, use the model routing settings to control which m Features are assigned a complexity level (`small`, `medium`, `large`, `architectural`). 
The complexity tier settings map each level to a model: -| Tier | Default | Typical Use | -| ----------------- | ------- | ------------------------------------------------------------- | -| **Small** | Haiku | Trivial tasks, quick fixes, one-file changes | -| **Medium** | Sonnet | Standard feature work | -| **Large** | Sonnet | Complex multi-file changes, refactors | -| **Architectural** | Opus | System design, core infrastructure, performance-critical work | +| Tier | Default | Typical Use | +| ----------------- | --------------------- | ------------------------------------------------------ | +| **Small** | `protolabs/fast` | Trivial tasks, quick fixes, one-file changes | +| **Medium** | `protolabs/smart` | Standard feature work | +| **Large** | `protolabs/smart` | Complex multi-file changes, refactors | +| **Architectural** | `protolabs/reasoning` | System design, core infrastructure, deep-thinking work | Configure in **Settings → AI Models → Model Defaults → Complexity Tiers**. -Any provider model can be assigned to any tier — for example, route small features to a local Ollama model and architectural features to Opus. +Any provider model can be assigned to any tier — for example, route small features to a local Ollama model or architectural features to Claude Opus via the Claude Compatible provider. ### Agent Execution (catch-all) @@ -189,23 +189,23 @@ The **Agent Execution** model applies when a feature has no complexity set. This When auto-mode selects a model for a feature, it follows this priority order: 1. Explicit `model` field set on the feature itself -2. 2+ failures → Opus escalation +2. 2+ failures, or `complexity: 'architectural'` → escalation to the reasoning tier (`DEFAULT_MODELS.claude` — `protolabs/reasoning`) 3. Agent role manifest or `roleModelOverrides` settings 4. **Complexity tier setting** (small/medium/large/architectural) 5. **Agent Execution** catch-all setting -6. 
Built-in default (`protolabs/smart`) ### Per-Phase Task Models Beyond agent execution, individual application tasks have their own model settings: -| Task | Default | Location | -| ------------------- | ------- | ---------------- | -| Feature Enhancement | Sonnet | Quick Tasks | -| Commit Messages | Haiku | Quick Tasks | -| App Specification | Opus | Generation Tasks | -| Feature Generation | Sonnet | Generation Tasks | -| Memory Extraction | Haiku | Memory Tasks | +| Task | Default | Location | +| ------------------- | --------------------- | ---------------- | +| Feature Enhancement | `protolabs/smart` | Quick Tasks | +| Commit Messages | `protolabs/fast` | Quick Tasks | +| App Specification | `protolabs/reasoning` | Generation Tasks | +| Feature Generation | `protolabs/smart` | Generation Tasks | +| Memory Extraction | `protolabs/fast` | Memory Tasks | All configurable in **Settings → AI Models → Model Defaults**. diff --git a/libs/model-resolver/tests/resolver.test.ts b/libs/model-resolver/tests/resolver.test.ts index 3233a2ce2..815b8dd8e 100644 --- a/libs/model-resolver/tests/resolver.test.ts +++ b/libs/model-resolver/tests/resolver.test.ts @@ -322,13 +322,13 @@ describe('model-resolver', () => { it('should use DEFAULT_MODELS.claude as fallback', () => { const result = resolveModelString(undefined); - // DEFAULT_MODELS.claude now points at the gateway-routed `protolabs/smart` - // model since the Anthropic SDK is being phased out in favor of - // @protolabsai/sdk. The field name still says "claude" through this PR - // to keep the diff focused; PR 3 of the cutover will rename it to `proto`. + // DEFAULT_MODELS.claude points at the gateway-routed reasoning tier — this + // is the "strongest available" used for orchestration, architectural + // complexity, and the 2+ failures escalation path. Field name kept as + // `claude` for caller compatibility through the SDK cutover. 
expect(result).toBe(DEFAULT_MODELS.claude); expect(DEFAULT_MODELS.claude).toBeDefined(); - expect(DEFAULT_MODELS.claude).toBe('protolabs/smart'); + expect(DEFAULT_MODELS.claude).toBe('protolabs/reasoning'); }); }); diff --git a/libs/types/src/agent-settings.ts b/libs/types/src/agent-settings.ts index f8c51d0e4..1999908e1 100644 --- a/libs/types/src/agent-settings.ts +++ b/libs/types/src/agent-settings.ts @@ -226,42 +226,53 @@ export interface PhaseModelConfig { */ export type PhaseModelKey = Exclude; -/** Default phase model configuration - sensible defaults for each task type - * Uses canonical prefixed model IDs for consistent routing. +/** Default phase model configuration — sensible defaults for each task type. + * + * Routes through the protoLabs LLM gateway by default. The gateway exposes + * three code-work tiers: + * - `protolabs/fast` — trivial / quick tasks (haiku-equivalent) + * - `protolabs/smart` — feature work + standard generation (sonnet-equivalent) + * - `protolabs/reasoning` — architectural / spec / deep-thinking (opus-equivalent) + * + * Users who want a different provider override these per-phase via the + * Settings → AI Models surface. Don't hardcode raw Anthropic / OpenAI IDs + * here — the boxed default has to work with the gateway-issued API key out + * of the box (the only key shipped on a fresh install). 
*/ export const DEFAULT_PHASE_MODELS: PhaseModelConfig = { - // Quick tasks - use fast models for speed and cost - enhancementModel: { model: 'claude-sonnet' }, - fileDescriptionModel: { model: 'claude-haiku' }, - imageDescriptionModel: { model: 'claude-haiku' }, - - // Validation - use smart models for accuracy - validationModel: { model: 'claude-sonnet' }, - - // Generation - use powerful models for quality - specGenerationModel: { model: 'claude-opus' }, - featureGenerationModel: { model: 'claude-sonnet' }, - backlogPlanningModel: { model: 'claude-sonnet' }, - projectAnalysisModel: { model: 'claude-sonnet' }, - suggestionsModel: { model: 'claude-sonnet' }, - - // Memory - use fast model for learning extraction (cost-effective) - memoryExtractionModel: { model: 'claude-haiku' }, - - // Commit messages - use fast model for speed - commitMessageModel: { model: 'claude-haiku' }, - - // Branch names - use fast model for speed - branchNameModel: { model: 'claude-haiku' }, - - // Agent execution - default to sonnet for reliable feature implementation - agentExecutionModel: { model: 'claude-sonnet' }, - - // Complexity tiers - route features to the right model by complexity - complexitySmallModel: { model: 'claude-haiku' }, - complexityMediumModel: { model: 'claude-sonnet' }, - complexityLargeModel: { model: 'claude-sonnet' }, - complexityArchitecturalModel: { model: 'claude-opus' }, + // Quick tasks — smart tier for enhancement, fast tier for file/image descriptions + enhancementModel: { model: 'protolabs/smart' }, + fileDescriptionModel: { model: 'protolabs/fast' }, + imageDescriptionModel: { model: 'protolabs/fast' }, + + // Validation — smart tier (accuracy matters) + validationModel: { model: 'protolabs/smart' }, + + // Generation — reasoning tier for spec, smart for the rest + specGenerationModel: { model: 'protolabs/reasoning' }, + featureGenerationModel: { model: 'protolabs/smart' }, + backlogPlanningModel: { model: 'protolabs/smart' }, + projectAnalysisModel: { model: 'protolabs/smart' }, + suggestionsModel: { 
model: 'protolabs/smart' }, + + // Memory extraction — fast tier (cost-effective) + memoryExtractionModel: { model: 'protolabs/fast' }, + + // Commit messages — fast tier + commitMessageModel: { model: 'protolabs/fast' }, + + // Branch names — fast tier + branchNameModel: { model: 'protolabs/fast' }, + + // Agent execution — smart tier (reliable feature implementation) + agentExecutionModel: { model: 'protolabs/smart' }, + + // Complexity tiers — route features by complexity. Architectural uses the + // reasoning tier for system-design / deep-thinking work. + complexitySmallModel: { model: 'protolabs/fast' }, + complexityMediumModel: { model: 'protolabs/smart' }, + complexityLargeModel: { model: 'protolabs/smart' }, + complexityArchitecturalModel: { model: 'protolabs/reasoning' }, }; /** diff --git a/libs/types/src/model.ts b/libs/types/src/model.ts index b89495070..cb519ce18 100644 --- a/libs/types/src/model.ts +++ b/libs/types/src/model.ts @@ -99,18 +99,16 @@ export function getAllCodexModelIds(): CodexModelId[] { */ export const DEFAULT_MODELS = { /** - * Default for agent orchestration / planning. - * Historically pointed at `claude-opus-4-6`; now routed through the protoLabs - * gateway via the proto SDK. Existing callers that key off - * `DEFAULT_MODELS.claude` continue to compile — only the resolved model id - * changes. The field name is intentionally NOT renamed in this PR to keep - * the diff focused; PR 3 (final SDK rip-out) will rename it to `proto`. + * Default for agent orchestration / planning + escalation-to-strongest path. + * Routed through the protoLabs gateway via the proto SDK. This is the tier + * used for architectural features, 2+ failure retries, and the reasoning + * path in the lead engineer. 
*/ - claude: 'protolabs/smart', - /** Default for auto-mode feature implementation - sonnet for ticket work */ - autoMode: 'claude-sonnet-4-6', - /** Default for trivial/quick tasks - haiku */ - trivial: 'claude-haiku-4-5-20251001', + claude: 'protolabs/reasoning', + /** Default for auto-mode feature implementation — smart tier for ticket work. */ + autoMode: 'protolabs/smart', + /** Default for trivial / quick tasks — fast tier for speed and cost. */ + trivial: 'protolabs/fast', cursor: 'cursor-auto', // Cursor's recommended default (with prefix) codex: CODEX_MODEL_MAP.gpt55, // GPT-5.5 is the current flagship Codex model } as const;