protoLabsAI · mabry1985 · May 24, 2026 · May 24, 2026 · May 24, 2026 · coderabbitai
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -575,15 +575,17 @@ backlog → in_progress → review → done
 
 ### Model Hierarchy for Auto-Mode
 
-Auto-mode uses a tiered model selection based on feature complexity:
+Auto-mode uses a tiered model selection based on feature complexity. Defaults route through the protoLabs gateway (`api.proto-labs.ai`) so the gateway-issued API key is the only credential needed out of the box. Override per-tier in **Settings → AI Models → Model Defaults**.
 
-| Model      | Use Case                                                 | Triggered By                                       |
-| ---------- | -------------------------------------------------------- | -------------------------------------------------- |
-| **Opus**   | Orchestration, architectural decisions, challenging work | `complexity: 'architectural'` or after 2+ failures |
-| **Sonnet** | Standard feature implementation (default)                | `complexity: 'medium'` or `'large'`                |
-| **Haiku**  | Trivial/quick tasks                                      | `complexity: 'small'`                              |
+| Tier          | Default               | Triggered By                                       |
+| ------------- | --------------------- | -------------------------------------------------- |
+| **Reasoning** | `protolabs/reasoning` | `complexity: 'architectural'` or after 2+ failures |
+| **Smart**     | `protolabs/smart`     | `complexity: 'medium'` or `'large'`                |
+| **Fast**      | `protolabs/fast`      | `complexity: 'small'`                              |
 
-**Auto-escalation:** Features that fail 2+ times automatically escalate to opus on retry.
+The reasoning tier is for system design, spec generation, and deep-thinking work. Smart is the workhorse for ticket-level feature implementation. Fast is for trivial / quick tasks (commit messages, branch names, file descriptions).
+
+**Auto-escalation:** Features that fail 2+ times automatically escalate to `DEFAULT_MODELS.claude` (`protolabs/reasoning`) on retry.
 
 **Setting complexity via MCP:**
 
@@ -592,7 +594,7 @@ mcp__protolabs__create_feature({
   projectPath: '/path/to/project',
   title: 'Core Infrastructure Setup',
   description: '...',
-  complexity: 'architectural', // Uses opus
+  complexity: 'architectural', // Routes to protolabs/reasoning
 });
 ```
 

diff --git a/apps/server/src/services/auto-mode-service.ts b/apps/server/src/services/auto-mode-service.ts
@@ -528,16 +528,16 @@ export class AutoModeService {
       return { model: resolveModelString(feature.model, DEFAULT_MODELS.autoMode) };
     }
 
-    // 2. Escalate to opus after multiple failures (safety net)
+    // 2. Escalate to the strongest available model after multiple failures
     if (feature.failureCount && feature.failureCount >= 2) {
-      logger.info(`Escalating to opus after ${feature.failureCount} failures`);
-      return { model: DEFAULT_MODELS.claude }; // opus
+      logger.info(`Escalating to strongest model after ${feature.failureCount} failures`);
+      return { model: DEFAULT_MODELS.claude };
     }
 
-    // 3. Architectural complexity always gets opus
+    // 3. Architectural complexity always gets the strongest available model
     if (feature.complexity === 'architectural') {
-      logger.info('Using opus for architectural feature');
-      return { model: DEFAULT_MODELS.claude }; // opus
+      logger.info('Using strongest model for architectural feature');
+      return { model: DEFAULT_MODELS.claude };
     }
 
     // 4. AssignedRole model override (manifest takes precedence over settings)
@@ -594,11 +594,11 @@ export class AutoModeService {
 
     // 6. Fallback: complexity-based (only if no setting configured)
     if (feature.complexity === 'small') {
-      logger.info('Using haiku for small feature');
-      return { model: DEFAULT_MODELS.trivial }; // haiku
+      logger.info('Using fast model for small feature');
+      return { model: DEFAULT_MODELS.trivial };
     }
 
-    return { model: DEFAULT_MODELS.autoMode }; // sonnet
+    return { model: DEFAULT_MODELS.autoMode };
   }
 
   /**

diff --git a/apps/server/src/services/lead-engineer-processors.ts b/apps/server/src/services/lead-engineer-processors.ts
@@ -113,16 +113,16 @@ export class IntakeProcessor implements StateProcessor {
       return resolveModelString(feature.model, DEFAULT_MODELS.autoMode);
     }
 
-    // 2. Escalate to opus after multiple failures (safety net)
+    // 2. Escalate to the strongest available model after multiple failures
     if (feature.failureCount && feature.failureCount >= 2) {
-      logger.info(`[INTAKE] Escalating to opus after ${feature.failureCount} failures`);
-      return DEFAULT_MODELS.claude; // opus
+      logger.info(`[INTAKE] Escalating to strongest model after ${feature.failureCount} failures`);
+      return DEFAULT_MODELS.claude;
     }
 
-    // 3. Architectural complexity always gets opus
+    // 3. Architectural complexity always gets the strongest available model
     if (feature.complexity === 'architectural') {
-      logger.info('[INTAKE] Using opus for architectural feature');
-      return DEFAULT_MODELS.claude; // opus
+      logger.info('[INTAKE] Using strongest model for architectural feature');
+      return DEFAULT_MODELS.claude;
     }
 
     // 4. Read user's configured agent execution model from settings

diff --git a/apps/server/tests/unit/lib/model-resolver.test.ts b/apps/server/tests/unit/lib/model-resolver.test.ts
@@ -155,10 +155,11 @@ describe('model-resolver.ts', () => {
     });
 
     it('should have valid default model', () => {
-      // DEFAULT_MODELS.claude resolves through the gateway via @protolabsai/sdk.
-      // The field name still says "claude" for callsite stability; PR 3 renames
-      // it to `proto` when ClaudeProvider is removed.
-      expect(DEFAULT_MODELS.claude).toBe('protolabs/smart');
+      // DEFAULT_MODELS.claude is the "strongest available" — used for
+      // architectural complexity and the 2+ failures escalation path. It
+      // resolves through the gateway via @protolabsai/sdk. Field name kept
+      // as `claude` for caller stability through the SDK cutover.
+      expect(DEFAULT_MODELS.claude).toBe('protolabs/reasoning');
     });
   });
 });
diff --git a/apps/server/tests/unit/services/settings-service.test.ts b/apps/server/tests/unit/services/settings-service.test.ts
@@ -741,8 +741,8 @@ describe('settings-service.ts', () => {
       // Legacy fields should be migrated to phaseModels with canonical IDs
       expect(settings.phaseModels.enhancementModel).toEqual({ model: 'claude-haiku' });
       expect(settings.phaseModels.validationModel).toEqual({ model: 'claude-opus' });
-      // Other fields should use defaults (canonical IDs)
-      expect(settings.phaseModels.specGenerationModel).toEqual({ model: 'claude-opus' });
+      // Other fields should use defaults (DEFAULT_PHASE_MODELS, gateway-routed)
+      expect(settings.phaseModels.specGenerationModel).toEqual({ model: 'protolabs/reasoning' });
     });
 
     it('should use default phase models when none are configured', async () => {
@@ -756,10 +756,10 @@ describe('settings-service.ts', () => {
 
       const settings = await settingsService.getGlobalSettings();
 
-      // Should use DEFAULT_PHASE_MODELS (with canonical IDs)
-      expect(settings.phaseModels.enhancementModel).toEqual({ model: 'claude-sonnet' });
-      expect(settings.phaseModels.fileDescriptionModel).toEqual({ model: 'claude-haiku' });
-      expect(settings.phaseModels.specGenerationModel).toEqual({ model: 'claude-opus' });
+      // Should use DEFAULT_PHASE_MODELS (gateway-routed)
+      expect(settings.phaseModels.enhancementModel).toEqual({ model: 'protolabs/smart' });
+      expect(settings.phaseModels.fileDescriptionModel).toEqual({ model: 'protolabs/fast' });
+      expect(settings.phaseModels.specGenerationModel).toEqual({ model: 'protolabs/reasoning' });
     });
 
     it('should deep merge phaseModels on update', async () => {

diff --git a/docs/integrations/ai-providers.md b/docs/integrations/ai-providers.md
@@ -169,16 +169,16 @@ Once providers are configured, use the model routing settings to control which m
 
 Features are assigned a complexity level (`small`, `medium`, `large`, `architectural`). The complexity tier settings map each level to a model:
 
-| Tier              | Default | Typical Use                                                   |
-| ----------------- | ------- | ------------------------------------------------------------- |
-| **Small**         | Haiku   | Trivial tasks, quick fixes, one-file changes                  |
-| **Medium**        | Sonnet  | Standard feature work                                         |
-| **Large**         | Sonnet  | Complex multi-file changes, refactors                         |
-| **Architectural** | Opus    | System design, core infrastructure, performance-critical work |
+| Tier              | Default               | Typical Use                                            |
+| ----------------- | --------------------- | ------------------------------------------------------ |
+| **Small**         | `protolabs/fast`      | Trivial tasks, quick fixes, one-file changes           |
+| **Medium**        | `protolabs/smart`     | Standard feature work                                  |
+| **Large**         | `protolabs/smart`     | Complex multi-file changes, refactors                  |
+| **Architectural** | `protolabs/reasoning` | System design, core infrastructure, deep-thinking work |
 
 Configure in **Settings → AI Models → Model Defaults → Complexity Tiers**.
 
-Any provider model can be assigned to any tier — for example, route small features to a local Ollama model and architectural features to Opus.
+Any provider model can be assigned to any tier — for example, route small features to a local Ollama model or architectural features to Claude Opus via the Claude Compatible provider.
 
 ### Agent Execution (catch-all)
 
@@ -189,23 +189,23 @@ The **Agent Execution** model applies when a feature has no complexity set. This
 When auto-mode selects a model for a feature, it follows this priority order:
 
 1. Explicit `model` field set on the feature itself
-2. 2+ failures → Opus escalation
+2. 2+ failures, or `complexity: 'architectural'` → the reasoning tier (`DEFAULT_MODELS.claude` — `protolabs/reasoning`)
 3. Agent role manifest or `roleModelOverrides` settings
 4. **Complexity tier setting** (small/medium/large/architectural)
 5. **Agent Execution** catch-all setting
-6. Built-in default (Sonnet)
+6. Built-in default (`protolabs/smart`)
 
 ### Per-Phase Task Models
 
 Beyond agent execution, individual application tasks have their own model settings:
 
-| Task                | Default | Location         |
-| ------------------- | ------- | ---------------- |
-| Feature Enhancement | Sonnet  | Quick Tasks      |
-| Commit Messages     | Haiku   | Quick Tasks      |
-| App Specification   | Opus    | Generation Tasks |
-| Feature Generation  | Sonnet  | Generation Tasks |
-| Memory Extraction   | Haiku   | Memory Tasks     |
+| Task                | Default               | Location         |
+| ------------------- | --------------------- | ---------------- |
+| Feature Enhancement | `protolabs/smart`     | Quick Tasks      |
+| Commit Messages     | `protolabs/fast`      | Quick Tasks      |
+| App Specification   | `protolabs/reasoning` | Generation Tasks |
+| Feature Generation  | `protolabs/smart`     | Generation Tasks |
+| Memory Extraction   | `protolabs/fast`      | Memory Tasks     |
 
 All configurable in **Settings → AI Models → Model Defaults**.
 

diff --git a/libs/model-resolver/tests/resolver.test.ts b/libs/model-resolver/tests/resolver.test.ts
@@ -322,13 +322,13 @@ describe('model-resolver', () => {
     it('should use DEFAULT_MODELS.claude as fallback', () => {
       const result = resolveModelString(undefined);
 
-      // DEFAULT_MODELS.claude now points at the gateway-routed `protolabs/smart`
-      // model since the Anthropic SDK is being phased out in favor of
-      // @protolabsai/sdk. The field name still says "claude" through this PR
-      // to keep the diff focused; PR 3 of the cutover will rename it to `proto`.
+      // DEFAULT_MODELS.claude points at the gateway-routed reasoning tier — this
+      // is the "strongest available" used for orchestration, architectural
+      // complexity, and the 2+ failures escalation path. Field name kept as
+      // `claude` for caller compatibility through the SDK cutover.
       expect(result).toBe(DEFAULT_MODELS.claude);
       expect(DEFAULT_MODELS.claude).toBeDefined();
-      expect(DEFAULT_MODELS.claude).toBe('protolabs/smart');
+      expect(DEFAULT_MODELS.claude).toBe('protolabs/reasoning');
     });
   });
 

diff --git a/libs/types/src/agent-settings.ts b/libs/types/src/agent-settings.ts
@@ -226,42 +226,53 @@ export interface PhaseModelConfig {
  */
 export type PhaseModelKey = Exclude<keyof PhaseModelConfig, 'flowModels'>;
 
-/** Default phase model configuration - sensible defaults for each task type
- * Uses canonical prefixed model IDs for consistent routing.
+/** Default phase model configuration — sensible defaults for each task type.
+ *
+ * Routes through the protoLabs LLM gateway by default. The gateway exposes
+ * three code-work tiers:
+ *   - `protolabs/fast`      — trivial / quick tasks (haiku-equivalent)
+ *   - `protolabs/smart`     — feature work + standard generation (sonnet-equivalent)
+ *   - `protolabs/reasoning` — architectural / spec / deep-thinking (opus-equivalent)
+ *
+ * Users who want a different provider override these per-phase via the
+ * Settings → AI Models surface. Don't hardcode raw Anthropic / OpenAI IDs
+ * here — the shipped defaults must work with the gateway-issued API key,
+ * which is the only key present on a fresh install.
  */
 export const DEFAULT_PHASE_MODELS: PhaseModelConfig = {
-  // Quick tasks - use fast models for speed and cost
-  enhancementModel: { model: 'claude-sonnet' },
-  fileDescriptionModel: { model: 'claude-haiku' },
-  imageDescriptionModel: { model: 'claude-haiku' },
-
-  // Validation - use smart models for accuracy
-  validationModel: { model: 'claude-sonnet' },
-
-  // Generation - use powerful models for quality
-  specGenerationModel: { model: 'claude-opus' },
-  featureGenerationModel: { model: 'claude-sonnet' },
-  backlogPlanningModel: { model: 'claude-sonnet' },
-  projectAnalysisModel: { model: 'claude-sonnet' },
-  suggestionsModel: { model: 'claude-sonnet' },
-
-  // Memory - use fast model for learning extraction (cost-effective)
-  memoryExtractionModel: { model: 'claude-haiku' },
-
-  // Commit messages - use fast model for speed
-  commitMessageModel: { model: 'claude-haiku' },
-
-  // Branch names - use fast model for speed
-  branchNameModel: { model: 'claude-haiku' },
-
-  // Agent execution - default to sonnet for reliable feature implementation
-  agentExecutionModel: { model: 'claude-sonnet' },
-
-  // Complexity tiers - route features to the right model by complexity
-  complexitySmallModel: { model: 'claude-haiku' },
-  complexityMediumModel: { model: 'claude-sonnet' },
-  complexityLargeModel: { model: 'claude-sonnet' },
-  complexityArchitecturalModel: { model: 'claude-opus' },
+  // Quick tasks — fast tier, except enhancement, which uses the smart tier
+  enhancementModel: { model: 'protolabs/smart' },
+  fileDescriptionModel: { model: 'protolabs/fast' },
+  imageDescriptionModel: { model: 'protolabs/fast' },
+
+  // Validation — smart tier (accuracy matters)
+  validationModel: { model: 'protolabs/smart' },
+
+  // Generation — reasoning tier for spec, smart for the rest
+  specGenerationModel: { model: 'protolabs/reasoning' },
+  featureGenerationModel: { model: 'protolabs/smart' },
+  backlogPlanningModel: { model: 'protolabs/smart' },
+  projectAnalysisModel: { model: 'protolabs/smart' },
+  suggestionsModel: { model: 'protolabs/smart' },
+
+  // Memory extraction — fast tier (cost-effective)
+  memoryExtractionModel: { model: 'protolabs/fast' },
+
+  // Commit messages — fast tier
+  commitMessageModel: { model: 'protolabs/fast' },
+
+  // Branch names — fast tier
+  branchNameModel: { model: 'protolabs/fast' },
+
+  // Agent execution — smart tier (reliable feature implementation)
+  agentExecutionModel: { model: 'protolabs/smart' },
+
+  // Complexity tiers — route features by complexity. Architectural uses the
+  // reasoning tier for system-design / deep-thinking work.
+  complexitySmallModel: { model: 'protolabs/fast' },
+  complexityMediumModel: { model: 'protolabs/smart' },
+  complexityLargeModel: { model: 'protolabs/smart' },
+  complexityArchitecturalModel: { model: 'protolabs/reasoning' },
 };
 
 /**

diff --git a/libs/types/src/model.ts b/libs/types/src/model.ts
@@ -99,18 +99,16 @@ export function getAllCodexModelIds(): CodexModelId[] {
  */
 export const DEFAULT_MODELS = {
   /**
-   * Default for agent orchestration / planning.
-   * Historically pointed at `claude-opus-4-6`; now routed through the protoLabs
-   * gateway via the proto SDK. Existing callers that key off
-   * `DEFAULT_MODELS.claude` continue to compile — only the resolved model id
-   * changes. The field name is intentionally NOT renamed in this PR to keep
-   * the diff focused; PR 3 (final SDK rip-out) will rename it to `proto`.
+   * Strongest-available default: agent orchestration / planning, architectural
+   * features, and 2+ failure retries (auto-mode and lead-engineer intake).
+   * Routed through the protoLabs gateway via the proto SDK. The field name
+   * `claude` is legacy, kept for caller stability; the value is a gateway tier.
    */
-  claude: 'protolabs/smart',
-  /** Default for auto-mode feature implementation - sonnet for ticket work */
-  autoMode: 'claude-sonnet-4-6',
-  /** Default for trivial/quick tasks - haiku */
-  trivial: 'claude-haiku-4-5-20251001',
+  claude: 'protolabs/reasoning',
+  /** Default for auto-mode feature implementation — smart tier for ticket work. */
+  autoMode: 'protolabs/smart',
+  /** Default for trivial / quick tasks — fast tier for speed and cost. */
+  trivial: 'protolabs/fast',
   cursor: 'cursor-auto', // Cursor's recommended default (with prefix)
   codex: CODEX_MODEL_MAP.gpt55, // GPT-5.5 is the current flagship Codex model
 } as const;