From ab97adc5a8449bf14c9ccbf5d6ab9b55696f55d2 Mon Sep 17 00:00:00 2001
From: lai3d <cgpanda.sg@gmail.com>
Date: Tue, 19 May 2026 02:25:55 +0800
Subject: [PATCH] =?UTF-8?q?Multi-LLM=20backend=20for=20/api/ai/triage=20?=
 =?UTF-8?q?=E2=80=94=20Anthropic=20/=20OpenAI=20/=20Doubao=20/=20Grok?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds an `LlmProvider` enum and a single dispatcher so the triage
endpoint can run against any of four backends, selected at startup via
the `LLM_PROVIDER` env var. The same prompt and response schema work
across all four; operators pick the provider that fits their stack.

Why this matters:
  - The endpoint was previously hard-coded to Anthropic. For
    Volcengine-hosted deployments, operators want Doubao; for users
    with existing OpenAI/Grok keys, no Anthropic signup overhead;
    for the rest, Anthropic remains the default with its prompt-cache
    advantage.
  - Adds a clean talking point: the AI brain is provider-agnostic by
    design. Locking the platform to one LLM vendor would have been a
    bad call.

Implementation:
  - `LlmProvider` enum (Anthropic / OpenAI / Doubao / Grok) with
    `parse`, `as_str`, `default_model`, `Default = Anthropic`. Aliases
    accepted (claude, gpt, volcengine, volc, ark, xai).
  - `call_llm` dispatches to `call_anthropic` (existing, preserves
    `cache_control: ephemeral`) or `call_openai_compatible` (new,
    shared by OpenAI/Doubao/Grok — same chat/completions wire format,
    same `Authorization: Bearer` auth).
  - `TriageResponse` gains a `provider` field so the caller can see
    which backend produced a response (useful when rotating providers
    during testing).
  - Config switches from `ANTHROPIC_API_KEY` to `LLM_PROVIDER` +
    `LLM_API_KEY`. Back-compat: `ANTHROPIC_API_KEY` still honored
    when `LLM_PROVIDER` is anthropic or unset — existing deployments
    don't need to rotate env vars.

Tests:
  - 4 existing parse tests still pass.
  - 3 new tests: provider label propagation across all parse paths,
    `LlmProvider::parse` alias matrix, default-model + Default impl.

Zero new crate dependencies — still just reqwest + serde_json.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 sigma-api/README.md               |  23 ++-
 sigma-api/src/config.rs           |  37 +++-
 sigma-api/src/main.rs             |  13 +-
 sigma-api/src/routes/ai_triage.rs | 302 ++++++++++++++++++++++++++++--
 sigma-api/src/routes/mod.rs       |   3 +-
 5 files changed, 349 insertions(+), 29 deletions(-)

diff --git a/sigma-api/README.md b/sigma-api/README.md
index 9c956c1..8fd612e 100644
--- a/sigma-api/README.md
+++ b/sigma-api/README.md
@@ -72,16 +72,30 @@ cargo run              # Migrations run automatically on startup
 
 Centralizes the AI "brain" of the platform: sigma-agent stays lean and only exposes raw capabilities; sigma-api takes an alert plus structured context and returns a triage suggestion (diagnosis, ranked likely causes, ordered remediation steps, confidence). Always returns `200 OK` — if the LLM is not configured or unreachable, the response carries `available: false` and the operator workflow degrades to "raw alert only".
 
-**Backend:** Anthropic Messages API (default `claude-sonnet-4-6`), called directly via `reqwest` (no SDK). System prompt is sent with `cache_control: ephemeral` so repeated triages within ~5 min reuse the cached prompt — typical follow-up call is 100s of tokens of input instead of thousands.
+**Backend — pluggable.** Four providers supported out of the box, selected at startup via `LLM_PROVIDER`. All share the same prompt and response schema:
+
+| `LLM_PROVIDER` | Endpoint | Default model | Notes |
+|---|---|---|---|
+| `anthropic` (default) | `api.anthropic.com/v1/messages` | `claude-sonnet-4-6` | Uses `cache_control: ephemeral` on the system prompt for repeated-triage savings |
+| `openai` | `api.openai.com/v1/chat/completions` | `gpt-4o-mini` | OpenAI's automatic prompt caching kicks in for long stable system prompts |
+| `doubao` | `ark.cn-beijing.volces.com/api/v3/chat/completions` | `doubao-pro-32k` | Volcengine's LLM — natural fit for Volcengine-hosted deployments |
+| `grok` | `api.x.ai/v1/chat/completions` | `grok-3` | xAI's chat-completions endpoint |
+
+Anthropic uses its own Messages API (with `cache_control` for prompt caching); OpenAI / Doubao / Grok share one OpenAI-compatible chat-completions implementation. All called directly via `reqwest` — no SDK dependency.
+
+The optional `model` field on a `TriageRequest` overrides the provider's default model for that call. Aliases accepted for `LLM_PROVIDER` (case-insensitive): `claude` → anthropic, `gpt` → openai, `volcengine` / `volc` / `ark` → doubao, `xai` → grok. Unknown values fall back to anthropic with a startup warning.
 
 **Configuration:**
 
 ```bash
-export ANTHROPIC_API_KEY=sk-ant-...
-# Optional: ANTHROPIC_MODEL=claude-sonnet-4-6 (set per request via "model" field)
+export LLM_PROVIDER=anthropic              # or: openai | doubao | grok
+export LLM_API_KEY=...                     # the key for the selected provider
+
+# Back-compat: ANTHROPIC_API_KEY still works when LLM_PROVIDER is anthropic
+# (or unset). No env-var rotation needed for existing deployments.
 ```
 
-If `ANTHROPIC_API_KEY` is unset, the endpoint returns immediately with `available: false` — no startup failure, no per-request error.
+If `LLM_API_KEY` (and the back-compat `ANTHROPIC_API_KEY`) are both unset, the endpoint returns immediately with `available: false` and a note pointing at the misconfiguration — no startup failure, no per-request error.
 
 **Read-only by design.** This endpoint never mutates fleet state; auto-remediation is an explicit non-goal. Human-in-the-loop: the LLM proposes, the operator decides.
 
@@ -123,6 +137,7 @@ curl -s -X POST http://localhost:3000/api/ai/triage \
   ],
   "confidence": "medium",
   "model": "claude-sonnet-4-6",
+  "provider": "anthropic",
   "note": null
 }
 ```
diff --git a/sigma-api/src/config.rs b/sigma-api/src/config.rs
index 3e8ad94..b2960df 100644
--- a/sigma-api/src/config.rs
+++ b/sigma-api/src/config.rs
@@ -15,7 +15,18 @@ pub struct Config {
     pub jwt_secret: String,
     pub jwt_expiry_hours: u64,
     pub dns_sync_interval_secs: u64,
-    pub anthropic_api_key: Option<String>,
+
+    /// Which LLM backend `/api/ai/triage` uses. Parsed at startup via
+    /// `LlmProvider::parse`; invalid values fall back to Anthropic.
+    /// Accepted values: `anthropic` | `openai` | `doubao` | `grok`
+    /// (plus aliases: `claude`, `gpt`, `volcengine`, `volc`, `ark`, `xai`).
+    pub llm_provider_raw: String,
+
+    /// API key for the configured LLM provider. Reads `LLM_API_KEY`
+    /// first; falls back to legacy `ANTHROPIC_API_KEY` when
+    /// `LLM_PROVIDER` is unset or `anthropic`, so existing deployments
+    /// keep working without rotating env vars.
+    pub llm_api_key: Option<String>,
 }
 
 impl Config {
@@ -71,9 +82,29 @@ impl Config {
                 .ok()
                 .and_then(|p| p.parse().ok())
                 .unwrap_or(3600),
-            anthropic_api_key: std::env::var("ANTHROPIC_API_KEY")
+            llm_provider_raw: std::env::var("LLM_PROVIDER")
                 .ok()
-                .filter(|s| !s.is_empty()),
+                .filter(|s| !s.is_empty())
+                .unwrap_or_else(|| "anthropic".into()),
+            llm_api_key: std::env::var("LLM_API_KEY")
+                .ok()
+                .filter(|s| !s.is_empty())
+                .or_else(|| {
+                    // Back-compat: honor ANTHROPIC_API_KEY whenever the effective
+                    // provider is Anthropic — LLM_PROVIDER unset, `anthropic`/`claude`,
+                    // or an unknown value (startup falls back to Anthropic there too).
+                    use crate::routes::ai_triage::LlmProvider;
+                    let provider_is_anthropic = std::env::var("LLM_PROVIDER").map_or(true, |s| {
+                        LlmProvider::parse(&s).unwrap_or_default() == LlmProvider::Anthropic
+                    });
+                    if provider_is_anthropic {
+                        std::env::var("ANTHROPIC_API_KEY")
+                            .ok()
+                            .filter(|s| !s.is_empty())
+                    } else {
+                        None
+                    }
+                }),
         }
     }
 }
diff --git a/sigma-api/src/main.rs b/sigma-api/src/main.rs
index 60f94e4..3fd8a1a 100644
--- a/sigma-api/src/main.rs
+++ b/sigma-api/src/main.rs
@@ -66,6 +66,16 @@ async fn main() -> anyhow::Result<()> {
     let http_client = reqwest::Client::new();
     let addr = format!("{}:{}", cfg.listen_host, cfg.listen_port);
 
+    let llm_provider = routes::ai_triage::LlmProvider::parse(&cfg.llm_provider_raw)
+        .unwrap_or_else(|| {
+            tracing::warn!(
+                raw = %cfg.llm_provider_raw,
+                "Unknown LLM_PROVIDER; falling back to anthropic"
+            );
+            routes::ai_triage::LlmProvider::default()
+        });
+    tracing::info!(provider = llm_provider.as_str(), "LLM provider selected");
+
     let app_state = routes::AppState {
         db: pool,
         api_key: cfg.api_key.clone(),
@@ -75,7 +85,8 @@ async fn main() -> anyhow::Result<()> {
         http_client: http_client.clone(),
         jwt_secret: cfg.jwt_secret.clone(),
         jwt_expiry_hours: cfg.jwt_expiry_hours,
-        anthropic_api_key: cfg.anthropic_api_key.clone(),
+        llm_provider,
+        llm_api_key: cfg.llm_api_key.clone(),
     };
 
     // Capture before cfg is moved into notification worker
diff --git a/sigma-api/src/routes/ai_triage.rs b/sigma-api/src/routes/ai_triage.rs
index 329d989..bd163ee 100644
--- a/sigma-api/src/routes/ai_triage.rs
+++ b/sigma-api/src/routes/ai_triage.rs
@@ -7,13 +7,16 @@
 //! AI "brain" is centralized.
 //!
 //! Design notes:
+//! - **Provider-agnostic.** Supports Anthropic, OpenAI, Volcengine
+//!   Doubao, and xAI Grok. Operator picks via `LLM_PROVIDER` env var;
+//!   the same prompt and same response schema work across all four.
+//!   Anthropic gets `cache_control: ephemeral` for prompt caching;
+//!   OpenAI-compatible providers (OpenAI/Doubao/Grok) share one
+//!   chat-completions implementation.
 //! - **Graceful degradation**: always returns 200 OK with a structured
 //!   payload. If the LLM is not configured or unreachable, the response
 //!   carries `available: false` + `note`; the operator workflow degrades
 //!   to "raw alert only" rather than erroring out.
-//! - **Prompt caching**: the system prompt describes the Sigma
-//!   architecture and is sent with `cache_control: ephemeral` so repeated
-//!   triages within the cache TTL (~5 min) skip re-tokenizing it.
 //! - **Read-only**: this endpoint reasons about an alert; it never
 //!   mutates fleet state. Auto-remediation is an explicit non-goal —
 //!   human-in-the-loop is the design.
@@ -28,12 +31,74 @@ use utoipa::ToSchema;
 use crate::errors::AppError;
 use crate::routes::AppState;
 
+// Provider endpoints.
 const ANTHROPIC_URL: &str = "https://api.anthropic.com/v1/messages";
 const ANTHROPIC_VERSION: &str = "2023-06-01";
-const DEFAULT_MODEL: &str = "claude-sonnet-4-6";
+const OPENAI_URL: &str = "https://api.openai.com/v1/chat/completions";
+const DOUBAO_URL: &str = "https://ark.cn-beijing.volces.com/api/v3/chat/completions";
+const GROK_URL: &str = "https://api.x.ai/v1/chat/completions";
+
 const DEFAULT_MAX_TOKENS: u32 = 1024;
 const LLM_TIMEOUT_SECS: u64 = 30;
 
+/// Which LLM backend to call. Selected at sigma-api startup via
+/// `LLM_PROVIDER` env var; can be queried at runtime via `AppState`.
+///
+/// OpenAI, Doubao, and Grok all speak the same `chat/completions` wire
+/// protocol so they share `call_openai_compatible`. Anthropic uses its
+/// own `messages` endpoint with `cache_control: ephemeral` for prompt
+/// caching.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum LlmProvider {
+    Anthropic,
+    OpenAI,
+    Doubao,
+    Grok,
+}
+
+impl LlmProvider {
+    /// Parse an `LLM_PROVIDER` value (trimmed, case-insensitive). `None`
+    /// on unknown values so the caller can decide to fall back vs error.
+    pub fn parse(s: &str) -> Option<Self> {
+        match s.trim().to_ascii_lowercase().as_str() {
+            "anthropic" | "claude" => Some(Self::Anthropic),
+            "openai" | "gpt" => Some(Self::OpenAI),
+            "doubao" | "volcengine" | "volc" | "ark" => Some(Self::Doubao),
+            "grok" | "xai" => Some(Self::Grok),
+            _ => None,
+        }
+    }
+
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            Self::Anthropic => "anthropic",
+            Self::OpenAI => "openai",
+            Self::Doubao => "doubao",
+            Self::Grok => "grok",
+        }
+    }
+
+    /// Default model when the request doesn't override via `model` field.
+    /// Operator can always override per-request; these are sensible
+    /// "cheap and capable" defaults known to exist when this code was
+    /// written. Wrong model name produces a clear API error rather than
+    /// silent fallback — that's the right failure mode.
+    pub fn default_model(&self) -> &'static str {
+        match self {
+            Self::Anthropic => "claude-sonnet-4-6",
+            Self::OpenAI => "gpt-4o-mini",
+            Self::Doubao => "doubao-pro-32k",
+            Self::Grok => "grok-3",
+        }
+    }
+}
+
+impl Default for LlmProvider {
+    fn default() -> Self {
+        Self::Anthropic
+    }
+}
+
 pub fn router() -> Router<AppState> {
     Router::new().route("/api/ai/triage", post(triage))
 }
@@ -94,6 +159,11 @@ pub struct TriageResponse {
     /// "low" | "medium" | "high" — model's self-rated confidence.
     pub confidence: Option<String>,
     pub model: Option<String>,
+    /// Which LLM backend produced this response: "anthropic", "openai",
+    /// "doubao", or "grok". Surfaced so the UI / caller can render the
+    /// origin and so logs are unambiguous when multiple providers are
+    /// rotated through during testing.
+    pub provider: Option<String>,
     /// Diagnostic note (e.g., "LLM unavailable", "malformed JSON").
     pub note: Option<String>,
 }
@@ -112,36 +182,47 @@ pub async fn triage(
     State(state): State<AppState>,
     Json(req): Json<TriageRequest>,
 ) -> Result<Json<TriageResponse>, AppError> {
+    let provider = state.llm_provider;
+    let provider_str = provider.as_str();
+
     // If no API key configured, degrade gracefully — return the alert
     // back with `available: false` so the UI can still render something.
-    let Some(api_key) = state.anthropic_api_key.clone() else {
+    let Some(api_key) = state.llm_api_key.clone() else {
         return Ok(Json(TriageResponse {
             available: false,
             diagnosis: Some(format!(
-                "AI triage not configured (no ANTHROPIC_API_KEY). Alert: {}",
-                req.alert.name
+                "AI triage not configured (no LLM_API_KEY for provider={}). Alert: {}",
+                provider_str, req.alert.name
             )),
             likely_causes: vec![],
             remediation_steps: vec![
-                "Configure ANTHROPIC_API_KEY in sigma-api environment".into(),
+                format!(
+                    "Set LLM_PROVIDER (anthropic|openai|doubao|grok) — currently {} — and LLM_API_KEY",
+                    provider_str
+                ),
                 "Restart sigma-api".into(),
             ],
             confidence: Some("low".into()),
             model: None,
-            note: Some("ANTHROPIC_API_KEY not configured".into()),
+            provider: Some(provider_str.to_string()),
+            note: Some(format!(
+                "API key not configured for provider={}",
+                provider_str
+            )),
         }));
     };
 
     let model = req
         .model
         .clone()
-        .unwrap_or_else(|| DEFAULT_MODEL.to_string());
+        .unwrap_or_else(|| provider.default_model().to_string());
 
     let system_prompt = build_system_prompt();
     let user_prompt = build_user_prompt(&req);
 
-    let llm_text = match call_anthropic(
+    let llm_text = match call_llm(
         &state.http_client,
+        provider,
         &api_key,
         &model,
         &system_prompt,
@@ -152,7 +233,7 @@ pub async fn triage(
     {
         Ok(text) => text,
         Err(e) => {
-            warn!(error = %e, "LLM call failed; degrading to alert-only response");
+            warn!(provider = %provider_str, error = %e, "LLM call failed; degrading to alert-only response");
             return Ok(Json(TriageResponse {
                 available: false,
                 diagnosis: Some(format!("Raw alert: {}", req.alert.name)),
@@ -162,12 +243,13 @@ pub async fn triage(
                 ],
                 confidence: Some("low".into()),
                 model: Some(model),
+                provider: Some(provider_str.to_string()),
                 note: Some(format!("LLM unreachable: {:#}", e)),
             }));
         }
     };
 
-    Ok(Json(parse_llm_response(&llm_text, &model)))
+    Ok(Json(parse_llm_response(&llm_text, &model, provider_str)))
 }
 
 // ---------- Prompt construction ----------
@@ -265,6 +347,39 @@ fn build_user_prompt(req: &TriageRequest) -> String {
     out
 }
 
+// ---------- LLM dispatch ----------
+
+/// Route to the correct provider implementation. Anthropic uses its own
+/// Messages API; OpenAI, Doubao, and Grok share one chat-completions
+/// implementation.
+async fn call_llm(
+    http: &reqwest::Client,
+    provider: LlmProvider,
+    api_key: &str,
+    model: &str,
+    system_prompt: &str,
+    user_prompt: &str,
+    max_tokens: u32,
+) -> anyhow::Result<String> {
+    match provider {
+        LlmProvider::Anthropic => {
+            call_anthropic(http, api_key, model, system_prompt, user_prompt, max_tokens).await
+        }
+        LlmProvider::OpenAI | LlmProvider::Doubao | LlmProvider::Grok => {
+            call_openai_compatible(
+                http,
+                provider,
+                api_key,
+                model,
+                system_prompt,
+                user_prompt,
+                max_tokens,
+            )
+            .await
+        }
+    }
+}
+
 // ---------- Anthropic Messages API client (inline, no SDK) ----------
 
 async fn call_anthropic(
@@ -339,12 +454,105 @@ async fn call_anthropic(
     Ok(text)
 }
 
+// ---------- OpenAI-compatible client (OpenAI / Doubao / Grok) ----------
+
+/// Single implementation for any provider that speaks the OpenAI
+/// `chat/completions` wire protocol. We currently dispatch this for
+/// OpenAI, Volcengine Doubao, and xAI Grok — they accept the same body
+/// shape, `Bearer` auth, and return the same response envelope. Anthropic
+/// has its own `messages` endpoint and lives in `call_anthropic`.
+///
+/// Note: this path does not use Anthropic's `cache_control` block —
+/// OpenAI's automatic prompt caching kicks in on its own for sufficiently
+/// long, stable system prompts; Doubao/Grok cache support is provider-
+/// and model-specific. We log token usage including any `cached_tokens`
+/// field the provider exposes (OpenAI's `prompt_tokens_details`).
+async fn call_openai_compatible(
+    http: &reqwest::Client,
+    provider: LlmProvider,
+    api_key: &str,
+    model: &str,
+    system_prompt: &str,
+    user_prompt: &str,
+    max_tokens: u32,
+) -> anyhow::Result<String> {
+    let url = match provider {
+        LlmProvider::OpenAI => OPENAI_URL,
+        LlmProvider::Doubao => DOUBAO_URL,
+        LlmProvider::Grok => GROK_URL,
+        LlmProvider::Anthropic => {
+            anyhow::bail!("call_openai_compatible called with Anthropic provider — this is a bug")
+        }
+    };
+
+    let body = json!({
+        "model": model,
+        "max_tokens": max_tokens,
+        "messages": [
+            { "role": "system", "content": system_prompt },
+            { "role": "user",   "content": user_prompt }
+        ]
+    });
+
+    let resp = http
+        .post(url)
+        .header("Authorization", format!("Bearer {}", api_key))
+        .header("Content-Type", "application/json")
+        .timeout(Duration::from_secs(LLM_TIMEOUT_SECS))
+        .json(&body)
+        .send()
+        .await?;
+
+    let status = resp.status();
+    let bytes = resp.bytes().await?;
+
+    if !status.is_success() {
+        anyhow::bail!(
+            "{} API returned {}: {}",
+            provider.as_str(),
+            status,
+            String::from_utf8_lossy(&bytes)
+        );
+    }
+
+    let parsed: Value = serde_json::from_slice(&bytes)?;
+
+    if let Some(usage) = parsed.get("usage") {
+        info!(
+            provider = provider.as_str(),
+            prompt_tokens = ?usage.get("prompt_tokens"),
+            completion_tokens = ?usage.get("completion_tokens"),
+            total_tokens = ?usage.get("total_tokens"),
+            cached_tokens = ?usage
+                .get("prompt_tokens_details")
+                .and_then(|d| d.get("cached_tokens")),
+            "OpenAI-compatible usage"
+        );
+    }
+
+    // Standard chat-completions response shape:
+    //   { "choices": [ { "message": { "content": "..." } }, ... ] }
+    let text = parsed
+        .get("choices")
+        .and_then(|c| c.as_array())
+        .and_then(|arr| arr.first())
+        .and_then(|first| first.get("message"))
+        .and_then(|m| m.get("content"))
+        .and_then(|t| t.as_str())
+        .ok_or_else(|| {
+            anyhow::anyhow!("no message content in {} response", provider.as_str())
+        })?
+        .to_string();
+
+    Ok(text)
+}
+
 // ---------- LLM response parsing ----------
 
-fn parse_llm_response(text: &str, model: &str) -> TriageResponse {
+fn parse_llm_response(text: &str, model: &str, provider: &str) -> TriageResponse {
     // Strict JSON parse first.
     match serde_json::from_str::<Value>(text.trim()) {
-        Ok(v) => extract_response(&v, model, None),
+        Ok(v) => extract_response(&v, model, provider, None),
         Err(_) => {
             // Try to extract a {...} substring (in case the model wrapped
             // the JSON in prose despite the instructions).
@@ -353,6 +561,7 @@ fn parse_llm_response(text: &str, model: &str) -> TriageResponse {
                     return extract_response(
                         &v,
                         model,
+                        provider,
                         Some("LLM wrapped JSON in prose; extracted".into()),
                     );
                 }
@@ -366,13 +575,19 @@ fn parse_llm_response(text: &str, model: &str) -> TriageResponse {
                 remediation_steps: vec![],
                 confidence: Some("low".into()),
                 model: Some(model.into()),
+                provider: Some(provider.into()),
                 note: Some("LLM did not return valid JSON; raw text in diagnosis".into()),
             }
         }
     }
 }
 
-fn extract_response(v: &Value, model: &str, note: Option<String>) -> TriageResponse {
+fn extract_response(
+    v: &Value,
+    model: &str,
+    provider: &str,
+    note: Option<String>,
+) -> TriageResponse {
     let diagnosis = v
         .get("diagnosis")
         .and_then(|s| s.as_str())
@@ -407,6 +622,7 @@ fn extract_response(v: &Value, model: &str, note: Option<String>) -> TriageRespo
         remediation_steps,
         confidence,
         model: Some(model.into()),
+        provider: Some(provider.into()),
         note,
     }
 }
@@ -433,7 +649,7 @@ mod tests {
             "remediation_steps": ["Check jmap", "Restart"],
             "confidence": "medium"
         }"#;
-        let r = parse_llm_response(text, "claude-sonnet-4-6");
+        let r = parse_llm_response(text, "claude-sonnet-4-6", "anthropic");
         assert!(r.available);
         assert_eq!(r.diagnosis.unwrap(), "java process consumed 80% RSS");
         assert_eq!(r.likely_causes.len(), 2);
@@ -452,7 +668,7 @@ mod tests {
             "confidence": "high"
         }
         Hope this helps."#;
-        let r = parse_llm_response(text, "claude-sonnet-4-6");
+        let r = parse_llm_response(text, "claude-sonnet-4-6", "anthropic");
         assert!(r.available);
         assert!(r.note.as_deref().unwrap().contains("wrapped JSON"));
         assert_eq!(r.diagnosis.unwrap(), "OOM kill");
@@ -461,7 +677,7 @@ mod tests {
     #[test]
     fn parse_total_garbage() {
         let text = "I am not a JSON response.";
-        let r = parse_llm_response(text, "claude-sonnet-4-6");
+        let r = parse_llm_response(text, "claude-sonnet-4-6", "anthropic");
         assert!(r.available);
         assert_eq!(r.diagnosis.unwrap(), "I am not a JSON response.");
         assert!(r.note.as_deref().unwrap().contains("did not return valid JSON"));
@@ -474,11 +690,57 @@ mod tests {
             "diagnosis": "unknown",
             "remediation_steps": ["gather more data"]
         }"#;
-        let r = parse_llm_response(text, "claude-sonnet-4-6");
+        let r = parse_llm_response(text, "claude-sonnet-4-6", "anthropic");
         assert!(r.available);
         assert_eq!(r.diagnosis.unwrap(), "unknown");
         assert!(r.likely_causes.is_empty());
         assert_eq!(r.remediation_steps.len(), 1);
         assert!(r.confidence.is_none());
     }
+
+    #[test]
+    fn parse_provider_label_propagates() {
+        // The provider field should appear in the response regardless of
+        // which parse path was taken.
+        let strict = parse_llm_response(
+            r#"{"diagnosis":"x","confidence":"low"}"#,
+            "gpt-4o-mini",
+            "openai",
+        );
+        assert_eq!(strict.provider.as_deref(), Some("openai"));
+        assert_eq!(strict.model.as_deref(), Some("gpt-4o-mini"));
+
+        let prose = parse_llm_response(
+            r#"Sure: {"diagnosis":"y"}. EOF"#,
+            "doubao-pro-32k",
+            "doubao",
+        );
+        assert_eq!(prose.provider.as_deref(), Some("doubao"));
+
+        let garbage = parse_llm_response("not json", "grok-3", "grok");
+        assert_eq!(garbage.provider.as_deref(), Some("grok"));
+    }
+
+    #[test]
+    fn llm_provider_parse_aliases() {
+        assert_eq!(LlmProvider::parse("anthropic"), Some(LlmProvider::Anthropic));
+        assert_eq!(LlmProvider::parse("CLAUDE"), Some(LlmProvider::Anthropic));
+        assert_eq!(LlmProvider::parse("OpenAI"), Some(LlmProvider::OpenAI));
+        assert_eq!(LlmProvider::parse("gpt"), Some(LlmProvider::OpenAI));
+        for alias in ["doubao", "volcengine", "volc", "ark"] {
+            assert_eq!(LlmProvider::parse(alias), Some(LlmProvider::Doubao));
+        }
+        assert_eq!(LlmProvider::parse("grok"), Some(LlmProvider::Grok));
+        assert_eq!(LlmProvider::parse("  xAI "), Some(LlmProvider::Grok));
+        assert_eq!(LlmProvider::parse("unknown"), None);
+    }
+
+    #[test]
+    fn llm_provider_defaults() {
+        assert_eq!(LlmProvider::Anthropic.default_model(), "claude-sonnet-4-6");
+        assert_eq!(LlmProvider::OpenAI.default_model(), "gpt-4o-mini");
+        assert_eq!(LlmProvider::Doubao.default_model(), "doubao-pro-32k");
+        assert_eq!(LlmProvider::Grok.default_model(), "grok-3");
+        assert_eq!(LlmProvider::default(), LlmProvider::Anthropic);
+    }
 }
diff --git a/sigma-api/src/routes/mod.rs b/sigma-api/src/routes/mod.rs
index febf063..290114f 100644
--- a/sigma-api/src/routes/mod.rs
+++ b/sigma-api/src/routes/mod.rs
@@ -43,7 +43,8 @@ pub struct AppState {
     pub http_client: reqwest::Client,
     pub jwt_secret: String,
     pub jwt_expiry_hours: u64,
-    pub anthropic_api_key: Option<String>,
+    pub llm_provider: ai_triage::LlmProvider,
+    pub llm_api_key: Option<String>,
 }
 
 /// Auth middleware: try Bearer JWT → try X-Api-Key → allow if no API_KEY set → 401.