lai3d · lai3d · May 18, 2026 · May 18, 2026
diff --git a/sigma-api/README.md b/sigma-api/README.md
@@ -72,16 +72,30 @@ cargo run              # Migrations run automatically on startup
 
 Centralizes the AI "brain" of the platform: sigma-agent stays lean and only exposes raw capabilities; sigma-api takes an alert plus structured context and returns a triage suggestion (diagnosis, ranked likely causes, ordered remediation steps, confidence). Always returns `200 OK` — if the LLM is not configured or unreachable, the response carries `available: false` and the operator workflow degrades to "raw alert only".
 
-**Backend:** Anthropic Messages API (default `claude-sonnet-4-6`), called directly via `reqwest` (no SDK). System prompt is sent with `cache_control: ephemeral` so repeated triages within ~5 min reuse the cached prompt — typical follow-up call is 100s of tokens of input instead of thousands.
+**Backend — pluggable.** Four providers supported out of the box, selected at startup via `LLM_PROVIDER`. All share the same prompt and response schema:
+
+| `LLM_PROVIDER` | Endpoint | Default model | Notes |
+|---|---|---|---|
+| `anthropic` (default) | `api.anthropic.com/v1/messages` | `claude-sonnet-4-6` | Uses `cache_control: ephemeral` on the system prompt for repeated-triage savings |
+| `openai` | `api.openai.com/v1/chat/completions` | `gpt-4o-mini` | OpenAI's automatic prompt caching kicks in for long stable system prompts |
+| `doubao` | `ark.cn-beijing.volces.com/api/v3/chat/completions` | `doubao-pro-32k` | Volcengine's LLM — natural fit for Volcengine-hosted deployments |
+| `grok` | `api.x.ai/v1/chat/completions` | `grok-3` | xAI's chat-completions endpoint |
+
+Anthropic uses its own Messages API (with `cache_control` for prompt caching); OpenAI / Doubao / Grok share one OpenAI-compatible chat-completions implementation. All called directly via `reqwest` — no SDK dependency.
+
+The `model` field on every `TriageRequest` allows a per-call override of the default model. Aliases accepted for `LLM_PROVIDER`: `claude` → anthropic, `gpt` → openai, `volcengine` / `ark` → doubao, `xai` → grok. Unknown values fall back to anthropic with a startup warning.
 
 **Configuration:**
 
 ```bash
-export ANTHROPIC_API_KEY=sk-ant-...
-# Optional: ANTHROPIC_MODEL=claude-sonnet-4-6 (set per request via "model" field)
+export LLM_PROVIDER=anthropic              # or: openai | doubao | grok
+export LLM_API_KEY=...                     # the key for the selected provider
+
+# Back-compat: ANTHROPIC_API_KEY still works when LLM_PROVIDER is anthropic
+# (or unset). No env-var rotation needed for existing deployments.
 ```
 
-If `ANTHROPIC_API_KEY` is unset, the endpoint returns immediately with `available: false` — no startup failure, no per-request error.
+If `LLM_API_KEY` is unset (and, when the provider is anthropic, the back-compat `ANTHROPIC_API_KEY` is unset as well), the endpoint returns immediately with `available: false` and a note pointing at the misconfiguration — no startup failure, no per-request error.
 
 **Read-only by design.** This endpoint never mutates fleet state; auto-remediation is an explicit non-goal. Human-in-the-loop: the LLM proposes, the operator decides.
 
@@ -123,6 +137,7 @@ curl -s -X POST http://localhost:3000/api/ai/triage \
   ],
   "confidence": "medium",
   "model": "claude-sonnet-4-6",
+  "provider": "anthropic",
   "note": null
 }
 ```

diff --git a/sigma-api/src/config.rs b/sigma-api/src/config.rs
@@ -15,7 +15,18 @@ pub struct Config {
     pub jwt_secret: String,
     pub jwt_expiry_hours: u64,
     pub dns_sync_interval_secs: u64,
-    pub anthropic_api_key: Option<String>,
+
+    /// Which LLM backend `/api/ai/triage` uses. Parsed at startup via
+    /// `LlmProvider::parse`; invalid values fall back to Anthropic.
+    /// Accepted values: `anthropic` | `openai` | `doubao` | `grok`
+    /// (plus aliases: `claude`, `gpt`, `volcengine`, `ark`, `xai`).
+    pub llm_provider_raw: String,
+
+    /// API key for the configured LLM provider. Reads `LLM_API_KEY`
+    /// first; falls back to legacy `ANTHROPIC_API_KEY` when
+    /// `LLM_PROVIDER` is unset or `anthropic`, so existing deployments
+    /// keep working without rotating env vars.
+    pub llm_api_key: Option<String>,
 }
 
 impl Config {
@@ -71,9 +82,29 @@ impl Config {
                 .ok()
                 .and_then(|p| p.parse().ok())
                 .unwrap_or(3600),
-            anthropic_api_key: std::env::var("ANTHROPIC_API_KEY")
+            llm_provider_raw: std::env::var("LLM_PROVIDER")
                 .ok()
-                .filter(|s| !s.is_empty()),
+                .filter(|s| !s.is_empty())
+                .unwrap_or_else(|| "anthropic".into()),
+            llm_api_key: std::env::var("LLM_API_KEY")
+                .ok()
+                .filter(|s| !s.is_empty())
+                .or_else(|| {
+                    // Back-compat: ANTHROPIC_API_KEY still works when
+                    // LLM_PROVIDER is unset, blank, or exactly `anthropic`
+                    // (case-insensitive); aliases such as `claude` do NOT
+                    // trigger this fallback. No env-var rotation needed.
+                    let provider_is_anthropic = std::env::var("LLM_PROVIDER")
+                        .map(|s| s.trim().eq_ignore_ascii_case("anthropic") || s.trim().is_empty())
+                        .unwrap_or(true);
+                    if provider_is_anthropic {
+                        std::env::var("ANTHROPIC_API_KEY")
+                            .ok()
+                            .filter(|s| !s.is_empty())
+                    } else {
+                        None
+                    }
+                }),
         }
     }
 }
diff --git a/sigma-api/src/main.rs b/sigma-api/src/main.rs
@@ -66,6 +66,16 @@ async fn main() -> anyhow::Result<()> {
     let http_client = reqwest::Client::new();
     let addr = format!("{}:{}", cfg.listen_host, cfg.listen_port);
 
+    let llm_provider = routes::ai_triage::LlmProvider::parse(&cfg.llm_provider_raw)
+        .unwrap_or_else(|| {
+            tracing::warn!(
+                raw = %cfg.llm_provider_raw,
+                "Unknown LLM_PROVIDER; falling back to anthropic"
+            );
+            routes::ai_triage::LlmProvider::default()
+        });
+    tracing::info!(provider = llm_provider.as_str(), "LLM provider selected");
+
     let app_state = routes::AppState {
         db: pool,
         api_key: cfg.api_key.clone(),
@@ -75,7 +85,8 @@ async fn main() -> anyhow::Result<()> {
         http_client: http_client.clone(),
         jwt_secret: cfg.jwt_secret.clone(),
         jwt_expiry_hours: cfg.jwt_expiry_hours,
-        anthropic_api_key: cfg.anthropic_api_key.clone(),
+        llm_provider,
+        llm_api_key: cfg.llm_api_key.clone(),
     };
 
     // Capture before cfg is moved into notification worker