From ab97adc5a8449bf14c9ccbf5d6ab9b55696f55d2 Mon Sep 17 00:00:00 2001 From: lai3d Date: Tue, 19 May 2026 02:25:55 +0800 Subject: [PATCH] =?UTF-8?q?Multi-LLM=20backend=20for=20/api/ai/triage=20?= =?UTF-8?q?=E2=80=94=20Anthropic=20/=20OpenAI=20/=20Doubao=20/=20Grok?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds an `LlmProvider` enum and a single dispatcher so the triage endpoint can run against any of four backends, selected at startup via the `LLM_PROVIDER` env var. The same prompt and response schema work across all four; operators pick the provider that fits their stack. Why this matters: - The endpoint was previously hard-coded to Anthropic. For Volcengine-hosted deployments, operators want Doubao; users with existing OpenAI/Grok keys avoid the overhead of an Anthropic signup; for the rest, Anthropic remains the default with its prompt-cache advantage. - Adds a clean talking point: the AI brain is provider-agnostic by design. Locking the platform to one LLM vendor would have been a bad call. Implementation: - `LlmProvider` enum (Anthropic / OpenAI / Doubao / Grok) with `parse`, `as_str`, `default_model`, `Default = Anthropic`. Aliases accepted (claude, gpt, volcengine, volc, ark, xai). - `call_llm` dispatches to `call_anthropic` (existing, preserves `cache_control: ephemeral`) or `call_openai_compatible` (new, shared by OpenAI/Doubao/Grok — same chat/completions wire format, same `Authorization: Bearer` auth). - `TriageResponse` gains a `provider` field so the caller can see which backend produced a response (useful when rotating providers during testing). - Config switches from `ANTHROPIC_API_KEY` to `LLM_PROVIDER` + `LLM_API_KEY`. Back-compat: `ANTHROPIC_API_KEY` is still honored when `LLM_PROVIDER` is anthropic or unset — existing deployments don't need to rotate env vars. Tests: - 4 existing parse tests still pass. 
- 3 new tests: provider label propagation across all parse paths, `LlmProvider::parse` alias matrix, default-model + Default impl. Zero new crate dependencies — still just reqwest + serde_json. Co-Authored-By: Claude Opus 4.7 (1M context) --- sigma-api/README.md | 23 ++- sigma-api/src/config.rs | 37 +++- sigma-api/src/main.rs | 13 +- sigma-api/src/routes/ai_triage.rs | 302 ++++++++++++++++++++++++++++-- sigma-api/src/routes/mod.rs | 3 +- 5 files changed, 349 insertions(+), 29 deletions(-) diff --git a/sigma-api/README.md b/sigma-api/README.md index 9c956c1..8fd612e 100644 --- a/sigma-api/README.md +++ b/sigma-api/README.md @@ -72,16 +72,30 @@ cargo run # Migrations run automatically on startup Centralizes the AI "brain" of the platform: sigma-agent stays lean and only exposes raw capabilities; sigma-api takes an alert plus structured context and returns a triage suggestion (diagnosis, ranked likely causes, ordered remediation steps, confidence). Always returns `200 OK` — if the LLM is not configured or unreachable, the response carries `available: false` and the operator workflow degrades to "raw alert only". -**Backend:** Anthropic Messages API (default `claude-sonnet-4-6`), called directly via `reqwest` (no SDK). System prompt is sent with `cache_control: ephemeral` so repeated triages within ~5 min reuse the cached prompt — typical follow-up call is 100s of tokens of input instead of thousands. +**Backend — pluggable.** Four providers supported out of the box, selected at startup via `LLM_PROVIDER`. 
All share the same prompt and response schema: + +| `LLM_PROVIDER` | Endpoint | Default model | Notes | +|---|---|---|---| +| `anthropic` (default) | `api.anthropic.com/v1/messages` | `claude-sonnet-4-6` | Uses `cache_control: ephemeral` on the system prompt for repeated-triage savings | +| `openai` | `api.openai.com/v1/chat/completions` | `gpt-4o-mini` | OpenAI's automatic prompt caching kicks in for long stable system prompts | +| `doubao` | `ark.cn-beijing.volces.com/api/v3/chat/completions` | `doubao-pro-32k` | Volcengine's LLM — natural fit for Volcengine-hosted deployments | +| `grok` | `api.x.ai/v1/chat/completions` | `grok-3` | xAI's chat-completions endpoint | + +Anthropic uses its own Messages API (with `cache_control` for prompt caching); OpenAI / Doubao / Grok share one OpenAI-compatible chat-completions implementation. All are called directly via `reqwest` — no SDK dependency. + +The optional `model` field on a `TriageRequest` overrides the provider's default model for that call. Aliases accepted for `LLM_PROVIDER`: `claude` → anthropic, `gpt` → openai, `volcengine` / `volc` / `ark` → doubao, `xai` → grok. Unknown values fall back to anthropic with a startup warning. **Configuration:** ```bash -export ANTHROPIC_API_KEY=sk-ant-... -# Optional: ANTHROPIC_MODEL=claude-sonnet-4-6 (set per request via "model" field) +export LLM_PROVIDER=anthropic # or: openai | doubao | grok +export LLM_API_KEY=... # the key for the selected provider + +# Back-compat: ANTHROPIC_API_KEY still works when LLM_PROVIDER is anthropic +# (or unset). No env-var rotation needed for existing deployments. ``` -If `ANTHROPIC_API_KEY` is unset, the endpoint returns immediately with `available: false` — no startup failure, no per-request error. +If `LLM_API_KEY` (and the back-compat `ANTHROPIC_API_KEY`) are both unset, the endpoint returns immediately with `available: false` and a note pointing at the misconfiguration — no startup failure, no per-request error. 
**Read-only by design.** This endpoint never mutates fleet state; auto-remediation is an explicit non-goal. Human-in-the-loop: the LLM proposes, the operator decides. @@ -123,6 +137,7 @@ curl -s -X POST http://localhost:3000/api/ai/triage \ ], "confidence": "medium", "model": "claude-sonnet-4-6", + "provider": "anthropic", "note": null } ``` diff --git a/sigma-api/src/config.rs b/sigma-api/src/config.rs index 3e8ad94..b2960df 100644 --- a/sigma-api/src/config.rs +++ b/sigma-api/src/config.rs @@ -15,7 +15,18 @@ pub struct Config { pub jwt_secret: String, pub jwt_expiry_hours: u64, pub dns_sync_interval_secs: u64, - pub anthropic_api_key: Option, + + /// Which LLM backend `/api/ai/triage` uses. Parsed at startup via + /// `LlmProvider::parse`; invalid values fall back to Anthropic. + /// Accepted values: `anthropic` | `openai` | `doubao` | `grok` + /// (plus aliases: `claude`, `gpt`, `volcengine`, `ark`, `xai`). + pub llm_provider_raw: String, + + /// API key for the configured LLM provider. Reads `LLM_API_KEY` + /// first; falls back to legacy `ANTHROPIC_API_KEY` when + /// `LLM_PROVIDER` is unset or `anthropic`, so existing deployments + /// keep working without rotating env vars. + pub llm_api_key: Option, } impl Config { @@ -71,9 +82,29 @@ impl Config { .ok() .and_then(|p| p.parse().ok()) .unwrap_or(3600), - anthropic_api_key: std::env::var("ANTHROPIC_API_KEY") + llm_provider_raw: std::env::var("LLM_PROVIDER") .ok() - .filter(|s| !s.is_empty()), + .filter(|s| !s.is_empty()) + .unwrap_or_else(|| "anthropic".into()), + llm_api_key: std::env::var("LLM_API_KEY") + .ok() + .filter(|s| !s.is_empty()) + .or_else(|| { + // Back-compat: ANTHROPIC_API_KEY still works when the + // provider is anthropic (or unset, which defaults to + // anthropic). Existing deployments keep working without + // rotating env vars. 
+ let provider_is_anthropic = std::env::var("LLM_PROVIDER") + .map(|s| s.trim().eq_ignore_ascii_case("anthropic") || s.trim().is_empty()) + .unwrap_or(true); + if provider_is_anthropic { + std::env::var("ANTHROPIC_API_KEY") + .ok() + .filter(|s| !s.is_empty()) + } else { + None + } + }), } } } diff --git a/sigma-api/src/main.rs b/sigma-api/src/main.rs index 60f94e4..3fd8a1a 100644 --- a/sigma-api/src/main.rs +++ b/sigma-api/src/main.rs @@ -66,6 +66,16 @@ async fn main() -> anyhow::Result<()> { let http_client = reqwest::Client::new(); let addr = format!("{}:{}", cfg.listen_host, cfg.listen_port); + let llm_provider = routes::ai_triage::LlmProvider::parse(&cfg.llm_provider_raw) + .unwrap_or_else(|| { + tracing::warn!( + raw = %cfg.llm_provider_raw, + "Unknown LLM_PROVIDER; falling back to anthropic" + ); + routes::ai_triage::LlmProvider::default() + }); + tracing::info!(provider = llm_provider.as_str(), "LLM provider selected"); + let app_state = routes::AppState { db: pool, api_key: cfg.api_key.clone(), @@ -75,7 +85,8 @@ async fn main() -> anyhow::Result<()> { http_client: http_client.clone(), jwt_secret: cfg.jwt_secret.clone(), jwt_expiry_hours: cfg.jwt_expiry_hours, - anthropic_api_key: cfg.anthropic_api_key.clone(), + llm_provider, + llm_api_key: cfg.llm_api_key.clone(), }; // Capture before cfg is moved into notification worker diff --git a/sigma-api/src/routes/ai_triage.rs b/sigma-api/src/routes/ai_triage.rs index 329d989..bd163ee 100644 --- a/sigma-api/src/routes/ai_triage.rs +++ b/sigma-api/src/routes/ai_triage.rs @@ -7,13 +7,16 @@ //! AI "brain" is centralized. //! //! Design notes: +//! - **Provider-agnostic.** Supports Anthropic, OpenAI, Volcengine +//! Doubao, and xAI Grok. Operator picks via `LLM_PROVIDER` env var; +//! the same prompt and same response schema work across all four. +//! Anthropic gets `cache_control: ephemeral` for prompt caching; +//! OpenAI-compatible providers (OpenAI/Doubao/Grok) share one +//! chat-completions implementation. //! 
- **Graceful degradation**: always returns 200 OK with a structured //! payload. If the LLM is not configured or unreachable, the response //! carries `available: false` + `note`; the operator workflow degrades //! to "raw alert only" rather than erroring out. -//! - **Prompt caching**: the system prompt describes the Sigma -//! architecture and is sent with `cache_control: ephemeral` so repeated -//! triages within the cache TTL (~5 min) skip re-tokenizing it. //! - **Read-only**: this endpoint reasons about an alert; it never //! mutates fleet state. Auto-remediation is an explicit non-goal — //! human-in-the-loop is the design. @@ -28,12 +31,74 @@ use utoipa::ToSchema; use crate::errors::AppError; use crate::routes::AppState; +// Provider endpoints. const ANTHROPIC_URL: &str = "https://api.anthropic.com/v1/messages"; const ANTHROPIC_VERSION: &str = "2023-06-01"; -const DEFAULT_MODEL: &str = "claude-sonnet-4-6"; +const OPENAI_URL: &str = "https://api.openai.com/v1/chat/completions"; +const DOUBAO_URL: &str = "https://ark.cn-beijing.volces.com/api/v3/chat/completions"; +const GROK_URL: &str = "https://api.x.ai/v1/chat/completions"; + const DEFAULT_MAX_TOKENS: u32 = 1024; const LLM_TIMEOUT_SECS: u64 = 30; +/// Which LLM backend to call. Selected at sigma-api startup via +/// `LLM_PROVIDER` env var; can be queried at runtime via `AppState`. +/// +/// OpenAI, Doubao, and Grok all speak the same `chat/completions` wire +/// protocol so they share `call_openai_compatible`. Anthropic uses its +/// own `messages` endpoint with `cache_control: ephemeral` for prompt +/// caching. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum LlmProvider { + Anthropic, + OpenAI, + Doubao, + Grok, +} + +impl LlmProvider { + /// Parse `LLM_PROVIDER` env var (case-insensitive). Returns `None` + /// on unknown values so the caller can decide to fall back vs error. 
+ pub fn parse(s: &str) -> Option { + match s.trim().to_ascii_lowercase().as_str() { + "anthropic" | "claude" => Some(Self::Anthropic), + "openai" | "gpt" => Some(Self::OpenAI), + "doubao" | "volcengine" | "volc" | "ark" => Some(Self::Doubao), + "grok" | "xai" => Some(Self::Grok), + _ => None, + } + } + + pub fn as_str(&self) -> &'static str { + match self { + Self::Anthropic => "anthropic", + Self::OpenAI => "openai", + Self::Doubao => "doubao", + Self::Grok => "grok", + } + } + + /// Default model when the request doesn't override via `model` field. + /// Operator can always override per-request; these are sensible + /// "cheap and capable" defaults known to exist when this code was + /// written. Wrong model name produces a clear API error rather than + /// silent fallback — that's the right failure mode. + pub fn default_model(&self) -> &'static str { + match self { + Self::Anthropic => "claude-sonnet-4-6", + Self::OpenAI => "gpt-4o-mini", + Self::Doubao => "doubao-pro-32k", + Self::Grok => "grok-3", + } + } +} + +impl Default for LlmProvider { + fn default() -> Self { + Self::Anthropic + } +} + pub fn router() -> Router { Router::new().route("/api/ai/triage", post(triage)) } @@ -94,6 +159,11 @@ pub struct TriageResponse { /// "low" | "medium" | "high" — model's self-rated confidence. pub confidence: Option, pub model: Option, + /// Which LLM backend produced this response: "anthropic", "openai", + /// "doubao", or "grok". Surfaced so the UI / caller can render the + /// origin and so logs are unambiguous when multiple providers are + /// rotated through during testing. + pub provider: Option, /// Diagnostic note (e.g., "LLM unavailable", "malformed JSON"). 
pub note: Option, } @@ -112,36 +182,47 @@ pub async fn triage( State(state): State, Json(req): Json, ) -> Result, AppError> { + let provider = state.llm_provider; + let provider_str = provider.as_str(); + // If no API key configured, degrade gracefully — return the alert // back with `available: false` so the UI can still render something. - let Some(api_key) = state.anthropic_api_key.clone() else { + let Some(api_key) = state.llm_api_key.clone() else { return Ok(Json(TriageResponse { available: false, diagnosis: Some(format!( - "AI triage not configured (no ANTHROPIC_API_KEY). Alert: {}", - req.alert.name + "AI triage not configured (no LLM_API_KEY for provider={}). Alert: {}", + provider_str, req.alert.name )), likely_causes: vec![], remediation_steps: vec![ - "Configure ANTHROPIC_API_KEY in sigma-api environment".into(), + format!( + "Set LLM_PROVIDER (anthropic|openai|doubao|grok) — currently {} — and LLM_API_KEY", + provider_str + ), "Restart sigma-api".into(), ], confidence: Some("low".into()), model: None, - note: Some("ANTHROPIC_API_KEY not configured".into()), + provider: Some(provider_str.to_string()), + note: Some(format!( + "API key not configured for provider={}", + provider_str + )), })); }; let model = req .model .clone() - .unwrap_or_else(|| DEFAULT_MODEL.to_string()); + .unwrap_or_else(|| provider.default_model().to_string()); let system_prompt = build_system_prompt(); let user_prompt = build_user_prompt(&req); - let llm_text = match call_anthropic( + let llm_text = match call_llm( &state.http_client, + provider, &api_key, &model, &system_prompt, @@ -152,7 +233,7 @@ pub async fn triage( { Ok(text) => text, Err(e) => { - warn!(error = %e, "LLM call failed; degrading to alert-only response"); + warn!(provider = %provider_str, error = %e, "LLM call failed; degrading to alert-only response"); return Ok(Json(TriageResponse { available: false, diagnosis: Some(format!("Raw alert: {}", req.alert.name)), @@ -162,12 +243,13 @@ pub async fn triage( ], 
confidence: Some("low".into()), model: Some(model), + provider: Some(provider_str.to_string()), note: Some(format!("LLM unreachable: {:#}", e)), })); } }; - Ok(Json(parse_llm_response(&llm_text, &model))) + Ok(Json(parse_llm_response(&llm_text, &model, provider_str))) } // ---------- Prompt construction ---------- @@ -265,6 +347,39 @@ fn build_user_prompt(req: &TriageRequest) -> String { out } +// ---------- LLM dispatch ---------- + +/// Route to the correct provider implementation. Anthropic uses its own +/// Messages API; OpenAI, Doubao, and Grok share one chat-completions +/// implementation. +async fn call_llm( + http: &reqwest::Client, + provider: LlmProvider, + api_key: &str, + model: &str, + system_prompt: &str, + user_prompt: &str, + max_tokens: u32, +) -> anyhow::Result { + match provider { + LlmProvider::Anthropic => { + call_anthropic(http, api_key, model, system_prompt, user_prompt, max_tokens).await + } + LlmProvider::OpenAI | LlmProvider::Doubao | LlmProvider::Grok => { + call_openai_compatible( + http, + provider, + api_key, + model, + system_prompt, + user_prompt, + max_tokens, + ) + .await + } + } +} + // ---------- Anthropic Messages API client (inline, no SDK) ---------- async fn call_anthropic( @@ -339,12 +454,105 @@ async fn call_anthropic( Ok(text) } +// ---------- OpenAI-compatible client (OpenAI / Doubao / Grok) ---------- + +/// Single implementation for any provider that speaks the OpenAI +/// `chat/completions` wire protocol. We currently dispatch this for +/// OpenAI, Volcengine Doubao, and xAI Grok — they accept the same body +/// shape, `Bearer` auth, and return the same response envelope. Anthropic +/// has its own `messages` endpoint and lives in `call_anthropic`. +/// +/// Note: this path does not use Anthropic's `cache_control` block — +/// OpenAI's automatic prompt caching kicks in on its own for sufficiently +/// long, stable system prompts; Doubao/Grok cache support is provider- +/// and model-specific. 
We log token usage including any `cached_tokens` +/// field the provider exposes (OpenAI's `prompt_tokens_details`). +async fn call_openai_compatible( + http: &reqwest::Client, + provider: LlmProvider, + api_key: &str, + model: &str, + system_prompt: &str, + user_prompt: &str, + max_tokens: u32, +) -> anyhow::Result { + let url = match provider { + LlmProvider::OpenAI => OPENAI_URL, + LlmProvider::Doubao => DOUBAO_URL, + LlmProvider::Grok => GROK_URL, + LlmProvider::Anthropic => { + anyhow::bail!("call_openai_compatible called with Anthropic provider — this is a bug") + } + }; + + let body = json!({ + "model": model, + "max_tokens": max_tokens, + "messages": [ + { "role": "system", "content": system_prompt }, + { "role": "user", "content": user_prompt } + ] + }); + + let resp = http + .post(url) + .header("Authorization", format!("Bearer {}", api_key)) + .header("Content-Type", "application/json") + .timeout(Duration::from_secs(LLM_TIMEOUT_SECS)) + .json(&body) + .send() + .await?; + + let status = resp.status(); + let bytes = resp.bytes().await?; + + if !status.is_success() { + anyhow::bail!( + "{} API returned {}: {}", + provider.as_str(), + status, + String::from_utf8_lossy(&bytes) + ); + } + + let parsed: Value = serde_json::from_slice(&bytes)?; + + if let Some(usage) = parsed.get("usage") { + info!( + provider = provider.as_str(), + prompt_tokens = ?usage.get("prompt_tokens"), + completion_tokens = ?usage.get("completion_tokens"), + total_tokens = ?usage.get("total_tokens"), + cached_tokens = ?usage + .get("prompt_tokens_details") + .and_then(|d| d.get("cached_tokens")), + "OpenAI-compatible usage" + ); + } + + // Standard chat-completions response shape: + // { "choices": [ { "message": { "content": "..." } }, ... 
] } + let text = parsed + .get("choices") + .and_then(|c| c.as_array()) + .and_then(|arr| arr.first()) + .and_then(|first| first.get("message")) + .and_then(|m| m.get("content")) + .and_then(|t| t.as_str()) + .ok_or_else(|| { + anyhow::anyhow!("no message content in {} response", provider.as_str()) + })? + .to_string(); + + Ok(text) +} + // ---------- LLM response parsing ---------- -fn parse_llm_response(text: &str, model: &str) -> TriageResponse { +fn parse_llm_response(text: &str, model: &str, provider: &str) -> TriageResponse { // Strict JSON parse first. match serde_json::from_str::(text.trim()) { - Ok(v) => extract_response(&v, model, None), + Ok(v) => extract_response(&v, model, provider, None), Err(_) => { // Try to extract a {...} substring (in case the model wrapped // the JSON in prose despite the instructions). @@ -353,6 +561,7 @@ fn parse_llm_response(text: &str, model: &str) -> TriageResponse { return extract_response( &v, model, + provider, Some("LLM wrapped JSON in prose; extracted".into()), ); } @@ -366,13 +575,19 @@ fn parse_llm_response(text: &str, model: &str) -> TriageResponse { remediation_steps: vec![], confidence: Some("low".into()), model: Some(model.into()), + provider: Some(provider.into()), note: Some("LLM did not return valid JSON; raw text in diagnosis".into()), } } } } -fn extract_response(v: &Value, model: &str, note: Option) -> TriageResponse { +fn extract_response( + v: &Value, + model: &str, + provider: &str, + note: Option, +) -> TriageResponse { let diagnosis = v .get("diagnosis") .and_then(|s| s.as_str()) @@ -407,6 +622,7 @@ fn extract_response(v: &Value, model: &str, note: Option) -> TriageRespo remediation_steps, confidence, model: Some(model.into()), + provider: Some(provider.into()), note, } } @@ -433,7 +649,7 @@ mod tests { "remediation_steps": ["Check jmap", "Restart"], "confidence": "medium" }"#; - let r = parse_llm_response(text, "claude-sonnet-4-6"); + let r = parse_llm_response(text, "claude-sonnet-4-6", "anthropic"); 
assert!(r.available); assert_eq!(r.diagnosis.unwrap(), "java process consumed 80% RSS"); assert_eq!(r.likely_causes.len(), 2); @@ -452,7 +668,7 @@ mod tests { "confidence": "high" } Hope this helps."#; - let r = parse_llm_response(text, "claude-sonnet-4-6"); + let r = parse_llm_response(text, "claude-sonnet-4-6", "anthropic"); assert!(r.available); assert!(r.note.as_deref().unwrap().contains("wrapped JSON")); assert_eq!(r.diagnosis.unwrap(), "OOM kill"); @@ -461,7 +677,7 @@ mod tests { #[test] fn parse_total_garbage() { let text = "I am not a JSON response."; - let r = parse_llm_response(text, "claude-sonnet-4-6"); + let r = parse_llm_response(text, "claude-sonnet-4-6", "anthropic"); assert!(r.available); assert_eq!(r.diagnosis.unwrap(), "I am not a JSON response."); assert!(r.note.as_deref().unwrap().contains("did not return valid JSON")); @@ -474,11 +690,57 @@ mod tests { "diagnosis": "unknown", "remediation_steps": ["gather more data"] }"#; - let r = parse_llm_response(text, "claude-sonnet-4-6"); + let r = parse_llm_response(text, "claude-sonnet-4-6", "anthropic"); assert!(r.available); assert_eq!(r.diagnosis.unwrap(), "unknown"); assert!(r.likely_causes.is_empty()); assert_eq!(r.remediation_steps.len(), 1); assert!(r.confidence.is_none()); } + + #[test] + fn parse_provider_label_propagates() { + // The provider field should appear in the response regardless of + // which parse path was taken. + let strict = parse_llm_response( + r#"{"diagnosis":"x","confidence":"low"}"#, + "gpt-4o-mini", + "openai", + ); + assert_eq!(strict.provider.as_deref(), Some("openai")); + assert_eq!(strict.model.as_deref(), Some("gpt-4o-mini")); + + let prose = parse_llm_response( + r#"Sure: {"diagnosis":"y"}. 
EOF"#, + "doubao-pro-32k", + "doubao", + ); + assert_eq!(prose.provider.as_deref(), Some("doubao")); + + let garbage = parse_llm_response("not json", "grok-3", "grok"); + assert_eq!(garbage.provider.as_deref(), Some("grok")); + } + + #[test] + fn llm_provider_parse_aliases() { + assert_eq!(LlmProvider::parse("anthropic"), Some(LlmProvider::Anthropic)); + assert_eq!(LlmProvider::parse("CLAUDE"), Some(LlmProvider::Anthropic)); + assert_eq!(LlmProvider::parse("OpenAI"), Some(LlmProvider::OpenAI)); + assert_eq!(LlmProvider::parse("gpt"), Some(LlmProvider::OpenAI)); + assert_eq!(LlmProvider::parse("doubao"), Some(LlmProvider::Doubao)); + assert_eq!(LlmProvider::parse("volcengine"), Some(LlmProvider::Doubao)); + assert_eq!(LlmProvider::parse("ark"), Some(LlmProvider::Doubao)); + assert_eq!(LlmProvider::parse("grok"), Some(LlmProvider::Grok)); + assert_eq!(LlmProvider::parse("xai"), Some(LlmProvider::Grok)); + assert_eq!(LlmProvider::parse("unknown"), None); + } + + #[test] + fn llm_provider_defaults() { + assert_eq!(LlmProvider::Anthropic.default_model(), "claude-sonnet-4-6"); + assert_eq!(LlmProvider::OpenAI.default_model(), "gpt-4o-mini"); + assert_eq!(LlmProvider::Doubao.default_model(), "doubao-pro-32k"); + assert_eq!(LlmProvider::Grok.default_model(), "grok-3"); + assert_eq!(LlmProvider::default(), LlmProvider::Anthropic); + } } diff --git a/sigma-api/src/routes/mod.rs b/sigma-api/src/routes/mod.rs index febf063..290114f 100644 --- a/sigma-api/src/routes/mod.rs +++ b/sigma-api/src/routes/mod.rs @@ -43,7 +43,8 @@ pub struct AppState { pub http_client: reqwest::Client, pub jwt_secret: String, pub jwt_expiry_hours: u64, - pub anthropic_api_key: Option, + pub llm_provider: ai_triage::LlmProvider, + pub llm_api_key: Option, } /// Auth middleware: try Bearer JWT → try X-Api-Key → allow if no API_KEY set → 401.