From 36fcc1b068622777eaa7873d19880d48d703108a Mon Sep 17 00:00:00 2001 From: lai3d Date: Tue, 19 May 2026 19:46:41 +0800 Subject: [PATCH] Gate /api/ai/triage on admin or operator role LLM calls cost real tokens; the endpoint must not be reachable from `readonly` consumers (dashboards, monitoring) or per-VPS `agent` keys. Adds `require_role(&user, &["admin", "operator"])` to the handler, plus integration tests covering all four roles. The pre-existing test setup was missing the `llm_provider` and `llm_api_key` fields on AppState and didn't wire the ai_triage router; fixed both so the new tests compile. Docs (project CLAUDE.md, docs/ai-triage.{en,zh}.md, docs/api-authentication.{en,zh}.md) updated to reflect the gate. Co-Authored-By: Claude Opus 4.7 (1M context) --- CLAUDE.md | 1 + docs/ai-triage.en.md | 2 +- docs/ai-triage.zh.md | 2 +- docs/api-authentication.en.md | 1 + docs/api-authentication.zh.md | 1 + sigma-api/src/routes/ai_triage.rs | 11 ++- sigma-api/tests/ai_triage_test.rs | 110 ++++++++++++++++++++++++++++++ sigma-api/tests/common/mod.rs | 3 + 8 files changed, 128 insertions(+), 3 deletions(-) create mode 100644 sigma-api/tests/ai_triage_test.rs diff --git a/CLAUDE.md b/CLAUDE.md index 0054f30..950a99d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -155,6 +155,7 @@ sigma/ | All GET endpoints (read) | ✓ | ✓ | ✓ | ✓ | | VPS / Provider / DNS / Cloud CRUD | ✓ | ✓ | — | — | | Tickets / IP Checks / Costs / Import | ✓ | ✓ | — | — | +| `POST /api/ai/triage` (spends LLM tokens) | ✓ | ✓ | — | — | | `POST /agent/register`, `/agent/heartbeat` | ✓ | ✓ | ✓ | — | | Envoy nodes & routes write (8 endpoints) | ✓ | ✓ | ✓ | — | | User management (`/api/users`) | ✓ | — | — | — | diff --git a/docs/ai-triage.en.md b/docs/ai-triage.en.md index fdf4eca..a3ed7dc 100644 --- a/docs/ai-triage.en.md +++ b/docs/ai-triage.en.md @@ -195,7 +195,7 @@ All three paths are unit-tested. ### Auth -The endpoint sits behind the API's standard `auth` middleware (JWT or `X-Api-Key`). 
It does not currently enforce a minimum role beyond authenticated — operators should treat the rate limit and provider-side quota as the primary cost control, and consider gating to `operator` or `admin` if their key inventory includes `readonly` consumers that shouldn't spend LLM tokens. +The endpoint sits behind the API's standard `auth` middleware (JWT or `X-Api-Key`) **and requires `admin` or `operator` role**. `readonly` consumers (dashboards, monitoring) and per-VPS `agent` keys receive a `403 Forbidden` before any LLM call is made — they can't spend tokens. The global rate limit still applies on top, and provider-side quota remains the second line of defence. ### OpenAPI diff --git a/docs/ai-triage.zh.md b/docs/ai-triage.zh.md index a7c5e63..8c058d3 100644 --- a/docs/ai-triage.zh.md +++ b/docs/ai-triage.zh.md @@ -195,7 +195,7 @@ Content-Type: application/json ### 认证 -端点位于 API 的标准 `auth` 中间件之后(JWT 或 `X-Api-Key`)。**当前不强制最低角色**,只要通过认证即可。运维应把速率限制和 provider 侧的 quota 作为主要成本控制手段;如果 key 库存中包含本不应消费 LLM token 的 `readonly` 消费者,建议把端点收紧到 `operator` 或 `admin` 角色。 +端点位于 API 的标准 `auth` 中间件之后(JWT 或 `X-Api-Key`),**并要求 `admin` 或 `operator` 角色**。`readonly` 消费者(仪表盘、监控)和每个 VPS 的 `agent` key 在到达 LLM 调用之前就会收到 `403 Forbidden` —— 它们无法消费 token。全局速率限制仍然叠加生效,provider 侧的 quota 是第二道防线。 ### OpenAPI diff --git a/docs/api-authentication.en.md b/docs/api-authentication.en.md index 75d3541..8e7013b 100644 --- a/docs/api-authentication.en.md +++ b/docs/api-authentication.en.md @@ -23,6 +23,7 @@ Sigma supports two authentication methods: | All GET endpoints | Y | Y | Y | Y | | VPS / Provider / DNS / Cloud CRUD | Y | Y | - | - | | Tickets / IP Checks / Costs / Import | Y | Y | - | - | +| AI Triage (`POST /api/ai/triage`, spends LLM tokens) | Y | Y | - | - | | Agent register & heartbeat | Y | Y | Y | - | | Envoy nodes & routes write | Y | Y | Y | - | | User management | Y | - | - | - | diff --git a/docs/api-authentication.zh.md b/docs/api-authentication.zh.md index 98a443a..0e2bd7a 100644 --- 
a/docs/api-authentication.zh.md +++ b/docs/api-authentication.zh.md @@ -23,6 +23,7 @@ Sigma 支持两种认证方式: | 所有 GET 端点 | Y | Y | Y | Y | | VPS / Provider / DNS / Cloud 增删改 | Y | Y | - | - | | Ticket / IP 检测 / 费用 / 导入 | Y | Y | - | - | +| AI 诊断(`POST /api/ai/triage`,消费 LLM token)| Y | Y | - | - | | Agent 注册与心跳 | Y | Y | Y | - | | Envoy 节点与路由写操作 | Y | Y | Y | - | | 用户管理 | Y | - | - | - | diff --git a/sigma-api/src/routes/ai_triage.rs b/sigma-api/src/routes/ai_triage.rs index bd163ee..9a5b460 100644 --- a/sigma-api/src/routes/ai_triage.rs +++ b/sigma-api/src/routes/ai_triage.rs @@ -21,13 +21,14 @@ //! mutates fleet state. Auto-remediation is an explicit non-goal — //! human-in-the-loop is the design. -use axum::{extract::State, routing::post, Json, Router}; +use axum::{extract::State, routing::post, Extension, Json, Router}; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use std::time::Duration; use tracing::{info, warn}; use utoipa::ToSchema; +use crate::auth::{require_role, CurrentUser}; use crate::errors::AppError; use crate::routes::AppState; @@ -176,12 +177,20 @@ pub struct TriageResponse { request_body = TriageRequest, responses( (status = 200, description = "Triage suggestion (degrades gracefully when LLM unavailable)", body = TriageResponse), + (status = 403, description = "Caller's role is not permitted to spend LLM tokens (requires admin or operator)"), ) )] pub async fn triage( State(state): State<AppState>, + Extension(user): Extension<CurrentUser>, Json(req): Json<TriageRequest>, ) -> Result<Json<TriageResponse>, AppError> { + // LLM calls cost real tokens. Restrict to roles that can already mutate + // fleet state — keeps `readonly` consumers (dashboards, monitoring) and + // per-VPS `agent` keys from spending budget. The global rate-limit 
+ require_role(&user, &["admin", "operator"])?; + let provider = state.llm_provider; let provider_str = provider.as_str(); diff --git a/sigma-api/tests/ai_triage_test.rs b/sigma-api/tests/ai_triage_test.rs new file mode 100644 index 0000000..031a56f --- /dev/null +++ b/sigma-api/tests/ai_triage_test.rs @@ -0,0 +1,110 @@ +//! Integration tests for `POST /api/ai/triage` RBAC gating. +//! +//! The endpoint spends LLM tokens, so it is restricted to `admin` and +//! `operator`. `readonly` and `agent` roles must be rejected at the +//! request boundary — before any LLM call is made. +//! +//! The test environment has no `LLM_API_KEY`, so allowed roles get back +//! a 200 OK with `available: false`. That's sufficient: it proves RBAC +//! passed without depending on external network calls. + +mod common; + +use axum::body::Body; +use http_body_util::BodyExt; +use serde_json::{json, Value}; +use tower::ServiceExt; + +async fn login_as( + router: &axum::Router, + admin_token: &str, + email: &str, + role: &str, +) -> String { + let user_body = json!({ + "email": email, + "password": "password123", + "name": format!("{role} user"), + "role": role, + }); + let (status, _) = + common::request_with_token(router, "POST", "/api/users", admin_token, Some(user_body)) + .await; + assert_eq!(status, 200, "creating {role} user should succeed"); + + let login_body = json!({ "email": email, "password": "password123" }); + let req = axum::http::Request::builder() + .method("POST") + .uri("/api/auth/login") + .header("content-type", "application/json") + .body(Body::from(serde_json::to_string(&login_body).unwrap())) + .unwrap(); + let response = router.clone().oneshot(req).await.unwrap(); + assert_eq!(response.status(), 200, "{role} login should succeed"); + let bytes = response.into_body().collect().await.unwrap().to_bytes(); + let login_json: Value = serde_json::from_slice(&bytes).unwrap(); + login_json["token"].as_str().unwrap().to_string() +} + +fn alert_body() -> Value { + json!({ 
"alert": { "name": "test alert" } }) +} + +#[tokio::test] +async fn test_admin_can_triage() { + let (router, pool) = common::setup().await; + let token = common::login_admin(&router).await; + + let (status, body) = + common::request_with_token(&router, "POST", "/api/ai/triage", &token, Some(alert_body())) + .await; + assert_eq!(status, 200); + // No LLM_API_KEY in the test env, so the endpoint degrades — but the + // request itself was accepted, which is what we're verifying here. + assert_eq!(body["available"], false); + + common::cleanup(&pool).await; +} + +#[tokio::test] +async fn test_operator_can_triage() { + let (router, pool) = common::setup().await; + let admin_token = common::login_admin(&router).await; + let token = login_as(&router, &admin_token, "operator@test.local", "operator").await; + + let (status, body) = + common::request_with_token(&router, "POST", "/api/ai/triage", &token, Some(alert_body())) + .await; + assert_eq!(status, 200); + assert_eq!(body["available"], false); + + common::cleanup(&pool).await; +} + +#[tokio::test] +async fn test_readonly_cannot_triage() { + let (router, pool) = common::setup().await; + let admin_token = common::login_admin(&router).await; + let token = login_as(&router, &admin_token, "readonly@test.local", "readonly").await; + + let (status, _) = + common::request_with_token(&router, "POST", "/api/ai/triage", &token, Some(alert_body())) + .await; + assert_eq!(status, 403); + + common::cleanup(&pool).await; +} + +#[tokio::test] +async fn test_agent_cannot_triage() { + let (router, pool) = common::setup().await; + let admin_token = common::login_admin(&router).await; + let token = login_as(&router, &admin_token, "agent@test.local", "agent").await; + + let (status, _) = + common::request_with_token(&router, "POST", "/api/ai/triage", &token, Some(alert_body())) + .await; + assert_eq!(status, 403); + + common::cleanup(&pool).await; +} diff --git a/sigma-api/tests/common/mod.rs b/sigma-api/tests/common/mod.rs index 
635c785..8e3d4b7 100644 --- a/sigma-api/tests/common/mod.rs +++ b/sigma-api/tests/common/mod.rs @@ -63,6 +63,8 @@ pub async fn setup() -> (Router, PgPool) { http_client: reqwest::Client::new(), jwt_secret: "test-jwt-secret".to_string(), jwt_expiry_hours: 24, + llm_provider: routes::ai_triage::LlmProvider::default(), + llm_api_key: None, }; // Build router matching main.rs structure @@ -82,6 +84,7 @@ pub async fn setup() -> (Router, PgPool) { .merge(routes::users::router()) .merge(routes::audit_logs::router()) .merge(routes::tickets::router()) + .merge(routes::ai_triage::router()) .layer(axum::middleware::from_fn_with_state( app_state.clone(), routes::rate_limit::rate_limit,