diff --git a/.env.example b/.env.example index 552a2b7..68dc3a8 100644 --- a/.env.example +++ b/.env.example @@ -165,7 +165,7 @@ GENERAL_TIMEZONE=Asia/Shanghai # DISABLE_HTTP2=false # Cursor客户端版本(已弃用) -# CURSOR_CLIENT_VERSION=2.0.0 +# CURSOR_CLIENT_VERSION=2.6.0 # 思考标签(已弃用) # THINKING_TAG=think @@ -284,5 +284,20 @@ DURATION_FORMAT=random # - random : 随机语言 (仅用于测试) DURATION_LANGUAGE=random +# ====== CLI 后端配置 ====== +# 启用 CLI 后端(通过 Cursor 官方 agent CLI 调用) +# 前提: 安装 Cursor CLI (curl https://cursor.com/install -fsS | bash && agent login) +# 启用后通过 /cli/chat/completions 端点调用 +CLI_BACKEND_ENABLED=false + +# agent 二进制路径 +CLI_AGENT_BIN=agent + +# CLI 请求超时(毫秒) +CLI_TIMEOUT_MS=300000 + +# CLI 工作目录 +CLI_WORKSPACE=/tmp + # 配置文件路径 CONFIG_FILE=config.toml diff --git a/config.example.toml b/config.example.toml index ebe3e8c..ff3a3dd 100644 --- a/config.example.toml +++ b/config.example.toml @@ -55,4 +55,4 @@ raw_model_fetch_mode = "truncate" emulated_platform = "{DEFAULT_PLATFORM}" # Cursor客户端版本 -cursor_client_version = "2.0.0" +cursor_client_version = "2.6.0" diff --git a/src/app/constant.rs b/src/app/constant.rs index ea3257d..232f4ea 100644 --- a/src/app/constant.rs +++ b/src/app/constant.rs @@ -160,6 +160,7 @@ def_pub_const!( ROUTE_RAW_MODELS_PATH = "/raw/models", ROUTE_MODELS_PATH = "/v1/models", ROUTE_CHAT_COMPLETIONS_PATH = "/v1/chat/completions", + ROUTE_CLI_CHAT_COMPLETIONS_PATH = "/cli/chat/completions", ROUTE_MESSAGES_PATH = "/v1/messages", ROUTE_MESSAGES_COUNT_TOKENS_PATH = "/v1/messages/count_tokens", ); diff --git a/src/app/constant/header.rs b/src/app/constant/header.rs index b0b070c..33ce1fd 100644 --- a/src/app/constant/header.rs +++ b/src/app/constant/header.rs @@ -55,8 +55,8 @@ def_header_value! { (TRAILERS, "trailers"), (U_EQ_0, "u=0"), (U_EQ_1_I, "u=1, i"), - (CONNECT_ES, "connect-es/1.6.1"), - (NOT_A_BRAND, "\"Not)A;Brand\";v=\"8\", \"Chromium\";v=\"138\""), + (CONNECT_ES, "connect-es/2.1.1"), + (NOT_A_BRAND, "\"Not)A;Brand\";v=\"8\", \"Chromium\";v=\"140\""), (MOBILE_NO, "?0"), (VSCODE_ORIGIN, "vscode-file://vscode-app"), (CROSS_SITE, "cross-site"), diff --git a/src/app/constant/header/version.rs b/src/app/constant/header/version.rs index 6d6573c..50277a8 100644 --- a/src/app/constant/header/version.rs +++ b/src/app/constant/header/version.rs @@ -18,11 +18,11 @@ use manually_init::ManuallyInit; crate::define_typed_constants! { &'static str => { /// 默认的客户端版本号 - DEFAULT_CLIENT_VERSION = "2.0.0", + DEFAULT_CLIENT_VERSION = "2.6.0", /// 环境变量名:Cursor 客户端版本 ENV_CURSOR_CLIENT_VERSION = "CURSOR_CLIENT_VERSION", /// Chrome 版本信息 - CHROME_VERSION_INFO = " Chrome/138.0.7204.251 Electron/37.7.0 Safari/537.36", + CHROME_VERSION_INFO = " Chrome/140.0.7339.41 Electron/38.0.0 Safari/537.36", /// User-Agent 前缀 UA_PREFIX = cfg_select! { target_os = "windows" => {"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/"} @@ -31,9 +31,9 @@ crate::define_typed_constants! { }, /// 默认的 User-Agent DEFAULT_UA = cfg_select! { - target_os = "windows" => {"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.0.0 Chrome/138.0.7204.251 Electron/37.7.0 Safari/537.36"} - target_os = "macos" => {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.0.0 Chrome/138.0.7204.251 Electron/37.7.0 Safari/537.36"} - target_os = "linux" => {"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.0.0 Chrome/138.0.7204.251 Electron/37.7.0 Safari/537.36"} + target_os = "windows" => {"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.6.0 Chrome/140.0.7339.41 Electron/38.0.0 Safari/537.36"} + target_os = "macos" => {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.6.0 Chrome/140.0.7339.41 Electron/38.0.0 Safari/537.36"} + target_os = "linux" => {"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.6.0 Chrome/140.0.7339.41 Electron/38.0.0 Safari/537.36"} }, } diff --git a/src/app/model/config.rs b/src/app/model/config.rs index f9ba216..0b04e28 100644 --- a/src/app/model/config.rs +++ b/src/app/model/config.rs @@ -82,6 +82,7 @@ impl AppConfig { super::context_fill_mode::init(); } crate::core::constant::create_models(); + crate::core::service::cli_backend::init(); let (content, config) = if let Ok(s) = std::fs::read_to_string(&*CONFIG_FILE_PATH) { match toml::from_str(&s) { diff --git a/src/app/model/cursor_version.rs b/src/app/model/cursor_version.rs index 02f2e92..e6c2c63 100644 --- a/src/app/model/cursor_version.rs +++ b/src/app/model/cursor_version.rs @@ -21,7 +21,7 @@ impl Version { } impl Default for Version { - fn default() -> Self { Self { major: 2, minor: 0, patch: 0 } } + fn default() -> Self { Self { major: 2, minor: 6, patch: 0 } } } impl<'de> serde::Deserialize<'de> for Version { diff --git a/src/app/model/platform.rs b/src/app/model/platform.rs index 31a1f31..d3121b4 100644 --- a/src/app/model/platform.rs +++ b/src/app/model/platform.rs @@ -73,28 +73,28 @@ impl PlatformType { } /// User-Agent 后缀 -pub const UA_SUFFIX: &'static str = " Chrome/138.0.7204.251 Electron/37.7.0 Safari/537.36"; +pub const UA_SUFFIX: &'static str = " Chrome/140.0.7339.41 Electron/38.0.0 Safari/537.36"; -// const UA_SUFFIX_LEN: usize = " Chrome/138.0.7204.251 Electron/37.7.0 Safari/537.36".len(); +// const UA_SUFFIX_LEN: usize = " Chrome/140.0.7339.41 Electron/38.0.0 Safari/537.36".len(); static PLATFORMS: Platforms = Platforms { windows: Platform { - web_ua: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36", + web_ua: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36", string: "\"Windows\"", ua_prefix: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/", - // default_ua: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.0.0 Chrome/138.0.7204.251 Electron/37.7.0 Safari/537.36", + // default_ua: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.6.0 Chrome/140.0.7339.41 Electron/38.0.0 Safari/537.36", }, macos: Platform { - web_ua: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36", + web_ua: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36", string: "\"macOS\"", ua_prefix: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/", - // default_ua: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.0.0 Chrome/138.0.7204.251 Electron/37.7.0 Safari/537.36", + // default_ua: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.6.0 Chrome/140.0.7339.41 Electron/38.0.0 Safari/537.36", }, linux: Platform { - web_ua: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36", + web_ua: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36", string: "\"Linux\"", ua_prefix: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/", - // default_ua: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.0.0 Chrome/138.0.7204.251 Electron/37.7.0 Safari/537.36", + // default_ua: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.6.0 Chrome/140.0.7339.41 Electron/38.0.0 Safari/537.36", }, }; diff --git a/src/app/route.rs b/src/app/route.rs index cacd045..3f3a977 100644 --- a/src/app/route.rs +++ b/src/app/route.rs @@ -4,7 +4,8 @@ pub use json::{InfallibleSerialize, InfallibleJson, GenericJson, OpenAiJson, Ant use super::{ constant::{ - ROUTE_BUILD_KEY_PATH, ROUTE_CHAT_COMPLETIONS_PATH, ROUTE_CONFIG_EXAMPLE_PATH, + ROUTE_BUILD_KEY_PATH, ROUTE_CHAT_COMPLETIONS_PATH, ROUTE_CLI_CHAT_COMPLETIONS_PATH, + ROUTE_CONFIG_EXAMPLE_PATH, ROUTE_CONFIG_GET_PATH, ROUTE_CONFIG_RELOAD_PATH, ROUTE_CONFIG_SET_PATH, ROUTE_CONFIG_VERSION_GET_PATH, ROUTE_CPP_CONFIG_PATH, ROUTE_CPP_MODELS_PATH, ROUTE_CPP_STREAM_PATH, ROUTE_ENV_EXAMPLE_PATH, ROUTE_FILE_SYNC_PATH, @@ -41,6 +42,7 @@ use crate::{ handle_update_tokens_config_version, handle_update_tokens_profile, }, service::{ + cli_backend::handle_cli_chat_completions, cpp::{ handle_cpp_config, handle_cpp_models, handle_stream_cpp, handle_sync_file, handle_upload_file, @@ -141,6 +143,11 @@ pub fn create_router(state: Arc) -> Router { post(handle_chat_completions) .route_layer(middleware::from_fn_with_state(state.clone(), v1_auth_middleware)), ) + .route( + exchange_map.resolve(ROUTE_CLI_CHAT_COMPLETIONS_PATH), + post(handle_cli_chat_completions) + .route_layer(middleware::from_fn(admin_auth_middleware)), + ) .route( exchange_map.resolve(ROUTE_MESSAGES_COUNT_TOKENS_PATH), post(handle_messages_count_tokens) diff --git a/src/core/service.rs b/src/core/service.rs index fd70fe2..8d82a82 100644 --- a/src/core/service.rs +++ b/src/core/service.rs @@ -1,5 +1,6 @@ // mod backend; // mod context; +pub mod cli_backend; pub mod cpp; use crate::{ diff --git a/src/core/service/cli_backend.rs b/src/core/service/cli_backend.rs new file mode 100644 index 0000000..552575f --- /dev/null +++ b/src/core/service/cli_backend.rs @@ -0,0 +1,765 @@ +//! CLI Backend - 通过 Cursor 官方 CLI 工具 `agent` 完成聊天请求 +//! +//! 这是一个独立的后端模块,不依赖现有的 gRPC/protobuf 请求路径。 +//! 通过调用 `agent --print --mode ask` 命令行工具来完成请求。 +//! +//! # 启用方式 +//! 设置环境变量 `CLI_BACKEND_ENABLED=true` +//! +//! # 前提条件 +//! - 安装 Cursor CLI: `curl https://cursor.com/install -fsS | bash` +//! - 登录: `agent login` 或设置 `CURSOR_API_KEY` + +use alloc::borrow::Cow; +use axum::{body::Body, response::Response}; +use bytes::Bytes; +use http::{ + StatusCode, + header::{CACHE_CONTROL, CONNECTION, CONTENT_TYPE, TRANSFER_ENCODING}, +}; +use serde::{Deserialize, Serialize}; +use std::{ + io::{BufRead, BufReader}, + process::{Command, Stdio}, +}; + +use crate::app::{ + constant::header::{CHUNKED, EVENT_STREAM, KEEP_ALIVE, NO_CACHE_REVALIDATE}, + route::GenericJson, +}; + +// ============================================================ +// Configuration +// ============================================================ + +/// CLI 后端配置,从环境变量读取 +pub struct CliConfig { + pub enabled: bool, + pub agent_bin: Cow<'static, str>, + pub timeout_ms: u64, + pub workspace: Cow<'static, str>, + /// 是否使用 --mode ask(安全但可能导致 thinking 模型写文件而非输出文本) + /// 设为 false 时不传 --mode,agent 有完整 tool access + pub use_ask_mode: bool, +} + +impl CliConfig { + pub fn from_env() -> Self { + use crate::common::utils::parse_from_env; + Self { + enabled: parse_from_env("CLI_BACKEND_ENABLED", false), + agent_bin: parse_from_env("CLI_AGENT_BIN", "agent"), + timeout_ms: parse_from_env("CLI_TIMEOUT_MS", 300_000u64), + workspace: parse_from_env("CLI_WORKSPACE", "/tmp"), + use_ask_mode: parse_from_env("CLI_USE_ASK_MODE", false), + } + } +} + +use manually_init::ManuallyInit; + +static CLI_CONFIG: ManuallyInit = ManuallyInit::new(); + +/// 初始化 CLI 配置(在程序启动时调用一次) +pub fn init() { + CLI_CONFIG.init(CliConfig::from_env()); +} + +/// 检查 CLI 后端是否启用 +#[inline] +pub fn is_enabled() -> bool { + CLI_CONFIG.get().enabled +} + +#[inline] +fn config() -> &'static CliConfig { + CLI_CONFIG.get() +} + +// ============================================================ +// Request / Response types (OpenAI compatible subset) +// ============================================================ + +#[derive(Deserialize)] +pub struct CliChatRequest { + pub model: Option, + pub messages: Vec, + #[serde(default)] + pub stream: bool, +} + +#[derive(Deserialize)] +pub struct CliMessage { + pub role: String, + pub content: CliContent, +} + +#[derive(Deserialize)] +#[serde(untagged)] +pub enum CliContent { + Text(String), + Parts(Vec), +} + +#[derive(Deserialize)] +pub struct CliContentPart { + pub r#type: Option, + pub text: Option, +} + +impl CliContent { + pub fn as_text(&self) -> String { + match self { + CliContent::Text(s) => s.clone(), + CliContent::Parts(parts) => parts + .iter() + .filter(|p| p.r#type.as_deref() == Some("text")) + .filter_map(|p| p.text.as_deref()) + .collect::>() + .join(""), + } + } +} + +#[derive(Serialize)] +struct CliChatResponse { + id: String, + object: &'static str, + created: u64, + model: String, + choices: Vec, + usage: CliUsage, +} + +#[derive(Serialize)] +struct CliChoice { + index: u32, + message: CliResponseMessage, + finish_reason: &'static str, +} + +#[derive(Serialize)] +struct CliResponseMessage { + role: &'static str, + content: String, + /// Thinking/reasoning content (OpenAI o1/o3 compatible format) + #[serde(skip_serializing_if = "Option::is_none")] + reasoning_content: Option, +} + +#[derive(Serialize)] +struct CliUsage { + prompt_tokens: u32, + completion_tokens: u32, + total_tokens: u32, +} + +#[derive(Serialize)] +struct CliStreamChunk { + id: String, + object: &'static str, + created: u64, + model: String, + choices: Vec, +} + +#[derive(Serialize)] +struct CliStreamChoice { + index: u32, + delta: CliStreamDelta, + finish_reason: Option<&'static str>, +} + +#[derive(Serialize)] +struct CliStreamDelta { + #[serde(skip_serializing_if = "Option::is_none")] + role: Option<&'static str>, + #[serde(skip_serializing_if = "Option::is_none")] + content: Option, + /// Thinking/reasoning content (OpenAI o1/o3 compatible format) + #[serde(skip_serializing_if = "Option::is_none")] + reasoning_content: Option, +} + +#[derive(Serialize)] +struct CliErrorResponse { + error: CliErrorInner, +} + +#[derive(Serialize)] +struct CliErrorInner { + message: String, + r#type: &'static str, + code: Option<&'static str>, +} + +// ============================================================ +// Prompt building +// ============================================================ + +fn build_prompt(messages: &[CliMessage]) -> String { + let mut parts = Vec::with_capacity(messages.len()); + for msg in messages { + let text = msg.content.as_text(); + match msg.role.as_str() { + "system" | "developer" => { + parts.push(format!("[System]\n{text}")); + } + "assistant" => { + parts.push(format!("[Assistant]\n{text}")); + } + _ => { + // user and any other role + parts.push(format!("[User]\n{text}")); + } + } + } + parts.join("\n\n") +} + +// ============================================================ +// CLI execution +// ============================================================ + +struct CliSyncResult { + content: String, + thinking: Option, + usage: CliUsageInfo, +} + +/// 构建 agent CLI 命令的公共参数 +fn build_agent_command(agent_bin: &str, model: &str, workspace: &str, use_ask_mode: bool) -> Command { + let mut cmd = Command::new(agent_bin); + cmd.arg("--print"); + if use_ask_mode { + cmd.arg("--mode").arg("ask"); + } + cmd.arg("--model") + .arg(model) + .arg("--workspace") + .arg(workspace) + .arg("--trust"); + cmd +} + +async fn run_cli_sync(model: &str, prompt: &str) -> Result { + let cfg = config(); + let agent_bin = cfg.agent_bin.to_string(); + let workspace = cfg.workspace.to_string(); + let timeout_ms = cfg.timeout_ms; + let use_ask_mode = cfg.use_ask_mode; + let model = model.to_string(); + let prompt = prompt.to_string(); + + let result = tokio::time::timeout( + std::time::Duration::from_millis(timeout_ms), + tokio::task::spawn_blocking(move || { + // Use stream-json format to capture thinking content + let child = build_agent_command(&agent_bin, &model, &workspace, use_ask_mode) + .arg("--stream-partial-output") + .arg("--output-format") + .arg("stream-json") + .arg(&prompt) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .map_err(|e| format!("Failed to execute agent CLI: {e}"))?; + + let stdout = child.stdout.ok_or("Failed to capture stdout")?; + let reader = BufReader::new(stdout); + let mut parser = StreamParser::new(); + let mut thinking_parts = Vec::new(); + let mut content_parts = Vec::new(); + let mut usage = CliUsageInfo::default(); + + for line in reader.lines() { + let line = match line { + Ok(l) => l, + Err(_) => break, + }; + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + + match parser.parse_line(trimmed) { + Some(StreamParseResult::ThinkingDelta(text)) => { + thinking_parts.push(text); + } + Some(StreamParseResult::ContentDelta(text)) => { + content_parts.push(text); + } + Some(StreamParseResult::FileContent { path, content }) => { + // Agent wrote code to a file — include it in the response + content_parts.push(format!("\n```\n// File: {path}\n{content}\n```\n")); + } + Some(StreamParseResult::Done(u)) => { + usage = u; + break; + } + _ => {} + } + } + + let thinking = if thinking_parts.is_empty() { + None + } else { + Some(thinking_parts.join("")) + }; + + Ok(CliSyncResult { + content: content_parts.join(""), + thinking, + usage, + }) + }), + ) + .await + .map_err(|_| "CLI request timed out".to_string())? + .map_err(|e| format!("Task join error: {e}"))?; + + result +} + +/// CLI 流式输出解析器 +/// +/// CLI stream-json 格式: +/// - `{"type":"thinking","subtype":"delta","text":"..."}` — 思考增量(每条是独立delta) +/// - `{"type":"thinking","subtype":"completed"}` — 思考完成 +/// - `{"type":"assistant","message":{"content":[{"type":"text","text":"..."}]}}` — 回答增量(每条独立delta) +/// - 每段 assistant 回答后会有一条累积文本消息(需要跳过) +/// - `{"type":"result","subtype":"success","usage":{...}}` — 结束 +struct StreamParser { + /// 已输出的 assistant 文本(用于去重累积消息) + accumulated_text: String, + /// thinking 阶段是否完成 + thinking_done: bool, +} + +/// 解析 CLI stream-json 中的 usage 信息 +#[derive(Deserialize, Default)] +#[serde(rename_all = "camelCase")] +struct CliUsageInfo { + #[serde(default)] + input_tokens: u32, + #[serde(default)] + output_tokens: u32, + #[serde(default)] + cache_read_tokens: u32, + #[serde(default)] + cache_write_tokens: u32, +} + +impl StreamParser { + fn new() -> Self { + Self { + accumulated_text: String::new(), + thinking_done: false, + } + } + + /// 解析一行 CLI 输出 + fn parse_line(&mut self, line: &str) -> Option { + // 使用 serde_json::Value 灵活解析 + let obj: serde_json::Value = serde_json::from_str(line).ok()?; + let line_type = obj.get("type")?.as_str()?; + let subtype = obj.get("subtype").and_then(|v| v.as_str()); + + match line_type { + // === Thinking delta === + "thinking" if subtype == Some("delta") => { + let text = obj.get("text")?.as_str()?; + if text.is_empty() { + return None; + } + Some(StreamParseResult::ThinkingDelta(text.to_string())) + } + + // === Thinking completed === + "thinking" if subtype == Some("completed") => { + self.thinking_done = true; + Some(StreamParseResult::ThinkingDone) + } + + // === Tool call with file content === + // When agent writes code to a file, capture the content + "tool_call" => { + // Look for editToolCall.args.streamContent or editToolCall.args.content + let tool_call = obj.get("tool_call")?; + let edit = tool_call.get("editToolCall")?; + let args = edit.get("args")?; + + // streamContent is the file content being written + let content = args + .get("streamContent") + .or_else(|| args.get("content")) + .and_then(|v| v.as_str())?; + + if content.is_empty() { + return None; + } + + // Only emit on "completed" to avoid duplicates (started + completed both have content) + if subtype == Some("completed") { + // Reset accumulated text for the next assistant segment + self.accumulated_text.clear(); + let path = args.get("path").and_then(|v| v.as_str()).unwrap_or(""); + Some(StreamParseResult::FileContent { + path: path.to_string(), + content: content.to_string(), + }) + } else { + None + } + } + + // === Assistant content delta === + // CLI pattern per segment (before/after each tool_call): + // delta (has timestamp_ms, short text) × N + // accumulated message (same text concatenated, may or may not have timestamp_ms) + // + // We track segment_text to detect accumulated messages: + // if current text == segment_text, it's accumulated → skip. + "assistant" => { + let content = obj.get("message")?.get("content")?.as_array()?; + let text: String = content + .iter() + .filter(|p| p.get("type").and_then(|t| t.as_str()) == Some("text")) + .filter_map(|p| p.get("text").and_then(|t| t.as_str())) + .collect::>() + .join(""); + + if text.is_empty() { + return None; + } + + // If this text equals what we've accumulated in this segment → accumulated msg, skip + if text == self.accumulated_text { + // Reset for next segment (after tool_call) + self.accumulated_text.clear(); + return None; + } + + // If accumulated text is a prefix of this text, it's a growing accumulated → skip + if !self.accumulated_text.is_empty() + && text.starts_with(&self.accumulated_text) + && text.len() > self.accumulated_text.len() + { + self.accumulated_text = text; + return None; + } + + self.accumulated_text.push_str(&text); + Some(StreamParseResult::ContentDelta(text)) + } + + // === Result (done) === + "result" if subtype == Some("success") => { + let usage = obj + .get("usage") + .and_then(|u| serde_json::from_value::(u.clone()).ok()) + .unwrap_or_default(); + Some(StreamParseResult::Done(usage)) + } + + _ => None, + } + } +} + +enum StreamParseResult { + /// Thinking content delta (from thinking models) + ThinkingDelta(String), + /// Thinking phase completed + ThinkingDone, + /// Assistant content delta + ContentDelta(String), + /// File content from tool_call (agent wrote code to a file) + FileContent { path: String, content: String }, + /// Stream completed with usage info + Done(CliUsageInfo), +} + +// ============================================================ +// HTTP Handler +// ============================================================ + +fn error_response(status: StatusCode, message: String) -> Response { + let body = serde_json::to_string(&CliErrorResponse { + error: CliErrorInner { + message, + r#type: "cli_backend_error", + code: None, + }, + }) + .unwrap_or_else(|_| r#"{"error":{"message":"internal error","type":"cli_backend_error"}}"#.to_string()); + + Response::builder() + .status(status) + .header(CONTENT_TYPE, "application/json") + .body(Body::from(body)) + .unwrap() +} + +/// 主处理函数: `/cli/chat/completions` +pub async fn handle_cli_chat_completions( + GenericJson(request): GenericJson, +) -> Response { + if !is_enabled() { + return error_response( + StatusCode::SERVICE_UNAVAILABLE, + "CLI backend is not enabled. Set CLI_BACKEND_ENABLED=true".to_string(), + ); + } + + let model = request.model.as_deref().unwrap_or("auto"); + let prompt = build_prompt(&request.messages); + + if request.stream { + handle_stream(model, &prompt).await + } else { + handle_sync(model, &prompt).await + } +} + +async fn handle_sync(model: &str, prompt: &str) -> Response { + match run_cli_sync(model, prompt).await { + Ok(result) => { + let id = format!( + "chatcmpl-cli-{}", + uuid::Uuid::new_v4().as_simple() + ); + let response = CliChatResponse { + id, + object: "chat.completion", + created: crate::common::utils::now_secs(), + model: model.to_string(), + choices: vec![CliChoice { + index: 0, + message: CliResponseMessage { + role: "assistant", + content: result.content, + reasoning_content: result.thinking, + }, + finish_reason: "stop", + }], + usage: CliUsage { + prompt_tokens: result.usage.input_tokens, + completion_tokens: result.usage.output_tokens, + total_tokens: result.usage.input_tokens + result.usage.output_tokens, + }, + }; + + let body = serde_json::to_string(&response).unwrap(); + Response::builder() + .status(StatusCode::OK) + .header(CONTENT_TYPE, "application/json") + .body(Body::from(body)) + .unwrap() + } + Err(e) => error_response(StatusCode::INTERNAL_SERVER_ERROR, e), + } +} + +async fn handle_stream(model: &str, prompt: &str) -> Response { + let cfg = config(); + let mut child = match build_agent_command(cfg.agent_bin.as_ref(), model, cfg.workspace.as_ref(), cfg.use_ask_mode) + .arg("--stream-partial-output") + .arg("--output-format") + .arg("stream-json") + .arg(prompt) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + { + Ok(c) => c, + Err(e) => { + return error_response( + StatusCode::INTERNAL_SERVER_ERROR, + format!("Failed to start agent CLI: {e}"), + ); + } + }; + + let stdout = match child.stdout.take() { + Some(s) => s, + None => { + return error_response( + StatusCode::INTERNAL_SERVER_ERROR, + "Failed to capture agent stdout".to_string(), + ); + } + }; + + let id = format!( + "chatcmpl-cli-{}", + uuid::Uuid::new_v4().as_simple() + ); + let created = crate::common::utils::now_secs(); + let model_owned = model.to_string(); + + // Use a channel to bridge the CLI stdout reader to the HTTP response stream + let (tx, rx) = tokio::sync::mpsc::channel::>(32); + + // Spawn blocking thread to read CLI stdout line by line and send SSE chunks via channel + tokio::task::spawn_blocking(move || { + let reader = BufReader::new(stdout); + let mut parser = StreamParser::new(); + + // Helper: send a chunk, return false if client disconnected + let send = |tx: &tokio::sync::mpsc::Sender>, + chunk: &CliStreamChunk| -> bool { + tx.blocking_send(Ok(Bytes::from(format!( + "data: {}\n\n", + __unwrap!(serde_json::to_string(chunk)) + )))) + .is_ok() + }; + + // Send initial role chunk + let initial = CliStreamChunk { + id: id.clone(), + object: "chat.completion.chunk", + created, + model: model_owned.clone(), + choices: vec![CliStreamChoice { + index: 0, + delta: CliStreamDelta { + role: Some("assistant"), + content: None, + reasoning_content: None, + }, + finish_reason: None, + }], + }; + if !send(&tx, &initial) { + return; + } + + for line in reader.lines() { + let line = match line { + Ok(l) => l, + Err(_) => break, + }; + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + + match parser.parse_line(trimmed) { + // Thinking delta → reasoning_content field + Some(StreamParseResult::ThinkingDelta(text)) => { + let chunk = CliStreamChunk { + id: id.clone(), + object: "chat.completion.chunk", + created, + model: model_owned.clone(), + choices: vec![CliStreamChoice { + index: 0, + delta: CliStreamDelta { + role: None, + content: None, + reasoning_content: Some(text), + }, + finish_reason: None, + }], + }; + if !send(&tx, &chunk) { + break; + } + } + + // Thinking done → no special chunk needed, just continue + Some(StreamParseResult::ThinkingDone) => {} + + // Content delta → content field + Some(StreamParseResult::ContentDelta(text)) => { + let chunk = CliStreamChunk { + id: id.clone(), + object: "chat.completion.chunk", + created, + model: model_owned.clone(), + choices: vec![CliStreamChoice { + index: 0, + delta: CliStreamDelta { + role: None, + content: Some(text), + reasoning_content: None, + }, + finish_reason: None, + }], + }; + if !send(&tx, &chunk) { + break; + } + } + + // File content from tool_call → emit as content + Some(StreamParseResult::FileContent { path, content }) => { + let text = format!("\n```\n// File: {path}\n{content}\n```\n"); + let chunk = CliStreamChunk { + id: id.clone(), + object: "chat.completion.chunk", + created, + model: model_owned.clone(), + choices: vec![CliStreamChoice { + index: 0, + delta: CliStreamDelta { + role: None, + content: Some(text), + reasoning_content: None, + }, + finish_reason: None, + }], + }; + if !send(&tx, &chunk) { + break; + } + } + + // Done → send finish chunk + Some(StreamParseResult::Done(_usage)) => { + break; + } + + None => {} + } + } + + // Send finish chunk + let finish = CliStreamChunk { + id: id.clone(), + object: "chat.completion.chunk", + created, + model: model_owned, + choices: vec![CliStreamChoice { + index: 0, + delta: CliStreamDelta { + role: None, + content: None, + reasoning_content: None, + }, + finish_reason: Some("stop"), + }], + }; + let _ = tx.blocking_send(Ok(Bytes::from(format!( + "data: {}\n\n", + __unwrap!(serde_json::to_string(&finish)) + )))); + let _ = tx.blocking_send(Ok(Bytes::from("data: [DONE]\n\n"))); + + // Wait for child to finish + let _ = child.wait(); + }); + + let stream = tokio_stream::wrappers::ReceiverStream::new(rx); + + Response::builder() + .status(StatusCode::OK) + .header(CONTENT_TYPE, EVENT_STREAM) + .header(CACHE_CONTROL, NO_CACHE_REVALIDATE) + .header(CONNECTION, KEEP_ALIVE) + .header(TRANSFER_ENCODING, CHUNKED) + .body(Body::from_stream(stream)) + .unwrap() +}