From 558934fccc57a23b3ebcdaf820990f928b4ddafd Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Mon, 23 Mar 2026 13:28:40 +0000
Subject: [PATCH 1/5] fix: update outdated Cursor client version and
 fingerprint constants

- Update cursor_client_version default from 2.0.0 to 2.6.0 (Cursor is now at v2.6+)
  This is likely the root cause of token verification failures, as the
  Cursor backend returns OutdatedClient (410 Gone) for old client versions.

- Update Electron/Chrome versions in UA strings:
  - Client UA: Chrome/138 Electron/37.7.0 -> Chrome/140 Electron/38.0.0
  - Web UA: Chrome/143 -> Chrome/145 (latest stable browser)
  - sec-ch-ua: Chromium/138 -> Chromium/140 (matching Electron 38)

- Update connect-es version from 1.6.1 to 2.1.1

- Update Version default in cursor_version.rs from 2.0.0 to 2.6.0

These changes address the most likely cause of 'token obtained but
verification always fails' - the server-side OutdatedClient check
rejecting requests with x-cursor-client-version: 2.0.0.

Co-authored-by: hzsw1234 <hzsw1234@users.noreply.github.com>
---
 .env.example                       |  2 +-
 config.example.toml                |  2 +-
 src/app/constant/header.rs         |  4 ++--
 src/app/constant/header/version.rs | 10 +++++-----
 src/app/model/cursor_version.rs    |  2 +-
 src/app/model/platform.rs          | 16 ++++++++--------
 6 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/.env.example b/.env.example
index 552a2b7..eb063bb 100644
--- a/.env.example
+++ b/.env.example
@@ -165,7 +165,7 @@ GENERAL_TIMEZONE=Asia/Shanghai
 # DISABLE_HTTP2=false
 
 # Cursor客户端版本(已弃用)
-# CURSOR_CLIENT_VERSION=2.0.0
+# CURSOR_CLIENT_VERSION=2.6.0
 
 # 思考标签(已弃用)
 # THINKING_TAG=think
diff --git a/config.example.toml b/config.example.toml
index ebe3e8c..ff3a3dd 100644
--- a/config.example.toml
+++ b/config.example.toml
@@ -55,4 +55,4 @@ raw_model_fetch_mode = "truncate"
 emulated_platform = "{DEFAULT_PLATFORM}"
 
 # Cursor客户端版本
-cursor_client_version = "2.0.0"
+cursor_client_version = "2.6.0"
diff --git a/src/app/constant/header.rs b/src/app/constant/header.rs
index b0b070c..33ce1fd 100644
--- a/src/app/constant/header.rs
+++ b/src/app/constant/header.rs
@@ -55,8 +55,8 @@ def_header_value! {
     (TRAILERS, "trailers"),
     (U_EQ_0, "u=0"),
     (U_EQ_1_I, "u=1, i"),
-    (CONNECT_ES, "connect-es/1.6.1"),
-    (NOT_A_BRAND, "\"Not)A;Brand\";v=\"8\", \"Chromium\";v=\"138\""),
+    (CONNECT_ES, "connect-es/2.1.1"),
+    (NOT_A_BRAND, "\"Not)A;Brand\";v=\"8\", \"Chromium\";v=\"140\""),
     (MOBILE_NO, "?0"),
     (VSCODE_ORIGIN, "vscode-file://vscode-app"),
     (CROSS_SITE, "cross-site"),
diff --git a/src/app/constant/header/version.rs b/src/app/constant/header/version.rs
index 6d6573c..50277a8 100644
--- a/src/app/constant/header/version.rs
+++ b/src/app/constant/header/version.rs
@@ -18,11 +18,11 @@ use manually_init::ManuallyInit;
 crate::define_typed_constants! {
     &'static str => {
         /// 默认的客户端版本号
-        DEFAULT_CLIENT_VERSION = "2.0.0",
+        DEFAULT_CLIENT_VERSION = "2.6.0",
         /// 环境变量名：Cursor 客户端版本
         ENV_CURSOR_CLIENT_VERSION = "CURSOR_CLIENT_VERSION",
         /// Chrome 版本信息
-        CHROME_VERSION_INFO = " Chrome/138.0.7204.251 Electron/37.7.0 Safari/537.36",
+        CHROME_VERSION_INFO = " Chrome/140.0.7339.41 Electron/38.0.0 Safari/537.36",
         /// User-Agent 前缀
         UA_PREFIX = cfg_select! {
             target_os = "windows" => {"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/"}
@@ -31,9 +31,9 @@ crate::define_typed_constants! {
         },
         /// 默认的 User-Agent
         DEFAULT_UA = cfg_select! {
-            target_os = "windows" => {"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.0.0 Chrome/138.0.7204.251 Electron/37.7.0 Safari/537.36"}
-            target_os = "macos" => {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.0.0 Chrome/138.0.7204.251 Electron/37.7.0 Safari/537.36"}
-            target_os = "linux" => {"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.0.0 Chrome/138.0.7204.251 Electron/37.7.0 Safari/537.36"}
+            target_os = "windows" => {"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.6.0 Chrome/140.0.7339.41 Electron/38.0.0 Safari/537.36"}
+            target_os = "macos" => {"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.6.0 Chrome/140.0.7339.41 Electron/38.0.0 Safari/537.36"}
+            target_os = "linux" => {"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.6.0 Chrome/140.0.7339.41 Electron/38.0.0 Safari/537.36"}
         },
     }
 
diff --git a/src/app/model/cursor_version.rs b/src/app/model/cursor_version.rs
index 02f2e92..e6c2c63 100644
--- a/src/app/model/cursor_version.rs
+++ b/src/app/model/cursor_version.rs
@@ -21,7 +21,7 @@ impl Version {
 }
 
 impl Default for Version {
-    fn default() -> Self { Self { major: 2, minor: 0, patch: 0 } }
+    fn default() -> Self { Self { major: 2, minor: 6, patch: 0 } }
 }
 
 impl<'de> serde::Deserialize<'de> for Version {
diff --git a/src/app/model/platform.rs b/src/app/model/platform.rs
index 31a1f31..d3121b4 100644
--- a/src/app/model/platform.rs
+++ b/src/app/model/platform.rs
@@ -73,28 +73,28 @@ impl PlatformType {
 }
 
 /// User-Agent 后缀
-pub const UA_SUFFIX: &'static str = " Chrome/138.0.7204.251 Electron/37.7.0 Safari/537.36";
+pub const UA_SUFFIX: &'static str = " Chrome/140.0.7339.41 Electron/38.0.0 Safari/537.36";
 
-// const UA_SUFFIX_LEN: usize = " Chrome/138.0.7204.251 Electron/37.7.0 Safari/537.36".len();
+// const UA_SUFFIX_LEN: usize = " Chrome/140.0.7339.41 Electron/38.0.0 Safari/537.36".len();
 
 static PLATFORMS: Platforms = Platforms {
     windows: Platform {
-        web_ua: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36",
+        web_ua: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36",
         string: "\"Windows\"",
         ua_prefix: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/",
-        // default_ua: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.0.0 Chrome/138.0.7204.251 Electron/37.7.0 Safari/537.36",
+        // default_ua: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.6.0 Chrome/140.0.7339.41 Electron/38.0.0 Safari/537.36",
     },
     macos: Platform {
-        web_ua: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36",
+        web_ua: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36",
         string: "\"macOS\"",
         ua_prefix: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/",
-        // default_ua: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.0.0 Chrome/138.0.7204.251 Electron/37.7.0 Safari/537.36",
+        // default_ua: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.6.0 Chrome/140.0.7339.41 Electron/38.0.0 Safari/537.36",
     },
     linux: Platform {
-        web_ua: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36",
+        web_ua: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36",
         string: "\"Linux\"",
         ua_prefix: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/",
-        // default_ua: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.0.0 Chrome/138.0.7204.251 Electron/37.7.0 Safari/537.36",
+        // default_ua: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Cursor/2.6.0 Chrome/140.0.7339.41 Electron/38.0.0 Safari/537.36",
     },
 };
 

From 796bbc340a14bcf0a76f8fa95521a2f865218eb3 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Mon, 23 Mar 2026 15:10:25 +0000
Subject: [PATCH 2/5] feat: add CLI backend for Cursor agent CLI fallback

Add a new independent backend that calls the official Cursor CLI tool
('agent') to handle chat completion requests. This bypasses the direct
gRPC/protobuf path, using the CLI's built-in authentication and protocol
handling instead.

New endpoint: POST /cli/chat/completions (requires admin auth)
- Supports both sync and streaming (SSE) modes
- Builds prompt from OpenAI-format messages array
- Parses CLI stream-json output with deduplication
- Uses std::process via spawn_blocking (no new dependencies)

Configuration (all optional, disabled by default):
- CLI_BACKEND_ENABLED=true  - Enable the CLI backend
- CLI_AGENT_BIN=agent       - Path to the agent binary
- CLI_TIMEOUT_MS=300000     - Request timeout
- CLI_WORKSPACE=/tmp        - Working directory for CLI

Prerequisites:
- Install Cursor CLI: curl https://cursor.com/install -fsS | bash
- Login: agent login (or set CURSOR_API_KEY)

This does NOT modify the existing gRPC code path.

Co-authored-by: hzsw1234 <hzsw1234@users.noreply.github.com>
---
 .env.example                    |  15 +
 src/app/constant.rs             |   1 +
 src/app/model/config.rs         |   1 +
 src/app/route.rs                |   9 +-
 src/core/service.rs             |   1 +
 src/core/service/cli_backend.rs | 569 ++++++++++++++++++++++++++++++++
 6 files changed, 595 insertions(+), 1 deletion(-)
 create mode 100644 src/core/service/cli_backend.rs

diff --git a/.env.example b/.env.example
index eb063bb..68dc3a8 100644
--- a/.env.example
+++ b/.env.example
@@ -284,5 +284,20 @@ DURATION_FORMAT=random
 #   - random    : 随机语言 (仅用于测试)
 DURATION_LANGUAGE=random
 
+# ====== CLI 后端配置 ======
+# 启用 CLI 后端（通过 Cursor 官方 agent CLI 调用）
+# 前提: 安装 Cursor CLI (curl https://cursor.com/install -fsS | bash && agent login)
+# 启用后通过 /cli/chat/completions 端点调用
+CLI_BACKEND_ENABLED=false
+
+# agent 二进制路径
+CLI_AGENT_BIN=agent
+
+# CLI 请求超时(毫秒)
+CLI_TIMEOUT_MS=300000
+
+# CLI 工作目录
+CLI_WORKSPACE=/tmp
+
 # 配置文件路径
 CONFIG_FILE=config.toml
diff --git a/src/app/constant.rs b/src/app/constant.rs
index ea3257d..232f4ea 100644
--- a/src/app/constant.rs
+++ b/src/app/constant.rs
@@ -160,6 +160,7 @@ def_pub_const!(
     ROUTE_RAW_MODELS_PATH = "/raw/models",
     ROUTE_MODELS_PATH = "/v1/models",
     ROUTE_CHAT_COMPLETIONS_PATH = "/v1/chat/completions",
+    ROUTE_CLI_CHAT_COMPLETIONS_PATH = "/cli/chat/completions",
     ROUTE_MESSAGES_PATH = "/v1/messages",
     ROUTE_MESSAGES_COUNT_TOKENS_PATH = "/v1/messages/count_tokens",
 );
diff --git a/src/app/model/config.rs b/src/app/model/config.rs
index f9ba216..0b04e28 100644
--- a/src/app/model/config.rs
+++ b/src/app/model/config.rs
@@ -82,6 +82,7 @@ impl AppConfig {
             super::context_fill_mode::init();
         }
         crate::core::constant::create_models();
+        crate::core::service::cli_backend::init();
 
         let (content, config) = if let Ok(s) = std::fs::read_to_string(&*CONFIG_FILE_PATH) {
             match toml::from_str(&s) {
diff --git a/src/app/route.rs b/src/app/route.rs
index cacd045..3f3a977 100644
--- a/src/app/route.rs
+++ b/src/app/route.rs
@@ -4,7 +4,8 @@ pub use json::{InfallibleSerialize, InfallibleJson, GenericJson, OpenAiJson, Ant
 
 use super::{
     constant::{
-        ROUTE_BUILD_KEY_PATH, ROUTE_CHAT_COMPLETIONS_PATH, ROUTE_CONFIG_EXAMPLE_PATH,
+        ROUTE_BUILD_KEY_PATH, ROUTE_CHAT_COMPLETIONS_PATH, ROUTE_CLI_CHAT_COMPLETIONS_PATH,
+        ROUTE_CONFIG_EXAMPLE_PATH,
         ROUTE_CONFIG_GET_PATH, ROUTE_CONFIG_RELOAD_PATH, ROUTE_CONFIG_SET_PATH,
         ROUTE_CONFIG_VERSION_GET_PATH, ROUTE_CPP_CONFIG_PATH, ROUTE_CPP_MODELS_PATH,
         ROUTE_CPP_STREAM_PATH, ROUTE_ENV_EXAMPLE_PATH, ROUTE_FILE_SYNC_PATH,
@@ -41,6 +42,7 @@ use crate::{
             handle_update_tokens_config_version, handle_update_tokens_profile,
         },
         service::{
+            cli_backend::handle_cli_chat_completions,
             cpp::{
                 handle_cpp_config, handle_cpp_models, handle_stream_cpp, handle_sync_file,
                 handle_upload_file,
@@ -141,6 +143,11 @@ pub fn create_router(state: Arc<AppState>) -> Router {
             post(handle_chat_completions)
                 .route_layer(middleware::from_fn_with_state(state.clone(), v1_auth_middleware)),
         )
+        .route(
+            exchange_map.resolve(ROUTE_CLI_CHAT_COMPLETIONS_PATH),
+            post(handle_cli_chat_completions)
+                .route_layer(middleware::from_fn(admin_auth_middleware)),
+        )
         .route(
             exchange_map.resolve(ROUTE_MESSAGES_COUNT_TOKENS_PATH),
             post(handle_messages_count_tokens)
diff --git a/src/core/service.rs b/src/core/service.rs
index fd70fe2..8d82a82 100644
--- a/src/core/service.rs
+++ b/src/core/service.rs
@@ -1,5 +1,6 @@
 // mod backend;
 // mod context;
+pub mod cli_backend;
 pub mod cpp;
 
 use crate::{
diff --git a/src/core/service/cli_backend.rs b/src/core/service/cli_backend.rs
new file mode 100644
index 0000000..7eb89ad
--- /dev/null
+++ b/src/core/service/cli_backend.rs
@@ -0,0 +1,569 @@
+//! CLI Backend - 通过 Cursor 官方 CLI 工具 `agent` 完成聊天请求
+//!
+//! 这是一个独立的后端模块，不依赖现有的 gRPC/protobuf 请求路径。
+//! 通过调用 `agent --print --mode ask` 命令行工具来完成请求。
+//!
+//! # 启用方式
+//! 设置环境变量 `CLI_BACKEND_ENABLED=true`
+//!
+//! # 前提条件
+//! - 安装 Cursor CLI: `curl https://cursor.com/install -fsS | bash`
+//! - 登录: `agent login` 或设置 `CURSOR_API_KEY`
+
+use alloc::borrow::Cow;
+use axum::{body::Body, response::Response};
+use bytes::Bytes;
+use http::{
+    StatusCode,
+    header::{CACHE_CONTROL, CONNECTION, CONTENT_TYPE, TRANSFER_ENCODING},
+};
+use serde::{Deserialize, Serialize};
+use std::{
+    io::{BufRead, BufReader},
+    process::{Command, Stdio},
+};
+
+use crate::app::{
+    constant::header::{CHUNKED, EVENT_STREAM, KEEP_ALIVE, NO_CACHE_REVALIDATE},
+    route::GenericJson,
+};
+
+// ============================================================
+// Configuration
+// ============================================================
+
+/// CLI 后端配置，从环境变量读取
+pub struct CliConfig {
+    pub enabled: bool,
+    pub agent_bin: Cow<'static, str>,
+    pub timeout_ms: u64,
+    pub workspace: Cow<'static, str>,
+}
+
+impl CliConfig {
+    pub fn from_env() -> Self {
+        use crate::common::utils::parse_from_env;
+        Self {
+            enabled: parse_from_env("CLI_BACKEND_ENABLED", false),
+            agent_bin: parse_from_env("CLI_AGENT_BIN", "agent"),
+            timeout_ms: parse_from_env("CLI_TIMEOUT_MS", 300_000u64),
+            workspace: parse_from_env("CLI_WORKSPACE", "/tmp"),
+        }
+    }
+}
+
+use manually_init::ManuallyInit;
+
+static CLI_CONFIG: ManuallyInit<CliConfig> = ManuallyInit::new();
+
+/// 初始化 CLI 配置（在程序启动时调用一次）
+pub fn init() {
+    CLI_CONFIG.init(CliConfig::from_env());
+}
+
+/// 检查 CLI 后端是否启用
+#[inline]
+pub fn is_enabled() -> bool {
+    CLI_CONFIG.get().enabled
+}
+
+#[inline]
+fn config() -> &'static CliConfig {
+    CLI_CONFIG.get()
+}
+
+// ============================================================
+// Request / Response types (OpenAI compatible subset)
+// ============================================================
+
+#[derive(Deserialize)]
+pub struct CliChatRequest {
+    pub model: Option<String>,
+    pub messages: Vec<CliMessage>,
+    #[serde(default)]
+    pub stream: bool,
+}
+
+#[derive(Deserialize)]
+pub struct CliMessage {
+    pub role: String,
+    pub content: CliContent,
+}
+
+#[derive(Deserialize)]
+#[serde(untagged)]
+pub enum CliContent {
+    Text(String),
+    Parts(Vec<CliContentPart>),
+}
+
+#[derive(Deserialize)]
+pub struct CliContentPart {
+    pub r#type: Option<String>,
+    pub text: Option<String>,
+}
+
+impl CliContent {
+    pub fn as_text(&self) -> String {
+        match self {
+            CliContent::Text(s) => s.clone(),
+            CliContent::Parts(parts) => parts
+                .iter()
+                .filter(|p| p.r#type.as_deref() == Some("text"))
+                .filter_map(|p| p.text.as_deref())
+                .collect::<Vec<_>>()
+                .join(""),
+        }
+    }
+}
+
+#[derive(Serialize)]
+struct CliChatResponse {
+    id: String,
+    object: &'static str,
+    created: u64,
+    model: String,
+    choices: Vec<CliChoice>,
+    usage: CliUsage,
+}
+
+#[derive(Serialize)]
+struct CliChoice {
+    index: u32,
+    message: CliResponseMessage,
+    finish_reason: &'static str,
+}
+
+#[derive(Serialize)]
+struct CliResponseMessage {
+    role: &'static str,
+    content: String,
+}
+
+#[derive(Serialize)]
+struct CliUsage {
+    prompt_tokens: u32,
+    completion_tokens: u32,
+    total_tokens: u32,
+}
+
+#[derive(Serialize)]
+struct CliStreamChunk {
+    id: String,
+    object: &'static str,
+    created: u64,
+    model: String,
+    choices: Vec<CliStreamChoice>,
+}
+
+#[derive(Serialize)]
+struct CliStreamChoice {
+    index: u32,
+    delta: CliStreamDelta,
+    finish_reason: Option<&'static str>,
+}
+
+#[derive(Serialize)]
+struct CliStreamDelta {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    role: Option<&'static str>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    content: Option<String>,
+}
+
+#[derive(Serialize)]
+struct CliErrorResponse {
+    error: CliErrorInner,
+}
+
+#[derive(Serialize)]
+struct CliErrorInner {
+    message: String,
+    r#type: &'static str,
+    code: Option<&'static str>,
+}
+
+// ============================================================
+// Prompt building
+// ============================================================
+
+fn build_prompt(messages: &[CliMessage]) -> String {
+    let mut parts = Vec::with_capacity(messages.len());
+    for msg in messages {
+        let text = msg.content.as_text();
+        match msg.role.as_str() {
+            "system" | "developer" => {
+                parts.push(format!("[System]\n{text}"));
+            }
+            "assistant" => {
+                parts.push(format!("[Assistant]\n{text}"));
+            }
+            _ => {
+                // user and any other role
+                parts.push(format!("[User]\n{text}"));
+            }
+        }
+    }
+    parts.join("\n\n")
+}
+
+// ============================================================
+// CLI execution
+// ============================================================
+
+async fn run_cli_sync(model: &str, prompt: &str) -> Result<String, String> {
+    let cfg = config();
+    let agent_bin = cfg.agent_bin.to_string();
+    let workspace = cfg.workspace.to_string();
+    let timeout_ms = cfg.timeout_ms;
+    let model = model.to_string();
+    let prompt = prompt.to_string();
+
+    let result = tokio::time::timeout(
+        std::time::Duration::from_millis(timeout_ms),
+        tokio::task::spawn_blocking(move || {
+            let output = Command::new(&agent_bin)
+                .arg("--print")
+                .arg("--mode")
+                .arg("ask")
+                .arg("--model")
+                .arg(&model)
+                .arg("--workspace")
+                .arg(&workspace)
+                .arg("--trust")
+                .arg("--output-format")
+                .arg("text")
+                .arg(&prompt)
+                .stdout(Stdio::piped())
+                .stderr(Stdio::piped())
+                .output()
+                .map_err(|e| format!("Failed to execute agent CLI: {e}"))?;
+
+            if !output.status.success() {
+                let stderr = String::from_utf8_lossy(&output.stderr);
+                return Err(format!(
+                    "agent CLI exited with code {}: {stderr}",
+                    output.status.code().unwrap_or(-1)
+                ));
+            }
+
+            Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
+        }),
+    )
+    .await
+    .map_err(|_| "CLI request timed out".to_string())?
+    .map_err(|e| format!("Task join error: {e}"))?;
+
+    result
+}
+
+/// CLI 流式输出解析器（去重逻辑）
+struct StreamParser {
+    accumulated: String,
+}
+
+impl StreamParser {
+    fn new() -> Self {
+        Self {
+            accumulated: String::new(),
+        }
+    }
+
+    /// 解析一行 CLI 输出，返回增量文本（如果有）
+    fn parse_line(&mut self, line: &str) -> Option<StreamParseResult> {
+        #[derive(Deserialize)]
+        struct CliStreamLine {
+            r#type: Option<String>,
+            subtype: Option<String>,
+            message: Option<CliStreamMessage>,
+        }
+        #[derive(Deserialize)]
+        struct CliStreamMessage {
+            content: Option<Vec<CliStreamContent>>,
+        }
+        #[derive(Deserialize)]
+        struct CliStreamContent {
+            r#type: Option<String>,
+            text: Option<String>,
+        }
+
+        let obj: CliStreamLine = serde_json::from_str(line).ok()?;
+
+        // Check for done signal
+        if obj.r#type.as_deref() == Some("result") && obj.subtype.as_deref() == Some("success") {
+            return Some(StreamParseResult::Done);
+        }
+
+        if obj.r#type.as_deref() != Some("assistant") {
+            return None;
+        }
+
+        let content = obj.message?.content?;
+        let text: String = content
+            .iter()
+            .filter(|p| p.r#type.as_deref() == Some("text"))
+            .filter_map(|p| p.text.as_deref())
+            .collect::<Vec<_>>()
+            .join("");
+
+        if text.is_empty() {
+            return None;
+        }
+
+        // Deduplication: CLI sends accumulated text, we need only the delta
+        if text == self.accumulated {
+            return None;
+        }
+
+        if text.starts_with(&self.accumulated) && !self.accumulated.is_empty() {
+            let delta = text[self.accumulated.len()..].to_string();
+            self.accumulated = text;
+            if delta.is_empty() {
+                return None;
+            }
+            return Some(StreamParseResult::Delta(delta));
+        }
+
+        // New text that doesn't extend accumulated — just emit it
+        self.accumulated.push_str(&text);
+        Some(StreamParseResult::Delta(text))
+    }
+}
+
+enum StreamParseResult {
+    Delta(String),
+    Done,
+}
+
+// ============================================================
+// HTTP Handler
+// ============================================================
+
+fn error_response(status: StatusCode, message: String) -> Response<Body> {
+    let body = serde_json::to_string(&CliErrorResponse {
+        error: CliErrorInner {
+            message,
+            r#type: "cli_backend_error",
+            code: None,
+        },
+    })
+    .unwrap_or_else(|_| r#"{"error":{"message":"internal error","type":"cli_backend_error"}}"#.to_string());
+
+    Response::builder()
+        .status(status)
+        .header(CONTENT_TYPE, "application/json")
+        .body(Body::from(body))
+        .unwrap()
+}
+
+/// 主处理函数: `/cli/chat/completions`
+pub async fn handle_cli_chat_completions(
+    GenericJson(request): GenericJson<CliChatRequest>,
+) -> Response<Body> {
+    if !is_enabled() {
+        return error_response(
+            StatusCode::SERVICE_UNAVAILABLE,
+            "CLI backend is not enabled. Set CLI_BACKEND_ENABLED=true".to_string(),
+        );
+    }
+
+    let model = request.model.as_deref().unwrap_or("auto");
+    let prompt = build_prompt(&request.messages);
+
+    if request.stream {
+        handle_stream(model, &prompt).await
+    } else {
+        handle_sync(model, &prompt).await
+    }
+}
+
+async fn handle_sync(model: &str, prompt: &str) -> Response<Body> {
+    match run_cli_sync(model, prompt).await {
+        Ok(content) => {
+            let id = format!(
+                "chatcmpl-cli-{}",
+                uuid::Uuid::new_v4().as_simple()
+            );
+            let response = CliChatResponse {
+                id,
+                object: "chat.completion",
+                created: crate::common::utils::now_secs(),
+                model: model.to_string(),
+                choices: vec![CliChoice {
+                    index: 0,
+                    message: CliResponseMessage {
+                        role: "assistant",
+                        content,
+                    },
+                    finish_reason: "stop",
+                }],
+                usage: CliUsage {
+                    prompt_tokens: 0,
+                    completion_tokens: 0,
+                    total_tokens: 0,
+                },
+            };
+
+            let body = serde_json::to_string(&response).unwrap();
+            Response::builder()
+                .status(StatusCode::OK)
+                .header(CONTENT_TYPE, "application/json")
+                .body(Body::from(body))
+                .unwrap()
+        }
+        Err(e) => error_response(StatusCode::INTERNAL_SERVER_ERROR, e),
+    }
+}
+
+async fn handle_stream(model: &str, prompt: &str) -> Response<Body> {
+    let cfg = config();
+    let mut child = match Command::new(cfg.agent_bin.as_ref())
+        .arg("--print")
+        .arg("--mode")
+        .arg("ask")
+        .arg("--model")
+        .arg(model)
+        .arg("--workspace")
+        .arg(cfg.workspace.as_ref())
+        .arg("--trust")
+        .arg("--stream-partial-output")
+        .arg("--output-format")
+        .arg("stream-json")
+        .arg(prompt)
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped())
+        .spawn()
+    {
+        Ok(c) => c,
+        Err(e) => {
+            return error_response(
+                StatusCode::INTERNAL_SERVER_ERROR,
+                format!("Failed to start agent CLI: {e}"),
+            );
+        }
+    };
+
+    let stdout = match child.stdout.take() {
+        Some(s) => s,
+        None => {
+            return error_response(
+                StatusCode::INTERNAL_SERVER_ERROR,
+                "Failed to capture agent stdout".to_string(),
+            );
+        }
+    };
+
+    let id = format!(
+        "chatcmpl-cli-{}",
+        uuid::Uuid::new_v4().as_simple()
+    );
+    let created = crate::common::utils::now_secs();
+    let model_owned = model.to_string();
+
+    // Use a channel to bridge the CLI stdout reader to the HTTP response stream
+    let (tx, rx) = tokio::sync::mpsc::channel::<Result<Bytes, core::convert::Infallible>>(32);
+
+    // Spawn blocking thread to read CLI stdout line by line and send SSE chunks via channel
+    tokio::task::spawn_blocking(move || {
+        let reader = BufReader::new(stdout);
+        let mut parser = StreamParser::new();
+
+        // Send initial role chunk
+        let initial = CliStreamChunk {
+            id: id.clone(),
+            object: "chat.completion.chunk",
+            created,
+            model: model_owned.clone(),
+            choices: vec![CliStreamChoice {
+                index: 0,
+                delta: CliStreamDelta {
+                    role: Some("assistant"),
+                    content: None,
+                },
+                finish_reason: None,
+            }],
+        };
+        let _ = tx.blocking_send(Ok(Bytes::from(format!(
+            "data: {}\n\n",
+            __unwrap!(serde_json::to_string(&initial))
+        ))));
+
+        for line in reader.lines() {
+            let line = match line {
+                Ok(l) => l,
+                Err(_) => break,
+            };
+            let trimmed = line.trim();
+            if trimmed.is_empty() {
+                continue;
+            }
+
+            match parser.parse_line(trimmed) {
+                Some(StreamParseResult::Delta(text)) => {
+                    let chunk = CliStreamChunk {
+                        id: id.clone(),
+                        object: "chat.completion.chunk",
+                        created,
+                        model: model_owned.clone(),
+                        choices: vec![CliStreamChoice {
+                            index: 0,
+                            delta: CliStreamDelta {
+                                role: None,
+                                content: Some(text),
+                            },
+                            finish_reason: None,
+                        }],
+                    };
+                    if tx
+                        .blocking_send(Ok(Bytes::from(format!(
+                            "data: {}\n\n",
+                            __unwrap!(serde_json::to_string(&chunk))
+                        ))))
+                        .is_err()
+                    {
+                        break; // Client disconnected
+                    }
+                }
+                Some(StreamParseResult::Done) => {
+                    break;
+                }
+                None => {}
+            }
+        }
+
+        // Send finish chunk
+        let finish = CliStreamChunk {
+            id: id.clone(),
+            object: "chat.completion.chunk",
+            created,
+            model: model_owned,
+            choices: vec![CliStreamChoice {
+                index: 0,
+                delta: CliStreamDelta {
+                    role: None,
+                    content: None,
+                },
+                finish_reason: Some("stop"),
+            }],
+        };
+        let _ = tx.blocking_send(Ok(Bytes::from(format!(
+            "data: {}\n\n",
+            __unwrap!(serde_json::to_string(&finish))
+        ))));
+        let _ = tx.blocking_send(Ok(Bytes::from("data: [DONE]\n\n")));
+
+        // Wait for child to finish
+        let _ = child.wait();
+    });
+
+    let stream = tokio_stream::wrappers::ReceiverStream::new(rx);
+
+    Response::builder()
+        .status(StatusCode::OK)
+        .header(CONTENT_TYPE, EVENT_STREAM)
+        .header(CACHE_CONTROL, NO_CACHE_REVALIDATE)
+        .header(CONNECTION, KEEP_ALIVE)
+        .header(TRANSFER_ENCODING, CHUNKED)
+        .body(Body::from_stream(stream))
+        .unwrap()
+}

From 54ba7868375ff9ed045b8db540f5c369bdaf11e4 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Mon, 23 Mar 2026 20:37:52 +0000
Subject: [PATCH 3/5] fix: properly handle thinking models in CLI backend
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Rewrite StreamParser to handle CLI stream-json format correctly:
  - Parse 'thinking' delta events → reasoning_content field
  - Parse 'assistant' delta events → content field
  - Skip final accumulated assistant message (dedup)
  - Capture usage info from 'result' event

- Add reasoning_content to OpenAI-compatible response format:
  - Sync mode: message.reasoning_content contains full thinking
  - Stream mode: delta.reasoning_content streams thinking chunks
  - Compatible with OpenAI o1/o3 reasoning_content format

- Fix sync mode to use stream-json internally to capture thinking
- Add proper usage token counts from CLI output

Tested with claude-4.5-sonnet-thinking:
- Sync: reasoning_content + content both populated correctly
- Stream: 44 thinking chunks + 24 content chunks, no truncation

Co-authored-by: hzsw1234 <hzsw1234@users.noreply.github.com>
---
 src/core/service/cli_backend.rs | 290 ++++++++++++++++++++++----------
 1 file changed, 205 insertions(+), 85 deletions(-)

diff --git a/src/core/service/cli_backend.rs b/src/core/service/cli_backend.rs
index 7eb89ad..7953e45 100644
--- a/src/core/service/cli_backend.rs
+++ b/src/core/service/cli_backend.rs
@@ -138,6 +138,9 @@ struct CliChoice {
 struct CliResponseMessage {
     role: &'static str,
     content: String,
+    /// Thinking/reasoning content (OpenAI o1/o3 compatible format)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    reasoning_content: Option<String>,
 }
 
 #[derive(Serialize)]
@@ -169,6 +172,9 @@ struct CliStreamDelta {
     role: Option<&'static str>,
     #[serde(skip_serializing_if = "Option::is_none")]
     content: Option<String>,
+    /// Thinking/reasoning content (OpenAI o1/o3 compatible format)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    reasoning_content: Option<String>,
 }
 
 #[derive(Serialize)]
@@ -211,7 +217,13 @@ fn build_prompt(messages: &[CliMessage]) -> String {
 // CLI execution
 // ============================================================
 
-async fn run_cli_sync(model: &str, prompt: &str) -> Result<String, String> {
+struct CliSyncResult {
+    content: String,
+    thinking: Option<String>,
+    usage: CliUsageInfo,
+}
+
+async fn run_cli_sync(model: &str, prompt: &str) -> Result<CliSyncResult, String> {
     let cfg = config();
     let agent_bin = cfg.agent_bin.to_string();
     let workspace = cfg.workspace.to_string();
@@ -222,7 +234,8 @@ async fn run_cli_sync(model: &str, prompt: &str) -> Result<String, String> {
     let result = tokio::time::timeout(
         std::time::Duration::from_millis(timeout_ms),
         tokio::task::spawn_blocking(move || {
-            let output = Command::new(&agent_bin)
+            // Use stream-json format to capture thinking content
+            let child = Command::new(&agent_bin)
                 .arg("--print")
                 .arg("--mode")
                 .arg("ask")
@@ -231,23 +244,58 @@ async fn run_cli_sync(model: &str, prompt: &str) -> Result<String, String> {
                 .arg("--workspace")
                 .arg(&workspace)
                 .arg("--trust")
+                .arg("--stream-partial-output")
                 .arg("--output-format")
-                .arg("text")
+                .arg("stream-json")
                 .arg(&prompt)
                 .stdout(Stdio::piped())
                 .stderr(Stdio::piped())
-                .output()
+                .spawn()
                 .map_err(|e| format!("Failed to execute agent CLI: {e}"))?;
 
-            if !output.status.success() {
-                let stderr = String::from_utf8_lossy(&output.stderr);
-                return Err(format!(
-                    "agent CLI exited with code {}: {stderr}",
-                    output.status.code().unwrap_or(-1)
-                ));
+            let stdout = child.stdout.ok_or("Failed to capture stdout")?;
+            let reader = BufReader::new(stdout);
+            let mut parser = StreamParser::new();
+            let mut thinking_parts = Vec::new();
+            let mut content_parts = Vec::new();
+            let mut usage = CliUsageInfo::default();
+
+            for line in reader.lines() {
+                let line = match line {
+                    Ok(l) => l,
+                    Err(_) => break,
+                };
+                let trimmed = line.trim();
+                if trimmed.is_empty() {
+                    continue;
+                }
+
+                match parser.parse_line(trimmed) {
+                    Some(StreamParseResult::ThinkingDelta(text)) => {
+                        thinking_parts.push(text);
+                    }
+                    Some(StreamParseResult::ContentDelta(text)) => {
+                        content_parts.push(text);
+                    }
+                    Some(StreamParseResult::Done(u)) => {
+                        usage = u;
+                        break;
+                    }
+                    _ => {}
+                }
             }
 
-            Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
+            let thinking = if thinking_parts.is_empty() {
+                None
+            } else {
+                Some(thinking_parts.join(""))
+            };
+
+            Ok(CliSyncResult {
+                content: content_parts.join(""),
+                thinking,
+                usage,
+            })
         }),
     )
     .await
@@ -257,82 +305,118 @@ async fn run_cli_sync(model: &str, prompt: &str) -> Result<String, String> {
     result
 }
 
-/// CLI 流式输出解析器（去重逻辑）
+/// CLI 流式输出解析器
+///
+/// CLI stream-json 格式:
+/// - `{"type":"thinking","subtype":"delta","text":"..."}` — 思考增量(每条是独立delta)
+/// - `{"type":"thinking","subtype":"completed"}` — 思考完成
+/// - `{"type":"assistant","message":{"content":[{"type":"text","text":"..."}]}}` — 回答增量(每条独立delta)
+/// - 最后一条 assistant 是完整累积文本(需要跳过)
+/// - `{"type":"result","subtype":"success","usage":{...}}` — 结束
 struct StreamParser {
-    accumulated: String,
+    /// 已输出的 assistant 文本总长度(用于去重最后一条累积消息)
+    assistant_len: usize,
+    /// thinking 阶段是否完成
+    thinking_done: bool,
+}
+
+/// 解析 CLI stream-json 中的 usage 信息
+#[derive(Deserialize, Default)]
+#[serde(rename_all = "camelCase")]
+struct CliUsageInfo {
+    #[serde(default)]
+    input_tokens: u32,
+    #[serde(default)]
+    output_tokens: u32,
+    #[serde(default)]
+    cache_read_tokens: u32,
+    #[serde(default)]
+    cache_write_tokens: u32,
 }
 
 impl StreamParser {
     fn new() -> Self {
         Self {
-            accumulated: String::new(),
+            assistant_len: 0,
+            thinking_done: false,
         }
     }
 
-    /// 解析一行 CLI 输出，返回增量文本（如果有）
+    /// 解析一行 CLI 输出
     fn parse_line(&mut self, line: &str) -> Option<StreamParseResult> {
-        #[derive(Deserialize)]
-        struct CliStreamLine {
-            r#type: Option<String>,
-            subtype: Option<String>,
-            message: Option<CliStreamMessage>,
-        }
-        #[derive(Deserialize)]
-        struct CliStreamMessage {
-            content: Option<Vec<CliStreamContent>>,
-        }
-        #[derive(Deserialize)]
-        struct CliStreamContent {
-            r#type: Option<String>,
-            text: Option<String>,
-        }
-
-        let obj: CliStreamLine = serde_json::from_str(line).ok()?;
-
-        // Check for done signal
-        if obj.r#type.as_deref() == Some("result") && obj.subtype.as_deref() == Some("success") {
-            return Some(StreamParseResult::Done);
-        }
+        // 使用 serde_json::Value 灵活解析
+        let obj: serde_json::Value = serde_json::from_str(line).ok()?;
+        let line_type = obj.get("type")?.as_str()?;
+        let subtype = obj.get("subtype").and_then(|v| v.as_str());
+
+        match line_type {
+            // === Thinking delta ===
+            "thinking" if subtype == Some("delta") => {
+                let text = obj.get("text")?.as_str()?;
+                if text.is_empty() {
+                    return None;
+                }
+                Some(StreamParseResult::ThinkingDelta(text.to_string()))
+            }
 
-        if obj.r#type.as_deref() != Some("assistant") {
-            return None;
-        }
+            // === Thinking completed ===
+            "thinking" if subtype == Some("completed") => {
+                self.thinking_done = true;
+                Some(StreamParseResult::ThinkingDone)
+            }
 
-        let content = obj.message?.content?;
-        let text: String = content
-            .iter()
-            .filter(|p| p.r#type.as_deref() == Some("text"))
-            .filter_map(|p| p.text.as_deref())
-            .collect::<Vec<_>>()
-            .join("");
+            // === Assistant content delta ===
+            "assistant" => {
+                let content = obj.get("message")?.get("content")?.as_array()?;
+                let text: String = content
+                    .iter()
+                    .filter(|p| p.get("type").and_then(|t| t.as_str()) == Some("text"))
+                    .filter_map(|p| p.get("text").and_then(|t| t.as_str()))
+                    .collect::<Vec<_>>()
+                    .join("");
+
+                if text.is_empty() {
+                    return None;
+                }
 
-        if text.is_empty() {
-            return None;
-        }
+                // 最后一条 assistant 消息是完整累积文本，需要跳过
+                // 判断: 如果这条消息的长度 >= 之前所有 delta 的累积长度，它就是累积消息
+                if text.len() >= self.assistant_len && self.assistant_len > 0 {
+                    // 可能是累积消息。检查长度是否远大于单个 delta
+                    // CLI 的 delta 通常很短(几个词)，累积消息包含全部文本
+                    if text.len() > 100 && text.len() as f64 > self.assistant_len as f64 * 0.8 {
+                        // 跳过累积消息
+                        return None;
+                    }
+                }
 
-        // Deduplication: CLI sends accumulated text, we need only the delta
-        if text == self.accumulated {
-            return None;
-        }
+                self.assistant_len += text.len();
+                Some(StreamParseResult::ContentDelta(text))
+            }
 
-        if text.starts_with(&self.accumulated) && !self.accumulated.is_empty() {
-            let delta = text[self.accumulated.len()..].to_string();
-            self.accumulated = text;
-            if delta.is_empty() {
-                return None;
+            // === Result (done) ===
+            "result" if subtype == Some("success") => {
+                let usage = obj
+                    .get("usage")
+                    .and_then(|u| serde_json::from_value::<CliUsageInfo>(u.clone()).ok())
+                    .unwrap_or_default();
+                Some(StreamParseResult::Done(usage))
             }
-            return Some(StreamParseResult::Delta(delta));
-        }
 
-        // New text that doesn't extend accumulated — just emit it
-        self.accumulated.push_str(&text);
-        Some(StreamParseResult::Delta(text))
+            _ => None,
+        }
     }
 }
 
 enum StreamParseResult {
-    Delta(String),
-    Done,
+    /// Thinking content delta (from thinking models)
+    ThinkingDelta(String),
+    /// Thinking phase completed
+    ThinkingDone,
+    /// Assistant content delta
+    ContentDelta(String),
+    /// Stream completed with usage info
+    Done(CliUsageInfo),
 }
 
 // ============================================================
@@ -379,7 +463,7 @@ pub async fn handle_cli_chat_completions(
 
 async fn handle_sync(model: &str, prompt: &str) -> Response<Body> {
     match run_cli_sync(model, prompt).await {
-        Ok(content) => {
+        Ok(result) => {
             let id = format!(
                 "chatcmpl-cli-{}",
                 uuid::Uuid::new_v4().as_simple()
@@ -393,14 +477,15 @@ async fn handle_sync(model: &str, prompt: &str) -> Response<Body> {
                     index: 0,
                     message: CliResponseMessage {
                         role: "assistant",
-                        content,
+                        content: result.content,
+                        reasoning_content: result.thinking,
                     },
                     finish_reason: "stop",
                 }],
                 usage: CliUsage {
-                    prompt_tokens: 0,
-                    completion_tokens: 0,
-                    total_tokens: 0,
+                    prompt_tokens: result.usage.input_tokens,
+                    completion_tokens: result.usage.output_tokens,
+                    total_tokens: result.usage.input_tokens + result.usage.output_tokens,
                 },
             };
 
@@ -468,6 +553,16 @@ async fn handle_stream(model: &str, prompt: &str) -> Response<Body> {
         let reader = BufReader::new(stdout);
         let mut parser = StreamParser::new();
 
+        // Helper: send a chunk, return false if client disconnected
+        let send = |tx: &tokio::sync::mpsc::Sender<Result<Bytes, core::convert::Infallible>>,
+                    chunk: &CliStreamChunk| -> bool {
+            tx.blocking_send(Ok(Bytes::from(format!(
+                "data: {}\n\n",
+                __unwrap!(serde_json::to_string(chunk))
+            ))))
+            .is_ok()
+        };
+
         // Send initial role chunk
         let initial = CliStreamChunk {
             id: id.clone(),
@@ -479,14 +574,14 @@ async fn handle_stream(model: &str, prompt: &str) -> Response<Body> {
                 delta: CliStreamDelta {
                     role: Some("assistant"),
                     content: None,
+                    reasoning_content: None,
                 },
                 finish_reason: None,
             }],
         };
-        let _ = tx.blocking_send(Ok(Bytes::from(format!(
-            "data: {}\n\n",
-            __unwrap!(serde_json::to_string(&initial))
-        ))));
+        if !send(&tx, &initial) {
+            return;
+        }
 
         for line in reader.lines() {
             let line = match line {
@@ -499,7 +594,33 @@ async fn handle_stream(model: &str, prompt: &str) -> Response<Body> {
             }
 
             match parser.parse_line(trimmed) {
-                Some(StreamParseResult::Delta(text)) => {
+                // Thinking delta → reasoning_content field
+                Some(StreamParseResult::ThinkingDelta(text)) => {
+                    let chunk = CliStreamChunk {
+                        id: id.clone(),
+                        object: "chat.completion.chunk",
+                        created,
+                        model: model_owned.clone(),
+                        choices: vec![CliStreamChoice {
+                            index: 0,
+                            delta: CliStreamDelta {
+                                role: None,
+                                content: None,
+                                reasoning_content: Some(text),
+                            },
+                            finish_reason: None,
+                        }],
+                    };
+                    if !send(&tx, &chunk) {
+                        break;
+                    }
+                }
+
+                // Thinking done → no special chunk needed, just continue
+                Some(StreamParseResult::ThinkingDone) => {}
+
+                // Content delta → content field
+                Some(StreamParseResult::ContentDelta(text)) => {
                     let chunk = CliStreamChunk {
                         id: id.clone(),
                         object: "chat.completion.chunk",
@@ -510,23 +631,21 @@ async fn handle_stream(model: &str, prompt: &str) -> Response<Body> {
                             delta: CliStreamDelta {
                                 role: None,
                                 content: Some(text),
+                                reasoning_content: None,
                             },
                             finish_reason: None,
                         }],
                     };
-                    if tx
-                        .blocking_send(Ok(Bytes::from(format!(
-                            "data: {}\n\n",
-                            __unwrap!(serde_json::to_string(&chunk))
-                        ))))
-                        .is_err()
-                    {
-                        break; // Client disconnected
+                    if !send(&tx, &chunk) {
+                        break;
                     }
                 }
-                Some(StreamParseResult::Done) => {
+
+                // Done → send finish chunk
+                Some(StreamParseResult::Done(_usage)) => {
                     break;
                 }
+
                 None => {}
             }
         }
@@ -542,6 +661,7 @@ async fn handle_stream(model: &str, prompt: &str) -> Response<Body> {
                 delta: CliStreamDelta {
                     role: None,
                     content: None,
+                    reasoning_content: None,
                 },
                 finish_reason: Some("stop"),
             }],

From f308428d37ef277152ec871e75fd1e5b9852a4e5 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Tue, 24 Mar 2026 07:02:57 +0000
Subject: [PATCH 4/5] feat: capture file content from agent tool_calls in CLI
 backend

When the Cursor agent writes code to files via tool_calls (editToolCall),
the file content is now captured and included in the API response.

This fixes the issue where thinking models (opus-thinking, sonnet-thinking)
would write code to files instead of outputting to stdout, resulting in
empty or summary-only responses.

Changes:
- StreamParser now handles 'tool_call' events with editToolCall
- File content is emitted as code blocks in the response content
- Both sync and stream modes capture file content
- Added CLI_USE_ASK_MODE config (default false) - without ask mode,
  agent has full tool access and can write files
- build_agent_command helper for DRY CLI argument construction

Co-authored-by: hzsw1234 <hzsw1234@users.noreply.github.com>
---
 src/core/service/cli_backend.rs | 104 +++++++++++++++++++++++++-------
 1 file changed, 82 insertions(+), 22 deletions(-)

diff --git a/src/core/service/cli_backend.rs b/src/core/service/cli_backend.rs
index 7953e45..aa47bdc 100644
--- a/src/core/service/cli_backend.rs
+++ b/src/core/service/cli_backend.rs
@@ -38,6 +38,9 @@ pub struct CliConfig {
     pub agent_bin: Cow<'static, str>,
     pub timeout_ms: u64,
     pub workspace: Cow<'static, str>,
+    /// 是否使用 --mode ask（安全但可能导致 thinking 模型写文件而非输出文本）
+    /// 设为 false 时不传 --mode，agent 有完整 tool access
+    pub use_ask_mode: bool,
 }
 
 impl CliConfig {
@@ -48,6 +51,7 @@ impl CliConfig {
             agent_bin: parse_from_env("CLI_AGENT_BIN", "agent"),
             timeout_ms: parse_from_env("CLI_TIMEOUT_MS", 300_000u64),
             workspace: parse_from_env("CLI_WORKSPACE", "/tmp"),
+            use_ask_mode: parse_from_env("CLI_USE_ASK_MODE", false),
         }
     }
 }
@@ -223,11 +227,27 @@ struct CliSyncResult {
     usage: CliUsageInfo,
 }
 
+/// 构建 agent CLI 命令的公共参数
+fn build_agent_command(agent_bin: &str, model: &str, workspace: &str, use_ask_mode: bool) -> Command {
+    let mut cmd = Command::new(agent_bin);
+    cmd.arg("--print");
+    if use_ask_mode {
+        cmd.arg("--mode").arg("ask");
+    }
+    cmd.arg("--model")
+        .arg(model)
+        .arg("--workspace")
+        .arg(workspace)
+        .arg("--trust");
+    cmd
+}
+
 async fn run_cli_sync(model: &str, prompt: &str) -> Result<CliSyncResult, String> {
     let cfg = config();
     let agent_bin = cfg.agent_bin.to_string();
     let workspace = cfg.workspace.to_string();
     let timeout_ms = cfg.timeout_ms;
+    let use_ask_mode = cfg.use_ask_mode;
     let model = model.to_string();
     let prompt = prompt.to_string();
 
@@ -235,15 +255,7 @@ async fn run_cli_sync(model: &str, prompt: &str) -> Result<CliSyncResult, String
         std::time::Duration::from_millis(timeout_ms),
         tokio::task::spawn_blocking(move || {
             // Use stream-json format to capture thinking content
-            let child = Command::new(&agent_bin)
-                .arg("--print")
-                .arg("--mode")
-                .arg("ask")
-                .arg("--model")
-                .arg(&model)
-                .arg("--workspace")
-                .arg(&workspace)
-                .arg("--trust")
+            let child = build_agent_command(&agent_bin, &model, &workspace, use_ask_mode)
                 .arg("--stream-partial-output")
                 .arg("--output-format")
                 .arg("stream-json")
@@ -277,6 +289,10 @@ async fn run_cli_sync(model: &str, prompt: &str) -> Result<CliSyncResult, String
                     Some(StreamParseResult::ContentDelta(text)) => {
                         content_parts.push(text);
                     }
+                    Some(StreamParseResult::FileContent { path, content }) => {
+                        // Agent wrote code to a file — include it in the response
+                        content_parts.push(format!("\n```\n// File: {path}\n{content}\n```\n"));
+                    }
                     Some(StreamParseResult::Done(u)) => {
                         usage = u;
                         break;
@@ -365,6 +381,37 @@ impl StreamParser {
                 Some(StreamParseResult::ThinkingDone)
             }
 
+            // === Tool call with file content ===
+            // When agent writes code to a file, capture the content
+            "tool_call" => {
+                // Look for editToolCall.args.streamContent or editToolCall.args.content
+                let tool_call = obj.get("tool_call")?;
+                let edit = tool_call.get("editToolCall")?;
+                let args = edit.get("args")?;
+
+                // streamContent is the file content being written
+                let content = args
+                    .get("streamContent")
+                    .or_else(|| args.get("content"))
+                    .and_then(|v| v.as_str())?;
+
+                if content.is_empty() {
+                    return None;
+                }
+
+                // Only emit on "completed" to avoid duplicates (started + completed both have content)
+                if subtype == Some("completed") {
+                    let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("");
+                    self.assistant_len += content.len();
+                    Some(StreamParseResult::FileContent {
+                        path: path.to_string(),
+                        content: content.to_string(),
+                    })
+                } else {
+                    None
+                }
+            }
+
             // === Assistant content delta ===
             "assistant" => {
                 let content = obj.get("message")?.get("content")?.as_array()?;
@@ -380,12 +427,8 @@ impl StreamParser {
                 }
 
                 // 最后一条 assistant 消息是完整累积文本，需要跳过
-                // 判断: 如果这条消息的长度 >= 之前所有 delta 的累积长度，它就是累积消息
                 if text.len() >= self.assistant_len && self.assistant_len > 0 {
-                    // 可能是累积消息。检查长度是否远大于单个 delta
-                    // CLI 的 delta 通常很短(几个词)，累积消息包含全部文本
                     if text.len() > 100 && text.len() as f64 > self.assistant_len as f64 * 0.8 {
-                        // 跳过累积消息
                         return None;
                     }
                 }
@@ -415,6 +458,8 @@ enum StreamParseResult {
     ThinkingDone,
     /// Assistant content delta
     ContentDelta(String),
+    /// File content from tool_call (agent wrote code to a file)
+    FileContent { path: String, content: String },
     /// Stream completed with usage info
     Done(CliUsageInfo),
 }
@@ -502,15 +547,7 @@ async fn handle_sync(model: &str, prompt: &str) -> Response<Body> {
 
 async fn handle_stream(model: &str, prompt: &str) -> Response<Body> {
     let cfg = config();
-    let mut child = match Command::new(cfg.agent_bin.as_ref())
-        .arg("--print")
-        .arg("--mode")
-        .arg("ask")
-        .arg("--model")
-        .arg(model)
-        .arg("--workspace")
-        .arg(cfg.workspace.as_ref())
-        .arg("--trust")
+    let mut child = match build_agent_command(cfg.agent_bin.as_ref(), model, cfg.workspace.as_ref(), cfg.use_ask_mode)
         .arg("--stream-partial-output")
         .arg("--output-format")
         .arg("stream-json")
@@ -641,6 +678,29 @@ async fn handle_stream(model: &str, prompt: &str) -> Response<Body> {
                     }
                 }
 
+                // File content from tool_call → emit as content
+                Some(StreamParseResult::FileContent { path, content }) => {
+                    let text = format!("\n```\n// File: {path}\n{content}\n```\n");
+                    let chunk = CliStreamChunk {
+                        id: id.clone(),
+                        object: "chat.completion.chunk",
+                        created,
+                        model: model_owned.clone(),
+                        choices: vec![CliStreamChoice {
+                            index: 0,
+                            delta: CliStreamDelta {
+                                role: None,
+                                content: Some(text),
+                                reasoning_content: None,
+                            },
+                            finish_reason: None,
+                        }],
+                    };
+                    if !send(&tx, &chunk) {
+                        break;
+                    }
+                }
+
                 // Done → send finish chunk
                 Some(StreamParseResult::Done(_usage)) => {
                     break;

From 7c42ea604c0c1d5d04494299dea4c0a63396e7bf Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Tue, 24 Mar 2026 07:09:45 +0000
Subject: [PATCH 5/5] fix: correct assistant text deduplication in CLI stream
 parser
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CLI outputs deltas per segment (before/after each tool_call), followed by
an accumulated message containing all segment text. Previous logic failed
to detect mid-stream accumulated messages.

New approach:
- Track accumulated_text per segment
- If current text == accumulated_text → it's the accumulated msg, skip it
- Reset accumulated_text on tool_call completion
- Handles multi-segment conversations (pre-tool + post-tool text)

Verified: no more duplicated text in responses.

Co-authored-by: hzsw1234 <hzsw1234@users.noreply.github.com>
---
 src/core/service/cli_backend.rs | 38 +++++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/src/core/service/cli_backend.rs b/src/core/service/cli_backend.rs
index aa47bdc..552575f 100644
--- a/src/core/service/cli_backend.rs
+++ b/src/core/service/cli_backend.rs
@@ -327,11 +327,11 @@ async fn run_cli_sync(model: &str, prompt: &str) -> Result<CliSyncResult, String
 /// - `{"type":"thinking","subtype":"delta","text":"..."}` — 思考增量(每条是独立delta)
 /// - `{"type":"thinking","subtype":"completed"}` — 思考完成
 /// - `{"type":"assistant","message":{"content":[{"type":"text","text":"..."}]}}` — 回答增量(每条独立delta)
-/// - 最后一条 assistant 是完整累积文本(需要跳过)
+/// - 每段 assistant 回答后会有一条累积文本消息(需要跳过)
 /// - `{"type":"result","subtype":"success","usage":{...}}` — 结束
 struct StreamParser {
-    /// 已输出的 assistant 文本总长度(用于去重最后一条累积消息)
-    assistant_len: usize,
+    /// 已输出的 assistant 文本(用于去重累积消息)
+    accumulated_text: String,
     /// thinking 阶段是否完成
     thinking_done: bool,
 }
@@ -353,7 +353,7 @@ struct CliUsageInfo {
 impl StreamParser {
     fn new() -> Self {
         Self {
-            assistant_len: 0,
+            accumulated_text: String::new(),
             thinking_done: false,
         }
     }
@@ -401,8 +401,9 @@ impl StreamParser {
 
                 // Only emit on "completed" to avoid duplicates (started + completed both have content)
                 if subtype == Some("completed") {
+                    // Reset accumulated text for the next assistant segment
+                    self.accumulated_text.clear();
                     let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("");
-                    self.assistant_len += content.len();
                     Some(StreamParseResult::FileContent {
                         path: path.to_string(),
                         content: content.to_string(),
@@ -413,6 +414,12 @@ impl StreamParser {
             }
 
             // === Assistant content delta ===
+            // CLI pattern per segment (before/after each tool_call):
+            //   delta (has timestamp_ms, short text) × N
+            //   accumulated message (same text concatenated, may or may not have timestamp_ms)
+            //
+            // We track segment_text to detect accumulated messages:
+            // if current text == segment_text, it's accumulated → skip.
             "assistant" => {
                 let content = obj.get("message")?.get("content")?.as_array()?;
                 let text: String = content
@@ -426,14 +433,23 @@ impl StreamParser {
                     return None;
                 }
 
-                // 最后一条 assistant 消息是完整累积文本，需要跳过
-                if text.len() >= self.assistant_len && self.assistant_len > 0 {
-                    if text.len() > 100 && text.len() as f64 > self.assistant_len as f64 * 0.8 {
-                        return None;
-                    }
+                // If this text equals what we've accumulated in this segment → accumulated msg, skip
+                if text == self.accumulated_text {
+                    // Reset for next segment (after tool_call)
+                    self.accumulated_text.clear();
+                    return None;
+                }
+
+                // If accumulated text is a prefix of this text, it's a growing accumulated → skip
+                if !self.accumulated_text.is_empty()
+                    && text.starts_with(&self.accumulated_text)
+                    && text.len() > self.accumulated_text.len()
+                {
+                    self.accumulated_text = text;
+                    return None;
                 }
 
-                self.assistant_len += text.len();
+                self.accumulated_text.push_str(&text);
                 Some(StreamParseResult::ContentDelta(text))
             }