diff --git a/AGENTS.md b/AGENTS.md index 6ee3826cb..2d43c64b1 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -101,6 +101,8 @@ make format # 格式化代码 - 尽量使用较新的语法,避免使用旧版本的语法(版本兼容到 3.12+) - 更新 [roadmap.md](docs/develop-guides/roadmap.md) 文档记录本次修改,多个类似的功能更新已经补充在一起 - 开发完成后务必在 docker 中进行测试,可以读取 .env 获取管理员账户和密码 +- 不允许把代码写得稀碎:不要为简单线性逻辑拆出一堆细碎 helper;优先写成职责清晰、结构完整、可一眼读懂的实现。 +- 拆函数必须服务于明确的复用、隔离副作用或降低认知负担;如果拆分后调用链更绕、上下文更分散,就应合并回更直接的实现。 **其他**: diff --git a/CLAUDE.md b/CLAUDE.md index 8a0e4af0d..6ee3826cb 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -7,15 +7,69 @@ Yuxi 是一个基于大模型的智能知识库与知识图谱智能体开发平 ## 开发准则 -Avoid over-engineering. Only make changes that are directly requested or clearly necessary. Keep solutions simple and focused. +Behavioral guidelines to reduce common LLM coding mistakes. Merge with project-specific instructions as needed. -Don't add features, refactor code, or make "improvements" beyond what was asked. A bug fix doesn't need surrounding code cleaned up. A simple feature doesn't need extra configurability. +**Tradeoff:** These guidelines bias toward caution over speed. For trivial tasks, use judgment. -Don't add error handling, fallbacks, or validation for scenarios that can't happen. Trust internal code and framework guarantees. Only validate at system boundaries (user input, external APIs). Don't use backwards-compatibility shims when you can just change the code. +## 1. Think Before Coding -Don't create helpers, utilities, or abstractions for one-time operations. Don't design for hypothetical future requirements. The right amount of complexity is the minimum needed for the current task. Reuse existing abstractions where possible and follow the DRY principle. +**Don't assume. Don't hide confusion. Surface tradeoffs.** -To ensure readability, it is necessary to add essential comments at key points, particularly to explain the functionality of a function and the design intent. +Before implementing: +- State your assumptions explicitly. If uncertain, ask. +- If multiple interpretations exist, present them - don't pick silently. +- If a simpler approach exists, say so. Push back when warranted. +- If something is unclear, stop. Name what's confusing. Ask. + +## 2. Simplicity First + +**Minimum code that solves the problem. Nothing speculative.** + +- No features beyond what was asked. +- No abstractions for single-use code. +- No "flexibility" or "configurability" that wasn't requested. +- No error handling for impossible scenarios. +- If you write 200 lines and it could be 50, rewrite it. + +Ask yourself: "Would a senior engineer say this is overcomplicated?" If yes, simplify. + +## 3. Surgical Changes + +**Touch only what you must. Clean up only your own mess.** + +When editing existing code: +- Don't "improve" adjacent code, comments, or formatting. +- Don't refactor things that aren't broken. +- Match existing style, even if you'd do it differently. +- If you notice unrelated dead code, mention it - don't delete it. + +When your changes create orphans: +- Remove imports/variables/functions that YOUR changes made unused. +- Don't remove pre-existing dead code unless asked. + +The test: Every changed line should trace directly to the user's request. + +## 4. Goal-Driven Execution + +**Define success criteria. Loop until verified.** + +Transform tasks into verifiable goals: +- "Add validation" → "Write tests for invalid inputs, then make them pass" +- "Fix the bug" → "Write a test that reproduces it, then make it pass" +- "Refactor X" → "Ensure tests pass before and after" + +For multi-step tasks, state a brief plan: +``` +1. [Step] → verify: [check] +2. [Step] → verify: [check] +3. [Step] → verify: [check] +``` + +Strong success criteria let you loop independently. Weak criteria ("make it work") require constant clarification. + +--- + +**These guidelines are working if:** fewer unnecessary changes in diffs, fewer rewrites due to overcomplication, and clarifying questions come before implementation rather than after mistakes. ## 开发与调试工作流 (Development & Debugging Workflow) diff --git a/backend/package/yuxi/agents/backends/sandbox/__init__.py b/backend/package/yuxi/agents/backends/sandbox/__init__.py index d4417bfd4..4dc1498df 100644 --- a/backend/package/yuxi/agents/backends/sandbox/__init__.py +++ b/backend/package/yuxi/agents/backends/sandbox/__init__.py @@ -2,10 +2,12 @@ from .paths import ( VIRTUAL_PATH_PREFIX, ensure_thread_dirs, + ensure_workspace_default_files, resolve_virtual_path, sandbox_outputs_dir, sandbox_uploads_dir, sandbox_user_data_dir, + sandbox_workspace_agents_prompt_file, sandbox_workspace_dir, virtual_path_for_thread_file, ) @@ -51,6 +53,7 @@ "ProvisionerSandboxProvider", "VIRTUAL_PATH_PREFIX", "ensure_thread_dirs", + "ensure_workspace_default_files", "get_sandbox_provider", "init_sandbox_provider", "resolve_virtual_path", @@ -58,6 +61,7 @@ "sandbox_outputs_dir", "sandbox_uploads_dir", "sandbox_user_data_dir", + "sandbox_workspace_agents_prompt_file", "sandbox_workspace_dir", "shutdown_sandbox_provider", "virtual_path_for_thread_file", diff --git a/backend/package/yuxi/agents/backends/sandbox/paths.py b/backend/package/yuxi/agents/backends/sandbox/paths.py index b7dd23b97..598d7fde2 100644 --- a/backend/package/yuxi/agents/backends/sandbox/paths.py +++ b/backend/package/yuxi/agents/backends/sandbox/paths.py @@ -4,7 +4,15 @@ from pathlib import Path from yuxi import config as conf -from yuxi.utils.paths import OUTPUTS_DIR_NAME, UPLOADS_DIR_NAME, VIRTUAL_PATH_PREFIX, WORKSPACE_DIR_NAME +from yuxi.utils.logging_config import logger +from yuxi.utils.paths import ( + OUTPUTS_DIR_NAME, + UPLOADS_DIR_NAME, + VIRTUAL_PATH_PREFIX, + WORKSPACE_AGENTS_DIR_NAME, + WORKSPACE_AGENTS_PROMPT_FILE_NAME, + WORKSPACE_DIR_NAME, +) _SAFE_ID_RE = re.compile(r"^[A-Za-z0-9_-]+$") @@ -51,6 +59,33 @@ def sandbox_workspace_dir(thread_id: str, user_id: str) -> Path: return _global_user_data_dir(user_id) / WORKSPACE_DIR_NAME +def sandbox_workspace_agents_prompt_file(thread_id: str, user_id: str) -> Path: + return sandbox_workspace_dir(thread_id, user_id) / WORKSPACE_AGENTS_DIR_NAME / WORKSPACE_AGENTS_PROMPT_FILE_NAME + + +def ensure_workspace_default_files(workspace_dir: Path) -> None: + agents_dir = workspace_dir / WORKSPACE_AGENTS_DIR_NAME + agents_file = agents_dir / WORKSPACE_AGENTS_PROMPT_FILE_NAME + + try: + agents_dir.mkdir(parents=True, exist_ok=True) + except FileExistsError: + logger.warning("工作区默认 Agents 目录创建失败:路径已被文件占用") + return + except OSError as exc: + logger.warning(f"工作区默认 Agents 目录初始化失败: {exc}") + return + + try: + with agents_file.open("xb"): + pass + except FileExistsError: + if agents_file.is_dir(): + logger.warning("工作区默认 AGENTS.md 创建失败:路径已被目录占用") + except OSError as exc: + logger.warning(f"工作区默认 Agents 文件初始化失败: {exc}") + + def sandbox_uploads_dir(thread_id: str) -> Path: return _thread_root_dir(thread_id) / UPLOADS_DIR_NAME @@ -61,7 +96,9 @@ def sandbox_outputs_dir(thread_id: str) -> Path: def ensure_thread_dirs(thread_id: str, user_id: str) -> None: _global_user_data_dir(user_id).mkdir(parents=True, exist_ok=True) - sandbox_workspace_dir(thread_id, user_id).mkdir(parents=True, exist_ok=True) + workspace_dir = sandbox_workspace_dir(thread_id, user_id) + workspace_dir.mkdir(parents=True, exist_ok=True) + ensure_workspace_default_files(workspace_dir) sandbox_uploads_dir(thread_id).mkdir(parents=True, exist_ok=True) sandbox_outputs_dir(thread_id).mkdir(parents=True, exist_ok=True) diff --git a/backend/package/yuxi/services/chat_service.py b/backend/package/yuxi/services/chat_service.py index b9cb9defb..3430a3180 100644 --- a/backend/package/yuxi/services/chat_service.py +++ b/backend/package/yuxi/services/chat_service.py @@ -9,6 +9,7 @@ from langchain.messages import AIMessage, AIMessageChunk, HumanMessage from langgraph.types import Command from yuxi import config as conf +from yuxi.agents.backends.sandbox.paths import sandbox_workspace_agents_prompt_file from yuxi.agents.buildin import agent_manager from yuxi.agents.state import AgentStatePayload from yuxi.plugins.guard import content_guard @@ -30,6 +31,43 @@ normalize_questions as _normalize_interrupt_questions, ) +WORKSPACE_AGENTS_PROMPT_MAX_BYTES = 64 * 1024 + + +def _load_workspace_agents_prompt(thread_id: str, user_id: str) -> str: + prompt_file = sandbox_workspace_agents_prompt_file(thread_id, user_id) + try: + with prompt_file.open("rb") as buffer: + content = buffer.read(WORKSPACE_AGENTS_PROMPT_MAX_BYTES + 1) + except FileNotFoundError: + return "" + except IsADirectoryError: + logger.warning("读取工作区 AGENTS.md 失败: 路径是目录") + return "" + except OSError as exc: + logger.warning(f"读取工作区 AGENTS.md 失败: {exc}") + return "" + + prompt = content[:WORKSPACE_AGENTS_PROMPT_MAX_BYTES].decode("utf-8", errors="replace").strip() + if not prompt: + return "" + if len(content) > WORKSPACE_AGENTS_PROMPT_MAX_BYTES: + return f"{prompt}\n\n[AGENTS.md 内容已截断]" + return prompt + + +async def _build_agent_input_context(agent_config: dict, *, thread_id: str, user_id: str) -> dict: + input_context = dict(agent_config or {}) + agents_prompt = await asyncio.to_thread(_load_workspace_agents_prompt, thread_id, user_id) + + if agents_prompt: + agents_section = f"用户工作区 agents/AGENTS.md 内容:\n{agents_prompt}" + base_prompt = str(input_context.get("system_prompt") or "").rstrip() + input_context["system_prompt"] = f"{base_prompt}\n\n{agents_section}" if base_prompt else agents_section + + input_context.update({"user_id": user_id, "thread_id": thread_id}) + return input_context + def _build_state_files(attachments: list[dict]) -> dict: """将附件列表转换为 StateBackend 格式的 files 字典 @@ -560,7 +598,7 @@ async def agent_chat( thread_id = str(uuid.uuid4()) logger.warning(f"No thread_id provided, generated new thread_id: {thread_id}") - input_context = agent_config | {"user_id": user_id, "thread_id": thread_id} + input_context = await _build_agent_input_context(agent_config, thread_id=thread_id, user_id=user_id) langfuse_run = _build_langfuse_run_context( current_user=current_user, thread_id=thread_id, @@ -776,7 +814,7 @@ def make_chunk(content=None, **kwargs): thread_id = str(uuid.uuid4()) logger.warning(f"No thread_id provided, generated new thread_id: {thread_id}") - input_context = agent_config | {"user_id": user_id, "thread_id": thread_id} + input_context = await _build_agent_input_context(agent_config, thread_id=thread_id, user_id=user_id) langfuse_run = _build_langfuse_run_context( current_user=current_user, thread_id=thread_id, @@ -1011,8 +1049,7 @@ def make_resume_chunk(content=None, **kwargs): return context = agent.context_schema() - context.update(agent_config or {}) - context.update({"user_id": user_id, "thread_id": thread_id}) + context.update(await _build_agent_input_context(agent_config or {}, thread_id=thread_id, user_id=user_id)) graph = await agent.get_graph(context=context) langfuse_run = _build_langfuse_run_context( current_user=current_user, diff --git a/backend/package/yuxi/services/conversation_service.py b/backend/package/yuxi/services/conversation_service.py index e71dfa0fb..b24c16b96 100644 --- a/backend/package/yuxi/services/conversation_service.py +++ b/backend/package/yuxi/services/conversation_service.py @@ -2,7 +2,6 @@ from dataclasses import dataclass from pathlib import Path -import aiofiles from fastapi import HTTPException, UploadFile from sqlalchemy.ext.asyncio import AsyncSession from yuxi.agents.backends.sandbox import ( @@ -13,6 +12,7 @@ from yuxi.config import config as app_config from yuxi.plugins.parser import Parser from yuxi.repositories.conversation_repository import ConversationRepository +from yuxi.services.upload_utils import write_upload_to_path from yuxi.utils.datetime_utils import utc_isoformat from yuxi.utils.logging_config import logger from yuxi.utils.paths import VIRTUAL_PATH_UPLOADS @@ -41,21 +41,12 @@ def _ensure_workdir() -> Path: async def _write_upload_to_disk(upload: UploadFile, dest: Path) -> int: - await upload.seek(0) - written = 0 - chunk_size = 1024 * 1024 - - async with aiofiles.open(dest, "wb") as buffer: - while True: - chunk = await upload.read(chunk_size) - if not chunk: - break - written += len(chunk) - if written > MAX_ATTACHMENT_SIZE_BYTES: - raise ValueError("附件过大,当前仅支持 5 MB 以内的文件") - await buffer.write(chunk) - - return written + return await write_upload_to_path( + upload, + dest, + max_size_bytes=MAX_ATTACHMENT_SIZE_BYTES, + too_large_message="附件过大,当前仅支持 5 MB 以内的文件", + ) def _truncate_markdown(markdown: str) -> tuple[str, bool]: diff --git a/backend/package/yuxi/services/upload_utils.py b/backend/package/yuxi/services/upload_utils.py new file mode 100644 index 000000000..6a1b79026 --- /dev/null +++ b/backend/package/yuxi/services/upload_utils.py @@ -0,0 +1,43 @@ +from pathlib import Path + +import aiofiles +from fastapi import UploadFile + + +async def write_upload_to_buffer( + upload: UploadFile, + buffer, + *, + max_size_bytes: int, + too_large_message: str, + chunk_size: int = 1024 * 1024, +) -> int: + await upload.seek(0) + written = 0 + + while chunk := await upload.read(chunk_size): + written += len(chunk) + if written > max_size_bytes: + raise ValueError(too_large_message) + await buffer.write(chunk) + + return written + + +async def write_upload_to_path( + upload: UploadFile, + dest: Path, + *, + max_size_bytes: int, + too_large_message: str, + mode: str = "wb", + chunk_size: int = 1024 * 1024, +) -> int: + async with aiofiles.open(dest, mode) as buffer: + return await write_upload_to_buffer( + upload, + buffer, + max_size_bytes=max_size_bytes, + too_large_message=too_large_message, + chunk_size=chunk_size, + ) diff --git a/backend/package/yuxi/services/workspace_service.py b/backend/package/yuxi/services/workspace_service.py new file mode 100644 index 000000000..509a0d3de --- /dev/null +++ b/backend/package/yuxi/services/workspace_service.py @@ -0,0 +1,277 @@ +from __future__ import annotations + +import asyncio +import contextlib +import io +import mimetypes +import shutil +from pathlib import Path, PurePosixPath +from urllib.parse import quote + +import aiofiles +from fastapi import HTTPException, UploadFile +from fastapi.responses import FileResponse, StreamingResponse +from yuxi.agents.backends.sandbox.paths import _global_user_data_dir, ensure_workspace_default_files +from yuxi.services.upload_utils import write_upload_to_buffer +from yuxi.services.viewer_filesystem_service import _detect_preview_type +from yuxi.storage.postgres.models_business import User +from yuxi.utils.datetime_utils import utc_isoformat_from_timestamp +from yuxi.utils.paths import WORKSPACE_DIR_NAME + +EDITABLE_WORKSPACE_SUFFIXES = {".md", ".markdown", ".mdx", ".txt"} +MAX_WORKSPACE_UPLOAD_SIZE_BYTES = 100 * 1024 * 1024 + + +def _workspace_root(user: User) -> Path: + try: + user_data_root = _global_user_data_dir(str(user.id)).resolve() + root = user_data_root / WORKSPACE_DIR_NAME + except ValueError as exc: + raise HTTPException(status_code=403, detail="Access denied") from exc + if root.is_symlink(): + raise HTTPException(status_code=403, detail="Access denied") + root.mkdir(parents=True, exist_ok=True) + resolved_root = root.resolve() + try: + resolved_root.relative_to(user_data_root) + except ValueError as exc: + raise HTTPException(status_code=403, detail="Access denied") from exc + ensure_workspace_default_files(resolved_root) + return resolved_root + + +def _normalize_workspace_path(path: str | None) -> PurePosixPath: + raw_path = (path or "/").strip() or "/" + if not raw_path.startswith("/"): + raw_path = f"/{raw_path}" + normalized = PurePosixPath(raw_path) + if ".." in normalized.parts: + raise HTTPException(status_code=403, detail="Access denied") + return normalized + + +def _resolve_workspace_path(user: User, path: str | None) -> Path: + root = _workspace_root(user) + normalized = _normalize_workspace_path(path) + relative_parts = [part for part in normalized.parts if part not in {"/", ""}] + target = (root.joinpath(*relative_parts) if relative_parts else root).resolve() + try: + target.relative_to(root) + except ValueError as exc: + raise HTTPException(status_code=403, detail="Access denied") from exc + return target + + +def _entry_for_path(root: Path, path: Path) -> dict: + stat = path.stat() + is_dir = path.is_dir() + relative = path.relative_to(root).as_posix() + display_path = f"/{relative}" if relative else "/" + if is_dir and display_path != "/" and not display_path.endswith("/"): + display_path = f"{display_path}/" + return { + "path": display_path, + "name": path.name or "工作区", + "is_dir": is_dir, + "size": 0 if is_dir else stat.st_size, + "modified_at": utc_isoformat_from_timestamp(stat.st_mtime) or "", + } + + +def _sort_entries(entries: list[dict]) -> list[dict]: + return sorted(entries, key=lambda item: (not bool(item.get("is_dir")), str(item.get("name") or "").lower())) + + +def _validate_child_name(name: str, *, field_name: str) -> str: + clean_name = str(name or "").strip() + if not clean_name: + raise HTTPException(status_code=422, detail=f"{field_name} 不能为空") + if clean_name in {".", ".."} or "/" in clean_name or "\\" in clean_name: + raise HTTPException(status_code=422, detail=f"{field_name} 不能包含路径分隔符") + if PurePosixPath(clean_name).name != clean_name: + raise HTTPException(status_code=422, detail=f"{field_name} 不能包含路径分隔符") + return clean_name + + +def _resolve_parent_directory(user: User, parent_path: str) -> Path: + parent = _resolve_workspace_path(user, parent_path) + if not parent.exists(): + raise HTTPException(status_code=404, detail="目标目录不存在") + if not parent.is_dir(): + raise HTTPException(status_code=400, detail="目标路径不是目录") + return parent + + +def _resolve_new_child(root: Path, parent: Path, name: str) -> Path: + target = parent / name + try: + target.resolve(strict=False).relative_to(root) + except ValueError as exc: + raise HTTPException(status_code=403, detail="Access denied") from exc + if target.exists(): + raise HTTPException(status_code=400, detail="同名文件或文件夹已存在") + return target + + +def _list_directory(root: Path, target: Path) -> list[dict]: + entries = [_entry_for_path(root, child) for child in target.iterdir()] + return _sort_entries(entries) + + +async def list_workspace_tree(*, path: str, current_user: User) -> dict: + root = _workspace_root(current_user) + target = _resolve_workspace_path(current_user, path) + if not target.exists(): + return {"entries": []} + if not target.is_dir(): + raise HTTPException(status_code=400, detail="当前路径不是目录") + entries = await asyncio.to_thread(_list_directory, root, target) + return {"entries": entries} + + +async def read_workspace_file_content(*, path: str, current_user: User) -> dict: + target = _resolve_workspace_path(current_user, path) + if not target.exists(): + raise HTTPException(status_code=404, detail="文件不存在") + if not target.is_file(): + raise HTTPException(status_code=400, detail="当前路径是目录") + + raw_content = await asyncio.to_thread(target.read_bytes) + preview_type, supported, message = _detect_preview_type(path, raw_content) + if preview_type in {"image", "pdf"} or not supported: + return { + "content": None, + "preview_type": preview_type, + "supported": supported, + "message": message, + } + try: + content = raw_content.decode("utf-8") + except UnicodeDecodeError: + return { + "content": None, + "preview_type": "unsupported", + "supported": False, + "message": "当前文件不是 UTF-8 文本,暂不支持预览", + } + return { + "content": content, + "preview_type": preview_type, + "supported": supported, + "message": message, + } + + +async def write_workspace_file_content(*, path: str, content: str, current_user: User) -> dict: + root = _workspace_root(current_user) + target = _resolve_workspace_path(current_user, path) + if not target.exists(): + raise HTTPException(status_code=404, detail="文件不存在") + if not target.is_file(): + raise HTTPException(status_code=400, detail="当前路径是目录") + if target.suffix.lower() not in EDITABLE_WORKSPACE_SUFFIXES: + raise HTTPException(status_code=400, detail="当前文件类型不支持编辑") + + raw_content = await asyncio.to_thread(target.read_bytes) + preview_type, supported, _message = _detect_preview_type(path, raw_content) + if preview_type not in {"markdown", "text"} or not supported: + raise HTTPException(status_code=400, detail="当前文件类型不支持编辑") + try: + raw_content.decode("utf-8") + except UnicodeDecodeError as exc: + raise HTTPException(status_code=400, detail="当前文件不是 UTF-8 文本") from exc + + try: + await asyncio.to_thread(target.write_text, content, encoding="utf-8") + except PermissionError as exc: + raise HTTPException(status_code=400, detail=str(exc)) from exc + + return { + "success": True, + "path": _normalize_workspace_path(path).as_posix(), + "entry": _entry_for_path(root, target), + } + + +async def delete_workspace_path(*, path: str, current_user: User) -> dict: + root = _workspace_root(current_user) + target = _resolve_workspace_path(current_user, path) + if target == root: + raise HTTPException(status_code=400, detail="工作区根目录不允许删除") + if not target.exists(): + raise HTTPException(status_code=404, detail="文件不存在") + + try: + if target.is_dir(): + await asyncio.to_thread(shutil.rmtree, target) + else: + await asyncio.to_thread(target.unlink) + except PermissionError as exc: + raise HTTPException(status_code=400, detail=str(exc)) from exc + + return {"success": True, "path": _normalize_workspace_path(path).as_posix()} + + +async def create_workspace_directory(*, parent_path: str, name: str, current_user: User) -> dict: + root = _workspace_root(current_user) + directory_name = _validate_child_name(name, field_name="文件夹名") + parent = _resolve_parent_directory(current_user, parent_path) + target = _resolve_new_child(root, parent, directory_name) + + try: + await asyncio.to_thread(target.mkdir) + except FileExistsError as exc: + raise HTTPException(status_code=400, detail="同名文件或文件夹已存在") from exc + except PermissionError as exc: + raise HTTPException(status_code=400, detail=str(exc)) from exc + + return {"success": True, "entry": _entry_for_path(root, target)} + + +async def upload_workspace_file(*, parent_path: str, file: UploadFile, current_user: User) -> dict: + root = _workspace_root(current_user) + file_name = _validate_child_name(Path(file.filename or "").name, field_name="文件名") + parent = _resolve_parent_directory(current_user, parent_path) + target = _resolve_new_child(root, parent, file_name) + created_file = False + upload_completed = False + + try: + async with aiofiles.open(target, "xb") as buffer: + created_file = True + await write_upload_to_buffer( + file, + buffer, + max_size_bytes=MAX_WORKSPACE_UPLOAD_SIZE_BYTES, + too_large_message="文件过大,当前仅支持 100 MB 以内的文件", + ) + upload_completed = True + except FileExistsError as exc: + raise HTTPException(status_code=400, detail="同名文件或文件夹已存在") from exc + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) from exc + except PermissionError as exc: + raise HTTPException(status_code=400, detail=str(exc)) from exc + finally: + if created_file and not upload_completed and target.exists(): + with contextlib.suppress(OSError): + await asyncio.to_thread(target.unlink) + + return {"success": True, "entry": _entry_for_path(root, target)} + + +async def download_workspace_file(*, path: str, current_user: User) -> StreamingResponse | FileResponse: + target = _resolve_workspace_path(current_user, path) + if not target.exists(): + raise HTTPException(status_code=404, detail="文件不存在") + if not target.is_file(): + raise HTTPException(status_code=400, detail="当前路径是目录") + + file_name = target.name or "download" + media_type = mimetypes.guess_type(file_name)[0] or "application/octet-stream" + headers = {"Content-Disposition": f"attachment; filename*=UTF-8''{quote(file_name)}"} + if target.stat().st_size > 1024 * 1024 * 16: + return FileResponse(path=target, media_type=media_type, headers=headers) + + content = await asyncio.to_thread(target.read_bytes) + return StreamingResponse(io.BytesIO(content), media_type=media_type, headers=headers) diff --git a/backend/package/yuxi/utils/paths.py b/backend/package/yuxi/utils/paths.py index 8c91a0d9d..304b5514b 100644 --- a/backend/package/yuxi/utils/paths.py +++ b/backend/package/yuxi/utils/paths.py @@ -4,6 +4,8 @@ VIRTUAL_PATH_PREFIX = config.sandbox_virtual_path_prefix WORKSPACE_DIR_NAME = "workspace" +WORKSPACE_AGENTS_DIR_NAME = "agents" +WORKSPACE_AGENTS_PROMPT_FILE_NAME = "AGENTS.md" UPLOADS_DIR_NAME = "uploads" OUTPUTS_DIR_NAME = "outputs" VIRTUAL_SKILLS_PATH = "/home/gem/skills" @@ -16,6 +18,8 @@ __all__ = [ "VIRTUAL_PATH_PREFIX", "WORKSPACE_DIR_NAME", + "WORKSPACE_AGENTS_DIR_NAME", + "WORKSPACE_AGENTS_PROMPT_FILE_NAME", "UPLOADS_DIR_NAME", "OUTPUTS_DIR_NAME", "VIRTUAL_PATH_WORKSPACE", diff --git a/backend/server/routers/__init__.py b/backend/server/routers/__init__.py index 86112ec4f..0413eea3f 100644 --- a/backend/server/routers/__init__.py +++ b/backend/server/routers/__init__.py @@ -15,6 +15,7 @@ from server.routers.tool_router import tools from server.routers.apikey_router import apikey_router from server.routers.filesystem_router import filesystem_router +from server.routers.workspace_router import workspace _LITE_MODE = os.environ.get("LITE_MODE", "").lower() in ("true", "1") @@ -36,6 +37,7 @@ router.include_router(tools) # /api/system/tools/* 工具列表与配置 router.include_router(apikey_router) # /api/apikey/* API Key 管理 router.include_router(filesystem_router) # /api/viewer/filesystem/* 工作台文件系统视图 +router.include_router(workspace) # /api/workspace/* 用户个人工作区 if not _LITE_MODE: from server.routers.graph_router import graph diff --git a/backend/server/routers/workspace_router.py b/backend/server/routers/workspace_router.py new file mode 100644 index 000000000..671c08a42 --- /dev/null +++ b/backend/server/routers/workspace_router.py @@ -0,0 +1,93 @@ +from __future__ import annotations + +from fastapi import APIRouter, Depends, File, Form, Query, UploadFile +from pydantic import BaseModel + +from server.utils.auth_middleware import get_required_user +from yuxi.services.workspace_service import ( + create_workspace_directory, + delete_workspace_path, + download_workspace_file, + list_workspace_tree, + read_workspace_file_content, + upload_workspace_file, + write_workspace_file_content, +) +from yuxi.storage.postgres.models_business import User + +workspace = APIRouter(prefix="/workspace", tags=["workspace"]) + + +class CreateWorkspaceDirectoryRequest(BaseModel): + parent_path: str + name: str + + +class UpdateWorkspaceFileContentRequest(BaseModel): + path: str + content: str + + +@workspace.get("/tree", response_model=dict) +async def get_workspace_tree( + path: str = Query("/", description="工作区目录路径"), + current_user: User = Depends(get_required_user), +): + return await list_workspace_tree(path=path, current_user=current_user) + + +@workspace.get("/file", response_model=dict) +async def get_workspace_file( + path: str = Query(..., description="工作区文件路径"), + current_user: User = Depends(get_required_user), +): + return await read_workspace_file_content(path=path, current_user=current_user) + + +@workspace.put("/file", response_model=dict) +async def update_workspace_file( + payload: UpdateWorkspaceFileContentRequest, + current_user: User = Depends(get_required_user), +): + return await write_workspace_file_content( + path=payload.path, + content=payload.content, + current_user=current_user, + ) + + +@workspace.delete("/file", response_model=dict) +async def delete_workspace_file_route( + path: str = Query(..., description="工作区文件或目录路径"), + current_user: User = Depends(get_required_user), +): + return await delete_workspace_path(path=path, current_user=current_user) + + +@workspace.post("/directory", response_model=dict) +async def create_workspace_directory_route( + payload: CreateWorkspaceDirectoryRequest, + current_user: User = Depends(get_required_user), +): + return await create_workspace_directory( + parent_path=payload.parent_path, + name=payload.name, + current_user=current_user, + ) + + +@workspace.post("/upload", response_model=dict) +async def upload_workspace_file_route( + parent_path: str = Form(..., description="父目录路径"), + file: UploadFile = File(..., description="上传文件"), + current_user: User = Depends(get_required_user), +): + return await upload_workspace_file(parent_path=parent_path, file=file, current_user=current_user) + + +@workspace.get("/download") +async def download_workspace( + path: str = Query(..., description="工作区文件路径"), + current_user: User = Depends(get_required_user), +): + return await download_workspace_file(path=path, current_user=current_user) diff --git a/backend/test/unit/services/test_chat_service_sync.py b/backend/test/unit/services/test_chat_service_sync.py index d00ac9c93..16a147636 100644 --- a/backend/test/unit/services/test_chat_service_sync.py +++ b/backend/test/unit/services/test_chat_service_sync.py @@ -8,6 +8,21 @@ from yuxi.services import chat_service as svc +def _empty_agents_prompt(_thread_id: str, _user_id: str) -> str: + return "" + + +class _FakeAgentConfigRepo: + def __init__(self, _db): + pass + + async def get_by_id(self, config_id: int): + return SimpleNamespace(id=config_id) + + async def get_or_create_default(self, *, department_id: str, agent_id: str, created_by: str): + return SimpleNamespace(id=999, department_id=department_id, agent_id=agent_id, created_by=created_by) + + class _FakeConvRepo: def __init__(self, _db): self.saved_messages: list[dict] = [] @@ -112,10 +127,12 @@ def fake_get_trace_info(_run_context): monkeypatch.setattr(svc, "_build_langfuse_run_context", fake_build_langfuse_run_context) monkeypatch.setattr(svc, "get_trace_info", fake_get_trace_info) monkeypatch.setattr(svc, "flush_langfuse", lambda: calls.setdefault("flushed", True)) + monkeypatch.setattr(svc, "_load_workspace_agents_prompt", _empty_agents_prompt) monkeypatch.setattr(svc.agent_manager, "get_agent", lambda agent_id: FakeAgent()) monkeypatch.setattr(svc, "get_agent_config_by_id", fake_get_agent_config_by_id) monkeypatch.setattr(svc, "ConversationRepository", _FakeConvRepo) + monkeypatch.setattr(svc, "AgentConfigRepository", _FakeAgentConfigRepo) monkeypatch.setattr(svc, "save_messages_from_langgraph_state", fake_save_messages_from_langgraph_state) monkeypatch.setattr(svc.content_guard, "check", fake_guard_check) @@ -193,10 +210,12 @@ async def fake_guard_check(_content): ) monkeypatch.setattr(svc, "get_trace_info", lambda _run_context: {}) monkeypatch.setattr(svc, "flush_langfuse", lambda: None) + monkeypatch.setattr(svc, "_load_workspace_agents_prompt", _empty_agents_prompt) monkeypatch.setattr(svc.agent_manager, "get_agent", lambda agent_id: FakeAgent()) monkeypatch.setattr(svc, "get_agent_config_by_id", fake_get_agent_config_by_id) monkeypatch.setattr(svc, "ConversationRepository", _FakeConvRepo) + monkeypatch.setattr(svc, "AgentConfigRepository", _FakeAgentConfigRepo) monkeypatch.setattr(svc, "save_messages_from_langgraph_state", fake_save_messages_from_langgraph_state) monkeypatch.setattr(svc.content_guard, "check", fake_guard_check) @@ -214,3 +233,37 @@ async def fake_guard_check(_content): assert result["response"] == "Need input later" assert result["thread_id"] == "thread-2" assert result["request_id"] == "req-2" + + +@pytest.mark.asyncio +async def test_build_agent_input_context_merges_workspace_agents_prompt(monkeypatch: pytest.MonkeyPatch): + def fake_agents_prompt(_thread_id: str, _user_id: str) -> str: + return "回答前先读取 AGENTS.md" + + monkeypatch.setattr(svc, "_load_workspace_agents_prompt", fake_agents_prompt) + + context = await svc._build_agent_input_context( + {"system_prompt": "原始系统提示词", "temperature": 0.1}, + thread_id="thread-1", + user_id="user-1", + ) + + assert context["system_prompt"] == "原始系统提示词\n\n用户工作区 agents/AGENTS.md 内容:\n回答前先读取 AGENTS.md" + assert context["temperature"] == 0.1 + assert context["thread_id"] == "thread-1" + assert context["user_id"] == "user-1" + + +@pytest.mark.asyncio +async def test_build_agent_input_context_keeps_prompt_when_workspace_agents_prompt_empty( + monkeypatch: pytest.MonkeyPatch, +): + monkeypatch.setattr(svc, "_load_workspace_agents_prompt", _empty_agents_prompt) + + context = await svc._build_agent_input_context( + {"system_prompt": "原始系统提示词"}, + thread_id="thread-1", + user_id="user-1", + ) + + assert context["system_prompt"] == "原始系统提示词" diff --git a/backend/test/unit/services/test_workspace_service.py b/backend/test/unit/services/test_workspace_service.py new file mode 100644 index 000000000..55de5bfda --- /dev/null +++ b/backend/test/unit/services/test_workspace_service.py @@ -0,0 +1,183 @@ +from __future__ import annotations + +from io import BytesIO +from pathlib import Path +from types import SimpleNamespace + +import pytest +from fastapi import HTTPException, UploadFile + +from yuxi.agents.backends.sandbox import paths as workspace_paths +from yuxi.services import workspace_service as svc + + +def test_workspace_root_creates_default_agents_prompt_file(tmp_path: Path, monkeypatch) -> None: + monkeypatch.setattr(workspace_paths.conf, "save_dir", str(tmp_path)) + + root = svc._workspace_root(SimpleNamespace(id="user-1")) + + agents_file = root / "agents" / "AGENTS.md" + assert agents_file.is_file() + assert agents_file.read_text(encoding="utf-8") == "" + + +def test_ensure_thread_dirs_creates_default_agents_prompt_file(tmp_path: Path, monkeypatch) -> None: + monkeypatch.setattr(workspace_paths.conf, "save_dir", str(tmp_path)) + + workspace_paths.ensure_thread_dirs("thread-1", "user-1") + + agents_file = tmp_path / "threads" / "shared" / "user-1" / "workspace" / "agents" / "AGENTS.md" + assert agents_file.is_file() + assert agents_file.read_text(encoding="utf-8") == "" + + +def test_workspace_root_keeps_existing_agents_prompt_file(tmp_path: Path, monkeypatch) -> None: + monkeypatch.setattr(workspace_paths.conf, "save_dir", str(tmp_path)) + agents_dir = tmp_path / "threads" / "shared" / "user-1" / "workspace" / "agents" + agents_dir.mkdir(parents=True) + agents_file = agents_dir / "AGENTS.md" + agents_file.write_text("保留已有内容", encoding="utf-8") + + root = svc._workspace_root(SimpleNamespace(id="user-1")) + + assert root == tmp_path / "threads" / "shared" / "user-1" / "workspace" + assert agents_file.read_text(encoding="utf-8") == "保留已有内容" + + +def test_workspace_root_rejects_symlink_root(tmp_path: Path, monkeypatch) -> None: + monkeypatch.setattr(workspace_paths.conf, "save_dir", str(tmp_path)) + user_root = tmp_path / "threads" / "shared" / "user-1" + outside_root = tmp_path / "outside" + user_root.mkdir(parents=True) + outside_root.mkdir() + (user_root / "workspace").symlink_to(outside_root, target_is_directory=True) + + with pytest.raises(HTTPException) as exc_info: + svc._workspace_root(SimpleNamespace(id="user-1")) + + assert exc_info.value.status_code == 403 + + +@pytest.mark.asyncio +async def test_read_workspace_file_content_returns_unsupported_for_non_utf8_text( + tmp_path: Path, + monkeypatch, +) -> None: + monkeypatch.setattr(workspace_paths.conf, "save_dir", str(tmp_path)) + user = SimpleNamespace(id="user-1") + root = svc._workspace_root(user) + target = root / "bad.txt" + target.write_bytes(b"\xff\xfe\x00") + + result = await svc.read_workspace_file_content(path="/bad.txt", current_user=user) + + assert result["content"] is None + assert result["preview_type"] == "unsupported" + assert result["supported"] is False + + +@pytest.mark.asyncio +async def test_write_workspace_file_content_updates_markdown_file(tmp_path: Path, monkeypatch) -> None: + monkeypatch.setattr(workspace_paths.conf, "save_dir", str(tmp_path)) + user = SimpleNamespace(id="user-1") + root = svc._workspace_root(user) + target = root / "note.md" + target.write_text("旧内容", encoding="utf-8") + + result = await svc.write_workspace_file_content(path="/note.md", content="# 新内容", current_user=user) + + assert result["success"] is True + assert result["path"] == "/note.md" + assert result["entry"]["path"] == "/note.md" + assert target.read_text(encoding="utf-8") == "# 新内容" + + +@pytest.mark.asyncio +async def test_write_workspace_file_content_updates_txt_file(tmp_path: Path, monkeypatch) -> None: + monkeypatch.setattr(workspace_paths.conf, "save_dir", str(tmp_path)) + user = SimpleNamespace(id="user-1") + root = svc._workspace_root(user) + target = root / "note.txt" + target.write_text("old", encoding="utf-8") + + await svc.write_workspace_file_content(path="/note.txt", content="new", current_user=user) + + assert target.read_text(encoding="utf-8") == "new" + + +@pytest.mark.asyncio +async def test_write_workspace_file_content_rejects_unsupported_suffix(tmp_path: Path, monkeypatch) -> None: + monkeypatch.setattr(workspace_paths.conf, "save_dir", str(tmp_path)) + user = SimpleNamespace(id="user-1") + root = svc._workspace_root(user) + target = root / "script.py" + target.write_text("print('hello')", encoding="utf-8") + + with pytest.raises(HTTPException) as exc_info: + await svc.write_workspace_file_content(path="/script.py", content="print('bye')", current_user=user) + + assert exc_info.value.status_code == 400 + assert target.read_text(encoding="utf-8") == "print('hello')" + + +@pytest.mark.asyncio +async def test_write_workspace_file_content_rejects_directory_and_missing_file(tmp_path: Path, monkeypatch) -> None: + monkeypatch.setattr(workspace_paths.conf, "save_dir", str(tmp_path)) + user = SimpleNamespace(id="user-1") + svc._workspace_root(user) + + with pytest.raises(HTTPException) as directory_error: + await svc.write_workspace_file_content(path="/agents/", content="x", current_user=user) + with pytest.raises(HTTPException) as missing_error: + await svc.write_workspace_file_content(path="/missing.md", content="x", current_user=user) + + assert directory_error.value.status_code == 400 + assert missing_error.value.status_code == 404 + + +@pytest.mark.asyncio +async def test_write_workspace_file_content_blocks_path_traversal(tmp_path: Path, monkeypatch) -> None: + monkeypatch.setattr(workspace_paths.conf, "save_dir", str(tmp_path)) + + with pytest.raises(HTTPException) as exc_info: + await svc.write_workspace_file_content( + path="/../outside.md", + content="x", + current_user=SimpleNamespace(id="user-1"), + ) + + assert exc_info.value.status_code == 403 + + +@pytest.mark.asyncio +async def test_upload_workspace_file_writes_file(tmp_path: Path, monkeypatch) -> None: + monkeypatch.setattr(workspace_paths.conf, "save_dir", str(tmp_path)) + user = SimpleNamespace(id="user-1") + root = svc._workspace_root(user) + upload = UploadFile(filename="demo.txt", file=BytesIO(b"hello")) + + result = await svc.upload_workspace_file(parent_path="/", file=upload, current_user=user) + + assert result["success"] is True + assert result["entry"]["path"] == "/demo.txt" + assert result["entry"]["size"] == 5 + assert (root / "demo.txt").read_bytes() == b"hello" + + +@pytest.mark.asyncio +async def test_upload_workspace_file_rejects_oversized_file_and_cleans_partial_file( + tmp_path: Path, + monkeypatch, +) -> None: + monkeypatch.setattr(workspace_paths.conf, "save_dir", str(tmp_path)) + monkeypatch.setattr(svc, "MAX_WORKSPACE_UPLOAD_SIZE_BYTES", 5) + user = SimpleNamespace(id="user-1") + root = svc._workspace_root(user) + upload = UploadFile(filename="large.txt", file=BytesIO(b"123456")) + + with pytest.raises(HTTPException) as exc_info: + await svc.upload_workspace_file(parent_path="/", file=upload, current_user=user) + + assert exc_info.value.status_code == 400 + assert "100 MB" in exc_info.value.detail + assert not (root / "large.txt").exists() diff --git a/docs/agents/agents-config.md b/docs/agents/agents-config.md index f5d26bef5..d8d426344 100644 --- a/docs/agents/agents-config.md +++ b/docs/agents/agents-config.md @@ -183,6 +183,18 @@ Context 的价值不只在“配置页面”。它贯穿了从配置加载到实 也就是说,运行期 Context 的基础来源并不是前端临时状态,而是数据库中保存的 AgentConfig。 +此外,用户工作区会默认创建 `agents/AGENTS.md`。当 Agent 开始执行时,后端会读取当前用户工作区下的这个文件,并将其内容追加到 `system_prompt`,用于补充该用户对 Agent 的长期指令或工作区约定。该文件属于用户级共享工作区,内容会随 `user_id` 和当前 `thread_id` 映射到运行时工作区路径;文件不存在、为空或不可读时不会影响 Agent 启动,单次注入内容最多读取 64 KiB,超出部分会截断并追加提示。 + +合并后的提示词结构可以理解为: + +```text +AgentConfig.config_json.context.system_prompt + + 用户工作区 agents/AGENTS.md 内容 + + 运行期中间件继续追加的系统提示段 +``` + +因此,`agents/AGENTS.md` 适合放置用户维度的稳定约束,不适合放置一次性任务要求;一次性要求仍应直接写在当前对话中。 + ### 4.2 Context 实例化阶段 `BaseAgent` 在运行前会创建 `context_schema()` 实例,并通过 `update_from_dict()` 注入配置值。 diff --git a/docs/develop-guides/roadmap.md b/docs/develop-guides/roadmap.md index 063cb2494..2fe4e5915 100644 --- a/docs/develop-guides/roadmap.md +++ b/docs/develop-guides/roadmap.md @@ -18,6 +18,8 @@ - 完善 Skills 的环境变量注入 - 拓宽检索的知识源,统一多知识源(channel),目前已知知识库/知识图谱/网页,可拓展:个人知识库、数据库、历史对话等 - 前置任务,多知识库并行检索(扩展 query_kb) + - 新增 query_keywords 工具,专门用于基于关键词命中的排序,也结合词频(和 BM25 的区别?) +- 评估 ### Bugs - 目前的知识库的图片存在公开访问风险 @@ -35,6 +37,7 @@ ### 0.6.2 开发记录 +- 新增个人工作区预览与管理:提供独立于对话 thread 的用户级 workspace API,并增加“工作区”页面,用于浏览个人 workspace 文件、预览 Markdown/文本/代码/图片/PDF;支持新建文件夹、上传文件、下载文件、删除文件/文件夹和多选删除;工作区预览支持 Markdown/TXT 在右侧预览框内切换编辑并保存,其他格式和非工作区预览默认只读;知识库与团队空间入口先展示到占位层级;默认创建 `agents/AGENTS.md`,并在 Agent 执行时将其内容追加到系统提示词。 - 加固 JWT 鉴权安全:移除历史默认密钥回退,初始化脚本支持生成并持久化 `JWT_SECRET_KEY` 与 `YUXI_INSTANCE_ID`,签发和验证令牌时校验 `iss/aud`,并在鉴权阶段拒绝已删除或登录锁定用户继续使用旧令牌访问系统。 - 扩展管理界面交互逻辑重构:将 MCP / Subagents / Skills 三个标签页从「左侧边栏 + 右侧详情面板」布局重构为「卡片式网格布局 + 路由跳转二级页面」布局,工具标签页改为卡片网格布局 + 弹窗详情(保持弹窗内容不变)。新增共享组件 `ExtensionCard`、`ExtensionCardGrid`、`ExtensionToolbar`、`ExtensionDetailLayout`,详情页(`McpDetailView`、`SubagentDetailView`、`SkillDetailView`)使用居中宽度限制,路由规划为 `/extensions/mcp/:name`、`/extensions/subagent/:name`、`/extensions/skill/:slug`。 - 统一卡片样式:`ExtensionCard` 新增 `tags` prop 支持传入 `[{label, color}]` 数组,内部使用 `` 渲染,与知识库卡片标签风格统一;知识库列表页 `DataBaseView` 改用 `ExtensionCard` + `ExtensionCardGrid` 替代原有自定义卡片,移除冗余 card 样式。 diff --git a/web/src/apis/workspace_api.js b/web/src/apis/workspace_api.js new file mode 100644 index 000000000..a4cdbf03c --- /dev/null +++ b/web/src/apis/workspace_api.js @@ -0,0 +1,49 @@ +import { apiDelete, apiGet, apiPost, apiPut } from './base' + +const buildQuery = (params) => { + const query = new URLSearchParams() + Object.entries(params).forEach(([key, value]) => { + if (value !== undefined && value !== null && value !== '') { + query.set(key, String(value)) + } + }) + return query.toString() +} + +export const getWorkspaceTree = (path = '/') => { + const query = buildQuery({ path }) + return apiGet(`/api/workspace/tree?${query}`) +} + +export const getWorkspaceFileContent = (path) => { + const query = buildQuery({ path }) + return apiGet(`/api/workspace/file?${query}`) +} + +export const saveWorkspaceFileContent = (path, content) => { + return apiPut('/api/workspace/file', { path, content }) +} + +export const deleteWorkspacePath = (path) => { + const query = buildQuery({ path }) + return apiDelete(`/api/workspace/file?${query}`) +} + +export const createWorkspaceDirectory = (parentPath, name) => { + return apiPost('/api/workspace/directory', { + parent_path: parentPath, + name + }) +} + +export const uploadWorkspaceFile = (parentPath, file) => { + const formData = new FormData() + formData.append('parent_path', parentPath) + formData.append('file', file) + return apiPost('/api/workspace/upload', formData) +} + +export const downloadWorkspaceFile = (path) => { + const query = buildQuery({ path }) + return apiGet(`/api/workspace/download?${query}`, {}, true, 'blob') +} diff --git a/web/src/assets/css/main.css b/web/src/assets/css/main.css index df6d42554..16b51757c 100644 --- a/web/src/assets/css/main.css +++ b/web/src/assets/css/main.css @@ -39,7 +39,6 @@ body { .layout-container { width: 100%; - padding: 0 var(--page-padding); h2 { margin: 20px 0 10px 0; diff --git a/web/src/components/AgentFilePreview.vue b/web/src/components/AgentFilePreview.vue index 8eab4a69b..98d25aa5a 100644 --- a/web/src/components/AgentFilePreview.vue +++ b/web/src/components/AgentFilePreview.vue @@ -9,6 +9,42 @@ {{ filePath }}
-