From 608758cacdd708209a54bf97fdce7375ea232a87 Mon Sep 17 00:00:00 2001 From: zfoong Date: Thu, 7 May 2026 11:55:05 +0900 Subject: [PATCH 1/3] Open router initiate implementation --- agent_core/core/impl/llm/interface.py | 57 ++- agent_core/core/models/connection_tester.py | 75 +++ agent_core/core/models/factory.py | 2 +- agent_core/core/models/model_registry.py | 7 + agent_core/core/models/provider_config.py | 5 + app/config.py | 6 + app/config/settings.json | 19 +- app/config/skills_config.json | 4 +- app/data/agent_file_system_template/AGENT.md | 15 - app/tui/settings.py | 1 + app/ui_layer/adapters/browser_adapter.py | 51 ++ .../src/pages/Settings/ModelSettings.tsx | 95 +++- .../pages/Settings/OpenRouterModelPicker.tsx | 468 ++++++++++++++++++ .../pages/Settings/SettingsPage.module.css | 307 ++++++++++++ app/ui_layer/commands/builtin/provider.py | 7 +- app/ui_layer/settings/model_settings.py | 23 +- app/ui_layer/settings/openrouter_catalog.py | 225 +++++++++ 17 files changed, 1306 insertions(+), 61 deletions(-) create mode 100644 app/ui_layer/browser/frontend/src/pages/Settings/OpenRouterModelPicker.tsx create mode 100644 app/ui_layer/settings/openrouter_catalog.py diff --git a/agent_core/core/impl/llm/interface.py b/agent_core/core/impl/llm/interface.py index 913453ca..8b01bf83 100644 --- a/agent_core/core/impl/llm/interface.py +++ b/agent_core/core/impl/llm/interface.py @@ -367,7 +367,7 @@ def _generate_response_sync( logger.info(f"[LLM SEND] system={system_prompt} | user={user_prompt}") try: - if self.provider in ("openai", "minimax", "deepseek", "moonshot", "grok"): + if self.provider in ("openai", "minimax", "deepseek", "moonshot", "grok", "openrouter"): response = self._generate_openai(system_prompt, user_prompt) elif self.provider == "remote": response = self._generate_ollama(system_prompt, user_prompt) @@ -502,7 +502,7 @@ def create_session_cache( supports_caching = ( (self.provider == "byteplus" and self._byteplus_cache_manager) or (self.provider == "gemini" and self._gemini_cache_manager) or - (self.provider in ("openai", "deepseek", "grok") and self.client) or # OpenAI/DeepSeek/Grok use automatic caching with prompt_cache_key + (self.provider in ("openai", "deepseek", "grok", "openrouter") and self.client) or # OpenAI/DeepSeek/Grok/OpenRouter use automatic caching with prompt_cache_key (and cache_control for Anthropic-routed OpenRouter models) (self.provider == "anthropic" and self._anthropic_client) # Anthropic uses ephemeral caching with extended TTL ) @@ -605,7 +605,7 @@ def has_session_cache(self, task_id: str, call_type: str) -> bool: return True if self.provider == "gemini" and self._gemini_cache_manager: return True - if self.provider in ("openai", "deepseek", "grok") and self.client: + if self.provider in ("openai", "deepseek", "grok", "openrouter") and self.client: return True if self.provider == "anthropic" and self._anthropic_client: return True @@ -687,8 +687,8 @@ def _generate_response_with_session_sync( logger.info(f"[LLM RECV] {cleaned}") return cleaned - # Handle OpenAI/DeepSeek/Grok with call_type-based cache routing - if self.provider in ("openai", "deepseek", "grok"): + # Handle OpenAI/DeepSeek/Grok/OpenRouter with call_type-based cache routing + if self.provider in ("openai", "deepseek", "grok", "openrouter"): # Get stored system prompt or use provided one session_key = f"{task_id}:{call_type}" stored_system_prompt = self._session_system_prompts.get(session_key) @@ -1184,15 +1184,46 @@ def _generate_openai( # Always enforce JSON output format 
         request_kwargs["response_format"] = {"type": "json_object"}
 
-        # Add prompt_cache_key for OpenAI/DeepSeek cache routing.
-        # Grok (xAI) does not support prompt_cache_key — it uses automatic
-        # prefix caching and ignores this parameter, so skip it for Grok.
-        if self.provider != "grok" and call_type and system_prompt and len(system_prompt) >= config.min_cache_tokens:
+        # Build provider-specific cache hints in extra_body.
+        # - prompt_cache_key (OpenAI/DeepSeek/OpenRouter): improves prefix-cache routing
+        #   stickiness across alternating call types. Grok ignores it; we skip there
+        #   to avoid noise.
+        # - cache_control (OpenRouter routing to Anthropic Claude only): Anthropic
+        #   prompt caching is opt-in. OpenRouter accepts a top-level cache_control
+        #   field and applies it to the last cacheable block automatically. For
+        #   OpenAI/DeepSeek/Gemini upstreams via OpenRouter, caching is automatic
+        #   on the upstream side, so cache_control would be ignored — we only set
+        #   it when the slug is Anthropic-routed.
+        extra_body: Dict[str, Any] = {}
+
+        long_enough = system_prompt and len(system_prompt) >= config.min_cache_tokens
+
+        if self.provider != "grok" and call_type and long_enough:
             prompt_hash = hashlib.sha256(system_prompt.encode()).hexdigest()[:16]
             cache_key = f"{call_type}_{prompt_hash}"
-            request_kwargs["extra_body"] = {"prompt_cache_key": cache_key}
+            extra_body["prompt_cache_key"] = cache_key
             logger.debug(f"[OPENAI] Using prompt_cache_key: {cache_key}")
 
+        if self.provider == "openrouter" and long_enough:
+            model_lower_for_cache = (self.model or "").lower()
+            # OpenRouter slugs are "<provider>/<model>". Anthropic Claude routes
+            # are the only ones requiring opt-in cache_control. Detect by either
+            # the slug prefix or the "claude" substring (some aliases like
+            # "anthropic/claude-3.5-sonnet:beta" still match).
+            if model_lower_for_cache.startswith("anthropic/") or "claude" in model_lower_for_cache:
+                cache_control: Dict[str, Any] = {"type": "ephemeral"}
+                if call_type:
+                    # 1-hour TTL keeps caches alive across alternating call types
+                    # (mirrors the Anthropic-direct path).
+                    cache_control["ttl"] = "1h"
+                extra_body["cache_control"] = cache_control
+                logger.debug(
+                    f"[OPENROUTER] Anthropic cache_control: {cache_control} (model={self.model})"
+                )
+
+        if extra_body:
+            request_kwargs["extra_body"] = extra_body
+
         response = self.client.chat.completions.create(**request_kwargs)
         content = response.choices[0].message.content.strip()
         token_count_input = response.usage.prompt_tokens
@@ -1235,9 +1266,11 @@ def _generate_openai(
             token_count_output,
         )
 
-        # Report usage
+        # Report usage. service_type stays "llm_openai" (the request shape) but
+        # provider attributes to the actual upstream so dashboards split out
+        # OpenRouter / DeepSeek / Grok separately.
self._report_usage_async( - "llm_openai", "openai", self.model, + "llm_openai", self.provider, self.model, token_count_input, token_count_output, cached_tokens ) diff --git a/agent_core/core/models/connection_tester.py b/agent_core/core/models/connection_tester.py index 77925b51..761727a2 100644 --- a/agent_core/core/models/connection_tester.py +++ b/agent_core/core/models/connection_tester.py @@ -54,6 +54,9 @@ def test_provider_connection( elif provider == "grok": url = cfg.default_base_url return _test_grok(api_key, url, timeout) + elif provider == "openrouter": + url = base_url or cfg.default_base_url + return _test_openrouter(api_key, url, timeout) elif provider in ("minimax", "deepseek", "moonshot"): url = cfg.default_base_url return _test_openai_compat(provider, api_key, url, timeout) @@ -399,6 +402,78 @@ def _test_openai_compat( return {"success": False, "message": "Network error", "provider": provider, "error": str(e)} +def _test_openrouter( + api_key: Optional[str], base_url: str, timeout: float +) -> Dict[str, Any]: + """Test OpenRouter API connection. + + Uses /api/v1/auth/key (auth-required) so we both validate the key and + surface the user's credit balance in the success message — that's the + information OpenRouter users care about most. + """ + if not api_key: + return { + "success": False, + "message": "API key is required for OpenRouter", + "provider": "openrouter", + "error": "Missing API key", + } + + try: + with httpx.Client(timeout=timeout) as client: + response = client.get( + f"{base_url.rstrip('/')}/auth/key", + headers={"Authorization": f"Bearer {api_key}"}, + ) + + if response.status_code == 200: + data = response.json().get("data", {}) or {} + limit = data.get("limit") + usage = data.get("usage") + label = data.get("label") or "OpenRouter key" + if limit is None: + msg = f"Connected to OpenRouter ({label}) — unlimited credits" + else: + remaining = max(0.0, float(limit) - float(usage or 0.0)) + msg = ( + f"Connected to OpenRouter ({label}) — " + f"${remaining:.2f} of ${float(limit):.2f} remaining" + ) + return { + "success": True, + "message": msg, + "provider": "openrouter", + } + elif response.status_code in (401, 403): + return { + "success": False, + "message": "Invalid API key", + "provider": "openrouter", + "error": "Authentication failed - check your OpenRouter API key", + } + else: + return { + "success": False, + "message": f"API returned status {response.status_code}", + "provider": "openrouter", + "error": response.text[:300] if response.text else "Unknown error", + } + except httpx.TimeoutException: + return { + "success": False, + "message": "Connection timed out", + "provider": "openrouter", + "error": "Request timed out - check your network connection", + } + except httpx.RequestError as e: + return { + "success": False, + "message": "Network error", + "provider": "openrouter", + "error": str(e), + } + + def _test_grok(api_key: Optional[str], base_url: str, timeout: float) -> Dict[str, Any]: """Test xAI Grok API connection using a minimal chat completion request. 
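For reference, a minimal sketch of the request kwargs that the interface.py hunk above would assemble for an Anthropic-routed OpenRouter slug. The slug, prompts, and the min_cache_tokens threshold below are illustrative placeholders rather than values taken from this patch; only the extra_body shape (prompt_cache_key plus the opt-in cache_control block) mirrors the added code.

    import hashlib

    # Illustrative inputs, not taken from this patch.
    model = "anthropic/claude-sonnet-4.5"
    call_type = "planner"
    system_prompt = "You are a planning assistant. " * 200  # long enough to be worth caching
    min_cache_tokens = 1024

    extra_body = {}
    if len(system_prompt) >= min_cache_tokens:
        prompt_hash = hashlib.sha256(system_prompt.encode()).hexdigest()[:16]
        extra_body["prompt_cache_key"] = f"{call_type}_{prompt_hash}"
        if model.lower().startswith("anthropic/") or "claude" in model.lower():
            # Opt-in Anthropic prompt caching via OpenRouter, with the 1h TTL used above.
            extra_body["cache_control"] = {"type": "ephemeral", "ttl": "1h"}

    request_kwargs = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": "..."},
        ],
        "response_format": {"type": "json_object"},
        "extra_body": extra_body,
    }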
diff --git a/agent_core/core/models/factory.py b/agent_core/core/models/factory.py index 7c654c58..a36b4302 100644 --- a/agent_core/core/models/factory.py +++ b/agent_core/core/models/factory.py @@ -65,7 +65,7 @@ def create( Dictionary with provider context including client instances """ # OpenAI-compatible providers that use OpenAI client with a custom base_url - _OPENAI_COMPAT = {"minimax", "deepseek", "moonshot", "grok"} + _OPENAI_COMPAT = {"minimax", "deepseek", "moonshot", "grok", "openrouter"} if provider not in PROVIDER_CONFIG: raise ValueError(f"Unsupported provider: {provider}") diff --git a/agent_core/core/models/model_registry.py b/agent_core/core/models/model_registry.py index f63c365c..f3178bfd 100644 --- a/agent_core/core/models/model_registry.py +++ b/agent_core/core/models/model_registry.py @@ -49,4 +49,11 @@ InterfaceType.VLM: "grok-4-0709", InterfaceType.EMBEDDING: None, }, + "openrouter": { + # OpenRouter slugs follow `/` format. Default to a Claude + # model so KV caching exercises the cache_control path on first use. + InterfaceType.LLM: "anthropic/claude-sonnet-4.5", + InterfaceType.VLM: "anthropic/claude-sonnet-4.5", + InterfaceType.EMBEDDING: None, + }, } diff --git a/agent_core/core/models/provider_config.py b/agent_core/core/models/provider_config.py index c948ded1..2c8de6bd 100644 --- a/agent_core/core/models/provider_config.py +++ b/agent_core/core/models/provider_config.py @@ -41,4 +41,9 @@ class ProviderConfig: api_key_env="XAI_API_KEY", default_base_url="https://api.x.ai/v1", ), + "openrouter": ProviderConfig( + api_key_env="OPENROUTER_API_KEY", + base_url_env="OPENROUTER_BASE_URL", + default_base_url="https://openrouter.ai/api/v1", + ), } diff --git a/app/config.py b/app/config.py index e8290284..e28fbaa6 100644 --- a/app/config.py +++ b/app/config.py @@ -100,12 +100,14 @@ def _get_default_settings() -> Dict[str, Any]: "anthropic": "", "google": "", "byteplus": "", + "openrouter": "", }, "endpoints": { "remote_model_url": "", "byteplus_base_url": "https://ark.ap-southeast.bytepluses.com/api/v3", "google_api_base": "", "google_api_version": "", + "openrouter_base_url": "", }, "web_search": { "google_cse_id": "", @@ -221,6 +223,7 @@ def get_api_key(provider: str) -> str: "gemini": "google", "google": "google", "byteplus": "byteplus", + "openrouter": "openrouter", } settings_key = key_map.get(provider, provider) @@ -247,6 +250,9 @@ def get_base_url(provider: str) -> Optional[str]: return url if url else "http://localhost:11434" elif provider == "gemini" or provider == "google": return endpoints.get("google_api_base") or None + elif provider == "openrouter": + url = endpoints.get("openrouter_base_url", "") + return url if url else "https://openrouter.ai/api/v1" return None diff --git a/app/config/settings.json b/app/config/settings.json index 12c00359..9be5089a 100644 --- a/app/config/settings.json +++ b/app/config/settings.json @@ -1,5 +1,5 @@ { - "version": "1.3.0", + "version": "1.3.1", "general": { "agent_name": "CraftBot", "os_language": "en" @@ -14,10 +14,10 @@ "item_word_limit": 150 }, "model": { - "llm_provider": "byteplus", - "vlm_provider": "byteplus", - "llm_model": "kimi-k2-250905", - "vlm_model": "seed-1-6-250915", + "llm_provider": "anthropic", + "vlm_provider": "anthropic", + "llm_model": "claude-sonnet-4-5-20250929", + "vlm_model": "claude-sonnet-4-5-20250929", "slow_mode": true, "slow_mode_tpm_limit": 25000 }, @@ -25,14 +25,16 @@ "openai": "", "anthropic": "", "google": "", - "byteplus": "" + "byteplus": "", + "openrouter": "" }, "endpoints": { 
"remote_model_url": "", "byteplus_base_url": "https://ark.ap-southeast.bytepluses.com/api/v3", "google_api_base": "", "google_api_version": "", - "remote": "http://localhost:11434" + "remote": "http://localhost:11434", + "openrouter_base_url": "" }, "gui": { "enabled": true, @@ -76,6 +78,7 @@ "openai": false, "anthropic": false, "google": true, - "byteplus": true + "byteplus": true, + "openrouter": false } } \ No newline at end of file diff --git a/app/config/skills_config.json b/app/config/skills_config.json index 5f963ad8..203b0611 100644 --- a/app/config/skills_config.json +++ b/app/config/skills_config.json @@ -9,7 +9,9 @@ "xlsx", "living-ui-creator", "living-ui-manager", - "living-ui-modify" + "living-ui-modify", + "craftbot-skill-creator", + "craftbot-skill-improve" ], "disabled_skills": [ "cli-anything", diff --git a/app/data/agent_file_system_template/AGENT.md b/app/data/agent_file_system_template/AGENT.md index ea03f0da..5c0d6634 100644 --- a/app/data/agent_file_system_template/AGENT.md +++ b/app/data/agent_file_system_template/AGENT.md @@ -718,21 +718,6 @@ You're blocked when you don't know what to do next AND retrying won't help. The - **Asking open-ended "what should I do" questions.** Always one specific question with an implied default ("Use the bot token from settings.oauth.slack, or reuse the existing /slack login session?"). - **Self-detected logical loops.** The consecutive-failure breaker only catches LLM-call failures. If you keep choosing slightly different params for the same action and getting the same business-logic error (e.g., "user not found" three times with three different IDs you guessed), that is a logical loop. Stop and ask the user. -### Errors observed in past self-edits of this file (do not repeat) - -When you edit AGENT.md, USER.md, SOUL.md, FORMAT.md, or any other file an LLM (you, future-you) will consume, the consumer is a token-counting reader, not a human eye. These mistakes have happened before and are easy to repeat: - -- **Generating output formatted for human readers.** ASCII art (boxes with `┌─┐│└─┘`, arrow trees with `│ ▼`), decorative tables for prose content, em-dash flourishes, marketing intros ("This file is your instruction manual..."), and rhetorical paragraphs are noise to a token-counting reader. Use plain bulleted structure, code blocks for schemas/formats, and direct imperative rules. -- **Numbered cross-references like `§4`, `§15`, `§22`.** They rot the moment a section is reordered or inserted. Use topic-anchored references: `see ## Tasks`, `see ## Configs`. The header becomes the stable anchor. -- **Repeating the same warning across multiple sections.** Saying "DO NOT edit MEMORY.md" three times burns tokens without adding meaning. State it once where it belongs (`## File System` or `## Memory`) and reference it elsewhere. -- **Documenting features that do not exist in the codebase.** Verify by reading source before describing behavior. Examples of fabrication that occurred: an "org chart" section, action parameters that the action did not accept, slash subcommands that were never registered. -- **Decorative tables for non-tabular content.** Tables make sense for `key | value` mappings (config schemas, command lists). They make NO sense for "rules" or "guidelines" — those should be bullets the LLM can grep with `-A` context. -- **Confusing length with comprehensiveness.** A 1300-line file the agent cannot navigate is less useful than a 700-line file with stable anchors and tight sections. 
Comprehensive means "covers what's needed", not "long". -- **Confusing brevity with clarity.** Inverse of the above. A 10-line section that omits the actual mechanism (what triggers, what payloads, what error states, what the harness does for you) leaves the agent guessing. Detail every mechanism the agent will encounter. -- **Writing prose where rules would do.** "Use this when X is true, but consider Y..." is harder to act on than "if X: do A. else: do B." - -The pattern under all of these: the consumer of this file is an LLM. Optimize for grep, structure, and unambiguous rules. Optimize against token waste and decorative noise. - ### What the harness does NOT do for you - It does NOT change your approach when an action fails. You must. diff --git a/app/tui/settings.py b/app/tui/settings.py index dc45304c..47f63036 100644 --- a/app/tui/settings.py +++ b/app/tui/settings.py @@ -20,6 +20,7 @@ "anthropic": "anthropic", "deepseek": "deepseek", "grok": "grok", + "openrouter": "openrouter", } diff --git a/app/ui_layer/adapters/browser_adapter.py b/app/ui_layer/adapters/browser_adapter.py index ec66b683..a2dbc872 100644 --- a/app/ui_layer/adapters/browser_adapter.py +++ b/app/ui_layer/adapters/browser_adapter.py @@ -1467,6 +1467,18 @@ async def _handle_ws_message(self, data: Dict[str, Any], ws=None) -> None: base_url = data.get("baseUrl") await self._handle_ollama_models_get(base_url) + elif msg_type == "openrouter_models_get": + await self._handle_openrouter_models_get( + base_url=data.get("baseUrl"), + force_refresh=bool(data.get("forceRefresh", False)), + ) + + elif msg_type == "openrouter_credits_get": + await self._handle_openrouter_credits_get( + api_key=data.get("apiKey"), + base_url=data.get("baseUrl"), + ) + elif msg_type == "slow_mode_get": await self._handle_slow_mode_get() @@ -3721,6 +3733,45 @@ async def _handle_ollama_models_get(self, base_url: Optional[str] = None) -> Non "data": {"success": False, "models": [], "error": str(e)}, }) + async def _handle_openrouter_models_get( + self, + base_url: Optional[str] = None, + force_refresh: bool = False, + ) -> None: + """Fetch the OpenRouter model catalog and broadcast it. + + The catalog is public (no auth) and large (~300 entries). The helper + caches it in-process for 5 min; pass forceRefresh=True from the UI + to bypass the cache. 
+ """ + try: + from app.ui_layer.settings.openrouter_catalog import fetch_models + result = await asyncio.to_thread( + fetch_models, base_url, force_refresh=force_refresh + ) + await self._broadcast({"type": "openrouter_models_get", "data": result}) + except Exception as e: + await self._broadcast({ + "type": "openrouter_models_get", + "data": {"success": False, "models": [], "error": str(e)}, + }) + + async def _handle_openrouter_credits_get( + self, + api_key: Optional[str] = None, + base_url: Optional[str] = None, + ) -> None: + """Fetch the OpenRouter account credit balance for the configured key.""" + try: + from app.ui_layer.settings.openrouter_catalog import fetch_credits + result = await asyncio.to_thread(fetch_credits, api_key, base_url) + await self._broadcast({"type": "openrouter_credits_get", "data": result}) + except Exception as e: + await self._broadcast({ + "type": "openrouter_credits_get", + "data": {"success": False, "error": str(e)}, + }) + # ───────────────────────────────────────────────────────────────────── # Slow Mode Handlers # ───────────────────────────────────────────────────────────────────── diff --git a/app/ui_layer/browser/frontend/src/pages/Settings/ModelSettings.tsx b/app/ui_layer/browser/frontend/src/pages/Settings/ModelSettings.tsx index 984015ec..c74e41c3 100644 --- a/app/ui_layer/browser/frontend/src/pages/Settings/ModelSettings.tsx +++ b/app/ui_layer/browser/frontend/src/pages/Settings/ModelSettings.tsx @@ -9,6 +9,11 @@ import { useToast } from '../../contexts/ToastContext' import styles from './SettingsPage.module.css' import { useSettingsWebSocket } from './useSettingsWebSocket' import { getOllamaInstallPercent } from '../../utils/ollamaInstall' +import { + OpenRouterModelPicker, + OpenRouterCreditsBanner, + useOpenRouterCatalog, +} from './OpenRouterModelPicker' // Types interface ProviderInfo { @@ -20,6 +25,7 @@ interface ProviderInfo { llm_model: string | null vlm_model: string | null has_vlm: boolean + supports_catalog?: boolean } interface ApiKeyStatus { @@ -93,6 +99,16 @@ export function ModelSettings() { const [pullBytes, setPullBytes] = useState<{ completed: number; total: number; percent: number } | null>(null) const [pullStatus, setPullStatus] = useState('') + // OpenRouter catalog — fetched once on first OpenRouter selection, + // shared between the LLM and VLM pickers below. + const orCatalog = useOpenRouterCatalog( + send, + onMessage, + isConnected, + provider === 'openrouter', + baseUrls['openrouter'] || newBaseUrl || undefined, + ) + const fmtBytes = (n: number) => { if (n >= 1_073_741_824) return `${(n / 1_073_741_824).toFixed(1)} GB` if (n >= 1_048_576) return `${(n / 1_048_576).toFixed(0)} MB` @@ -420,28 +436,40 @@ export function ModelSettings() { {/* Model Configuration */} {currentProvider && ( <> -
- - {provider === 'remote' && ollamaModels.length > 0 ? ( - - ) : ( - { setNewLlmModel(e.target.value); setHasChanges(true) }} - placeholder={ - provider === 'remote' && ollamaModelsLoading - ? 'Loading models...' - : currentLlmModel || 'Enter LLM model name...' - } - /> - )} -
+ {provider === 'openrouter' && currentProvider.supports_catalog ? ( + { setNewLlmModel(v); setHasChanges(true) }} + /> + ) : ( +
+ + {provider === 'remote' && ollamaModels.length > 0 ? ( + + ) : ( + { setNewLlmModel(e.target.value); setHasChanges(true) }} + placeholder={ + provider === 'remote' && ollamaModelsLoading + ? 'Loading models...' + : currentLlmModel || 'Enter LLM model name...' + } + /> + )} +
+ )} {/* Download new Ollama model / Install Ollama */} {provider === 'remote' && ( @@ -605,6 +633,18 @@ export function ModelSettings() { )} {currentProvider.has_vlm && ( + provider === 'openrouter' && currentProvider.supports_catalog ? ( + { setNewVlmModel(v); setHasChanges(true) }} + /> + ) : (
{(() => { @@ -636,6 +676,7 @@ export function ModelSettings() { ) })()}
+ ) )} )} @@ -663,6 +704,16 @@ export function ModelSettings() { )} + {/* OpenRouter credits */} + {provider === 'openrouter' && currentProvider?.supports_catalog && ( + + )} + {/* Base URL */} {currentProvider?.base_url_env && (
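To make the catalog round trip concrete, here is a sketch of the WebSocket messages exchanged for the picker, inferred from _handle_openrouter_models_get above and the useOpenRouterCatalog hook in the new file below. Field names follow the code; the literal values are invented for illustration.

    # Frontend -> backend request, as consumed by _handle_ws_message:
    request = {"type": "openrouter_models_get", "baseUrl": None, "forceRefresh": False}

    # Backend -> frontend broadcast, i.e. the fetch_models() result wrapped by the handler:
    response_ok = {
        "type": "openrouter_models_get",
        "data": {
            "success": True,
            "models": [{"id": "anthropic/claude-sonnet-4.5", "name": "Anthropic: Claude Sonnet 4.5", "...": "..."}],
            "fetched_at": 1746600000,  # unix seconds, also present on cache hits
            "cached": False,
        },
    }

    # Failure shape the hook surfaces as its error state:
    response_err = {
        "type": "openrouter_models_get",
        "data": {"success": False, "models": [], "error": "Timed out fetching OpenRouter model catalog"},
    }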
diff --git a/app/ui_layer/browser/frontend/src/pages/Settings/OpenRouterModelPicker.tsx b/app/ui_layer/browser/frontend/src/pages/Settings/OpenRouterModelPicker.tsx new file mode 100644 index 00000000..b3b86df2 --- /dev/null +++ b/app/ui_layer/browser/frontend/src/pages/Settings/OpenRouterModelPicker.tsx @@ -0,0 +1,468 @@ +import { useEffect, useMemo, useRef, useState } from 'react' +import { Loader2, RefreshCw, Eye, Wrench, Database, ExternalLink } from 'lucide-react' +import styles from './SettingsPage.module.css' + +export interface OpenRouterModel { + id: string + canonical_slug: string | null + name: string + description: string + context_length: number | null + input_modalities: string[] + output_modalities: string[] + pricing: { + prompt: string | null + completion: string | null + image: string | null + input_cache_read: string | null + input_cache_write: string | null + } + supported_parameters: string[] + is_moderated: boolean | null +} + +export interface OpenRouterCredits { + balance: number | null + usage: number + limit: number | null + label: string | null + is_free_tier: boolean | null +} + +type WsSend = (type: string, data?: Record) => void +type WsOnMessage = (type: string, handler: (data: unknown) => void) => () => void + +// ───────────────────────────────────────────────────────────────────── +// Hooks: catalog and credits +// ───────────────────────────────────────────────────────────────────── + +export function useOpenRouterCatalog( + send: WsSend, + onMessage: WsOnMessage, + isConnected: boolean, + enabled: boolean, + baseUrl?: string, +) { + const [models, setModels] = useState([]) + const [loading, setLoading] = useState(false) + const [error, setError] = useState(null) + const [fetchedAt, setFetchedAt] = useState(null) + const requestedRef = useRef(false) + + useEffect(() => { + const cleanup = onMessage('openrouter_models_get', (data: unknown) => { + const d = data as { success: boolean; models?: OpenRouterModel[]; error?: string; fetched_at?: number } + setLoading(false) + if (d.success && d.models) { + setModels(d.models) + setError(null) + if (d.fetched_at) setFetchedAt(d.fetched_at) + } else { + setError(d.error || 'Failed to load models') + } + }) + return cleanup + }, [onMessage]) + + // Fetch once after we go enabled+connected. Re-fetch when baseUrl changes. + useEffect(() => { + if (!isConnected || !enabled || requestedRef.current) return + requestedRef.current = true + setLoading(true) + setError(null) + send('openrouter_models_get', baseUrl ? { baseUrl } : {}) + }, [isConnected, enabled, baseUrl, send]) + + const refresh = () => { + setLoading(true) + setError(null) + send('openrouter_models_get', { ...(baseUrl ? { baseUrl } : {}), forceRefresh: true }) + } + + return { models, loading, error, fetchedAt, refresh } +} + +export function useOpenRouterCredits( + send: WsSend, + onMessage: WsOnMessage, + isConnected: boolean, + hasApiKey: boolean, +) { + const [credits, setCredits] = useState(null) + const [loading, setLoading] = useState(false) + const [error, setError] = useState(null) + + useEffect(() => { + const cleanup = onMessage('openrouter_credits_get', (data: unknown) => { + const d = data as { + success: boolean + balance?: number | null + usage?: number + limit?: number | null + label?: string | null + is_free_tier?: boolean | null + error?: string + } + setLoading(false) + if (d.success) { + setCredits({ + balance: d.balance ?? null, + usage: d.usage ?? 0, + limit: d.limit ?? null, + label: d.label ?? null, + is_free_tier: d.is_free_tier ?? 
null, + }) + setError(null) + } else { + setCredits(null) + setError(d.error || 'Failed to load credits') + } + }) + return cleanup + }, [onMessage]) + + useEffect(() => { + if (!isConnected || !hasApiKey) { + setCredits(null) + return + } + setLoading(true) + send('openrouter_credits_get') + }, [isConnected, hasApiKey, send]) + + return { credits, loading, error } +} + +// ───────────────────────────────────────────────────────────────────── +// Helpers +// ───────────────────────────────────────────────────────────────────── + +function formatPricePerMillion(p: string | null | undefined): string | null { + if (p == null) return null + const n = parseFloat(p) + if (isNaN(n)) return null + if (n === 0) return null + const perM = n * 1_000_000 + if (perM >= 100) return `$${perM.toFixed(0)}` + if (perM >= 1) return `$${perM.toFixed(2)}` + if (perM >= 0.01) return `$${perM.toFixed(3)}` + return `$${perM.toFixed(4)}` +} + +function isFreeModel(m: OpenRouterModel): boolean { + if (m.id.endsWith(':free')) return true + const p = parseFloat(m.pricing.prompt || '0') + const c = parseFloat(m.pricing.completion || '0') + return (isNaN(p) || p === 0) && (isNaN(c) || c === 0) +} + +function supportsVision(m: OpenRouterModel): boolean { + return m.input_modalities.includes('image') +} + +function supportsTools(m: OpenRouterModel): boolean { + return m.supported_parameters.includes('tools') || m.supported_parameters.includes('tool_choice') +} + +function supportsCache(m: OpenRouterModel): boolean { + return Boolean(m.pricing.input_cache_read) +} + +function upstreamOf(id: string): string { + const slash = id.indexOf('/') + return slash > 0 ? id.slice(0, slash) : id +} + +function formatContext(n: number | null | undefined): string { + if (n == null) return '' + if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(n % 1_000_000 === 0 ? 0 : 1)}M` + if (n >= 1_000) return `${Math.round(n / 1_000)}K` + return `${n}` +} + +// ───────────────────────────────────────────────────────────────────── +// Credits banner +// ───────────────────────────────────────────────────────────────────── + +interface CreditsBannerProps { + send: WsSend + onMessage: WsOnMessage + isConnected: boolean + hasApiKey: boolean +} + +export function OpenRouterCreditsBanner({ send, onMessage, isConnected, hasApiKey }: CreditsBannerProps) { + const { credits, loading, error } = useOpenRouterCredits(send, onMessage, isConnected, hasApiKey) + + if (!hasApiKey) { + return ( +
+ + Save an API key to see credit balance. + + + Get a key + +
+ ) + } + + if (loading) { + return ( +
+ + Loading credits… + +
+ ) + } + + if (error) { + return ( +
+ Credits: {error} +
+ ) + } + + if (!credits) return null + + const balanceText = credits.balance != null + ? `$${credits.balance.toFixed(2)} remaining` + : 'Pay-as-you-go (no preset limit)' + + return ( +
+ + Credits: {balanceText} + {credits.label ? · {credits.label} : null} + + + Top up + +
+ ) +} + +// ───────────────────────────────────────────────────────────────────── +// Model picker +// ───────────────────────────────────────────────────────────────────── + +interface PickerProps { + models: OpenRouterModel[] + loading: boolean + error: string | null + value: string + onChange: (v: string) => void + onRefresh: () => void + requireVision?: boolean + label: string +} + +export function OpenRouterModelPicker({ + models, + loading, + error, + value, + onChange, + onRefresh, + requireVision = false, + label, +}: PickerProps) { + const [search, setSearch] = useState('') + const [filterFree, setFilterFree] = useState(false) + const [filterVision, setFilterVision] = useState(requireVision) + const [filterTools, setFilterTools] = useState(false) + const [filterCache, setFilterCache] = useState(false) + const [upstream, setUpstream] = useState('') + + // VLM picker should keep the vision filter pinned on + useEffect(() => { + if (requireVision) setFilterVision(true) + }, [requireVision]) + + const upstreams = useMemo(() => { + const set = new Set() + for (const m of models) set.add(upstreamOf(m.id)) + return Array.from(set).sort() + }, [models]) + + const filtered = useMemo(() => { + const q = search.trim().toLowerCase() + return models.filter(m => { + if (filterFree && !isFreeModel(m)) return false + if (filterVision && !supportsVision(m)) return false + if (filterTools && !supportsTools(m)) return false + if (filterCache && !supportsCache(m)) return false + if (upstream && upstreamOf(m.id) !== upstream) return false + if (!q) return true + return ( + m.id.toLowerCase().includes(q) || + m.name.toLowerCase().includes(q) || + (m.description || '').toLowerCase().includes(q) + ) + }) + }, [models, search, filterFree, filterVision, filterTools, filterCache, upstream]) + + return ( +
+ + +
+
+ setSearch(e.target.value)} + /> + +
+ +
+ + + + + +
+ +
+ {loading && models.length === 0 && ( +
+ Loading catalog… +
+ )} + {error && ( +
+ {error} + +
+ )} + {!loading && !error && filtered.length === 0 && ( +
+ No models match your filters. +
+ )} + {filtered.map(m => { + const free = isFreeModel(m) + const promptPrice = formatPricePerMillion(m.pricing.prompt) + const completionPrice = formatPricePerMillion(m.pricing.completion) + const ctxStr = formatContext(m.context_length) + const selected = value === m.id + return ( + + ) + })} +
+ +
+ + onChange(e.target.value)} + placeholder="anthropic/claude-sonnet-4.5" + /> +
+
+
+ ) +} diff --git a/app/ui_layer/browser/frontend/src/pages/Settings/SettingsPage.module.css b/app/ui_layer/browser/frontend/src/pages/Settings/SettingsPage.module.css index 35ba379f..68e5c57c 100644 --- a/app/ui_layer/browser/frontend/src/pages/Settings/SettingsPage.module.css +++ b/app/ui_layer/browser/frontend/src/pages/Settings/SettingsPage.module.css @@ -2220,3 +2220,310 @@ margin: 0; background: transparent; } + +/* ───────────────────────────────────────────────────────────────────── + OpenRouter model picker + credits banner + Used only when provider === 'openrouter'. + ───────────────────────────────────────────────────────────────────── */ + +/* Credits row — label-style, not an input. Mirrors the muted-text look + used elsewhere in settings (see .hint at line 183). No background, no + border — just a row of text with a trailing link. */ +.orCreditsRow { + display: flex; + align-items: center; + justify-content: space-between; + gap: var(--space-3); + padding: 0 var(--space-1); + font-size: var(--text-xs); + color: var(--text-secondary); + margin: calc(-1 * var(--space-2)) 0 var(--space-1); +} + +.orCreditsLabel { + display: inline-flex; + align-items: center; + gap: 6px; +} + +.orCreditsLabel strong { + color: var(--text-primary); + font-weight: 500; +} + +.orCreditsKey { + color: var(--text-muted); +} + +.orCreditsLink { + display: inline-flex; + align-items: center; + gap: 4px; + color: var(--color-primary); + text-decoration: none; +} + +.orCreditsLink:hover { + text-decoration: underline; +} + +.orPicker { + border: 1px solid var(--border-primary); + border-radius: var(--radius-md); + background: var(--bg-secondary); + display: flex; + flex-direction: column; +} + +/* Header reuses the shared .searchInput control (see line 1543) so the + style is consistent with Skills/MCP/Integrations search bars. */ +.orPickerHeader { + display: flex; + align-items: center; + gap: var(--space-2); + padding: var(--space-2) var(--space-3); + border-bottom: 1px solid var(--border-primary); +} + +.orPickerHeader .searchInput { + flex: 1; +} + +.orPickerRefreshBtn { + background: transparent; + border: 1px solid var(--border-primary); + color: var(--text-secondary); + border-radius: var(--radius-sm); + width: 32px; + height: 32px; + display: inline-flex; + align-items: center; + justify-content: center; + cursor: pointer; +} + +.orPickerRefreshBtn:hover { + background: var(--bg-tertiary); +} + +.orPickerRefreshBtn:disabled { + opacity: 0.5; + cursor: not-allowed; +} + +.orPickerFilters { + display: flex; + flex-wrap: wrap; + align-items: center; + gap: 8px; + padding: 8px 12px; + border-bottom: 1px solid var(--border-primary); + font-size: 12px; + color: var(--text-secondary); +} + +/* Filter chips behave as toggle-buttons. Visual language mirrors the + shared .toggle switch (line 245): gray surface when off, primary + when on — so the on/off semantics match the rest of settings. 
*/ +.orPickerChip { + display: inline-flex; + align-items: center; + padding: 4px 10px; + border-radius: var(--radius-full); + border: 1px solid var(--border-primary); + background: transparent; + color: var(--text-secondary); + font-size: var(--text-xs); + cursor: pointer; + user-select: none; + transition: background var(--transition-fast), color var(--transition-fast), border-color var(--transition-fast); +} + +.orPickerChip:hover:not(:disabled) { + background: var(--bg-tertiary); + color: var(--text-primary); +} + +.orPickerChip:disabled { + cursor: not-allowed; + opacity: 0.6; +} + +.orPickerChipActive { + background: var(--color-primary); + border-color: var(--color-primary); + color: var(--color-white); +} + +.orPickerChipActive:hover:not(:disabled) { + background: var(--color-primary); + color: var(--color-white); +} + +.orPickerUpstream { + margin-left: auto; + padding: 3px 6px; + border-radius: var(--radius-sm); + border: 1px solid var(--border-primary); + background: var(--bg-primary); + color: var(--text-primary); + font-size: 12px; +} + +.orPickerList { + max-height: 360px; + overflow-y: auto; + display: flex; + flex-direction: column; +} + +.orPickerStatus { + padding: 16px; + display: inline-flex; + align-items: center; + gap: 8px; + color: var(--text-secondary); + font-size: 13px; +} + +.orPickerError { + padding: 12px 14px; + color: var(--color-warning); + font-size: 13px; + display: flex; + align-items: center; + gap: 12px; +} + +.orPickerError button { + background: transparent; + color: var(--color-primary); + border: 1px solid var(--border-primary); + padding: 3px 10px; + border-radius: var(--radius-sm); + cursor: pointer; +} + +.orPickerRow { + display: flex; + align-items: center; + gap: 12px; + padding: 10px 12px; + border: none; + border-bottom: 1px solid var(--border-primary); + background: transparent; + text-align: left; + cursor: pointer; + color: var(--text-primary); + width: 100%; +} + +.orPickerRow:last-child { + border-bottom: none; +} + +.orPickerRow:hover { + background: var(--bg-tertiary); +} + +.orPickerRowSelected { + background: var(--color-primary-light); + border-left: 3px solid var(--color-primary); + padding-left: 9px; +} + +.orPickerRowLeft { + flex: 1; + min-width: 0; +} + +.orPickerRowName { + font-size: 13px; + font-weight: 500; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} + +.orPickerRowSlug { + font-size: 11px; + color: var(--text-tertiary); + font-family: var(--font-mono); + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} + +.orPickerRowRight { + display: flex; + flex-direction: column; + align-items: flex-end; + gap: 4px; + flex-shrink: 0; + min-width: 160px; + text-align: right; +} + +.orPickerRowMeta { + display: inline-flex; + align-items: center; + gap: 6px; + color: var(--text-tertiary); + font-size: 11px; +} + +.orPickerCtx { + font-variant-numeric: tabular-nums; +} + +.orPickerCap { + display: inline-flex; + align-items: center; + color: var(--text-secondary); +} + +.orPickerRowPrice { + font-size: 11px; + color: var(--text-secondary); + font-variant-numeric: tabular-nums; +} + +.orPickerPriceUnit { + color: var(--text-tertiary); +} + +.orPickerFreeBadge { + background: var(--color-success-light); + color: var(--color-success); + padding: 2px 6px; + border-radius: 4px; + font-size: 10px; + font-weight: 600; + letter-spacing: 0.04em; +} + +.orPickerSlug { + display: flex; + align-items: center; + gap: 8px; + padding: 10px 12px; + border-top: 1px solid var(--border-primary); +} + 
+.orPickerSlug label { + font-size: 12px; + color: var(--text-tertiary); + margin: 0; + white-space: nowrap; +} + +.orPickerSlug input { + flex: 1; + padding: 6px 10px; + border-radius: var(--radius-sm); + border: 1px solid var(--border-primary); + background: var(--bg-primary); + color: var(--text-primary); + font-size: 12px; + font-family: var(--font-mono); +} diff --git a/app/ui_layer/commands/builtin/provider.py b/app/ui_layer/commands/builtin/provider.py index e9c1d9b7..4008b660 100644 --- a/app/ui_layer/commands/builtin/provider.py +++ b/app/ui_layer/commands/builtin/provider.py @@ -19,6 +19,7 @@ class ProviderCommand(Command): "byteplus": ("BYTEPLUS_API_KEY", "BytePlus"), "deepseek": ("DEEPSEEK_API_KEY", "DeepSeek"), "grok": ("XAI_API_KEY", "Grok (xAI)"), + "openrouter": ("OPENROUTER_API_KEY", "OpenRouter"), "remote": (None, "Ollama (Local)"), } @@ -48,12 +49,16 @@ def help_text(self) -> str: gemini - Google Gemini models anthropic - Anthropic Claude models byteplus - BytePlus Kimi models + deepseek - DeepSeek models + grok - Grok (xAI) models + openrouter - OpenRouter (300+ models, one key) remote - Ollama (local models) Examples: /provider /provider openai - /provider openai sk-xxx""" + /provider openai sk-xxx + /provider openrouter sk-or-v1-xxx""" async def execute( self, diff --git a/app/ui_layer/settings/model_settings.py b/app/ui_layer/settings/model_settings.py index 9e3b7c52..6554b4bb 100644 --- a/app/ui_layer/settings/model_settings.py +++ b/app/ui_layer/settings/model_settings.py @@ -75,6 +75,19 @@ "settings_key": "grok", "requires_api_key": True, }, + "openrouter": { + "name": "OpenRouter", + "api_key_env": "OPENROUTER_API_KEY", + # Intentionally no base_url_env — the OpenRouter endpoint is fixed for + # almost everyone, and exposing the field confused users into thinking + # they had to fill it in. Power users who need a custom gateway can + # still set endpoints.openrouter_base_url in settings.json by hand; + # the backend still reads it (see app/config.py get_base_url). + "settings_key": "openrouter", + "requires_api_key": True, + # Frontend opts in to a catalog-aware picker for this provider. 
+ "supports_catalog": True, + }, "remote": { "name": "Local (Ollama)", "base_url_env": "REMOTE_MODEL_URL", @@ -162,6 +175,7 @@ def get_available_providers() -> Dict[str, Any]: "llm_model": llm_model, "vlm_model": vlm_model, "has_vlm": vlm_model is not None, + "supports_catalog": info.get("supports_catalog", False), }) return { @@ -229,6 +243,9 @@ def get_model_settings() -> Dict[str, Any]: if remote_url: base_urls["remote"] = remote_url + if endpoints_settings.get("openrouter_base_url"): + base_urls["openrouter"] = endpoints_settings["openrouter_base_url"] + return { "success": True, "llm_provider": llm_provider, @@ -323,6 +340,8 @@ def update_model_settings( settings["endpoints"]["byteplus_base_url"] = base_url elif provider_for_url == "remote": settings["endpoints"]["remote_model_url"] = base_url + elif provider_for_url == "openrouter": + settings["endpoints"]["openrouter_base_url"] = base_url # Clear remote URL when switching away from remote so stale values don't persist if llm_provider and llm_provider != "remote" and old_llm_provider == "remote" and not provider_for_url: @@ -378,11 +397,13 @@ def test_connection( api_key = api_keys_settings.get(settings_key) # If no base URL provided, try to get it from settings.json - if base_url is None and provider in ["byteplus", "remote"]: + if base_url is None and provider in ["byteplus", "remote", "openrouter"]: if provider == "byteplus": base_url = endpoints_settings.get("byteplus_base_url") elif provider == "remote": base_url = endpoints_settings.get("remote_model_url") + elif provider == "openrouter": + base_url = endpoints_settings.get("openrouter_base_url") # Run connection test result = test_provider_connection( diff --git a/app/ui_layer/settings/openrouter_catalog.py b/app/ui_layer/settings/openrouter_catalog.py new file mode 100644 index 00000000..18724a9f --- /dev/null +++ b/app/ui_layer/settings/openrouter_catalog.py @@ -0,0 +1,225 @@ +"""OpenRouter catalog + credits helpers for the settings UI. + +OpenRouter exposes a public model catalog at GET /api/v1/models (no auth) +and a credits endpoint at GET /api/v1/credits / GET /api/v1/auth/key +(auth required). The model picker renders the catalog and the balance. + +The catalog is cached in-process for 5 minutes — OpenRouter publishes new +models periodically, but ~300 entries is enough payload that we don't +want to re-fetch on every settings page open. +""" + +from __future__ import annotations + +import time +from typing import Any, Dict, List, Optional + +import httpx + +from app.config import get_api_key, get_base_url + + +_DEFAULT_BASE_URL = "https://openrouter.ai/api/v1" +_CATALOG_TTL_SECONDS = 300 # 5 min — matches OR's own cache windows +_CATALOG_TIMEOUT = 15.0 +_CREDITS_TIMEOUT = 10.0 + +# In-process cache: { base_url: (timestamp, models) } +_catalog_cache: Dict[str, tuple] = {} + + +def _resolve_base_url(base_url: Optional[str] = None) -> str: + if base_url: + return base_url + configured = get_base_url("openrouter") + return configured or _DEFAULT_BASE_URL + + +def _normalize_model(raw: Dict[str, Any]) -> Dict[str, Any]: + """Project the OpenRouter model record to the fields the UI needs. + + OpenRouter's payload is large per-model (descriptions, hugging-face links, + etc.). We only ship what the picker actually renders — keeps WS frames + small and makes the frontend types narrower. 
+ """ + pricing = raw.get("pricing") or {} + architecture = raw.get("architecture") or {} + top_provider = raw.get("top_provider") or {} + return { + "id": raw.get("id"), + "canonical_slug": raw.get("canonical_slug"), + "name": raw.get("name") or raw.get("id"), + "description": (raw.get("description") or "")[:500], + "context_length": raw.get("context_length") or top_provider.get("context_length"), + "input_modalities": architecture.get("input_modalities") or [], + "output_modalities": architecture.get("output_modalities") or [], + "pricing": { + "prompt": pricing.get("prompt"), + "completion": pricing.get("completion"), + "image": pricing.get("image"), + "input_cache_read": pricing.get("input_cache_read"), + "input_cache_write": pricing.get("input_cache_write"), + }, + "supported_parameters": raw.get("supported_parameters") or [], + "is_moderated": top_provider.get("is_moderated"), + } + + +def fetch_models( + base_url: Optional[str] = None, + *, + force_refresh: bool = False, +) -> Dict[str, Any]: + """Return the OpenRouter model catalog (cached). + + Returns: + {"success": bool, "models": [...], "fetched_at": int, "error": str?} + """ + url = _resolve_base_url(base_url) + cache_key = url + + if not force_refresh: + entry = _catalog_cache.get(cache_key) + if entry is not None: + ts, models = entry + if (time.time() - ts) < _CATALOG_TTL_SECONDS: + return { + "success": True, + "models": models, + "fetched_at": int(ts), + "cached": True, + } + + try: + with httpx.Client(timeout=_CATALOG_TIMEOUT) as client: + response = client.get(f"{url.rstrip('/')}/models") + if response.status_code != 200: + return { + "success": False, + "models": [], + "error": f"OpenRouter /models returned status {response.status_code}", + } + raw_models = response.json().get("data") or [] + models = [_normalize_model(m) for m in raw_models if m.get("id")] + _catalog_cache[cache_key] = (time.time(), models) + return { + "success": True, + "models": models, + "fetched_at": int(time.time()), + "cached": False, + } + except httpx.TimeoutException: + return { + "success": False, + "models": [], + "error": "Timed out fetching OpenRouter model catalog", + } + except httpx.RequestError as exc: + return { + "success": False, + "models": [], + "error": f"Network error fetching OpenRouter models: {exc}", + } + except Exception as exc: # pragma: no cover — defensive + return { + "success": False, + "models": [], + "error": f"Unexpected error fetching OpenRouter models: {exc}", + } + + +def fetch_credits( + api_key: Optional[str] = None, + base_url: Optional[str] = None, +) -> Dict[str, Any]: + """Return account credit info for the configured OpenRouter key. + + Hits /api/v1/credits (preferred — newer endpoint with `total_credits` / + `total_usage`). Falls back to /api/v1/auth/key on 404 since older keys / + routes still expose the legacy shape. 
+ + Returns: + {"success": bool, "balance": float, "usage": float, "limit": float?, + "label": str?, "error": str?} + """ + if not api_key: + api_key = get_api_key("openrouter") + if not api_key: + return { + "success": False, + "error": "No OpenRouter API key configured", + } + + url = _resolve_base_url(base_url) + headers = {"Authorization": f"Bearer {api_key}"} + + try: + with httpx.Client(timeout=_CREDITS_TIMEOUT) as client: + response = client.get(f"{url.rstrip('/')}/credits", headers=headers) + if response.status_code == 404: + # Legacy fallback + response = client.get(f"{url.rstrip('/')}/auth/key", headers=headers) + + if response.status_code in (401, 403): + return { + "success": False, + "error": "Invalid API key", + } + if response.status_code != 200: + return { + "success": False, + "error": f"Credits endpoint returned status {response.status_code}", + } + + data = response.json().get("data") or {} + + # /credits shape: { total_credits, total_usage } + # /auth/key shape: { label, usage, limit, is_free_tier, ... } + total_credits = data.get("total_credits") + total_usage = data.get("total_usage") + if total_credits is not None or total_usage is not None: + credits = float(total_credits) if total_credits is not None else 0.0 + usage = float(total_usage) if total_usage is not None else 0.0 + return { + "success": True, + "balance": max(0.0, credits - usage), + "usage": usage, + "limit": credits if total_credits is not None else None, + "label": data.get("label"), + "is_free_tier": data.get("is_free_tier"), + } + + # Legacy /auth/key + usage = float(data.get("usage") or 0.0) + limit = data.get("limit") + balance = None + if limit is not None: + balance = max(0.0, float(limit) - usage) + return { + "success": True, + "balance": balance, + "usage": usage, + "limit": float(limit) if limit is not None else None, + "label": data.get("label"), + "is_free_tier": data.get("is_free_tier"), + } + + except httpx.TimeoutException: + return { + "success": False, + "error": "Timed out fetching OpenRouter credits", + } + except httpx.RequestError as exc: + return { + "success": False, + "error": f"Network error fetching OpenRouter credits: {exc}", + } + except Exception as exc: # pragma: no cover — defensive + return { + "success": False, + "error": f"Unexpected error fetching OpenRouter credits: {exc}", + } + + +def invalidate_catalog_cache() -> None: + _catalog_cache.clear() From 94b5ec5bcb58faab77dec725fc345faac097cf90 Mon Sep 17 00:00:00 2001 From: zfoong Date: Fri, 8 May 2026 14:57:28 +0900 Subject: [PATCH 2/3] Update LLM call error display and improve chat pannel UI --- agent_core/core/impl/llm/errors.py | 934 +++++++++++++++--- agent_core/core/impl/llm/interface.py | 79 +- agent_core/core/models/connection_tester.py | 695 +++++++------ app/agent_base.py | 45 +- app/ui_layer/adapters/browser_adapter.py | 5 +- .../src/components/Chat/Chat.module.css | 76 ++ .../frontend/src/components/Chat/Chat.tsx | 189 ++-- .../frontend/src/pages/Chat/ChatMessage.tsx | 2 +- .../src/pages/Settings/ModelSettings.tsx | 4 + app/ui_layer/settings/model_settings.py | 6 + 10 files changed, 1466 insertions(+), 569 deletions(-) diff --git a/agent_core/core/impl/llm/errors.py b/agent_core/core/impl/llm/errors.py index 052e2611..e63b5605 100644 --- a/agent_core/core/impl/llm/errors.py +++ b/agent_core/core/impl/llm/errors.py @@ -2,180 +2,864 @@ """ LLM Error Classification Module. -Provides user-friendly error messages for LLM-related failures. 
-Uses proper exception types and HTTP status codes - no string pattern matching. +Turns provider-specific exceptions into a structured `LLMErrorInfo` so the UI +can render category-aware error cards (auth vs credits vs rate-limit vs +server, etc.) instead of a single generic string. + +Provider error shapes were captured from live SDK responses — see comments +on each per-provider extractor. The classifier is intentionally defensive +(every body lookup tolerates `None` / wrong type) because some providers +return string bodies, partial JSON, or undocumented fields. + +External callers: +- `classify_llm_error(exc) -> LLMErrorInfo` is the new structured API. +- `classify_llm_error_message(exc) -> str` is the back-compat shim for any + caller that only wants the plain string. Equivalent to + `classify_llm_error(exc).message`. """ from __future__ import annotations +from dataclasses import dataclass, field, asdict +from enum import Enum +from typing import Any, Dict, List, Optional, Tuple -from typing import Optional -# Import provider exception types +# Optional provider SDK imports — kept defensive so missing extras don't +# break the classifier path. try: import openai -except ImportError: +except ImportError: # pragma: no cover openai = None try: import anthropic -except ImportError: +except ImportError: # pragma: no cover anthropic = None +try: + import httpx +except ImportError: # pragma: no cover + httpx = None + try: import requests -except ImportError: +except ImportError: # pragma: no cover requests = None -# User-friendly messages -MSG_AUTH = "Unable to connect to AI service. Please check your API key in Settings." +# ─── Public taxonomy ────────────────────────────────────────────────── + + +class ErrorCategory(str, Enum): + AUTH = "auth" # 401/403 — bad/missing key, key revoked + CREDIT = "credit" # 402, "insufficient_quota", "credit_balance_too_low" + RATE_LIMIT = "rate_limit" # 429 — transient + QUOTA = "quota" # 429 + monthly/account scope (separable from per-min) + MODEL = "model" # 404, "model_not_found" + BAD_REQUEST = "bad_request" # 400 — request malformed (context overflow, etc.) + BLOCKED = "blocked" # safety filter (Gemini/Anthropic) + SERVER = "server" # 5xx, "overloaded_error" + CONNECTION = "connection" # network / timeout / DNS + UNKNOWN = "unknown" + + +@dataclass +class ErrorAction: + """A clickable affordance attached to an error. + + `url` opens in a new tab; `action` is a frontend-resolved verb such as + "open_settings_model" — handled by the chat component, not by URL nav. + Exactly one of url/action should be set. + """ + label: str + url: Optional[str] = None + action: Optional[str] = None + + +@dataclass +class LLMErrorInfo: + category: ErrorCategory + title: str # e.g. "Rate limited" + message: str # e.g. "Free-tier limit on Google AI Studio. Wait ~30s or add your own key." + provider: str # "openrouter", "anthropic", ... 
+ upstream: Optional[str] = None # "Google AI Studio" — present when OR proxies + model: Optional[str] = None + http_status: Optional[int] = None + retry_after_seconds: Optional[int] = None + actions: List[ErrorAction] = field(default_factory=list) + raw_message: Optional[str] = None # truncated raw upstream text for "Show details" + request_id: Optional[str] = None # for support tickets + + def to_dict(self) -> Dict[str, Any]: + d = asdict(self) + d["category"] = self.category.value + return d + + +# ─── Provider display names + category fallbacks ───────────────────── + + +_PROVIDER_DISPLAY: Dict[str, str] = { + "openai": "OpenAI", + "openrouter": "OpenRouter", + "anthropic": "Anthropic", + "gemini": "Gemini", + "google": "Gemini", + "byteplus": "BytePlus", + "deepseek": "DeepSeek", + "grok": "Grok", + "moonshot": "Moonshot", + "minimax": "MiniMax", + "remote": "Ollama", +} + + +# Used only when the provider gave us no message at all (rare). Most +# real-world errors have an upstream message that's already informative; +# we lead with that and only append a short action hint. +_FALLBACK_BODY_BY_CATEGORY: Dict[ErrorCategory, str] = { + ErrorCategory.AUTH: "the API key was rejected", + ErrorCategory.CREDIT: "out of credits", + ErrorCategory.RATE_LIMIT: "rate-limited", + ErrorCategory.QUOTA: "quota exceeded", + ErrorCategory.MODEL: "the selected model is not available", + ErrorCategory.BAD_REQUEST: "the request was rejected", + ErrorCategory.BLOCKED: "blocked by the provider's safety filter", + ErrorCategory.SERVER: "the provider is unavailable", + ErrorCategory.CONNECTION: "unable to reach the provider", + ErrorCategory.UNKNOWN: "something went wrong", +} + + +# Back-compat string constants — some callers still import these directly. +# Kept thin (single phrase) since the rich text now flows through info.message. +MSG_AUTH = "The API key was rejected. Check your key in Settings." +MSG_RATE_LIMIT = "The provider rate-limited this request. Try again shortly." +MSG_MODEL = "The selected model is not available. Pick a different model in Settings." +MSG_CONFIG = "The request was rejected by the provider." +MSG_SERVICE = "The provider service is unavailable. Try again later." +MSG_CONNECTION = "Could not reach the provider. Check your network connection." +MSG_GENERIC = "Something went wrong calling the AI service." MSG_CONSECUTIVE_FAILURE = ( - "LLM calls have failed {count} consecutive times. " - "Task aborted to prevent infinite retries. Please check your LLM configuration." + "Aborted after {count} consecutive failures." ) +# ─── Consecutive-failure exception (preserves last classified info) ─── + + class LLMConsecutiveFailureError(Exception): """Raised when LLM calls fail too many times consecutively. - This exception signals that the task should be aborted to prevent - infinite retry loops that flood logs and waste resources. + Carries the last classified `LLMErrorInfo` (when known) so the UI can + surface the *cause* of the failures, not just the count. """ - def __init__(self, failure_count: int, last_error: Optional[Exception] = None): + def __init__( + self, + failure_count: int, + last_error: Optional[Exception] = None, + last_error_info: Optional[LLMErrorInfo] = None, + ): self.failure_count = failure_count self.last_error = last_error + self.last_error_info = last_error_info message = MSG_CONSECUTIVE_FAILURE.format(count=failure_count) if last_error: message += f" Last error: {last_error}" super().__init__(message) -MSG_MODEL = "The selected AI model is not available. 
Please check your model settings." -MSG_CONFIG = "AI service configuration error. The selected model may not support required features." -MSG_RATE_LIMIT = "AI service is rate-limited. Please wait a moment and try again." -MSG_SERVICE = "AI service is temporarily unavailable. Please try again later." -MSG_CONNECTION = "Unable to reach AI service. Please check your internet connection." -MSG_GENERIC = "An error occurred with the AI service. Please check your LLM configuration." -def classify_llm_error(error: Exception) -> str: - """Classify an LLM error and return a user-friendly message. +# ─── Public entry points ────────────────────────────────────────────── + - Uses exception types and HTTP status codes for classification. +def classify_llm_error( + error: Exception, + *, + provider: Optional[str] = None, + model: Optional[str] = None, +) -> LLMErrorInfo: + """Classify an LLM error into structured info. + + The user-visible string is `info.message` — fully self-contained, with + provider/upstream/raw/action hint composed inline. Other fields are + informational (logging, metrics) and not surfaced to the UI directly. Args: - error: The exception from the LLM call. + error: The exception raised by the provider call. + provider: Provider id (e.g. "openrouter", "anthropic"). Lets us + unwrap provider-specific error shapes (notably OpenRouter's + `metadata.provider_name`/`metadata.raw`). + model: Model id at call time. Stored on the info for logging. Returns: - A user-friendly error message. + `LLMErrorInfo` — never raises. For unrecognised shapes, falls back + to UNKNOWN with the raw exception text preserved as the message + (better than a generic stub — at least the user sees what blew up). """ - # Check OpenAI exceptions - if openai is not None: - msg = _classify_openai_error(error) - if msg: - return msg - - # Check Anthropic exceptions - if anthropic is not None: - msg = _classify_anthropic_error(error) - if msg: - return msg - - # Check requests exceptions (BytePlus, remote/Ollama) - if requests is not None: - msg = _classify_requests_error(error) - if msg: - return msg - - # Check for status_code attribute on any exception - status_code = _get_status_code(error) - if status_code: - return _message_from_status_code(status_code) - - # Generic fallback - return MSG_GENERIC - - -def _classify_openai_error(error: Exception) -> Optional[str]: - """Classify OpenAI SDK exceptions.""" - if isinstance(error, openai.AuthenticationError): - return MSG_AUTH - if isinstance(error, openai.PermissionDeniedError): - return MSG_AUTH - if isinstance(error, openai.NotFoundError): - return MSG_MODEL - if isinstance(error, openai.BadRequestError): - return MSG_CONFIG - if isinstance(error, openai.RateLimitError): - return MSG_RATE_LIMIT - if isinstance(error, openai.InternalServerError): - return MSG_SERVICE - if isinstance(error, openai.APIConnectionError): - return MSG_CONNECTION - if isinstance(error, openai.APITimeoutError): - return MSG_CONNECTION - if isinstance(error, openai.APIStatusError): - return _message_from_status_code(error.status_code) - return None + info = _try_classify(error, provider=provider) + if info is None: + # Don't fabricate a generic message — the raw exception text is + # almost always more informative than any stub we could write. 
+ raw = _truncate(str(error)) or "AI service error" + info = LLMErrorInfo( + category=ErrorCategory.UNKNOWN, + title="AI service error", + message=raw, + provider=provider or "unknown", + raw_message=raw, + ) + + if model and info.model is None: + info.model = model + + return info + + +def classify_llm_error_message(error: Exception) -> str: + """Back-compat shim — returns just the user-facing string. + + Equivalent to `classify_llm_error(error).message`. Kept so existing + call sites that only need a string don't have to refactor in this PR. + """ + return classify_llm_error(error).message -def _classify_anthropic_error(error: Exception) -> Optional[str]: - """Classify Anthropic SDK exceptions.""" - if isinstance(error, anthropic.AuthenticationError): - return MSG_AUTH - if isinstance(error, anthropic.PermissionDeniedError): - return MSG_AUTH - if isinstance(error, anthropic.NotFoundError): - return MSG_MODEL - if isinstance(error, anthropic.BadRequestError): - return MSG_CONFIG - if isinstance(error, anthropic.RateLimitError): - return MSG_RATE_LIMIT - if isinstance(error, anthropic.InternalServerError): - return MSG_SERVICE - if isinstance(error, anthropic.APIConnectionError): - return MSG_CONNECTION - if isinstance(error, anthropic.APITimeoutError): - return MSG_CONNECTION - if isinstance(error, anthropic.APIStatusError): - return _message_from_status_code(error.status_code) - return None +# ─── Dispatcher ─────────────────────────────────────────────────────── + + +def _try_classify( + error: Exception, + *, + provider: Optional[str], +) -> Optional[LLMErrorInfo]: + """Try each provider extractor in turn. Returns None if nothing matches.""" + # OpenAI SDK exceptions cover openai/openrouter/grok/deepseek/moonshot/minimax + if openai is not None and isinstance(error, openai.OpenAIError): + return _classify_openai_compat(error, provider or "openai") + + # Anthropic SDK exceptions + if anthropic is not None and isinstance(error, anthropic.AnthropicError): + return _classify_anthropic(error, provider or "anthropic") + # httpx errors are how the Gemini and BytePlus paths surface failures + if httpx is not None and isinstance(error, httpx.HTTPStatusError): + return _classify_httpx_status(error, provider) + if httpx is not None and isinstance(error, httpx.RequestError): + return _classify_httpx_connection(error, provider) + + # `requests` library — older code paths still raise these + if requests is not None and isinstance(error, requests.exceptions.RequestException): + return _classify_requests(error, provider) + + # Gemini's custom error type (raised by our REST client) + msg = str(error) + if "Gemini" in msg or "promptFeedback" in msg or "blocked" in msg.lower(): + return _classify_gemini_runtime(error, provider or "gemini") -def _classify_requests_error(error: Exception) -> Optional[str]: - """Classify requests library exceptions (for BytePlus/Ollama).""" - if isinstance(error, requests.exceptions.HTTPError): - if error.response is not None: - return _message_from_status_code(error.response.status_code) - return MSG_SERVICE - if isinstance(error, requests.exceptions.ConnectionError): - return MSG_CONNECTION - if isinstance(error, requests.exceptions.Timeout): - return MSG_CONNECTION return None -def _get_status_code(error: Exception) -> Optional[int]: - """Extract HTTP status code from exception if available.""" - # Check for status_code attribute - if hasattr(error, "status_code"): - return getattr(error, "status_code", None) - # Check for response.status_code (requests-style) - if 
hasattr(error, "response") and hasattr(error.response, "status_code"): - return error.response.status_code +# ─── OpenAI / OpenAI-compatible (openai, openrouter, grok, deepseek, ...) ─── + + +def _classify_openai_compat(exc: Exception, provider: str) -> LLMErrorInfo: + """Handle openai SDK exception hierarchy. + + Real shapes captured from live probes: + - OpenAI 401: body.code = "invalid_api_key" (string), body.type = "invalid_request_error" + - OpenRouter 401: body = {"message": "User not found.", "code": 401} ← flat, code is INT + - OpenRouter 429: body = {"message": ..., "code": 429, + "metadata": {"raw": ..., "provider_name": "...", "is_byok": false}} + - Grok 400 (auth!): body is a STRING, status is 400 (NOT 401) + - DeepSeek 401: body.type = "authentication_error", body.code = "invalid_request_error" + """ + body = getattr(exc, "body", None) + status = getattr(exc, "status_code", None) + request_id = getattr(exc, "request_id", None) + + body_dict: Dict[str, Any] = {} + if isinstance(body, dict): + body_dict = body + elif isinstance(body, str): + # Grok edge case — body is the raw string message + body_dict = {"message": body} + + raw_message: str = (body_dict.get("message") if isinstance(body_dict.get("message"), str) else None) or str(exc) + code = body_dict.get("code") + error_type = body_dict.get("type") + + upstream: Optional[str] = None + metadata = body_dict.get("metadata") if isinstance(body_dict.get("metadata"), dict) else None + + # OpenRouter wraps upstream errors. The upstream's verbatim message is + # FAR more useful than OR's "Provider returned error" wrapper. + if provider == "openrouter" and metadata: + if isinstance(metadata.get("provider_name"), str): + upstream = metadata["provider_name"] + if isinstance(metadata.get("raw"), str) and metadata["raw"]: + raw_message = metadata["raw"] + + # ── Category resolution ──────────────────────────────────────── + category = _category_from_openai_exc(exc, status=status, body_dict=body_dict, raw=raw_message) + + # OpenAI string codes are the gold standard signal where present + if isinstance(code, str): + if code == "insufficient_quota": + category = ErrorCategory.CREDIT + elif code == "rate_limit_exceeded": + category = ErrorCategory.RATE_LIMIT + elif code == "context_length_exceeded": + category = ErrorCategory.BAD_REQUEST + elif code in ("model_not_found", "invalid_model"): + category = ErrorCategory.MODEL + elif code == "invalid_api_key": + category = ErrorCategory.AUTH + + # Anthropic-style nested error type can appear when OR proxies Anthropic + if isinstance(error_type, str): + if error_type == "credit_balance_too_low": + category = ErrorCategory.CREDIT + elif error_type == "overloaded_error": + category = ErrorCategory.SERVER + + # OpenRouter uses 402 for empty wallet; the openai SDK doesn't have a + # dedicated 402 exception so we land in APIStatusError — adjust here. 
+ if status == 402: + category = ErrorCategory.CREDIT + + # ── Retry-After ──────────────────────────────────────────────── + retry_after = _retry_after_seconds(exc) + + # ── User-facing message ──────────────────────────────────────── + message = _compose_message(category, raw_message, provider, upstream, retry_after_seconds=retry_after) + actions = _default_actions(category, provider, upstream, metadata) + + return LLMErrorInfo( + category=category, + title=_title_for(category, upstream=upstream), + message=message, + provider=provider, + upstream=upstream, + http_status=status if isinstance(status, int) else None, + retry_after_seconds=retry_after, + actions=actions, + raw_message=_truncate(raw_message), + request_id=request_id if isinstance(request_id, str) else None, + ) + + +def _category_from_openai_exc( + exc: Exception, + *, + status: Optional[int], + body_dict: Dict[str, Any], + raw: str, +) -> ErrorCategory: + """Map openai SDK exception type → category. Defensive for missing SDK.""" + if openai is None: # pragma: no cover + return _category_from_status(status) + + if isinstance(exc, openai.AuthenticationError): + return ErrorCategory.AUTH + if isinstance(exc, openai.PermissionDeniedError): + # Often "billing-blocked" or "country-not-supported" — surface as AUTH-ish. + return ErrorCategory.AUTH + if isinstance(exc, openai.NotFoundError): + return ErrorCategory.MODEL + if isinstance(exc, openai.RateLimitError): + return ErrorCategory.RATE_LIMIT + if isinstance(exc, openai.BadRequestError): + # Grok returns 400 for auth — sniff body + lower = raw.lower() + if "api key" in lower or "api_key" in lower or "invalid_api_key" in lower: + return ErrorCategory.AUTH + if "context" in lower and ("length" in lower or "too long" in lower or "exceeds" in lower): + return ErrorCategory.BAD_REQUEST + if "model" in lower and ("not found" in lower or "not available" in lower or "does not exist" in lower): + return ErrorCategory.MODEL + if "blocked" in lower or "safety" in lower or "policy" in lower: + return ErrorCategory.BLOCKED + return ErrorCategory.BAD_REQUEST + if isinstance(exc, openai.InternalServerError): + return ErrorCategory.SERVER + if isinstance(exc, (openai.APIConnectionError, openai.APITimeoutError)): + return ErrorCategory.CONNECTION + if isinstance(exc, openai.APIStatusError): + return _category_from_status(status) + + return _category_from_status(status) + + +# ─── Anthropic ──────────────────────────────────────────────────────── + + +def _classify_anthropic(exc: Exception, provider: str) -> LLMErrorInfo: + """Anthropic SDK shape: + body = { + "type": "error", + "error": {"type": "authentication_error" | ..., "message": "..."}, + "request_id": "..." + } + """ + if anthropic is None: # pragma: no cover + return _fallback_unknown(exc, provider) + + body = getattr(exc, "body", None) + status = getattr(exc, "status_code", None) + request_id = getattr(exc, "request_id", None) + + error_block = {} + if isinstance(body, dict): + if isinstance(body.get("error"), dict): + error_block = body["error"] + elif isinstance(body.get("type"), str): + error_block = body + + a_type = error_block.get("type") if isinstance(error_block, dict) else None + raw_message = ( + error_block.get("message") + if isinstance(error_block, dict) and isinstance(error_block.get("message"), str) + else str(exc) + ) + + # Map Anthropic's typed error names. These are richer than HTTP codes. 
+    type_to_category = {
+        "authentication_error": ErrorCategory.AUTH,
+        "permission_error": ErrorCategory.AUTH,
+        "credit_balance_too_low": ErrorCategory.CREDIT,
+        "billing_error": ErrorCategory.CREDIT,
+        "rate_limit_error": ErrorCategory.RATE_LIMIT,
+        "overloaded_error": ErrorCategory.SERVER,
+        "api_error": ErrorCategory.SERVER,
+        "invalid_request_error": ErrorCategory.BAD_REQUEST,
+        "not_found_error": ErrorCategory.MODEL,
+    }
+
+    category: Optional[ErrorCategory] = None
+    if isinstance(a_type, str) and a_type in type_to_category:
+        category = type_to_category[a_type]
+    else:
+        # Fall back to SDK exception class
+        if isinstance(exc, anthropic.AuthenticationError):
+            category = ErrorCategory.AUTH
+        elif isinstance(exc, anthropic.PermissionDeniedError):
+            category = ErrorCategory.AUTH
+        elif isinstance(exc, anthropic.NotFoundError):
+            category = ErrorCategory.MODEL
+        elif isinstance(exc, anthropic.RateLimitError):
+            category = ErrorCategory.RATE_LIMIT
+        elif isinstance(exc, anthropic.InternalServerError):
+            category = ErrorCategory.SERVER
+        elif isinstance(exc, (anthropic.APIConnectionError, anthropic.APITimeoutError)):
+            category = ErrorCategory.CONNECTION
+        elif isinstance(exc, anthropic.BadRequestError):
+            # Covers "prompt is too long" / context-length errors as well.
+            category = ErrorCategory.BAD_REQUEST
+        else:
+            category = _category_from_status(status)
+
+    retry_after = _retry_after_seconds(exc)
+
+    actions = _default_actions(category, provider, upstream=None, metadata=None)
+
+    return LLMErrorInfo(
+        category=category,
+        title=_title_for(category),
+        message=_compose_message(category, raw_message, provider, upstream=None, retry_after_seconds=retry_after),
+        provider=provider,
+        upstream=None,
+        http_status=status if isinstance(status, int) else None,
+        retry_after_seconds=retry_after,
+        actions=actions,
+        raw_message=_truncate(raw_message),
+        request_id=request_id if isinstance(request_id, str) else None,
+    )
+
+
+# ─── Gemini / BytePlus (httpx) ──────────────────────────────────────────
+
+
+def _classify_httpx_status(exc: Exception, provider: Optional[str]) -> LLMErrorInfo:
+    """httpx.HTTPStatusError — covers Gemini and BytePlus paths.
+ + Gemini body: {"error":{"code":400,"message":"...","status":"INVALID_ARGUMENT", + "details":[{"reason":"API_KEY_INVALID",...}]}} + BytePlus body: {"error":{"code":"AuthenticationError","message":"..."}} + """ + if httpx is None: # pragma: no cover + return _fallback_unknown(exc, provider or "unknown") + + response = getattr(exc, "response", None) + status = response.status_code if response is not None else None + text = response.text if response is not None else "" + body_dict = _safe_json(text) + + err = body_dict.get("error") if isinstance(body_dict.get("error"), dict) else {} + raw_message = err.get("message") if isinstance(err.get("message"), str) else str(exc) + + # Detect Gemini specifically by reason field + reason: Optional[str] = None + details = err.get("details") if isinstance(err.get("details"), list) else [] + for d in details: + if isinstance(d, dict) and isinstance(d.get("reason"), str): + reason = d["reason"] + break + + inferred_provider = provider or ("gemini" if reason or "generativelanguage" in text else "unknown") + + # Gemini's REST API returns 400 for invalid keys — map by reason field + if reason == "API_KEY_INVALID": + category = ErrorCategory.AUTH + elif reason == "RESOURCE_EXHAUSTED": + category = ErrorCategory.RATE_LIMIT + elif reason == "PERMISSION_DENIED": + category = ErrorCategory.AUTH + else: + category = _category_from_status(status) + # BytePlus encodes auth errors via err.code = "AuthenticationError" + if isinstance(err.get("code"), str) and "auth" in err["code"].lower(): + category = ErrorCategory.AUTH + + retry_after = None + if response is not None: + ra = response.headers.get("retry-after") + if ra is not None: + try: + retry_after = int(float(ra)) + except (ValueError, TypeError): + retry_after = None + + actions = _default_actions(category, inferred_provider, upstream=None, metadata=None) + + return LLMErrorInfo( + category=category, + title=_title_for(category), + message=_compose_message(category, raw_message, inferred_provider, upstream=None), + provider=inferred_provider, + upstream=None, + http_status=status, + retry_after_seconds=retry_after, + actions=actions, + raw_message=_truncate(raw_message), + ) + + +def _classify_httpx_connection(exc: Exception, provider: Optional[str]) -> LLMErrorInfo: + raw = _truncate(str(exc)) + return LLMErrorInfo( + category=ErrorCategory.CONNECTION, + title=_title_for(ErrorCategory.CONNECTION), + message=_compose_message(ErrorCategory.CONNECTION, raw, provider or "unknown", upstream=None), + provider=provider or "unknown", + raw_message=raw, + ) + + +def _classify_gemini_runtime(exc: Exception, provider: str) -> LLMErrorInfo: + """Gemini's GeminiAPIError — raised when the response shape signals an issue + that isn't an HTTP failure (e.g. 
promptFeedback.blockReason).""" + raw = str(exc) + lower = raw.lower() + + if "blocked" in lower or "promptfeedback" in lower or "safety" in lower: + category = ErrorCategory.BLOCKED + else: + category = ErrorCategory.UNKNOWN + + return LLMErrorInfo( + category=category, + title=_title_for(category), + message=_compose_message(category, raw, provider, upstream=None), + provider=provider, + raw_message=_truncate(raw), + actions=_default_actions(category, provider, upstream=None, metadata=None), + ) + + +# ─── requests library (legacy callers) ──────────────────────────────── + + +def _classify_requests(exc: Exception, provider: Optional[str]) -> Optional[LLMErrorInfo]: + if requests is None: # pragma: no cover + return None + if isinstance(exc, requests.exceptions.HTTPError): + response = exc.response + if response is not None: + status = response.status_code + try: + body = response.json() + except Exception: + body = {} + err = body.get("error") if isinstance(body.get("error"), dict) else {} + raw_message = err.get("message") if isinstance(err.get("message"), str) else response.text + return LLMErrorInfo( + category=_category_from_status(status), + title=_title_for(_category_from_status(status)), + message=_compose_message(_category_from_status(status), raw_message, provider or "unknown", upstream=None), + provider=provider or "unknown", + http_status=status, + raw_message=_truncate(raw_message), + ) + if isinstance(exc, (requests.exceptions.ConnectionError, requests.exceptions.Timeout)): + raw = _truncate(str(exc)) + return LLMErrorInfo( + category=ErrorCategory.CONNECTION, + title=_title_for(ErrorCategory.CONNECTION), + message=_compose_message(ErrorCategory.CONNECTION, raw, provider or "unknown", upstream=None), + provider=provider or "unknown", + raw_message=raw, + ) return None -def _message_from_status_code(status_code: int) -> str: - """Map HTTP status code to user-friendly message.""" - if status_code == 401 or status_code == 403: - return MSG_AUTH - if status_code == 404: - return MSG_MODEL - if status_code == 400: - return MSG_CONFIG - if status_code == 429: - return MSG_RATE_LIMIT - if 500 <= status_code < 600: - return MSG_SERVICE - return MSG_GENERIC +# ─── Helpers ────────────────────────────────────────────────────────── + + +def _category_from_status(status: Optional[int]) -> ErrorCategory: + if status is None: + return ErrorCategory.UNKNOWN + if status in (401, 403): + return ErrorCategory.AUTH + if status == 402: + return ErrorCategory.CREDIT + if status == 404: + return ErrorCategory.MODEL + if status == 400: + return ErrorCategory.BAD_REQUEST + if status == 429: + return ErrorCategory.RATE_LIMIT + if 500 <= status < 600: + return ErrorCategory.SERVER + return ErrorCategory.UNKNOWN + + +def _retry_after_seconds(exc: Exception) -> Optional[int]: + response = getattr(exc, "response", None) + if response is None: + return None + ra = None + try: + ra = response.headers.get("retry-after") + except AttributeError: + return None + if not ra: + return None + try: + return int(float(ra)) + except (ValueError, TypeError): + return None + + +_CATEGORY_TITLES: Dict[ErrorCategory, str] = { + ErrorCategory.AUTH: "Invalid API key", + ErrorCategory.CREDIT: "Out of credits", + ErrorCategory.RATE_LIMIT: "Rate limited", + ErrorCategory.QUOTA: "Quota exceeded", + ErrorCategory.MODEL: "Incorrect model id", + ErrorCategory.BAD_REQUEST: "Bad request", + ErrorCategory.BLOCKED: "Blocked by safety filter", + ErrorCategory.SERVER: "Provider service unavailable", + ErrorCategory.CONNECTION: 
"Cannot reach provider", + ErrorCategory.UNKNOWN: "AI service error", +} + + +# Categories where we suppress the leading title sentence — the raw +# provider message is already self-explanatory or the title would just +# repeat the upstream's words. +_SKIP_TITLE_CATEGORIES = {ErrorCategory.UNKNOWN, ErrorCategory.BAD_REQUEST} + + +def _title_for(category: ErrorCategory, *, upstream: Optional[str] = None) -> str: + """Short title — used for logging/metrics and for the leading sentence + of the user-facing chat message (see `_compose_message`).""" + base = _CATEGORY_TITLES.get(category, "AI service error") + if upstream and category in (ErrorCategory.RATE_LIMIT, ErrorCategory.SERVER, ErrorCategory.BLOCKED): + return f"{base} ({upstream})" + return base + + +def _compose_message( + category: ErrorCategory, + raw_message: str, + provider: str, + upstream: Optional[str], + *, + retry_after_seconds: Optional[int] = None, +) -> str: + """Build the single user-facing string shown in the chat error bubble. + + Format: ". [via ]: . ." + + The category title leads so users instantly know *what kind* of error + happened — important when the provider's raw text is terse (Anthropic + returns just `"model: claude-sonnet-4-5-2025092945"` for a bad model + id, which is meaningless without context). The raw provider text + follows so users see the exact upstream message. The action hint + closes when it adds value beyond what the raw already says. + """ + raw = (raw_message or "").strip() + if raw.lower() == "none": + raw = "" + raw = _truncate(raw.rstrip("."), limit=400) + if not raw: + raw = _FALLBACK_BODY_BY_CATEGORY.get(category, "an error occurred") + + # Lead with category title (e.g. "Incorrect model id.") unless the + # category is too vague to title meaningfully. + if category in _SKIP_TITLE_CATEGORIES: + lead = "" + else: + lead = f"{_title_for(category, upstream=upstream)}." + + name = _PROVIDER_DISPLAY.get(provider, "") + if name: + prefix = f"{name} (via {upstream})" if upstream else name + provider_part = f"{prefix}: {raw}" + else: + provider_part = raw + + body = f"{lead} {provider_part}" if lead else provider_part + return _append_hint(body, category, provider, retry_after_seconds, raw.lower()) + + +def _append_hint( + body: str, + category: ErrorCategory, + provider: str, + retry_after: Optional[int], + raw_lower: str, +) -> str: + """Append a short action hint, suppressed when the provider's own raw + text already covers it (avoids "...add your own key. Try again shortly.").""" + base = body.rstrip(".") + + if category == ErrorCategory.AUTH: + if "key" in raw_lower or "settings" in raw_lower: + return f"{base}." + return f"{base}. Check your API key in Settings." + + if category == ErrorCategory.CREDIT: + if any(s in raw_lower for s in ("billing", "credit", "top up", "topup")): + return f"{base}." + if provider == "openrouter": + return f"{base}. Top up at https://openrouter.ai/credits." + if provider == "openai": + return f"{base}. Manage billing at https://platform.openai.com/account/billing." + if provider == "anthropic": + return f"{base}. Manage billing at https://console.anthropic.com/settings/billing." + return f"{base}." + + if category == ErrorCategory.RATE_LIMIT: + if retry_after: + return f"{base}. Try again in {retry_after}s." + if any(s in raw_lower for s in ( + "byok", "your own key", "openrouter.ai/settings", "retry", "wait", "try again", + )): + return f"{base}." + return f"{base}. Try again shortly." 
+ + if category == ErrorCategory.QUOTA: + if "billing" in raw_lower or "usage" in raw_lower: + return f"{base}." + if provider == "openai": + return f"{base}. Manage usage at https://platform.openai.com/usage." + return f"{base}." + + if category == ErrorCategory.MODEL: + if "settings" in raw_lower: + return f"{base}." + return f"{base}. Use a correct model in Settings." + + if category == ErrorCategory.BLOCKED: + return f"{base}. Edit your prompt and retry." + + if category == ErrorCategory.SERVER: + if "try again" in raw_lower or "retry" in raw_lower: + return f"{base}." + return f"{base}. Try again later." + + if category == ErrorCategory.CONNECTION: + if provider == "remote": + return f"{base}. Check that Ollama is running." + if "network" in raw_lower or "connection" in raw_lower: + return f"{base}." + return f"{base}. Check your network connection." + + # BAD_REQUEST / UNKNOWN — raw is the most informative thing we can show + return f"{base}." + + +def _default_actions( + category: ErrorCategory, + provider: str, + upstream: Optional[str], + metadata: Optional[Dict[str, Any]], +) -> List[ErrorAction]: + """Per-(category, provider) action affordances. + + Keep this list short — each action is a click target the user is more + likely to actually want than just dismissing the error. + """ + actions: List[ErrorAction] = [] + + if category == ErrorCategory.CREDIT: + if provider == "openrouter": + actions.append(ErrorAction(label="Top up credits", url="https://openrouter.ai/credits")) + elif provider == "openai": + actions.append(ErrorAction(label="Manage billing", url="https://platform.openai.com/account/billing")) + elif provider == "anthropic": + actions.append(ErrorAction(label="Manage billing", url="https://console.anthropic.com/settings/billing")) + actions.append(ErrorAction(label="Open settings", action="open_settings_model")) + + elif category == ErrorCategory.RATE_LIMIT: + if provider == "openrouter" and metadata and metadata.get("is_byok") is False: + # Free-tier user — point at OR integrations page for BYOK + actions.append(ErrorAction(label="Add your own key", url="https://openrouter.ai/settings/integrations")) + actions.append(ErrorAction(label="Open settings", action="open_settings_model")) + + elif category == ErrorCategory.QUOTA: + if provider == "openai": + actions.append(ErrorAction(label="Manage usage", url="https://platform.openai.com/usage")) + + return actions + + +def _has_action(info: LLMErrorInfo, action_value: str) -> bool: + return any(a.action == action_value for a in info.actions) + + +def _safe_json(text: str) -> Dict[str, Any]: + if not text: + return {} + try: + import json + result = json.loads(text) + return result if isinstance(result, dict) else {} + except Exception: + return {} + + +def _truncate(s: Optional[str], limit: int = 500) -> str: + if s is None: + return "" + s = str(s) + if len(s) <= limit: + return s + return s[:limit].rstrip() + "…" + + +def _fallback_unknown(exc: Exception, provider: str) -> LLMErrorInfo: + raw = _truncate(str(exc)) or "AI service error" + return LLMErrorInfo( + category=ErrorCategory.UNKNOWN, + title="AI service error", + message=raw, + provider=provider, + raw_message=raw, + ) diff --git a/agent_core/core/impl/llm/interface.py b/agent_core/core/impl/llm/interface.py index 8b01bf83..67d2cb54 100644 --- a/agent_core/core/impl/llm/interface.py +++ b/agent_core/core/impl/llm/interface.py @@ -29,7 +29,7 @@ get_cache_config, get_cache_metrics, ) -from agent_core.core.impl.llm.errors import LLMConsecutiveFailureError +from 
agent_core.core.impl.llm.errors import LLMConsecutiveFailureError, classify_llm_error from agent_core.core.hooks import ( GetTokenCountHook, SetTokenCountHook, @@ -384,8 +384,14 @@ def _generate_response_sync( # Check if response is empty and provide diagnostics if not content: + # Prefer the classified rich message (provider + upstream + + # raw + action hint inline) over the bare exception string. + # This is what the user actually sees in the chat bubble. + error_info = response.get("error_info_obj") error_msg = response.get("error", "") - if error_msg: + if error_info is not None: + error_detail = error_info.message + elif error_msg: error_detail = f"LLM provider returned error: {error_msg}" else: error_detail = ( @@ -402,7 +408,14 @@ def _generate_response_sync( f"[LLM CONSECUTIVE FAILURE] Count: {self._consecutive_failures}/{self._max_consecutive_failures}" ) if self._consecutive_failures >= self._max_consecutive_failures: - raise LLMConsecutiveFailureError(self._consecutive_failures) + # Attach the underlying classified info so the agent_base + # error handler can show the *cause* of the 5 failures + # (e.g. "rate-limited on Google AI Studio") instead of a + # meta-message about retry counts. + raise LLMConsecutiveFailureError( + self._consecutive_failures, + last_error_info=error_info, + ) raise RuntimeError(error_detail) # Success - reset consecutive failure counter @@ -428,7 +441,17 @@ def _generate_response_sync( f"[LLM CONSECUTIVE FAILURE] Count: {self._consecutive_failures}/{self._max_consecutive_failures} | Error: {e}" ) if self._consecutive_failures >= self._max_consecutive_failures: - raise LLMConsecutiveFailureError(self._consecutive_failures, last_error=e) from e + # Classify on the way out so the fatal-failure handler can + # surface the cause, not just the count. + try: + info = classify_llm_error(e, provider=self.provider, model=self.model) + except Exception: + info = None + raise LLMConsecutiveFailureError( + self._consecutive_failures, + last_error=e, + last_error_info=info, + ) from e raise @profile("llm_generate_response", OperationCategory.LLM) @@ -1343,6 +1366,18 @@ def _generate_ollama(self, system_prompt: str | None, user_prompt: str) -> Dict[ if exc_obj: error_str = f"{type(exc_obj).__name__}: {str(exc_obj)}" result["error"] = error_str + # Classify once and stash the LLMErrorInfo object so the + # outer `_generate_response_sync` can put `info.message` + # (the rich detailed string) into the RuntimeError it raises, + # and attach the info to LLMConsecutiveFailureError at the + # 5-failure threshold. The classifier is wrapped in try/except + # so it can never break the error path itself. + try: + result["error_info_obj"] = classify_llm_error( + exc_obj, provider=self.provider, model=self.model + ) + except Exception: + pass result["content"] = "" logger.error(f"[OLLAMA_ERROR] {error_str}") else: @@ -1464,6 +1499,18 @@ def _generate_gemini( if exc_obj: error_str = f"{type(exc_obj).__name__}: {str(exc_obj)}" result["error"] = error_str + # Classify once and stash the LLMErrorInfo object so the + # outer `_generate_response_sync` can put `info.message` + # (the rich detailed string) into the RuntimeError it raises, + # and attach the info to LLMConsecutiveFailureError at the + # 5-failure threshold. The classifier is wrapped in try/except + # so it can never break the error path itself. 
+ try: + result["error_info_obj"] = classify_llm_error( + exc_obj, provider=self.provider, model=self.model + ) + except Exception: + pass result["content"] = "" logger.error(f"[GEMINI_ERROR] {error_str}") else: @@ -1701,6 +1748,18 @@ def _generate_byteplus_standard( if exc_obj: error_str = f"{type(exc_obj).__name__}: {str(exc_obj)}" result["error"] = error_str + # Classify once and stash the LLMErrorInfo object so the + # outer `_generate_response_sync` can put `info.message` + # (the rich detailed string) into the RuntimeError it raises, + # and attach the info to LLMConsecutiveFailureError at the + # 5-failure threshold. The classifier is wrapped in try/except + # so it can never break the error path itself. + try: + result["error_info_obj"] = classify_llm_error( + exc_obj, provider=self.provider, model=self.model + ) + except Exception: + pass result["content"] = "" logger.error(f"[BYTEPLUS_ERROR] {error_str}") else: @@ -1848,6 +1907,18 @@ def _generate_anthropic( if exc_obj: error_str = f"{type(exc_obj).__name__}: {str(exc_obj)}" result["error"] = error_str + # Classify once and stash the LLMErrorInfo object so the + # outer `_generate_response_sync` can put `info.message` + # (the rich detailed string) into the RuntimeError it raises, + # and attach the info to LLMConsecutiveFailureError at the + # 5-failure threshold. The classifier is wrapped in try/except + # so it can never break the error path itself. + try: + result["error_info_obj"] = classify_llm_error( + exc_obj, provider=self.provider, model=self.model + ) + except Exception: + pass result["content"] = "" logger.error(f"[ANTHROPIC_ERROR] {error_str}") else: diff --git a/agent_core/core/models/connection_tester.py b/agent_core/core/models/connection_tester.py index 761727a2..e315215e 100644 --- a/agent_core/core/models/connection_tester.py +++ b/agent_core/core/models/connection_tester.py @@ -1,5 +1,14 @@ # -*- coding: utf-8 -*- -"""Connection tester for validating provider API keys.""" +"""Connection tester for validating provider API keys and model ids. + +When `model` is provided, each tester attempts a tiny chat-completion (or +equivalent) against that exact model — so a typo in the model id is caught +at test time, not at first real call. When `model` is omitted we fall back +to a known-good default model from connection_test_models.json. + +On failure we run the underlying exception through `classify_llm_error` so +the test result message reads exactly like a real LLM error in the chat. +""" from typing import Dict, Any, Optional import httpx @@ -11,22 +20,24 @@ def test_provider_connection( provider: str, api_key: Optional[str] = None, base_url: Optional[str] = None, - timeout: float = 10.0, + timeout: float = 15.0, + model: Optional[str] = None, ) -> Dict[str, Any]: - """Test if a provider's API key is valid by making a minimal API call. + """Test if a provider's API key (and optionally model id) is valid. Args: - provider: The LLM provider name (openai, gemini, anthropic, byteplus, remote) - api_key: The API key to test. If None, will check if connection is possible. - base_url: Optional base URL override (for byteplus/remote providers) - timeout: Request timeout in seconds + provider: The LLM provider name. + api_key: The API key to test. + base_url: Optional base URL override. + timeout: Request timeout in seconds. + model: When provided, the tester verifies this exact model is + reachable. Catches typos in the model id (e.g. 
+ "claude-sonnet-4-5-2025092945" vs the real + "claude-sonnet-4-5-20250929") that would otherwise pass an + auth-only test and only fail at first real call. Returns: - Dictionary with: - - success: bool indicating if connection succeeded - - message: str with success/failure message - - provider: str provider name - - error: Optional[str] error details if failed + Dictionary with success/message/provider/error. """ if provider not in PROVIDER_CONFIG: return { @@ -40,26 +51,26 @@ def test_provider_connection( try: if provider == "openai": - return _test_openai(api_key, timeout) + return _test_openai(api_key, timeout, model) elif provider == "anthropic": - return _test_anthropic(api_key, timeout) + return _test_anthropic(api_key, timeout, model) elif provider == "gemini": - return _test_gemini(api_key, timeout) + return _test_gemini(api_key, timeout, model) elif provider == "byteplus": url = base_url or cfg.default_base_url - return _test_byteplus(api_key, url, timeout) + return _test_byteplus(api_key, url, timeout, model) elif provider == "remote": url = base_url or cfg.default_base_url return _test_remote(url, timeout) elif provider == "grok": url = cfg.default_base_url - return _test_grok(api_key, url, timeout) + return _test_grok(api_key, url, timeout, model) elif provider == "openrouter": url = base_url or cfg.default_base_url - return _test_openrouter(api_key, url, timeout) + return _test_openrouter(api_key, url, timeout, model) elif provider in ("minimax", "deepseek", "moonshot"): url = cfg.default_base_url - return _test_openai_compat(provider, api_key, url, timeout) + return _test_openai_compat(provider, api_key, url, timeout, model) else: return { "success": False, @@ -76,356 +87,346 @@ def test_provider_connection( } -def _test_openai(api_key: Optional[str], timeout: float) -> Dict[str, Any]: - """Test OpenAI API connection.""" - if not api_key: - return { - "success": False, - "message": "API key is required for OpenAI", - "provider": "openai", - "error": "Missing API key", - } +# ─── Helpers ────────────────────────────────────────────────────────── - try: - # Use models endpoint - lightweight call to verify API key - with httpx.Client(timeout=timeout) as client: - response = client.get( - "https://api.openai.com/v1/models", - headers={"Authorization": f"Bearer {api_key}"}, - ) - if response.status_code == 200: - return { - "success": True, - "message": "Successfully connected to OpenAI API", - "provider": "openai", - } - elif response.status_code == 401: - return { - "success": False, - "message": "Invalid API key", - "provider": "openai", - "error": "Authentication failed - check your API key", - } - else: - return { - "success": False, - "message": f"API returned status {response.status_code}", - "provider": "openai", - "error": response.text[:200] if response.text else "Unknown error", - } - except httpx.TimeoutException: +def _classified_error_result(exc: Exception, provider: str, model: Optional[str]) -> Dict[str, Any]: + """Run an exception through the classifier and return a failure result + with the rich message — same format the chat sees for real LLM errors.""" + try: + from agent_core.core.impl.llm.errors import classify_llm_error + info = classify_llm_error(exc, provider=provider, model=model) return { "success": False, - "message": "Connection timed out", - "provider": "openai", - "error": "Request timed out - check your network connection", + "message": info.message, + "provider": provider, + "error": info.message, } - except httpx.RequestError as e: + except 
Exception: # pragma: no cover — classifier must never break test return { "success": False, - "message": "Network error", - "provider": "openai", - "error": str(e), + "message": str(exc), + "provider": provider, + "error": str(exc), } -def _test_anthropic(api_key: Optional[str], timeout: float) -> Dict[str, Any]: - """Test Anthropic API connection.""" +def _resolve_test_model(provider: str, model: Optional[str], fallback: str) -> str: + """Use the user's model when provided; otherwise pull the default test + model from connection_test_models.json (auth-only validation).""" + if model: + return model + try: + from app.config import get_connection_test_model + configured = get_connection_test_model(provider) + if configured: + return configured + except Exception: + pass + return fallback + + +def _success(provider: str, model: Optional[str]) -> Dict[str, Any]: + detail = f" with model {model}" if model else "" + return { + "success": True, + "message": f"Successfully connected to {_DISPLAY.get(provider, provider)} API{detail}.", + "provider": provider, + } + + +_DISPLAY = { + "openai": "OpenAI", + "anthropic": "Anthropic", + "gemini": "Google Gemini", + "byteplus": "BytePlus", + "deepseek": "DeepSeek", + "moonshot": "Moonshot", + "minimax": "MiniMax", + "grok": "Grok (xAI)", + "openrouter": "OpenRouter", + "remote": "Ollama", +} + + +# ─── OpenAI / OpenAI-compat ─────────────────────────────────────────── + + +def _openai_compat_chat_test( + *, + provider: str, + api_key: Optional[str], + base_url: Optional[str], + model: str, + timeout: float, +) -> Dict[str, Any]: + """Hit /chat/completions with the user's model. The response tells us: + 200/400/422 → key + model OK + 401 → bad key + 404 → bad model + 402 → no credits (key valid) + 429 → rate limited (key valid) + For all failure shapes, we surface the classifier's rich message. + """ if not api_key: return { "success": False, - "message": "API key is required for Anthropic", - "provider": "anthropic", + "message": f"API key is required for {_DISPLAY.get(provider, provider)}", + "provider": provider, "error": "Missing API key", } - try: - # Use a minimal messages request to verify API key - # We send an invalid request that will fail fast but verify auth - from app.config import get_connection_test_model, get_connection_test_config - test_model = get_connection_test_model("anthropic") or "claude-haiku-4-5-20251001" - test_config = get_connection_test_config("anthropic") + from openai import OpenAI + client = OpenAI( + api_key=api_key, + base_url=base_url or None, + timeout=timeout, + max_retries=0, + ) + client.chat.completions.create( + model=model, + messages=[{"role": "user", "content": "hi"}], + max_tokens=1, + ) + return _success(provider, model) + except Exception as exc: + # 422 BadRequest with a "messages" issue still means auth+model worked. + # Classify, and if it's a BAD_REQUEST not about the model, treat as success. + from agent_core.core.impl.llm.errors import classify_llm_error, ErrorCategory + try: + info = classify_llm_error(exc, provider=provider, model=model) + if info.category in (ErrorCategory.AUTH, ErrorCategory.MODEL, ErrorCategory.CREDIT): + return { + "success": False, + "message": info.message, + "provider": provider, + "error": info.message, + } + # RATE_LIMIT, SERVER, BAD_REQUEST, etc. — auth+model are likely fine. 
+ return _success(provider, model) + except Exception: + return _classified_error_result(exc, provider, model) + + +def _test_openai(api_key: Optional[str], timeout: float, model: Optional[str]) -> Dict[str, Any]: + if model: + return _openai_compat_chat_test( + provider="openai", api_key=api_key, base_url=None, model=model, timeout=timeout, + ) + # No model specified → just verify the key with /models list (cheaper). + if not api_key: + return {"success": False, "message": "API key is required for OpenAI", + "provider": "openai", "error": "Missing API key"} + try: with httpx.Client(timeout=timeout) as client: - response = client.post( - "https://api.anthropic.com/v1/messages", - headers={ - "x-api-key": api_key, - "anthropic-version": "2023-06-01", - "content-type": "application/json", - }, - json={ - "model": test_model, - "max_tokens": test_config.get("max_tokens", 1), - "messages": [{"role": "user", "content": "hi"}], - }, + response = client.get( + "https://api.openai.com/v1/models", + headers={"Authorization": f"Bearer {api_key}"}, ) + if response.status_code == 200: + return _success("openai", None) + response.raise_for_status() + return {"success": False, "message": f"API returned status {response.status_code}", + "provider": "openai", "error": response.text[:300]} + except Exception as exc: + return _classified_error_result(exc, "openai", None) + - # 200 means success (actual completion - shouldn't happen with max_tokens=1 but possible) - # 400 with specific error also indicates valid auth +def _test_openai_compat( + provider: str, api_key: Optional[str], base_url: str, timeout: float, model: Optional[str], +) -> Dict[str, Any]: + if model: + return _openai_compat_chat_test( + provider=provider, api_key=api_key, base_url=base_url, model=model, timeout=timeout, + ) + # No model → /models list (auth-only). 
+ display = _DISPLAY.get(provider, provider) + if not api_key: + return {"success": False, "message": f"API key is required for {display}", + "provider": provider, "error": "Missing API key"} + try: + with httpx.Client(timeout=timeout) as client: + response = client.get( + f"{base_url.rstrip('/')}/models", + headers={"Authorization": f"Bearer {api_key}"}, + ) if response.status_code == 200: - return { - "success": True, - "message": "Successfully connected to Anthropic API", - "provider": "anthropic", - } - elif response.status_code == 401: - return { - "success": False, - "message": "Invalid API key", - "provider": "anthropic", - "error": "Authentication failed - check your API key", - } - elif response.status_code == 400: - # Bad request but auth succeeded - return { - "success": True, - "message": "Successfully connected to Anthropic API", - "provider": "anthropic", - } - elif response.status_code == 529: - # Overloaded but auth succeeded - return { - "success": True, - "message": "Connected to Anthropic API (service currently overloaded)", - "provider": "anthropic", - } - else: - return { - "success": False, - "message": f"API returned status {response.status_code}", - "provider": "anthropic", - "error": response.text[:200] if response.text else "Unknown error", - } - except httpx.TimeoutException: - return { - "success": False, - "message": "Connection timed out", - "provider": "anthropic", - "error": "Request timed out - check your network connection", - } - except httpx.RequestError as e: - return { - "success": False, - "message": "Network error", - "provider": "anthropic", - "error": str(e), - } + return _success(provider, None) + response.raise_for_status() + return {"success": False, "message": f"API returned status {response.status_code}", + "provider": provider, "error": response.text[:300]} + except Exception as exc: + return _classified_error_result(exc, provider, None) + +# ─── Anthropic ──────────────────────────────────────────────────────── -def _test_gemini(api_key: Optional[str], timeout: float) -> Dict[str, Any]: - """Test Google Gemini API connection.""" + +def _test_anthropic(api_key: Optional[str], timeout: float, model: Optional[str]) -> Dict[str, Any]: if not api_key: - return { - "success": False, - "message": "API key is required for Gemini", - "provider": "gemini", - "error": "Missing API key", - } + return {"success": False, "message": "API key is required for Anthropic", + "provider": "anthropic", "error": "Missing API key"} + test_model = _resolve_test_model("anthropic", model, fallback="claude-haiku-4-5-20251001") + + try: + from anthropic import Anthropic + client = Anthropic(api_key=api_key, timeout=timeout, max_retries=0) + client.messages.create( + model=test_model, + max_tokens=1, + messages=[{"role": "user", "content": "hi"}], + ) + return _success("anthropic", model) + except Exception as exc: + from agent_core.core.impl.llm.errors import classify_llm_error, ErrorCategory + try: + info = classify_llm_error(exc, provider="anthropic", model=test_model) + # Auth, missing model, or credit issues are real failures. + # 400 BadRequest about the prompt itself is fine (auth+model OK). 
+ if info.category in (ErrorCategory.AUTH, ErrorCategory.MODEL, ErrorCategory.CREDIT): + return { + "success": False, + "message": info.message, + "provider": "anthropic", + "error": info.message, + } + return _success("anthropic", model) + except Exception: + return _classified_error_result(exc, "anthropic", model) + + +# ─── Gemini ──────────────────────────────────────────────────────────── + + +def _test_gemini(api_key: Optional[str], timeout: float, model: Optional[str]) -> Dict[str, Any]: + if not api_key: + return {"success": False, "message": "API key is required for Gemini", + "provider": "gemini", "error": "Missing API key"} + if model: + # Verify the specific model via models/{name}. + url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}?key={api_key}" + try: + with httpx.Client(timeout=timeout) as client: + response = client.get(url) + if response.status_code == 200: + return _success("gemini", model) + response.raise_for_status() + return {"success": False, "message": f"API returned status {response.status_code}", + "provider": "gemini", "error": response.text[:300]} + except Exception as exc: + return _classified_error_result(exc, "gemini", model) + # No model → list endpoint (auth-only). try: - # Use models list endpoint to verify API key with httpx.Client(timeout=timeout) as client: response = client.get( f"https://generativelanguage.googleapis.com/v1/models?key={api_key}", ) - if response.status_code == 200: - return { - "success": True, - "message": "Successfully connected to Google Gemini API", - "provider": "gemini", - } - elif response.status_code == 400 or response.status_code == 403: - return { - "success": False, - "message": "Invalid API key", - "provider": "gemini", - "error": "Authentication failed - check your API key", - } - else: - return { - "success": False, - "message": f"API returned status {response.status_code}", - "provider": "gemini", - "error": response.text[:200] if response.text else "Unknown error", - } - except httpx.TimeoutException: - return { - "success": False, - "message": "Connection timed out", - "provider": "gemini", - "error": "Request timed out - check your network connection", - } - except httpx.RequestError as e: - return { - "success": False, - "message": "Network error", - "provider": "gemini", - "error": str(e), - } + return _success("gemini", None) + response.raise_for_status() + return {"success": False, "message": f"API returned status {response.status_code}", + "provider": "gemini", "error": response.text[:300]} + except Exception as exc: + return _classified_error_result(exc, "gemini", None) + + +# ─── BytePlus ───────────────────────────────────────────────────────── def _test_byteplus( - api_key: Optional[str], base_url: Optional[str], timeout: float + api_key: Optional[str], base_url: Optional[str], timeout: float, model: Optional[str], ) -> Dict[str, Any]: - """Test BytePlus API connection.""" if not api_key: - return { - "success": False, - "message": "API key is required for BytePlus", - "provider": "byteplus", - "error": "Missing API key", - } - + return {"success": False, "message": "API key is required for BytePlus", + "provider": "byteplus", "error": "Missing API key"} url = base_url or "https://ark.ap-southeast.bytepluses.com/api/v3" - + if model: + # Verify via tiny chat completion. 
+ try: + with httpx.Client(timeout=timeout) as client: + response = client.post( + f"{url.rstrip('/')}/chat/completions", + headers={ + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + }, + json={ + "model": model, + "messages": [{"role": "user", "content": "hi"}], + "max_tokens": 1, + }, + ) + if response.status_code in (200, 400, 422): + # 200 = both OK. 400/422 = auth+model OK, request quirk only. + return _success("byteplus", model) + response.raise_for_status() + return {"success": False, "message": f"API returned status {response.status_code}", + "provider": "byteplus", "error": response.text[:300]} + except Exception as exc: + return _classified_error_result(exc, "byteplus", model) + # No model → /models list. try: - # BytePlus uses OpenAI-compatible API, test with models endpoint with httpx.Client(timeout=timeout) as client: response = client.get( f"{url.rstrip('/')}/models", headers={"Authorization": f"Bearer {api_key}"}, ) - if response.status_code == 200: - return { - "success": True, - "message": "Successfully connected to BytePlus API", - "provider": "byteplus", - } - elif response.status_code == 401: - return { - "success": False, - "message": "Invalid API key", - "provider": "byteplus", - "error": "Authentication failed - check your API key", - } - else: - return { - "success": False, - "message": f"API returned status {response.status_code}", - "provider": "byteplus", - "error": response.text[:200] if response.text else "Unknown error", - } - except httpx.TimeoutException: - return { - "success": False, - "message": "Connection timed out", - "provider": "byteplus", - "error": "Request timed out - check your network connection", - } - except httpx.RequestError as e: - return { - "success": False, - "message": "Network error", - "provider": "byteplus", - "error": str(e), - } + return _success("byteplus", None) + response.raise_for_status() + return {"success": False, "message": f"API returned status {response.status_code}", + "provider": "byteplus", "error": response.text[:300]} + except Exception as exc: + return _classified_error_result(exc, "byteplus", None) + + +# ─── Remote (Ollama) ────────────────────────────────────────────────── def _test_remote(base_url: Optional[str], timeout: float) -> Dict[str, Any]: - """Test remote/Ollama connection (no API key required).""" + """No API key required; the UI already validates Ollama models via + the /api/tags dropdown, so this stays auth-equivalent.""" url = base_url or "http://localhost:11434" - try: - # Ollama uses /api/tags to list models with httpx.Client(timeout=timeout) as client: response = client.get(f"{url.rstrip('/')}/api/tags") - if response.status_code == 200: models = [m["name"] for m in response.json().get("models", [])] if models: message = f"Connected! {len(models)} model(s) available: {', '.join(models)}" else: message = "Connected to Ollama, but no models downloaded yet. Use '+ Download New Model' to get one." - return { - "success": True, - "message": message, - "provider": "remote", - "models": models, - } - else: - return { - "success": False, - "message": f"Ollama returned status {response.status_code}", - "provider": "remote", - "error": response.text[:200] if response.text else "Unknown error", - } - except httpx.TimeoutException: - return { - "success": False, - "message": "Connection timed out", - "provider": "remote", - "error": f"Could not connect to Ollama at {url}. 
Is it running?", - } - except httpx.RequestError as e: - return { - "success": False, - "message": "Network error", - "provider": "remote", - "error": f"Could not connect to {url}: {str(e)}", - } - - -def _test_openai_compat( - provider: str, api_key: Optional[str], base_url: str, timeout: float -) -> Dict[str, Any]: - """Test an OpenAI-compatible API (MiniMax, DeepSeek, Moonshot).""" - names = {"minimax": "MiniMax", "deepseek": "DeepSeek", "moonshot": "Moonshot", "grok": "Grok (xAI)"} - display = names.get(provider, provider) - - if not api_key: - return { - "success": False, - "message": f"API key is required for {display}", - "provider": provider, - "error": "Missing API key", - } + return {"success": True, "message": message, "provider": "remote", "models": models} + return {"success": False, "message": f"Ollama returned status {response.status_code}", + "provider": "remote", "error": response.text[:200] if response.text else "Unknown error"} + except Exception as exc: + return _classified_error_result(exc, "remote", None) - try: - with httpx.Client(timeout=timeout) as client: - response = client.get( - f"{base_url.rstrip('/')}/models", - headers={"Authorization": f"Bearer {api_key}"}, - ) - if response.status_code == 200: - return {"success": True, "message": f"Successfully connected to {display} API", "provider": provider} - elif response.status_code in (401, 403): - return {"success": False, "message": "Invalid API key", "provider": provider, "error": f"Authentication failed (HTTP {response.status_code}) - check your API key"} - else: - return {"success": False, "message": f"API returned status {response.status_code}", "provider": provider, "error": response.text[:300] if response.text else "Unknown error"} - except httpx.TimeoutException: - return {"success": False, "message": "Connection timed out", "provider": provider, "error": "Request timed out - check your network connection"} - except httpx.RequestError as e: - return {"success": False, "message": "Network error", "provider": provider, "error": str(e)} +# ─── OpenRouter ─────────────────────────────────────────────────────── def _test_openrouter( - api_key: Optional[str], base_url: str, timeout: float + api_key: Optional[str], base_url: str, timeout: float, model: Optional[str], ) -> Dict[str, Any]: - """Test OpenRouter API connection. - - Uses /api/v1/auth/key (auth-required) so we both validate the key and - surface the user's credit balance in the success message — that's the - information OpenRouter users care about most. - """ if not api_key: - return { - "success": False, - "message": "API key is required for OpenRouter", - "provider": "openrouter", - "error": "Missing API key", - } - + return {"success": False, "message": "API key is required for OpenRouter", + "provider": "openrouter", "error": "Missing API key"} + if model: + # Verify auth + model + credits via tiny chat completion. OR returns + # 401 (bad key), 402 (no credits), 404 (bad model slug), or 200/4xx + # depending on upstream. Classifier handles them all. + return _openai_compat_chat_test( + provider="openrouter", api_key=api_key, base_url=base_url, model=model, timeout=timeout, + ) + # No model → /auth/key (auth + balance only). 
try: with httpx.Client(timeout=timeout) as client: response = client.get( f"{base_url.rstrip('/')}/auth/key", headers={"Authorization": f"Bearer {api_key}"}, ) - if response.status_code == 200: data = response.json().get("data", {}) or {} limit = data.get("limit") @@ -435,59 +436,29 @@ def _test_openrouter( msg = f"Connected to OpenRouter ({label}) — unlimited credits" else: remaining = max(0.0, float(limit) - float(usage or 0.0)) - msg = ( - f"Connected to OpenRouter ({label}) — " - f"${remaining:.2f} of ${float(limit):.2f} remaining" - ) - return { - "success": True, - "message": msg, - "provider": "openrouter", - } - elif response.status_code in (401, 403): - return { - "success": False, - "message": "Invalid API key", - "provider": "openrouter", - "error": "Authentication failed - check your OpenRouter API key", - } - else: - return { - "success": False, - "message": f"API returned status {response.status_code}", - "provider": "openrouter", - "error": response.text[:300] if response.text else "Unknown error", - } - except httpx.TimeoutException: - return { - "success": False, - "message": "Connection timed out", - "provider": "openrouter", - "error": "Request timed out - check your network connection", - } - except httpx.RequestError as e: - return { - "success": False, - "message": "Network error", - "provider": "openrouter", - "error": str(e), - } + msg = (f"Connected to OpenRouter ({label}) — " + f"${remaining:.2f} of ${float(limit):.2f} remaining") + return {"success": True, "message": msg, "provider": "openrouter"} + if response.status_code in (401, 403): + return {"success": False, "message": "Invalid API key", + "provider": "openrouter", + "error": "Authentication failed - check your OpenRouter API key"} + return {"success": False, "message": f"API returned status {response.status_code}", + "provider": "openrouter", "error": response.text[:300]} + except Exception as exc: + return _classified_error_result(exc, "openrouter", None) -def _test_grok(api_key: Optional[str], base_url: str, timeout: float) -> Dict[str, Any]: - """Test xAI Grok API connection using a minimal chat completion request. +# ─── Grok ───────────────────────────────────────────────────────────── - xAI returns 403 on the /models endpoint even for valid keys, so we use - a minimal chat completions call instead. 
- """ - if not api_key: - return { - "success": False, - "message": "API key is required for Grok (xAI)", - "provider": "grok", - "error": "Missing API key", - } +def _test_grok( + api_key: Optional[str], base_url: str, timeout: float, model: Optional[str], +) -> Dict[str, Any]: + if not api_key: + return {"success": False, "message": "API key is required for Grok (xAI)", + "provider": "grok", "error": "Missing API key"} + test_model = _resolve_test_model("grok", model, fallback="grok-3") try: with httpx.Client(timeout=timeout) as client: response = client.post( @@ -497,22 +468,18 @@ def _test_grok(api_key: Optional[str], base_url: str, timeout: float) -> Dict[st "Content-Type": "application/json", }, json={ - "model": "grok-3", + "model": test_model, "max_tokens": 1, "messages": [{"role": "user", "content": "hi"}], }, ) - - if response.status_code in (200, 400, 403, 422): - # 200 = success - # 400/422 = bad request but auth passed - # 403 = model tier restriction but key is valid - return {"success": True, "message": "Successfully connected to Grok (xAI) API", "provider": "grok"} - elif response.status_code == 401: - return {"success": False, "message": "Invalid API key", "provider": "grok", "error": "Authentication failed - check your xAI API key"} - else: - return {"success": False, "message": f"API returned status {response.status_code}", "provider": "grok", "error": response.text[:300] if response.text else "Unknown error"} - except httpx.TimeoutException: - return {"success": False, "message": "Connection timed out", "provider": "grok", "error": "Request timed out - check your network connection"} - except httpx.RequestError as e: - return {"success": False, "message": "Network error", "provider": "grok", "error": str(e)} + if response.status_code == 200: + return _success("grok", model) + if response.status_code in (400, 422) and model is None: + # Hardcoded test model probably hit a tier restriction; auth still OK. + return _success("grok", None) + response.raise_for_status() + return {"success": False, "message": f"API returned status {response.status_code}", + "provider": "grok", "error": response.text[:300]} + except Exception as exc: + return _classified_error_result(exc, "grok", model) diff --git a/app/agent_base.py b/app/agent_base.py index 3ffbfe4f..28500d6a 100644 --- a/app/agent_base.py +++ b/app/agent_base.py @@ -50,7 +50,11 @@ from app.internal_action_interface import InternalActionInterface from app.llm import LLMInterface, LLMCallType -from agent_core.core.impl.llm.errors import classify_llm_error, LLMConsecutiveFailureError +from agent_core.core.impl.llm.errors import ( + classify_llm_error, + classify_llm_error_message, + LLMConsecutiveFailureError, +) from app.vlm_interface import VLMInterface from app.database_interface import DatabaseInterface from app.logger import logger @@ -1297,20 +1301,43 @@ async def _handle_react_error( if not session_to_use or not self.event_stream_manager: return - # Get user-friendly error message - user_message = classify_llm_error(error) - - # Fatal LLM errors must not re-queue the task - that causes infinite retry loops - # Walk the full exception chain (__cause__, __context__) to detect wrapped errors + # Walk the exception chain (__cause__, __context__) to detect the + # fatal-LLM case. We need the LLMConsecutiveFailureError to surface + # the *cause* of the 5 failures (e.g. "rate-limited on Google AI + # Studio"), not the meta-message about retry counts. 
         is_fatal_llm_error = False
+        fatal_exc: LLMConsecutiveFailureError | None = None
+        seen: set[int] = set()
         exc: BaseException | None = error
-        while exc is not None:
+        while exc is not None and id(exc) not in seen:
+            seen.add(id(exc))
             if isinstance(exc, LLMConsecutiveFailureError):
                 is_fatal_llm_error = True
+                fatal_exc = exc
                 break
-            exc = exc.__cause__ or exc.__context__
-            if exc is error:  # prevent infinite loop on circular chains
+            cause = exc.__cause__ or exc.__context__
+            if cause is None or cause is exc:
                 break
+            exc = cause
+
+        # Compose the user-facing message. For the fatal case we lead with
+        # the cause (already a rich detailed string from the classifier)
+        # and prefix the abort context. For non-fatal cases the RuntimeError
+        # we receive was already constructed from `info.message` upstream
+        # in interface.py, so str(error) IS the rich text — classify is a
+        # no-op fallthrough that returns the same string back.
+        if is_fatal_llm_error and fatal_exc is not None and fatal_exc.last_error_info is not None:
+            cause_msg = fatal_exc.last_error_info.message
+            user_message = f"Aborted after {fatal_exc.failure_count} consecutive failures. {cause_msg}"
+        elif is_fatal_llm_error and fatal_exc is not None:
+            # Old code path that didn't attach last_error_info — fall back
+            # to the wrapper's str(). Better than empty.
+            user_message = str(fatal_exc)
+        else:
+            try:
+                user_message = classify_llm_error_message(error)
+            except Exception:
+                user_message = str(error) or "AI service error"
 
         try:
             logger.debug("[REACT ERROR] Logging to event stream")

diff --git a/app/ui_layer/adapters/browser_adapter.py b/app/ui_layer/adapters/browser_adapter.py
index a2dbc872..05bf3dea 100644
--- a/app/ui_layer/adapters/browser_adapter.py
+++ b/app/ui_layer/adapters/browser_adapter.py
@@ -1458,7 +1458,8 @@ async def _handle_ws_message(self, data: Dict[str, Any], ws=None) -> None:
                 provider = data.get("provider", "")
                 api_key = data.get("apiKey")
                 base_url = data.get("baseUrl")
-                await self._handle_model_connection_test(provider, api_key, base_url)
+                model = data.get("model")
+                await self._handle_model_connection_test(provider, api_key, base_url, model)
 
             elif msg_type == "model_validate_save":
                 await self._handle_model_validate_save(data)
@@ -3673,6 +3674,7 @@ async def _handle_model_connection_test(
         provider: str,
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
+        model: Optional[str] = None,
     ) -> None:
         """Test connection to a model provider."""
         try:
@@ -3680,6 +3682,7 @@
                 provider=provider,
                 api_key=api_key,
                 base_url=base_url,
+                model=model,
             )
             await self._broadcast({
                 "type": "model_connection_test",

diff --git a/app/ui_layer/browser/frontend/src/components/Chat/Chat.module.css b/app/ui_layer/browser/frontend/src/components/Chat/Chat.module.css
index b59cebe0..09c04973 100644
--- a/app/ui_layer/browser/frontend/src/components/Chat/Chat.module.css
+++ b/app/ui_layer/browser/frontend/src/components/Chat/Chat.module.css
@@ -7,6 +7,16 @@
   min-width: 0;
 }
 
+/* Wraps the scrolling list so the scroll-to-bottom button can sit absolutely
+   over the chat without scrolling along with the messages. */
+.messagesArea {
+  position: relative;
+  flex: 1;
+  display: flex;
+  flex-direction: column;
+  min-height: 0;
+}
+
 .messagesContainer {
   flex: 1;
   overflow-y: auto;
@@ -16,6 +26,72 @@
   gap: var(--space-3);
 }
 
+/* Slack-style date divider: a thin rule with a centered pill label */
+.dateDivider {
+  display: flex;
+  align-items: center;
+  gap: var(--space-3);
+  padding: var(--space-2) 0 var(--space-3);
+  user-select: none;
+}
+
+.dateDividerLine {
+  flex: 1;
+  height: 1px;
+  background: var(--border-primary);
+}
+
+.dateDividerLabel {
+  flex-shrink: 0;
+  padding: 2px 12px;
+  background: var(--bg-primary);
+  border: 1px solid var(--border-primary);
+  border-radius: 999px;
+  font-size: var(--text-xs);
+  font-weight: var(--font-semibold);
+  color: var(--text-secondary);
+  letter-spacing: 0.01em;
+}
+
+/* Floating scroll-to-bottom affordance. Appears when the user has scrolled
+   away from the latest message; click to jump back to the bottom. */
+.scrollToBottomBtn {
+  position: absolute;
+  right: var(--space-4);
+  bottom: var(--space-3);
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  width: 34px;
+  height: 34px;
+  background: var(--bg-secondary);
+  border: 1px solid var(--border-primary);
+  border-radius: 999px;
+  color: var(--text-secondary);
+  cursor: pointer;
+  opacity: 0.85;
+  box-shadow: 0 2px 8px rgba(0, 0, 0, 0.12);
+  transition: opacity var(--transition-fast), background var(--transition-fast),
+    color var(--transition-fast), transform var(--transition-fast);
+  z-index: 5;
+  animation: scrollBtnFadeIn 120ms ease-out;
+}
+
+.scrollToBottomBtn:hover {
+  opacity: 1;
+  background: var(--bg-tertiary);
+  color: var(--text-primary);
+}
+
+.scrollToBottomBtn:active {
+  transform: translateY(1px);
+}
+
+@keyframes scrollBtnFadeIn {
+  from { opacity: 0; transform: translateY(4px); }
+  to { opacity: 0.85; transform: translateY(0); }
+}
+
 .emptyState {
   flex: 1;
   display: flex;

diff --git a/app/ui_layer/browser/frontend/src/components/Chat/Chat.tsx b/app/ui_layer/browser/frontend/src/components/Chat/Chat.tsx
index 51fc0480..778ffb92 100644
--- a/app/ui_layer/browser/frontend/src/components/Chat/Chat.tsx
+++ b/app/ui_layer/browser/frontend/src/components/Chat/Chat.tsx
@@ -1,6 +1,6 @@
 import React, { useState, useRef, useEffect, useLayoutEffect, KeyboardEvent, useCallback, ChangeEvent, useMemo } from 'react'
 import ReactDOM from 'react-dom'
-import { Send, Paperclip, X, Loader2, File, AlertCircle, Reply, Mic, MicOff } from 'lucide-react'
+import { Send, Paperclip, X, Loader2, File, AlertCircle, Reply, Mic, MicOff, ChevronDown } from 'lucide-react'
 import { useVirtualizer } from '@tanstack/react-virtual'
 import { useWebSocket } from '../../contexts/WebSocketContext'
 import { useToast } from '../../contexts/ToastContext'
@@ -54,6 +54,41 @@ const formatFileSize = (bytes: number): string => {
   return parseFloat((bytes / Math.pow(k, i)).toFixed(1)) + ' ' + sizes[i]
 }
 
+// Stable per-day key (local time) for grouping consecutive messages by date.
+const getDateKey = (timestamp: number): string => {
+  const d = new Date(timestamp * 1000)
+  return `${d.getFullYear()}-${d.getMonth()}-${d.getDate()}`
+}
+
+// Slack-style date divider label: "Today", "Yesterday", weekday for the
+// last week, otherwise a full localized date.
+const formatDateDivider = (timestamp: number): string => {
+  const date = new Date(timestamp * 1000)
+  const now = new Date()
+  const sameDay = (a: Date, b: Date) =>
+    a.getFullYear() === b.getFullYear() &&
+    a.getMonth() === b.getMonth() &&
+    a.getDate() === b.getDate()
+
+  if (sameDay(date, now)) return 'Today'
+  const yesterday = new Date(now)
+  yesterday.setDate(yesterday.getDate() - 1)
+  if (sameDay(date, yesterday)) return 'Yesterday'
+
+  const msPerDay = 1000 * 60 * 60 * 24
+  const startOfToday = new Date(now.getFullYear(), now.getMonth(), now.getDate())
+  const startOfDate = new Date(date.getFullYear(), date.getMonth(), date.getDate())
+  const daysDiff = Math.round((startOfToday.getTime() - startOfDate.getTime()) / msPerDay)
+
+  if (daysDiff > 0 && daysDiff < 7) {
+    return date.toLocaleDateString(undefined, { weekday: 'long', month: 'long', day: 'numeric' })
+  }
+  if (date.getFullYear() === now.getFullYear()) {
+    return date.toLocaleDateString(undefined, { weekday: 'long', month: 'long', day: 'numeric' })
+  }
+  return date.toLocaleDateString(undefined, { year: 'numeric', month: 'long', day: 'numeric' })
+}
+
 export function Chat({ livingUIId, placeholder, emptyMessage }: ChatProps) {
   const {
     messages,
@@ -112,6 +147,7 @@ export function Chat({ livingUIId, placeholder, emptyMessage }: ChatProps) {
   const wasNearBottomRef = useRef(true)
   const prevMessageCountRef = useRef(0)
   const hasInitialScrolled = useRef(false)
+  const [showScrollToBottom, setShowScrollToBottom] = useState(false)
 
   const attachmentValidation = useMemo(() => {
     const totalSize = pendingAttachments.reduce((sum, att) => sum + att.size, 0)
@@ -140,12 +176,6 @@ export function Chat({ livingUIId, placeholder, emptyMessage }: ChatProps) {
     return lastSeenIdx + 1
   }, [orderedMessages, lastSeenMessageId])
 
-  const isNearBottom = useCallback(() => {
-    const container = parentRef.current
-    if (!container) return true
-    return container.scrollHeight - container.scrollTop - container.clientHeight < 100
-  }, [])
-
   // Close language dropdown when clicking outside
   useEffect(() => {
     if (!langOpen) return
@@ -171,14 +201,21 @@ export function Chat({ livingUIId, placeholder, emptyMessage }: ChatProps) {
     const container = parentRef.current
     if (!container) return
     const handleScroll = () => {
-      wasNearBottomRef.current = isNearBottom()
+      const distFromBottom = container.scrollHeight - container.scrollTop - container.clientHeight
+      wasNearBottomRef.current = distFromBottom < 100
+      setShowScrollToBottom(distFromBottom > 100)
       if (container.scrollTop < 100 && hasMoreMessages && !loadingOlderMessages) {
         loadOlderMessages()
       }
     }
     container.addEventListener('scroll', handleScroll)
     return () => container.removeEventListener('scroll', handleScroll)
-  }, [isNearBottom, hasMoreMessages, loadingOlderMessages, loadOlderMessages])
+  }, [hasMoreMessages, loadingOlderMessages, loadOlderMessages])
+
+  const scrollToBottom = useCallback(() => {
+    if (orderedMessages.length === 0) return
+    virtualizer.scrollToIndex(orderedMessages.length - 1, { align: 'end', behavior: 'smooth' })
+  }, [virtualizer, orderedMessages.length])
 
   // Scroll to unread on mount, auto-scroll on new messages if near bottom
   useEffect(() => {
@@ -473,64 +510,86 @@ export function Chat({ livingUIId, placeholder, emptyMessage }: ChatProps) {
   return (
     <div …>
-      <div ref={parentRef} className={styles.messagesContainer}>
-        {orderedMessages.length === 0 ? (
-          <div className={styles.emptyState}>
-            <div …>
-              <svg …>…</svg>
-            </div>
-            <div …>{emptyMessage || 'Start a conversation'}</div>
-            <div …>{livingUIId ? 'Ask the agent about this UI' : 'Send a message to begin interacting with CraftBot'}</div>
-          </div>
-        ) : (
-          <div …>
-            {loadingOlderMessages && (
-              <div …>Loading older messages...</div>
-            )}
-            {virtualizer.getVirtualItems().map((virtualItem) => {
-              const message = orderedMessages[virtualItem.index]
-              // Prefer clientId as the React key so that when a pending optimistic
-              // message is reconciled with the server echo (messageId changes from
-              // `pending:` to the real id), React reuses the same DOM node —
-              // letting the CSS transform transition animate the slide into
-              // its server-canonical sorted position.
-              const rowKey = message.clientId || message.messageId || virtualItem.index
-              return (
-                <div key={rowKey} …>
-                  <ChatMessageItem … />
-                </div>
-              )
-            })}
-          </div>
-        )}
-      </div>
+      <div className={styles.messagesArea}>
+        <div ref={parentRef} className={styles.messagesContainer}>
+          {orderedMessages.length === 0 ? (
+            <div className={styles.emptyState}>
+              <div …>
+                <svg …>…</svg>
+              </div>
+              <div …>{emptyMessage || 'Start a conversation'}</div>
+              <div …>{livingUIId ? 'Ask the agent about this UI' : 'Send a message to begin interacting with CraftBot'}</div>
+            </div>
+          ) : (
+            <div …>
+              {loadingOlderMessages && (
+                <div …>Loading older messages...</div>
+              )}
+              {virtualizer.getVirtualItems().map((virtualItem) => {
+                const message = orderedMessages[virtualItem.index]
+                const prev = virtualItem.index > 0 ? orderedMessages[virtualItem.index - 1] : null
+                const showDateDivider = !prev || getDateKey(prev.timestamp) !== getDateKey(message.timestamp)
+                // Prefer clientId as the React key so that when a pending optimistic
+                // message is reconciled with the server echo (messageId changes from
+                // `pending:` to the real id), React reuses the same DOM node —
+                // letting the CSS transform transition animate the slide into
+                // its server-canonical sorted position.
+                const rowKey = message.clientId || message.messageId || virtualItem.index
+                return (
+                  <div key={rowKey} …>
+                    {showDateDivider && (
+                      <div className={styles.dateDivider}>
+                        <div className={styles.dateDividerLine} />
+                        <span className={styles.dateDividerLabel}>{formatDateDivider(message.timestamp)}</span>
+                        <div className={styles.dateDividerLine} />
+                      </div>
+                    )}
+                    <ChatMessageItem … />
+                  </div>
+                )
+              })}
+            </div>
+          )}
+        </div>
+        {showScrollToBottom && orderedMessages.length > 0 && (
+          <button className={styles.scrollToBottomBtn} onClick={scrollToBottom} …>
+            <ChevronDown … />
+          </button>
+        )}
+      </div>
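The per-day grouping in this hunk keys each message to its local calendar day, so dividers follow the reader's timezone rather than UTC, and a divider appears exactly where a message starts a new day relative to the one before it. The TypeScript below is an illustrative sketch of that decision only — dateKey and dividerIndices are hypothetical names, not code from this patch.

// Illustrative sketch (not part of the patch): where date dividers land,
// assuming the same local-time day key scheme as getDateKey above.
const dateKey = (tsSeconds: number): string => {
  const d = new Date(tsSeconds * 1000)
  return `${d.getFullYear()}-${d.getMonth()}-${d.getDate()}`
}

// A divider precedes message i when it starts a new local day.
const dividerIndices = (timestamps: number[]): number[] =>
  timestamps.reduce<number[]>((acc, ts, i) => {
    if (i === 0 || dateKey(timestamps[i - 1]) !== dateKey(ts)) acc.push(i)
    return acc
  }, [])

// Two messages minutes apart but on either side of local midnight fall into
// different groups, so the later one still gets its own divider.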
diff --git a/app/ui_layer/browser/frontend/src/pages/Chat/ChatMessage.tsx b/app/ui_layer/browser/frontend/src/pages/Chat/ChatMessage.tsx
index d8557a0c..44b06aca 100644
--- a/app/ui_layer/browser/frontend/src/pages/Chat/ChatMessage.tsx
+++ b/app/ui_layer/browser/frontend/src/pages/Chat/ChatMessage.tsx
@@ -81,7 +81,7 @@ export const ChatMessageItem = memo(function ChatMessageItem({
           <span …>{message.sender}</span>
-          <span …>{new Date(message.timestamp * 1000).toLocaleTimeString()}</span>
+          <span …>{new Date(message.timestamp * 1000).toLocaleTimeString(undefined, { hour: 'numeric', minute: '2-digit' })}</span>
           {/* Reply context callout - shown above user message when replying */}

diff --git a/app/ui_layer/browser/frontend/src/pages/Settings/ModelSettings.tsx b/app/ui_layer/browser/frontend/src/pages/Settings/ModelSettings.tsx
index c74e41c3..ccd5d729 100644
--- a/app/ui_layer/browser/frontend/src/pages/Settings/ModelSettings.tsx
+++ b/app/ui_layer/browser/frontend/src/pages/Settings/ModelSettings.tsx
@@ -363,10 +363,13 @@ export function ModelSettings() {
   const handleTestConnection = () => {
     setIsTesting(true)
+    // Send the user's actual model so the test exercises it; otherwise a
+    // typo passes the test (auth-only) and only fails at first real call.
     send('model_connection_test', {
       provider,
       apiKey: newApiKey || undefined,
       baseUrl: newBaseUrl || baseUrls[provider],
+      model: newLlmModel || currentLlmModel || undefined,
     })
   }
 
@@ -381,6 +384,7 @@ export function ModelSettings() {
         provider,
         apiKey: newApiKey || undefined,
         baseUrl: newBaseUrl || baseUrls[provider],
+        model: newLlmModel || currentLlmModel || undefined,
       })
     } else {
       setIsSaving(true)

diff --git a/app/ui_layer/settings/model_settings.py b/app/ui_layer/settings/model_settings.py
index 6554b4bb..4b87e48c 100644
--- a/app/ui_layer/settings/model_settings.py
+++ b/app/ui_layer/settings/model_settings.py
@@ -372,6 +372,7 @@ def test_connection(
         provider: str,
         api_key: Optional[str] = None,
         base_url: Optional[str] = None,
+        model: Optional[str] = None,
     ) -> Dict[str, Any]:
         """Test connection to a provider.
 
@@ -379,6 +380,10 @@ def test_connection(
             provider: Provider to test
             api_key: Optional API key to test with (if not provided, uses stored key)
             base_url: Optional base URL for byteplus/remote providers
+            model: Optional model id to verify. When provided the tester does a
+                tiny chat completion against this exact model so a typo in the
+                model id is caught at test time, not at first real call. When
+                omitted, falls back to a known-good test model (auth check only).
 
         Returns:
             Dict with test results
@@ -410,6 +415,7 @@ def test_connection(
             provider=provider,
             api_key=api_key,
             base_url=base_url,
+            model=model,
         )
 
         return result

From a49846d84fcf69e47a992d0f8b8b4e680f5e5701 Mon Sep 17 00:00:00 2001
From: zfoong
Date: Fri, 8 May 2026 15:19:28 +0900
Subject: [PATCH 3/3] Fix chat panel scroll-to-bottom button display logic

---
 app/agent_base.py                           | 24 +++++++++++++--
 .../frontend/src/components/Chat/Chat.tsx   | 30 +++++++++++++++----
 2 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/app/agent_base.py b/app/agent_base.py
index 28500d6a..e05d18a7 100644
--- a/app/agent_base.py
+++ b/app/agent_base.py
@@ -1341,10 +1341,30 @@ async def _handle_react_error(
         try:
             logger.debug("[REACT ERROR] Logging to event stream")
+            # Only fatal errors surface as a red chat bubble. Non-fatal cases
+            # (single parse failure, transient API hiccup, etc.) are still
+            # recorded into the event stream so the LLM sees the failure in
+            # its next-attempt context — but we use a non-error kind so the
+            # transformer does NOT emit a chat-visible ERROR_MESSAGE. The
+            # agent retries automatically via _create_new_trigger below, and
+            # the user shouldn't see a scary error bubble for something that
+            # is being silently recovered. If retries pile up past the
+            # consecutive-failure threshold, the fatal branch above kicks in
+            # and the rich classified message is surfaced then.
+            #
+            # NOTE: We must change the *kind* rather than just unsetting
+            # display_message — when kind is in ERROR_KINDS the transformer
+            # falls back to event.message (the full traceback) for the chat
+            # bubble, which would be even worse. Using kind="warning" follows
+            # the existing convention (see the limit-reached events earlier
+            # in this file) and the LLM still understands the entry from the
+            # message text.
+            log_kind = "error" if is_fatal_llm_error else "warning"
+            log_display_message = user_message if is_fatal_llm_error else None
             self.event_stream_manager.log(
-                "error",
+                log_kind,
                 f"[REACT] {type(error).__name__}: {error}\n{tb}",
-                display_message=user_message,
+                display_message=log_display_message,
                 task_id=session_to_use,
             )
             self.state_manager.bump_event_stream()

diff --git a/app/ui_layer/browser/frontend/src/components/Chat/Chat.tsx b/app/ui_layer/browser/frontend/src/components/Chat/Chat.tsx
index 778ffb92..e07b40ef 100644
--- a/app/ui_layer/browser/frontend/src/components/Chat/Chat.tsx
+++ b/app/ui_layer/browser/frontend/src/components/Chat/Chat.tsx
@@ -147,6 +147,7 @@ export function Chat({ livingUIId, placeholder, emptyMessage }: ChatProps) {
   const wasNearBottomRef = useRef(true)
   const prevMessageCountRef = useRef(0)
   const hasInitialScrolled = useRef(false)
+  const prevScrollTopRef = useRef(0)
   const [showScrollToBottom, setShowScrollToBottom] = useState(false)
 
   const attachmentValidation = useMemo(() => {
     const totalSize = pendingAttachments.reduce((sum, att) => sum + att.size, 0)
@@ -196,15 +197,33 @@ export function Chat({ livingUIId, placeholder, emptyMessage }: ChatProps) {
     return () => document.removeEventListener('keydown', handler)
   }, [previewAttachment])
 
-  // Track scroll position + load older messages on scroll-to-top
+  // Track scroll position + direction, and load older messages on scroll-to-top.
+  // The scroll-to-bottom button surfaces when the user is scrolling *toward*
+  // the bottom but hasn't arrived yet — scrolling up to read history hides it.
   useEffect(() => {
     const container = parentRef.current
     if (!container) return
+    prevScrollTopRef.current = container.scrollTop
     const handleScroll = () => {
-      const distFromBottom = container.scrollHeight - container.scrollTop - container.clientHeight
-      wasNearBottomRef.current = distFromBottom < 100
-      setShowScrollToBottom(distFromBottom > 100)
-      if (container.scrollTop < 100 && hasMoreMessages && !loadingOlderMessages) {
+      const scrollTop = container.scrollTop
+      const distFromBottom = container.scrollHeight - scrollTop - container.clientHeight
+      const nearBottom = distFromBottom < 100
+      wasNearBottomRef.current = nearBottom
+
+      const delta = scrollTop - prevScrollTopRef.current
+      prevScrollTopRef.current = scrollTop
+
+      if (nearBottom) {
+        setShowScrollToBottom(false)
+      } else if (delta > 0) {
+        // Scrolling down (toward latest) — offer a quick jump.
+        setShowScrollToBottom(true)
+      } else if (delta < 0) {
+        // Scrolling up (reading history) — get out of the way.
+        setShowScrollToBottom(false)
+      }
+
+      if (scrollTop < 100 && hasMoreMessages && !loadingOlderMessages) {
         loadOlderMessages()
       }
     }
@@ -215,6 +234,7 @@ export function Chat({ livingUIId, placeholder, emptyMessage }: ChatProps) {
   const scrollToBottom = useCallback(() => {
     if (orderedMessages.length === 0) return
     virtualizer.scrollToIndex(orderedMessages.length - 1, { align: 'end', behavior: 'smooth' })
+    setShowScrollToBottom(false)
   }, [virtualizer, orderedMessages.length])
 
   // Scroll to unread on mount, auto-scroll on new messages if near bottom
   useEffect(() => {
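The handler above folds three concerns into one scroll listener: near-bottom tracking, scroll-direction detection, and lazy-loading older messages. The show/hide decision for the button on its own reduces to a small pure function. The TypeScript below is an illustrative sketch under that reading — nextShowScrollToBottom, ScrollSample, and NEAR_BOTTOM_PX are hypothetical names, not code from this patch.

// Illustrative sketch (not part of the patch): the button-visibility rule as a
// pure function of the previous scrollTop and the current measurements.
interface ScrollSample {
  scrollTop: number
  scrollHeight: number
  clientHeight: number
}

const NEAR_BOTTOM_PX = 100

// Hidden when near the bottom, shown when moving toward the bottom,
// hidden when moving up, unchanged when the position has not moved.
function nextShowScrollToBottom(
  prevScrollTop: number,
  sample: ScrollSample,
  current: boolean,
): boolean {
  const distFromBottom = sample.scrollHeight - sample.scrollTop - sample.clientHeight
  if (distFromBottom < NEAR_BOTTOM_PX) return false
  const delta = sample.scrollTop - prevScrollTop
  if (delta > 0) return true   // scrolling down, toward the latest message
  if (delta < 0) return false  // scrolling up, reading history
  return current               // no movement: keep the previous state
}

Keeping the rule in this shape makes the direction handling easy to reason about (and to unit-test) separately from the DOM listener wiring.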