From 9042c1134f00fb2508f44c14236f987fcebed883 Mon Sep 17 00:00:00 2001 From: lonrencn <158190203+lonrencn@users.noreply.github.com> Date: Sun, 28 Jun 2026 16:52:22 +0800 Subject: [PATCH] fix: custom VLM API compatibility with vLLM reasoning models Three issues fixed for the OpenAI-compatible custom VLM API: 1. Empty response from reasoning models (vLLM + Qwen3.6): - vLLM with --reasoning-parser returns content=null, with the actual text in the 'reasoning' key. Added a fallback that checks reasoning_content, then reasoning, when content is empty. - Reasoning models can use up the entire max_tokens budget on thinking. Added chat_template_kwargs: {enable_thinking: false} to the request payload. 2. VLM chat panel sends the wrong version to the backend: - readSelectedVlmVersion() reads the Gradio Dropdown display label (the model name) instead of the internal value ('Custom') when the component has no native input element. - Added a heuristic: if the version string matches the custom model name, treat it as Custom mode. 3. VLM selection resets to the local model on page refresh: - Custom was excluded from admin default persistence, causing init_nav_bars to override the restored selection on page load. - Removed the exclusion so Custom persists as the admin default. 
--- enhanced/vlm.py | 13 +++++++++++-- javascript/describe_vlm_chat.js | 2 ++ webui.py | 2 +- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/enhanced/vlm.py b/enhanced/vlm.py index f945d588..d31e8044 100644 --- a/enhanced/vlm.py +++ b/enhanced/vlm.py @@ -251,7 +251,7 @@ def _extract_openai_compatible_text(response): return "" message = choices[0].get("message") if isinstance(choices[0], dict) else {} content = message.get("content") if isinstance(message, dict) else "" - if isinstance(content, str): + if isinstance(content, str) and content.strip(): return content if isinstance(content, list): parts = [] @@ -262,7 +262,15 @@ def _extract_openai_compatible_text(response): parts.append(str(item.get("text") or "")) elif isinstance(item.get("content"), str): parts.append(item.get("content")) - return "\n".join([part for part in parts if part]) + text = "\n".join([part for part in parts if part]) + if text.strip(): + return text + reasoning = message.get("reasoning_content") if isinstance(message, dict) else "" + if isinstance(reasoning, str) and reasoning.strip() and reasoning.strip() != "None": + return reasoning + reasoning = message.get("reasoning") if isinstance(message, dict) else "" + if isinstance(reasoning, str) and reasoning.strip() and reasoning.strip() != "None": + return reasoning return str(content or "") @@ -756,6 +764,7 @@ def inference_custom(self, image, prompt, max_tokens=2048, temperature=0.7, top_ "top_p": float(top_p), "max_tokens": int(max_tokens), "stream": False, + "chat_template_kwargs": {"enable_thinking": False}, } try: seed_value = int(seed) diff --git a/javascript/describe_vlm_chat.js b/javascript/describe_vlm_chat.js index ec14220e..135948e0 100644 --- a/javascript/describe_vlm_chat.js +++ b/javascript/describe_vlm_chat.js @@ -611,6 +611,8 @@ const version = cleanVlmVersion(raw); const customPanel = componentHost('describe_vlm_custom_panel'); if (version === 'Custom' || isVisible(customPanel)) return 'Custom'; + const 
customModel = String(readComponentValue('describe_vlm_custom_model') || '').trim(); + if (customModel && version === customModel) return 'Custom'; return version; } diff --git a/webui.py b/webui.py index c2a284c4..82318942 100644 --- a/webui.py +++ b/webui.py @@ -360,7 +360,7 @@ def _main_vlm_save_admin_version(version, state, request=None): def _main_vlm_save_selected_version(version, state, persist_admin=False, request=None): version = _vlm_resolve_version(version) _main_vlm_write_local_settings({"version": version}) - if persist_admin and version != VLM.CUSTOM_VERSION: + if persist_admin: _main_vlm_save_admin_version(version, state, request=request) return version