From 9042c1134f00fb2508f44c14236f987fcebed883 Mon Sep 17 00:00:00 2001
From: lonrencn <158190203+lonrencn@users.noreply.github.com>
Date: Sun, 28 Jun 2026 16:52:22 +0800
Subject: [PATCH] fix: custom VLM API compatibility with vLLM reasoning models

Fix three issues with the OpenAI-compatible custom VLM API:

1. Empty response from reasoning models (vLLM + Qwen3.6):
   - vLLM with --reasoning-parser returns content=null and puts the
     actual text in the 'reasoning' key. Added a fallback that checks
     reasoning_content, then reasoning, when content is empty.
   - Reasoning models can spend the whole max_tokens budget on
     thinking. Added chat_template_kwargs: {"enable_thinking": false}
     to the request payload.

2. VLM chat panel sends wrong version to backend:
   - readSelectedVlmVersion() reads the Gradio Dropdown's display label
     (the model name) instead of its internal value ('Custom') when the
     component has no native input element.
   - Added a heuristic: if the version string matches the custom model
     name, treat it as Custom mode.

3. VLM selection resets to local model on page refresh:
   - Custom was excluded from admin-default persistence, causing
     init_nav_bars to override the restored selection on page load.
   - Removed the exclusion so Custom persists as the admin default.
---
 enhanced/vlm.py                 | 13 +++++++++++--
 javascript/describe_vlm_chat.js |  2 ++
 webui.py                        |  2 +-
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/enhanced/vlm.py b/enhanced/vlm.py
index f945d588..d31e8044 100644
--- a/enhanced/vlm.py
+++ b/enhanced/vlm.py
@@ -251,7 +251,7 @@ def _extract_openai_compatible_text(response):
         return ""
     message = choices[0].get("message") if isinstance(choices[0], dict) else {}
     content = message.get("content") if isinstance(message, dict) else ""
-    if isinstance(content, str):
+    if isinstance(content, str) and content.strip():
         return content
     if isinstance(content, list):
         parts = []
@@ -262,7 +262,15 @@ def _extract_openai_compatible_text(response):
                 parts.append(str(item.get("text") or ""))
             elif isinstance(item.get("content"), str):
                 parts.append(item.get("content"))
-        return "\n".join([part for part in parts if part])
+        text = "\n".join([part for part in parts if part])
+        if text.strip():
+            return text
+    reasoning = message.get("reasoning_content") if isinstance(message, dict) else ""
+    if isinstance(reasoning, str) and reasoning.strip() and reasoning.strip() != "None":
+        return reasoning
+    reasoning = message.get("reasoning") if isinstance(message, dict) else ""
+    if isinstance(reasoning, str) and reasoning.strip() and reasoning.strip() != "None":
+        return reasoning
     return str(content or "")
 
 
@@ -756,6 +764,7 @@ def inference_custom(self, image, prompt, max_tokens=2048, temperature=0.7, top_
             "top_p": float(top_p),
             "max_tokens": int(max_tokens),
             "stream": False,
+            "chat_template_kwargs": {"enable_thinking": False},
         }
         try:
             seed_value = int(seed)
diff --git a/javascript/describe_vlm_chat.js b/javascript/describe_vlm_chat.js
index ec14220e..135948e0 100644
--- a/javascript/describe_vlm_chat.js
+++ b/javascript/describe_vlm_chat.js
@@ -611,6 +611,8 @@
         const version = cleanVlmVersion(raw);
         const customPanel = componentHost('describe_vlm_custom_panel');
         if (version === 'Custom' || isVisible(customPanel)) return 'Custom';
+        const customModel = String(readComponentValue('describe_vlm_custom_model') || '').trim();
+        if (customModel && version === customModel) return 'Custom';
         return version;
     }
 
diff --git a/webui.py b/webui.py
index c2a284c4..82318942 100644
--- a/webui.py
+++ b/webui.py
@@ -360,7 +360,7 @@ def _main_vlm_save_admin_version(version, state, request=None):
 def _main_vlm_save_selected_version(version, state, persist_admin=False, request=None):
     version = _vlm_resolve_version(version)
     _main_vlm_write_local_settings({"version": version})
-    if persist_admin and version != VLM.CUSTOM_VERSION:
+    if persist_admin:
         _main_vlm_save_admin_version(version, state, request=request)
     return version