From 22bb34783cc69c4f1ff51235bc486907a520e163 Mon Sep 17 00:00:00 2001
From: rjckkkkk <59609580+rjckkkkk@users.noreply.github.com>
Date: Thu, 11 Jun 2026 07:28:20 +0000
Subject: [PATCH] Add Qwen2.5-VL-3B-Instruct model knowledge (vlm + verified
 Strix Halo perf)

Without a catalog entry the scanned GGUF (Qwen2.5-VL-3B-Instruct-Q4_K_M)
was auto-detected with no type, so `openclaw sync` skipped it (default
case) and the model never surfaced as an OpenClaw provider.

Add the model yaml: type vlm, metadata.aliases for the scan name,
openclaw.chat_provider so it surfaces as a multimodal (text+image) chat
provider, a universal llamacpp gguf variant, and verified perf (AMD
Strix Halo / llama.cpp b9330: ~90 tok/s text decode; vision verified
with --mmproj).

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 catalog/models/qwen2.5-vl-3b-instruct.yaml | 53 ++++++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100644 catalog/models/qwen2.5-vl-3b-instruct.yaml

diff --git a/catalog/models/qwen2.5-vl-3b-instruct.yaml b/catalog/models/qwen2.5-vl-3b-instruct.yaml
new file mode 100644
index 0000000..62a605c
--- /dev/null
+++ b/catalog/models/qwen2.5-vl-3b-instruct.yaml
@@ -0,0 +1,53 @@
+kind: model_asset
+metadata:
+  name: qwen2.5-vl-3b-instruct
+  type: vlm
+  family: qwen
+  parameter_count: "3B"
+  released_at: "2025-01"
+  # Scan-name alias: on-disk GGUF name so the local scanner matches this asset
+  # (otherwise deploy/openclaw-sync fall back to auto-detect, which has no type
+  # and is skipped by openclaw sync → the model never reaches OpenClaw).
+  aliases:
+    - Qwen2.5-VL-3B-Instruct-Q4_K_M
+openclaw:
+  # Multimodal chat (text + image): surface as a chat provider in OpenClaw so it
+  # is usable from the chat UI, not only as a vision/image tool.
+  chat_provider: true
+storage:
+  formats: [safetensors, gguf]
+  default_path_pattern: "{{.DataDir}}/models/{{.Name}}"
+  sources:
+    - type: huggingface
+      repo: Qwen/Qwen2.5-VL-3B-Instruct
+      format: safetensors
+    - type: huggingface
+      repo: ggml-org/Qwen2.5-VL-3B-Instruct-GGUF
+      format: gguf
+    - type: modelscope
+      repo: Qwen/Qwen2.5-VL-3B-Instruct
+      format: safetensors
+    - type: local_path
+      path: ""
+variants:
+  # --- universal llamacpp GGUF (verified on AMD Strix Halo Radeon 8060S iGPU) ---
+  # Q4_K_M ~1.8GB. Vision (image input) requires the mmproj projector — start
+  # llama-server with --mmproj MMPROJ_PATH (pass `--config mmproj=MMPROJ_PATH`),
+  # otherwise the model serves text only.
+  - name: qwen2.5-vl-3b-instruct-universal-llamacpp-q4
+    hardware:
+      gpu_arch: "*"
+      vram_min_mib: 0
+      ram_min_mib: 4096
+    engine: llamacpp
+    format: gguf
+    default_config:
+      quantization: int4
+      n_gpu_layers: 999
+      ctx_size: 8192
+    expected_performance:
+      startup_time_s: 10
+      cold_start_time_s: 20
+      tokens_per_second: [60, 95]
+      latency_first_token_ms: [20, 150]
+      notes: "Verified 2026-06-10 on AMD Strix Halo Radeon 8060S iGPU, llama.cpp b9330 HIP, Q4_K_M all 999 layers offloaded: text decode ~90 tok/s. Vision verified working when started with --mmproj (mmproj-Qwen2.5-VL-3B-Instruct-Q8_0.gguf): correctly identified a solid-color test image."