From 142c6bffc4debc1f4aca1de54512f225b23e08b3 Mon Sep 17 00:00:00 2001
From: rjckkkkk <59609580+rjckkkkk@users.noreply.github.com>
Date: Mon, 8 Jun 2026 11:02:16 +0000
Subject: [PATCH] Discover models across all drives; group split GGUF shards
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On Windows the model scanner only looked at %USERPROFILE% directories, so
models on other drives (D:\models, D:\lmstudio\models, ...) were never
found and users had to set AIMA_MODEL_DIR by hand. Split GGUF shards were
each registered as a separate model, and mmproj projector files showed up
as standalone models — both break one-click deploy from the Web UI (a user
clicking a single shard would fail to load the model).

Scan discovery:
- enumerate fixed/removable drives and probe conventional model dirs that
  exist (<drive>:\models, <drive>:\lmstudio\models, .ollama, HF hub). This
  stays targeted — it never walks whole drives, so recycle bins and system
  folders are not pulled in.
- AIMA_MODEL_DIR now accepts a list of custom dirs separated by
  os.PathListSeparator (';' on Windows, ':' on Linux).
- honor HF_HOME / OLLAMA_MODELS for relocated caches.
- harden scanner.yaml skip list ($recycle.bin, system volume information,
  node_modules, .git, ...) so even an explicit drive-root scan stays clean.

GGUF grouping:
- collapse "...-00001-of-000NN.gguf" shards into one logical model whose
  Path is the first shard (llama.cpp auto-loads the rest) and whose size is
  the sum of all parts — so deploy targets the whole model, not one piece.
- drop mmproj projector files. The scanner already skips "mmproj" *dirs*
  (scanner.yaml); this covers the file-name form, so projectors no longer
  appear as deployable models.

Pure logic (groupGGUFModels, dedupePaths, DefaultScanPaths multi-path) is
covered by unit tests. Verified on an AMD Strix Halo Windows box: a clean
scan now yields 22 grouped models across D:\model, D:\models,
D:\models-gguf and D:\lmstudio (drive auto-probe found D:\models and
D:\lmstudio\models on its own), with no shards-as-models, no mmproj, and
no recycle/tooling junk.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 catalog/scanner.yaml                 |   9 +++
 internal/model/gguf.go               | 111 +++++++++++++++++++++-----
 internal/model/gguf_test.go          | 115 +++++++++++++++++++++++++++
 internal/model/scan_paths_other.go   |   8 ++
 internal/model/scan_paths_test.go    |  66 +++++++++++++++
 internal/model/scan_paths_windows.go |  72 +++++++++++++++++
 internal/model/scanner.go            |  36 ++++++++-
 7 files changed, 394 insertions(+), 23 deletions(-)
 create mode 100644 internal/model/gguf_test.go
 create mode 100644 internal/model/scan_paths_other.go
 create mode 100644 internal/model/scan_paths_test.go
 create mode 100644 internal/model/scan_paths_windows.go
diff --git a/catalog/scanner.yaml b/catalog/scanner.yaml
index 2492f776..cc0966c3 100644
--- a/catalog/scanner.yaml
+++ b/catalog/scanner.yaml
@@ -38,6 +38,15 @@ config:
     - vision_model
     - audio_encoder
     - projection
+    # OS / tooling junk — these never hold deployable models; skipping them
+    # keeps a drive-root scan from pulling in deleted or system files.
+    - "$recycle.bin"
+    - "system volume information"
+    - "$windows.~bt"
+    - "$windows.~ws"
+    - node_modules
+    - .git
+    - .cache_tmp
 
   # Directory patterns that indicate parent models/pipelines
   # Subdirectories of these paths will be skipped during recursion
diff --git a/internal/model/gguf.go b/internal/model/gguf.go
index ebc7e466..be4169bf 100644
--- a/internal/model/gguf.go
+++ b/internal/model/gguf.go
@@ -7,12 +7,84 @@ import (
 	"io"
 	"os"
 	"path/filepath"
+	"regexp"
+	"sort"
+	"strconv"
 	"strings"
 )
 
-// detectGGUFModels detects all GGUF models in a directory.
-// GGUF models don't have config.json, so we detect one model per .gguf file.
-// Each GGUF file gets its own Path (file path, not directory) for uniqueness.
+// ggufShardRe matches llama.cpp split-GGUF base names, ignoring letter case, e.g.
+// "Qwen3.5-122B-A10B-Q4_K_M-00001-of-00002.gguf" → captures (base, index, total).
+var ggufShardRe = regexp.MustCompile(`(?i)^(.+)-(\d+)-of-(\d+)\.gguf$`)
+
+// ggufGroup describes one logical GGUF model: a standalone file, or every shard
+// of a split model (llama.cpp loads the set by opening the first shard).
+type ggufGroup struct {
+	name    string   // model name: base file name without extension or shard suffix
+	primary string   // file to load: lowest-numbered shard, or the only file
+	parts   []string // every file that makes up this model
+}
+
+// isMMProjFile reports whether baseNoExt, a GGUF file name without extension,
+// contains "mmproj" in any letter case. Such files are multimodal projectors
+// that accompany a vision model, so callers drop them instead of listing them
+// as deployable models. scanner.yaml already skips "mmproj" directories.
+func isMMProjFile(baseNoExt string) bool {
+	return strings.Contains(strings.ToLower(baseNoExt), "mmproj")
+}
+
+// groupGGUFModels drops mmproj projectors and merges split shards that share a
+// directory and base name into one group (primary = lowest shard index, parts
+// sorted by path); any other file becomes a single-file group. Shard groups come
+// first, in first-seen order, followed by single files in input order.
+func groupGGUFModels(files []string) []ggufGroup {
+	type acc struct {
+		group  *ggufGroup
+		minIdx int
+	}
+	groups := map[string]*acc{}
+	var order []string // group keys in first-seen order, so output does not depend on map iteration
+	var singles []ggufGroup
+
+	for _, f := range files {
+		base := filepath.Base(f)
+		nameNoExt := base[:len(base)-len(filepath.Ext(base))]
+		if isMMProjFile(nameNoExt) {
+			continue // projector, not a standalone model
+		}
+		if m := ggufShardRe.FindStringSubmatch(base); m != nil {
+			groupName := m[1]
+			idx, _ := strconv.Atoi(m[2]) // m[2] is all digits, so Atoi can only fail on overflow
+			key := filepath.Dir(f) + string(filepath.Separator) + groupName
+			a, ok := groups[key]
+			if !ok {
+				a = &acc{group: &ggufGroup{name: groupName, primary: f}, minIdx: idx}
+				groups[key] = a
+				order = append(order, key)
+			}
+			a.group.parts = append(a.group.parts, f)
+			if idx < a.minIdx { // primary = lowest-numbered shard
+				a.minIdx = idx
+				a.group.primary = f
+			}
+		} else {
+			singles = append(singles, ggufGroup{name: nameNoExt, primary: f, parts: []string{f}})
+		}
+	}
+
+	out := make([]ggufGroup, 0, len(order)+len(singles))
+	for _, key := range order {
+		g := groups[key].group
+		sort.Strings(g.parts) // lexicographic, so parts order does not depend on input order
+		out = append(out, *g)
+	}
+	return append(out, singles...)
+}
+
+// detectGGUFModels detects GGUF models in a directory. Split shards
+// ("...-00001-of-00003.gguf") collapse into one model whose Path is the first
+// shard (llama.cpp auto-loads the rest) and whose size is the sum of all parts;
+// mmproj projector files are excluded.
 func detectGGUFModels(dir string, entries []os.DirEntry, p ModelPattern, minSize int64) []*ModelInfo {
 	weightFiles := findAllWeightFiles(dir, entries, p.weightExts)
 	if len(weightFiles) == 0 {
@@ -20,30 +92,30 @@ func detectGGUFModels(dir string, entries []os.DirEntry, p ModelPattern, minSize
 	}
 
 	var models []*ModelInfo
-	for _, weightPath := range weightFiles {
-		// Check individual file size against minimum
-		info, err := os.Stat(weightPath)
-		if err != nil {
-			continue
+	for _, g := range groupGGUFModels(weightFiles) {
+		// Size = sum of all shards; compare the whole model against the minimum.
+		var totalSize int64
+		for _, part := range g.parts {
+			if info, err := os.Stat(part); err == nil {
+				totalSize += info.Size()
+			}
 		}
-		if info.Size() < minSize {
+		if totalSize < minSize {
 			continue
 		}
 
-		// Use the file path as the model path (unique per GGUF file)
-		// This allows multiple GGUF files in the same directory to be detected
 		model := &ModelInfo{
-			ID:         fmt.Sprintf("%x", sha256.Sum256([]byte(weightPath))),
-			Name:       strings.TrimSuffix(filepath.Base(weightPath), ".gguf"),
+			ID:         fmt.Sprintf("%x", sha256.Sum256([]byte(g.primary))),
+			Name:       g.name,
 			Type:       p.typeHint,
-			Path:       weightPath, // Use file path for uniqueness
+			Path:       g.primary, // first shard — llama.cpp loads the rest automatically
 			Format:     p.format,
-			SizeBytes:  info.Size(),
+			SizeBytes:  totalSize,
 			ModelClass: "unknown",
 		}
 
-		// Parse GGUF header metadata for arch, params, class
-		if meta := parseGGUFMeta(weightPath); meta != nil {
+		// Parse GGUF header metadata for arch, params, class (from the first shard)
+		if meta := parseGGUFMeta(g.primary); meta != nil {
 			modelType := jsonStr(meta, "model_type", "")
 			model.DetectedArch = detectArch(modelType)
 			if model.Type == "" {
@@ -64,9 +136,8 @@ func detectGGUFModels(dir string, entries []os.DirEntry, p ModelPattern, minSize
 			}
 		}
 
-		// Detect quantization from filename
-		weightName := filepath.Base(weightPath)
-		model.Quantization, model.QuantSrc = detectQuantization(nil, weightName, p.format)
+		// Detect quantization from the primary file name
+		model.Quantization, model.QuantSrc = detectQuantization(nil, filepath.Base(g.primary), p.format)
 
 		if model.Type == "" {
 			model.Type = "llm" // Default GGUF models to LLM
diff --git a/internal/model/gguf_test.go b/internal/model/gguf_test.go
new file mode 100644
index 00000000..8bb3dc24
--- /dev/null
+++ b/internal/model/gguf_test.go
@@ -0,0 +1,115 @@
+package model
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+func TestIsMMProjFile(t *testing.T) {
+	tests := map[string]bool{ // base name without extension → expected result
+		"mmproj-F16":              true,
+		"mmproj-Qwen3.5-27B-BF16": true,
+		"Qwen3.5-27B-mmproj":      true,
+		"Qwen3.5-27B-Q4_K_M":      false,
+		"GLM-4.7-Flash-Q4_K_M":    false,
+	}
+	for name, want := range tests {
+		if got := isMMProjFile(name); got != want {
+			t.Errorf("isMMProjFile(%q) = %v, want %v", name, got, want)
+		}
+	}
+}
+
+func TestGroupGGUFModels(t *testing.T) {
+	t.Run("split shards collapse to one model, primary = first shard", func(t *testing.T) {
+		// shards listed out of order on purpose, plus one mmproj and one single file
+		files := []string{
+			`/d/Qwen3-Coder-Next-Q4_K_M-00002-of-00004.gguf`,
+			`/d/Qwen3-Coder-Next-Q4_K_M-00001-of-00004.gguf`,
+			`/d/Qwen3-Coder-Next-Q4_K_M-00004-of-00004.gguf`,
+			`/d/Qwen3-Coder-Next-Q4_K_M-00003-of-00004.gguf`,
+			`/d/mmproj-Qwen3.5-27B-BF16.gguf`,
+			`/d/GLM-4.7-Flash-Q4_K_M.gguf`,
+		}
+		groups := groupGGUFModels(files)
+		if len(groups) != 2 { // one shard group + one single; the mmproj is dropped
+			t.Fatalf("got %d groups, want 2: %+v", len(groups), groups)
+		}
+		shard := groups[0] // shard groups are emitted before single files
+		if shard.name != "Qwen3-Coder-Next-Q4_K_M" {
+			t.Errorf("group name = %q", shard.name)
+		}
+		if filepath.Base(shard.primary) != "Qwen3-Coder-Next-Q4_K_M-00001-of-00004.gguf" {
+			t.Errorf("primary = %q, want the -00001- shard", shard.primary)
+		}
+		if len(shard.parts) != 4 {
+			t.Errorf("parts = %d, want 4", len(shard.parts))
+		}
+		single := groups[1]
+		if single.name != "GLM-4.7-Flash-Q4_K_M" || len(single.parts) != 1 {
+			t.Errorf("single = %+v", single)
+		}
+	})
+
+	t.Run("same base name in different dirs are separate models", func(t *testing.T) {
+		files := []string{
+			`/a/M-00001-of-00002.gguf`, `/a/M-00002-of-00002.gguf`,
+			`/b/M-00001-of-00002.gguf`, `/b/M-00002-of-00002.gguf`,
+		}
+		if got := len(groupGGUFModels(files)); got != 2 {
+			t.Fatalf("got %d groups, want 2", got)
+		}
+	})
+
+	t.Run("mmproj-only input yields nothing", func(t *testing.T) {
+		if got := groupGGUFModels([]string{`/d/mmproj-F16.gguf`}); len(got) != 0 {
+			t.Fatalf("got %d groups, want 0", len(got))
+		}
+	})
+}
+
+func TestDetectGGUFModels_ShardsMMProjAndSize(t *testing.T) {
+	dir := t.TempDir()
+	write := func(name string, size int) { // creates a zero-filled file of size bytes in dir
+		if err := os.WriteFile(filepath.Join(dir, name), make([]byte, size), 0o644); err != nil {
+			t.Fatal(err)
+		}
+	}
+	write("Big-Q4_K_M-00001-of-00003.gguf", 100)
+	write("Big-Q4_K_M-00002-of-00003.gguf", 200)
+	write("Big-Q4_K_M-00003-of-00003.gguf", 300)
+	write("mmproj-Big-BF16.gguf", 50)
+	write("Small-Q8_0.gguf", 500)
+
+	entries, err := os.ReadDir(dir)
+	if err != nil {
+		t.Fatal(err)
+	}
+	pattern := ModelPattern{weightExts: []string{".gguf"}, format: "gguf"}
+	models := detectGGUFModels(dir, entries, pattern, 0) // minSize 0: no group is dropped for being too small
+
+	if len(models) != 2 {
+		t.Fatalf("got %d models, want 2 (split group + single, mmproj excluded): %+v", len(models), models)
+	}
+	byName := map[string]*ModelInfo{}
+	for _, m := range models {
+		byName[m.Name] = m
+	}
+	big, ok := byName["Big-Q4_K_M"]
+	if !ok {
+		t.Fatalf("missing collapsed split model; got %v", byName)
+	}
+	if big.SizeBytes != 600 {
+		t.Errorf("split model size = %d, want 600 (sum of shards)", big.SizeBytes)
+	}
+	if filepath.Base(big.Path) != "Big-Q4_K_M-00001-of-00003.gguf" {
+		t.Errorf("split model Path = %q, want first shard", big.Path)
+	}
+	if _, ok := byName["Small-Q8_0"]; !ok {
+		t.Errorf("missing single-file model; got %v", byName)
+	}
+	if _, ok := byName["mmproj-Big-BF16"]; ok {
+		t.Errorf("mmproj projector should not be a standalone model")
+	}
+}
diff --git a/internal/model/scan_paths_other.go b/internal/model/scan_paths_other.go
new file mode 100644
index 00000000..5274eae3
--- /dev/null
+++ b/internal/model/scan_paths_other.go
@@ -0,0 +1,8 @@
+//go:build !windows
+
+package model
+
+// platformExtraScanPaths returns nil on non-Windows builds: there are no extra
+// locations to probe here. Linux server conventions (/mnt/data/models,
+// /opt/*/models) are handled directly in DefaultScanPaths.
+func platformExtraScanPaths() []string { return nil }
diff --git a/internal/model/scan_paths_test.go b/internal/model/scan_paths_test.go
new file mode 100644
index 00000000..6ee252b0
--- /dev/null
+++ b/internal/model/scan_paths_test.go
@@ -0,0 +1,66 @@
+package model
+
+import (
+	"os"
+	"strings"
+	"testing"
+)
+
+func TestDedupePaths(t *testing.T) {
+	got := dedupePaths([]string{"a", "b", "", "a", "c", "b", ""}) // repeats and empty strings mixed in
+	want := []string{"a", "b", "c"}
+	if strings.Join(got, ",") != strings.Join(want, ",") { // joined comparison also checks order
+		t.Errorf("dedupePaths = %v, want %v", got, want)
+	}
+}
+
+func TestDefaultScanPaths_MultiPathEnv(t *testing.T) {
+	sep := string(os.PathListSeparator)
+	a := t.TempDir()
+	b := t.TempDir()
+	t.Setenv("AIMA_MODEL_DIR", a+sep+b) // two dirs joined with the OS path-list separator
+
+	paths := DefaultScanPaths()
+	set := map[string]bool{}
+	for _, p := range paths {
+		set[p] = true
+	}
+	if !set[a] || !set[b] {
+		t.Errorf("DefaultScanPaths missing one of the AIMA_MODEL_DIR entries\n got: %v\n want both: %q, %q", paths, a, b)
+	}
+
+	// the returned list must not contain any path twice
+	seen := map[string]bool{}
+	for _, p := range paths {
+		if seen[p] {
+			t.Errorf("duplicate path in DefaultScanPaths: %q", p)
+		}
+		seen[p] = true
+	}
+}
+
+func TestDefaultScanPaths_EnvOverrides(t *testing.T) {
+	hf := t.TempDir()
+	om := t.TempDir()
+	t.Setenv("AIMA_MODEL_DIR", "")
+	t.Setenv("HF_HOME", hf)
+	t.Setenv("OLLAMA_MODELS", om)
+
+	paths := DefaultScanPaths()
+	wantHF := hf + string(os.PathSeparator) + "hub" // DefaultScanPaths appends "hub" to HF_HOME
+	var foundHF, foundOM bool
+	for _, p := range paths {
+		if p == wantHF {
+			foundHF = true
+		}
+		if p == om {
+			foundOM = true
+		}
+	}
+	if !foundHF {
+		t.Errorf("HF_HOME/hub not in scan paths: want %q in %v", wantHF, paths)
+	}
+	if !foundOM {
+		t.Errorf("OLLAMA_MODELS not in scan paths: want %q in %v", om, paths)
+	}
+}
diff --git a/internal/model/scan_paths_windows.go b/internal/model/scan_paths_windows.go
new file mode 100644
index 00000000..b9d16106
--- /dev/null
+++ b/internal/model/scan_paths_windows.go
@@ -0,0 +1,72 @@
+//go:build windows
+
+package model
+
+import (
+	"os"
+	"path/filepath"
+	"syscall"
+	"unsafe"
+)
+
+// platformExtraScanPaths joins every root from fixedDriveRoots with every entry
+// of conventionalDriveSubdirs and returns the combinations that exist as
+// directories. Models kept off the system drive (e.g. D:\models,
+// D:\lmstudio\models) are therefore found without manual configuration. Only
+// these fixed candidates are probed via os.Stat; no drive is walked here.
+func platformExtraScanPaths() []string {
+	var paths []string
+	for _, drive := range fixedDriveRoots() {
+		for _, sub := range conventionalDriveSubdirs() {
+			p := filepath.Join(drive, sub)
+			if info, err := os.Stat(p); err == nil && info.IsDir() {
+				paths = append(paths, p)
+			}
+		}
+	}
+	return paths
+}
+
+// conventionalDriveSubdirs lists the relative directories, taken from well-known
+// model-manager layouts, that platformExtraScanPaths probes under each drive root.
+func conventionalDriveSubdirs() []string {
+	return []string{
+		"models",
+		filepath.Join("lmstudio", "models"),
+		filepath.Join(".lmstudio", "models"),
+		filepath.Join(".cache", "lm-studio", "models"),
+		filepath.Join(".ollama", "models"),
+		filepath.Join(".cache", "huggingface", "hub"),
+	}
+}
+
+// fixedDriveRoots returns roots ("D:\\", ...) of fixed and removable drives.
+// Other types (network, CD-ROM, ...) are skipped to avoid stalls on disconnected mounts.
+func fixedDriveRoots() []string {
+	kernel32 := syscall.NewLazyDLL("kernel32.dll")
+	getLogicalDrives := kernel32.NewProc("GetLogicalDrives")
+	getDriveType := kernel32.NewProc("GetDriveTypeW")
+
+	mask, _, _ := getLogicalDrives.Call() // bitmask of drive letters; 0 is treated as "no drives"
+	if mask == 0 {
+		return nil
+	}
+
+	const driveRemovable, driveFixed = 2, 3 // Win32 DRIVE_REMOVABLE and DRIVE_FIXED
+	var roots []string
+	for i := 0; i < 26; i++ {
+		if mask&(1<<uint(i)) == 0 { // bit i clear: drive letter 'A'+i is absent
+			continue
+		}
+		root := string(rune('A'+i)) + ":\\"
+		ptr, err := syscall.UTF16PtrFromString(root)
+		if err != nil {
+			continue
+		}
+		dt, _, _ := getDriveType.Call(uintptr(unsafe.Pointer(ptr)))
+		if dt == driveFixed || dt == driveRemovable {
+			roots = append(roots, root)
+		}
+	}
+	return roots
+}
diff --git a/internal/model/scanner.go b/internal/model/scanner.go
index 45b3d356..114ba2bc 100644
--- a/internal/model/scanner.go
+++ b/internal/model/scanner.go
@@ -39,8 +39,12 @@ type ScanOptions struct {
 func DefaultScanPaths() []string {
 	var paths []string
 
-	if dir := os.Getenv("AIMA_MODEL_DIR"); dir != "" {
-		paths = append(paths, dir)
+	// AIMA_MODEL_DIR accepts a list ("D:\a;D:\b" on Windows, "/a:/b" on Linux)
+	// so users with models in several non-standard directories don't need junctions.
+	for _, dir := range filepath.SplitList(os.Getenv("AIMA_MODEL_DIR")) {
+		if dir = strings.TrimSpace(dir); dir != "" {
+			paths = append(paths, dir)
+		}
 	}
 
 	home, err := os.UserHomeDir()
@@ -52,6 +56,14 @@ func DefaultScanPaths() []string {
 		)
 	}
 
+	// Honor relocated model-manager caches via their standard env vars.
+	if hf := strings.TrimSpace(os.Getenv("HF_HOME")); hf != "" {
+		paths = append(paths, filepath.Join(hf, "hub"))
+	}
+	if om := strings.TrimSpace(os.Getenv("OLLAMA_MODELS")); om != "" {
+		paths = append(paths, om)
+	}
+
 	if runtime.GOOS == "linux" {
 		paths = append(paths,
 			"/mnt/data/models",
@@ -61,7 +73,25 @@ func DefaultScanPaths() []string {
 		paths = append(paths, discoverOptModelPaths()...)
 	}
 
-	return paths
+	// Platform extras — on Windows, conventional model dirs on each fixed or removable
+	// drive (D:\models, D:\lmstudio\models, ...) so models off the system drive are found.
+	paths = append(paths, platformExtraScanPaths()...)
+
+	return dedupePaths(paths)
+}
+
+// dedupePaths drops empty and repeated entries, keeping first occurrences in input order.
+func dedupePaths(in []string) []string {
+	seen := make(map[string]bool, len(in))
+	out := in[:0] // filter in place: out shares (and overwrites) in's backing array
+	for _, p := range in {
+		if p == "" || seen[p] { // exact string match; paths are not normalized first
+			continue
+		}
+		seen[p] = true
+		out = append(out, p)
+	}
+	return out
 }
 
 // discoverOptModelPaths finds vendor-preloaded model directories under /opt.