Approaching-AI · rjckkkkk · Jun 8, 2026
diff --git a/catalog/scanner.yaml b/catalog/scanner.yaml
@@ -38,6 +38,15 @@ config:
     - vision_model
     - audio_encoder
     - projection
+    # OS / tooling junk — never holds deployable models; skipping these keeps a
+    # drive-root scan from pulling in deleted or system files.
+    - "$recycle.bin"
+    - "system volume information"
+    - "$windows.~bt"
+    - "$windows.~ws"
+    - node_modules
+    - .git
+    - .cache_tmp
 
   # Directory patterns that indicate parent models/pipelines
   # Subdirectories of these paths will be skipped during recursion

diff --git a/internal/model/gguf.go b/internal/model/gguf.go
@@ -7,43 +7,115 @@ import (
 	"io"
 	"os"
 	"path/filepath"
+	"regexp"
+	"sort"
+	"strconv"
 	"strings"
 )
 
-// detectGGUFModels detects all GGUF models in a directory.
-// GGUF models don't have config.json, so we detect one model per .gguf file.
-// Each GGUF file gets its own Path (file path, not directory) for uniqueness.
+// ggufShardRe matches llama.cpp split-GGUF filenames case-insensitively, e.g.
+// "Qwen3.5-122B-A10B-Q4_K_M-00001-of-00002.gguf" → (base, index, total).
+var ggufShardRe = regexp.MustCompile(`(?i)^(.+)-(\d+)-of-(\d+)\.gguf$`) // groups: 1=base, 2=shard index, 3=shard total
+
+// ggufGroup is one logical GGUF model: either a single file, or a set of split
+// shards that llama.cpp loads by opening the first shard.
+type ggufGroup struct {
+	name    string   // model name: file name minus the extension and any -N-of-M shard suffix
+	primary string   // path to load: lowest-numbered shard seen, or the only file
+	parts   []string // every file belonging to this model; sorted lexically for shard groups
+}
+
+// isMMProjFile reports whether a GGUF base filename is a multimodal projector
+// (mmproj). A projector is an attachment to a vision model, not a standalone
+// model, so it must not be surfaced as a deployable model. The scanner already
+// skips "mmproj" subdirectories (scanner.yaml); this covers the file-name form.
+func isMMProjFile(baseNoExt string) bool {
+	return strings.Contains(strings.ToLower(baseNoExt), "mmproj") // case-insensitive substring match, anywhere in the name
+}
+
+// groupGGUFModels collapses split shards into one logical model and drops mmproj
+// projectors; a non-split .gguf stays a single-file model. Output order follows
+// input order: shard groups (first-seen order) then singles (input order).
+// NOTE(review): the shard total is not part of the group key — confirm that mixed "-of-N" sets should merge.
+func groupGGUFModels(files []string) []ggufGroup {
+	type acc struct {
+		group  *ggufGroup
+		minIdx int
+	}
+	groups := map[string]*acc{}
+	var order []string
+	var singles []ggufGroup
+
+	for _, f := range files {
+		base := filepath.Base(f)
+		nameNoExt := base[:len(base)-len(filepath.Ext(base))]
+		if isMMProjFile(nameNoExt) {
+			continue // projector, not a standalone model
+		}
+		if m := ggufShardRe.FindStringSubmatch(base); m != nil {
+			groupName := m[1]
+			idx, _ := strconv.Atoi(m[2]) // error dropped: m[2] is digits only, so Atoi can fail only on out-of-range
+			key := filepath.Dir(f) + string(filepath.Separator) + groupName
+			a, ok := groups[key]
+			if !ok { // first shard seen for this directory + base name: start a new group
+				a = &acc{group: &ggufGroup{name: groupName, primary: f}, minIdx: idx}
+				groups[key] = a
+				order = append(order, key)
+			}
+			a.group.parts = append(a.group.parts, f)
+			if idx < a.minIdx { // primary = lowest-numbered shard seen (not necessarily shard 1 if that file is absent)
+				a.minIdx = idx
+				a.group.primary = f
+			}
+		} else {
+			singles = append(singles, ggufGroup{name: nameNoExt, primary: f, parts: []string{f}})
+		}
+	}
+
+	out := make([]ggufGroup, 0, len(order)+len(singles))
+	for _, key := range order {
+		g := groups[key].group
+		sort.Strings(g.parts) // lexical sort; equals shard order when indices are zero-padded to the same width
+		out = append(out, *g)
+	}
+	return append(out, singles...)
+}
+
+// detectGGUFModels detects GGUF models in a directory. Split shards
+// ("...-00001-of-00003.gguf") collapse into one model whose Path is the first
+// shard (llama.cpp auto-loads the rest) and whose size is the sum of all parts;
+// mmproj projector files are excluded.
 func detectGGUFModels(dir string, entries []os.DirEntry, p ModelPattern, minSize int64) []*ModelInfo {
 	weightFiles := findAllWeightFiles(dir, entries, p.weightExts)
 	if len(weightFiles) == 0 {
 		return nil
 	}
 
 	var models []*ModelInfo
-	for _, weightPath := range weightFiles {
-		// Check individual file size against minimum
-		info, err := os.Stat(weightPath)
-		if err != nil {
-			continue
+	for _, g := range groupGGUFModels(weightFiles) {
+		// Size = sum of all shards; compare the whole model against the minimum.
+		var totalSize int64
+		for _, part := range g.parts {
+			if info, err := os.Stat(part); err == nil {
+				totalSize += info.Size()
+			}
 		}
-		if info.Size() < minSize {
+		if totalSize < minSize {
 			continue
 		}
 
-		// Use the file path as the model path (unique per GGUF file)
-		// This allows multiple GGUF files in the same directory to be detected
 		model := &ModelInfo{
-			ID:         fmt.Sprintf("%x", sha256.Sum256([]byte(weightPath))),
-			Name:       strings.TrimSuffix(filepath.Base(weightPath), ".gguf"),
+			ID:         fmt.Sprintf("%x", sha256.Sum256([]byte(g.primary))),
+			Name:       g.name,
 			Type:       p.typeHint,
-			Path:       weightPath, // Use file path for uniqueness
+			Path:       g.primary, // first shard — llama.cpp loads the rest automatically
 			Format:     p.format,
-			SizeBytes:  info.Size(),
+			SizeBytes:  totalSize,
 			ModelClass: "unknown",
 		}
 
-		// Parse GGUF header metadata for arch, params, class
-		if meta := parseGGUFMeta(weightPath); meta != nil {
+		// Parse GGUF header metadata for arch, params, class (from the first shard)
+		if meta := parseGGUFMeta(g.primary); meta != nil {
 			modelType := jsonStr(meta, "model_type", "")
 			model.DetectedArch = detectArch(modelType)
 			if model.Type == "" {
@@ -64,9 +136,8 @@ func detectGGUFModels(dir string, entries []os.DirEntry, p ModelPattern, minSize
 			}
 		}
 
-		// Detect quantization from filename
-		weightName := filepath.Base(weightPath)
-		model.Quantization, model.QuantSrc = detectQuantization(nil, weightName, p.format)
+		// Detect quantization from the primary file name
+		model.Quantization, model.QuantSrc = detectQuantization(nil, filepath.Base(g.primary), p.format)
 
 		if model.Type == "" {
 			model.Type = "llm" // Default GGUF models to LLM

diff --git a/internal/model/gguf_test.go b/internal/model/gguf_test.go
@@ -0,0 +1,115 @@
+package model
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+func TestIsMMProjFile(t *testing.T) {
+	tests := map[string]bool{ // file name without extension → expected isMMProjFile result
+		"mmproj-F16":              true,
+		"mmproj-Qwen3.5-27B-BF16": true,
+		"Qwen3.5-27B-mmproj":      true,
+		"Qwen3.5-27B-Q4_K_M":      false,
+		"GLM-4.7-Flash-Q4_K_M":    false,
+	}
+	for name, want := range tests {
+		if got := isMMProjFile(name); got != want {
+			t.Errorf("isMMProjFile(%q) = %v, want %v", name, got, want)
+		}
+	}
+}
+
+func TestGroupGGUFModels(t *testing.T) {
+	t.Run("split shards collapse to one model, primary = first shard", func(t *testing.T) {
+		// shards deliberately out of order, with an mmproj file and a single-file model mixed in
+		files := []string{
+			`/d/Qwen3-Coder-Next-Q4_K_M-00002-of-00004.gguf`,
+			`/d/Qwen3-Coder-Next-Q4_K_M-00001-of-00004.gguf`,
+			`/d/Qwen3-Coder-Next-Q4_K_M-00004-of-00004.gguf`,
+			`/d/Qwen3-Coder-Next-Q4_K_M-00003-of-00004.gguf`,
+			`/d/mmproj-Qwen3.5-27B-BF16.gguf`,
+			`/d/GLM-4.7-Flash-Q4_K_M.gguf`,
+		}
+		groups := groupGGUFModels(files)
+		if len(groups) != 2 { // one collapsed shard group + one single; the mmproj file is dropped
+			t.Fatalf("got %d groups, want 2: %+v", len(groups), groups)
+		}
+		shard := groups[0] // shard groups are emitted before singles
+		if shard.name != "Qwen3-Coder-Next-Q4_K_M" {
+			t.Errorf("group name = %q", shard.name)
+		}
+		if filepath.Base(shard.primary) != "Qwen3-Coder-Next-Q4_K_M-00001-of-00004.gguf" {
+			t.Errorf("primary = %q, want the -00001- shard", shard.primary)
+		}
+		if len(shard.parts) != 4 {
+			t.Errorf("parts = %d, want 4", len(shard.parts))
+		}
+		single := groups[1]
+		if single.name != "GLM-4.7-Flash-Q4_K_M" || len(single.parts) != 1 {
+			t.Errorf("single = %+v", single)
+		}
+	})
+
+	t.Run("same base name in different dirs are separate models", func(t *testing.T) {
+		files := []string{
+			`/a/M-00001-of-00002.gguf`, `/a/M-00002-of-00002.gguf`,
+			`/b/M-00001-of-00002.gguf`, `/b/M-00002-of-00002.gguf`,
+		}
+		if got := len(groupGGUFModels(files)); got != 2 {
+			t.Fatalf("got %d groups, want 2", got)
+		}
+	})
+
+	t.Run("mmproj-only input yields nothing", func(t *testing.T) {
+		if got := groupGGUFModels([]string{`/d/mmproj-F16.gguf`}); len(got) != 0 {
+			t.Fatalf("got %d groups, want 0", len(got))
+		}
+	})
+}
+
+func TestDetectGGUFModels_ShardsMMProjAndSize(t *testing.T) {
+	dir := t.TempDir()
+	write := func(name string, size int) { // creates a zero-filled file of the given size inside dir
+		if err := os.WriteFile(filepath.Join(dir, name), make([]byte, size), 0o644); err != nil {
+			t.Fatal(err)
+		}
+	}
+	write("Big-Q4_K_M-00001-of-00003.gguf", 100)
+	write("Big-Q4_K_M-00002-of-00003.gguf", 200)
+	write("Big-Q4_K_M-00003-of-00003.gguf", 300)
+	write("mmproj-Big-BF16.gguf", 50)
+	write("Small-Q8_0.gguf", 500)
+
+	entries, err := os.ReadDir(dir)
+	if err != nil {
+		t.Fatal(err)
+	}
+	pattern := ModelPattern{weightExts: []string{".gguf"}, format: "gguf"}
+	models := detectGGUFModels(dir, entries, pattern, 0) // minSize 0: no model is dropped for being too small
+
+	if len(models) != 2 {
+		t.Fatalf("got %d models, want 2 (split group + single, mmproj excluded): %+v", len(models), models)
+	}
+	byName := map[string]*ModelInfo{}
+	for _, m := range models {
+		byName[m.Name] = m
+	}
+	big, ok := byName["Big-Q4_K_M"]
+	if !ok {
+		t.Fatalf("missing collapsed split model; got %v", byName)
+	}
+	if big.SizeBytes != 600 { // 100 + 200 + 300 from the three shards written above
+		t.Errorf("split model size = %d, want 600 (sum of shards)", big.SizeBytes)
+	}
+	if filepath.Base(big.Path) != "Big-Q4_K_M-00001-of-00003.gguf" {
+		t.Errorf("split model Path = %q, want first shard", big.Path)
+	}
+	if _, ok := byName["Small-Q8_0"]; !ok {
+		t.Errorf("missing single-file model; got %v", byName)
+	}
+	if _, ok := byName["mmproj-Big-BF16"]; ok {
+		t.Errorf("mmproj projector should not be a standalone model")
+	}
+}
diff --git a/internal/model/scan_paths_other.go b/internal/model/scan_paths_other.go
@@ -0,0 +1,8 @@
+//go:build !windows
+
+package model
+
+// platformExtraScanPaths returns nil: non-Windows hosts have no extra scan
+// locations. Linux server conventions (/mnt/data/models, /opt/*/models) are
+// handled directly in DefaultScanPaths.
+func platformExtraScanPaths() []string { return nil }
diff --git a/internal/model/scan_paths_test.go b/internal/model/scan_paths_test.go
@@ -0,0 +1,66 @@
+package model
+
+import (
+	"os"
+	"strings"
+	"testing"
+)
+
+func TestDedupePaths(t *testing.T) {
+	got := dedupePaths([]string{"a", "b", "", "a", "c", "b", ""}) // expects repeats and empty strings dropped, first-seen order kept
+	want := []string{"a", "b", "c"}
+	if strings.Join(got, ",") != strings.Join(want, ",") {
+		t.Errorf("dedupePaths = %v, want %v", got, want)
+	}
+}
+
+func TestDefaultScanPaths_MultiPathEnv(t *testing.T) {
+	sep := string(os.PathListSeparator)
+	a := t.TempDir()
+	b := t.TempDir()
+	t.Setenv("AIMA_MODEL_DIR", a+sep+b) // both dirs in one variable, joined by the OS path-list separator
+
+	paths := DefaultScanPaths()
+	set := map[string]bool{}
+	for _, p := range paths {
+		set[p] = true
+	}
+	if !set[a] || !set[b] {
+		t.Errorf("DefaultScanPaths missing one of the AIMA_MODEL_DIR entries\n got: %v\n want both: %q, %q", paths, a, b)
+	}
+
+	// the returned list must not contain any path twice
+	seen := map[string]bool{}
+	for _, p := range paths {
+		if seen[p] {
+			t.Errorf("duplicate path in DefaultScanPaths: %q", p)
+		}
+		seen[p] = true
+	}
+}
+
+func TestDefaultScanPaths_EnvOverrides(t *testing.T) {
+	hf := t.TempDir()
+	om := t.TempDir()
+	t.Setenv("AIMA_MODEL_DIR", "")
+	t.Setenv("HF_HOME", hf)
+	t.Setenv("OLLAMA_MODELS", om)
+
+	paths := DefaultScanPaths()
+	wantHF := hf + string(os.PathSeparator) + "hub" // the test expects HF_HOME to contribute its "hub" subdirectory
+	var foundHF, foundOM bool
+	for _, p := range paths {
+		if p == wantHF {
+			foundHF = true
+		}
+		if p == om {
+			foundOM = true
+		}
+	}
+	if !foundHF {
+		t.Errorf("HF_HOME/hub not in scan paths: want %q in %v", wantHF, paths)
+	}
+	if !foundOM {
+		t.Errorf("OLLAMA_MODELS not in scan paths: want %q in %v", om, paths)
+	}
+}