From 142c6bffc4debc1f4aca1de54512f225b23e08b3 Mon Sep 17 00:00:00 2001 From: rjckkkkk <59609580+rjckkkkk@users.noreply.github.com> Date: Mon, 8 Jun 2026 11:02:16 +0000 Subject: [PATCH] Discover models across all drives; group split GGUF shards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Windows the model scanner only looked at %USERPROFILE% directories, so models on other drives (D:\models, D:\lmstudio\models, ...) were never found and users had to set AIMA_MODEL_DIR by hand. Split GGUF shards were each registered as a separate model, and mmproj projector files showed up as standalone models — both break one-click deploy from the Web UI (a user clicking a single shard would fail to load the model). Scan discovery: - enumerate fixed/removable drives and probe conventional model dirs that exist (X:\models, X:\lmstudio\models, .ollama, HF hub). This stays targeted — it never walks whole drives, so recycle bins and system folders are not pulled in. - AIMA_MODEL_DIR now accepts a list (os.PathListSeparator) for custom dirs. - honor HF_HOME / OLLAMA_MODELS for relocated caches. - harden scanner.yaml skip list ($recycle.bin, system volume information, node_modules, .git, ...) so even an explicit drive-root scan stays clean. GGUF grouping: - collapse "...-00001-of-000NN.gguf" shards into one logical model whose Path is the first shard (llama.cpp auto-loads the rest) and whose size is the sum of all parts — so deploy targets the whole model, not one piece. - drop mmproj projector files. The scanner already skips "mmproj" *dirs* (scanner.yaml); this covers the file-name form, so projectors no longer appear as deployable models. Pure logic (groupGGUFModels, dedupePaths, DefaultScanPaths multi-path) is table-driven tested. 
Verified on an AMD Strix Halo Windows box: a clean scan now yields 22 grouped models across D:\model, D:\models, D:\models-gguf and D:\lmstudio (drive auto-probe found the latter two on its own), with no shards-as-models, no mmproj, and no recycle/tooling junk. Co-Authored-By: Claude Opus 4.8 (1M context) --- catalog/scanner.yaml | 9 +++ internal/model/gguf.go | 111 +++++++++++++++++++++----- internal/model/gguf_test.go | 115 +++++++++++++++++++++++++++ internal/model/scan_paths_other.go | 8 ++ internal/model/scan_paths_test.go | 66 +++++++++++++++ internal/model/scan_paths_windows.go | 72 +++++++++++++++++ internal/model/scanner.go | 36 ++++++++- 7 files changed, 394 insertions(+), 23 deletions(-) create mode 100644 internal/model/gguf_test.go create mode 100644 internal/model/scan_paths_other.go create mode 100644 internal/model/scan_paths_test.go create mode 100644 internal/model/scan_paths_windows.go diff --git a/catalog/scanner.yaml b/catalog/scanner.yaml index 2492f776..cc0966c3 100644 --- a/catalog/scanner.yaml +++ b/catalog/scanner.yaml @@ -38,6 +38,15 @@ config: - vision_model - audio_encoder - projection + # OS / tooling junk — never holds deployable models, and keeps a + # drive-root scan from pulling in deleted or system files. + - "$recycle.bin" + - "system volume information" + - "$windows.~bt" + - "$windows.~ws" + - node_modules + - .git + - .cache_tmp # Directory patterns that indicate parent models/pipelines # Subdirectories of these paths will be skipped during recursion diff --git a/internal/model/gguf.go b/internal/model/gguf.go index ebc7e466..be4169bf 100644 --- a/internal/model/gguf.go +++ b/internal/model/gguf.go @@ -7,12 +7,84 @@ import ( "io" "os" "path/filepath" + "regexp" + "sort" + "strconv" "strings" ) -// detectGGUFModels detects all GGUF models in a directory. -// GGUF models don't have config.json, so we detect one model per .gguf file. -// Each GGUF file gets its own Path (file path, not directory) for uniqueness. 
+// ggufShardRe matches llama.cpp split-GGUF filenames, e.g. +// "Qwen3.5-122B-A10B-Q4_K_M-00001-of-00002.gguf" → (base, index, total). +var ggufShardRe = regexp.MustCompile(`(?i)^(.+)-(\d+)-of-(\d+)\.gguf$`) + +// ggufGroup is one logical GGUF model: either a single file, or a set of split +// shards that llama.cpp loads by opening the first shard. +type ggufGroup struct { + name string // model name: filename without .gguf and shard suffix + primary string // path to load (first shard, or the only file) + parts []string // all files belonging to this model +} + +// isMMProjFile reports whether a GGUF base filename is a multimodal projector +// (mmproj). A projector is an attachment to a vision model, not a standalone +// model, so it must not be surfaced as a deployable model. The scanner already +// skips "mmproj" subdirectories (scanner.yaml); this covers the file-name form. +func isMMProjFile(baseNoExt string) bool { + return strings.Contains(strings.ToLower(baseNoExt), "mmproj") +} + +// groupGGUFModels collapses split shards into one logical model and drops mmproj +// projectors. A non-split .gguf stays a single-file model. Output is +// deterministic for sorted input: grouped shards (first-seen order) then +// singles (input order). 
+func groupGGUFModels(files []string) []ggufGroup { + type acc struct { + group *ggufGroup + minIdx int + } + groups := map[string]*acc{} + var order []string + var singles []ggufGroup + + for _, f := range files { + base := filepath.Base(f) + nameNoExt := base[:len(base)-len(filepath.Ext(base))] + if isMMProjFile(nameNoExt) { + continue // projector, not a standalone model + } + if m := ggufShardRe.FindStringSubmatch(base); m != nil { + groupName := m[1] + idx, _ := strconv.Atoi(m[2]) + key := filepath.Dir(f) + string(filepath.Separator) + groupName + a, ok := groups[key] + if !ok { + a = &acc{group: &ggufGroup{name: groupName, primary: f}, minIdx: idx} + groups[key] = a + order = append(order, key) + } + a.group.parts = append(a.group.parts, f) + if idx < a.minIdx { // primary = lowest-numbered shard + a.minIdx = idx + a.group.primary = f + } + } else { + singles = append(singles, ggufGroup{name: nameNoExt, primary: f, parts: []string{f}}) + } + } + + out := make([]ggufGroup, 0, len(order)+len(singles)) + for _, key := range order { + g := groups[key].group + sort.Strings(g.parts) + out = append(out, *g) + } + return append(out, singles...) +} + +// detectGGUFModels detects GGUF models in a directory. Split shards +// ("...-00001-of-00003.gguf") collapse into one model whose Path is the first +// shard (llama.cpp auto-loads the rest) and whose size is the sum of all parts; +// mmproj projector files are excluded. 
func detectGGUFModels(dir string, entries []os.DirEntry, p ModelPattern, minSize int64) []*ModelInfo { weightFiles := findAllWeightFiles(dir, entries, p.weightExts) if len(weightFiles) == 0 { @@ -20,30 +92,30 @@ func detectGGUFModels(dir string, entries []os.DirEntry, p ModelPattern, minSize } var models []*ModelInfo - for _, weightPath := range weightFiles { - // Check individual file size against minimum - info, err := os.Stat(weightPath) - if err != nil { - continue + for _, g := range groupGGUFModels(weightFiles) { + // Size = sum of all shards; compare the whole model against the minimum. + var totalSize int64 + for _, part := range g.parts { + if info, err := os.Stat(part); err == nil { + totalSize += info.Size() + } } - if info.Size() < minSize { + if totalSize < minSize { continue } - // Use the file path as the model path (unique per GGUF file) - // This allows multiple GGUF files in the same directory to be detected model := &ModelInfo{ - ID: fmt.Sprintf("%x", sha256.Sum256([]byte(weightPath))), - Name: strings.TrimSuffix(filepath.Base(weightPath), ".gguf"), + ID: fmt.Sprintf("%x", sha256.Sum256([]byte(g.primary))), + Name: g.name, Type: p.typeHint, - Path: weightPath, // Use file path for uniqueness + Path: g.primary, // first shard — llama.cpp loads the rest automatically Format: p.format, - SizeBytes: info.Size(), + SizeBytes: totalSize, ModelClass: "unknown", } - // Parse GGUF header metadata for arch, params, class - if meta := parseGGUFMeta(weightPath); meta != nil { + // Parse GGUF header metadata for arch, params, class (from the first shard) + if meta := parseGGUFMeta(g.primary); meta != nil { modelType := jsonStr(meta, "model_type", "") model.DetectedArch = detectArch(modelType) if model.Type == "" { @@ -64,9 +136,8 @@ func detectGGUFModels(dir string, entries []os.DirEntry, p ModelPattern, minSize } } - // Detect quantization from filename - weightName := filepath.Base(weightPath) - model.Quantization, model.QuantSrc = detectQuantization(nil, 
weightName, p.format) + // Detect quantization from the primary file name + model.Quantization, model.QuantSrc = detectQuantization(nil, filepath.Base(g.primary), p.format) if model.Type == "" { model.Type = "llm" // Default GGUF models to LLM diff --git a/internal/model/gguf_test.go b/internal/model/gguf_test.go new file mode 100644 index 00000000..8bb3dc24 --- /dev/null +++ b/internal/model/gguf_test.go @@ -0,0 +1,115 @@ +package model + +import ( + "os" + "path/filepath" + "testing" +) + +func TestIsMMProjFile(t *testing.T) { + tests := map[string]bool{ + "mmproj-F16": true, + "mmproj-Qwen3.5-27B-BF16": true, + "Qwen3.5-27B-mmproj": true, + "Qwen3.5-27B-Q4_K_M": false, + "GLM-4.7-Flash-Q4_K_M": false, + } + for name, want := range tests { + if got := isMMProjFile(name); got != want { + t.Errorf("isMMProjFile(%q) = %v, want %v", name, got, want) + } + } +} + +func TestGroupGGUFModels(t *testing.T) { + t.Run("split shards collapse to one model, primary = first shard", func(t *testing.T) { + // deliberately out of order; mmproj + a single file mixed in + files := []string{ + `/d/Qwen3-Coder-Next-Q4_K_M-00002-of-00004.gguf`, + `/d/Qwen3-Coder-Next-Q4_K_M-00001-of-00004.gguf`, + `/d/Qwen3-Coder-Next-Q4_K_M-00004-of-00004.gguf`, + `/d/Qwen3-Coder-Next-Q4_K_M-00003-of-00004.gguf`, + `/d/mmproj-Qwen3.5-27B-BF16.gguf`, + `/d/GLM-4.7-Flash-Q4_K_M.gguf`, + } + groups := groupGGUFModels(files) + if len(groups) != 2 { + t.Fatalf("got %d groups, want 2: %+v", len(groups), groups) + } + shard := groups[0] + if shard.name != "Qwen3-Coder-Next-Q4_K_M" { + t.Errorf("group name = %q", shard.name) + } + if filepath.Base(shard.primary) != "Qwen3-Coder-Next-Q4_K_M-00001-of-00004.gguf" { + t.Errorf("primary = %q, want the -00001- shard", shard.primary) + } + if len(shard.parts) != 4 { + t.Errorf("parts = %d, want 4", len(shard.parts)) + } + single := groups[1] + if single.name != "GLM-4.7-Flash-Q4_K_M" || len(single.parts) != 1 { + t.Errorf("single = %+v", single) + } + }) + + 
t.Run("same base name in different dirs are separate models", func(t *testing.T) { + files := []string{ + `/a/M-00001-of-00002.gguf`, `/a/M-00002-of-00002.gguf`, + `/b/M-00001-of-00002.gguf`, `/b/M-00002-of-00002.gguf`, + } + if got := len(groupGGUFModels(files)); got != 2 { + t.Fatalf("got %d groups, want 2", got) + } + }) + + t.Run("mmproj-only input yields nothing", func(t *testing.T) { + if got := groupGGUFModels([]string{`/d/mmproj-F16.gguf`}); len(got) != 0 { + t.Fatalf("got %d groups, want 0", len(got)) + } + }) +} + +func TestDetectGGUFModels_ShardsMMProjAndSize(t *testing.T) { + dir := t.TempDir() + write := func(name string, size int) { + if err := os.WriteFile(filepath.Join(dir, name), make([]byte, size), 0o644); err != nil { + t.Fatal(err) + } + } + write("Big-Q4_K_M-00001-of-00003.gguf", 100) + write("Big-Q4_K_M-00002-of-00003.gguf", 200) + write("Big-Q4_K_M-00003-of-00003.gguf", 300) + write("mmproj-Big-BF16.gguf", 50) + write("Small-Q8_0.gguf", 500) + + entries, err := os.ReadDir(dir) + if err != nil { + t.Fatal(err) + } + pattern := ModelPattern{weightExts: []string{".gguf"}, format: "gguf"} + models := detectGGUFModels(dir, entries, pattern, 0) + + if len(models) != 2 { + t.Fatalf("got %d models, want 2 (split group + single, mmproj excluded): %+v", len(models), models) + } + byName := map[string]*ModelInfo{} + for _, m := range models { + byName[m.Name] = m + } + big, ok := byName["Big-Q4_K_M"] + if !ok { + t.Fatalf("missing collapsed split model; got %v", byName) + } + if big.SizeBytes != 600 { + t.Errorf("split model size = %d, want 600 (sum of shards)", big.SizeBytes) + } + if filepath.Base(big.Path) != "Big-Q4_K_M-00001-of-00003.gguf" { + t.Errorf("split model Path = %q, want first shard", big.Path) + } + if _, ok := byName["Small-Q8_0"]; !ok { + t.Errorf("missing single-file model; got %v", byName) + } + if _, ok := byName["mmproj-Big-BF16"]; ok { + t.Errorf("mmproj projector should not be a standalone model") + } +} diff --git 
a/internal/model/scan_paths_other.go b/internal/model/scan_paths_other.go new file mode 100644 index 00000000..5274eae3 --- /dev/null +++ b/internal/model/scan_paths_other.go @@ -0,0 +1,8 @@ +//go:build !windows + +package model + +// platformExtraScanPaths has no extra locations on non-Windows hosts; Linux +// server conventions (/mnt/data/models, /opt/*/models) are handled directly in +// DefaultScanPaths. +func platformExtraScanPaths() []string { return nil } diff --git a/internal/model/scan_paths_test.go b/internal/model/scan_paths_test.go new file mode 100644 index 00000000..6ee252b0 --- /dev/null +++ b/internal/model/scan_paths_test.go @@ -0,0 +1,66 @@ +package model + +import ( + "os" + "strings" + "testing" +) + +func TestDedupePaths(t *testing.T) { + got := dedupePaths([]string{"a", "b", "", "a", "c", "b", ""}) + want := []string{"a", "b", "c"} + if strings.Join(got, ",") != strings.Join(want, ",") { + t.Errorf("dedupePaths = %v, want %v", got, want) + } +} + +func TestDefaultScanPaths_MultiPathEnv(t *testing.T) { + sep := string(os.PathListSeparator) + a := t.TempDir() + b := t.TempDir() + t.Setenv("AIMA_MODEL_DIR", a+sep+b) + + paths := DefaultScanPaths() + set := map[string]bool{} + for _, p := range paths { + set[p] = true + } + if !set[a] || !set[b] { + t.Errorf("DefaultScanPaths missing one of the AIMA_MODEL_DIR entries\n got: %v\n want both: %q, %q", paths, a, b) + } + + // no duplicates in the returned list + seen := map[string]bool{} + for _, p := range paths { + if seen[p] { + t.Errorf("duplicate path in DefaultScanPaths: %q", p) + } + seen[p] = true + } +} + +func TestDefaultScanPaths_EnvOverrides(t *testing.T) { + hf := t.TempDir() + om := t.TempDir() + t.Setenv("AIMA_MODEL_DIR", "") + t.Setenv("HF_HOME", hf) + t.Setenv("OLLAMA_MODELS", om) + + paths := DefaultScanPaths() + wantHF := hf + string(os.PathSeparator) + "hub" + var foundHF, foundOM bool + for _, p := range paths { + if p == wantHF { + foundHF = true + } + if p == om { + foundOM = 
true + } + } + if !foundHF { + t.Errorf("HF_HOME/hub not in scan paths: want %q in %v", wantHF, paths) + } + if !foundOM { + t.Errorf("OLLAMA_MODELS not in scan paths: want %q in %v", om, paths) + } +} diff --git a/internal/model/scan_paths_windows.go b/internal/model/scan_paths_windows.go new file mode 100644 index 00000000..b9d16106 --- /dev/null +++ b/internal/model/scan_paths_windows.go @@ -0,0 +1,72 @@ +//go:build windows + +package model + +import ( + "os" + "path/filepath" + "syscall" + "unsafe" +) + +// platformExtraScanPaths enumerates fixed and removable drives and returns the +// conventional model directories that actually exist on each. This lets models +// stored off the system drive (e.g. D:\models, D:\lmstudio\models) be found +// without manual configuration, while staying targeted — it never walks whole +// drives, so recycle bins / system folders are not pulled in. +func platformExtraScanPaths() []string { + var paths []string + for _, drive := range fixedDriveRoots() { + for _, sub := range conventionalDriveSubdirs() { + p := filepath.Join(drive, sub) + if info, err := os.Stat(p); err == nil && info.IsDir() { + paths = append(paths, p) + } + } + } + return paths +} + +// conventionalDriveSubdirs are the well-known model-manager layouts probed on +// each drive root. +func conventionalDriveSubdirs() []string { + return []string{ + "models", + filepath.Join("lmstudio", "models"), + filepath.Join(".lmstudio", "models"), + filepath.Join(".cache", "lm-studio", "models"), + filepath.Join(".ollama", "models"), + filepath.Join(".cache", "huggingface", "hub"), + } +} + +// fixedDriveRoots returns roots ("D:\\", ...) of fixed and removable drives. +// Network and CD-ROM drives are excluded to avoid stalls on disconnected mounts. 
+func fixedDriveRoots() []string { + kernel32 := syscall.NewLazyDLL("kernel32.dll") + getLogicalDrives := kernel32.NewProc("GetLogicalDrives") + getDriveType := kernel32.NewProc("GetDriveTypeW") + + mask, _, _ := getLogicalDrives.Call() + if mask == 0 { + return nil + } + + const driveRemovable, driveFixed = 2, 3 + var roots []string + for i := 0; i < 26; i++ { + if mask&(1<