Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions catalog/scanner.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,15 @@ config:
- vision_model
- audio_encoder
- projection
# OS / tooling junk — never holds deployable models, and keeps a
# drive-root scan from pulling in deleted or system files.
- "$recycle.bin"
- "system volume information"
- "$windows.~bt"
- "$windows.~ws"
- node_modules
- .git
- .cache_tmp

# Directory patterns that indicate parent models/pipelines
# Subdirectories of these paths will be skipped during recursion
Expand Down
111 changes: 91 additions & 20 deletions internal/model/gguf.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,43 +7,115 @@ import (
"io"
"os"
"path/filepath"
"regexp"
"sort"
"strconv"
"strings"
)

// detectGGUFModels detects all GGUF models in a directory.
// GGUF models don't have config.json, so we detect one model per .gguf file.
// Each GGUF file gets its own Path (file path, not directory) for uniqueness.
// ggufShardRe matches llama.cpp split-GGUF filenames, e.g.
// "Qwen3.5-122B-A10B-Q4_K_M-00001-of-00002.gguf" → (base, index, total).
//
// The pattern is anchored at both ends and case-insensitive, and this file
// applies it to a bare file name (filepath.Base), not to a full path.
// Submatch 1 is the base name in front of the shard suffix, submatch 2 the
// shard index digits and submatch 3 the shard count digits. The digit groups
// are matched as text only; no range or consistency check happens here.
var ggufShardRe = regexp.MustCompile(`(?i)^(.+)-(\d+)-of-(\d+)\.gguf$`)

// ggufGroup is one logical GGUF model: either a single file, or a set of split
// shards that llama.cpp loads by opening the first shard.
type ggufGroup struct {
	// name is the model name: the file name without its extension and, for a
	// split model, without the "-<index>-of-<total>" shard suffix.
	name string
	// primary is the path to load: the only file of a single-file model, or
	// the lowest-numbered shard seen for a split model.
	primary string
	// parts lists all files belonging to this model (exactly one entry for a
	// single-file model).
	parts []string
}

// isMMProjFile tells whether a GGUF file name (extension already removed)
// belongs to a multimodal projector ("mmproj"). Projectors are companions of a
// vision model rather than models of their own, so they must not be listed as
// deployable models. Directories named "mmproj" are filtered by the scanner
// configuration (scanner.yaml); this helper handles the file-name variant.
//
// The check is a case-insensitive substring match on "mmproj".
func isMMProjFile(baseNoExt string) bool {
	const marker = "mmproj"
	lowered := strings.ToLower(baseNoExt)
	return strings.Contains(lowered, marker)
}

// groupGGUFModels collapses split shards into one logical model and drops mmproj
// projectors. A non-split .gguf stays a single-file model.
//
// Shards are grouped by directory, base name and declared shard count. Keying
// on the count keeps two split sets apart when they share a base name but were
// split into a different number of parts (e.g. "M-00001-of-00002.gguf" next to
// "M-00001-of-00003.gguf"); merging them would mix their parts and sizes.
//
// A file whose name looks like a shard but whose index or count does not fit
// in an int is not trusted as a shard and is returned as a single-file model.
//
// Output is deterministic for sorted input: grouped shards (first-seen order)
// then singles (input order). Within a group, parts are sorted
// lexicographically and primary is the lowest-numbered shard that was seen.
func groupGGUFModels(files []string) []ggufGroup {
	// shardKey identifies one split set; a struct key avoids any ambiguity a
	// concatenated string key could have.
	type shardKey struct {
		dir   string
		name  string
		total int
	}
	type acc struct {
		group  ggufGroup
		minIdx int // lowest shard index seen so far; selects primary
	}

	groups := make(map[shardKey]*acc)
	var order []shardKey // first-seen order of keys, for deterministic output
	var singles []ggufGroup

	for _, f := range files {
		base := filepath.Base(f)
		nameNoExt := strings.TrimSuffix(base, filepath.Ext(base))
		if isMMProjFile(nameNoExt) {
			continue // projector, not a standalone model
		}

		m := ggufShardRe.FindStringSubmatch(base)
		isShard := m != nil
		var idx, total int
		if isShard {
			// The regexp guarantees digits, so Atoi can only fail on overflow.
			var idxErr, totalErr error
			idx, idxErr = strconv.Atoi(m[2])
			total, totalErr = strconv.Atoi(m[3])
			isShard = idxErr == nil && totalErr == nil
		}
		if !isShard {
			singles = append(singles, ggufGroup{name: nameNoExt, primary: f, parts: []string{f}})
			continue
		}

		key := shardKey{dir: filepath.Dir(f), name: m[1], total: total}
		a, seen := groups[key]
		if !seen {
			a = &acc{group: ggufGroup{name: m[1], primary: f}, minIdx: idx}
			groups[key] = a
			order = append(order, key)
		}
		a.group.parts = append(a.group.parts, f)
		if idx < a.minIdx { // primary = lowest-numbered shard
			a.minIdx = idx
			a.group.primary = f
		}
	}

	out := make([]ggufGroup, 0, len(order)+len(singles))
	for _, key := range order {
		g := groups[key].group
		sort.Strings(g.parts)
		out = append(out, g)
	}
	return append(out, singles...)
}

// detectGGUFModels detects GGUF models in a directory. Split shards
// ("...-00001-of-00003.gguf") collapse into one model whose Path is the first
// shard (llama.cpp auto-loads the rest) and whose size is the sum of all parts;
// mmproj projector files are excluded.
func detectGGUFModels(dir string, entries []os.DirEntry, p ModelPattern, minSize int64) []*ModelInfo {
weightFiles := findAllWeightFiles(dir, entries, p.weightExts)
if len(weightFiles) == 0 {
return nil
}

var models []*ModelInfo
for _, weightPath := range weightFiles {
// Check individual file size against minimum
info, err := os.Stat(weightPath)
if err != nil {
continue
for _, g := range groupGGUFModels(weightFiles) {
// Size = sum of all shards; compare the whole model against the minimum.
var totalSize int64
for _, part := range g.parts {
if info, err := os.Stat(part); err == nil {
totalSize += info.Size()
}
}
if info.Size() < minSize {
if totalSize < minSize {
continue
}

// Use the file path as the model path (unique per GGUF file)
// This allows multiple GGUF files in the same directory to be detected
model := &ModelInfo{
ID: fmt.Sprintf("%x", sha256.Sum256([]byte(weightPath))),
Name: strings.TrimSuffix(filepath.Base(weightPath), ".gguf"),
ID: fmt.Sprintf("%x", sha256.Sum256([]byte(g.primary))),
Name: g.name,
Type: p.typeHint,
Path: weightPath, // Use file path for uniqueness
Path: g.primary, // first shard — llama.cpp loads the rest automatically
Format: p.format,
SizeBytes: info.Size(),
SizeBytes: totalSize,
ModelClass: "unknown",
}

// Parse GGUF header metadata for arch, params, class
if meta := parseGGUFMeta(weightPath); meta != nil {
// Parse GGUF header metadata for arch, params, class (from the first shard)
if meta := parseGGUFMeta(g.primary); meta != nil {
modelType := jsonStr(meta, "model_type", "")
model.DetectedArch = detectArch(modelType)
if model.Type == "" {
Expand All @@ -64,9 +136,8 @@ func detectGGUFModels(dir string, entries []os.DirEntry, p ModelPattern, minSize
}
}

// Detect quantization from filename
weightName := filepath.Base(weightPath)
model.Quantization, model.QuantSrc = detectQuantization(nil, weightName, p.format)
// Detect quantization from the primary file name
model.Quantization, model.QuantSrc = detectQuantization(nil, filepath.Base(g.primary), p.format)

if model.Type == "" {
model.Type = "llm" // Default GGUF models to LLM
Expand Down
115 changes: 115 additions & 0 deletions internal/model/gguf_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
package model

import (
"os"
"path/filepath"
"testing"
)

// TestIsMMProjFile verifies that names containing "mmproj" are classified as
// projector files and that ordinary model names are not.
func TestIsMMProjFile(t *testing.T) {
	cases := []struct {
		name string
		want bool
	}{
		{"mmproj-F16", true},
		{"mmproj-Qwen3.5-27B-BF16", true},
		{"Qwen3.5-27B-mmproj", true},
		{"Qwen3.5-27B-Q4_K_M", false},
		{"GLM-4.7-Flash-Q4_K_M", false},
	}
	for _, tc := range cases {
		got := isMMProjFile(tc.name)
		if got == tc.want {
			continue
		}
		t.Errorf("isMMProjFile(%q) = %v, want %v", tc.name, got, tc.want)
	}
}

// TestGroupGGUFModels exercises groupGGUFModels: shard collapsing and primary
// selection, per-directory grouping, and mmproj filtering.
func TestGroupGGUFModels(t *testing.T) {
	t.Run("split shards collapse to one model, primary = first shard", func(t *testing.T) {
		// Shards are listed out of order on purpose, with an mmproj projector
		// and an ordinary single-file model mixed in.
		input := []string{
			`/d/Qwen3-Coder-Next-Q4_K_M-00002-of-00004.gguf`,
			`/d/Qwen3-Coder-Next-Q4_K_M-00001-of-00004.gguf`,
			`/d/Qwen3-Coder-Next-Q4_K_M-00004-of-00004.gguf`,
			`/d/Qwen3-Coder-Next-Q4_K_M-00003-of-00004.gguf`,
			`/d/mmproj-Qwen3.5-27B-BF16.gguf`,
			`/d/GLM-4.7-Flash-Q4_K_M.gguf`,
		}
		got := groupGGUFModels(input)
		if n := len(got); n != 2 {
			t.Fatalf("got %d groups, want 2: %+v", n, got)
		}

		// Grouped shards come first, the single-file model second.
		split, single := got[0], got[1]

		if split.name != "Qwen3-Coder-Next-Q4_K_M" {
			t.Errorf("group name = %q", split.name)
		}
		if primaryBase := filepath.Base(split.primary); primaryBase != "Qwen3-Coder-Next-Q4_K_M-00001-of-00004.gguf" {
			t.Errorf("primary = %q, want the -00001- shard", split.primary)
		}
		if n := len(split.parts); n != 4 {
			t.Errorf("parts = %d, want 4", n)
		}

		nameOK := single.name == "GLM-4.7-Flash-Q4_K_M"
		partsOK := len(single.parts) == 1
		if !(nameOK && partsOK) {
			t.Errorf("single = %+v", single)
		}
	})

	t.Run("same base name in different dirs are separate models", func(t *testing.T) {
		input := []string{
			`/a/M-00001-of-00002.gguf`,
			`/a/M-00002-of-00002.gguf`,
			`/b/M-00001-of-00002.gguf`,
			`/b/M-00002-of-00002.gguf`,
		}
		count := len(groupGGUFModels(input))
		if count != 2 {
			t.Fatalf("got %d groups, want 2", count)
		}
	})

	t.Run("mmproj-only input yields nothing", func(t *testing.T) {
		got := groupGGUFModels([]string{`/d/mmproj-F16.gguf`})
		if len(got) != 0 {
			t.Fatalf("got %d groups, want 0", len(got))
		}
	})
}

// TestDetectGGUFModels_ShardsMMProjAndSize runs detectGGUFModels against real
// files in a temporary directory: three shards of one split model, one mmproj
// projector and one single-file model. It expects two models, with the split
// model sized as the sum of its shards and pointing at its first shard.
func TestDetectGGUFModels_ShardsMMProjAndSize(t *testing.T) {
	dir := t.TempDir()

	// Zero-filled fixture files; only their names and sizes matter here.
	fixtures := []struct {
		name string
		size int
	}{
		{"Big-Q4_K_M-00001-of-00003.gguf", 100},
		{"Big-Q4_K_M-00002-of-00003.gguf", 200},
		{"Big-Q4_K_M-00003-of-00003.gguf", 300},
		{"mmproj-Big-BF16.gguf", 50},
		{"Small-Q8_0.gguf", 500},
	}
	for _, fx := range fixtures {
		target := filepath.Join(dir, fx.name)
		if err := os.WriteFile(target, make([]byte, fx.size), 0o644); err != nil {
			t.Fatal(err)
		}
	}

	entries, err := os.ReadDir(dir)
	if err != nil {
		t.Fatal(err)
	}
	pattern := ModelPattern{weightExts: []string{".gguf"}, format: "gguf"}
	models := detectGGUFModels(dir, entries, pattern, 0)

	if count := len(models); count != 2 {
		t.Fatalf("got %d models, want 2 (split group + single, mmproj excluded): %+v", count, models)
	}

	// Index by name so the assertions do not depend on result order.
	byName := make(map[string]*ModelInfo, len(models))
	for _, m := range models {
		byName[m.Name] = m
	}

	big, found := byName["Big-Q4_K_M"]
	if !found {
		t.Fatalf("missing collapsed split model; got %v", byName)
	}
	if big.SizeBytes != 600 {
		t.Errorf("split model size = %d, want 600 (sum of shards)", big.SizeBytes)
	}
	if base := filepath.Base(big.Path); base != "Big-Q4_K_M-00001-of-00003.gguf" {
		t.Errorf("split model Path = %q, want first shard", big.Path)
	}

	if _, found := byName["Small-Q8_0"]; !found {
		t.Errorf("missing single-file model; got %v", byName)
	}
	if _, found := byName["mmproj-Big-BF16"]; found {
		t.Errorf("mmproj projector should not be a standalone model")
	}
}
8 changes: 8 additions & 0 deletions internal/model/scan_paths_other.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
//go:build !windows

package model

// platformExtraScanPaths returns additional platform-specific scan locations.
// On non-Windows hosts there are none: Linux server conventions
// (/mnt/data/models, /opt/*/models) are handled directly in DefaultScanPaths.
func platformExtraScanPaths() []string {
	var extra []string // deliberately left nil: nothing to add on this platform
	return extra
}
66 changes: 66 additions & 0 deletions internal/model/scan_paths_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package model

import (
"os"
"strings"
"testing"
)

// TestDedupePaths expects dedupePaths to drop empty entries and repeated
// entries while keeping the first occurrence of each path in input order.
func TestDedupePaths(t *testing.T) {
	input := []string{"a", "b", "", "a", "c", "b", ""}
	want := []string{"a", "b", "c"}

	got := dedupePaths(input)

	gotJoined := strings.Join(got, ",")
	wantJoined := strings.Join(want, ",")
	if gotJoined == wantJoined {
		return
	}
	t.Errorf("dedupePaths = %v, want %v", got, want)
}

// TestDefaultScanPaths_MultiPathEnv sets AIMA_MODEL_DIR to two directories
// joined by the OS path-list separator and expects DefaultScanPaths to return
// both of them, with no path listed twice.
func TestDefaultScanPaths_MultiPathEnv(t *testing.T) {
	a, b := t.TempDir(), t.TempDir()
	t.Setenv("AIMA_MODEL_DIR", a+string(os.PathListSeparator)+b)

	paths := DefaultScanPaths()

	present := make(map[string]bool, len(paths))
	for _, p := range paths {
		present[p] = true
	}
	if bothPresent := present[a] && present[b]; !bothPresent {
		t.Errorf("DefaultScanPaths missing one of the AIMA_MODEL_DIR entries\n got: %v\n want both: %q, %q", paths, a, b)
	}

	// The returned list must not contain the same path more than once.
	visited := make(map[string]bool, len(paths))
	for _, p := range paths {
		if visited[p] {
			t.Errorf("duplicate path in DefaultScanPaths: %q", p)
		}
		visited[p] = true
	}
}

// TestDefaultScanPaths_EnvOverrides clears AIMA_MODEL_DIR, points HF_HOME and
// OLLAMA_MODELS at temporary directories, and expects DefaultScanPaths to
// include "<HF_HOME>/hub" and the OLLAMA_MODELS directory itself.
func TestDefaultScanPaths_EnvOverrides(t *testing.T) {
	hfHome := t.TempDir()
	ollamaDir := t.TempDir()
	t.Setenv("AIMA_MODEL_DIR", "")
	t.Setenv("HF_HOME", hfHome)
	t.Setenv("OLLAMA_MODELS", ollamaDir)

	paths := DefaultScanPaths()
	wantHF := hfHome + string(os.PathSeparator) + "hub"

	foundHF, foundOM := false, false
	for _, p := range paths {
		foundHF = foundHF || p == wantHF
		foundOM = foundOM || p == ollamaDir
	}

	if !foundHF {
		t.Errorf("HF_HOME/hub not in scan paths: want %q in %v", wantHF, paths)
	}
	if !foundOM {
		t.Errorf("OLLAMA_MODELS not in scan paths: want %q in %v", ollamaDir, paths)
	}
}
Loading
Loading