Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 30 additions & 17 deletions cmd/aima/tooldeps_model.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,28 +153,41 @@ func annotateModelsFromCatalog(models []*state.Model, cat *knowledge.Catalog) {
assetsByName[strings.ToLower(strings.TrimSpace(alias))] = ma
}
}
draftKeys := cat.SpeculativeDraftModelKeys()

for _, m := range models {
if m == nil {
continue
}
ma := assetsByName[strings.ToLower(strings.TrimSpace(m.Name))]
if ma == nil {
continue
}
if strings.TrimSpace(m.ModelClass) == "" {
m.ModelClass = strings.TrimSpace(ma.Metadata.ModelClass)
}
if strings.TrimSpace(m.UIRole) == "" {
m.UIRole = strings.TrimSpace(ma.UI.Role)
}
if strings.TrimSpace(m.UIDisplayNote) == "" {
m.UIDisplayNote = strings.TrimSpace(ma.UI.DisplayNote)
}
if strings.TrimSpace(m.UIDisplayNoteZh) == "" {
m.UIDisplayNoteZh = strings.TrimSpace(ma.UI.DisplayNoteZh)
if ma := assetsByName[strings.ToLower(strings.TrimSpace(m.Name))]; ma != nil {
if strings.TrimSpace(m.ModelClass) == "" {
m.ModelClass = strings.TrimSpace(ma.Metadata.ModelClass)
}
if strings.TrimSpace(m.UIRole) == "" {
m.UIRole = strings.TrimSpace(ma.UI.Role)
}
if strings.TrimSpace(m.UIDisplayNote) == "" {
m.UIDisplayNote = strings.TrimSpace(ma.UI.DisplayNote)
}
if strings.TrimSpace(m.UIDisplayNoteZh) == "" {
m.UIDisplayNoteZh = strings.TrimSpace(ma.UI.DisplayNoteZh)
}
if m.StandaloneDeploy == nil {
m.StandaloneDeploy = ma.Capabilities.StandaloneDeploy
}
}
if m.StandaloneDeploy == nil {
m.StandaloneDeploy = ma.Capabilities.StandaloneDeploy

// Speculative draft heads (e.g. DFlash/MTP) are companions of their
// parent model — the catalog names them via each variant's
// speculative_config.model — not independently deployable models.
if draftKeys[knowledge.NormalizeModelKey(m.Name)] {
if m.StandaloneDeploy == nil {
notStandalone := false
m.StandaloneDeploy = &notStandalone
}
if strings.TrimSpace(m.UIRole) == "" {
m.UIRole = "draft"
}
}
}
}
Expand Down
53 changes: 53 additions & 0 deletions cmd/aima/tooldeps_model_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -581,3 +581,56 @@ func writeScanModelFixture(dir string, weightSize int) error {
}
return os.WriteFile(filepath.Join(dir, "model.safetensors"), make([]byte, weightSize), 0o644)
}

// A speculative draft head (e.g. DFlash/MTP) appears in the catalog only as a
// variant's speculative_config.model. Annotation must flag every scanned
// artifact of that draft as non-standalone and give it the "draft" UI role,
// while the parent model it accompanies is left deployable and untagged.
func TestAnnotateModelsFromCatalog_SpeculativeDraftNotStandalone(t *testing.T) {
	speculative := map[string]any{
		"method": "dflash",
		"model":  "/models/Qwen3.6-35B-A3B-DFlash",
	}
	cat := &knowledge.Catalog{
		ModelAssets: []knowledge.ModelAsset{{
			Metadata: knowledge.ModelMetadata{
				Name:    "qwen3.6-35b-a3b",
				Aliases: []string{"Qwen3.6-35B-A3B"},
			},
			Variants: []knowledge.ModelVariant{{
				Name:          "dflash",
				DefaultConfig: map[string]any{"speculative_config": speculative},
			}},
		}},
	}

	const parentName = "Qwen3.6-35B-A3B" // parent: stays deployable
	draftNames := []string{
		"Qwen3.6-35B-A3B-DFlash",        // draft (safetensors)
		"Qwen3.6-35B-A3B-DFlash-Q4_K_M", // draft (gguf quant)
	}
	models := []*state.Model{{Name: parentName}}
	for _, n := range draftNames {
		models = append(models, &state.Model{Name: n})
	}

	annotateModelsFromCatalog(models, cat)

	// Index the annotated models by name so each expectation can look up its
	// subject directly.
	annotated := make(map[string]*state.Model, len(models))
	for _, model := range models {
		annotated[model.Name] = model
	}

	for _, draftName := range draftNames {
		draft := annotated[draftName]
		if flag := draft.StandaloneDeploy; flag == nil || *flag {
			t.Errorf("%s: StandaloneDeploy = %v, want non-nil false", draftName, flag)
		}
		if role := draft.UIRole; role != "draft" {
			t.Errorf("%s: UIRole = %q, want %q", draftName, role, "draft")
		}
	}

	parent := annotated[parentName]
	if flag := parent.StandaloneDeploy; flag != nil && !*flag {
		t.Errorf("parent model must not be marked non-standalone")
	}
	if parent.UIRole == "draft" {
		t.Errorf("parent model must not be tagged as a draft")
	}
}
71 changes: 71 additions & 0 deletions internal/knowledge/draft.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
package knowledge

import (
"path"
"regexp"
"strings"
)

// quantSuffixToken matches a single '-'-delimited trailing token that denotes a
// quantization, precision, or storage-layout variant rather than model identity
// (e.g. "q4_k_m", "q4_k_xl", "iq4_nl", "bf16", "ud", "unfused"). Role-bearing
// tokens such as "dflash"/"mtp"/"flash" are deliberately excluded so a draft
// head keeps its identity.
//
// The k-quant class accepts 'x' so size tags like "q4_k_xl" match, and the
// i-quant class accepts 'm', 'n' and 'l' so "iq2_m" and "iq4_nl" match. The
// pattern is fully anchored and contains only lowercase letters, so callers
// must lowercase the token before matching.
var quantSuffixToken = regexp.MustCompile(`^(?:q\d[\dkmslx_]*|iq\d[\dxsamnl_]*|bf16|fp16|fp32|fp8|f16|f32|int4|int8|nf4|mxfp4|ud|awq|gptq|gguf|mlx|unfused|fused)$`)

// NormalizeModelKey derives a comparison key from a model name: the name is
// lowercased and trimmed, then every trailing '-'-separated token that
// quantSuffixToken recognizes (quantization, precision, storage layout) is
// dropped, so different on-disk artifacts of one logical model share a key.
// Tokens that carry a role, such as "dflash", are not recognized and therefore
// survive, which keeps a draft head ("<base>-dflash") distinct from its parent
// ("<base>").
//
//	"Qwen3.6-35B-A3B-UD-Q4_K_M"     -> "qwen3.6-35b-a3b"
//	"qwen3.6-35b-a3b-bf16-unfused"  -> "qwen3.6-35b-a3b"
//	"Qwen3.6-35B-A3B-DFlash-Q4_K_M" -> "qwen3.6-35b-a3b-dflash"
//	"glm-4.7-flash"                 -> "glm-4.7-flash"
//
// An empty or whitespace-only name yields "". The first token is never
// stripped, so a name consisting solely of a suffix token is returned as is.
func NormalizeModelKey(name string) string {
	key := strings.ToLower(name)
	key = strings.TrimSpace(key)
	if key == "" {
		return ""
	}

	parts := strings.Split(key, "-")
	// Walk backwards over the suffix tokens, always keeping at least one part.
	keep := len(parts)
	for keep > 1 && quantSuffixToken.MatchString(parts[keep-1]) {
		keep--
	}
	return strings.Join(parts[:keep], "-")
}

// SpeculativeDraftModelKeys collects the speculative_config.model reference of
// every variant of every model asset in the catalog and returns the set of
// their normalized keys. A scanned model whose NormalizeModelKey is a member
// of this set is a speculative draft head (e.g. DFlash/MTP): a companion of
// its parent model rather than an independently deployable model.
//
// The result is always a non-nil map; a nil catalog yields an empty one.
func (c *Catalog) SpeculativeDraftModelKeys() map[string]bool {
	drafts := map[string]bool{}
	if c == nil {
		return drafts
	}

	for i := range c.ModelAssets {
		variants := c.ModelAssets[i].Variants
		for j := range variants {
			ref := speculativeModelRef(variants[j].DefaultConfig)
			if ref != "" {
				// A reference can be a bare name or a path in either POSIX
				// ("/models/X") or Windows ("D:\models\X") form. Unify the
				// separators and keep only the final path element.
				slashed := strings.ReplaceAll(ref, `\`, "/")
				key := NormalizeModelKey(path.Base(slashed))
				if key != "" {
					drafts[key] = true
				}
			}
		}
	}
	return drafts
}

// speculativeModelRef extracts the whitespace-trimmed "model" string nested
// under the "speculative_config" entry of a variant's default config. It
// returns "" when dc has no such entry, when the entry is not a
// map[string]any, or when its "model" value is missing or not a string.
func speculativeModelRef(dc map[string]any) string {
	if spec, isMap := dc["speculative_config"].(map[string]any); isMap {
		if ref, isString := spec["model"].(string); isString {
			return strings.TrimSpace(ref)
		}
	}
	return ""
}
59 changes: 59 additions & 0 deletions internal/knowledge/draft_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package knowledge

import "testing"

// TestNormalizeModelKey checks, case by case, that NormalizeModelKey lowercases
// the name and drops trailing quantization/precision/layout tokens while
// leaving identity-bearing tokens ("dflash", "flash") in place.
func TestNormalizeModelKey(t *testing.T) {
	type testCase struct {
		label    string
		input    string
		expected string
	}
	table := []testCase{
		{label: "plain", input: "qwen3.6-35b-a3b", expected: "qwen3.6-35b-a3b"},
		{label: "draft safetensors", input: "Qwen3.6-35B-A3B-DFlash", expected: "qwen3.6-35b-a3b-dflash"},
		{label: "draft gguf quant", input: "Qwen3.6-35B-A3B-DFlash-Q4_K_M", expected: "qwen3.6-35b-a3b-dflash"},
		{label: "bf16", input: "qwen3.6-35b-a3b-bf16", expected: "qwen3.6-35b-a3b"},
		{label: "bf16 unfused layout", input: "qwen3.6-35b-a3b-bf16-unfused", expected: "qwen3.6-35b-a3b"},
		{label: "q4 quant", input: "qwen3.6-35b-a3b-q4_k_m", expected: "qwen3.6-35b-a3b"},
		{label: "unsloth dynamic quant", input: "Qwen3.6-35B-A3B-UD-Q4_K_M", expected: "qwen3.6-35b-a3b"},
		{label: "flash is identity not quant", input: "glm-4.7-flash", expected: "glm-4.7-flash"},
		{label: "embedding q8", input: "qwen3-embedding-4b-q8_0", expected: "qwen3-embedding-4b"},
	}
	for _, tc := range table {
		t.Run(tc.label, func(t *testing.T) {
			got := NormalizeModelKey(tc.input)
			if got != tc.expected {
				t.Errorf("NormalizeModelKey(%q) = %q, want %q", tc.input, got, tc.expected)
			}
		})
	}
}

func TestSpeculativeDraftModelKeys(t *testing.T) {
cat := &Catalog{
ModelAssets: []ModelAsset{{
Metadata: ModelMetadata{Name: "qwen3.6-35b-a3b"},
Variants: []ModelVariant{
{Name: "plain"}, // no speculative_config
{Name: "dflash", DefaultConfig: map[string]any{
"speculative_config": map[string]any{
"method": "dflash",
"model": "/models/Qwen3.6-35B-A3B-DFlash",
},
}},
},
}},
}
keys := cat.SpeculativeDraftModelKeys()
if !keys["qwen3.6-35b-a3b-dflash"] {
t.Fatalf("expected draft key %q in %v", "qwen3.6-35b-a3b-dflash", keys)
}
if keys["qwen3.6-35b-a3b"] {
t.Errorf("base model must not be a draft key: %v", keys)
}
}

// TestSpeculativeDraftModelKeys_NilCatalog verifies that calling the method on
// a nil *Catalog is safe and reports no draft keys.
func TestSpeculativeDraftModelKeys_NilCatalog(t *testing.T) {
	keys := (*Catalog)(nil).SpeculativeDraftModelKeys()
	if len(keys) == 0 {
		return
	}
	t.Errorf("nil catalog should yield no draft keys, got %v", keys)
}
Loading