diff --git a/cmd/aima/tooldeps_model.go b/cmd/aima/tooldeps_model.go
index c604902..e537d07 100644
--- a/cmd/aima/tooldeps_model.go
+++ b/cmd/aima/tooldeps_model.go
@@ -153,28 +153,41 @@ func annotateModelsFromCatalog(models []*state.Model, cat *knowledge.Catalog) {
assetsByName[strings.ToLower(strings.TrimSpace(alias))] = ma
}
}
+ draftKeys := cat.SpeculativeDraftModelKeys()
+
for _, m := range models {
if m == nil {
continue
}
- ma := assetsByName[strings.ToLower(strings.TrimSpace(m.Name))]
- if ma == nil {
- continue
- }
- if strings.TrimSpace(m.ModelClass) == "" {
- m.ModelClass = strings.TrimSpace(ma.Metadata.ModelClass)
- }
- if strings.TrimSpace(m.UIRole) == "" {
- m.UIRole = strings.TrimSpace(ma.UI.Role)
- }
- if strings.TrimSpace(m.UIDisplayNote) == "" {
- m.UIDisplayNote = strings.TrimSpace(ma.UI.DisplayNote)
- }
- if strings.TrimSpace(m.UIDisplayNoteZh) == "" {
- m.UIDisplayNoteZh = strings.TrimSpace(ma.UI.DisplayNoteZh)
+ if ma := assetsByName[strings.ToLower(strings.TrimSpace(m.Name))]; ma != nil {
+ if strings.TrimSpace(m.ModelClass) == "" {
+ m.ModelClass = strings.TrimSpace(ma.Metadata.ModelClass)
+ }
+ if strings.TrimSpace(m.UIRole) == "" {
+ m.UIRole = strings.TrimSpace(ma.UI.Role)
+ }
+ if strings.TrimSpace(m.UIDisplayNote) == "" {
+ m.UIDisplayNote = strings.TrimSpace(ma.UI.DisplayNote)
+ }
+ if strings.TrimSpace(m.UIDisplayNoteZh) == "" {
+ m.UIDisplayNoteZh = strings.TrimSpace(ma.UI.DisplayNoteZh)
+ }
+ if m.StandaloneDeploy == nil {
+ m.StandaloneDeploy = ma.Capabilities.StandaloneDeploy
+ }
}
- if m.StandaloneDeploy == nil {
- m.StandaloneDeploy = ma.Capabilities.StandaloneDeploy
+
+ // Speculative draft heads (e.g. DFlash/MTP) are companions of their
+ // parent model — the catalog names them via each variant's
+ // speculative_config.model — not independently deployable models.
+ if draftKeys[knowledge.NormalizeModelKey(m.Name)] {
+ if m.StandaloneDeploy == nil {
+ notStandalone := false
+ m.StandaloneDeploy = &notStandalone
+ }
+ if strings.TrimSpace(m.UIRole) == "" {
+ m.UIRole = "draft"
+ }
}
}
}
diff --git a/cmd/aima/tooldeps_model_test.go b/cmd/aima/tooldeps_model_test.go
index 55d98f0..f61cc46 100644
--- a/cmd/aima/tooldeps_model_test.go
+++ b/cmd/aima/tooldeps_model_test.go
@@ -581,3 +581,56 @@ func writeScanModelFixture(dir string, weightSize int) error {
}
return os.WriteFile(filepath.Join(dir, "model.safetensors"), make([]byte, weightSize), 0o644)
}
+
+// TestAnnotateModelsFromCatalog_SpeculativeDraftNotStandalone covers a draft
+// head (e.g. DFlash/MTP) that the catalog names only through a variant's
+// speculative_config.model. Both on-disk forms of the draft must come back
+// non-standalone with UIRole "draft"; the parent must not be marked that way.
+func TestAnnotateModelsFromCatalog_SpeculativeDraftNotStandalone(t *testing.T) {
+	const (
+		parentName = "Qwen3.6-35B-A3B"
+		draftName  = "Qwen3.6-35B-A3B-DFlash"        // draft (safetensors)
+		quantName  = "Qwen3.6-35B-A3B-DFlash-Q4_K_M" // draft (gguf quant)
+	)
+	dflash := knowledge.ModelVariant{
+		Name: "dflash",
+		DefaultConfig: map[string]any{
+			"speculative_config": map[string]any{
+				"method": "dflash",
+				"model":  "/models/Qwen3.6-35B-A3B-DFlash",
+			},
+		},
+	}
+	cat := &knowledge.Catalog{
+		ModelAssets: []knowledge.ModelAsset{{
+			Metadata: knowledge.ModelMetadata{
+				Name:    "qwen3.6-35b-a3b",
+				Aliases: []string{parentName},
+			},
+			Variants: []knowledge.ModelVariant{dflash},
+		}},
+	}
+	parent := &state.Model{Name: parentName}
+	drafts := []*state.Model{{Name: draftName}, {Name: quantName}}
+
+	// Annotate the parent and both drafts in one call, parent first.
+	annotateModelsFromCatalog(append([]*state.Model{parent}, drafts...), cat)
+
+	for _, d := range drafts {
+		if sd := d.StandaloneDeploy; sd == nil || *sd {
+			t.Errorf("%s: StandaloneDeploy = %v, want non-nil false", d.Name, sd)
+		}
+		if d.UIRole != "draft" {
+			t.Errorf("%s: UIRole = %q, want %q", d.Name, d.UIRole, "draft")
+		}
+	}
+
+	// A nil StandaloneDeploy is acceptable for the parent; only an explicit
+	// false would mean it was wrongly treated as a draft.
+	if sd := parent.StandaloneDeploy; sd != nil && !*sd {
+		t.Errorf("parent model must not be marked non-standalone")
+	}
+	if parent.UIRole == "draft" {
+		t.Errorf("parent model must not be tagged as a draft")
+	}
+}
diff --git a/internal/knowledge/draft.go b/internal/knowledge/draft.go
new file mode 100644
index 0000000..984172b
--- /dev/null
+++ b/internal/knowledge/draft.go
@@ -0,0 +1,71 @@
+package knowledge
+
+import (
+ "path"
+ "regexp"
+ "strings"
+)
+
+// quantSuffixToken matches one '-'-delimited trailing token that names a
+// quantization, precision, or storage-layout variant rather than model identity
+// (e.g. "q4_k_m", "q4_k_xl", "iq4_nl", "bf16", "ud", "unfused"). Role-bearing
+// tokens such as "dflash"/"mtp"/"flash" are deliberately not listed so a draft
+// head keeps its identity. The pattern expects an already-lowercased token.
+var quantSuffixToken = regexp.MustCompile(`^(?:q\d[\dklmsx_]*|iq\d[\dalmnsx_]*|bf16|fp16|fp32|fp8|f16|f32|int4|int8|nf4|mxfp4|ud|awq|gptq|gguf|mlx|unfused|fused)$`)
+
+// NormalizeModelKey returns the lowercased, whitespace-trimmed model name with
+// every trailing '-'-separated token that matches quantSuffixToken removed, so
+// that differently packaged artifacts of one model map to the same key. The
+// first token is never removed, and tokens that do not match (for example
+// "dflash") stay in place, which keeps a draft head distinct from its parent.
+//
+//	"Qwen3.6-35B-A3B-UD-Q4_K_M"     -> "qwen3.6-35b-a3b"
+//	"qwen3.6-35b-a3b-bf16-unfused"  -> "qwen3.6-35b-a3b"
+//	"Qwen3.6-35B-A3B-DFlash-Q4_K_M" -> "qwen3.6-35b-a3b-dflash"
+//	"glm-4.7-flash"                 -> "glm-4.7-flash"
+func NormalizeModelKey(name string) string {
+	key := strings.TrimSpace(strings.ToLower(name))
+	parts := strings.Split(key, "-")
+	keep := len(parts)
+	// Shrink from the right while the last kept token is a suffix token.
+	for keep > 1 && quantSuffixToken.MatchString(parts[keep-1]) {
+		keep--
+	}
+	return strings.Join(parts[:keep], "-")
+}
+
+// SpeculativeDraftModelKeys collects the speculative_config.model value of every
+// variant of every model asset and returns the normalized keys as a set. Each
+// reference is reduced to its final path element (with '\' treated as '/')
+// before NormalizeModelKey is applied; variants without a reference are
+// skipped. A nil catalog yields an empty, non-nil map.
+func (c *Catalog) SpeculativeDraftModelKeys() map[string]bool {
+	drafts := map[string]bool{}
+	if c == nil {
+		return drafts
+	}
+	for ai := range c.ModelAssets {
+		variants := c.ModelAssets[ai].Variants
+		for vi := range variants {
+			ref := speculativeModelRef(variants[vi].DefaultConfig)
+			if ref == "" {
+				continue
+			}
+			// ref may be "/models/X", "D:\models\X" or a bare "X"; keep the last element.
+			name := path.Base(strings.ReplaceAll(ref, `\`, "/"))
+			if key := NormalizeModelKey(name); key != "" {
+				drafts[key] = true
+			}
+		}
+	}
+	return drafts
+}
+
+// speculativeModelRef returns the trimmed speculative_config.model string, or "".
+func speculativeModelRef(dc map[string]any) string {
+	nested, _ := dc["speculative_config"].(map[string]any)
+	// Indexing a nil map and a failed assertion both yield zero values, so a
+	// missing or mistyped entry at either level falls through to "".
+	ref, _ := nested["model"].(string)
+	return strings.TrimSpace(ref)
+}
diff --git a/internal/knowledge/draft_test.go b/internal/knowledge/draft_test.go
new file mode 100644
index 0000000..2164a50
--- /dev/null
+++ b/internal/knowledge/draft_test.go
@@ -0,0 +1,59 @@
+package knowledge
+
+import "testing"
+
+// TestNormalizeModelKey is a table test for NormalizeModelKey: trailing
+// quantization, precision and layout tokens must be dropped, while tokens that
+// are part of the model's identity ("dflash", "flash") must be kept.
+func TestNormalizeModelKey(t *testing.T) {
+	for _, tc := range []struct{ label, input, expected string }{
+		{"plain", "qwen3.6-35b-a3b", "qwen3.6-35b-a3b"},
+		{"draft safetensors", "Qwen3.6-35B-A3B-DFlash", "qwen3.6-35b-a3b-dflash"},
+		{"draft gguf quant", "Qwen3.6-35B-A3B-DFlash-Q4_K_M", "qwen3.6-35b-a3b-dflash"},
+		{"bf16", "qwen3.6-35b-a3b-bf16", "qwen3.6-35b-a3b"},
+		{"bf16 unfused layout", "qwen3.6-35b-a3b-bf16-unfused", "qwen3.6-35b-a3b"},
+		{"q4 quant", "qwen3.6-35b-a3b-q4_k_m", "qwen3.6-35b-a3b"},
+		{"unsloth dynamic quant", "Qwen3.6-35B-A3B-UD-Q4_K_M", "qwen3.6-35b-a3b"},
+		{"flash is identity not quant", "glm-4.7-flash", "glm-4.7-flash"},
+		{"embedding q8", "qwen3-embedding-4b-q8_0", "qwen3-embedding-4b"},
+	} {
+		t.Run(tc.label, func(t *testing.T) {
+			got := NormalizeModelKey(tc.input)
+			if got == tc.expected {
+				return
+			}
+			t.Errorf("NormalizeModelKey(%q) = %q, want %q", tc.input, got, tc.expected)
+		})
+	}
+}
+
+// TestSpeculativeDraftModelKeys checks that a variant's speculative_config.model
+// path produces the normalized draft key and that the base model does not.
+func TestSpeculativeDraftModelKeys(t *testing.T) {
+	const draftKey = "qwen3.6-35b-a3b-dflash"
+	dflash := ModelVariant{Name: "dflash", DefaultConfig: map[string]any{
+		"speculative_config": map[string]any{
+			"method": "dflash",
+			"model":  "/models/Qwen3.6-35B-A3B-DFlash",
+		},
+	}}
+	asset := ModelAsset{
+		Metadata: ModelMetadata{Name: "qwen3.6-35b-a3b"},
+		// "plain" has no speculative_config, so it must contribute no key.
+		Variants: []ModelVariant{{Name: "plain"}, dflash},
+	}
+	got := (&Catalog{ModelAssets: []ModelAsset{asset}}).SpeculativeDraftModelKeys()
+	if !got[draftKey] {
+		t.Fatalf("expected draft key %q in %v", draftKey, got)
+	}
+	if got["qwen3.6-35b-a3b"] {
+		t.Errorf("base model must not be a draft key: %v", got)
+	}
+}
+
+// TestSpeculativeDraftModelKeys_NilCatalog expects no keys from a nil *Catalog.
+func TestSpeculativeDraftModelKeys_NilCatalog(t *testing.T) {
+	if got := (*Catalog)(nil).SpeculativeDraftModelKeys(); len(got) > 0 {
+		t.Errorf("nil catalog should yield no draft keys, got %v", got)
+	}
+}