diff --git a/internal/mcp/tools_deploy.go b/internal/mcp/tools_deploy.go index 1124073..de9b5cb 100644 --- a/internal/mcp/tools_deploy.go +++ b/internal/mcp/tools_deploy.go @@ -4,9 +4,103 @@ import ( "context" "encoding/json" "fmt" + "strings" ) func registerDeployTools(s *Server, deps *ToolDeps) { + // deploy.defaults + s.RegisterTool(&Tool{ + Name: "deploy.defaults", + Description: "Get, set, or clear device-local default deployment settings for one model. This stores operator preference in local system config, not reusable AIMA knowledge.", + InputSchema: schema( + `"action":{"type":"string","enum":["get","set","clear"],"description":"Operation to perform."},`+ + `"model":{"type":"string","description":"Model name whose deployment defaults should be managed."},`+ + `"engine":{"type":"string","description":"Default engine override when action=set."},`+ + `"slot":{"type":"string","description":"Default slot when action=set."},`+ + `"no_pull":{"type":"boolean","description":"Default resource policy when action=set."},`+ + `"port":{"type":"string","description":"Default port value when action=set."},`+ + `"config":{"type":"object","description":"Default engine config overrides when action=set."}`, + "action", "model"), + Handler: func(ctx context.Context, params json.RawMessage) (*ToolResult, error) { + var p struct { + Action string `json:"action"` + Model string `json:"model"` + Engine string `json:"engine"` + Slot string `json:"slot"` + NoPull *bool `json:"no_pull"` + Port string `json:"port"` + Config map[string]any `json:"config"` + } + if err := json.Unmarshal(params, &p); err != nil { + return nil, fmt.Errorf("parse params: %w", err) + } + model := strings.TrimSpace(p.Model) + if model == "" { + return ErrorResult("model is required"), nil + } + key := deployDefaultsConfigKey(model) + action := strings.ToLower(strings.TrimSpace(p.Action)) + switch action { + case "get": + if deps.GetConfig == nil { + return ErrorResult("deploy.defaults get not implemented"), 
nil + } + raw, err := deps.GetConfig(ctx, key) + if err != nil || strings.TrimSpace(raw) == "" { + if err != nil && !isMissingConfigValue(err) { + return nil, fmt.Errorf("get deploy defaults for %s: %w", model, err) + } + data, _ := json.Marshal(map[string]any{"model": model, "exists": false}) + return TextResult(string(data)), nil + } + var stored map[string]any + if err := json.Unmarshal([]byte(raw), &stored); err != nil { + return nil, fmt.Errorf("parse deploy defaults for %s: %w", model, err) + } + data, _ := json.Marshal(map[string]any{"model": model, "exists": true, "defaults": stored}) + return TextResult(string(data)), nil + case "set": + if deps.SetConfig == nil { + return ErrorResult("deploy.defaults set not implemented"), nil + } + noPull := true + if p.NoPull != nil { + noPull = *p.NoPull + } + payload := map[string]any{ + "engine": strings.TrimSpace(p.Engine), + "slot": strings.TrimSpace(p.Slot), + "no_pull": noPull, + "port": strings.TrimSpace(p.Port), + "config": p.Config, + } + if payload["config"] == nil { + payload["config"] = map[string]any{} + } + raw, err := json.Marshal(payload) + if err != nil { + return nil, fmt.Errorf("marshal deploy defaults for %s: %w", model, err) + } + if err := deps.SetConfig(ctx, key, string(raw)); err != nil { + return nil, fmt.Errorf("set deploy defaults for %s: %w", model, err) + } + data, _ := json.Marshal(map[string]any{"model": model, "exists": true, "defaults": payload}) + return TextResult(string(data)), nil + case "clear": + if deps.SetConfig == nil { + return ErrorResult("deploy.defaults clear not implemented"), nil + } + if err := deps.SetConfig(ctx, key, ""); err != nil { + return nil, fmt.Errorf("clear deploy defaults for %s: %w", model, err) + } + data, _ := json.Marshal(map[string]any{"model": model, "exists": false}) + return TextResult(string(data)), nil + default: + return ErrorResult("action must be one of: get, set, clear"), nil + } + }, + }) + // deploy.apply s.RegisterTool(&Tool{ Name: 
// deployDefaultsConfigKey returns the config-store key that holds the
// deployment defaults for model. The name is lower-cased and trimmed, and both
// kinds of path separator ("\" and "/") are replaced with "_" before the
// "deploy.defaults." prefix is applied.
func deployDefaultsConfigKey(model string) string {
	name := strings.ToLower(model)
	name = strings.TrimSpace(name)
	name = strings.NewReplacer("\\", "_", "/", "_").Replace(name)
	return "deploy.defaults." + name
}

// isMissingConfigValue reports whether err looks like a "value does not exist"
// failure. The decision is made purely on the error text: a case-insensitive
// match for "not found" or "no rows". A nil error is never "missing".
func isMissingConfigValue(err error) bool {
	if err == nil {
		return false
	}
	msg := strings.ToLower(err.Error())
	for _, marker := range []string{"not found", "no rows"} {
		if strings.Contains(msg, marker) {
			return true
		}
	}
	return false
}
slot-1", gotSlot) + } + if gotConfig["gpu_memory_utilization"] != 0.8 { + t.Fatalf("gpu_memory_utilization = %#v, want 0.8", gotConfig["gpu_memory_utilization"]) + } + if gotConfig["kv_cache_dtype"] != "fp8" { + t.Fatalf("kv_cache_dtype = %#v, want fp8", gotConfig["kv_cache_dtype"]) + } + if gotConfig["max_cold_start_s"] != float64(12) && gotConfig["max_cold_start_s"] != 12 { + t.Fatalf("max_cold_start_s = %#v, want 12", gotConfig["max_cold_start_s"]) + } + if len(result.Content) == 0 || result.IsError { + t.Fatalf("unexpected result = %+v", result) + } +} + +func TestDeployDefaultsStoresDeviceLocalSettings(t *testing.T) { + s := NewServer() + store := map[string]string{} + registerDeployTools(s, &ToolDeps{ + GetConfig: func(ctx context.Context, key string) (string, error) { + value, ok := store[key] + if !ok { + return "", context.Canceled + } + return value, nil + }, + SetConfig: func(ctx context.Context, key, value string) error { + store[key] = value + return nil + }, + }) + + setResult, err := s.ExecuteTool(context.Background(), "deploy.defaults", json.RawMessage(`{ + "action":"set", + "model":"Qwen3-8B", + "engine":"vllm", + "slot":"slot-1", + "no_pull":true, + "port":"8003", + "config":{"gpu_memory_utilization":0.7,"max_model_len":8192} + }`)) + if err != nil { + t.Fatalf("set ExecuteTool: %v", err) + } + if setResult.IsError { + t.Fatalf("set returned error: %+v", setResult) + } + + getResult, err := s.ExecuteTool(context.Background(), "deploy.defaults", json.RawMessage(`{"action":"get","model":"qwen3-8b"}`)) + if err != nil { + t.Fatalf("get ExecuteTool: %v", err) + } + if getResult.IsError { + t.Fatalf("get returned error: %+v", getResult) + } + var got struct { + Exists bool `json:"exists"` + Defaults struct { + Engine string `json:"engine"` + Slot string `json:"slot"` + NoPull bool `json:"no_pull"` + Port string `json:"port"` + Config map[string]any `json:"config"` + } `json:"defaults"` + } + if err := json.Unmarshal([]byte(getResult.Content[0].Text), 
&got); err != nil { + t.Fatalf("unmarshal get result: %v", err) + } + if !got.Exists { + t.Fatal("defaults should exist") + } + if got.Defaults.Engine != "vllm" || got.Defaults.Slot != "slot-1" || !got.Defaults.NoPull || got.Defaults.Port != "8003" { + t.Fatalf("defaults = %+v", got.Defaults) + } + if got.Defaults.Config["gpu_memory_utilization"] != float64(0.7) { + t.Fatalf("gpu_memory_utilization = %#v, want 0.7", got.Defaults.Config["gpu_memory_utilization"]) + } + + clearResult, err := s.ExecuteTool(context.Background(), "deploy.defaults", json.RawMessage(`{"action":"clear","model":"qwen3-8b"}`)) + if err != nil { + t.Fatalf("clear ExecuteTool: %v", err) + } + if clearResult.IsError { + t.Fatalf("clear returned error: %+v", clearResult) + } + getResult, err = s.ExecuteTool(context.Background(), "deploy.defaults", json.RawMessage(`{"action":"get","model":"qwen3-8b"}`)) + if err != nil { + t.Fatalf("get after clear ExecuteTool: %v", err) + } + if !strings.Contains(getResult.Content[0].Text, `"exists":false`) { + t.Fatalf("get after clear = %s, want exists=false", getResult.Content[0].Text) + } +} + func TestDeployApplyPassesNoPull(t *testing.T) { s := NewServer() diff --git a/internal/ui/handler_test.go b/internal/ui/handler_test.go index 0ef7361..9cc319d 100644 --- a/internal/ui/handler_test.go +++ b/internal/ui/handler_test.go @@ -483,6 +483,52 @@ func TestRegisterRoutes_IndexIncludesDeploymentStageFeedback(t *testing.T) { } } +func TestRegisterRoutes_IndexDeployDetailUsesBackendDefaultsAndImmediateClose(t *testing.T) { + t.Parallel() + + mux := http.NewServeMux() + RegisterRoutes(nil)(mux) + + req := httptest.NewRequest(http.MethodGet, "/ui/", nil) + rec := httptest.NewRecorder() + mux.ServeHTTP(rec, req) + + body := rec.Body.String() + for _, token := range []string{ + `this.callTool('deploy.defaults', { action: 'get', model: modelName })`, + `this.callTool('deploy.defaults', { action: 'set', model: modelName, ...payload })`, + `const data = await 
this.callTool('deploy.run', request);`, + `this.deployDetailOpen = false;`, + `await this.refreshDeployDryRun();`, + `if (!kvApplied) suggestions.push({ key: 'kv_cache_dtype', value: 'fp8'`, + `deploy_started_background`, + `deploy_restore_recommended: 'Recommended parameters'`, + } { + if !strings.Contains(body, token) { + t.Fatalf("body missing deploy detail token %q", token) + } + } + + start := strings.Index(body, "async confirmDeployDetail() {") + if start == -1 { + t.Fatal("confirmDeployDetail not found") + } + end := strings.Index(body[start:], "\n componentStatusNote(model)") + if end == -1 { + t.Fatal("could not isolate confirmDeployDetail body") + } + fnBody := body[start : start+end] + closeIdx := strings.Index(fnBody, `this.deployDetailOpen = false;`) + runIdx := strings.Index(fnBody, `const data = await this.callTool('deploy.run', request);`) + if closeIdx == -1 || runIdx == -1 || closeIdx > runIdx { + t.Fatalf("deploy detail should close before awaiting deploy.run, body=%s", fnBody) + } + + if strings.Contains(body, "aima_deploy_defaults:") || strings.Contains(body, "localStorage.setItem(this.deployDefaultsKey()") { + t.Fatal("deploy defaults should not be stored only in browser localStorage") + } +} + func TestRegisterRoutes_IndexIncludesDirectModeRoutingAndModelCards(t *testing.T) { t.Parallel() diff --git a/internal/ui/static/index.html b/internal/ui/static/index.html index 9b62276..ef52de7 100644 --- a/internal/ui/static/index.html +++ b/internal/ui/static/index.html @@ -1562,7 +1562,9 @@ } .modal input[type="text"], .modal input[type="password"], -.modal input[type="url"] { +.modal input[type="url"], +.modal input[type="number"], +.modal select { width: 100%; background: var(--input-bg); border: 1px solid var(--border-light); @@ -1576,7 +1578,17 @@ border-color var(--duration-fast) var(--ease-smooth), background var(--duration-fast) var(--ease-smooth); } -.modal input:focus { +.modal select { + appearance: none; + -webkit-appearance: none; + 
color-scheme: dark; +} +.modal select option { + background: #1f1f24; + color: var(--text); +} +.modal input:focus, +.modal select:focus { border-color: var(--accent); background: var(--msg-tool-hover); } @@ -1624,6 +1636,386 @@ } .modal-feedback.success { color: var(--success); background: rgba(48, 209, 88, 0.1); } .modal-feedback.error { color: var(--error); background: rgba(255, 69, 58, 0.1); } +.modal-feedback.warning { color: var(--warning); background: rgba(255, 159, 10, 0.12); } + +/* Deploy detail modal */ +.deploy-modal { + width: min(680px, calc(100vw - 48px)); + max-height: min(84dvh, 820px); + padding: 0; + overflow: hidden; + display: flex; + flex-direction: column; +} +.deploy-modal.maximized { + width: min(1080px, calc(100vw - 32px)); + height: calc(100dvh - 32px); + max-height: calc(100dvh - 32px); + border-radius: 16px; +} +.deploy-modal.minimized { + width: min(420px, calc(100vw - 32px)); + max-height: none; + margin: 16px; + border-radius: 14px; +} +.deploy-modal-overlay.minimized { + align-items: flex-end; + justify-content: flex-end; + background: transparent; + pointer-events: none; +} +.deploy-modal-overlay.minimized .deploy-modal { + pointer-events: auto; +} +.deploy-modal-header { + display: flex; + align-items: flex-start; + justify-content: space-between; + gap: 14px; + padding: 18px 20px 12px; + border-bottom: 1px solid var(--border-light); + flex-shrink: 0; +} +.deploy-modal-title { + min-width: 0; +} +.deploy-modal-title h3 { + margin-bottom: 4px; + word-break: break-word; + font-size: 16px; + line-height: 1.25; +} +.deploy-modal-subtitle { + color: var(--text-tertiary); + font-size: 12px; +} +.deploy-modal-feedback { + margin: 0; + border-radius: 0; + border-bottom: 1px solid var(--border-light); + padding: 8px 20px; + flex-shrink: 0; +} +.deploy-modal-body { + padding: 16px 20px 12px; + overflow-y: auto; + flex: 1 1 auto; + min-height: 0; +} +.deploy-scroll-body { + padding: 16px 20px 12px; + overflow-y: auto; + flex: 1 1 auto; + 
min-height: 0; +} +.deploy-window-controls { + display: flex; + gap: 6px; + align-items: center; + flex-shrink: 0; +} +.deploy-window-btn, +.deploy-close-btn { + width: 32px; + height: 32px; + border-radius: 999px; + border: 0.5px solid var(--border-medium); + background: var(--input-bg); + color: var(--text-dim); + cursor: pointer; + flex-shrink: 0; + display: inline-flex; + align-items: center; + justify-content: center; + font-family: var(--font-mono); + font-size: 15px; +} +.deploy-window-btn:hover, +.deploy-close-btn:hover { + color: var(--text); + background: var(--btn-icon-hover); +} +.deploy-summary-strip { + display: flex; + flex-direction: column; + gap: 10px; + background: var(--input-bg); + border: 1px solid var(--border-light); + border-radius: var(--radius-sm); + padding: 10px 12px; +} +.deploy-summary-main { + min-width: 0; +} +.deploy-summary-main span, +.deploy-chip span, +.deploy-metric-label { + display: block; + color: var(--text-tertiary); + font-size: 10px; + text-transform: uppercase; + letter-spacing: 0.3px; +} +.deploy-summary-main strong { + display: block; + margin-top: 2px; + color: var(--text); + font-family: var(--font-mono); + font-size: 12px; + font-weight: 600; + overflow-wrap: anywhere; +} +.deploy-chip-row { + display: flex; + flex-wrap: wrap; + gap: 6px; +} +.deploy-chip { + min-width: 96px; + flex: 1 1 96px; + border: 1px solid var(--border-light); + border-radius: 10px; + padding: 7px 8px; + background: rgba(255, 255, 255, 0.03); +} +.deploy-chip strong, +.deploy-metric-value { + display: block; + margin-top: 2px; + color: var(--text); + font-family: var(--font-mono); + font-size: 11px; + font-weight: 500; + overflow-wrap: anywhere; +} +.deploy-grid { + display: grid; + grid-template-columns: repeat(3, minmax(0, 1fr)); + gap: 8px; +} +.deploy-info-card, +.deploy-vram-card, +.deploy-param-row, +.deploy-preview { + background: var(--input-bg); + border: 1px solid var(--border-light); + border-radius: var(--radius-sm); +} 
+.deploy-info-card { + padding: 8px 10px; + min-width: 0; +} +.deploy-info-label { + display: block; + color: var(--text-tertiary); + font-size: 10px; + text-transform: uppercase; + letter-spacing: 0.3px; + margin-bottom: 3px; +} +.deploy-info-value { + display: block; + color: var(--text); + font-family: var(--font-mono); + font-size: 11px; + overflow-wrap: anywhere; +} +.deploy-vram-card { + padding: 14px; +} +.deploy-vram-top { + display: flex; + justify-content: space-between; + gap: 12px; + align-items: flex-start; + margin-bottom: 10px; +} +.deploy-vram-number { + font-family: var(--font-mono); + font-size: 22px; + font-weight: 600; + color: var(--text); + line-height: 1.15; +} +.deploy-vram-state { + font-size: 11px; + color: var(--text-tertiary); + text-align: right; + margin-top: 0; +} +.deploy-vram-meter { + height: 8px; + border-radius: 999px; + background: var(--msg-tool-hover); + overflow: hidden; +} +.deploy-vram-fill { + height: 100%; + border-radius: 999px; + background: var(--success); + transition: width var(--duration-normal) var(--ease-smooth), background var(--duration-normal) var(--ease-smooth); +} +.deploy-vram-fill.warn { background: var(--warning); } +.deploy-vram-fill.crit { background: var(--error); } +.deploy-vram-breakdown { + display: grid; + grid-template-columns: repeat(4, minmax(0, 1fr)); + gap: 8px; + margin-top: 12px; +} +.deploy-vram-breakdown span { + color: var(--text-tertiary); + font-size: 10px; +} +.deploy-vram-breakdown strong { + display: block; + color: var(--text); + font-family: var(--font-mono); + font-size: 12px; + font-weight: 500; +} +.deploy-form-grid { + display: grid; + grid-template-columns: repeat(2, minmax(0, 1fr)); + gap: 12px; +} +.deploy-advanced { + margin-top: 12px; + border: 1px solid var(--border-light); + border-radius: var(--radius-sm); + background: var(--input-bg); +} +.deploy-advanced summary { + cursor: pointer; + color: var(--text-dim); + font-size: 12px; + font-weight: 600; + padding: 9px 11px; 
+ user-select: none; +} +.deploy-advanced .deploy-form-grid { + padding: 0 11px 11px; +} +.deploy-param-list { + display: flex; + flex-direction: column; + gap: 8px; +} +.deploy-param-row { + display: grid; + grid-template-columns: minmax(140px, 1fr) minmax(120px, 1fr) auto auto; + gap: 8px; + align-items: center; + padding: 8px; +} +.deploy-param-row input { + margin: 0; +} +.deploy-param-source { + color: var(--text-tertiary); + font-size: 10px; + white-space: nowrap; +} +.deploy-param-remove { + width: 28px; + height: 28px; + border-radius: 999px; + border: 0.5px solid var(--border-medium); + background: transparent; + color: var(--text-dim); + cursor: pointer; +} +.deploy-param-remove:hover { + color: var(--error); + border-color: rgba(255, 69, 58, 0.35); +} +.deploy-inline-actions { + display: flex; + justify-content: space-between; + align-items: center; + gap: 12px; + margin-top: 10px; + flex-wrap: wrap; +} +.deploy-param-actions { + display: flex; + flex-wrap: wrap; + gap: 12px; +} +.deploy-link-btn { + border: none; + background: transparent; + color: var(--accent); + font-family: var(--font-sans); + font-size: 12px; + cursor: pointer; + padding: 4px 0; +} +.deploy-link-btn:hover { color: var(--accent-hover); } +.deploy-preview { + color: var(--text-dim); + font-family: var(--font-mono); + font-size: 11px; + line-height: 1.5; + padding: 10px; + max-height: 110px; + overflow: auto; + white-space: pre-wrap; + overflow-wrap: anywhere; +} +.deploy-suggestion-list { + display: flex; + flex-wrap: wrap; + gap: 8px; +} +.deploy-suggestion-btn { + border: 0.5px solid rgba(255, 159, 10, 0.35); + background: rgba(255, 159, 10, 0.1); + color: var(--text); + border-radius: 999px; + padding: 7px 10px; + font-size: 11px; + cursor: pointer; +} +.deploy-suggestion-btn:hover { + border-color: var(--warning); + background: rgba(255, 159, 10, 0.16); +} +.deploy-suggestion-btn.applied { + border-color: rgba(48, 209, 88, 0.45); + background: rgba(48, 209, 88, 0.12); + color: 
var(--success); +} +.deploy-suggestion-btn:disabled { + cursor: default; +} +.deploy-sticky-actions { + margin: 0; + padding: 12px 20px 16px; + border-top: 1px solid var(--border-light); + background: var(--modal-bg); + flex-shrink: 0; + align-items: center; + justify-content: space-between; +} +.deploy-sticky-actions .modal-btn { + padding: 9px 16px; +} +.deploy-preflight-status { + color: var(--text-tertiary); + font-size: 11px; + font-family: var(--font-mono); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + padding-right: 12px; +} +.deploy-action-buttons { + display: flex; + gap: 8px; + flex-shrink: 0; +} /* Settings tabs */ .settings-tabs { @@ -2368,6 +2760,53 @@ .mobile-metrics { display: none !important; } } +@media (max-width: 768px) { + .deploy-modal { + width: min(100vw, 640px); + max-height: 92dvh; + border-radius: 20px 20px 0 0; + margin-top: auto; + } + .deploy-modal.maximized { + width: 100vw; + height: 100dvh; + max-height: 100dvh; + border-radius: 0; + } + .deploy-modal.minimized { + width: calc(100vw - 24px); + margin: 12px; + border-radius: 16px; + } + .deploy-modal-overlay { + align-items: flex-end; + } + .deploy-modal-overlay.minimized { + align-items: flex-end; + justify-content: center; + } + .deploy-modal-header, + .deploy-modal-body, + .deploy-scroll-body, + .deploy-sticky-actions { + padding-left: 16px; + padding-right: 16px; + } + .deploy-chip-row, + .deploy-grid, + .deploy-vram-top, + .deploy-form-grid, + .deploy-vram-breakdown { + grid-template-columns: 1fr; + } + .deploy-param-row { + grid-template-columns: 1fr; + } + .deploy-param-source { + white-space: normal; + } +} + /* Empty state */ .empty { color: var(--text-tertiary); @@ -4069,7 +4508,7 @@

class="model-deploy-btn" :class="modelDeploying[m.name] ? 'is-busy' : ''" :disabled="modelDeployButtonDisabled(m.name)" - @click="deployModel(m.name)" + @click="openDeployDetail(m)" x-text="modelDeployButtonLabel(m.name)"> @@ -4934,7 +5373,7 @@

class="model-deploy-btn" :class="modelDeploying[m.name] ? 'is-busy' : ''" :disabled="modelDeployButtonDisabled(m.name)" - @click="deployModel(m.name)" + @click="openDeployDetail(m)" x-text="modelDeployButtonLabel(m.name)"> @@ -5066,6 +5505,162 @@

+ + +