From 9068e8065c1a9ec7e9aefeb51a3c989a3693545d Mon Sep 17 00:00:00 2001 From: Codex Date: Fri, 5 Jun 2026 13:12:18 +0800 Subject: [PATCH] feat: wire running service details to real status --- cmd/aima/tooldeps_deploy.go | 6 + cmd/aima/tooldeps_deploy_test.go | 17 +- internal/k3s/client.go | 7 + internal/k3s/client_test.go | 37 + internal/runtime/docker.go | 308 +++++++- internal/runtime/docker_test.go | 119 ++++ internal/runtime/k3s.go | 1 + internal/runtime/runtime.go | 33 +- internal/ui/handler_test.go | 18 +- internal/ui/static/index.html | 1117 ++++++++++++++++++++++++++---- 10 files changed, 1496 insertions(+), 167 deletions(-) diff --git a/cmd/aima/tooldeps_deploy.go b/cmd/aima/tooldeps_deploy.go index f8e8b051..8f0a955e 100644 --- a/cmd/aima/tooldeps_deploy.go +++ b/cmd/aima/tooldeps_deploy.go @@ -693,6 +693,7 @@ type deploymentOverview struct { Name string `json:"name"` Model string `json:"model"` Engine string `json:"engine,omitempty"` + Image string `json:"image,omitempty"` Slot string `json:"slot,omitempty"` Phase string `json:"phase"` Status string `json:"status"` @@ -704,6 +705,8 @@ type deploymentOverview struct { Message string `json:"message,omitempty"` Restarts int `json:"restarts,omitempty"` ExitCode *int `json:"exit_code,omitempty"` + GPUMemoryMiB int `json:"gpu_memory_mib,omitempty"` + GPUMemorySource string `json:"gpu_memory_source,omitempty"` StartupPhase string `json:"startup_phase,omitempty"` StartupProgress int `json:"startup_progress,omitempty"` StartupMessage string `json:"startup_message,omitempty"` @@ -724,6 +727,7 @@ func deploymentOverviewFromStatus(status *runtime.DeploymentStatus, cat *knowled Name: status.Name, Model: status.Model, Engine: status.Engine, + Image: status.Image, Slot: status.Slot, Phase: status.Phase, Status: status.Phase, @@ -735,6 +739,8 @@ func deploymentOverviewFromStatus(status *runtime.DeploymentStatus, cat *knowled Message: status.Message, Restarts: status.Restarts, ExitCode: status.ExitCode, + GPUMemoryMiB: status.GPUMemoryMiB, + GPUMemorySource: status.GPUMemorySource, StartupPhase: status.StartupPhase, StartupProgress: status.StartupProgress, StartupMessage: status.StartupMessage, diff --git a/cmd/aima/tooldeps_deploy_test.go b/cmd/aima/tooldeps_deploy_test.go index 203999d7..ef14802c 100644 --- a/cmd/aima/tooldeps_deploy_test.go +++ b/cmd/aima/tooldeps_deploy_test.go @@ -36,12 +36,21 @@ func TestDeploymentOverviewIncludesCatalogModelType(t *testing.T) { }}, } overview := deploymentOverviewFromStatus(&runtime.DeploymentStatus{ - Name: "qwen3-tts-0.6b-qwen-tts-fastapi", - Model: "qwen3-tts-0.6b", - Phase: "running", - Ready: true, + Name: "qwen3-tts-0.6b-qwen-tts-fastapi", + Model: "qwen3-tts-0.6b", + Image: "docker.1ms.run/example/qwen-tts:latest", + Phase: "running", + Ready: true, + GPUMemoryMiB: 1536, + GPUMemorySource: "nvidia-smi", }, cat) if overview.ModelType != "tts" { t.Fatalf("ModelType = %q, want tts", overview.ModelType) } + if overview.Image != "docker.1ms.run/example/qwen-tts:latest" { + t.Fatalf("Image = %q, want docker.1ms.run/example/qwen-tts:latest", overview.Image) + } + if overview.GPUMemoryMiB != 1536 || overview.GPUMemorySource != "nvidia-smi" { + t.Fatalf("GPU memory = %d/%q, want 1536/nvidia-smi", overview.GPUMemoryMiB, overview.GPUMemorySource) + } } diff --git a/internal/k3s/client.go b/internal/k3s/client.go index d7de31d2..183c7a8b 100644 --- a/internal/k3s/client.go +++ b/internal/k3s/client.go @@ -41,6 +41,7 @@ type PodStatus struct { DeletionTimestamp string `json:"deletion_timestamp,omitempty"` Message string `json:"message,omitempty"` ContainerPort int `json:"container_port,omitempty"` + ContainerImage string `json:"container_image,omitempty"` RestartCount int `json:"restart_count,omitempty"` ExitCode *int `json:"exit_code,omitempty"` // from Terminated state ContainerStarted string `json:"container_started,omitempty"` // when the current container instance started @@ -270,6 +271,7 @@ type kubePod struct { } `json:"metadata"` Spec struct { Containers []struct { + Image string `json:"image"` Ports []struct { ContainerPort int `json:"containerPort"` } `json:"ports"` @@ -357,8 +359,12 @@ func parsePodJSON(data []byte) (*PodStatus, error) { } containerPort := 0 + containerImage := "" if len(kp.Spec.Containers) > 0 && len(kp.Spec.Containers[0].Ports) > 0 { + containerImage = kp.Spec.Containers[0].Image containerPort = kp.Spec.Containers[0].Ports[0].ContainerPort + } else if len(kp.Spec.Containers) > 0 { + containerImage = kp.Spec.Containers[0].Image } var conditions []PodCondition @@ -376,6 +382,7 @@ func parsePodJSON(data []byte) (*PodStatus, error) { DeletionTimestamp: kp.Metadata.DeletionTimestamp, Message: msg, ContainerPort: containerPort, + ContainerImage: containerImage, RestartCount: restartCount, ExitCode: exitCode, ContainerStarted: containerStarted, diff --git a/internal/k3s/client_test.go b/internal/k3s/client_test.go index e9f4fe87..86791c14 100644 --- a/internal/k3s/client_test.go +++ b/internal/k3s/client_test.go @@ -195,6 +195,18 @@ const terminatingPodJSON = `{ "aima.dev/model": "qwen3-8b" } }, + "spec": { + "containers": [ + { + "image": "nvcr.io/nvidia/vllm:26.01-py3", + "ports": [ + { + "containerPort": 8000 + } + ] + } + ] + }, "status": { "phase": "Running", "podIP": "10.42.0.5", @@ -217,6 +229,18 @@ const runningPodJSON = `{ "aima.dev/model": "qwen3-8b" } }, + "spec": { + "containers": [ + { + "image": "nvcr.io/nvidia/vllm:26.01-py3", + "ports": [ + { + "containerPort": 8000 + } + ] + } + ] + }, "status": { "phase": "Running", "podIP": "10.42.0.5", @@ -621,3 +645,16 @@ func TestParsePodJSON_DeletionTimestamp(t *testing.T) { t.Fatal("expected raw pod readiness to reflect container status before runtime mapping") } } + +func TestParsePodJSON_ContainerImage(t *testing.T) { + pod, err := parsePodJSON([]byte(runningPodJSON)) + if err != nil { + t.Fatal(err) + } + if pod.ContainerImage != "nvcr.io/nvidia/vllm:26.01-py3" { + t.Fatalf("ContainerImage = %q, want nvcr.io/nvidia/vllm:26.01-py3", pod.ContainerImage) + } + if pod.ContainerPort != 8000 { + t.Fatalf("ContainerPort = %d, want 8000", pod.ContainerPort) + } +} diff --git a/internal/runtime/docker.go b/internal/runtime/docker.go index 3fa06984..e076a831 100644 --- a/internal/runtime/docker.go +++ b/internal/runtime/docker.go @@ -295,6 +295,7 @@ func (r *DockerRuntime) Status(ctx context.Context, name string) (*DeploymentSta di := inspects[0] ds := r.inspectToStatus(di) + r.enrichGPUMemory(ctx, ds, di.ID, di.State.Pid) asset := findEngineAsset(r.engineAssets, ds.Labels["aima.dev/engine"]) if asset != nil && ds.EstimatedTotalS == 0 && len(asset.TimeConstraints.ColdStartS) >= 2 { ds.EstimatedTotalS = asset.TimeConstraints.ColdStartS[1] @@ -357,6 +358,7 @@ func (r *DockerRuntime) List(ctx context.Context) ([]*DeploymentStatus, error) { ds := &DeploymentStatus{ Name: ps.Names, + Image: ps.Image, Phase: phase, Ready: ready, Address: addr, @@ -402,6 +404,7 @@ func (r *DockerRuntime) Logs(ctx context.Context, name string, tailLines int) (s // --- internal types --- type dockerInspect struct { + ID string `json:"Id"` Name string `json:"Name"` State struct { Status string `json:"Status"` // running, created, exited, paused, restarting @@ -409,19 +412,167 @@ type dockerInspect struct { ExitCode int `json:"ExitCode"` Running bool `json:"Running"` Restarting bool `json:"Restarting"` + Pid int `json:"Pid"` } `json:"State"` Config struct { - Labels map[string]string `json:"Labels"` + Entrypoint []string `json:"Entrypoint"` + Cmd []string `json:"Cmd"` + Image string `json:"Image"` + Labels map[string]string `json:"Labels"` } `json:"Config"` } type dockerPsEntry struct { + ID string `json:"ID"` + Image string `json:"Image"` Names string `json:"Names"` Status string `json:"Status"` Labels string `json:"Labels"` CreatedAt string `json:"CreatedAt"` } +func (r *DockerRuntime) enrichGPUMemory(ctx context.Context, ds *DeploymentStatus, containerID string, containerPID int) { + if ds == nil || ds.Name == "" || ds.Phase != "running" { + return + } + if containerPID <= 0 || strings.TrimSpace(containerID) == "" { + pidOut, err := exec.CommandContext(ctx, "docker", "inspect", "--format", "{{.State.Pid}} {{.Id}}", ds.Name).CombinedOutput() + if err == nil { + fields := strings.Fields(string(pidOut)) + if len(fields) > 0 && containerPID <= 0 { + containerPID, _ = strconv.Atoi(fields[0]) + } + if len(fields) > 1 && strings.TrimSpace(containerID) == "" { + containerID = fields[1] + } + } + } + usedMiB := containerNvidiaGPUMemoryMiB(ctx, ds.Name, containerID, containerPID) + if usedMiB <= 0 { + return + } + ds.GPUMemoryMiB = usedMiB + ds.GPUMemorySource = "nvidia-smi" +} + +func containerNvidiaGPUMemoryMiB(ctx context.Context, containerName, containerID string, containerPID int) int { + if containerPID <= 0 && strings.TrimSpace(containerName) == "" && strings.TrimSpace(containerID) == "" { + return 0 + } + smiCtx, cancel := context.WithTimeout(ctx, 2*time.Second) + defer cancel() + out, err := exec.CommandContext(smiCtx, "nvidia-smi", + "--query-compute-apps=pid,used_gpu_memory", + "--format=csv,noheader,nounits", + ).CombinedOutput() + if err != nil { + return 0 + } + totalMiB := 0 + for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") { + parts := strings.SplitN(strings.TrimSpace(line), ",", 2) + if len(parts) != 2 { + continue + } + pid := strings.TrimSpace(parts[0]) + memMiB := parseNvidiaMemoryMiB(parts[1]) + if pid == "" || memMiB <= 0 { + continue + } + if processBelongsToDockerContainer(ctx, pid, containerName, containerID, containerPID) { + totalMiB += memMiB + } + } + return totalMiB +} + +func parseNvidiaMemoryMiB(value string) int { + value = strings.TrimSpace(value) + value = strings.TrimSuffix(value, "MiB") + value = strings.TrimSpace(value) + if fields := strings.Fields(value); len(fields) > 0 { + value = fields[0] + } + memMiB, err := strconv.Atoi(value) + if err != nil { + return 0 + } + return memMiB +} + +func processBelongsToDockerContainer(ctx context.Context, pid, containerName, containerID string, containerPID int) bool { + data, err := os.ReadFile(filepath.Join("/proc", pid, "cgroup")) + if err == nil { + cgroup := strings.ToLower(string(data)) + for _, token := range dockerContainerMatchTokens(containerName, containerID) { + if token != "" && strings.Contains(cgroup, token) { + return true + } + } + } + if containerPID <= 0 { + return false + } + return isDescendantPID(ctx, pid, strconv.Itoa(containerPID)) +} + +func dockerContainerMatchTokens(containerName, containerID string) []string { + id := strings.ToLower(strings.TrimSpace(containerID)) + shortID := id + if len(shortID) > 12 { + shortID = shortID[:12] + } + if id != "" { + return []string{id, shortID} + } + name := strings.ToLower(strings.TrimPrefix(strings.TrimSpace(containerName), "/")) + return []string{name} +} + +func isDescendantPID(ctx context.Context, childPID, parentPID string) bool { + current := strings.TrimSpace(childPID) + parentPID = strings.TrimSpace(parentPID) + if current == "" || parentPID == "" { + return false + } + if current == parentPID { + return true + } + for i := 0; i < 16; i++ { + select { + case <-ctx.Done(): + return false + default: + } + data, err := os.ReadFile(filepath.Join("/proc", current, "stat")) + if err != nil { + return false + } + ppid := parentPIDFromProcStat(string(data)) + if ppid == "" || ppid == "0" || ppid == "1" { + return false + } + if ppid == parentPID { + return true + } + current = ppid + } + return false +} + +func parentPIDFromProcStat(stat string) string { + stat = strings.TrimSpace(stat) + endComm := strings.LastIndex(stat, ")") + if endComm < 0 || endComm+2 >= len(stat) { + return "" + } + fields := strings.Fields(stat[endComm+1:]) + if len(fields) < 2 { + return "" + } + return fields[1] +} + func (r *DockerRuntime) inspectToStatus(di dockerInspect) *DeploymentStatus { labels := di.Config.Labels port := 0 @@ -466,9 +617,11 @@ func (r *DockerRuntime) inspectToStatus(di dockerInspect) *DeploymentStatus { ds := &DeploymentStatus{ Name: name, + Image: di.Config.Image, Phase: phase, Ready: ready, Address: addr, + Config: dockerLaunchConfigFromInspect(di), Labels: labels, Runtime: "docker", } @@ -482,6 +635,159 @@ func (r *DockerRuntime) inspectToStatus(di dockerInspect) *DeploymentStatus { return ds } +func dockerLaunchConfigFromInspect(di dockerInspect) map[string]any { + args := make([]string, 0, len(di.Config.Entrypoint)+len(di.Config.Cmd)) + args = append(args, di.Config.Entrypoint...) + args = append(args, di.Config.Cmd...) + if shellArgs := dockerShellCommandArgs(args); len(shellArgs) > 0 { + if config := parseLaunchConfigFlags(shellArgs); len(config) > 0 { + return config + } + } + return parseLaunchConfigFlags(args) +} + +func dockerShellCommandArgs(args []string) []string { + for i := 0; i+1 < len(args); i++ { + name := shellExecutableName(args[i]) + if name != "bash" && name != "sh" { + continue + } + for j := i + 1; j+1 < len(args); j++ { + if !isShellCommandFlag(args[j]) { + continue + } + command := strings.TrimSpace(args[j+1]) + if command == "" { + return nil + } + if idx := strings.LastIndex(command, " exec "); idx >= 0 { + command = command[idx+6:] + } else if strings.HasPrefix(command, "exec ") { + command = strings.TrimSpace(strings.TrimPrefix(command, "exec ")) + } + return splitShellFields(command) + } + } + return nil +} + +func isShellCommandFlag(value string) bool { + value = strings.TrimSpace(value) + if value == "-c" { + return true + } + if strings.HasPrefix(value, "--") || !strings.HasPrefix(value, "-") { + return false + } + return strings.Contains(value[1:], "c") +} + +func shellExecutableName(value string) string { + value = strings.TrimSpace(value) + if idx := strings.LastIndexAny(value, `/\`); idx >= 0 { + return value[idx+1:] + } + return value +} + +func parseLaunchConfigFlags(args []string) map[string]any { + config := make(map[string]any) + for i := 0; i < len(args); i++ { + arg := strings.TrimSpace(args[i]) + if arg == "" || !strings.HasPrefix(arg, "--") { + continue + } + key, value, hasValue := strings.Cut(strings.TrimPrefix(arg, "--"), "=") + key = strings.TrimSpace(key) + if key == "" { + continue + } + boolValue := true + if strings.HasPrefix(key, "no-") { + key = strings.TrimPrefix(key, "no-") + boolValue = false + } + key = strings.ReplaceAll(key, "-", "_") + if !hasValue && i+1 < len(args) && !strings.HasPrefix(strings.TrimSpace(args[i+1]), "--") { + value = args[i+1] + hasValue = true + i++ + } + if hasValue { + config[key] = parseLaunchConfigValue(value) + } else { + config[key] = boolValue + } + } + return config +} + +func splitShellFields(input string) []string { + var fields []string + var b strings.Builder + var quote rune + escaped := false + for _, r := range input { + if escaped { + b.WriteRune(r) + escaped = false + continue + } + if quote != '\'' && r == '\\' { + escaped = true + continue + } + if quote != 0 { + if r == quote { + quote = 0 + continue + } + b.WriteRune(r) + continue + } + if r == '\'' || r == '"' { + quote = r + continue + } + if r == ' ' || r == '\t' || r == '\n' || r == '\r' { + if b.Len() > 0 { + fields = append(fields, b.String()) + b.Reset() + } + continue + } + b.WriteRune(r) + } + if escaped { + b.WriteRune('\\') + } + if b.Len() > 0 { + fields = append(fields, b.String()) + } + return fields +} + +func parseLaunchConfigValue(value string) any { + value = strings.TrimSpace(value) + if value == "" { + return "" + } + if strings.EqualFold(value, "true") { + return true + } + if strings.EqualFold(value, "false") { + return false + } + if i, err := strconv.Atoi(value); err == nil { + return i + } + if f, err := strconv.ParseFloat(value, 64); err == nil { + return f + } + return value +} + // enrichDockerProgress reads container logs and matches engine patterns. func (r *DockerRuntime) enrichDockerProgress(ctx context.Context, ds *DeploymentStatus) string { engineName := "" diff --git a/internal/runtime/docker_test.go b/internal/runtime/docker_test.go index a24d8dd0..9e225bb7 100644 --- a/internal/runtime/docker_test.go +++ b/internal/runtime/docker_test.go @@ -1,6 +1,7 @@ package runtime import ( + "context" "strings" "testing" @@ -378,6 +379,7 @@ func TestDockerInspectToStatus(t *testing.T) { ExitCode int `json:"ExitCode"` Running bool `json:"Running"` Restarting bool `json:"Restarting"` + Pid int `json:"Pid"` }{Status: "running", Running: true, StartedAt: "2026-03-03T00:00:00Z"}, }, wantPhase: "running", @@ -392,6 +394,7 @@ func TestDockerInspectToStatus(t *testing.T) { ExitCode int `json:"ExitCode"` Running bool `json:"Running"` Restarting bool `json:"Restarting"` + Pid int `json:"Pid"` }{Status: "exited", ExitCode: 1}, }, wantPhase: "failed", @@ -410,6 +413,7 @@ func TestDockerInspectToStatus(t *testing.T) { ExitCode int `json:"ExitCode"` Running bool `json:"Running"` Restarting bool `json:"Restarting"` + Pid int `json:"Pid"` }{Status: "exited", ExitCode: 0}, }, wantPhase: "stopped", @@ -424,6 +428,7 @@ func TestDockerInspectToStatus(t *testing.T) { ExitCode int `json:"ExitCode"` Running bool `json:"Running"` Restarting bool `json:"Restarting"` + Pid int `json:"Pid"` }{Status: "restarting", ExitCode: 2, Restarting: true}, }, wantPhase: "failed", @@ -442,6 +447,7 @@ func TestDockerInspectToStatus(t *testing.T) { ExitCode int `json:"ExitCode"` Running bool `json:"Running"` Restarting bool `json:"Restarting"` + Pid int `json:"Pid"` }{Status: "created"}, }, wantPhase: "starting", @@ -469,6 +475,119 @@ func TestDockerInspectToStatus(t *testing.T) { } } +func TestDockerInspectToStatusIncludesImageAndLaunchConfig(t *testing.T) { + r := &DockerRuntime{} + di := dockerInspect{Name: "/test-vllm"} + di.State.Status = "running" + di.State.Running = true + di.Config.Entrypoint = []string{"vllm", "serve"} + di.Config.Cmd = []string{ + "--gpu-memory-utilization", "0.6", + "--max-model-len=131072", + "--served-model-name", "GLM-4.6V-Flash-FP4", + "--trust-remote-code", + "--no-enable-prefix-caching", + } + di.Config.Image = "nvcr.io/nvidia/vllm:26.01-py3" + di.Config.Labels = map[string]string{"aima.dev/port": "8000"} + ds := r.inspectToStatus(di) + + if ds.Image != "nvcr.io/nvidia/vllm:26.01-py3" { + t.Fatalf("Image = %q, want nvcr.io/nvidia/vllm:26.01-py3", ds.Image) + } + if got, ok := ds.Config["gpu_memory_utilization"].(float64); !ok || got != 0.6 { + t.Fatalf("gpu_memory_utilization = %#v, want 0.6", ds.Config["gpu_memory_utilization"]) + } + if got, ok := ds.Config["max_model_len"].(int); !ok || got != 131072 { + t.Fatalf("max_model_len = %#v, want 131072", ds.Config["max_model_len"]) + } + if got := ds.Config["served_model_name"]; got != "GLM-4.6V-Flash-FP4" { + t.Fatalf("served_model_name = %#v, want GLM-4.6V-Flash-FP4", got) + } + if got, ok := ds.Config["trust_remote_code"].(bool); !ok || !got { + t.Fatalf("trust_remote_code = %#v, want true", ds.Config["trust_remote_code"]) + } + if got, ok := ds.Config["enable_prefix_caching"].(bool); !ok || got { + t.Fatalf("enable_prefix_caching = %#v, want false", ds.Config["enable_prefix_caching"]) + } +} + +func TestDockerInspectToStatusParsesShellLaunchConfig(t *testing.T) { + r := &DockerRuntime{} + di := dockerInspect{Name: "/test-vllm-shell"} + di.State.Status = "running" + di.State.Running = true + di.Config.Entrypoint = []string{"/bin/bash"} + di.Config.Cmd = []string{ + "-c", + "python - <<'PY'\nprint('init')\nPY\n && exec vllm serve /models --gpu-memory-utilization 0.6 --served-model-name 'GLM-4.6V-Flash-FP4'", + } + di.Config.Image = "nvcr.io/nvidia/vllm:26.01-py3" + di.Config.Labels = map[string]string{"aima.dev/port": "8000"} + ds := r.inspectToStatus(di) + + if got, ok := ds.Config["gpu_memory_utilization"].(float64); !ok || got != 0.6 { + t.Fatalf("gpu_memory_utilization = %#v, want 0.6", ds.Config["gpu_memory_utilization"]) + } + if got := ds.Config["served_model_name"]; got != "GLM-4.6V-Flash-FP4" { + t.Fatalf("served_model_name = %#v, want GLM-4.6V-Flash-FP4", got) + } +} + +func TestDockerInspectToStatusParsesCombinedShellCommandFlag(t *testing.T) { + r := &DockerRuntime{} + di := dockerInspect{Name: "/test-vllm-shell-lc"} + di.State.Status = "running" + di.State.Running = true + di.Config.Entrypoint = []string{"/bin/bash"} + di.Config.Cmd = []string{"-lc", "exec vllm serve /models --gpu-memory-utilization 0.7 --max-model-len 65536"} + di.Config.Image = "nvcr.io/nvidia/vllm:26.01-py3" + di.Config.Labels = map[string]string{"aima.dev/port": "8000"} + ds := r.inspectToStatus(di) + + if got, ok := ds.Config["gpu_memory_utilization"].(float64); !ok || got != 0.7 { + t.Fatalf("gpu_memory_utilization = %#v, want 0.7", ds.Config["gpu_memory_utilization"]) + } + if got, ok := ds.Config["max_model_len"].(int); !ok || got != 65536 { + t.Fatalf("max_model_len = %#v, want 65536", ds.Config["max_model_len"]) + } +} + +func TestParseNvidiaMemoryMiB(t *testing.T) { + tests := map[string]int{ + "1700": 1700, + "1700 MiB": 1700, + " 42 ": 42, + "N/A": 0, + "": 0, + } + for input, want := range tests { + if got := parseNvidiaMemoryMiB(input); got != want { + t.Fatalf("parseNvidiaMemoryMiB(%q) = %d, want %d", input, got, want) + } + } +} + +func TestDockerContainerMatchTokensPrefersContainerID(t *testing.T) { + tokens := dockerContainerMatchTokens("qwen3-vllm", "abcdef1234567890") + if strings.Join(tokens, ",") != "abcdef1234567890,abcdef123456" { + t.Fatalf("tokens = %#v, want full and short container IDs only", tokens) + } +} + +func TestParentPIDFromProcStat(t *testing.T) { + stat := "12345 (python worker) S 6789 1 1 0 -1 4194560" + if got := parentPIDFromProcStat(stat); got != "6789" { + t.Fatalf("parentPIDFromProcStat = %q, want 6789", got) + } +} + +func TestIsDescendantPIDIncludesSelf(t *testing.T) { + if !isDescendantPID(context.Background(), "12345", "12345") { + t.Fatal("expected a container main PID to match itself") + } +} + func TestParseLabelString(t *testing.T) { tests := []struct { name string diff --git a/internal/runtime/k3s.go b/internal/runtime/k3s.go index ba0cf286..055aba24 100644 --- a/internal/runtime/k3s.go +++ b/internal/runtime/k3s.go @@ -246,6 +246,7 @@ func podToStatus(pod *k3s.PodStatus) *DeploymentStatus { ds := &DeploymentStatus{ Name: pod.Name, + Image: pod.ContainerImage, Phase: phase, Ready: ready, Address: addr, diff --git a/internal/runtime/runtime.go b/internal/runtime/runtime.go index b75f3f52..a70aa02b 100644 --- a/internal/runtime/runtime.go +++ b/internal/runtime/runtime.go @@ -53,21 +53,24 @@ type DeployRequest struct { // DeploymentStatus is the unified status across runtimes. type DeploymentStatus struct { - Name string `json:"name"` - Model string `json:"model,omitempty"` - Engine string `json:"engine,omitempty"` - Slot string `json:"slot,omitempty"` - Phase string `json:"phase"` // running / starting / stopped / failed - Ready bool `json:"ready"` - Address string `json:"address"` // host:port - Config map[string]any `json:"config,omitempty"` - Labels map[string]string `json:"labels"` - StartTime string `json:"start_time"` - StartedAtUnix int64 `json:"started_at_unix,omitempty"` - Message string `json:"message,omitempty"` - Runtime string `json:"runtime"` // "k3s", "docker", or "native" - Restarts int `json:"restarts,omitempty"` - ExitCode *int `json:"exit_code,omitempty"` + Name string `json:"name"` + Model string `json:"model,omitempty"` + Engine string `json:"engine,omitempty"` + Image string `json:"image,omitempty"` + Slot string `json:"slot,omitempty"` + Phase string `json:"phase"` // running / starting / stopped / failed + Ready bool `json:"ready"` + Address string `json:"address"` // host:port + Config map[string]any `json:"config,omitempty"` + Labels map[string]string `json:"labels"` + StartTime string `json:"start_time"` + StartedAtUnix int64 `json:"started_at_unix,omitempty"` + Message string `json:"message,omitempty"` + Runtime string `json:"runtime"` // "k3s", "docker", or "native" + Restarts int `json:"restarts,omitempty"` + ExitCode *int `json:"exit_code,omitempty"` + GPUMemoryMiB int `json:"gpu_memory_mib,omitempty"` + GPUMemorySource string `json:"gpu_memory_source,omitempty"` StartupPhase string `json:"startup_phase,omitempty"` // scheduling/pulling_image/initializing/loading_weights/cuda_graphs/ready StartupProgress int `json:"startup_progress,omitempty"` // 0-100 diff --git a/internal/ui/handler_test.go b/internal/ui/handler_test.go index 5aee95c7..932b7740 100644 --- a/internal/ui/handler_test.go +++ b/internal/ui/handler_test.go @@ -284,10 +284,10 @@ func TestRegisterRoutes_IndexShowsAPIAccessWithoutRenderingPrivateIP(t *testing. `api_access`, `api_access_desc`, `apiBaseDisplay()`, - `apiDeploymentChatCapable(dep)`, + `apiDeploymentChatCapable(deploymentDetailData)`, `api_non_chat_hint`, `copyCurrentAPIBaseURL($event)`, - `copyAPICurl(dep, $event)`, + `copyAPICurl(deploymentDetailData, $event)`, `apiCurlTemplate(dep)`, `api_public_unconfigured`, } { @@ -493,11 +493,19 @@ func TestRegisterRoutes_IndexIncludesDeploymentStageFeedback(t *testing.T) { body := rec.Body.String() for _, token := range []string{ "startup_progress", - "startup_message || dep.startup_phase || 'Initializing...'", - "dep.eta ? '~' + dep.eta", + "deployment-service-card", + "deploymentShowProgress(dep)", + "deploymentProgressValue(dep)", + "deploymentProgressText(dep)", + "openDeploymentDetail(dep)", + "deploymentDetailOpen", + "deploymentDetailRequestSeq", + "this.callTool('deploy.status', { name })", + "clearMissingGpuMemory: true", + "deploymentGpuMemoryMiB(d)", + "handleDeploymentStopClick($event, deploymentDetailData.name, { closeDetail: true })", "failure_detail: this.summarizeDeploymentFailure(d)", "summarizeDeploymentFailure(dep)", - "dep.phase === 'running' && dep.ready && dep.address", } { if !strings.Contains(body, token) { t.Fatalf("body missing %q", token) diff --git a/internal/ui/static/index.html b/internal/ui/static/index.html index 446f79b2..30d896cf 100644 --- a/internal/ui/static/index.html +++ b/internal/ui/static/index.html @@ -483,12 +483,303 @@ border-color: var(--accent); background: var(--accent); } +.api-copy-btn.icon-only { + width: 28px; + height: 28px; + padding: 0; + display: inline-flex; + align-items: center; + justify-content: center; +} .api-public-note { margin-top: 8px; color: var(--text-tertiary); font-size: 11px; line-height: 1.45; } +.deployment-services-panel .panel-title { + font-size: 15px; + padding-bottom: 10px; +} +.deployment-service-list { + display: flex; + flex-direction: column; + gap: 12px; +} +.deployment-service-card { + padding: 14px; + border: 1px solid var(--border-medium); + border-radius: var(--radius-md); + background: + linear-gradient(135deg, rgba(255, 255, 255, 0.055), rgba(255, 255, 255, 0.025)), + var(--bg-panel); + box-shadow: inset 0 1px 0 rgba(255, 255, 255, 0.04); +} +.deployment-service-content { + min-width: 0; +} +.deployment-service-head { + display: flex; + align-items: center; + gap: 10px; +} +.deployment-service-head::before { + content: ""; + width: 8px; + height: 8px; + border-radius: 999px; + flex-shrink: 0; + background: var(--warning); + box-shadow: 0 0 12px currentColor; + color: var(--warning); +} +.deployment-service-card:has(.deployment-service-badge.ready) .deployment-service-head::before { + background: var(--success); + color: var(--success); +} +.deployment-service-card:has(.deployment-service-badge.failed) .deployment-service-head::before { + background: var(--error); + color: var(--error); +} +.deployment-service-main { + min-width: 0; +} +.deployment-service-name { + color: var(--text); + font-size: 13px; + font-weight: 700; + line-height: 1.3; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} +.deployment-service-row { + display: grid; + align-items: center; + gap: 10px; + margin-top: 12px; +} +.deployment-service-row.ready { + grid-template-columns: auto auto auto; +} +.deployment-service-row.starting { + grid-template-columns: auto minmax(0, 1fr) auto; +} +.deployment-service-row.failed { + grid-template-columns: minmax(0, 1fr) auto auto; +} +.deployment-service-badge { + display: inline-flex; + align-items: center; + justify-content: center; + flex-shrink: 0; + min-width: 48px; + height: 24px; + padding: 0 10px; + border-radius: 999px; + font-size: 11px; + font-weight: 700; + line-height: 1; + white-space: nowrap; +} +.deployment-service-badge.ready { + color: var(--success); + background: rgba(48, 209, 88, 0.14); +} +.deployment-service-badge.starting { + color: var(--warning); + background: rgba(255, 159, 10, 0.14); +} +.deployment-service-badge.failed { + color: var(--error); + background: rgba(255, 69, 58, 0.14); +} +.deployment-service-progress { + display: grid; + grid-template-columns: minmax(0, 1fr) auto; + align-items: center; + gap: 10px; + min-width: 0; +} +.deployment-service-progress .deploy-progress { + height: 7px; + margin-top: 0; +} +.deployment-service-progress span { + color: var(--text-dim); + font-family: var(--font-mono); + font-size: 12px; +} +.deployment-service-error { + min-width: 0; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} +.deployment-service-row .deployment-service-error { + margin-top: 0; + padding-left: 0; +} +.deployment-service-row .deployment-service-action-btn { + min-width: 52px; + height: 24px; + padding: 0 10px; + font-size: 11px; + line-height: 1; + white-space: nowrap; +} +.deployment-service-btn { + min-width: 54px; + height: 32px; + padding: 0 12px; + border: 1px solid var(--border-medium); + border-radius: 9px; + background: var(--input-bg); + color: var(--text-dim); + font-family: var(--font-sans); + font-size: 12px; + font-weight: 700; + cursor: pointer; +} +.deployment-service-btn:hover { + color: var(--text); + border-color: rgba(255, 255, 255, 0.24); + background: var(--btn-icon-hover); +} +.deployment-service-btn:disabled { + opacity: 0.4; + cursor: not-allowed; +} +.deployment-service-btn.danger:hover { + color: var(--error); + border-color: rgba(255, 69, 58, 0.5); + background: rgba(255, 69, 58, 0.1); +} +@media (max-width: 520px) { + .deployment-service-row.ready, + .deployment-service-row.starting, + .deployment-service-row.failed { + grid-template-columns: minmax(0, 1fr) auto; + } + .deployment-service-row.starting .deployment-service-progress { + grid-column: 1 / -1; + } +} +.deployment-service-modal { + width: min(920px, calc(100vw - 64px)); +} +.deployment-service-modal .api-access-row { + grid-template-columns: 110px minmax(0, 1fr) auto; + min-height: 38px; + padding: 0 10px; + border-bottom: 1px solid var(--border-light); +} +.deployment-service-modal .api-access-row:last-child { + border-bottom: none; +} +.deployment-service-modal .api-copy-btn:hover { + color: var(--text); + border-color: var(--border-medium); + background: var(--btn-icon-hover); +} +.deployment-detail-status-bar { + display: flex; + align-items: center; + gap: 8px; + min-width: 0; + color: var(--text-dim); + font-size: 12px; + font-weight: 600; +} +.deployment-detail-status-dot { + width: 8px; + height: 8px; + border-radius: 999px; + flex-shrink: 0; +} +.deployment-detail-status-dot.ready { background: var(--success); box-shadow: 0 0 10px rgba(48, 209, 88, 0.45); } +.deployment-detail-status-dot.starting { background: var(--warning); box-shadow: 0 0 10px rgba(255, 159, 10, 0.42); } +.deployment-detail-status-dot.failed { background: var(--error); box-shadow: 0 0 10px rgba(255, 69, 58, 0.42); } +.deployment-diagnostics-table { + display: grid; + grid-template-columns: repeat(2, minmax(0, 1fr)); + border: 1px solid var(--border-light); + border-radius: var(--radius-sm); + overflow: hidden; + background: var(--input-bg); +} +.deployment-config-table { + display: grid; + grid-template-columns: repeat(2, minmax(300px, 1fr)); + border: 1px solid var(--border-light); + border-radius: var(--radius-sm); + overflow: hidden; + background: var(--input-bg); +} +.deployment-diagnostics-row { + display: grid; + grid-template-columns: minmax(120px, 0.8fr) minmax(0, 1fr); + gap: 10px; + align-items: center; + min-height: 44px; + padding: 9px 12px; + border-right: 1px solid var(--border-light); + border-bottom: 1px solid var(--border-light); +} +.deployment-config-row { + display: grid; + grid-template-columns: minmax(0, 1fr) minmax(132px, max-content); + column-gap: 36px; + align-items: center; + min-height: 44px; + padding: 9px 28px 9px 24px; + border-right: 1px solid var(--border-light); + border-bottom: 1px solid var(--border-light); +} +.deployment-config-row:nth-child(2n), +.deployment-diagnostics-row:nth-child(2n) { + border-right: none; +} +.deployment-config-row:nth-last-child(-n + 2), +.deployment-diagnostics-row:nth-last-child(-n + 2) { + border-bottom: none; +} +.deployment-config-key, +.deployment-diagnostics-key { + color: var(--text-tertiary); + font-size: 12px; +} +.deployment-config-key { + min-width: 0; + overflow-wrap: anywhere; +} +.deployment-config-value, +.deployment-diagnostics-value { + color: var(--text); + font-family: var(--font-mono); + font-size: 12px; + overflow-wrap: anywhere; +} +.deployment-config-value { + min-width: 0; + justify-self: start; + padding-left: 10px; +} +.deployment-diagnostics-value { + color: var(--text-dim); +} +.deployment-service-footer { + padding-top: 12px; +} +.deployment-service-footer .modal-btn.danger { + color: var(--error); + border-color: rgba(255, 69, 58, 0.45); + background: rgba(255, 69, 58, 0.08); +} +.deployment-service-footer .modal-btn.danger:hover { + border-color: var(--error); + background: rgba(255, 69, 58, 0.14); +} .model-list-item { display: flex; align-items: flex-start; @@ -2794,11 +3085,25 @@ } .deploy-chip-row, .deploy-grid, + .deployment-config-table, + .deployment-diagnostics-table, .deploy-vram-top, .deploy-form-grid, .deploy-vram-breakdown { grid-template-columns: 1fr; } + .deployment-config-row, + .deployment-diagnostics-row { + border-right: none; + } + .deployment-config-row:nth-last-child(-n + 2), + .deployment-diagnostics-row:nth-last-child(-n + 2) { + border-bottom: 1px solid var(--border-light); + } + .deployment-config-row:last-child, + .deployment-diagnostics-row:last-child { + border-bottom: none; + } .deploy-param-row { grid-template-columns: 1fr; } @@ -5054,86 +5359,77 @@

-
-
+
+
-
    +