From 8dd7e1c3f9ce957ec978360f3e3324af92411957 Mon Sep 17 00:00:00 2001
From: rjckkkkk <59609580+rjckkkkk@users.noreply.github.com>
Date: Mon, 8 Jun 2026 08:42:55 +0000
Subject: [PATCH 1/3] Detect Windows hardware via CIM, add AMD GPU detection

Windows 11 24H2+ removes the legacy wmic CLI, so CPU/RAM detection on
modern Windows returned an empty model and zero RAM. AMD GPUs were also
invisible on Windows: the probe chain only knows nvidia-smi/rocm-smi and
the sysfs fallback is Linux-only, so AMD APU hosts (Ryzen AI Max+
"Strix Halo") detected no accelerator at all.

Replace wmic with `powershell Get-CimInstance` for CPU, RAM, pagefile and
CPU load, and add a Windows Win32_VideoController GPU fallback wired into
detectGPU through a detectPlatformGPU hook (no-op on non-Windows). AMD
identity (name/gfx/arch/unified) is resolved from the PCI device ID via
the existing amdPCIToInfo, shared with the Linux sysfs path.

CIM cannot report true APU VRAM (Win32 AdapterRAM saturates at 4 GiB) or
GPU utilization, so VRAM falls back to OS-visible RAM via the existing
unified-memory backfill; exact carve-out still needs amd-smi/rocm-smi.

Pure CIM JSON parsers live in cim.go (no build tags) with table-driven
tests in cim_test.go using fixtures captured from a real Strix Halo box.

Verified on AMD Ryzen AI Max+ 395 (Radeon 8060S, gfx1151): hal detect now
reports the GPU (RDNA3.5, driver 32.0.31007.1017), CPU model + 16c/32t,
and 32 GB RAM.

Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/hal/cim.go | 204 +++++++++++++++++++ internal/hal/cim_test.go | 260 ++++++++++++++++++++++++ internal/hal/detect_windows.go | 155 ++++---------- internal/hal/detect_windows_test.go | 109 ---------- internal/hal/gpu.go | 5 + internal/hal/gpu_nonwindows.go | 12 ++ internal/hal/testhelper_windows_test.go | 9 +- 7 files changed, 526 insertions(+), 228 deletions(-) create mode 100644 internal/hal/cim.go create mode 100644 internal/hal/cim_test.go delete mode 100644 internal/hal/detect_windows_test.go create mode 100644 internal/hal/gpu_nonwindows.go diff --git a/internal/hal/cim.go b/internal/hal/cim.go new file mode 100644 index 00000000..ef07be61 --- /dev/null +++ b/internal/hal/cim.go @@ -0,0 +1,204 @@ +package hal + +import ( + "encoding/json" + "strings" +) + +// CIM (Common Information Model) parsing for Windows hardware detection. +// +// Modern Windows (11 24H2+) removes the legacy `wmic` CLI, so detection shells +// out to `powershell Get-CimInstance ... | ConvertTo-Json` instead. These +// parsers are kept free of build tags and OS calls so they unit-test on any +// platform; only the command execution lives in detect_windows.go. + +// decodeCIMObjects normalizes `ConvertTo-Json -Compress` output. PowerShell +// renders a single CIM instance as a JSON object and multiple instances as an +// array, so both shapes collapse to a slice of maps here. 
+func decodeCIMObjects(output string) []map[string]interface{} { + start := strings.IndexAny(output, "{[") + if start < 0 { + return nil + } + trimmed := strings.TrimSpace(output[start:]) + if trimmed == "" { + return nil + } + if trimmed[0] == '[' { + var arr []map[string]interface{} + if err := json.Unmarshal([]byte(trimmed), &arr); err != nil { + return nil + } + return arr + } + var obj map[string]interface{} + if err := json.Unmarshal([]byte(trimmed), &obj); err != nil { + return nil + } + return []map[string]interface{}{obj} +} + +// parseCIMCPU fills CPUInfo from Win32_Processor JSON. Cores and threads sum +// across sockets; name and clock come from the first processor. +func parseCIMCPU(output string, info *CPUInfo) { + objs := decodeCIMObjects(output) + if len(objs) == 0 { + return + } + var cores, threads int + for _, o := range objs { + cores += int(jsonInt(o, "NumberOfCores")) + threads += int(jsonInt(o, "NumberOfLogicalProcessors")) + } + if name := strings.TrimSpace(jsonStr(objs[0], "Name")); name != "" { + info.Model = name + } + if cores > 0 { + info.Cores = cores + } + if threads > 0 { + info.Threads = threads + } + if mhz := jsonFloat(objs[0], "MaxClockSpeed"); mhz > 0 { + info.FreqGHz = mhz / 1000.0 + } +} + +// parseCIMRAM fills RAMInfo from Win32_OperatingSystem JSON (values in KiB). +func parseCIMRAM(output string, info *RAMInfo) { + objs := decodeCIMObjects(output) + if len(objs) == 0 { + return + } + o := objs[0] + if kb := jsonInt(o, "TotalVisibleMemorySize"); kb > 0 { + info.TotalMiB = int(kb / 1024) + } + if kb := jsonInt(o, "FreePhysicalMemory"); kb > 0 { + info.AvailableMiB = int(kb / 1024) + } +} + +// parseCIMSwap sums AllocatedBaseSize (MiB) across all Win32_PageFileUsage rows. 
+func parseCIMSwap(output string, info *RAMInfo) { + total := 0 + for _, o := range decodeCIMObjects(output) { + total += int(jsonInt(o, "AllocatedBaseSize")) + } + if total > 0 { + info.SwapTotalMiB = total + } +} + +// parseCIMCPULoad averages LoadPercentage across all Win32_Processor rows. +func parseCIMCPULoad(output string) float64 { + objs := decodeCIMObjects(output) + if len(objs) == 0 { + return 0 + } + var sum float64 + for _, o := range objs { + sum += jsonFloat(o, "LoadPercentage") + } + return sum / float64(len(objs)) +} + +// parseWindowsGPUs builds a GPUInfo from Win32_VideoController JSON. It is the +// Windows fallback used when no vendor SMI tool (nvidia-smi/rocm-smi) is on +// PATH — common on AMD APU hosts. CIM cannot report true VRAM (Win32 AdapterRAM +// is a uint32 that saturates at 4 GiB) or utilization, so only static identity +// fields are populated; VRAM is left to the unified-memory backfill in +// detectWithRunner. AMD identity (name/gfx/unified) is resolved from the PCI +// device ID via amdPCIToInfo, shared with the Linux sysfs path. 
+func parseWindowsGPUs(output string) *GPUInfo { + objs := decodeCIMObjects(output) + if len(objs) == 0 { + return nil + } + + var chosen map[string]interface{} + var vendor string + count := 0 + for _, o := range objs { + v := windowsGPUVendor(jsonStr(o, "PNPDeviceID"), jsonStr(o, "AdapterCompatibility")) + if v == "" { + continue // skip Microsoft Basic Display / virtual adapters + } + if chosen == nil { + chosen = o + vendor = v + } + if v == vendor { + count++ + } + } + if chosen == nil { + return nil + } + + gpu := &GPUInfo{ + Vendor: vendor, + Name: strings.TrimSpace(jsonStr(chosen, "Name")), + DriverVersion: strings.TrimSpace(jsonStr(chosen, "DriverVersion")), + Count: count, + } + + switch vendor { + case "amd": + info := amdPCIToInfo(windowsPCIDeviceID(jsonStr(chosen, "PNPDeviceID"))) + if info.name != "" { + gpu.Name = info.name + } + gpu.ComputeID = info.computeID + gpu.UnifiedMemory = info.unified + gpu.Arch = firstNonEmptyString(gfxVersionToArch(gpu.ComputeID), amdGPUToArch(gpu.Name), "unknown") + case "intel": + gpu.Arch = intelGPUToArch(gpu.Name) + default: + gpu.Arch = "unknown" + } + return gpu +} + +// windowsGPUVendor maps a video controller to a vendor key, preferring the PCI +// vendor ID embedded in PNPDeviceID and falling back to AdapterCompatibility. +// Returns "" for non-hardware adapters (e.g. Microsoft Basic Display). 
+func windowsGPUVendor(pnpDeviceID, adapterCompatibility string) string { + switch up := strings.ToUpper(pnpDeviceID); { + case strings.Contains(up, "VEN_10DE"): + return "nvidia" + case strings.Contains(up, "VEN_1002"): + return "amd" + case strings.Contains(up, "VEN_8086"): + return "intel" + } + switch c := strings.ToLower(adapterCompatibility); { + case strings.Contains(c, "nvidia"): + return "nvidia" + case strings.Contains(c, "advanced micro devices"), strings.Contains(c, "amd"): + return "amd" + case strings.Contains(c, "intel"): + return "intel" + } + return "" +} + +// windowsPCIDeviceID extracts the 4-hex-digit PCI device ID from a PNPDeviceID +// such as `PCI\VEN_1002&DEV_1586&SUBSYS_...` → "1586". +func windowsPCIDeviceID(pnpDeviceID string) string { + up := strings.ToUpper(pnpDeviceID) + idx := strings.Index(up, "DEV_") + if idx < 0 { + return "" + } + rest := up[idx+len("DEV_"):] + end := 0 + for end < len(rest) && isHexByte(rest[end]) { + end++ + } + return rest[:end] +} + +func isHexByte(b byte) bool { + return (b >= '0' && b <= '9') || (b >= 'A' && b <= 'F') +} diff --git a/internal/hal/cim_test.go b/internal/hal/cim_test.go new file mode 100644 index 00000000..8455e5ab --- /dev/null +++ b/internal/hal/cim_test.go @@ -0,0 +1,260 @@ +package hal + +import "testing" + +// Fixtures captured from a real AMD Ryzen AI Max+ 395 "Strix Halo" Windows 11 +// box via `Get-CimInstance ... | ConvertTo-Json -Compress`. 
+ +func TestDecodeCIMObjects(t *testing.T) { + tests := []struct { + name string + output string + want int + }{ + {"single object", `{"A":1}`, 1}, + {"array", `[{"A":1},{"A":2}]`, 2}, + {"leading junk then object", "\uFEFF\n{\"A\":1}", 1}, + {"empty", "", 0}, + {"whitespace", " \n ", 0}, + {"invalid json", "not json", 0}, + {"empty array", `[]`, 0}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := len(decodeCIMObjects(tt.output)); got != tt.want { + t.Errorf("len = %d, want %d", got, tt.want) + } + }) + } +} + +func TestParseCIMCPU(t *testing.T) { + tests := []struct { + name string + output string + wantModel string + wantCores int + wantThreads int + wantFreq float64 + }{ + { + name: "Strix Halo (real fixture, trailing spaces)", + output: `{"Name":"AMD RYZEN AI MAX+ 395 w/ Radeon 8060S ","NumberOfCores":16,"NumberOfLogicalProcessors":32,"MaxClockSpeed":3000}`, + wantModel: "AMD RYZEN AI MAX+ 395 w/ Radeon 8060S", + wantCores: 16, + wantThreads: 32, + wantFreq: 3.0, + }, + { + name: "Intel single socket", + output: `{"Name":"Intel(R) Core(TM) i9-13900K","NumberOfCores":24,"NumberOfLogicalProcessors":32,"MaxClockSpeed":3600}`, + wantModel: "Intel(R) Core(TM) i9-13900K", + wantCores: 24, + wantThreads: 32, + wantFreq: 3.6, + }, + { + name: "dual socket sums cores/threads", + output: `[{"Name":"Intel Xeon Gold","NumberOfCores":32,"NumberOfLogicalProcessors":64,"MaxClockSpeed":2800},{"Name":"Intel Xeon Gold","NumberOfCores":32,"NumberOfLogicalProcessors":64,"MaxClockSpeed":2800}]`, + wantModel: "Intel Xeon Gold", + wantCores: 64, + wantThreads: 128, + wantFreq: 2.8, + }, + {"empty", "", "", 0, 0, 0}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + info := CPUInfo{} + parseCIMCPU(tt.output, &info) + if info.Model != tt.wantModel { + t.Errorf("Model = %q, want %q", info.Model, tt.wantModel) + } + if info.Cores != tt.wantCores { + t.Errorf("Cores = %d, want %d", info.Cores, tt.wantCores) + } + if info.Threads != 
tt.wantThreads { + t.Errorf("Threads = %d, want %d", info.Threads, tt.wantThreads) + } + if info.FreqGHz != tt.wantFreq { + t.Errorf("FreqGHz = %v, want %v", info.FreqGHz, tt.wantFreq) + } + }) + } +} + +func TestParseCIMRAM(t *testing.T) { + tests := []struct { + name string + output string + wantTotal int + wantAvailable int + }{ + { + name: "Strix Halo (real fixture, KiB)", + output: `{"TotalVisibleMemorySize":33184580,"FreePhysicalMemory":25201448}`, + wantTotal: 32406, + wantAvailable: 24610, + }, + {"empty", "", 0, 0}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + info := RAMInfo{} + parseCIMRAM(tt.output, &info) + if info.TotalMiB != tt.wantTotal { + t.Errorf("TotalMiB = %d, want %d", info.TotalMiB, tt.wantTotal) + } + if info.AvailableMiB != tt.wantAvailable { + t.Errorf("AvailableMiB = %d, want %d", info.AvailableMiB, tt.wantAvailable) + } + }) + } +} + +func TestParseCIMSwap(t *testing.T) { + tests := []struct { + name string + output string + want int + }{ + {"two pagefiles sum (real fixture)", `[{"AllocatedBaseSize":20480},{"AllocatedBaseSize":96000}]`, 116480}, + {"single pagefile", `{"AllocatedBaseSize":20480}`, 20480}, + {"system managed (empty)", "", 0}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + info := RAMInfo{} + parseCIMSwap(tt.output, &info) + if info.SwapTotalMiB != tt.want { + t.Errorf("SwapTotalMiB = %d, want %d", info.SwapTotalMiB, tt.want) + } + }) + } +} + +func TestParseCIMCPULoad(t *testing.T) { + tests := []struct { + name string + output string + want float64 + }{ + {"idle (real fixture)", `{"LoadPercentage":0}`, 0}, + {"single load", `{"LoadPercentage":42}`, 42}, + {"dual socket averaged", `[{"LoadPercentage":25},{"LoadPercentage":75}]`, 50}, + {"empty", "", 0}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := parseCIMCPULoad(tt.output); got != tt.want { + t.Errorf("parseCIMCPULoad() = %v, want %v", got, tt.want) + } + }) + } +} + +func 
TestWindowsGPUVendor(t *testing.T) { + tests := []struct { + pnp string + compat string + want string + }{ + {`PCI\VEN_1002&DEV_1586&SUBSYS_801D2014`, "Advanced Micro Devices, Inc.", "amd"}, + {`PCI\VEN_10DE&DEV_2782`, "NVIDIA", "nvidia"}, + {`PCI\VEN_8086&DEV_56A0`, "Intel Corporation", "intel"}, + {`ROOT\BasicDisplay`, "(Standard display types)", ""}, + {"", "Advanced Micro Devices, Inc.", "amd"}, + {"", "NVIDIA", "nvidia"}, + {"", "", ""}, + } + for _, tt := range tests { + t.Run(tt.pnp+"|"+tt.compat, func(t *testing.T) { + if got := windowsGPUVendor(tt.pnp, tt.compat); got != tt.want { + t.Errorf("windowsGPUVendor(%q,%q) = %q, want %q", tt.pnp, tt.compat, got, tt.want) + } + }) + } +} + +func TestWindowsPCIDeviceID(t *testing.T) { + tests := []struct { + pnp string + want string + }{ + {`PCI\VEN_1002&DEV_1586&SUBSYS_801D2014&REV_C1\4&35FE04F8&0&0041`, "1586"}, + {`PCI\VEN_10DE&DEV_2782&SUBSYS_...`, "2782"}, + {`ROOT\BasicDisplay`, ""}, + {"", ""}, + } + for _, tt := range tests { + t.Run(tt.pnp, func(t *testing.T) { + if got := windowsPCIDeviceID(tt.pnp); got != tt.want { + t.Errorf("windowsPCIDeviceID(%q) = %q, want %q", tt.pnp, got, tt.want) + } + }) + } +} + +func TestParseWindowsGPUs(t *testing.T) { + t.Run("Strix Halo Radeon 8060S (real fixture)", func(t *testing.T) { + out := `{"Name":"AMD Radeon(TM) 8060S Graphics","DriverVersion":"32.0.31007.1017","AdapterCompatibility":"Advanced Micro Devices, Inc.","PNPDeviceID":"PCI\\VEN_1002&DEV_1586&SUBSYS_801D2014&REV_C1\\4&35FE04F8&0&0041","AdapterRAM":4293918720}` + gpu := parseWindowsGPUs(out) + if gpu == nil { + t.Fatal("expected non-nil GPU") + } + if gpu.Vendor != "amd" { + t.Errorf("Vendor = %q, want amd", gpu.Vendor) + } + if gpu.Name != "AMD Radeon 8060S Graphics" { + t.Errorf("Name = %q, want AMD Radeon 8060S Graphics", gpu.Name) + } + if gpu.Arch != "RDNA3.5" { + t.Errorf("Arch = %q, want RDNA3.5", gpu.Arch) + } + if gpu.ComputeID != "gfx1151" { + t.Errorf("ComputeID = %q, want gfx1151", gpu.ComputeID) 
+ } + if gpu.DriverVersion != "32.0.31007.1017" { + t.Errorf("DriverVersion = %q, want 32.0.31007.1017", gpu.DriverVersion) + } + if !gpu.UnifiedMemory { + t.Error("UnifiedMemory = false, want true for Strix Halo APU") + } + if gpu.Count != 1 { + t.Errorf("Count = %d, want 1", gpu.Count) + } + }) + + t.Run("skips Microsoft Basic Display, picks NVIDIA", func(t *testing.T) { + out := `[{"Name":"Microsoft Basic Display Adapter","DriverVersion":"10.0.0","AdapterCompatibility":"(Standard display types)","PNPDeviceID":"ROOT\\BasicDisplay\\0000"},{"Name":"NVIDIA GeForce RTX 4060 Laptop GPU","DriverVersion":"32.0.15.6636","AdapterCompatibility":"NVIDIA","PNPDeviceID":"PCI\\VEN_10DE&DEV_28E0&SUBSYS_..."}]` + gpu := parseWindowsGPUs(out) + if gpu == nil { + t.Fatal("expected non-nil GPU") + } + if gpu.Vendor != "nvidia" { + t.Errorf("Vendor = %q, want nvidia", gpu.Vendor) + } + if gpu.Name != "NVIDIA GeForce RTX 4060 Laptop GPU" { + t.Errorf("Name = %q", gpu.Name) + } + if gpu.DriverVersion != "32.0.15.6636" { + t.Errorf("DriverVersion = %q", gpu.DriverVersion) + } + if gpu.Count != 1 { + t.Errorf("Count = %d, want 1", gpu.Count) + } + }) + + t.Run("no recognizable GPU returns nil", func(t *testing.T) { + out := `{"Name":"Microsoft Basic Display Adapter","AdapterCompatibility":"(Standard display types)","PNPDeviceID":"ROOT\\BasicDisplay\\0000"}` + if gpu := parseWindowsGPUs(out); gpu != nil { + t.Fatalf("expected nil, got %+v", gpu) + } + }) + + t.Run("empty output returns nil", func(t *testing.T) { + if gpu := parseWindowsGPUs(""); gpu != nil { + t.Fatalf("expected nil, got %+v", gpu) + } + }) +} diff --git a/internal/hal/detect_windows.go b/internal/hal/detect_windows.go index 06c65836..5a0caed4 100644 --- a/internal/hal/detect_windows.go +++ b/internal/hal/detect_windows.go @@ -6,10 +6,24 @@ import ( "context" "log/slog" "runtime" - "strconv" - "strings" ) +// Windows hardware detection via CIM. 
Win32 WMIC was removed in Windows 11 24H2+, +// so every query shells out to PowerShell's Get-CimInstance and is parsed by the +// build-tag-free helpers in cim.go. + +const ( + cimCPUScript = "Get-CimInstance Win32_Processor | Select-Object Name,NumberOfCores,NumberOfLogicalProcessors,MaxClockSpeed | ConvertTo-Json -Compress" + cimRAMScript = "Get-CimInstance Win32_OperatingSystem | Select-Object TotalVisibleMemorySize,FreePhysicalMemory | ConvertTo-Json -Compress" + cimPageFileScript = "Get-CimInstance Win32_PageFileUsage | Select-Object AllocatedBaseSize | ConvertTo-Json -Compress" + cimCPULoadScript = "Get-CimInstance Win32_Processor | Select-Object LoadPercentage | ConvertTo-Json -Compress" + cimGPUScript = "Get-CimInstance Win32_VideoController | Select-Object Name,DriverVersion,AdapterCompatibility,PNPDeviceID,AdapterRAM | ConvertTo-Json -Compress" +) + +func runCIM(ctx context.Context, runner CommandRunner, script string) ([]byte, error) { + return runner.Run(ctx, "powershell", "-NoProfile", "-NonInteractive", "-Command", script) +} + func detectCPU(ctx context.Context, runner CommandRunner) CPUInfo { info := CPUInfo{ Arch: runtime.GOARCH, @@ -17,142 +31,41 @@ func detectCPU(ctx context.Context, runner CommandRunner) CPUInfo { Threads: runtime.NumCPU(), } - out, err := runner.Run(ctx, "wmic", "cpu", "get", "Name,NumberOfCores,NumberOfLogicalProcessors,MaxClockSpeed", "/format:csv") + out, err := runCIM(ctx, runner, cimCPUScript) if err != nil { - slog.Warn("wmic cpu detection failed, using defaults", "error", err) + slog.Warn("CIM cpu detection failed, using defaults", "error", err) return info } - parseWMICCPU(string(out), &info) + parseCIMCPU(string(out), &info) return info } -func parseWMICCPU(output string, info *CPUInfo) { - // wmic csv output has a header line, then data lines. 
- // Format: Node,MaxClockSpeed,Name,NumberOfCores,NumberOfLogicalProcessors - lines := nonEmptyLines(output) - if len(lines) < 2 { - return - } - - // Find column indices from header - header := splitCSV(lines[0]) - colIdx := make(map[string]int) - for i, h := range header { - colIdx[strings.TrimSpace(h)] = i - } - - fields := splitCSV(lines[1]) - - if idx, ok := colIdx["Name"]; ok && idx < len(fields) { - info.Model = fields[idx] - } - if idx, ok := colIdx["NumberOfCores"]; ok && idx < len(fields) { - if n, err := strconv.Atoi(fields[idx]); err == nil { - info.Cores = n - } - } - if idx, ok := colIdx["NumberOfLogicalProcessors"]; ok && idx < len(fields) { - if n, err := strconv.Atoi(fields[idx]); err == nil { - info.Threads = n - } - } - if idx, ok := colIdx["MaxClockSpeed"]; ok && idx < len(fields) { - if mhz, err := strconv.ParseFloat(fields[idx], 64); err == nil { - info.FreqGHz = mhz / 1000.0 - } - } -} - func detectRAM(ctx context.Context, runner CommandRunner) RAMInfo { info := RAMInfo{} - out, err := runner.Run(ctx, "wmic", "os", "get", "TotalVisibleMemorySize,FreePhysicalMemory", "/format:csv") + out, err := runCIM(ctx, runner, cimRAMScript) if err != nil { - slog.Warn("wmic RAM detection failed, using defaults", "error", err) + slog.Warn("CIM RAM detection failed, using defaults", "error", err) return info } + parseCIMRAM(string(out), &info) - parseWMICRAM(string(out), &info) - - // Detect swap (pagefile) size - if swapOut, err := runner.Run(ctx, "wmic", "pagefile", "get", "AllocatedBaseSize", "/format:csv"); err == nil { - parseWMICSwap(string(swapOut), &info) + // Pagefile (swap) is best-effort; system-managed hosts may report nothing. 
+ if swapOut, err := runCIM(ctx, runner, cimPageFileScript); err == nil { + parseCIMSwap(string(swapOut), &info) } return info } -func parseWMICSwap(output string, info *RAMInfo) { - lines := nonEmptyLines(output) - if len(lines) < 2 { - return - } - header := splitCSV(lines[0]) - colIdx := make(map[string]int) - for i, h := range header { - colIdx[strings.TrimSpace(h)] = i - } - fields := splitCSV(lines[1]) - if idx, ok := colIdx["AllocatedBaseSize"]; ok && idx < len(fields) { - if mb, err := strconv.Atoi(strings.TrimSpace(fields[idx])); err == nil { - info.SwapTotalMiB = mb - } - } -} - -func parseWMICRAM(output string, info *RAMInfo) { - // Format: Node,FreePhysicalMemory,TotalVisibleMemorySize - lines := nonEmptyLines(output) - if len(lines) < 2 { - return - } - - header := splitCSV(lines[0]) - colIdx := make(map[string]int) - for i, h := range header { - colIdx[strings.TrimSpace(h)] = i - } - - fields := splitCSV(lines[1]) - - if idx, ok := colIdx["TotalVisibleMemorySize"]; ok && idx < len(fields) { - if kb, err := strconv.ParseInt(fields[idx], 10, 64); err == nil { - info.TotalMiB = int(kb / 1024) - } - } - if idx, ok := colIdx["FreePhysicalMemory"]; ok && idx < len(fields) { - if kb, err := strconv.ParseInt(fields[idx], 10, 64); err == nil { - info.AvailableMiB = int(kb / 1024) - } - } -} - func collectCPUMetrics(ctx context.Context, runner CommandRunner) CPUMetrics { - out, err := runner.Run(ctx, "wmic", "cpu", "get", "LoadPercentage", "/format:csv") + out, err := runCIM(ctx, runner, cimCPULoadScript) if err != nil { - slog.Warn("wmic CPU metrics failed, using defaults", "error", err) + slog.Warn("CIM CPU metrics failed, using defaults", "error", err) return CPUMetrics{} } - - lines := nonEmptyLines(string(out)) - if len(lines) < 2 { - return CPUMetrics{} - } - - header := splitCSV(lines[0]) - colIdx := make(map[string]int) - for i, h := range header { - colIdx[strings.TrimSpace(h)] = i - } - - fields := splitCSV(lines[1]) - if idx, ok := 
colIdx["LoadPercentage"]; ok && idx < len(fields) { - if pct, err := strconv.ParseFloat(fields[idx], 64); err == nil { - return CPUMetrics{UsagePercent: pct} - } - } - return CPUMetrics{} + return CPUMetrics{UsagePercent: parseCIMCPULoad(string(out))} } func collectRAMMetrics(ctx context.Context, runner CommandRunner) RAMMetrics { @@ -167,3 +80,15 @@ func collectRAMMetrics(ctx context.Context, runner CommandRunner) RAMMetrics { UsedMiB: used, } } + +// detectPlatformGPU detects GPUs via Win32_VideoController (CIM) as a Windows +// fallback for when no vendor SMI tool (nvidia-smi/rocm-smi) is on PATH — the +// common case on AMD APU hosts. See parseWindowsGPUs for the limitations. +func detectPlatformGPU(ctx context.Context, runner CommandRunner) *GPUInfo { + out, err := runCIM(ctx, runner, cimGPUScript) + if err != nil { + slog.Debug("CIM GPU detection unavailable", "error", err) + return nil + } + return parseWindowsGPUs(string(out)) +} diff --git a/internal/hal/detect_windows_test.go b/internal/hal/detect_windows_test.go deleted file mode 100644 index 6e2ae08c..00000000 --- a/internal/hal/detect_windows_test.go +++ /dev/null @@ -1,109 +0,0 @@ -//go:build windows - -package hal - -import "testing" - -func TestParseWMICCPU(t *testing.T) { - tests := []struct { - name string - output string - wantModel string - wantCores int - wantThreads int - wantFreq float64 - }{ - { - name: "standard Intel", - output: "Node,MaxClockSpeed,Name,NumberOfCores,NumberOfLogicalProcessors\nWORKSTATION,3600,Intel(R) Core(TM) i9-13900K,24,32\n", - wantModel: "Intel(R) Core(TM) i9-13900K", - wantCores: 24, - wantThreads: 32, - wantFreq: 3.6, - }, - { - name: "AMD Ryzen", - output: "Node,MaxClockSpeed,Name,NumberOfCores,NumberOfLogicalProcessors\nDESKTOP,4500,AMD Ryzen 9 7950X,16,32\n", - wantModel: "AMD Ryzen 9 7950X", - wantCores: 16, - wantThreads: 32, - wantFreq: 4.5, - }, - { - name: "empty output", - output: "", - wantModel: "", - wantCores: 0, - wantThreads: 0, - wantFreq: 0, - }, - 
{ - name: "header only", - output: "Node,MaxClockSpeed,Name,NumberOfCores,NumberOfLogicalProcessors\n", - wantModel: "", - wantCores: 0, - wantThreads: 0, - wantFreq: 0, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - info := CPUInfo{} - parseWMICCPU(tt.output, &info) - if info.Model != tt.wantModel { - t.Errorf("Model = %q, want %q", info.Model, tt.wantModel) - } - if info.Cores != tt.wantCores { - t.Errorf("Cores = %d, want %d", info.Cores, tt.wantCores) - } - if info.Threads != tt.wantThreads { - t.Errorf("Threads = %d, want %d", info.Threads, tt.wantThreads) - } - if info.FreqGHz != tt.wantFreq { - t.Errorf("FreqGHz = %f, want %f", info.FreqGHz, tt.wantFreq) - } - }) - } -} - -func TestParseWMICRAM(t *testing.T) { - tests := []struct { - name string - output string - wantTotal int - wantAvailable int - }{ - { - name: "32GB system", - output: "Node,FreePhysicalMemory,TotalVisibleMemorySize\nWORKSTATION,16777216,33554432\n", - wantTotal: 32768, - wantAvailable: 16384, - }, - { - name: "16GB system low memory", - output: "Node,FreePhysicalMemory,TotalVisibleMemorySize\nLAPTOP,2097152,16777216\n", - wantTotal: 16384, - wantAvailable: 2048, - }, - { - name: "empty output", - output: "", - wantTotal: 0, - wantAvailable: 0, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - info := RAMInfo{} - parseWMICRAM(tt.output, &info) - if info.TotalMiB != tt.wantTotal { - t.Errorf("TotalMiB = %d, want %d", info.TotalMiB, tt.wantTotal) - } - if info.AvailableMiB != tt.wantAvailable { - t.Errorf("AvailableMiB = %d, want %d", info.AvailableMiB, tt.wantAvailable) - } - }) - } -} diff --git a/internal/hal/gpu.go b/internal/hal/gpu.go index 0b3a484f..37f72d2d 100644 --- a/internal/hal/gpu.go +++ b/internal/hal/gpu.go @@ -128,6 +128,11 @@ func detectGPU(ctx context.Context, runner CommandRunner) *GPUInfo { return gpu } } + // Platform fallback (Windows CIM) for hosts without a vendor SMI tool on PATH. 
+ if gpu := detectPlatformGPU(ctx, runner); gpu != nil { + enrichGPU(ctx, runner, gpu) + return gpu + } if gpu := detectAMDDRM(ctx, runner); gpu != nil { enrichGPU(ctx, runner, gpu) return gpu diff --git a/internal/hal/gpu_nonwindows.go b/internal/hal/gpu_nonwindows.go new file mode 100644 index 00000000..d76d29b8 --- /dev/null +++ b/internal/hal/gpu_nonwindows.go @@ -0,0 +1,12 @@ +//go:build !windows + +package hal + +import "context" + +// detectPlatformGPU is a no-op on non-Windows hosts. Linux AMD detection is +// handled by the SMI probe chain and detectAMDDRM (sysfs); the Windows CIM +// fallback lives in detect_windows.go. +func detectPlatformGPU(_ context.Context, _ CommandRunner) *GPUInfo { + return nil +} diff --git a/internal/hal/testhelper_windows_test.go b/internal/hal/testhelper_windows_test.go index d1da505b..8572fc4b 100644 --- a/internal/hal/testhelper_windows_test.go +++ b/internal/hal/testhelper_windows_test.go @@ -2,13 +2,14 @@ package hal +// Mock outputs keyed by the exact CIM PowerShell commands detect_windows.go runs. 
func platformMockOutputs() map[string]mockResult { return map[string]mockResult{ - "wmic cpu get Name,NumberOfCores,NumberOfLogicalProcessors,MaxClockSpeed /format:csv": { - output: []byte("Node,MaxClockSpeed,Name,NumberOfCores,NumberOfLogicalProcessors\nWORKSTATION,3600,Intel(R) Core(TM) i9-13900K,24,32\n"), + "powershell -NoProfile -NonInteractive -Command " + cimCPUScript: { + output: []byte(`{"Name":"Intel(R) Core(TM) i9-13900K","NumberOfCores":24,"NumberOfLogicalProcessors":32,"MaxClockSpeed":3600}`), }, - "wmic os get TotalVisibleMemorySize,FreePhysicalMemory /format:csv": { - output: []byte("Node,FreePhysicalMemory,TotalVisibleMemorySize\nWORKSTATION,16777216,33554432\n"), + "powershell -NoProfile -NonInteractive -Command " + cimRAMScript: { + output: []byte(`{"TotalVisibleMemorySize":33554432,"FreePhysicalMemory":16777216}`), }, } } From c94494b69e24dc2ffd4fe74eee737c718263fad7 Mon Sep 17 00:00:00 2001 From: Codex Date: Tue, 9 Jun 2026 15:47:51 +0800 Subject: [PATCH 2/3] fix(hal/windows): report true installed memory for unified-memory APUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Strix Halo and similar unified-memory APUs, Windows exposes only a fraction of physical memory to the OS (e.g. 32 GiB of 128 GiB) — the rest is carved out for the iGPU. TotalVisibleMemorySize therefore undersold a 128 GiB box as 32 GiB, which also flowed into the unified-VRAM backfill and the onboarding "统一内存" card. Query Win32_PhysicalMemory (sum of DIMM capacity) and use it as RAM.TotalMiB when it exceeds the OS-visible total; recompute AvailableMiB as total - OS-used so it stays correct for both unified and conventional hosts. Tests (cim_test.go, build-tag-free): parse the Measure-Object Sum JSON; override to 128 GiB on a Strix Halo fixture; no-shrink / no-op on conventional hosts. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/hal/cim.go | 34 +++++++++++++++++++++++++++++++ internal/hal/cim_test.go | 37 ++++++++++++++++++++++++++++++++++ internal/hal/detect_windows.go | 7 +++++++ 3 files changed, 78 insertions(+) diff --git a/internal/hal/cim.go b/internal/hal/cim.go index ef07be61..1f80878e 100644 --- a/internal/hal/cim.go +++ b/internal/hal/cim.go @@ -79,6 +79,40 @@ func parseCIMRAM(output string, info *RAMInfo) { } } +// parseCIMInstalledMemoryBytes reads the summed DIMM capacity (bytes) from +// `Win32_PhysicalMemory | Measure-Object Capacity -Sum` JSON ({"Sum":N}). +func parseCIMInstalledMemoryBytes(output string) int64 { + objs := decodeCIMObjects(output) + if len(objs) == 0 { + return 0 + } + return jsonInt(objs[0], "Sum") +} + +// applyInstalledMemoryTotal overrides RAMInfo.TotalMiB with the true installed +// memory (sum of DIMM capacity) when it exceeds the OS-visible total. On +// unified-memory APUs (e.g. Strix Halo) the OS only sees a fraction with the +// rest carved out for the iGPU, so the installed total is the meaningful figure. +// AvailableMiB is recomputed as total minus OS-used so it stays correct on both +// unified and conventional hosts. +func applyInstalledMemoryTotal(info *RAMInfo, installedBytes int64) { + if installedBytes <= 0 { + return + } + total := int(installedBytes / (1024 * 1024)) + if total <= info.TotalMiB { + return + } + osUsed := info.TotalMiB - info.AvailableMiB + if osUsed < 0 { + osUsed = 0 + } + info.TotalMiB = total + if avail := total - osUsed; avail >= 0 { + info.AvailableMiB = avail + } +} + // parseCIMSwap sums AllocatedBaseSize (MiB) across all Win32_PageFileUsage rows. 
func parseCIMSwap(output string, info *RAMInfo) { total := 0 diff --git a/internal/hal/cim_test.go b/internal/hal/cim_test.go index 8455e5ab..784d8760 100644 --- a/internal/hal/cim_test.go +++ b/internal/hal/cim_test.go @@ -258,3 +258,40 @@ func TestParseWindowsGPUs(t *testing.T) { } }) } + +func TestParseCIMInstalledMemoryBytes(t *testing.T) { + // Get-CimInstance Win32_PhysicalMemory | Measure-Object Capacity -Sum + if got := parseCIMInstalledMemoryBytes(`{"Sum":137438953472}`); got != 137438953472 { + t.Errorf("got %d, want 137438953472 (128 GiB)", got) + } + if got := parseCIMInstalledMemoryBytes(``); got != 0 { + t.Errorf("empty output -> %d, want 0", got) + } +} + +func TestApplyInstalledMemoryTotal(t *testing.T) { + // Strix Halo: OS sees ~32 GiB but 128 GiB is installed (rest carved out for + // the iGPU). Report the full installed total; recompute available as + // total - OS-used so it stays meaningful. + info := RAMInfo{TotalMiB: 32406, AvailableMiB: 20198} // OS-used = 12208 + applyInstalledMemoryTotal(&info, 137438953472) + if info.TotalMiB != 131072 { + t.Errorf("TotalMiB = %d, want 131072 (128 GiB)", info.TotalMiB) + } + if want := 131072 - (32406 - 20198); info.AvailableMiB != want { + t.Errorf("AvailableMiB = %d, want %d", info.AvailableMiB, want) + } +} + +func TestApplyInstalledMemoryTotalNoShrink(t *testing.T) { + // Normal box: installed nameplate not larger than OS-visible -> no change. 
+ info := RAMInfo{TotalMiB: 32000, AvailableMiB: 16000} + applyInstalledMemoryTotal(&info, int64(31000)*1024*1024) + if info.TotalMiB != 32000 || info.AvailableMiB != 16000 { + t.Errorf("unexpected change: %+v", info) + } + applyInstalledMemoryTotal(&info, 0) // no data -> no change + if info.TotalMiB != 32000 { + t.Errorf("zero installed changed total: %+v", info) + } +} diff --git a/internal/hal/detect_windows.go b/internal/hal/detect_windows.go index 5a0caed4..46d6e971 100644 --- a/internal/hal/detect_windows.go +++ b/internal/hal/detect_windows.go @@ -15,6 +15,7 @@ import ( const ( cimCPUScript = "Get-CimInstance Win32_Processor | Select-Object Name,NumberOfCores,NumberOfLogicalProcessors,MaxClockSpeed | ConvertTo-Json -Compress" cimRAMScript = "Get-CimInstance Win32_OperatingSystem | Select-Object TotalVisibleMemorySize,FreePhysicalMemory | ConvertTo-Json -Compress" + cimPhysMemScript = "Get-CimInstance Win32_PhysicalMemory | Measure-Object -Property Capacity -Sum | Select-Object Sum | ConvertTo-Json -Compress" cimPageFileScript = "Get-CimInstance Win32_PageFileUsage | Select-Object AllocatedBaseSize | ConvertTo-Json -Compress" cimCPULoadScript = "Get-CimInstance Win32_Processor | Select-Object LoadPercentage | ConvertTo-Json -Compress" cimGPUScript = "Get-CimInstance Win32_VideoController | Select-Object Name,DriverVersion,AdapterCompatibility,PNPDeviceID,AdapterRAM | ConvertTo-Json -Compress" @@ -51,6 +52,12 @@ func detectRAM(ctx context.Context, runner CommandRunner) RAMInfo { } parseCIMRAM(string(out), &info) + // Report true installed memory: unified-memory APUs (Strix Halo) expose only + // a fraction to the OS, carving the rest out for the iGPU. + if physOut, err := runCIM(ctx, runner, cimPhysMemScript); err == nil { + applyInstalledMemoryTotal(&info, parseCIMInstalledMemoryBytes(string(physOut))) + } + // Pagefile (swap) is best-effort; system-managed hosts may report nothing. 
if swapOut, err := runCIM(ctx, runner, cimPageFileScript); err == nil { parseCIMSwap(string(swapOut), &info) From 5882177d66e5e7830210002a116c2ca70a96dc67 Mon Sep 17 00:00:00 2001 From: Codex Date: Tue, 9 Jun 2026 17:09:20 +0800 Subject: [PATCH 3/3] fix(hal/windows): use real AMD APU usable VRAM (ROCm) for deploy-fit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Strix Halo, Win32 AdapterRAM saturates at 4 GiB and there is no rocm-smi, so the unified-memory backfill set GPU.VRAMMiB = installed RAM (128 GiB). But the OS carves that 128 GiB pool — only ~110 GiB is GPU-addressable (dedicated VRAM + GTT) — so deploy-fit over-stated usable VRAM and could accept a model the iGPU cannot hold. When the AMD iGPU's VRAM is unknown, query the ROCm-capable llama.cpp engine's own `--list-devices` (preferring AIMA_ENGINE_DIR, else PATH) and use its reported device memory (e.g. "ROCm0: ... (110456 MiB, ...)") as GPU.VRAMMiB. Installed RAM — and thus the normalized "unified memory" the UI shows — is unchanged; only the fit-relevant usable VRAM is corrected. Tests (cim_test.go, build-tag-free): parseLlamaROCmVRAMMiB extracts the device total and ignores non-device output. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/hal/cim.go | 26 ++++++++++++++++++ internal/hal/cim_test.go | 13 +++++++++ internal/hal/detect_windows.go | 50 +++++++++++++++++++++++++++++++++- 3 files changed, 88 insertions(+), 1 deletion(-) diff --git a/internal/hal/cim.go b/internal/hal/cim.go index 1f80878e..415c675b 100644 --- a/internal/hal/cim.go +++ b/internal/hal/cim.go @@ -2,9 +2,35 @@ package hal import ( "encoding/json" + "regexp" + "strconv" "strings" ) +// llamaDeviceVRAMRe captures the total VRAM (MiB) from a llama.cpp +// `--list-devices` line such as +// " ROCm0: AMD Radeon(TM) 8060S Graphics (110456 MiB, 110301 MiB free)". 
var llamaDeviceVRAMRe = regexp.MustCompile(`\(([0-9]+)\s*MiB`)

// llamaROCmDeviceRe recognizes a ROCm device identifier ("ROCm0:", "ROCm1:",
// ...) anywhere on a `--list-devices` line.
var llamaROCmDeviceRe = regexp.MustCompile(`(?i)\bROCm[0-9]+:`)

// parseLlamaROCmVRAMMiB extracts the iGPU's total usable VRAM (MiB) as reported
// by the inference engine itself. On AMD APUs (Strix Halo) Win32 AdapterRAM
// saturates at 4 GiB and there is no rocm-smi, so the ROCm-capable llama.cpp's
// own device enumeration is the authoritative source of the GPU-addressable
// pool (dedicated VRAM + GTT).
//
// The first number followed by "MiB" inside parentheses on a line is taken as
// that device's total. Lines naming a ROCm device win over any other device
// line, so a differently-backed device listed earlier cannot masquerade as the
// ROCm pool. If no ROCm line is present the first device line of any backend
// is used. Returns 0 when no device line is present.
func parseLlamaROCmVRAMMiB(output string) int {
	firstAny := 0
	for _, line := range strings.Split(output, "\n") {
		m := llamaDeviceVRAMRe.FindStringSubmatch(line)
		if m == nil {
			continue
		}
		n, err := strconv.Atoi(m[1])
		if err != nil || n <= 0 {
			continue
		}
		if llamaROCmDeviceRe.MatchString(line) {
			return n
		}
		if firstAny == 0 {
			firstAny = n
		}
	}
	return firstAny
}
b/internal/hal/detect_windows.go @@ -5,7 +5,11 @@ package hal import ( "context" "log/slog" + "os" + "os/exec" + "path/filepath" "runtime" + "strings" ) // Windows hardware detection via CIM. Win32 WMIC was removed in Windows 11 24H2+, @@ -97,5 +101,49 @@ func detectPlatformGPU(ctx context.Context, runner CommandRunner) *GPUInfo { slog.Debug("CIM GPU detection unavailable", "error", err) return nil } - return parseWindowsGPUs(string(out)) + gpu := parseWindowsGPUs(string(out)) + // AMD APU VRAM: Win32 AdapterRAM saturates at 4 GiB and there is no rocm-smi, + // so read the real GPU-addressable pool (dedicated VRAM + GTT) from the + // ROCm-capable llama.cpp engine when available. Without it, the unified-memory + // backfill (= installed RAM) applies downstream — which over-states the + // usable VRAM on hosts where the OS carves the pool (e.g. Strix Halo). + if gpu != nil && gpu.Vendor == "amd" && gpu.UnifiedMemory && gpu.VRAMMiB == 0 { + if mib := amdAPUVRAMMiBFromEngine(ctx, runner); mib > 0 { + gpu.VRAMMiB = mib + } + } + return gpu +} + +// amdAPUVRAMMiBFromEngine asks a ROCm-capable llama.cpp binary to enumerate its +// devices and returns the iGPU's total VRAM (MiB), or 0 if no engine is found. +func amdAPUVRAMMiBFromEngine(ctx context.Context, runner CommandRunner) int { + llama := findROCmLlamaBinary() + if llama == "" { + return 0 + } + // llama.cpp prints the device list to stderr; fold it into stdout (the only + // stream execRunner captures) via PowerShell redirection. + out, _ := runner.Run(ctx, "powershell", "-NoProfile", "-NonInteractive", "-Command", + "& '"+llama+"' --list-devices 2>&1") + return parseLlamaROCmVRAMMiB(string(out)) +} + +// findROCmLlamaBinary locates a llama.cpp CLI, preferring AIMA's configured +// engine directory (AIMA_ENGINE_DIR) and falling back to PATH. 
func findROCmLlamaBinary() string {
	// Configured engine directory first: accept the first candidate that
	// exists there and is not a directory.
	if dir := strings.TrimSpace(os.Getenv("AIMA_ENGINE_DIR")); dir != "" {
		for _, name := range []string{"llama-cli.exe", "llama-server.exe"} {
			p := filepath.Join(dir, name)
			if st, err := os.Stat(p); err == nil && !st.IsDir() {
				return p
			}
		}
	}
	// PATH fallback. llama-server is tried after llama-cli so that the same
	// binaries accepted from the engine directory are also found on PATH.
	for _, name := range []string{"llama-cli.exe", "llama-cli", "llama-server.exe", "llama-server"} {
		if p, err := exec.LookPath(name); err == nil {
			return p
		}
	}
	// Nothing found in either location.
	return ""
}