From 8dd7e1c3f9ce957ec978360f3e3324af92411957 Mon Sep 17 00:00:00 2001
From: rjckkkkk <59609580+rjckkkkk@users.noreply.github.com>
Date: Mon, 8 Jun 2026 08:42:55 +0000
Subject: [PATCH 1/3] Detect Windows hardware via CIM, add AMD GPU detection

Windows 11 24H2+ removes the legacy wmic CLI, so CPU/RAM detection on
modern Windows returned an empty model and zero RAM. AMD GPUs were also
invisible on Windows: the probe chain only knows nvidia-smi/rocm-smi and
the sysfs fallback is Linux-only, so AMD APU hosts (Ryzen AI Max+ "Strix
Halo") detected no accelerator at all.

Replace wmic with `powershell Get-CimInstance` for CPU, RAM, pagefile and
CPU load, and add a Windows Win32_VideoController GPU fallback wired into
detectGPU through a detectPlatformGPU hook (no-op on non-Windows). AMD
identity (name/gfx/arch/unified) is resolved from the PCI device ID via
the existing amdPCIToInfo, shared with the Linux sysfs path.

CIM cannot report true APU VRAM (the AdapterRAM property of
Win32_VideoController is a uint32 that saturates at 4 GiB) or GPU
utilization, so VRAM falls back to OS-visible RAM via the existing
unified-memory backfill; the exact carve-out still needs amd-smi/rocm-smi.

Pure CIM JSON parsers live in cim.go (no build tags) with table-driven
tests in cim_test.go using fixtures captured from a real Strix Halo box.

Verified on AMD Ryzen AI Max+ 395 (Radeon 8060S, gfx1151): hal detect now
reports the GPU (RDNA3.5, driver 32.0.31007.1017), CPU model + 16c/32t,
and 32 GB RAM.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 internal/hal/cim.go                     | 204 +++++++++++++++++++
 internal/hal/cim_test.go                | 260 ++++++++++++++++++++++++
 internal/hal/detect_windows.go          | 155 ++++----------
 internal/hal/detect_windows_test.go     | 109 ----------
 internal/hal/gpu.go                     |   5 +
 internal/hal/gpu_nonwindows.go          |  12 ++
 internal/hal/testhelper_windows_test.go |   9 +-
 7 files changed, 526 insertions(+), 228 deletions(-)
 create mode 100644 internal/hal/cim.go
 create mode 100644 internal/hal/cim_test.go
 delete mode 100644 internal/hal/detect_windows_test.go
 create mode 100644 internal/hal/gpu_nonwindows.go

diff --git a/internal/hal/cim.go b/internal/hal/cim.go
new file mode 100644
index 00000000..ef07be61
--- /dev/null
+++ b/internal/hal/cim.go
@@ -0,0 +1,204 @@
+package hal
+
+import (
+	"encoding/json"
+	"strings"
+)
+
+// CIM (Common Information Model) parsing for Windows hardware detection.
+//
+// Modern Windows (11 24H2+) removes the legacy `wmic` CLI, so detection shells
+// out to `powershell Get-CimInstance ... | ConvertTo-Json` instead. These
+// parsers are kept free of build tags and OS calls so they unit-test on any
+// platform; only the command execution lives in detect_windows.go.
+
+// decodeCIMObjects normalizes `ConvertTo-Json -Compress` output. PowerShell
+// renders a single CIM instance as a JSON object and multiple instances as an
+// array, so both shapes collapse to a slice of maps here.
+func decodeCIMObjects(output string) []map[string]interface{} {
+	start := strings.IndexAny(output, "{[")
+	if start < 0 {
+		return nil
+	}
+	trimmed := strings.TrimSpace(output[start:])
+	if trimmed == "" {
+		return nil
+	}
+	if trimmed[0] == '[' {
+		var arr []map[string]interface{}
+		if err := json.Unmarshal([]byte(trimmed), &arr); err != nil {
+			return nil
+		}
+		return arr
+	}
+	var obj map[string]interface{}
+	if err := json.Unmarshal([]byte(trimmed), &obj); err != nil {
+		return nil
+	}
+	return []map[string]interface{}{obj}
+}
+
+// parseCIMCPU fills CPUInfo from Win32_Processor JSON. Cores and threads sum
+// across sockets; name and clock come from the first processor.
+func parseCIMCPU(output string, info *CPUInfo) {
+	objs := decodeCIMObjects(output)
+	if len(objs) == 0 {
+		return
+	}
+	var cores, threads int
+	for _, o := range objs {
+		cores += int(jsonInt(o, "NumberOfCores"))
+		threads += int(jsonInt(o, "NumberOfLogicalProcessors"))
+	}
+	if name := strings.TrimSpace(jsonStr(objs[0], "Name")); name != "" {
+		info.Model = name
+	}
+	if cores > 0 {
+		info.Cores = cores
+	}
+	if threads > 0 {
+		info.Threads = threads
+	}
+	if mhz := jsonFloat(objs[0], "MaxClockSpeed"); mhz > 0 {
+		info.FreqGHz = mhz / 1000.0
+	}
+}
+
+// parseCIMRAM fills RAMInfo from Win32_OperatingSystem JSON (values in KiB).
+func parseCIMRAM(output string, info *RAMInfo) {
+	objs := decodeCIMObjects(output)
+	if len(objs) == 0 {
+		return
+	}
+	o := objs[0]
+	if kb := jsonInt(o, "TotalVisibleMemorySize"); kb > 0 {
+		info.TotalMiB = int(kb / 1024)
+	}
+	if kb := jsonInt(o, "FreePhysicalMemory"); kb > 0 {
+		info.AvailableMiB = int(kb / 1024)
+	}
+}
+
+// parseCIMSwap sums AllocatedBaseSize (MiB) across all Win32_PageFileUsage rows.
+func parseCIMSwap(output string, info *RAMInfo) {
+	total := 0
+	for _, o := range decodeCIMObjects(output) {
+		total += int(jsonInt(o, "AllocatedBaseSize"))
+	}
+	if total > 0 {
+		info.SwapTotalMiB = total
+	}
+}
+
+// parseCIMCPULoad averages LoadPercentage across all Win32_Processor rows.
+func parseCIMCPULoad(output string) float64 {
+	objs := decodeCIMObjects(output)
+	if len(objs) == 0 {
+		return 0
+	}
+	var sum float64
+	for _, o := range objs {
+		sum += jsonFloat(o, "LoadPercentage")
+	}
+	return sum / float64(len(objs))
+}
+
+// parseWindowsGPUs builds a GPUInfo from Win32_VideoController JSON. It is the
+// Windows fallback used when no vendor SMI tool (nvidia-smi/rocm-smi) is on
+// PATH — common on AMD APU hosts. CIM cannot report true VRAM (Win32 AdapterRAM
+// is a uint32 that saturates at 4 GiB) or utilization, so only static identity
+// fields are populated; VRAM is left to the unified-memory backfill in
+// detectWithRunner. AMD identity (name/gfx/unified) is resolved from the PCI
+// device ID via amdPCIToInfo, shared with the Linux sysfs path.
+func parseWindowsGPUs(output string) *GPUInfo {
+	objs := decodeCIMObjects(output)
+	if len(objs) == 0 {
+		return nil
+	}
+
+	var chosen map[string]interface{}
+	var vendor string
+	count := 0
+	for _, o := range objs {
+		v := windowsGPUVendor(jsonStr(o, "PNPDeviceID"), jsonStr(o, "AdapterCompatibility"))
+		if v == "" {
+			continue // skip Microsoft Basic Display / virtual adapters
+		}
+		if chosen == nil {
+			chosen = o
+			vendor = v
+		}
+		if v == vendor {
+			count++
+		}
+	}
+	if chosen == nil {
+		return nil
+	}
+
+	gpu := &GPUInfo{
+		Vendor:        vendor,
+		Name:          strings.TrimSpace(jsonStr(chosen, "Name")),
+		DriverVersion: strings.TrimSpace(jsonStr(chosen, "DriverVersion")),
+		Count:         count,
+	}
+
+	switch vendor {
+	case "amd":
+		info := amdPCIToInfo(windowsPCIDeviceID(jsonStr(chosen, "PNPDeviceID")))
+		if info.name != "" {
+			gpu.Name = info.name
+		}
+		gpu.ComputeID = info.computeID
+		gpu.UnifiedMemory = info.unified
+		gpu.Arch = firstNonEmptyString(gfxVersionToArch(gpu.ComputeID), amdGPUToArch(gpu.Name), "unknown")
+	case "intel":
+		gpu.Arch = intelGPUToArch(gpu.Name)
+	default:
+		gpu.Arch = "unknown"
+	}
+	return gpu
+}
+
+// windowsGPUVendor maps a video controller to a vendor key, preferring the PCI
+// vendor ID embedded in PNPDeviceID and falling back to AdapterCompatibility.
+// Returns "" for non-hardware adapters (e.g. Microsoft Basic Display).
+func windowsGPUVendor(pnpDeviceID, adapterCompatibility string) string {
+	switch up := strings.ToUpper(pnpDeviceID); {
+	case strings.Contains(up, "VEN_10DE"):
+		return "nvidia"
+	case strings.Contains(up, "VEN_1002"):
+		return "amd"
+	case strings.Contains(up, "VEN_8086"):
+		return "intel"
+	}
+	switch c := strings.ToLower(adapterCompatibility); {
+	case strings.Contains(c, "nvidia"):
+		return "nvidia"
+	case strings.Contains(c, "advanced micro devices"), strings.Contains(c, "amd"):
+		return "amd"
+	case strings.Contains(c, "intel"):
+		return "intel"
+	}
+	return ""
+}
+
+// windowsPCIDeviceID extracts the 4-hex-digit PCI device ID from a PNPDeviceID
+// such as `PCI\VEN_1002&DEV_1586&SUBSYS_...` → "1586".
+func windowsPCIDeviceID(pnpDeviceID string) string {
+	up := strings.ToUpper(pnpDeviceID)
+	idx := strings.Index(up, "DEV_")
+	if idx < 0 {
+		return ""
+	}
+	rest := up[idx+len("DEV_"):]
+	end := 0
+	for end < len(rest) && isHexByte(rest[end]) {
+		end++
+	}
+	return rest[:end]
+}
+
+func isHexByte(b byte) bool {
+	return (b >= '0' && b <= '9') || (b >= 'A' && b <= 'F')
+}
diff --git a/internal/hal/cim_test.go b/internal/hal/cim_test.go
new file mode 100644
index 00000000..8455e5ab
--- /dev/null
+++ b/internal/hal/cim_test.go
@@ -0,0 +1,260 @@
+package hal
+
+import "testing"
+
+// Fixtures captured from a real AMD Ryzen AI Max+ 395 "Strix Halo" Windows 11
+// box via `Get-CimInstance ... | ConvertTo-Json -Compress`.
+
+func TestDecodeCIMObjects(t *testing.T) {
+	tests := []struct {
+		name   string
+		output string
+		want   int
+	}{
+		{"single object", `{"A":1}`, 1},
+		{"array", `[{"A":1},{"A":2}]`, 2},
+		{"leading junk then object", "\uFEFF\n{\"A\":1}", 1},
+		{"empty", "", 0},
+		{"whitespace", "  \n ", 0},
+		{"invalid json", "not json", 0},
+		{"empty array", `[]`, 0},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := len(decodeCIMObjects(tt.output)); got != tt.want {
+				t.Errorf("len = %d, want %d", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestParseCIMCPU(t *testing.T) {
+	tests := []struct {
+		name        string
+		output      string
+		wantModel   string
+		wantCores   int
+		wantThreads int
+		wantFreq    float64
+	}{
+		{
+			name:        "Strix Halo (real fixture, trailing spaces)",
+			output:      `{"Name":"AMD RYZEN AI MAX+ 395 w/ Radeon 8060S          ","NumberOfCores":16,"NumberOfLogicalProcessors":32,"MaxClockSpeed":3000}`,
+			wantModel:   "AMD RYZEN AI MAX+ 395 w/ Radeon 8060S",
+			wantCores:   16,
+			wantThreads: 32,
+			wantFreq:    3.0,
+		},
+		{
+			name:        "Intel single socket",
+			output:      `{"Name":"Intel(R) Core(TM) i9-13900K","NumberOfCores":24,"NumberOfLogicalProcessors":32,"MaxClockSpeed":3600}`,
+			wantModel:   "Intel(R) Core(TM) i9-13900K",
+			wantCores:   24,
+			wantThreads: 32,
+			wantFreq:    3.6,
+		},
+		{
+			name:        "dual socket sums cores/threads",
+			output:      `[{"Name":"Intel Xeon Gold","NumberOfCores":32,"NumberOfLogicalProcessors":64,"MaxClockSpeed":2800},{"Name":"Intel Xeon Gold","NumberOfCores":32,"NumberOfLogicalProcessors":64,"MaxClockSpeed":2800}]`,
+			wantModel:   "Intel Xeon Gold",
+			wantCores:   64,
+			wantThreads: 128,
+			wantFreq:    2.8,
+		},
+		{"empty", "", "", 0, 0, 0},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			info := CPUInfo{}
+			parseCIMCPU(tt.output, &info)
+			if info.Model != tt.wantModel {
+				t.Errorf("Model = %q, want %q", info.Model, tt.wantModel)
+			}
+			if info.Cores != tt.wantCores {
+				t.Errorf("Cores = %d, want %d", info.Cores, tt.wantCores)
+			}
+			if info.Threads != tt.wantThreads {
+				t.Errorf("Threads = %d, want %d", info.Threads, tt.wantThreads)
+			}
+			if info.FreqGHz != tt.wantFreq {
+				t.Errorf("FreqGHz = %v, want %v", info.FreqGHz, tt.wantFreq)
+			}
+		})
+	}
+}
+
+func TestParseCIMRAM(t *testing.T) {
+	tests := []struct {
+		name          string
+		output        string
+		wantTotal     int
+		wantAvailable int
+	}{
+		{
+			name:          "Strix Halo (real fixture, KiB)",
+			output:        `{"TotalVisibleMemorySize":33184580,"FreePhysicalMemory":25201448}`,
+			wantTotal:     32406,
+			wantAvailable: 24610,
+		},
+		{"empty", "", 0, 0},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			info := RAMInfo{}
+			parseCIMRAM(tt.output, &info)
+			if info.TotalMiB != tt.wantTotal {
+				t.Errorf("TotalMiB = %d, want %d", info.TotalMiB, tt.wantTotal)
+			}
+			if info.AvailableMiB != tt.wantAvailable {
+				t.Errorf("AvailableMiB = %d, want %d", info.AvailableMiB, tt.wantAvailable)
+			}
+		})
+	}
+}
+
+func TestParseCIMSwap(t *testing.T) {
+	tests := []struct {
+		name   string
+		output string
+		want   int
+	}{
+		{"two pagefiles sum (real fixture)", `[{"AllocatedBaseSize":20480},{"AllocatedBaseSize":96000}]`, 116480},
+		{"single pagefile", `{"AllocatedBaseSize":20480}`, 20480},
+		{"system managed (empty)", "", 0},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			info := RAMInfo{}
+			parseCIMSwap(tt.output, &info)
+			if info.SwapTotalMiB != tt.want {
+				t.Errorf("SwapTotalMiB = %d, want %d", info.SwapTotalMiB, tt.want)
+			}
+		})
+	}
+}
+
+func TestParseCIMCPULoad(t *testing.T) {
+	tests := []struct {
+		name   string
+		output string
+		want   float64
+	}{
+		{"idle (real fixture)", `{"LoadPercentage":0}`, 0},
+		{"single load", `{"LoadPercentage":42}`, 42},
+		{"dual socket averaged", `[{"LoadPercentage":25},{"LoadPercentage":75}]`, 50},
+		{"empty", "", 0},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := parseCIMCPULoad(tt.output); got != tt.want {
+				t.Errorf("parseCIMCPULoad() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestWindowsGPUVendor(t *testing.T) {
+	tests := []struct {
+		pnp    string
+		compat string
+		want   string
+	}{
+		{`PCI\VEN_1002&DEV_1586&SUBSYS_801D2014`, "Advanced Micro Devices, Inc.", "amd"},
+		{`PCI\VEN_10DE&DEV_2782`, "NVIDIA", "nvidia"},
+		{`PCI\VEN_8086&DEV_56A0`, "Intel Corporation", "intel"},
+		{`ROOT\BasicDisplay`, "(Standard display types)", ""},
+		{"", "Advanced Micro Devices, Inc.", "amd"},
+		{"", "NVIDIA", "nvidia"},
+		{"", "", ""},
+	}
+	for _, tt := range tests {
+		t.Run(tt.pnp+"|"+tt.compat, func(t *testing.T) {
+			if got := windowsGPUVendor(tt.pnp, tt.compat); got != tt.want {
+				t.Errorf("windowsGPUVendor(%q,%q) = %q, want %q", tt.pnp, tt.compat, got, tt.want)
+			}
+		})
+	}
+}
+
+func TestWindowsPCIDeviceID(t *testing.T) {
+	tests := []struct {
+		pnp  string
+		want string
+	}{
+		{`PCI\VEN_1002&DEV_1586&SUBSYS_801D2014&REV_C1\4&35FE04F8&0&0041`, "1586"},
+		{`PCI\VEN_10DE&DEV_2782&SUBSYS_...`, "2782"},
+		{`ROOT\BasicDisplay`, ""},
+		{"", ""},
+	}
+	for _, tt := range tests {
+		t.Run(tt.pnp, func(t *testing.T) {
+			if got := windowsPCIDeviceID(tt.pnp); got != tt.want {
+				t.Errorf("windowsPCIDeviceID(%q) = %q, want %q", tt.pnp, got, tt.want)
+			}
+		})
+	}
+}
+
+func TestParseWindowsGPUs(t *testing.T) {
+	t.Run("Strix Halo Radeon 8060S (real fixture)", func(t *testing.T) {
+		out := `{"Name":"AMD Radeon(TM) 8060S Graphics","DriverVersion":"32.0.31007.1017","AdapterCompatibility":"Advanced Micro Devices, Inc.","PNPDeviceID":"PCI\\VEN_1002&DEV_1586&SUBSYS_801D2014&REV_C1\\4&35FE04F8&0&0041","AdapterRAM":4293918720}`
+		gpu := parseWindowsGPUs(out)
+		if gpu == nil {
+			t.Fatal("expected non-nil GPU")
+		}
+		if gpu.Vendor != "amd" {
+			t.Errorf("Vendor = %q, want amd", gpu.Vendor)
+		}
+		if gpu.Name != "AMD Radeon 8060S Graphics" {
+			t.Errorf("Name = %q, want AMD Radeon 8060S Graphics", gpu.Name)
+		}
+		if gpu.Arch != "RDNA3.5" {
+			t.Errorf("Arch = %q, want RDNA3.5", gpu.Arch)
+		}
+		if gpu.ComputeID != "gfx1151" {
+			t.Errorf("ComputeID = %q, want gfx1151", gpu.ComputeID)
+		}
+		if gpu.DriverVersion != "32.0.31007.1017" {
+			t.Errorf("DriverVersion = %q, want 32.0.31007.1017", gpu.DriverVersion)
+		}
+		if !gpu.UnifiedMemory {
+			t.Error("UnifiedMemory = false, want true for Strix Halo APU")
+		}
+		if gpu.Count != 1 {
+			t.Errorf("Count = %d, want 1", gpu.Count)
+		}
+	})
+
+	t.Run("skips Microsoft Basic Display, picks NVIDIA", func(t *testing.T) {
+		out := `[{"Name":"Microsoft Basic Display Adapter","DriverVersion":"10.0.0","AdapterCompatibility":"(Standard display types)","PNPDeviceID":"ROOT\\BasicDisplay\\0000"},{"Name":"NVIDIA GeForce RTX 4060 Laptop GPU","DriverVersion":"32.0.15.6636","AdapterCompatibility":"NVIDIA","PNPDeviceID":"PCI\\VEN_10DE&DEV_28E0&SUBSYS_..."}]`
+		gpu := parseWindowsGPUs(out)
+		if gpu == nil {
+			t.Fatal("expected non-nil GPU")
+		}
+		if gpu.Vendor != "nvidia" {
+			t.Errorf("Vendor = %q, want nvidia", gpu.Vendor)
+		}
+		if gpu.Name != "NVIDIA GeForce RTX 4060 Laptop GPU" {
+			t.Errorf("Name = %q", gpu.Name)
+		}
+		if gpu.DriverVersion != "32.0.15.6636" {
+			t.Errorf("DriverVersion = %q", gpu.DriverVersion)
+		}
+		if gpu.Count != 1 {
+			t.Errorf("Count = %d, want 1", gpu.Count)
+		}
+	})
+
+	t.Run("no recognizable GPU returns nil", func(t *testing.T) {
+		out := `{"Name":"Microsoft Basic Display Adapter","AdapterCompatibility":"(Standard display types)","PNPDeviceID":"ROOT\\BasicDisplay\\0000"}`
+		if gpu := parseWindowsGPUs(out); gpu != nil {
+			t.Fatalf("expected nil, got %+v", gpu)
+		}
+	})
+
+	t.Run("empty output returns nil", func(t *testing.T) {
+		if gpu := parseWindowsGPUs(""); gpu != nil {
+			t.Fatalf("expected nil, got %+v", gpu)
+		}
+	})
+}
diff --git a/internal/hal/detect_windows.go b/internal/hal/detect_windows.go
index 06c65836..5a0caed4 100644
--- a/internal/hal/detect_windows.go
+++ b/internal/hal/detect_windows.go
@@ -6,10 +6,24 @@ import (
 	"context"
 	"log/slog"
 	"runtime"
-	"strconv"
-	"strings"
 )
 
+// Windows hardware detection via CIM. The legacy wmic CLI is removed in Windows
+// 11 24H2+, so every query shells out to PowerShell's Get-CimInstance and is
+// parsed by the build-tag-free helpers in cim.go.
+
+const (
+	cimCPUScript      = "Get-CimInstance Win32_Processor | Select-Object Name,NumberOfCores,NumberOfLogicalProcessors,MaxClockSpeed | ConvertTo-Json -Compress"
+	cimRAMScript      = "Get-CimInstance Win32_OperatingSystem | Select-Object TotalVisibleMemorySize,FreePhysicalMemory | ConvertTo-Json -Compress"
+	cimPageFileScript = "Get-CimInstance Win32_PageFileUsage | Select-Object AllocatedBaseSize | ConvertTo-Json -Compress"
+	cimCPULoadScript  = "Get-CimInstance Win32_Processor | Select-Object LoadPercentage | ConvertTo-Json -Compress"
+	cimGPUScript      = "Get-CimInstance Win32_VideoController | Select-Object Name,DriverVersion,AdapterCompatibility,PNPDeviceID,AdapterRAM | ConvertTo-Json -Compress"
+)
+
+func runCIM(ctx context.Context, runner CommandRunner, script string) ([]byte, error) {
+	return runner.Run(ctx, "powershell", "-NoProfile", "-NonInteractive", "-Command", script)
+}
+
 func detectCPU(ctx context.Context, runner CommandRunner) CPUInfo {
 	info := CPUInfo{
 		Arch:    runtime.GOARCH,
@@ -17,142 +31,41 @@ func detectCPU(ctx context.Context, runner CommandRunner) CPUInfo {
 		Threads: runtime.NumCPU(),
 	}
 
-	out, err := runner.Run(ctx, "wmic", "cpu", "get", "Name,NumberOfCores,NumberOfLogicalProcessors,MaxClockSpeed", "/format:csv")
+	out, err := runCIM(ctx, runner, cimCPUScript)
 	if err != nil {
-		slog.Warn("wmic cpu detection failed, using defaults", "error", err)
+		slog.Warn("CIM cpu detection failed, using defaults", "error", err)
 		return info
 	}
 
-	parseWMICCPU(string(out), &info)
+	parseCIMCPU(string(out), &info)
 	return info
 }
 
-func parseWMICCPU(output string, info *CPUInfo) {
-	// wmic csv output has a header line, then data lines.
-	// Format: Node,MaxClockSpeed,Name,NumberOfCores,NumberOfLogicalProcessors
-	lines := nonEmptyLines(output)
-	if len(lines) < 2 {
-		return
-	}
-
-	// Find column indices from header
-	header := splitCSV(lines[0])
-	colIdx := make(map[string]int)
-	for i, h := range header {
-		colIdx[strings.TrimSpace(h)] = i
-	}
-
-	fields := splitCSV(lines[1])
-
-	if idx, ok := colIdx["Name"]; ok && idx < len(fields) {
-		info.Model = fields[idx]
-	}
-	if idx, ok := colIdx["NumberOfCores"]; ok && idx < len(fields) {
-		if n, err := strconv.Atoi(fields[idx]); err == nil {
-			info.Cores = n
-		}
-	}
-	if idx, ok := colIdx["NumberOfLogicalProcessors"]; ok && idx < len(fields) {
-		if n, err := strconv.Atoi(fields[idx]); err == nil {
-			info.Threads = n
-		}
-	}
-	if idx, ok := colIdx["MaxClockSpeed"]; ok && idx < len(fields) {
-		if mhz, err := strconv.ParseFloat(fields[idx], 64); err == nil {
-			info.FreqGHz = mhz / 1000.0
-		}
-	}
-}
-
 func detectRAM(ctx context.Context, runner CommandRunner) RAMInfo {
 	info := RAMInfo{}
 
-	out, err := runner.Run(ctx, "wmic", "os", "get", "TotalVisibleMemorySize,FreePhysicalMemory", "/format:csv")
+	out, err := runCIM(ctx, runner, cimRAMScript)
 	if err != nil {
-		slog.Warn("wmic RAM detection failed, using defaults", "error", err)
+		slog.Warn("CIM RAM detection failed, using defaults", "error", err)
 		return info
 	}
+	parseCIMRAM(string(out), &info)
 
-	parseWMICRAM(string(out), &info)
-
-	// Detect swap (pagefile) size
-	if swapOut, err := runner.Run(ctx, "wmic", "pagefile", "get", "AllocatedBaseSize", "/format:csv"); err == nil {
-		parseWMICSwap(string(swapOut), &info)
+	// Pagefile (swap) is best-effort; system-managed hosts may report nothing.
+	if swapOut, err := runCIM(ctx, runner, cimPageFileScript); err == nil {
+		parseCIMSwap(string(swapOut), &info)
 	}
 
 	return info
 }
 
-func parseWMICSwap(output string, info *RAMInfo) {
-	lines := nonEmptyLines(output)
-	if len(lines) < 2 {
-		return
-	}
-	header := splitCSV(lines[0])
-	colIdx := make(map[string]int)
-	for i, h := range header {
-		colIdx[strings.TrimSpace(h)] = i
-	}
-	fields := splitCSV(lines[1])
-	if idx, ok := colIdx["AllocatedBaseSize"]; ok && idx < len(fields) {
-		if mb, err := strconv.Atoi(strings.TrimSpace(fields[idx])); err == nil {
-			info.SwapTotalMiB = mb
-		}
-	}
-}
-
-func parseWMICRAM(output string, info *RAMInfo) {
-	// Format: Node,FreePhysicalMemory,TotalVisibleMemorySize
-	lines := nonEmptyLines(output)
-	if len(lines) < 2 {
-		return
-	}
-
-	header := splitCSV(lines[0])
-	colIdx := make(map[string]int)
-	for i, h := range header {
-		colIdx[strings.TrimSpace(h)] = i
-	}
-
-	fields := splitCSV(lines[1])
-
-	if idx, ok := colIdx["TotalVisibleMemorySize"]; ok && idx < len(fields) {
-		if kb, err := strconv.ParseInt(fields[idx], 10, 64); err == nil {
-			info.TotalMiB = int(kb / 1024)
-		}
-	}
-	if idx, ok := colIdx["FreePhysicalMemory"]; ok && idx < len(fields) {
-		if kb, err := strconv.ParseInt(fields[idx], 10, 64); err == nil {
-			info.AvailableMiB = int(kb / 1024)
-		}
-	}
-}
-
 func collectCPUMetrics(ctx context.Context, runner CommandRunner) CPUMetrics {
-	out, err := runner.Run(ctx, "wmic", "cpu", "get", "LoadPercentage", "/format:csv")
+	out, err := runCIM(ctx, runner, cimCPULoadScript)
 	if err != nil {
-		slog.Warn("wmic CPU metrics failed, using defaults", "error", err)
+		slog.Warn("CIM CPU metrics failed, using defaults", "error", err)
 		return CPUMetrics{}
 	}
-
-	lines := nonEmptyLines(string(out))
-	if len(lines) < 2 {
-		return CPUMetrics{}
-	}
-
-	header := splitCSV(lines[0])
-	colIdx := make(map[string]int)
-	for i, h := range header {
-		colIdx[strings.TrimSpace(h)] = i
-	}
-
-	fields := splitCSV(lines[1])
-	if idx, ok := colIdx["LoadPercentage"]; ok && idx < len(fields) {
-		if pct, err := strconv.ParseFloat(fields[idx], 64); err == nil {
-			return CPUMetrics{UsagePercent: pct}
-		}
-	}
-	return CPUMetrics{}
+	return CPUMetrics{UsagePercent: parseCIMCPULoad(string(out))}
 }
 
 func collectRAMMetrics(ctx context.Context, runner CommandRunner) RAMMetrics {
@@ -167,3 +80,15 @@ func collectRAMMetrics(ctx context.Context, runner CommandRunner) RAMMetrics {
 		UsedMiB:      used,
 	}
 }
+
+// detectPlatformGPU detects GPUs via Win32_VideoController (CIM) as a Windows
+// fallback for when no vendor SMI tool (nvidia-smi/rocm-smi) is on PATH — the
+// common case on AMD APU hosts. See parseWindowsGPUs for the limitations.
+func detectPlatformGPU(ctx context.Context, runner CommandRunner) *GPUInfo {
+	out, err := runCIM(ctx, runner, cimGPUScript)
+	if err != nil {
+		slog.Debug("CIM GPU detection unavailable", "error", err)
+		return nil
+	}
+	return parseWindowsGPUs(string(out))
+}
diff --git a/internal/hal/detect_windows_test.go b/internal/hal/detect_windows_test.go
deleted file mode 100644
index 6e2ae08c..00000000
--- a/internal/hal/detect_windows_test.go
+++ /dev/null
@@ -1,109 +0,0 @@
-//go:build windows
-
-package hal
-
-import "testing"
-
-func TestParseWMICCPU(t *testing.T) {
-	tests := []struct {
-		name        string
-		output      string
-		wantModel   string
-		wantCores   int
-		wantThreads int
-		wantFreq    float64
-	}{
-		{
-			name:        "standard Intel",
-			output:      "Node,MaxClockSpeed,Name,NumberOfCores,NumberOfLogicalProcessors\nWORKSTATION,3600,Intel(R) Core(TM) i9-13900K,24,32\n",
-			wantModel:   "Intel(R) Core(TM) i9-13900K",
-			wantCores:   24,
-			wantThreads: 32,
-			wantFreq:    3.6,
-		},
-		{
-			name:        "AMD Ryzen",
-			output:      "Node,MaxClockSpeed,Name,NumberOfCores,NumberOfLogicalProcessors\nDESKTOP,4500,AMD Ryzen 9 7950X,16,32\n",
-			wantModel:   "AMD Ryzen 9 7950X",
-			wantCores:   16,
-			wantThreads: 32,
-			wantFreq:    4.5,
-		},
-		{
-			name:        "empty output",
-			output:      "",
-			wantModel:   "",
-			wantCores:   0,
-			wantThreads: 0,
-			wantFreq:    0,
-		},
-		{
-			name:        "header only",
-			output:      "Node,MaxClockSpeed,Name,NumberOfCores,NumberOfLogicalProcessors\n",
-			wantModel:   "",
-			wantCores:   0,
-			wantThreads: 0,
-			wantFreq:    0,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			info := CPUInfo{}
-			parseWMICCPU(tt.output, &info)
-			if info.Model != tt.wantModel {
-				t.Errorf("Model = %q, want %q", info.Model, tt.wantModel)
-			}
-			if info.Cores != tt.wantCores {
-				t.Errorf("Cores = %d, want %d", info.Cores, tt.wantCores)
-			}
-			if info.Threads != tt.wantThreads {
-				t.Errorf("Threads = %d, want %d", info.Threads, tt.wantThreads)
-			}
-			if info.FreqGHz != tt.wantFreq {
-				t.Errorf("FreqGHz = %f, want %f", info.FreqGHz, tt.wantFreq)
-			}
-		})
-	}
-}
-
-func TestParseWMICRAM(t *testing.T) {
-	tests := []struct {
-		name          string
-		output        string
-		wantTotal     int
-		wantAvailable int
-	}{
-		{
-			name:          "32GB system",
-			output:        "Node,FreePhysicalMemory,TotalVisibleMemorySize\nWORKSTATION,16777216,33554432\n",
-			wantTotal:     32768,
-			wantAvailable: 16384,
-		},
-		{
-			name:          "16GB system low memory",
-			output:        "Node,FreePhysicalMemory,TotalVisibleMemorySize\nLAPTOP,2097152,16777216\n",
-			wantTotal:     16384,
-			wantAvailable: 2048,
-		},
-		{
-			name:          "empty output",
-			output:        "",
-			wantTotal:     0,
-			wantAvailable: 0,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			info := RAMInfo{}
-			parseWMICRAM(tt.output, &info)
-			if info.TotalMiB != tt.wantTotal {
-				t.Errorf("TotalMiB = %d, want %d", info.TotalMiB, tt.wantTotal)
-			}
-			if info.AvailableMiB != tt.wantAvailable {
-				t.Errorf("AvailableMiB = %d, want %d", info.AvailableMiB, tt.wantAvailable)
-			}
-		})
-	}
-}
diff --git a/internal/hal/gpu.go b/internal/hal/gpu.go
index 0b3a484f..37f72d2d 100644
--- a/internal/hal/gpu.go
+++ b/internal/hal/gpu.go
@@ -128,6 +128,11 @@ func detectGPU(ctx context.Context, runner CommandRunner) *GPUInfo {
 			return gpu
 		}
 	}
+	// Platform fallback (Windows CIM) for hosts without a vendor SMI tool on PATH.
+	if gpu := detectPlatformGPU(ctx, runner); gpu != nil {
+		enrichGPU(ctx, runner, gpu)
+		return gpu
+	}
 	if gpu := detectAMDDRM(ctx, runner); gpu != nil {
 		enrichGPU(ctx, runner, gpu)
 		return gpu
diff --git a/internal/hal/gpu_nonwindows.go b/internal/hal/gpu_nonwindows.go
new file mode 100644
index 00000000..d76d29b8
--- /dev/null
+++ b/internal/hal/gpu_nonwindows.go
@@ -0,0 +1,12 @@
+//go:build !windows
+
+package hal
+
+import "context"
+
+// detectPlatformGPU is a no-op on non-Windows hosts. Linux AMD detection is
+// handled by the SMI probe chain and detectAMDDRM (sysfs); the Windows CIM
+// fallback lives in detect_windows.go.
+func detectPlatformGPU(_ context.Context, _ CommandRunner) *GPUInfo {
+	return nil
+}
diff --git a/internal/hal/testhelper_windows_test.go b/internal/hal/testhelper_windows_test.go
index d1da505b..8572fc4b 100644
--- a/internal/hal/testhelper_windows_test.go
+++ b/internal/hal/testhelper_windows_test.go
@@ -2,13 +2,14 @@
 
 package hal
 
+// Mock outputs keyed by the exact CIM PowerShell commands detect_windows.go runs.
 func platformMockOutputs() map[string]mockResult {
 	return map[string]mockResult{
-		"wmic cpu get Name,NumberOfCores,NumberOfLogicalProcessors,MaxClockSpeed /format:csv": {
-			output: []byte("Node,MaxClockSpeed,Name,NumberOfCores,NumberOfLogicalProcessors\nWORKSTATION,3600,Intel(R) Core(TM) i9-13900K,24,32\n"),
+		"powershell -NoProfile -NonInteractive -Command " + cimCPUScript: {
+			output: []byte(`{"Name":"Intel(R) Core(TM) i9-13900K","NumberOfCores":24,"NumberOfLogicalProcessors":32,"MaxClockSpeed":3600}`),
 		},
-		"wmic os get TotalVisibleMemorySize,FreePhysicalMemory /format:csv": {
-			output: []byte("Node,FreePhysicalMemory,TotalVisibleMemorySize\nWORKSTATION,16777216,33554432\n"),
+		"powershell -NoProfile -NonInteractive -Command " + cimRAMScript: {
+			output: []byte(`{"TotalVisibleMemorySize":33554432,"FreePhysicalMemory":16777216}`),
 		},
 	}
 }

From c94494b69e24dc2ffd4fe74eee737c718263fad7 Mon Sep 17 00:00:00 2001
From: Codex <codex@local>
Date: Tue, 9 Jun 2026 15:47:51 +0800
Subject: [PATCH 2/3] fix(hal/windows): report true installed memory for
 unified-memory APUs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On Strix Halo and similar unified-memory APUs, only a fraction of the installed
physical memory is visible to Windows (e.g. 32 GiB of 128 GiB) — the rest is
carved out for the iGPU. TotalVisibleMemorySize therefore under-reported a
128 GiB box as 32 GiB, which also flowed into the unified-VRAM backfill and the
onboarding "统一内存" ("unified memory") card.

Query Win32_PhysicalMemory (sum of DIMM capacity) and use it as RAM.TotalMiB when
it exceeds the OS-visible total; recompute AvailableMiB as total - OS-used so it
stays correct for both unified and conventional hosts.

Tests (cim_test.go, build-tag-free): parse the Measure-Object Sum JSON; override
to 128 GiB on a Strix Halo fixture; no-shrink / no-op on conventional hosts.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 internal/hal/cim.go            | 34 +++++++++++++++++++++++++++++++
 internal/hal/cim_test.go       | 37 ++++++++++++++++++++++++++++++++++
 internal/hal/detect_windows.go |  7 +++++++
 3 files changed, 78 insertions(+)

diff --git a/internal/hal/cim.go b/internal/hal/cim.go
index ef07be61..1f80878e 100644
--- a/internal/hal/cim.go
+++ b/internal/hal/cim.go
@@ -79,6 +79,40 @@ func parseCIMRAM(output string, info *RAMInfo) {
 	}
 }
 
+// parseCIMInstalledMemoryBytes reads the summed DIMM capacity (bytes) from
+// `Win32_PhysicalMemory | Measure-Object Capacity -Sum` JSON ({"Sum":N}).
+func parseCIMInstalledMemoryBytes(output string) int64 {
+	objs := decodeCIMObjects(output)
+	if len(objs) == 0 {
+		return 0
+	}
+	return jsonInt(objs[0], "Sum")
+}
+
+// applyInstalledMemoryTotal overrides RAMInfo.TotalMiB with the true installed
+// memory (sum of DIMM capacity) when it exceeds the OS-visible total. On
+// unified-memory APUs (e.g. Strix Halo) the OS only sees a fraction with the
+// rest carved out for the iGPU, so the installed total is the meaningful figure.
+// AvailableMiB is recomputed as total minus OS-used so it stays correct on both
+// unified and conventional hosts.
+func applyInstalledMemoryTotal(info *RAMInfo, installedBytes int64) {
+	if installedBytes <= 0 {
+		return
+	}
+	total := int(installedBytes / (1024 * 1024))
+	if total <= info.TotalMiB {
+		return
+	}
+	osUsed := info.TotalMiB - info.AvailableMiB
+	if osUsed < 0 {
+		osUsed = 0
+	}
+	info.TotalMiB = total
+	if avail := total - osUsed; avail >= 0 {
+		info.AvailableMiB = avail
+	}
+}
+
 // parseCIMSwap sums AllocatedBaseSize (MiB) across all Win32_PageFileUsage rows.
 func parseCIMSwap(output string, info *RAMInfo) {
 	total := 0
diff --git a/internal/hal/cim_test.go b/internal/hal/cim_test.go
index 8455e5ab..784d8760 100644
--- a/internal/hal/cim_test.go
+++ b/internal/hal/cim_test.go
@@ -258,3 +258,40 @@ func TestParseWindowsGPUs(t *testing.T) {
 		}
 	})
 }
+
+func TestParseCIMInstalledMemoryBytes(t *testing.T) {
+	// Get-CimInstance Win32_PhysicalMemory | Measure-Object Capacity -Sum
+	if got := parseCIMInstalledMemoryBytes(`{"Sum":137438953472}`); got != 137438953472 {
+		t.Errorf("got %d, want 137438953472 (128 GiB)", got)
+	}
+	if got := parseCIMInstalledMemoryBytes(``); got != 0 { // empty command output must yield 0
+		t.Errorf("empty output -> %d, want 0", got)
+	}
+}
+
+func TestApplyInstalledMemoryTotal(t *testing.T) {
+	// Strix Halo: the OS sees ~32 GiB while 128 GiB is installed (the rest is
+	// carved out for the iGPU). The total must become the installed figure and
+	// available must become that total minus the memory the OS had in use.
+	info := RAMInfo{TotalMiB: 32406, AvailableMiB: 20198} // OS-used = 12208
+	applyInstalledMemoryTotal(&info, 137438953472)
+	if info.TotalMiB != 131072 {
+		t.Errorf("TotalMiB = %d, want 131072 (128 GiB)", info.TotalMiB)
+	}
+	if want := 131072 - (32406 - 20198); info.AvailableMiB != want {
+		t.Errorf("AvailableMiB = %d, want %d", info.AvailableMiB, want)
+	}
+}
+
+func TestApplyInstalledMemoryTotalNoShrink(t *testing.T) {
+	// Installed figure (31000 MiB) is below the OS-visible total -> no change.
+	info := RAMInfo{TotalMiB: 32000, AvailableMiB: 16000}
+	applyInstalledMemoryTotal(&info, int64(31000)*1024*1024)
+	if info.TotalMiB != 32000 || info.AvailableMiB != 16000 {
+		t.Errorf("unexpected change: %+v", info)
+	}
+	applyInstalledMemoryTotal(&info, 0) // zero means no data -> no change
+	if info.TotalMiB != 32000 {
+		t.Errorf("zero installed changed total: %+v", info)
+	}
+}
diff --git a/internal/hal/detect_windows.go b/internal/hal/detect_windows.go
index 5a0caed4..46d6e971 100644
--- a/internal/hal/detect_windows.go
+++ b/internal/hal/detect_windows.go
@@ -15,6 +15,7 @@ import (
 const (
 	cimCPUScript      = "Get-CimInstance Win32_Processor | Select-Object Name,NumberOfCores,NumberOfLogicalProcessors,MaxClockSpeed | ConvertTo-Json -Compress"
 	cimRAMScript      = "Get-CimInstance Win32_OperatingSystem | Select-Object TotalVisibleMemorySize,FreePhysicalMemory | ConvertTo-Json -Compress"
+	cimPhysMemScript  = "Get-CimInstance Win32_PhysicalMemory | Measure-Object -Property Capacity -Sum | Select-Object Sum | ConvertTo-Json -Compress"
 	cimPageFileScript = "Get-CimInstance Win32_PageFileUsage | Select-Object AllocatedBaseSize | ConvertTo-Json -Compress"
 	cimCPULoadScript  = "Get-CimInstance Win32_Processor | Select-Object LoadPercentage | ConvertTo-Json -Compress"
 	cimGPUScript      = "Get-CimInstance Win32_VideoController | Select-Object Name,DriverVersion,AdapterCompatibility,PNPDeviceID,AdapterRAM | ConvertTo-Json -Compress"
@@ -51,6 +52,12 @@ func detectRAM(ctx context.Context, runner CommandRunner) RAMInfo {
 	}
 	parseCIMRAM(string(out), &info)
 
+	// Report true installed memory: unified-memory APUs (Strix Halo) expose only
+	// a fraction to the OS, carving the rest out for the iGPU.
+	if physOut, err := runCIM(ctx, runner, cimPhysMemScript); err == nil {
+		applyInstalledMemoryTotal(&info, parseCIMInstalledMemoryBytes(string(physOut)))
+	}
+
 	// Pagefile (swap) is best-effort; system-managed hosts may report nothing.
 	if swapOut, err := runCIM(ctx, runner, cimPageFileScript); err == nil {
 		parseCIMSwap(string(swapOut), &info)

From 5882177d66e5e7830210002a116c2ca70a96dc67 Mon Sep 17 00:00:00 2001
From: Codex <codex@local>
Date: Tue, 9 Jun 2026 17:09:20 +0800
Subject: [PATCH 3/3] fix(hal/windows): use real AMD APU usable VRAM (ROCm) for
 deploy-fit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On Strix Halo, Win32 AdapterRAM saturates at 4 GiB and there is no rocm-smi, so
the unified-memory backfill set GPU.VRAMMiB = installed RAM (128 GiB). But the OS
carves up that 128 GiB pool — only ~110 GiB is GPU-addressable (dedicated VRAM +
GTT) — so deploy-fit overstated usable VRAM and could accept a model the iGPU
cannot hold.

When the AMD iGPU's VRAM is unknown, query the ROCm-capable llama.cpp engine's
own `--list-devices` (preferring AIMA_ENGINE_DIR, else PATH) and use its reported
device memory (e.g. "ROCm0: ... (110456 MiB, ...)") as GPU.VRAMMiB. Installed RAM
— and thus the normalized "unified memory" the UI shows — is unchanged; only the
fit-relevant usable VRAM is corrected.

Tests (cim_test.go, build-tag-free): parseLlamaROCmVRAMMiB extracts the device
total and ignores non-device output.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 internal/hal/cim.go            | 26 ++++++++++++++++++
 internal/hal/cim_test.go       | 13 +++++++++
 internal/hal/detect_windows.go | 50 +++++++++++++++++++++++++++++++++-
 3 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/internal/hal/cim.go b/internal/hal/cim.go
index 1f80878e..415c675b 100644
--- a/internal/hal/cim.go
+++ b/internal/hal/cim.go
@@ -2,9 +2,35 @@ package hal
 
 import (
 	"encoding/json"
+	"regexp"
+	"strconv"
 	"strings"
 )
 
+// llamaDeviceVRAMRe matches the first parenthesised size on a llama.cpp
+// `--list-devices` line and captures its number of MiB, e.g. the 110456 in
+// "  ROCm0: AMD Radeon(TM) 8060S Graphics (110456 MiB, 110301 MiB free)".
+var llamaDeviceVRAMRe = regexp.MustCompile(`\(([0-9]+)\s*MiB`)
+
+// parseLlamaROCmVRAMMiB scans llama.cpp `--list-devices` output line by line
+// and returns the size (MiB) from the first line carrying a positive "(<n> MiB"
+// group; only the first group on a line (the total, not "free") is considered.
+// Lines with no group, a zero size or a number that overflows int are skipped.
+func parseLlamaROCmVRAMMiB(output string) int {
+	for _, row := range strings.Split(output, "\n") {
+		match := llamaDeviceVRAMRe.FindStringSubmatch(row)
+		if match == nil {
+			continue
+		}
+		mib, err := strconv.Atoi(match[1])
+		if err != nil || mib <= 0 {
+			continue
+		}
+		return mib
+	}
+	return 0
+}
+
 // CIM (Common Information Model) parsing for Windows hardware detection.
 //
 // Modern Windows (11 24H2+) removes the legacy `wmic` CLI, so detection shells
diff --git a/internal/hal/cim_test.go b/internal/hal/cim_test.go
index 784d8760..e4798050 100644
--- a/internal/hal/cim_test.go
+++ b/internal/hal/cim_test.go
@@ -259,6 +259,19 @@ func TestParseWindowsGPUs(t *testing.T) {
 	})
 }
 
+func TestParseLlamaROCmVRAMMiB(t *testing.T) {
+	out := "Available devices:\n  ROCm0: AMD Radeon(TM) 8060S Graphics (110456 MiB, 110301 MiB free)\n"
+	if got := parseLlamaROCmVRAMMiB(out); got != 110456 { // first size on the line is the total
+		t.Errorf("got %d, want 110456", got)
+	}
+	if got := parseLlamaROCmVRAMMiB(""); got != 0 {
+		t.Errorf("empty -> %d, want 0", got)
+	}
+	if got := parseLlamaROCmVRAMMiB("no rocm devices available"); got != 0 { // no "(<n> MiB" group
+		t.Errorf("no match -> %d, want 0", got)
+	}
+}
+
 func TestParseCIMInstalledMemoryBytes(t *testing.T) {
 	// Get-CimInstance Win32_PhysicalMemory | Measure-Object Capacity -Sum
 	if got := parseCIMInstalledMemoryBytes(`{"Sum":137438953472}`); got != 137438953472 {
diff --git a/internal/hal/detect_windows.go b/internal/hal/detect_windows.go
index 46d6e971..7d2f01f6 100644
--- a/internal/hal/detect_windows.go
+++ b/internal/hal/detect_windows.go
@@ -5,7 +5,11 @@ package hal
 import (
 	"context"
 	"log/slog"
+	"os"
+	"os/exec"
+	"path/filepath"
 	"runtime"
+	"strings"
 )
 
 // Windows hardware detection via CIM. Win32 WMIC was removed in Windows 11 24H2+,
@@ -97,5 +101,49 @@ func detectPlatformGPU(ctx context.Context, runner CommandRunner) *GPUInfo {
 		slog.Debug("CIM GPU detection unavailable", "error", err)
 		return nil
 	}
-	return parseWindowsGPUs(string(out))
+	gpu := parseWindowsGPUs(string(out))
+	// AMD APU VRAM: Win32 AdapterRAM saturates at 4 GiB and there is no rocm-smi,
+	// so read the real GPU-addressable pool (dedicated VRAM + GTT) from the
+	// ROCm-capable llama.cpp engine when available. Without it, the unified-memory
+	// backfill (= installed RAM) applies downstream — which over-states the
+	// usable VRAM on hosts where the OS carves the pool (e.g. Strix Halo).
+	if gpu != nil && gpu.Vendor == "amd" && gpu.UnifiedMemory && gpu.VRAMMiB == 0 {
+		if mib := amdAPUVRAMMiBFromEngine(ctx, runner); mib > 0 {
+			gpu.VRAMMiB = mib
+		}
+	}
+	return gpu
+}
+
+// amdAPUVRAMMiBFromEngine runs a llama.cpp binary with --list-devices and returns
+// the device VRAM (MiB) parsed from its output, or 0 if no binary or size is found.
+func amdAPUVRAMMiBFromEngine(ctx context.Context, runner CommandRunner) int {
+	llama := findROCmLlamaBinary()
+	if llama == "" {
+		return 0
+	}
+	// Best-effort: stderr (where llama.cpp lists devices) is folded into stdout via
+	// 2>&1 and errors just yield 0. ' is doubled so a quoted path cannot break out.
+	out, _ := runner.Run(ctx, "powershell", "-NoProfile", "-NonInteractive", "-Command",
+		"& '"+strings.ReplaceAll(llama, "'", "''")+"' --list-devices 2>&1")
+	return parseLlamaROCmVRAMMiB(string(out))
+}
+
+// findROCmLlamaBinary returns the path of a llama.cpp executable, or "" when
+// none is found: regular files in AIMA_ENGINE_DIR are tried first, then PATH.
+func findROCmLlamaBinary() string {
+	engineDir := strings.TrimSpace(os.Getenv("AIMA_ENGINE_DIR"))
+	engineNames := []string{"llama-cli.exe", "llama-server.exe"}
+	for i := 0; engineDir != "" && i < len(engineNames); i++ {
+		candidate := filepath.Join(engineDir, engineNames[i])
+		if fi, err := os.Stat(candidate); err == nil && !fi.IsDir() {
+			return candidate
+		}
+	}
+	for _, exe := range []string{"llama-cli.exe", "llama-cli"} {
+		if found, err := exec.LookPath(exe); err == nil {
+			return found
+		}
+	}
+	return ""
 }