From 27f368175507a2dbd7b91f0af213b09480e0926b Mon Sep 17 00:00:00 2001
From: rjckkkkk <59609580+rjckkkkk@users.noreply.github.com>
Date: Tue, 9 Jun 2026 15:13:16 +0000
Subject: [PATCH] Add AMD ROCm/HIP Windows llama.cpp engine for auto-download on Strix Halo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On a no-NVIDIA AMD Windows box (Ryzen AI Max+ 395 / Radeon 8060S,
RDNA3.5), the only catalog llama.cpp source for windows/amd64 was the
CUDA build, which runs CPU-only there; the AMD engines
(llamacpp-vulkan / llamacpp-rocm-rdna3) are linux-only. So
out-of-the-box auto-download fetched the wrong (CUDA) build and users
had to manually install a ROCm/HIP llama.cpp and point AIMA_ENGINE_DIR
at it.

Add a native engine asset `llamacpp-hip-windows` (gpu_arch RDNA3.5,
windows/amd64, source = official llama.cpp `win-hip-radeon-x64.zip`,
version b9330). The native engine resolver already skips engines whose
source has no binary for the host platform, so on RDNA3.5+Windows this
exact-arch, windows-supported asset is selected over the linux-only
vulkan asset and the NVIDIA-only universal CUDA source — making
`aima deploy` auto-download the right ROCm/HIP build with no
AIMA_ENGINE_DIR.

Knowledge-only, no Go changes (INV-1).

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 catalog/engines/llamacpp-hip-windows.yaml | 42 +++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 catalog/engines/llamacpp-hip-windows.yaml

diff --git a/catalog/engines/llamacpp-hip-windows.yaml b/catalog/engines/llamacpp-hip-windows.yaml
new file mode 100644
index 0000000..6e32252
--- /dev/null
+++ b/catalog/engines/llamacpp-hip-windows.yaml
@@ -0,0 +1,42 @@
+kind: engine_asset
+_profile: llamacpp
+metadata:
+  name: llamacpp-hip-windows
+  type: llamacpp
+  version: "b9330"
+  supported_model_types: [llm, vlm, embedding]
+hardware:
+  # RDNA3.5 == AMD Ryzen AI Max+ 395 "Strix Halo" Radeon 8060S iGPU (gfx1151).
+  # Exact gpu_arch match so the native-runtime engine resolver — which on a
+  # native host skips engines whose source has no binary for the host platform
+  # (resolver.go: "Skip native-only incompatibility") — selects this Windows
+  # HIP build instead of the linux-only llamacpp-vulkan or the NVIDIA-only
+  # win-cuda llamacpp-universal source. This is what makes auto-download pick
+  # the right (ROCm/HIP) llama.cpp on a no-NVIDIA AMD box, with no AIMA_ENGINE_DIR.
+  gpu_arch: RDNA3.5
+  vram_min_mib: 0
+amplifier:
+  features:
+    - rocm_hip_offload
+  performance_gain: "AMD ROCm/HIP GPU offload on Strix Halo (RDNA3.5), all layers on the iGPU"
+time_constraints:
+  cold_start_s: [5, 30]
+  model_switch_s: [5, 30]
+power_constraints:
+  typical_draw_watts: [20, 90]
+runtime:
+  default: "native"
+  platform_recommendations:
+    windows/amd64: "native"
+source:
+  binary: "llama-server"
+  # Native binary only — official llama.cpp Windows ROCm/HIP release. Self-contained
+  # (bundles ggml-hip.dll + rocblas), so no separate ROCm install is required.
+  platforms: [windows/amd64]
+  url_template: "https://github.com/ggml-org/llama.cpp/releases/download/{version}/llama-{version}-bin-{platform_file}"
+  platform_files:
+    windows/amd64: "win-hip-radeon-x64.zip"
+  mirror_templates:
+    - "https://ghfast.top/{url}"
+    - "https://cf.ghproxy.cc/{url}"
+    - "https://gh-proxy.com/{url}"