From 99841395adea5550026421f959a19285215eb0c3 Mon Sep 17 00:00:00 2001 From: yadaish Date: Mon, 8 Jun 2026 11:18:33 +0800 Subject: [PATCH] [hotfix] always use fp4x2 for swiglu separated per_1x32 path Drop the M < _SWIGLU_MXFP4_BF16_BOUND threshold that fell back to bf16 for small M in the Swiglu + SEPARATED gate path; use fp4x2 unconditionally. Co-Authored-By: Claude Opus 4.8 (1M context) --- aiter/fused_moe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiter/fused_moe.py b/aiter/fused_moe.py index 40314a768e..5af15ad9c7 100644 --- a/aiter/fused_moe.py +++ b/aiter/fused_moe.py @@ -402,7 +402,7 @@ def fused_moe_( q_dtype_a = dtypes.bf16 elif quant_type == QuantType.per_1x32: if activation == ActivationType.Swiglu and gate_mode == GateMode.SEPARATED: - q_dtype_a = dtypes.bf16 if M < _SWIGLU_MXFP4_BF16_BOUND else dtypes.fp4x2 + q_dtype_a = dtypes.fp4x2 elif activation == ActivationType.Swiglu or gate_mode == GateMode.INTERLEAVE: if get_gfx() != "gfx950" or M < bf16_fp8_bound: q_dtype_a = dtypes.bf16