From 51b3b9566bef31c1783aba932d91566de33400f4 Mon Sep 17 00:00:00 2001 From: James Sandri <7078671+jlsandri@users.noreply.github.com> Date: Mon, 6 Apr 2026 18:45:53 +1000 Subject: [PATCH] Codegen: PS2-accurate division-by-zero semantics for COP1 and VU0 FP, plus RSQRT operand fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five related floating-point codegen corrections for EE COP1 and VU0: 1. COP1_S_RSQRT was emitting `fd = 1.0f / sqrtf(fs)`, which is the formula for reciprocal-sqrt of fs. The actual PS2 rsqrt.s operation is `fd = fs / sqrt(ft)` — the old codegen ignored ft entirely and used the wrong source operand. This operand fix is a prerequisite for the div-by-zero handling below. 2. COP1_S_DIV on the PS2 EE does not produce IEEE infinity on divide by zero. It returns 0x7F7FFFFF (max finite float) with the sign bit set to (sign(fs) XOR sign(ft)). Replace the prior `copysignf(INFINITY, ...)` path with the correct max-float form. 3. COP1_S_RSQRT div-by-zero: returns 0x7F7FFFFF with sign preserved from fs. Also guards the sqrt input with std::max(0.0f, ft) to avoid producing NaN for negative operands: a negative ft is clamped to zero and therefore takes the div-by-zero path (this approximates, rather than exactly reproduces, the EE behaviour of ignoring the sign bit when reading the radicand). 4. VU0 vdiv div-by-zero: same 0x7F7FFFFF + sign semantics, writing to vu0_q. 5. VU0 vrsqrt div-by-zero: same 0x7F7FFFFF semantics with the sign bit always clear; this path is taken whenever ft is not greater than zero. All four div-by-zero paths use std::bit_cast to convert between float and uint32_t for the bit-level construction. Commented in-line so the magic constant 0x7F7FFFFF is self-documenting. 
--- ps2xRecomp/src/lib/code_generator.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/ps2xRecomp/src/lib/code_generator.cpp b/ps2xRecomp/src/lib/code_generator.cpp index 666a59e7..bd74a1c6 100644 --- a/ps2xRecomp/src/lib/code_generator.cpp +++ b/ps2xRecomp/src/lib/code_generator.cpp @@ -1640,10 +1640,12 @@ namespace ps2recomp case COP1_S_MUL: return fmt::format("ctx->f[{}] = FPU_MUL_S(ctx->f[{}], ctx->f[{}]);", fd, fs, ft); case COP1_S_DIV: + // PS2 EE div.s: division by zero returns max float (0x7F7FFFFF) with sign preserved, not IEEE infinity return fmt::format("if (ctx->f[{}] == 0.0f) {{ ctx->fcr31 |= 0x100000; /* DZ flag */ " - "ctx->f[{}] = copysignf(INFINITY, ctx->f[{}] * 0.0f); }} " + "uint32_t __sign = (std::bit_cast(ctx->f[{}]) ^ std::bit_cast(ctx->f[{}])) & 0x80000000u; " + "uint32_t __maxf = 0x7F7FFFFFu | __sign; ctx->f[{}] = std::bit_cast(__maxf); }} " "else ctx->f[{}] = ctx->f[{}] / ctx->f[{}];", - ft, fd, fs, fd, fs, ft); + ft, fs, ft, fd, fd, fs, ft); case COP1_S_SQRT: return fmt::format("ctx->f[{}] = FPU_SQRT_S(ctx->f[{}]);", fd, fs); case COP1_S_ABS: @@ -1663,7 +1665,8 @@ namespace ps2recomp case COP1_S_CVT_W: return fmt::format("{{ int32_t tmp = FPU_CVT_W_S(ctx->f[{}]); std::memcpy(&ctx->f[{}], &tmp, sizeof(tmp)); }}", fs, fd); case COP1_S_RSQRT: - return fmt::format("ctx->f[{}] = 1.0f / sqrtf(ctx->f[{}]);", fd, fs); + // PS2 EE rsqrt.s: fd = fs / sqrt(ft). 
Division by zero returns max float (0x7F7FFFFF) with sign preserved + return fmt::format("{{ float __sq = sqrtf(std::max(0.0f, ctx->f[{}])); if (__sq != 0.0f) {{ ctx->f[{}] = ctx->f[{}] / __sq; }} else {{ uint32_t __sign = std::bit_cast(ctx->f[{}]) & 0x80000000u; uint32_t __maxf = 0x7F7FFFFFu | __sign; ctx->f[{}] = std::bit_cast(__maxf); }} }}", ft, fd, fs, fs, fd); case COP1_S_ADDA: return fmt::format("ctx->f[31] = FPU_ADD_S(ctx->f[{}], ctx->f[{}]);", fs, ft); case COP1_S_SUBA: @@ -2881,7 +2884,8 @@ namespace ps2recomp uint8_t fs_reg = inst.rd; uint8_t ft_reg = inst.rt; - return fmt::format("{{ float fs = _mm_cvtss_f32(_mm_shuffle_ps(ctx->vu0_vf[{}], ctx->vu0_vf[{}], _MM_SHUFFLE(0,0,0,{}))); float ft = _mm_cvtss_f32(_mm_shuffle_ps(ctx->vu0_vf[{}], ctx->vu0_vf[{}], _MM_SHUFFLE(0,0,0,{}))); ctx->vu0_q = (ft != 0.0f) ? (fs / ft) : 0.0f; }}", fs_reg, fs_reg, fsf, ft_reg, ft_reg, ftf); + // PS2 VU0 vdiv: division by zero returns max float (0x7F7FFFFF) with sign preserved + return fmt::format("{{ float fs = _mm_cvtss_f32(_mm_shuffle_ps(ctx->vu0_vf[{}], ctx->vu0_vf[{}], _MM_SHUFFLE(0,0,0,{}))); float ft = _mm_cvtss_f32(_mm_shuffle_ps(ctx->vu0_vf[{}], ctx->vu0_vf[{}], _MM_SHUFFLE(0,0,0,{}))); if (ft != 0.0f) {{ ctx->vu0_q = fs / ft; }} else {{ uint32_t sign = (std::bit_cast(fs) ^ std::bit_cast(ft)) & 0x80000000u; uint32_t maxf = 0x7F7FFFFFu | sign; ctx->vu0_q = std::bit_cast(maxf); }} }}", fs_reg, fs_reg, fsf, ft_reg, ft_reg, ftf); } std::string CodeGenerator::translateVU_VSQRT(const Instruction &inst) @@ -2895,7 +2899,8 @@ namespace ps2recomp { uint8_t ftf = inst.vectorInfo.ftf; uint8_t ft_reg = inst.rt; - return fmt::format("{{ float ft = _mm_cvtss_f32(_mm_shuffle_ps(ctx->vu0_vf[{}], ctx->vu0_vf[{}], _MM_SHUFFLE(0,0,0,{}))); ctx->vu0_q = (ft > 0.0f) ? 
(1.0f / sqrtf(ft)) : 0.0f; }}", ft_reg, ft_reg, ftf); + // PS2 VU0 vrsqrt: division by zero returns max float (0x7F7FFFFF) + return fmt::format("{{ float ft = _mm_cvtss_f32(_mm_shuffle_ps(ctx->vu0_vf[{}], ctx->vu0_vf[{}], _MM_SHUFFLE(0,0,0,{}))); if (ft > 0.0f) {{ ctx->vu0_q = 1.0f / sqrtf(ft); }} else {{ uint32_t maxf = 0x7F7FFFFFu; ctx->vu0_q = std::bit_cast(maxf); }} }}", ft_reg, ft_reg, ftf); } std::string CodeGenerator::translateVU_VMTIR(const Instruction &inst)