From 51b3b9566bef31c1783aba932d91566de33400f4 Mon Sep 17 00:00:00 2001
From: James Sandri <7078671+jlsandri@users.noreply.github.com>
Date: Mon, 6 Apr 2026 18:45:53 +1000
Subject: [PATCH] Codegen: PS2-accurate division-by-zero semantics for COP1 and
 VU0 FP, plus RSQRT operand fix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Five related floating-point codegen corrections for EE COP1 and VU0:

1. COP1_S_RSQRT was emitting `fd = 1.0f / sqrtf(fs)`, which is the
   formula for reciprocal-sqrt of fs. The actual PS2 rsqrt.s operation
   is `fd = fs / sqrt(ft)` — the old codegen ignored ft entirely and
   used the wrong source operand. This operand fix is a prerequisite
   for the div-by-zero handling below.

2. COP1_S_DIV on the PS2 EE does not produce IEEE infinity on divide
   by zero. It returns 0x7F7FFFFF (max finite float) with the sign
   bit set to (sign(fs) XOR sign(ft)). Replace the prior
   `copysignf(INFINITY, ...)` path with the correct max-float form.

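3. COP1_S_RSQRT div-by-zero: returns 0x7F7FFFFF with sign preserved
   from fs. Also guards the sqrt input with std::max(0.0f, ft) so a
   negative ft can never produce NaN. Note that this clamps a negative
   radicand to zero rather than taking |ft|, so a negative ft takes
   the same max-float path as ft == 0. (The intent is to mirror the EE
   ignoring the sign bit of the radicand; sqrtf(fabsf(ft)) would match
   that more closely - TODO confirm against hardware.)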

4. VU0 vdiv div-by-zero: same 0x7F7FFFFF + sign semantics, writing
   to vu0_q.

5. VU0 vrsqrt: returns 0x7F7FFFFF with the sign bit always clear
   whenever ft is not strictly positive (zero, negative or NaN),
   replacing the previous 0.0f result.

All four div-by-zero code paths use std::bit_cast (float <-> uint32_t)
for the bit-level construction, so the generated code requires C++20
and <bit>. Commented in-line so the magic constant 0x7F7FFFFF is
self-documenting.
---
 ps2xRecomp/src/lib/code_generator.cpp | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/ps2xRecomp/src/lib/code_generator.cpp b/ps2xRecomp/src/lib/code_generator.cpp
index 666a59e7..bd74a1c6 100644
--- a/ps2xRecomp/src/lib/code_generator.cpp
+++ b/ps2xRecomp/src/lib/code_generator.cpp
@@ -1640,10 +1640,12 @@ namespace ps2recomp
             case COP1_S_MUL:
                 return fmt::format("ctx->f[{}] = FPU_MUL_S(ctx->f[{}], ctx->f[{}]);", fd, fs, ft);
             case COP1_S_DIV:
+                // PS2 EE div.s: division by zero returns max float (0x7F7FFFFF) signed by sign(fs) XOR sign(ft), not IEEE infinity
                 return fmt::format("if (ctx->f[{}] == 0.0f) {{ ctx->fcr31 |= 0x100000; /* DZ flag */ "
-                                   "ctx->f[{}] = copysignf(INFINITY, ctx->f[{}] * 0.0f); }} "
+                                   "uint32_t __sign = (std::bit_cast<uint32_t>(ctx->f[{}]) ^ std::bit_cast<uint32_t>(ctx->f[{}])) & 0x80000000u; "
+                                   "uint32_t __maxf = 0x7F7FFFFFu | __sign; ctx->f[{}] = std::bit_cast<float>(__maxf); }} "
                                    "else ctx->f[{}] = ctx->f[{}] / ctx->f[{}];",
-                                   ft, fd, fs, fd, fs, ft);
+                                   ft, fs, ft, fd, fd, fs, ft);
             case COP1_S_SQRT:
                 return fmt::format("ctx->f[{}] = FPU_SQRT_S(ctx->f[{}]);", fd, fs);
             case COP1_S_ABS:
@@ -1663,7 +1665,8 @@ namespace ps2recomp
             case COP1_S_CVT_W:
                 return fmt::format("{{ int32_t tmp = FPU_CVT_W_S(ctx->f[{}]); std::memcpy(&ctx->f[{}], &tmp, sizeof(tmp)); }}", fs, fd);
             case COP1_S_RSQRT:
-                return fmt::format("ctx->f[{}] = 1.0f / sqrtf(ctx->f[{}]);", fd, fs);
+                // PS2 EE rsqrt.s: fd = fs / sqrt(ft). A zero radicand (ft <= 0 is clamped to 0) yields max float (0x7F7FFFFF) carrying the sign bit of fs
+                return fmt::format("{{ float __sq = sqrtf(std::max(0.0f, ctx->f[{}])); if (__sq != 0.0f) {{ ctx->f[{}] = ctx->f[{}] / __sq; }} else {{ uint32_t __sign = std::bit_cast<uint32_t>(ctx->f[{}]) & 0x80000000u; uint32_t __maxf = 0x7F7FFFFFu | __sign; ctx->f[{}] = std::bit_cast<float>(__maxf); }} }}", ft, fd, fs, fs, fd);
             case COP1_S_ADDA:
                 return fmt::format("ctx->f[31] = FPU_ADD_S(ctx->f[{}], ctx->f[{}]);", fs, ft);
             case COP1_S_SUBA:
@@ -2881,7 +2884,8 @@ namespace ps2recomp
         uint8_t fs_reg = inst.rd;
         uint8_t ft_reg = inst.rt;
 
-        return fmt::format("{{ float fs = _mm_cvtss_f32(_mm_shuffle_ps(ctx->vu0_vf[{}], ctx->vu0_vf[{}], _MM_SHUFFLE(0,0,0,{}))); float ft = _mm_cvtss_f32(_mm_shuffle_ps(ctx->vu0_vf[{}], ctx->vu0_vf[{}], _MM_SHUFFLE(0,0,0,{}))); ctx->vu0_q = (ft != 0.0f) ? (fs / ft) : 0.0f; }}", fs_reg, fs_reg, fsf, ft_reg, ft_reg, ftf);
+        // PS2 VU0 vdiv: division by zero sets Q to max float (0x7F7FFFFF) signed by sign(fs) XOR sign(ft)
+        return fmt::format("{{ float fs = _mm_cvtss_f32(_mm_shuffle_ps(ctx->vu0_vf[{}], ctx->vu0_vf[{}], _MM_SHUFFLE(0,0,0,{}))); float ft = _mm_cvtss_f32(_mm_shuffle_ps(ctx->vu0_vf[{}], ctx->vu0_vf[{}], _MM_SHUFFLE(0,0,0,{}))); if (ft != 0.0f) {{ ctx->vu0_q = fs / ft; }} else {{ uint32_t sign = (std::bit_cast<uint32_t>(fs) ^ std::bit_cast<uint32_t>(ft)) & 0x80000000u; uint32_t maxf = 0x7F7FFFFFu | sign; ctx->vu0_q = std::bit_cast<float>(maxf); }} }}", fs_reg, fs_reg, fsf, ft_reg, ft_reg, ftf);
     }
 
     std::string CodeGenerator::translateVU_VSQRT(const Instruction &inst)
@@ -2895,7 +2899,8 @@ namespace ps2recomp
     {
         uint8_t ftf = inst.vectorInfo.ftf;
         uint8_t ft_reg = inst.rt;
-        return fmt::format("{{ float ft = _mm_cvtss_f32(_mm_shuffle_ps(ctx->vu0_vf[{}], ctx->vu0_vf[{}], _MM_SHUFFLE(0,0,0,{}))); ctx->vu0_q = (ft > 0.0f) ? (1.0f / sqrtf(ft)) : 0.0f; }}", ft_reg, ft_reg, ftf);
+        // VU0 vrsqrt: Q = 1/sqrt(ft); when ft is not strictly positive (zero, negative or NaN) Q is set to PS2 max float (0x7F7FFFFF) with the sign bit clear
+        return fmt::format("{{ float ft = _mm_cvtss_f32(_mm_shuffle_ps(ctx->vu0_vf[{}], ctx->vu0_vf[{}], _MM_SHUFFLE(0,0,0,{}))); if (ft > 0.0f) {{ ctx->vu0_q = 1.0f / sqrtf(ft); }} else {{ uint32_t maxf = 0x7F7FFFFFu; ctx->vu0_q = std::bit_cast<float>(maxf); }} }}", ft_reg, ft_reg, ftf);
     }
 
     std::string CodeGenerator::translateVU_VMTIR(const Instruction &inst)