diff --git a/ps2xRecomp/src/lib/code_generator.cpp b/ps2xRecomp/src/lib/code_generator.cpp index 666a59e7..06a6c17a 100644 --- a/ps2xRecomp/src/lib/code_generator.cpp +++ b/ps2xRecomp/src/lib/code_generator.cpp @@ -2359,14 +2359,37 @@ namespace ps2recomp "__m128i mask = _mm_set_epi32({}, {}, {}, {}); " "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", inst.rd, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, inst.rt, inst.rt); } case VU0_S2_VMOVE: - return fmt::format("ctx->vu0_vf[{}] = ctx->vu0_vf[{}];", inst.rt, inst.rd); + { + uint8_t dest_mask = inst.vectorInfo.vectorField; + if (dest_mask == 0xF) { + return fmt::format("ctx->vu0_vf[{}] = ctx->vu0_vf[{}];", inst.rt, inst.rd); + } + return fmt::format("{{ __m128i mask = _mm_set_epi32({}, {}, {}, {}); " + "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], ctx->vu0_vf[{}], _mm_castsi128_ps(mask)); }}", + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, + inst.rt, inst.rt, inst.rd); + } case VU0_S2_VMR32: - return fmt::format("ctx->vu0_vf[{}] = _mm_shuffle_ps(ctx->vu0_vf[{}], ctx->vu0_vf[{}], _MM_SHUFFLE(0,0,0,1));", inst.rt, inst.rd, inst.rd); + { + uint8_t dest_mask = inst.vectorInfo.vectorField; + if (dest_mask == 0xF) { + // All components — no blend needed + return fmt::format("ctx->vu0_vf[{}] = _mm_shuffle_ps(ctx->vu0_vf[{}], ctx->vu0_vf[{}], _MM_SHUFFLE(0,3,2,1));", inst.rt, inst.rd, inst.rd); + } + return fmt::format("{{ __m128 res = _mm_shuffle_ps(ctx->vu0_vf[{}], ctx->vu0_vf[{}], _MM_SHUFFLE(0,3,2,1)); " + "__m128i mask = _mm_set_epi32({}, {}, {}, {}); " + "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", + inst.rd, inst.rd, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, + inst.rt, inst.rt); + } case VU0_S2_VCLIPw: { uint8_t field = inst.function & 0x3; @@ -2510,7 +2533,7 @@ namespace ps2recomp uint8_t dest_mask = inst.vectorInfo.vectorField; uint8_t field = inst.function & 0x3; std::string shuffle_pattern = fmt::format("_MM_SHUFFLE({},{},{},{})", field, field, field, field); - return fmt::format("{{ __m128 res = PS2_VADD(ctx->vu0_vf[{}], _mm_shuffle_ps(ctx->vu0_vf[{}], ctx->vu0_vf[{}], {})); __m128i mask = _mm_set_epi32({}, {}, {}, {}); ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, vft, vft, shuffle_pattern, (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, vfd, vfd); + return fmt::format("{{ __m128 res = PS2_VADD(ctx->vu0_vf[{}], _mm_shuffle_ps(ctx->vu0_vf[{}], ctx->vu0_vf[{}], {})); __m128i mask = _mm_set_epi32({}, {}, {}, {}); ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, vft, vft, shuffle_pattern, (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } std::string CodeGenerator::translateVU_VSUB_Field(const Instruction &inst) @@ -2521,7 +2544,7 @@ namespace ps2recomp uint8_t dest_mask = inst.vectorInfo.vectorField; uint8_t field = inst.function & 0x3; std::string shuffle_pattern = fmt::format("_MM_SHUFFLE({},{},{},{})", field, field, field, field); - return fmt::format("{{ __m128 res = PS2_VSUB(ctx->vu0_vf[{}], _mm_shuffle_ps(ctx->vu0_vf[{}], ctx->vu0_vf[{}], {})); __m128i mask = _mm_set_epi32({}, {}, {}, {}); ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, vft, vft, shuffle_pattern, (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, vfd, vfd); + return fmt::format("{{ __m128 res = PS2_VSUB(ctx->vu0_vf[{}], _mm_shuffle_ps(ctx->vu0_vf[{}], ctx->vu0_vf[{}], {})); __m128i mask = _mm_set_epi32({}, {}, {}, {}); ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, vft, vft, shuffle_pattern, (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } std::string CodeGenerator::translateVU_VMUL_Field(const Instruction &inst) @@ -2532,7 +2555,7 @@ namespace ps2recomp uint8_t dest_mask = inst.vectorInfo.vectorField; uint8_t field = inst.function & 0x3; std::string shuffle_pattern = fmt::format("_MM_SHUFFLE({},{},{},{})", field, field, field, field); - return fmt::format("{{ __m128 res = PS2_VMUL(ctx->vu0_vf[{}], _mm_shuffle_ps(ctx->vu0_vf[{}], ctx->vu0_vf[{}], {})); __m128i mask = _mm_set_epi32({}, {}, {}, {}); ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, vft, vft, shuffle_pattern, (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, vfd, vfd); + return fmt::format("{{ __m128 res = PS2_VMUL(ctx->vu0_vf[{}], _mm_shuffle_ps(ctx->vu0_vf[{}], ctx->vu0_vf[{}], {})); __m128i mask = _mm_set_epi32({}, {}, {}, {}); ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, vft, vft, shuffle_pattern, (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } std::string CodeGenerator::translateVU_VADD(const Instruction &inst) @@ -2541,7 +2564,7 @@ namespace ps2recomp uint8_t vfs = inst.rd; uint8_t vft = inst.rt; uint8_t dest_mask = inst.vectorInfo.vectorField; - return fmt::format("{{ __m128 res = PS2_VADD(ctx->vu0_vf[{}], ctx->vu0_vf[{}]); __m128i mask = _mm_set_epi32({}, {}, {}, {}); ctx->vu0_vf[{}] = PS2_VBLEND(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, vft, (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, vfd, vfd); + return fmt::format("{{ __m128 res = PS2_VADD(ctx->vu0_vf[{}], ctx->vu0_vf[{}]); __m128i mask = _mm_set_epi32({}, {}, {}, {}); ctx->vu0_vf[{}] = PS2_VBLEND(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, vft, (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } std::string CodeGenerator::translateVU_VSUB(const Instruction &inst) @@ -2550,7 +2573,7 @@ namespace ps2recomp uint8_t vfs = inst.rd; uint8_t vft = inst.rt; uint8_t dest_mask = inst.vectorInfo.vectorField; - return fmt::format("{{ __m128 res = PS2_VSUB(ctx->vu0_vf[{}], ctx->vu0_vf[{}]); __m128i mask = _mm_set_epi32({}, {}, {}, {}); ctx->vu0_vf[{}] = PS2_VBLEND(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, vft, (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, vfd, vfd); + return fmt::format("{{ __m128 res = PS2_VSUB(ctx->vu0_vf[{}], ctx->vu0_vf[{}]); __m128i mask = _mm_set_epi32({}, {}, {}, {}); ctx->vu0_vf[{}] = PS2_VBLEND(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, vft, (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } std::string CodeGenerator::translateVU_VMUL(const Instruction &inst) @@ -2559,7 +2582,7 @@ namespace ps2recomp uint8_t vfs = inst.rd; uint8_t vft = inst.rt; uint8_t dest_mask = inst.vectorInfo.vectorField; - return fmt::format("{{ __m128 res = PS2_VMUL(ctx->vu0_vf[{}], ctx->vu0_vf[{}]); __m128i mask = _mm_set_epi32({}, {}, {}, {}); ctx->vu0_vf[{}] = PS2_VBLEND(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, vft, (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, vfd, vfd); + return fmt::format("{{ __m128 res = PS2_VMUL(ctx->vu0_vf[{}], ctx->vu0_vf[{}]); __m128i mask = _mm_set_epi32({}, {}, {}, {}); ctx->vu0_vf[{}] = PS2_VBLEND(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, vft, (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } std::string CodeGenerator::translatePEXT5(const Instruction &inst) @@ -2912,8 +2935,8 @@ namespace ps2recomp "__m128i mask = _mm_set_epi32({}, {}, {}, {}); " "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", inst.rd, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, inst.rt, inst.rt); } @@ -3018,8 +3041,8 @@ namespace ps2recomp "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); " "ctx->vu0_acc = res; }}", vfs, vft, vft, shuffle_pattern, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } @@ -3039,8 +3062,8 @@ namespace ps2recomp "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); " "ctx->vu0_acc = res; }}", vfs, vft, vft, shuffle_pattern, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } @@ -3058,8 +3081,8 @@ namespace ps2recomp "__m128i mask = _mm_set_epi32({}, {}, {}, {}); " "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, vft, vft, shuffle_pattern, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } @@ -3077,8 +3100,8 @@ namespace ps2recomp "__m128i mask = _mm_set_epi32({}, {}, {}, {}); " "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, vft, vft, shuffle_pattern, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } @@ -3094,8 +3117,8 @@ namespace ps2recomp "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); " "ctx->vu0_acc = res; }}", vfs, vft, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } @@ -3110,8 +3133,8 @@ namespace ps2recomp "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); " "ctx->vu0_acc = res; }}", vfs, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } @@ -3126,8 +3149,8 @@ namespace ps2recomp "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); " "ctx->vu0_acc = res; }}", vfs, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } @@ -3141,8 +3164,8 @@ namespace ps2recomp "__m128i mask = _mm_set_epi32({}, {}, {}, {}); " "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, vft, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } @@ -3155,8 +3178,8 @@ namespace ps2recomp "__m128i mask = _mm_set_epi32({}, {}, {}, {}); " "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } @@ -3169,8 +3192,8 @@ namespace ps2recomp "__m128i mask = _mm_set_epi32({}, {}, {}, {}); " "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } @@ -3183,8 +3206,8 @@ namespace ps2recomp "__m128i mask = _mm_set_epi32({}, {}, {}, {}); " "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } @@ -3197,8 +3220,8 @@ namespace ps2recomp "__m128i mask = _mm_set_epi32({}, {}, {}, {}); " "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } @@ -3215,8 +3238,8 @@ namespace ps2recomp "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); " "ctx->vu0_acc = res; }}", vfs, vft, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } @@ -3231,8 +3254,8 @@ namespace ps2recomp "__m128i mask = _mm_set_epi32({}, {}, {}, {}); " "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } @@ -3245,8 +3268,8 @@ namespace ps2recomp "__m128i mask = _mm_set_epi32({}, {}, {}, {}); " "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } @@ -3258,14 +3281,14 @@ namespace ps2recomp uint8_t vfs = inst.rd; uint8_t vft = inst.rt; uint8_t dest_mask = inst.vectorInfo.vectorField; - return fmt::format("{{ __m128 mul_res = PS2_VMUL(ctx->vu0_vf[{}], ctx->vu0_vf[{}]); " + return fmt::format("{{ __m128 mul_res = PS2_VOPMUL(ctx->vu0_vf[{}], ctx->vu0_vf[{}]); " "__m128 res = PS2_VSUB(ctx->vu0_acc, mul_res); " "__m128i mask = _mm_set_epi32({}, {}, {}, {}); " "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); " "ctx->vu0_acc = res; }}", vfs, vft, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } @@ -3279,8 +3302,8 @@ namespace ps2recomp "__m128i mask = _mm_set_epi32({}, {}, {}, {}); " "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, vft, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } @@ -3295,8 +3318,8 @@ namespace ps2recomp "__m128i mask = _mm_set_epi32({}, {}, {}, {}); " "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } @@ -3309,8 +3332,8 @@ namespace ps2recomp "__m128i mask = _mm_set_epi32({}, {}, {}, {}); " "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } @@ -3326,8 +3349,8 @@ namespace ps2recomp "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); " "ctx->vu0_acc = res; }}", vfs, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } @@ -3342,8 +3365,8 @@ namespace ps2recomp "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); " "ctx->vu0_acc = res; }}", vfs, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vfd, vfd); } @@ -3529,7 +3552,7 @@ namespace ps2recomp { uint8_t vfs = inst.rd; uint8_t vft = inst.rt; - return fmt::format("ctx->vu0_acc = PS2_VMUL(ctx->vu0_vf[{}], ctx->vu0_vf[{}]);", + return fmt::format("ctx->vu0_acc = PS2_VOPMUL(ctx->vu0_vf[{}], ctx->vu0_vf[{}]);", vfs, vft); } @@ -3545,8 +3568,8 @@ namespace ps2recomp "__m128i mask = _mm_set_epi32({}, {}, {}, {}); " "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, formatFloatLiteral(scale), - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, inst.rt, inst.rt); } @@ -3563,8 +3586,8 @@ namespace ps2recomp "__m128i mask = _mm_set_epi32({}, {}, {}, {}); " "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vfs, formatFloatLiteral(scale), - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, inst.rt, inst.rt); } @@ -3578,8 +3601,8 @@ namespace ps2recomp "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); " "ctx->vi[{}] = (ctx->vi[{}] + 1) & 0x3FF; }}", vis, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, inst.rt, inst.rt, vis, vis); } @@ -3595,8 +3618,8 @@ namespace ps2recomp "ctx->vi[{}] = (ctx->vi[{}] + 1) & 0x3FF; }}", vis, inst.rt, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, vis, vis); } @@ -3611,8 +3634,8 @@ namespace ps2recomp "ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", vis, vis, vis, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, inst.rt, inst.rt); } @@ -3628,15 +3651,15 @@ namespace ps2recomp vis, vis, vis, inst.rt, - (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, - (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0); + (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, + (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0); } std::string CodeGenerator::translateVU_VRGET(const Instruction &inst) { uint8_t dest_mask = inst.vectorInfo.vectorField; uint8_t ft_reg = inst.rt; - return fmt::format("{{ __m128 res = ctx->vu0_r; __m128i mask = _mm_set_epi32({}, {}, {}, {}); ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", (dest_mask & 0x8) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x1) ? -1 : 0, ft_reg, ft_reg); + return fmt::format("{{ __m128 res = ctx->vu0_r; __m128i mask = _mm_set_epi32({}, {}, {}, {}); ctx->vu0_vf[{}] = _mm_blendv_ps(ctx->vu0_vf[{}], res, _mm_castsi128_ps(mask)); }}", (dest_mask & 0x1) ? -1 : 0, (dest_mask & 0x2) ? -1 : 0, (dest_mask & 0x4) ? -1 : 0, (dest_mask & 0x8) ? -1 : 0, ft_reg, ft_reg); } std::string CodeGenerator::translateVU_VRINIT(const Instruction &inst)