| field | value | date |
|---|---|---|
| author | ptitSeb <sebastien.chev@gmail.com> | 2025-03-26 17:33:37 +0100 |
| committer | ptitSeb <sebastien.chev@gmail.com> | 2025-03-26 17:33:37 +0100 |
| commit | eaa5f8feec4ed2d8f477753b5fc2ff9e50d1003c (patch) | |
| tree | 1bd33aa61e60538a5559eeec2dc585c8bc97e8d0 | |
| parent | 8c6d9f04ec5e1d42a40599dcc3b6d8e5080baa3f (diff) | |
[ARM64_DYNAREC] Improved (V)MAXP[S/D] and (V)MINP[S/D] opcodes to more closely match x86 behavior when using fastnan=0 (for #1046)
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_0f.c | 20 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_660f.c | 24 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_0f.c | 20 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f.c | 23 |
4 files changed, 47 insertions, 40 deletions
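
For context: on x86, MINPS/MINPD and MAXPS/MAXPD are defined per lane as "return the second source unless the strict comparison holds", so a NaN in either operand, and also the equal-operands case (e.g. -0.0 vs +0.0), yields the second source. ARM's FMIN/FMAX instead propagate NaNs and treat the two operands symmetrically, which is why the fastnan=0 path in the diff below needs extra masking. A minimal scalar sketch of the x86 rule, for illustration only (the helper names are made up, not box64 code):

```c
/* Hypothetical helpers (not part of box64): one lane of x86 MINPS/MAXPS.
 * The result is the second source unless the strict comparison is true,
 * so a NaN in either operand and equal operands (e.g. -0.0 vs +0.0)
 * both fall through to src2. */
static float x86_minps_lane(float src1, float src2)
{
    return (src1 < src2) ? src1 : src2;   /* NaN or equal -> src2 */
}

static float x86_maxps_lane(float src1, float src2)
{
    return (src1 > src2) ? src1 : src2;   /* NaN or equal -> src2 */
}
```

With fastnan=1, box64 keeps the plain FMIN/FMAX and accepts the NaN/sign-of-zero differences in exchange for a shorter sequence; the changes below only affect the fastnan=0 path.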
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 9dc2759a..89a9af89 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -1216,11 +1216,12 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             GETEX(v1, 0, 0);
             // FMIN/FMAX wll not copy the value if v0[x] is NaN
             // but x86 will copy if either v0[x] or v1[x] is NaN, so lets force a copy if source is NaN
-            VFMINQS(v0, v0, v1);
-            if(!BOX64ENV(dynarec_fastnan) && (v0!=v1)) {
+            if(BOX64ENV(dynarec_fastnan)) {
+                VFMINQS(v0, v0, v1);
+            } else {
                 q0 = fpu_get_scratch(dyn, ninst);
-                VFCMEQQS(q0, v0, v0);   // 0 is NaN, 1 is not NaN, so MASK for NaN
-                VBIFQ(v0, v1, q0);  // copy dest where source is NaN
+                VFCMGTQS(q0, v1, v0);   // 0 is NaN or v1 GT v0, so invert mask for copy
+                VBIFQ(v0, v1, q0);
             }
             break;
         case 0x5E:
@@ -1249,12 +1250,13 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             GETGX(v0, 1);
             GETEX(v1, 0, 0);
             // FMIN/FMAX wll not copy the value if v0[x] is NaN
-            // but x86 will copy if either v0[x] or v1[x] is NaN, so lets force a copy if source is NaN
-            VFMAXQS(v0, v0, v1);
-            if(!BOX64ENV(dynarec_fastnan) && (v0!=v1)) {
+            // but x86 will copy if either v0[x] or v1[x] is NaN, or if values are equals, so lets force a copy if source is NaN
+            if(BOX64ENV(dynarec_fastnan)) {
+                VFMAXQS(v0, v0, v1);
+            } else {
                 q0 = fpu_get_scratch(dyn, ninst);
-                VFCMEQQS(q0, v0, v0);   // 0 is NaN, 1 is not NaN, so MASK for NaN
-                VBIFQ(v0, v1, q0);  // copy dest where source is NaN
+                VFCMGTQS(q0, v0, v1);   // 0 is NaN or v0 GT v1, so invert mask for copy
+                VBIFQ(v0, v1, q0);
             }
             break;
         case 0x60:
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 068dcc8c..94cca9e7 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -1828,14 +1828,13 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             GETEX(v1, 0, 0);
             // FMIN/FMAX wll not copy the value if v0[x] is NaN
             // but x86 will copy if either v0[x] or v1[x] is NaN, so lets force a copy if source is NaN
-            if(!BOX64ENV(dynarec_fastnan) && v0!=v1) {
+            if(BOX64ENV(dynarec_fastnan)) {
+                VFMINQD(v0, v0, v1);
+            } else {
                 q0 = fpu_get_scratch(dyn, ninst);
-                VFCMEQQD(q0, v0, v0);   // 0 is NaN, 1 is not NaN, so MASK for NaN
-                VANDQ(v0, v0, q0);
-                VBICQ(q0, v1, q0);
-                VORRQ(v0, v0, q0);
+                VFCMGTQD(q0, v1, v0);   // 0 is NaN or v1 GT v0, so invert mask for copy
+                VBIFQ(v0, v1, q0);
             }
-            VFMINQD(v0, v0, v1);
             break;
         case 0x5E:
             INST_NAME("DIVPD Gx, Ex");
@@ -1863,15 +1862,14 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             GETGX(v0, 1);
             GETEX(v1, 0, 0);
             // FMIN/FMAX wll not copy the value if v0[x] is NaN
-            // but x86 will copy if either v0[x] or v1[x] is NaN, so lets force a copy if source is NaN
-            if(!BOX64ENV(dynarec_fastnan) && v0!=v1) {
+            // but x86 will copy if either v0[x] or v1[x] is NaN, or if values are equals, so lets force a copy if source is NaN
+            if(BOX64ENV(dynarec_fastnan)) {
+                VFMAXQD(v0, v0, v1);
+            } else {
                 q0 = fpu_get_scratch(dyn, ninst);
-                VFCMEQQD(q0, v0, v0);   // 0 is NaN, 1 is not NaN, so MASK for NaN
-                VANDQ(v0, v0, q0);
-                VBICQ(q0, v1, q0);
-                VORRQ(v0, v0, q0);
+                VFCMGTQD(q0, v0, v1);   // 0 is NaN or v0 GT v1, so invert mask for copy
+                VBIFQ(v0, v1, q0);
             }
-            VFMAXQD(v0, v0, v1);
             break;
         case 0x60:
             INST_NAME("PUNPCKLBW Gx,Ex");
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_0f.c
index 4a809721..412def45 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_0f.c
@@ -540,10 +540,12 @@ uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int
                 if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
                 // FMIN/FMAX wll not copy a NaN if either is NaN
                 // but x86 will copy src2 if either value is NaN, so lets force a copy of Src2 (Ex) if result is NaN
-                VFMINQS(v0, v2, v1);
-                if(!BOX64ENV(dynarec_fastnan) && (v2!=v1)) {
-                    VFCMEQQS(q0, v0, v0);   // 0 is NaN, 1 is not NaN, so MASK for NaN
-                    VBIFQ(v0, v1, q0);  // copy dest where source is NaN
+                if(BOX64ENV(dynarec_fastnan)) {
+                    VFMINQS(v0, v2, v1);
+                } else {
+                    VFCMGTQS(q0, v1, v2);   // 0 if NaN or v1 GT v2, so invert mask for copy
+                    if(v0!=v1) VBIFQ(v0, v1, q0);
+                    if(v0!=v2) VBITQ(v0, v2, q0);
                 }
             }
             if(!vex.l) YMM0(gd);
@@ -582,10 +584,12 @@ uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int
                 if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
                 // FMIN/FMAX wll not copy a NaN if either is NaN
                 // but x86 will copy src2 if either value is NaN, so lets force a copy of Src2 (Ex) if result is NaN
-                VFMAXQS(v0, v2, v1);
-                if(!BOX64ENV(dynarec_fastnan) && (v2!=v1)) {
-                    VFCMEQQS(q0, v0, v0);   // 0 is NaN, 1 is not NaN, so MASK for NaN
-                    VBIFQ(v0, v1, q0);  // copy dest where source is NaN
+                if(BOX64ENV(dynarec_fastnan)) {
+                    VFMAXQS(v0, v2, v1);
+                } else {
+                    VFCMGTQS(q0, v2, v1);   // 0 if NaN or v2 GT v1, so invert mask for copy
+                    if(v0!=v1) VBIFQ(v0, v1, q0);
+                    if(v0!=v2) VBITQ(v0, v2, q0);
                 }
             }
             if(!vex.l) YMM0(gd);
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
index eec6e087..f2c0f1d0 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
@@ -497,10 +497,12 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
                 if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
                 // FMIN/FMAX wll not copy a NaN if either is NaN
                 // but x86 will copy src2 if either value is NaN, so lets force a copy of Src2 (Ex) if result is NaN
-                VFMINQD(v0, v2, v1);
-                if(!BOX64ENV(dynarec_fastnan) && (v2!=v1)) {
-                    VFCMEQQD(q0, v0, v0);   // 0 is NaN, 1 is not NaN, so MASK for NaN
-                    VBIFQ(v0, v1, q0);  // copy dest where source is NaN
+                if(BOX64ENV(dynarec_fastnan)) {
+                    VFMINQD(v0, v2, v1);
+                } else {
+                    VFCMGTQD(q0, v1, v2);   // 0 if NaN or v1 GT v2, so invert mask for copy
+                    if(v0!=v1) VBIFQ(v0, v1, q0);
+                    if(v0!=v2) VBITQ(v0, v2, q0);
                 }
             }
             if(!vex.l) YMM0(gd);
@@ -532,17 +534,18 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
         case 0x5F:
             INST_NAME("VMAXPD Gx, Vx, Ex");
             nextop = F8;
-            if(!BOX64ENV(dynarec_fastnan)) {
+            if(!BOX64ENV(dynarec_fastnan))
                 q0 = fpu_get_scratch(dyn, ninst);
-            }
             for(int l=0; l<1+vex.l; ++l) {
                 if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
                 // FMIN/FMAX wll not copy a NaN if either is NaN
                 // but x86 will copy src2 if either value is NaN, so lets force a copy of Src2 (Ex) if result is NaN
-                VFMAXQD(v0, v2, v1);
-                if(!BOX64ENV(dynarec_fastnan) && (v2!=v1)) {
-                    VFCMEQQD(q0, v0, v0);   // 0 is NaN, 1 is not NaN, so MASK for NaN
-                    VBIFQ(v0, v1, q0);  // copy dest where source is NaN
+                if(BOX64ENV(dynarec_fastnan)) {
+                    VFMAXQD(v0, v2, v1);
+                } else {
+                    VFCMGTQD(q0, v2, v1);   // 0 if NaN or v2 GT v1, so invert mask for copy
+                    if(v0!=v1) VBIFQ(v0, v1, q0);
+                    if(v0!=v2) VBITQ(v0, v2, q0);
                 }
             }
             if(!vex.l) YMM0(gd);
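
Reading the new sequences: NEON FCMGT writes an all-ones lane only when the comparison is true (any NaN gives all-zeros), and BIF copies bits from the other register wherever the mask is zero, so one greater-than compare plus one bitwise select reproduces the x86 "second source unless strictly smaller/greater" rule without the old FCMEQ/AND/BIC/ORR dance. The AVX paths add the if(v0!=v1)/if(v0!=v2) guards and a BIT variant because the three-operand destination may alias either source. A rough scalar model of the MINPS path per 32-bit lane (an assumption for illustration; minps_lane_model is not a box64 function):

```c
#include <stdint.h>
#include <string.h>

/* Scalar model of the new MINPS lowering, per 32-bit lane:
 *   VFCMGTQS q0, v1, v0   -> mask = (v1 > v0) ? 0xFFFFFFFF : 0   (NaN -> 0)
 *   VBIFQ    v0, v1, q0   -> v0   = (v0 & mask) | (v1 & ~mask)
 * Lanes where v0 is strictly smaller keep v0; NaN or equal lanes take v1,
 * which matches the x86 MINPS rule sketched above. */
static float minps_lane_model(float v0, float v1)
{
    uint32_t mask = (v1 > v0) ? 0xFFFFFFFFu : 0u;  /* FCMGT: false for any NaN */
    uint32_t a, b, r;
    memcpy(&a, &v0, sizeof a);
    memcpy(&b, &v1, sizeof b);
    r = (a & mask) | (b & ~mask);                  /* BIF: insert b where mask is 0 */
    float out;
    memcpy(&out, &r, sizeof out);
    return out;
}
```

The MAXPS/MAXPD variants only swap the compare operands (mask = v0 GT v1, or v2 GT v1 in the AVX forms), and the packed-double opcodes use the same pattern with the 64-bit QD instructions.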