author     ptitSeb <sebastien.chev@gmail.com>  2025-03-26 17:33:37 +0100
committer  ptitSeb <sebastien.chev@gmail.com>  2025-03-26 17:33:37 +0100
commit     eaa5f8feec4ed2d8f477753b5fc2ff9e50d1003c (patch)
tree       1bd33aa61e60538a5559eeec2dc585c8bc97e8d0
parent     8c6d9f04ec5e1d42a40599dcc3b6d8e5080baa3f (diff)
[ARM64_DYNAREC] Improved (V)MAXP[S/D] and (V)MINP[S/D] opcodes to more closely match x86 behavior when using fastnan=0 (for #1046)
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_0f.c        | 20
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_660f.c      | 24
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_avx_0f.c    | 20
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_avx_66_0f.c | 23
4 files changed, 47 insertions(+), 40 deletions(-)
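
The x86 rule being matched here: MINPS/MAXPS (and the PD variants) return src2 whenever the comparison is not strictly true, which covers both NaN inputs and the equal-values case, whereas ARM's FMIN/FMAX return a NaN when either input is NaN. A minimal per-lane sketch in plain C (x86_min_lane/x86_max_lane are hypothetical names, for illustration only, not box64 code):

    // Per-lane reference behavior of x86 MINPS/MAXPS: the result is src2 (b)
    // unless src1 (a) compares strictly less/greater, so a NaN in either
    // input and the equal-values case both yield src2.
    static float x86_min_lane(float a, float b)
    {
        return (a < b) ? a : b;   // (a < b) is false for NaN and for a == b
    }

    static float x86_max_lane(float a, float b)
    {
        return (a > b) ? a : b;
    }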
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 9dc2759a..89a9af89 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -1216,11 +1216,12 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             GETEX(v1, 0, 0);
             // FMIN/FMAX will not copy the value if v0[x] is NaN
             // but x86 will copy if either v0[x] or v1[x] is NaN, so let's force a copy if the source is NaN
-            VFMINQS(v0, v0, v1);
-            if(!BOX64ENV(dynarec_fastnan) && (v0!=v1)) {
+            if(BOX64ENV(dynarec_fastnan)) {
+                VFMINQS(v0, v0, v1);
+            } else {
                 q0 = fpu_get_scratch(dyn, ninst);
-                VFCMEQQS(q0, v0, v0);   // 0 is NaN, 1 is not NaN, so MASK for NaN
-                VBIFQ(v0, v1, q0);   // copy dest where source is NaN
+                VFCMGTQS(q0, v1, v0);   // 0 if NaN or if v1 is not GT v0, so inverted mask for the copy
+                VBIFQ(v0, v1, q0);
             }
             break;
         case 0x5E:
@@ -1249,12 +1250,13 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             GETGX(v0, 1);
             GETEX(v1, 0, 0);
             // FMIN/FMAX will not copy the value if v0[x] is NaN
-            // but x86 will copy if either v0[x] or v1[x] is NaN, so let's force a copy if the source is NaN
-            VFMAXQS(v0, v0, v1);
-            if(!BOX64ENV(dynarec_fastnan) && (v0!=v1)) {
+            // but x86 will copy if either v0[x] or v1[x] is NaN, or if the values are equal, so let's force a copy if the source is NaN
+            if(BOX64ENV(dynarec_fastnan)) {
+                VFMAXQS(v0, v0, v1);
+            } else {
                 q0 = fpu_get_scratch(dyn, ninst);
-                VFCMEQQS(q0, v0, v0);   // 0 is NaN, 1 is not NaN, so MASK for NaN
-                VBIFQ(v0, v1, q0);   // copy dest where source is NaN
+                VFCMGTQS(q0, v0, v1);   // 0 if NaN or if v0 is not GT v1, so inverted mask for the copy
+                VBIFQ(v0, v1, q0);
             }
             break;
         case 0x60:
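
The new fastnan=0 path replaces the old fix-up (compute VFMIN first, then patch the lanes where the result was NaN) with a single greater-than compare plus a bit-insert. Roughly the same sequence for MINPS, written with NEON intrinsics instead of the dynarec's emitter macros (minps_slow is a hypothetical helper, not box64 code):

    #include <arm_neon.h>

    // v0 = dest/src1, v1 = src2; mirrors the emitted VFCMGTQS + VBIFQ pair
    static inline float32x4_t minps_slow(float32x4_t v0, float32x4_t v1)
    {
        // FCMGT: lane is all-ones where v1 > v0, all-zeros where either
        // input is NaN or v1 <= v0
        uint32x4_t keep_v0 = vcgtq_f32(v1, v0);
        // select v0 where the mask is set, v1 (src2) everywhere else,
        // so NaN lanes and equal lanes both get src2, as on x86
        return vbslq_f32(keep_v0, v0, v1);
    }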

diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 068dcc8c..94cca9e7 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -1828,14 +1828,13 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             GETEX(v1, 0, 0);
             // FMIN/FMAX will not copy the value if v0[x] is NaN
             // but x86 will copy if either v0[x] or v1[x] is NaN, so let's force a copy if the source is NaN
-            if(!BOX64ENV(dynarec_fastnan) && v0!=v1) {
+            if(BOX64ENV(dynarec_fastnan)) {
+                VFMINQD(v0, v0, v1);
+            } else {
                 q0 = fpu_get_scratch(dyn, ninst);
-                VFCMEQQD(q0, v0, v0);   // 0 is NaN, 1 is not NaN, so MASK for NaN
-                VANDQ(v0, v0, q0);
-                VBICQ(q0, v1, q0);
-                VORRQ(v0, v0, q0);
+                VFCMGTQD(q0, v1, v0);   // 0 if NaN or if v1 is not GT v0, so inverted mask for the copy
+                VBIFQ(v0, v1, q0);
             }
-            VFMINQD(v0, v0, v1);
             break;
         case 0x5E:
             INST_NAME("DIVPD Gx, Ex");
@@ -1863,15 +1862,14 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             GETGX(v0, 1);
             GETEX(v1, 0, 0);
             // FMIN/FMAX will not copy the value if v0[x] is NaN
-            // but x86 will copy if either v0[x] or v1[x] is NaN, so let's force a copy if the source is NaN
-            if(!BOX64ENV(dynarec_fastnan) && v0!=v1) {
+            // but x86 will copy if either v0[x] or v1[x] is NaN, or if the values are equal, so let's force a copy if the source is NaN
+            if(BOX64ENV(dynarec_fastnan)) {
+                VFMAXQD(v0, v0, v1);
+            } else {
                 q0 = fpu_get_scratch(dyn, ninst);
-                VFCMEQQD(q0, v0, v0);   // 0 is NaN, 1 is not NaN, so MASK for NaN
-                VANDQ(v0, v0, q0);
-                VBICQ(q0, v1, q0);
-                VORRQ(v0, v0, q0);
+                VFCMGTQD(q0, v0, v1);   // 0 if NaN or if v0 is not GT v1, so inverted mask for the copy
+                VBIFQ(v0, v1, q0);
             }
-            VFMAXQD(v0, v0, v1);
             break;
         case 0x60:
             INST_NAME("PUNPCKLBW Gx,Ex");

diff --git a/src/dynarec/arm64/dynarec_arm64_avx_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_0f.c
index 4a809721..412def45 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_0f.c
@@ -540,10 +540,12 @@ uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int
                 if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
                // FMIN/FMAX will not copy a NaN if either is NaN
                // but x86 will copy src2 if either value is NaN, so let's force a copy of Src2 (Ex) if the result is NaN
-                VFMINQS(v0, v2, v1);
-                if(!BOX64ENV(dynarec_fastnan) && (v2!=v1)) {
-                    VFCMEQQS(q0, v0, v0);   // 0 is NaN, 1 is not NaN, so MASK for NaN
-                    VBIFQ(v0, v1, q0);   // copy dest where source is NaN
+                if(BOX64ENV(dynarec_fastnan)) {
+                    VFMINQS(v0, v2, v1);
+                } else {
+                    VFCMGTQS(q0, v1, v2);   // 0 if NaN or if v1 is not GT v2, so inverted mask for the copy
+                    if(v0!=v1) VBIFQ(v0, v1, q0);
+                    if(v0!=v2) VBITQ(v0, v2, q0);
                 }
             }
             if(!vex.l) YMM0(gd);
@@ -582,10 +584,12 @@ uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int
                 if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
                // FMIN/FMAX will not copy a NaN if either is NaN
                // but x86 will copy src2 if either value is NaN, so let's force a copy of Src2 (Ex) if the result is NaN
-                VFMAXQS(v0, v2, v1);
-                if(!BOX64ENV(dynarec_fastnan) && (v2!=v1)) {
-                    VFCMEQQS(q0, v0, v0);   // 0 is NaN, 1 is not NaN, so MASK for NaN
-                    VBIFQ(v0, v1, q0);   // copy dest where source is NaN
+                if(BOX64ENV(dynarec_fastnan)) {
+                    VFMAXQS(v0, v2, v1);
+                } else {
+                    VFCMGTQS(q0, v2, v1);   // 0 if NaN or if v2 is not GT v1, so inverted mask for the copy
+                    if(v0!=v1) VBIFQ(v0, v1, q0);
+                    if(v0!=v2) VBITQ(v0, v2, q0);
                 }
             }
             if(!vex.l) YMM0(gd);
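
The AVX forms are three-operand (Gx, Vx, Ex), so the destination register can alias either source; that is why the emitted code guards the VBIFQ/VBITQ pair with v0!=v1 and v0!=v2 checks instead of issuing a single blend. With a destination distinct from both sources, the pair collapses to one select (vminps_slow is a hypothetical helper, not box64 code):

    #include <arm_neon.h>

    // v2 = src1 (Vx), v1 = src2 (Ex), matching the register names in the diff
    static inline float32x4_t vminps_slow(float32x4_t v2, float32x4_t v1)
    {
        uint32x4_t keep_src1 = vcgtq_f32(v1, v2);  // set only where v1 > v2
        // BIF (insert where false) then BIT (insert where true) with the
        // same mask is equivalent to this single BSL
        return vbslq_f32(keep_src1, v2, v1);
    }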
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
index eec6e087..f2c0f1d0 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
@@ -497,10 +497,12 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
                 if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
                // FMIN/FMAX will not copy a NaN if either is NaN
                // but x86 will copy src2 if either value is NaN, so let's force a copy of Src2 (Ex) if the result is NaN
-                VFMINQD(v0, v2, v1);
-                if(!BOX64ENV(dynarec_fastnan) && (v2!=v1)) {
-                    VFCMEQQD(q0, v0, v0);   // 0 is NaN, 1 is not NaN, so MASK for NaN
-                    VBIFQ(v0, v1, q0);   // copy dest where source is NaN
+                if(BOX64ENV(dynarec_fastnan)) {
+                    VFMINQD(v0, v2, v1);
+                } else {
+                    VFCMGTQD(q0, v1, v2);   // 0 if NaN or if v1 is not GT v2, so inverted mask for the copy
+                    if(v0!=v1) VBIFQ(v0, v1, q0);
+                    if(v0!=v2) VBITQ(v0, v2, q0);
                 }
             }
             if(!vex.l) YMM0(gd);
@@ -532,17 +534,18 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
         case 0x5F:
             INST_NAME("VMAXPD Gx, Vx, Ex");
             nextop = F8;
-            if(!BOX64ENV(dynarec_fastnan)) {
+            if(!BOX64ENV(dynarec_fastnan))
                 q0 = fpu_get_scratch(dyn, ninst);
-            }
             for(int l=0; l<1+vex.l; ++l) {
                 if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
                // FMIN/FMAX will not copy a NaN if either is NaN
                // but x86 will copy src2 if either value is NaN, so let's force a copy of Src2 (Ex) if the result is NaN
-                VFMAXQD(v0, v2, v1);
-                if(!BOX64ENV(dynarec_fastnan) && (v2!=v1)) {
-                    VFCMEQQD(q0, v0, v0);   // 0 is NaN, 1 is not NaN, so MASK for NaN
-                    VBIFQ(v0, v1, q0);   // copy dest where source is NaN
+                if(BOX64ENV(dynarec_fastnan)) {
+                    VFMAXQD(v0, v2, v1);
+                } else {
+                    VFCMGTQD(q0, v2, v1);   // 0 if NaN or if v2 is not GT v1, so inverted mask for the copy
+                    if(v0!=v1) VBIFQ(v0, v1, q0);
+                    if(v0!=v2) VBITQ(v0, v2, q0);
                 }
             }
             if(!vex.l) YMM0(gd);
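
A quick sanity check of the corner cases this commit targets, restating the per-lane reference helper from above (hypothetical, not box64 code):

    #include <math.h>
    #include <stdio.h>

    // x86 per-lane min semantics: src2 wins on NaN and on equal values
    static float x86_min_lane(float a, float b) { return (a < b) ? a : b; }

    int main(void)
    {
        // with fastnan=0 the emulated MINPS must reproduce these results
        printf("%g\n", x86_min_lane(NAN, 1.0f));   // 1: src2 replaces a NaN dest
        printf("%g\n", x86_min_lane(1.0f, NAN));   // nan: a NaN src2 is copied
        printf("%g\n", x86_min_lane(0.0f, -0.0f)); // -0: equal values yield src2
        return 0;
    }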