about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: ptitSeb <sebastien.chev@gmail.com> 2025-03-26 17:59:25 +0100
committer: ptitSeb <sebastien.chev@gmail.com> 2025-03-26 17:59:25 +0100
commit: fed73858c684602c15623a5172d789a0f7d8bfc2 (patch)
tree: 3ad210808dafb01d9a4ed808649373adf5732975 /src
parent: cb5a52bf40d9f730710edba7f38675317fb58b90 (diff)
download: box64-fed73858c684602c15623a5172d789a0f7d8bfc2.tar.gz
box64-fed73858c684602c15623a5172d789a0f7d8bfc2.zip
[ARM64_DYNAREC] Improved (V)HADDPS with fastnan=0
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c | 18
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_f20f.c | 25
2 files changed, 39 insertions, 4 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
index 44396561..4b810da5 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
@@ -399,9 +399,27 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
         case 0x7C:
             INST_NAME("VHADDPS Gx, Vx, Ex");
             nextop = F8;
+            if(!BOX64ENV(dynarec_fastnan)) {
+                q0 = fpu_get_scratch(dyn, ninst);
+                q1 = fpu_get_scratch(dyn, ninst);
+            }
             for(int l=0; l<1+vex.l; ++l) {
                 if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
+                if(!BOX64ENV(dynarec_fastnan)) {
+                    // build a per-result-lane mask telling whether any input of that pair was NAN
+                    // de-interleave even/odd elements in the same (v2, v1) order as the VFADDPQS below so lanes match
+                    VUZP1Q_32(q0, v2, v1);
+                    VUZP2Q_32(q1, v2, v1);
+                    VFMAXQS(q0, q0, q1);    // propagate NAN: lane is NAN if either element of the pair is NAN
+                    VFCMEQQS(q0, q0, q0);    // mask: 0 if an input was NAN, all bits set if not
+                }
                 VFADDPQS(v0, v2, v1);    // the horizontal (pairwise) add itself
+                if(!BOX64ENV(dynarec_fastnan)) {
+                    VFCMEQQS(q1, v0, v0);    // 0 => result lane is NAN
+                    VBICQ(q1, q0, q1);      // forget it if any input was a NAN already: keep only NAN created by the add
+                    VSHLQ_32(q1, q1, 31);   // only keep the sign bit
+                    VORRQ(v0, v0, q1);      // NAN -> -NAN (set the sign bit on freshly generated NAN)
+                }
             }
             if(!vex.l) YMM0(gd);
             break;
diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c b/src/dynarec/arm64/dynarec_arm64_f20f.c
index b62fd4d9..b4bdc963 100644
--- a/src/dynarec/arm64/dynarec_arm64_f20f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f20f.c
@@ -34,7 +34,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
     int32_t i32, i32_;

     int cacheupd = 0;

     int v0, v1;

-    int q0;

+    int q0, q1;

     int d0, d1;

     int64_t fixedaddress;

     int unscaled;

@@ -42,6 +42,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
     MAYUSE(d0);

     MAYUSE(d1);

     MAYUSE(q0);

+    MAYUSE(q1);

     MAYUSE(v0);

     MAYUSE(v1);

 

@@ -404,9 +405,25 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         case 0x7C:

             INST_NAME("HADDPS Gx, Ex");

             nextop = F8;

-            GETGX(v0, 1);

-            GETEX(v1, 0, 1);

-            VFADDPQS(v0, v0, v1);

+            GETGX(q1, 1);

+            GETEX(q0, 0, 0);

+            if(!BOX64ENV(dynarec_fastnan)) {

+                v0 = fpu_get_scratch(dyn, ninst);

+                v1 = fpu_get_scratch(dyn, ninst);

+                // build a per-result-lane mask telling whether any input of that pair was NAN

+                // de-interleave even/odd elements in the same (Gx, Ex) order as the VFADDPQS below, or mask halves end up swapped

+                VUZP1Q_32(v0, q1, q0);

+                VUZP2Q_32(v1, q1, q0);

+                VFMAXQS(v0, v0, v1);    // propagate NAN: lane is NAN if either element of the pair is NAN

+                VFCMEQQS(v0, v0, v0);    // mask: 0 if an input was NAN, all bits set if not

+            }

             VFADDPQS(q1, q1, q0);

+            if(!BOX64ENV(dynarec_fastnan)) {

+                VFCMEQQS(v1, q1, q1);    // 0 => result lane is NAN

+                VBICQ(v1, v0, v1);      // forget it if any input was a NAN already: keep only NAN created by the add

+                VSHLQ_32(v1, v1, 31);   // only keep the sign bit

+                VORRQ(q1, q1, v1);      // NAN -> -NAN (set the sign bit on freshly generated NAN)

+            }

             break;

         case 0x7D:

             INST_NAME("HSUBPS Gx, Ex");