about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2025-03-26 18:29:24 +0100
committer ptitSeb <sebastien.chev@gmail.com> 2025-03-26 18:29:24 +0100
commit 6d9560870d6976d9aa84c693af29286627f4a062 (patch)
tree 5106d293cf183e9f86ea2053193018cc8c6a1ab0 /src
parent 95d6106e3a73837d7fe685f71b9a0bbd2196edc8 (diff)
download box64-6d9560870d6976d9aa84c693af29286627f4a062.tar.gz
box64-6d9560870d6976d9aa84c693af29286627f4a062.zip
[ARM64_DYNAREC] Improved VHSUBPS opcode, and added fastnan=0 path
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c 29
1 files changed, 16 insertions, 13 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
index 4b810da5..2bdcb373 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
@@ -427,22 +427,25 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             INST_NAME("VHSUBPS Gx, Vx, Ex");
             nextop = F8;
             q0 = fpu_get_scratch(dyn, ninst);
-            if(MODREG || (v1==v2)) {
+            if(!BOX64ENV(dynarec_fastnan))
                 q1 = fpu_get_scratch(dyn, ninst);
-            } else 
-                q1 = v1;
-            if(vex.l)
-                q2 = fpu_get_scratch(dyn, ninst);
-            else
-                q2 = q0;
-            // q0 will contains -1 / 0 / -1 / 0
-            MOVIQ_64(q0, 0xf0);
-            VSHLQ_32(q0, q0, 31);   // keep sign bit
             for(int l=0; l<1+vex.l; ++l) {
                 if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
-                VEORQ(q1, v1, q0);
-                VEORQ(q2, v2, q0);
-                VFADDPQS(v0, q2, q1);
+                VUZP1Q_32(q0, v2, v1);
+                VUZP2Q_32(v0, v2, v1);
+                if(!BOX64ENV(dynarec_fastnan)) {
+                    // check if any input value was NAN
+                    // but need to mix low/high part
+                    VFMAXQS(q1, v0, q0);    // propagate NAN
+                    VFCMEQQS(q1, q1, q1);    // 0 if NAN, 1 if not NAN
+                }
+                VFSUBQS(v0, q0, v0);
+                if(!BOX64ENV(dynarec_fastnan)) {
+                    VFCMEQQS(q0, v0, v0);    // 0 => out is NAN
+                    VBICQ(q1, q1, q0);      // forget it in any input was a NAN already
+                    VSHLQ_32(q1, q1, 31);   // only keep the sign bit
+                    VORRQ(v0, v0, q1);      // NAN -> -NAN
+                }
             }
             if(!vex.l) YMM0(gd);
             break;