diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2025-03-26 17:59:25 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2025-03-26 17:59:25 +0100 |
| commit | fed73858c684602c15623a5172d789a0f7d8bfc2 (patch) | |
| tree | 3ad210808dafb01d9a4ed808649373adf5732975 /src | |
| parent | cb5a52bf40d9f730710edba7f38675317fb58b90 (diff) | |
| download | box64-fed73858c684602c15623a5172d789a0f7d8bfc2.tar.gz box64-fed73858c684602c15623a5172d789a0f7d8bfc2.zip | |
[ARM64_DYNAREC] Improved (V)HADDPS with fastnan=0
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c | 18 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_f20f.c | 25 |
2 files changed, 39 insertions, 4 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c index 44396561..4b810da5 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c @@ -399,9 +399,27 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, case 0x7C: INST_NAME("VHADDPS Gx, Vx, Ex"); nextop = F8; + if(!BOX64ENV(dynarec_fastnan)) { + q0 = fpu_get_scratch(dyn, ninst); + q1 = fpu_get_scratch(dyn, ninst); + } for(int l=0; l<1+vex.l; ++l) { if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); } + if(!BOX64ENV(dynarec_fastnan)) { + // check if any input value was NAN + // but need to mix low/high part + VUZP1Q_32(q0, v2, v1); + VUZP2Q_32(q1, v2, v1); + VFMAXQS(q0, q0, q1); // propagate NAN + VFCMEQQS(q0, q0, q0); // 0 if NAN, 1 if not NAN + } VFADDPQS(v0, v2, v1); + if(!BOX64ENV(dynarec_fastnan)) { + VFCMEQQS(q1, v0, v0); // 0 => out is NAN + VBICQ(q1, q0, q1); // forget it in any input was a NAN already + VSHLQ_32(q1, q1, 31); // only keep the sign bit + VORRQ(v0, v0, q1); // NAN -> -NAN + } } if(!vex.l) YMM0(gd); break; diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c b/src/dynarec/arm64/dynarec_arm64_f20f.c index b62fd4d9..b4bdc963 100644 --- a/src/dynarec/arm64/dynarec_arm64_f20f.c +++ b/src/dynarec/arm64/dynarec_arm64_f20f.c @@ -34,7 +34,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n int32_t i32, i32_; int cacheupd = 0; int v0, v1; - int q0; + int q0, q1; int d0, d1; int64_t fixedaddress; int unscaled; @@ -42,6 +42,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n MAYUSE(d0); MAYUSE(d1); MAYUSE(q0); + MAYUSE(q1); MAYUSE(v0); MAYUSE(v1); @@ -404,9 +405,25 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n case 0x7C: INST_NAME("HADDPS Gx, Ex"); nextop = F8; - GETGX(v0, 1); - GETEX(v1, 0, 1); - VFADDPQS(v0, v0, v1); + GETGX(q1, 1); + GETEX(q0, 0, 0); + if(!BOX64ENV(dynarec_fastnan)) { + v0 = fpu_get_scratch(dyn, ninst); + v1 = fpu_get_scratch(dyn, ninst); + // check if any input value was NAN + // but need to mix low/high part + VUZP1Q_32(v0, q0, q1); + VUZP2Q_32(v1, q0, q1); + VFMAXQS(v0, v0, v1); // propagate NAN + VFCMEQQS(v0, v0, v0); // 0 if NAN, 1 if not NAN + } + VFADDPQS(q1, q1, q0); + if(!BOX64ENV(dynarec_fastnan)) { + VFCMEQQS(v1, q1, q1); // 0 => out is NAN + VBICQ(v1, v0, v1); // forget it in any input was a NAN already + VSHLQ_32(v1, v1, 31); // only keep the sign bit + VORRQ(q1, q1, v1); // NAN -> -NAN + } break; case 0x7D: INST_NAME("HSUBPS Gx, Ex"); |