From ff4ae1f4d83d49b1071506146ca403316d914b61 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Sat, 26 Apr 2025 10:46:11 +0200 Subject: [ARM64_DYNAREC] Allow shift with saturation on (V)PMULH(U)W because it will never saturate --- src/dynarec/arm64/dynarec_arm64_0f.c | 3 +-- src/dynarec/arm64/dynarec_arm64_660f.c | 6 ++---- src/dynarec/arm64/dynarec_arm64_avx_66_0f.c | 6 ++---- 3 files changed, 5 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index 5d983399..7850393d 100644 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -2814,8 +2814,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin GETEM(v1, 0); q0 = fpu_get_scratch(dyn, ninst); VUMULL_16(q0, v0, v1); - VSHRQ_32(q0, q0, 16); - XTN_16(v0, q0); + UQSHRN_16(v0, q0, 16); // saturation will never happen as only 16bits remain and fit in 16bits break; case 0xE5: INST_NAME("PMULHW Gm,Em"); diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c index d36dea7b..49477f16 100644 --- a/src/dynarec/arm64/dynarec_arm64_660f.c +++ b/src/dynarec/arm64/dynarec_arm64_660f.c @@ -3108,10 +3108,8 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n q1 = fpu_get_scratch(dyn, ninst); VUMULL_16(q0, v0, v1); VUMULL2_16(q1, v0, v1); - VSHRQ_32(q0, q0, 16); - VSHRQ_32(q1, q1, 16); - XTN_16(v0, q0); - XTN2_16(v0, q1); + UQSHRN_16(v0, q0, 16); // 16bits->16bits: no saturation + UQSHRN2_16(v0, q1, 16); break; case 0xE5: INST_NAME("PMULHW Gx, Ex"); diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c index aea2f378..e878056d 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c @@ -1626,10 +1626,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, if(!l) { GETGX_empty_VXEX(v0, v2, 
v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); } VUMULL_16(q0, v2, v1); VUMULL2_16(q1, v2, v1); - VSHRQ_32(q0, q0, 16); - VSHRQ_32(q1, q1, 16); - XTN_16(v0, q0); - XTN2_16(v0, q1); + UQSHRN_16(v0, q0, 16); // 16bits->16bits: no saturation + UQSHRN2_16(v0, q1, 16); } if(!vex.l) YMM0(gd); break; -- cgit 1.4.1