diff options
| author | wannacu <76616478+wannacu@users.noreply.github.com> | 2025-02-11 20:26:24 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-02-11 13:26:24 +0100 |
| commit | ee83ef4c010ef3d718a3e1ccaf684a086ffb6da4 (patch) | |
| tree | f2c20c2e50ec5b70b1682952335dc35a655e448b | |
| parent | 4b7e116aa7ec6234682553b0dab8affca06c4071 (diff) | |
| download | box64-ee83ef4c010ef3d718a3e1ccaf684a086ffb6da4.tar.gz box64-ee83ef4c010ef3d718a3e1ccaf684a086ffb6da4.zip | |
[ARM64_DYNAREC] Fixed wrong COUNT operand in VPSLL{W,D,Q} (#2344)
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f.c | 27 |
1 file changed, 15 insertions, 12 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c index 642cf169..599774c3 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c @@ -1792,12 +1792,13 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, nextop = F8; q0 = fpu_get_scratch(dyn, ninst); q1 = fpu_get_scratch(dyn, ninst); + GETEX(v1, 0, 0); + UQXTN_32(q0, v1); MOVI_32(q1, 16); + UMIN_32(q0, q0, q1); // limit to 0 .. +16 values + VDUPQ_16(q0, q0, 0); // only the low 8bits will be used anyway for(int l=0; l<1+vex.l; ++l) { - if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); } - UQXTN_32(q0, v1); - UMIN_32(q0, q0, q1); // limit to 0 .. +16 values - VDUPQ_16(q0, q0, 0); // only the low 8bits will be used anyway + if(!l) { GETGX_empty_VX(v0, v2); } else { GETGY_empty_VY(v0, v2, 0, -1, -1); } USHLQ_16(v0, v2, q0); } if(!vex.l) YMM0(gd); @@ -1807,12 +1808,13 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, nextop = F8; q0 = fpu_get_scratch(dyn, ninst); q1 = fpu_get_scratch(dyn, ninst); + GETEX(v1, 0, 0); + UQXTN_32(q0, v1); MOVI_32(q1, 32); + UMIN_32(q0, q0, q1); // limit to 0 .. +32 values + VDUPQ_32(q0, q0, 0); // only the low 8bits will be used anyway for(int l=0; l<1+vex.l; ++l) { - if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); } - UQXTN_32(q0, v1); - UMIN_32(q0, q0, q1); // limit to 0 .. +32 values - VDUPQ_32(q0, q0, 0); // only the low 8bits will be used anyway + if(!l) { GETGX_empty_VX(v0, v2); } else { GETGY_empty_VY(v0, v2, 0, -1, -1); } USHLQ_32(v0, v2, q0); } if(!vex.l) YMM0(gd); @@ -1822,12 +1824,13 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, nextop = F8; q0 = fpu_get_scratch(dyn, ninst); q1 = fpu_get_scratch(dyn, ninst); + GETEX(v1, 0, 0); + UQXTN_32(q0, v1); MOVI_32(q1, 64); + UMIN_32(q0, q0, q1); // limit to 0 .. +64 values + VDUPQ_64(q0, q0, 0); // only the low 8bits will be used anyway for(int l=0; l<1+vex.l; ++l) { - if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); } - UQXTN_32(q0, v1); - UMIN_32(q0, q0, q1); // limit to 0 .. +64 values - VDUPQ_64(q0, q0, 0); // only the low 8bits will be used anyway + if(!l) { GETGX_empty_VX(v0, v2); } else { GETGY_empty_VY(v0, v2, 0, -1, -1); } USHLQ_64(v0, v2, q0); } if(!vex.l) YMM0(gd); |