author    ptitSeb <sebastien.chev@gmail.com>  2024-06-10 15:15:57 +0200
committer ptitSeb <sebastien.chev@gmail.com>  2024-06-10 15:15:57 +0200
commit    84a8500323ad35246003e52cb669d34ee266d531 (patch)
tree      f19b15c8c59a00da84494ab0cb8d2db1039837c3 /src
parent    a8911fc97942739515d76d025937d438055ea4e2 (diff)
download  box64-84a8500323ad35246003e52cb669d34ee266d531.tar.gz
          box64-84a8500323ad35246003e52cb669d34ee266d531.zip
[ARM64_DYNAREC] Fixed 256bits version of AVX.66.0F D1/D2/D3 opcodes
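
The D1/D2/D3 opcodes in this row are VPSRLW/VPSRLD/VPSRLQ. On x86 the shift count is taken once, from the low 64 bits of the third (xmm/m128) operand, and applied to every element of both 128-bit halves of the ymm destination; as the hunks below show, the old 256-bit path recomputed the count per lane, reading it from the upper half of the source. A minimal reference sketch of the word (D1) variant in plain C, not box64 code; the function name and array interface are illustrative only:

    #include <stdint.h>

    /* Reference semantics of VPSRLW ymm, ymm, xmm (AVX.66.0F D1), 256-bit form:
     * one count, read from the low 64 bits of the xmm source, shifts all sixteen
     * 16-bit words; a count of 16 or more zeroes every element. */
    void ref_vpsrlw_256(uint16_t dst[16], const uint16_t src[16], uint64_t count)
    {
        for (int i = 0; i < 16; ++i)
            dst[i] = (count > 15) ? 0 : (uint16_t)(src[i] >> count);
    }
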
Diffstat (limited to 'src')
-rw-r--r--   src/dynarec/arm64/dynarec_arm64_avx_66_0f.c   36
1 file changed, 21 insertions(+), 15 deletions(-)
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
index 492fd094..62dd2c0f 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
@@ -1225,11 +1225,13 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             q1 = fpu_get_scratch(dyn, ninst);
             MOVI_32(q1, 16);
             for(int l=0; l<1+vex.l; ++l) {
-                if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
-                UQXTN_32(q0, v1);
-                UMIN_32(q0, q0, q1);    // limit to 0 .. +16 values
-                NEG_32(q0, q0);         // neg to do shr
-                VDUPQ_16(q0, q0, 0);    // only the low 8bits will be used anyway
+                if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VY(v0, v2, 0, -1, -1); }
+                if(!l) {
+                    UQXTN_32(q0, v1);
+                    UMIN_32(q0, q0, q1);    // limit to 0 .. +16 values
+                    NEG_32(q0, q0);         // neg to do shr
+                    VDUPQ_16(q0, q0, 0);    // only the low 8bits will be used anyway
+                }
                 USHLQ_16(v0, v2, q0);   // SHR x8
             }
             if(!vex.l) YMM0(gd);
@@ -1241,11 +1243,13 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             q1 = fpu_get_scratch(dyn, ninst);
             MOVI_32(q1, 32);
             for(int l=0; l<1+vex.l; ++l) {
-                if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
-                UQXTN_32(q0, v1);
-                UMIN_32(q0, q0, q1);    // limit to 0 .. +32 values
-                NEG_32(q0, q0);         // neg to do shr
-                VDUPQ_16(q0, q0, 0);    // only the low 8bits will be used anyway
+                if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VY(v0, v2, 0, -1, -1); }
+                if(!l) {
+                    UQXTN_32(q0, v1);
+                    UMIN_32(q0, q0, q1);    // limit to 0 .. +32 values
+                    NEG_32(q0, q0);         // neg to do shr
+                    VDUPQ_16(q0, q0, 0);    // only the low 8bits will be used anyway
+                }
                 USHLQ_32(v0, v2, q0);   // SHR x4
             }
             if(!vex.l) YMM0(gd);
@@ -1257,11 +1261,13 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             q1 = fpu_get_scratch(dyn, ninst);
             MOVI_32(q1, 64);
             for(int l=0; l<1+vex.l; ++l) {
-                if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VYEY(v0, v2, v1); }
-                UQXTN_32(q0, v1);
-                UMIN_32(q0, q0, q1);    // limit to 0 .. +64 values
-                NEG_32(q0, q0);         // neg to do shr
-                VDUPQ_16(q0, q0, 0);    // only the low 8bits will be used anyway
+                if(!l) { GETGX_empty_VXEX(v0, v2, v1, 0); } else { GETGY_empty_VY(v0, v2, 0, -1, -1); }
+                if(!l) {
+                    UQXTN_32(q0, v1);
+                    UMIN_32(q0, q0, q1);    // limit to 0 .. +64 values
+                    NEG_32(q0, q0);         // neg to do shr
+                    VDUPQ_16(q0, q0, 0);    // only the low 8bits will be used anyway
+                }
                 USHLQ_64(v0, v2, q0);
             }
             if(!vex.l) YMM0(gd);
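
In all three hunks the count setup (UQXTN to narrow, UMIN to clamp, NEG, VDUP to broadcast) now runs only on the first iteration (l == 0), since the same scratch q0 holds for the upper lane as well; the upper lane therefore only needs GETGY_empty_VY instead of GETGY_empty_VYEY. A minimal sketch of the underlying "clamp, negate, USHL" trick, written with NEON intrinsics rather than the dynarec emitter macros; the function name and scalar-count interface are assumptions for illustration:

    #include <arm_neon.h>
    #include <stdint.h>

    /* USHL with a negative per-element count performs a logical right shift,
     * which is how the hunks above implement SHR. Clamping the count to the
     * element width first (as UMIN does) makes any oversized x86 count collapse
     * to an all-zero result, matching the VPSRLW semantics. */
    uint16x8_t shr_u16_by_count(uint16x8_t v, uint64_t count)
    {
        if (count > 16)
            count = 16;                               /* 0..16, like MOVI+UMIN  */
        int16x8_t neg = vdupq_n_s16(-(int16_t)count); /* broadcast negated count */
        return vshlq_u16(v, neg);                     /* shift right by 'count'  */
    }
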