about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2023-04-07 12:39:10 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2023-04-07 12:39:26 +0200
commit ae34c7e1ccdb318d81574315d01b2605c40fbc66 (patch)
tree a52e6993db930d1a438b4626870c9c7f549e33f6 /src
parent 07a3a0e1c87f1568af524a0a553ac8c26d4c1e44 (diff)
download box64-ae34c7e1ccdb318d81574315d01b2605c40fbc66.tar.gz
box64-ae34c7e1ccdb318d81574315d01b2605c40fbc66.zip
[ARM64_DYNAREC] Fix/Added 66 0F E1/E2/F1/F2/F3 opcodes
Diffstat (limited to 'src')
-rwxr-xr-x src/dynarec/arm64/dynarec_arm64_660f.c 48
1 files changed, 31 insertions, 17 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index eb459690..cce34ec6 100755
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -2137,12 +2137,10 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             GETEX(q1, 0, 0);

             v0 = fpu_get_scratch(dyn);

             v1 = fpu_get_scratch(dyn);

-            SQXTN_32(v0, q1);

-            NEG_32(v0, v0);

+            UQXTN_32(v0, q1);

             MOVI_32(v1, 15);

-            SMIN_32(v0, v0, v1);

-            NEG_32(v1, v1);

-            SMAX_32(v0, v0, v1);    // limit to -15 .. +15 values

+            UMIN_32(v0, v0, v1);    // limit to 0 .. +15 values

+            NEG_32(v0, v0);

             VDUPQ_16(v0, v0, 0);    // only the low 8bits will be used anyway

             SSHLQ_16(q0, q0, v0);

             break;

@@ -2153,12 +2151,10 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             GETEX(q1, 0, 0);

             v0 = fpu_get_scratch(dyn);

             v1 = fpu_get_scratch(dyn);

-            SQXTN_32(v0, q1);

-            NEG_32(v0, v0);

+            UQXTN_32(v0, q1);

             MOVI_32(v1, 31);

-            SMIN_32(v0, v0, v1);

-            NEG_32(v1, v1);

-            SMAX_32(v0, v0, v1);    // limit to -31 .. +31 values

+            UMIN_32(v0, v0, v1);        // limit to 0 .. +31 values

+            NEG_32(v0, v0);

             VDUPQ_32(v0, v0, 0);    // only the low 8bits will be used anyway

             SSHLQ_32(q0, q0, v0);

             break;

@@ -2301,17 +2297,31 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             }

             break;

 

+        case 0xF1:

+            INST_NAME("PSLLW Gx,Ex");

+            nextop = F8;

+            GETGX(q0, 1);

+            GETEX(q1, 0, 0);

+            v0 = fpu_get_scratch(dyn);

+            v1 = fpu_get_scratch(dyn);

+            UQXTN_32(v0, q1);

+            MOVI_32(v1, 16);

+            UMIN_32(v0, v0, v1);    // limit to 0 .. +16 values

+            VDUPQ_16(v0, v0, 0);    // only the low 8bits will be used anyway

+            USHLQ_16(q0, q0, v0);

+            break;

         case 0xF2:

             INST_NAME("PSLLD Gx,Ex");

             nextop = F8;

             GETGX(q0, 1);

             GETEX(q1, 0, 0);

             v0 = fpu_get_scratch(dyn);

-            VMOVeD(v0, 0, q1, 0);

-            VMOVeD(v0, 1, q1, 0);

-            SQXTN_32(v0, v0); // 2*q1 in 32bits now

-            VMOVeD(v0, 1, v0, 0);

-            SSHLQ_32(q0, q0, v0);

+            v1 = fpu_get_scratch(dyn);

+            UQXTN_32(v0, q1);

+            MOVI_32(v1, 32);

+            UMIN_32(v0, v0, v1);    // limit to 0 .. +32 values

+            VDUPQ_32(v0, v0, 0);    // only the low 8bits will be used anyway

+            USHLQ_32(q0, q0, v0);

             break;

         case 0xF3:

             INST_NAME("PSLLQ Gx,Ex");

@@ -2319,8 +2329,12 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             GETGX(q0, 1);

             GETEX(q1, 0, 0);

             v0 = fpu_get_scratch(dyn);

-            VMOVQ(v0, q1);

-            VMOVeD(v0, 1, v0, 0);

+            v0 = fpu_get_scratch(dyn);

+            v1 = fpu_get_scratch(dyn);

+            UQXTN_32(v0, q1);

+            MOVI_32(v1, 64);

+            UMIN_32(v0, v0, v1);    // limit to 0 .. +64 values

+            VDUPQ_64(v0, v0, 0);    // only the low 8bits will be used anyway

             USHLQ_64(q0, q0, v0);

             break;

         case 0xF4: