about summary refs log tree commit diff stats
path: root/src/dynarec
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2024-06-01 19:22:39 +0200
committerptitSeb <sebastien.chev@gmail.com>2024-06-01 19:22:39 +0200
commit1ad69d785f6cdbfd2a1aaa75b7cfa09de7b0f493 (patch)
treece3f3899276afd060152284934b82f4a6f2c1f2b /src/dynarec
parentef3acd9f336b2019b8d576167e1c9cee775a97c1 (diff)
downloadbox64-1ad69d785f6cdbfd2a1aaa75b7cfa09de7b0f493.tar.gz
box64-1ad69d785f6cdbfd2a1aaa75b7cfa09de7b0f493.zip
[ARM64_DYNAREC] Added AVX.66.0F C4/C5/D7 opcodes
Diffstat (limited to 'src/dynarec')
-rw-r--r--src/dynarec/arm64/dynarec_arm64_avx_66_0f.c74
-rw-r--r--src/dynarec/arm64/dynarec_arm64_helper.h7
2 files changed, 81 insertions, 0 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
index c5485880..b1f187f1 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
@@ -56,6 +56,9 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
     MAYUSE(s0);
     MAYUSE(j64);
     MAYUSE(cacheupd);
+    #if STEP > 1
+    static const int8_t mask_shift8[] = { -7, -6, -5, -4, -3, -2, -1, 0 };   // per-byte shift counts loaded by the VPMOVMSKB case (0xD7) and fed to USHL_8: byte i is shifted by i-7
+    #endif
 
     /* Remember to not create a new fpu_scratch after some GY/VY/EY is created, because Y can be in the scratch area and might overlap (and scratch will win) */
 
@@ -864,6 +867,77 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             }
             break;
 
+        case 0xC4:   // VPINSRW: Gx = Vx with one 16-bit element, selected by the immediate, replaced from Ed
+            INST_NAME("VPINSRW Gx, Vx, Ed, Ib");
+            nextop = F8;
+            GETGX_empty_VX(v0, v2);
+            if(v0!=v2) VMOVQ(v0, v2);   // start from a copy of Vx unless destination and source already share a register
+            if(MODREG) {
+                u8 = (F8)&7;   // immediate byte, reduced to 0..7
+                ed = xRAX+(nextop&7)+(rex.b<<3);   // general register named by the r/m field (extended by rex.b)
+                VMOVQHfrom(v0, u8, ed);   // presumably writes the low 16 bits of ed into element u8 of v0 -- confirm against the emitter macro
+            } else {
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 1);
+                u8 = (F8)&7;   // the immediate is fetched only after the memory operand has been decoded
+                VLD1_16(v0, u8, wback);   // presumably loads 16 bits from [wback] into element u8 of v0 -- confirm against the emitter macro
+            }
+            YMM0(gd);   // presumably clears the upper 128 bits of the destination ymm -- confirm
+            break;
+        case 0xC5:   // VPEXTRW: Gd = the 16-bit element of Ex selected by the immediate
+            INST_NAME("VPEXTRW Gd, Ex, Ib");
+            nextop = F8;
+            GETGD;
+            if(MODREG) {
+                GETEX(v0, 0, 1);
+                u8 = (F8)&7;   // immediate byte, reduced to 0..7
+                VMOVHto(gd, v0, u8);   // presumably moves 16-bit element u8 of v0 into gd -- confirm against the emitter macro
+            } else {
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 1);
+                u8 = (F8)&7;   // the immediate is fetched only after the memory operand has been decoded
+                LDRH_U12(gd, wback, u8*2);   // halfword load at byte offset u8*2 from wback
+            }
+            break;
+
+        case 0xD7:   // VPMOVMSKB: collect bit 7 of every byte of Ex into consecutive low bits of Gd, 8 bytes per step
+            nextop = F8;
+            INST_NAME("VPMOVMSKB Gd, Ex");
+            v0 = fpu_get_scratch(dyn, ninst);   // scratch registers are taken before GETEX_Y (see the fpu_scratch note near the top of this function)
+            v1 = fpu_get_scratch(dyn, ninst);
+            q1 = fpu_get_scratch(dyn, ninst);
+            GETEX_Y(q0, 0, 0);
+            GETGD;
+            TABLE64(x1, (uintptr_t)&mask_shift8);   // x1 = address of the shift table
+            VLDR64_U12(v0, x1, 0);     // load the 8 per-byte shift counts (-7..0)
+            MOVI_8(v1, 0x80);   // load mask: 0x80 in each byte, i.e. bit 7
+            VAND(q1, v1, q0);   // keep only bit 7 of each byte of q0
+            USHL_8(q1, q1, v0); // shift each byte by its table count (negative = right shift), so bit 7 of byte i lands on bit i
+            UADDLV_8(q1, q1);   // accumulate: sum across the bytes (each contributes a distinct bit)
+            VMOVBto(gd, q1, 0);   // gd = byte 0 of q1, i.e. mask bits 0..7
+            // and now the high part (64-bit element 1 of q0)
+            VMOVeD(q1, 0, q0, 1);   // q1 element 0 = q0 element 1
+            VAND(q1, v1, q1);  // keep highest bit
+            USHL_8(q1, q1, v0); // shift
+            UADDLV_8(q1, q1);   // accumulate
+            VMOVBto(x1, q1, 0);   // x1 is free again: the shift table is already loaded in v0
+            BFIx(gd, x1, 8, 8);   // insert these 8 bits into gd at bit 8
+            if(vex.l) {   // 256-bit form: repeat both steps on EY
+                GETEY(q0);   // q0 is reused for EY
+                VAND(q1, v1, q0);   // keep only bit 7 of each byte
+                USHL_8(q1, q1, v0); // shift
+                UADDLV_8(q1, q1);   // accumulate
+                VMOVBto(x1, q1, 0);
+                BFIx(gd, x1, 16, 8);   // insert at bit 16
+                // and now the high part
+                VMOVeD(q1, 0, q0, 1);
+                VAND(q1, v1, q1);  // keep highest bit
+                USHL_8(q1, q1, v0); // shift
+                UADDLV_8(q1, q1);   // accumulate
+                VMOVBto(x1, q1, 0);
+                BFIx(gd, x1, 24, 8);   // insert at bit 24
+            }
+            break;
         case 0xD8:
             INST_NAME("VPSUBUSB Gx, Vx, Ex");
             nextop = F8;
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 3e4c605d..30f967a7 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -537,6 +537,13 @@
         VLDR128_U12(ey, ed, fixedaddress+16);                                                   \
     gy = ymm_get_reg(dyn, ninst, x1, gd, 0, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1, -1)
 
+// Get EY into ey: with a register operand (MODREG) it comes from ymm_get_reg for register (nextop&7)+(rex.b<<3), otherwise 128 bits are loaded from ed at fixedaddress+16. Relies on dyn, ninst, x1, nextop, rex, ed and fixedaddress from the caller's scope. NOTE(review): passes 1 as the 5th ymm_get_reg argument where the adjacent GY lookups pass 0 -- confirm this is intended when EY is only read.
+#define GETEY(ey)                                                                               \
+    if(MODREG)                                                                                  \
+        ey = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 1, -1, -1, -1);                 \
+    else                                                                                        \
+        VLDR128_U12(ey, ed, fixedaddress+16);                                                   \
+
 // Get empty EY and non-writen GY
 #define GETGYEY_empty(gy, ey)                                                                   \
     gy = ymm_get_reg(dyn, ninst, x1, gd, 0, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1, -1);       \