about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author wannacu <wannacu2049@gmail.com> 2023-08-10 16:46:01 +0800
committer wannacu <wannacu2049@gmail.com> 2023-08-11 11:43:45 +0800
commit 8d04940c9195ef8b31fce1983a395a88ca12801b (patch)
tree 5c9595c07fe14dcd55636585d06f61c3ffebc756
parent 4f1d3a75c29c580779be238d398f4137d1f13ae5 (diff)
download box64-8d04940c9195ef8b31fce1983a395a88ca12801b.tar.gz
box64-8d04940c9195ef8b31fce1983a395a88ca12801b.zip
[ARM64_DYNAREC] Added 0F D1/D2 opcode
-rw-r--r-- src/dynarec/arm64/arm64_emitter.h 25
-rw-r--r-- src/dynarec/arm64/arm64_printer.c 15
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_0f.c 51
3 files changed, 80 insertions, 11 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 98708b1a..8d1c28a6 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1446,14 +1446,23 @@
 #define VUZP1Q_64(Rt, Rn, Rm)       EMIT(UZP_gen(1, 0b11, Rm, 0, Rn, Rt))
 #define VUZP2Q_64(Rt, Rn, Rm)       EMIT(UZP_gen(1, 0b11, Rm, 1, Rn, Rt))
 
-#define DUP_gen(Q, imm5, Rn, Rd)    ((Q)<<30 | 0b01110000<<21 | (imm5)<<16 | 1<<10 | (Rn)<<5 | (Rd))
-#define VDUP_8(Vd, Vn, idx)         EMIT(DUP_gen(0, ((idx)<<1|1), Vn, Vd))
-#define VDUPQ_8(Vd, Vn, idx)        EMIT(DUP_gen(1, ((idx)<<1|1), Vn, Vd))
-#define VDUP_16(Vd, Vn, idx)        EMIT(DUP_gen(0, ((idx)<<2|0b10), Vn, Vd))
-#define VDUPQ_16(Vd, Vn, idx)       EMIT(DUP_gen(1, ((idx)<<2|0b10), Vn, Vd))
-#define VDUP_32(Vd, Vn, idx)        EMIT(DUP_gen(0, ((idx)<<3|0b100), Vn, Vd))
-#define VDUPQ_32(Vd, Vn, idx)       EMIT(DUP_gen(1, ((idx)<<3|0b100), Vn, Vd))
-#define VDUPQ_64(Vd, Vn, idx)       EMIT(DUP_gen(1, ((idx)<<4|0b1000), Vn, Vd))
+#define DUP_element(Q, imm5, Rn, Rd)    ((Q)<<30 | 0b01110000<<21 | (imm5)<<16 | 1<<10 | (Rn)<<5 | (Rd))
+#define VDUP_8(Vd, Vn, idx)         EMIT(DUP_element(0, ((idx)<<1|1), Vn, Vd))
+#define VDUPQ_8(Vd, Vn, idx)        EMIT(DUP_element(1, ((idx)<<1|1), Vn, Vd))
+#define VDUP_16(Vd, Vn, idx)        EMIT(DUP_element(0, ((idx)<<2|0b10), Vn, Vd))
+#define VDUPQ_16(Vd, Vn, idx)       EMIT(DUP_element(1, ((idx)<<2|0b10), Vn, Vd))
+#define VDUP_32(Vd, Vn, idx)        EMIT(DUP_element(0, ((idx)<<3|0b100), Vn, Vd))
+#define VDUPQ_32(Vd, Vn, idx)       EMIT(DUP_element(1, ((idx)<<3|0b100), Vn, Vd))
+#define VDUPQ_64(Vd, Vn, idx)       EMIT(DUP_element(1, ((idx)<<4|0b1000), Vn, Vd))
+
+#define DUP_general(Q, imm5, Rn, Rd)    ((Q)<<30 | 0b01110000<<21 | (imm5)<<16 | 0b11<<10 | (Rn)<<5 | (Rd))
+#define VDUPB(Vd, Wn)         EMIT(DUP_general(0, 0b1, Wn, Vd))
+#define VDUPQB(Vd, Wn)        EMIT(DUP_general(1, 0b1, Wn, Vd))
+#define VDUPH(Vd, Wn)        EMIT(DUP_general(0, 0b10, Wn, Vd))
+#define VDUPQH(Vd, Wn)       EMIT(DUP_general(1, 0b10, Wn, Vd))
+#define VDUPS(Vd, Wn)        EMIT(DUP_general(0, 0b100, Wn, Vd))
+#define VDUPQS(Vd, Wn)       EMIT(DUP_general(1, 0b100, Wn, Vd))
+#define VDUPQD(Vd, Xn)       EMIT(DUP_general(1, 0b1000, Xn, Vd))
 
 // TBL
 #define TBL_gen(Q, Rm, len, op, Rn, Rd) ((Q)<<30 | 0b001110<<24 | (Rm)<<16 | (len)<<13 | (op)<<12 | (Rn)<<5 | (Rd))
diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c
index 2c134dee..2806497b 100644
--- a/src/dynarec/arm64/arm64_printer.c
+++ b/src/dynarec/arm64/arm64_printer.c
@@ -1499,7 +1499,7 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
 

     // DUP

     if(isMask(opcode, "0Q001110000iiiii000001nnnnnddddd", &a)) {

-        const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "2D", "??"};

+        const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"};

         const char* Z[] = {"B", "H", "S", "D"};

         int sz = 3;

         if((imm&0b0001)==0b0001) sz=0;

@@ -1511,7 +1511,18 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "DUP V%d.%s, V%d.%s[%d]", Rd, Vd, Rn, Vn, sh);

         return buff;

     }

-    

+

+    if(isMask(opcode, "0Q001110000iiiii000011nnnnnddddd", &a)) {

+        const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"};

+        int sz = 3;

+        if((imm&0b0001)==0b0001) sz=0;

+        else if((imm&0b0011)==0b0010) sz=1;

+        else if((imm&0b0111)==0b0100) sz=2;

+        const char* Vd = Y[(sz<<1)|a.Q];

+        snprintf(buff, sizeof(buff), "DUP V%d.%s, X%d", Rd, Vd, Rn);

+        return buff;

+    }

+

     // AES

     if(isMask(opcode, "0100111000101000010f10nnnnnddddd", &a)) {

         snprintf(buff, sizeof(buff), "AES%c V%d.16B, V%d.16B", sf?'D':'E', Rd, Rn);

diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 236df733..789fe27e 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -1700,7 +1700,56 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             gd = xRAX+(opcode&7)+(rex.b<<3);

             REVxw(gd, gd);

             break;

-

+        case 0xD1:

+            INST_NAME("PSRLW Gm, Em");

+            nextop = F8;

+            GETGM(d0);

+            GETEM(d1, 0);

+            if(MODREG) {

+                q0 = fpu_get_scratch(dyn);

+            }

+            else {

+                q0 = d1;

+            }

+            q1 = fpu_get_scratch(dyn);

+            VMOVBto(x1, d1, 0);

+            MOVZw(x2, 16);

+            SUBSw_REG(x2, x2, x1);

+            B_MARK(cGT);

+            VMOVQDfrom(d0, 0, xZR);

+            B_NEXT_nocond;

+            MARK;

+            VDUPQS(q1, x2);

+            UXTL_16(q0, d0);

+            USHLQ_32(q0, q0, q1);

+            VUZP2Q_16(q0, q0, q0);

+            VMOVeD(d0, 0, q0, 0);

+            break;

+        case 0xD2:

+            INST_NAME("PSRLD Gm, Em");

+            nextop = F8;

+            GETGM(d0);

+            GETEM(d1, 0);

+            if(MODREG) {

+                q0 = fpu_get_scratch(dyn);

+            }

+            else {

+                q0 = d1;

+            }

+            q1 = fpu_get_scratch(dyn);

+            VMOVBto(x1, d1, 0);

+            MOVZw(x2, 32);

+            SUBSw_REG(x2, x2, x1);

+            B_MARK(cGT);

+            VMOVQDfrom(d0, 0, xZR);

+            B_NEXT_nocond;

+            MARK;

+            VDUPQD(q1, x2);

+            UXTL_32(q0, d0);

+            USHLQ_64(q0, q0, q1);

+            VUZP2Q_32(q0, q0, q0);

+            VMOVeD(d0, 0, q0, 0);

+            break;

         case 0xD3:

             INST_NAME("PSRLQ Gm,Em");

             nextop = F8;