about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2022-01-06 18:09:26 +0100
committer ptitSeb <sebastien.chev@gmail.com> 2022-01-06 18:09:26 +0100
commit 1e010f38c901e087502412256a632d429fe6ad24 (patch)
tree fb507fd21d2620b8217365fe339ba5bb8646d8fd /src
parent 8260931ad733cdf6125b39ef9e8212d7fbf2bd91 (diff)
download box64-1e010f38c901e087502412256a632d429fe6ad24.tar.gz
box64-1e010f38c901e087502412256a632d429fe6ad24.zip
[DYNAREC] Added 66 0F D1 opcode and improved 66 0F D2 opcode
Diffstat (limited to 'src')
-rwxr-xr-x src/dynarec/arm64_emitter.h 9
-rwxr-xr-x src/dynarec/arm64_printer.c 15
-rwxr-xr-x src/dynarec/dynarec_arm64_660f.c 17
3 files changed, 36 insertions, 5 deletions
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index a063c958..b06aa319 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -1315,6 +1315,15 @@
 #define VUZP1Q_64(Rt, Rn, Rm)       EMIT(UZP_gen(1, 0b11, Rm, 0, Rn, Rt))
 #define VUZP2Q_64(Rt, Rn, Rm)       EMIT(UZP_gen(1, 0b11, Rm, 1, Rn, Rt))
 
+#define DUP_gen(Q, imm5, Rn, Rd)    ((Q)<<30 | 0b01110000<<21 | (imm5)<<16 | 1<<10 | (Rn)<<5 | (Rd)) // DUP (element) opcode word; imm5 packs the lane index above a lane-size marker bit
+#define VDUP_8(Vd, Vn, idx)         EMIT(DUP_gen(0, ((idx)<<1|1), Vn, Vd)) // Q=0, 8-bit lanes: marker is bit 0 of imm5
+#define VDUPQ_8(Vd, Vn, idx)        EMIT(DUP_gen(1, ((idx)<<1|1), Vn, Vd)) // Q=1 variant of VDUP_8
+#define VDUP_16(Vd, Vn, idx)        EMIT(DUP_gen(0, ((idx)<<2|0b10), Vn, Vd)) // Q=0, 16-bit lanes: marker is bit 1 of imm5
+#define VDUPQ_16(Vd, Vn, idx)       EMIT(DUP_gen(1, ((idx)<<2|0b10), Vn, Vd)) // Q=1 variant of VDUP_16
+#define VDUP_32(Vd, Vn, idx)        EMIT(DUP_gen(0, ((idx)<<3|0b100), Vn, Vd)) // Q=0, 32-bit lanes: marker is bit 2 of imm5
+#define VDUPQ_32(Vd, Vn, idx)       EMIT(DUP_gen(1, ((idx)<<3|0b100), Vn, Vd)) // Q=1 variant of VDUP_32
+#define VDUPQ_64(Vd, Vn, idx)       EMIT(DUP_gen(1, ((idx)<<4|0b1000), Vn, Vd)) // Q=1, 64-bit lanes: marker is bit 3 of imm5 (no Q=0 macro is defined)
+
 // TBL
 #define TBL_gen(Q, Rm, len, op, Rn, Rd) ((Q)<<30 | 0b001110<<24 | (Rm)<<16 | (len)<<13 | (op)<<12 | (Rn)<<5 | (Rd))
 //Use Rm[] to pick from Rn element and store in Rd. Out-of-range element gets 0
diff --git a/src/dynarec/arm64_printer.c b/src/dynarec/arm64_printer.c
index c19b48ea..7849db23 100755
--- a/src/dynarec/arm64_printer.c
+++ b/src/dynarec/arm64_printer.c
@@ -1311,6 +1311,21 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
             snprintf(buff, sizeof(buff), "%cSHLL%s V%d.%s, V%d.%s, #%d", a.U?'U':'S', a.Q?"2":"", Rd, Va, Rn, Vd, sh);

         return buff;

     }

+

+    // DUP (element): prints "DUP Vd.<arrangement>, Vn.<lane size>[<lane index>]"

+    if(isMask(opcode, "0Q001110000iiiii000001nnnnnddddd", &a)) {

+        const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"};    // indexed by (sz<<1)|Q: 64-bit lanes are only valid with Q=1

+        const char* Z[] = {"B", "H", "S", "D"};

+        int sz = 3;     // lane size = position of the lowest set bit of imm5 (falls back to 64-bit)

+        if((imm&0b0001)==0b0001) sz=0;

+        else if((imm&0b0011)==0b0010) sz=1;

+        else if((imm&0b0111)==0b0100) sz=2;

+        int sh=imm>>(sz+1);     // lane index is stored in the bits above the size marker

+        const char* Vd = Y[(sz<<1)|a.Q];

+        const char* Vn = Z[sz];

+        snprintf(buff, sizeof(buff), "DUP V%d.%s, V%d.%s[%d]", Rd, Vd, Rn, Vn, sh);

+        return buff;

+    }

     

     // AES

     if(isMask(opcode, "0100111000101000010f10nnnnnddddd", &a)) {

diff --git a/src/dynarec/dynarec_arm64_660f.c b/src/dynarec/dynarec_arm64_660f.c
index ce959952..97d49ea5 100755
--- a/src/dynarec/dynarec_arm64_660f.c
+++ b/src/dynarec/dynarec_arm64_660f.c
@@ -1521,17 +1521,24 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             }

             break;

 

-

+        case 0xD1:

+            INST_NAME("PSRLW Gx,Ex");

+            nextop = F8;

+            GETGX(q0);

+            GETEX(q1, 0);

+            v0 = fpu_get_scratch(dyn);

+            VDUPQ_16(v0, q1, 0);    // shift count comes from Ex (q1), not Gx; only its low 16-bit lane is broadcast

+            NEGQ_16(v0, v0);        // neg, because SHR

+            USHLQ_16(q0, q0, v0);   // SHR x8

+            break;

         case 0xD2:

             INST_NAME("PSRLD Gx,Ex");

             nextop = F8;

             GETGX(q0);

             GETEX(q1, 0);

             v0 = fpu_get_scratch(dyn);

-            SQSHRN_32(v0, q1, 0);   // S64x1->S32x1

-            VMOVeS(v0, 1, v0, 0);   // S32x1->S32x2

-            NEG_32(v0, v0);         // neg, because SHR

-            VMOVeD(v0, 1, v0, 0);   // S32x2->S32x4

+            VDUPQ_32(v0, q1, 0);    // shift count comes from Ex (q1), as in the replaced code; low 32-bit lane is broadcast

+            NEGQ_32(v0, v0);        // neg, because SHR

             USHLQ_32(q0, q0, v0);   // SHR x4

             break;

         case 0xD3: