author    ptitSeb <sebastien.chev@gmail.com>  2021-03-31 16:26:34 +0200
committer ptitSeb <sebastien.chev@gmail.com>  2021-03-31 16:26:34 +0200
commit    139509024113e7303fc8c55854d1509eadf52da5 (patch)
tree      d33b7b0b2df19452d2def6d50728ec98e0683736 /src
parent    dc938e8a60c7a466bb8b25fc465e6dedaff4b183 (diff)
[DYNAREC] Added 66 0F 38 04 opcode
Diffstat (limited to 'src')
-rwxr-xr-x  src/dynarec/arm64_emitter.h       52
-rwxr-xr-x  src/dynarec/arm64_printer.c       56
-rwxr-xr-x  src/dynarec/dynarec_arm64_660f.c  19
3 files changed, 126 insertions(+), 1 deletion(-)
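This commit implements the SSSE3 PMADDUBSW instruction (opcode 66 0F 38 04) in the ARM64 dynarec. PMADDUBSW multiplies each unsigned byte of the destination by the corresponding signed byte of the source, then adds adjacent pairs of the 16-bit products with signed saturation. A minimal scalar sketch of those semantics, for reference only (plain C, not part of the patch):

    #include <stdint.h>

    /* Scalar model of PMADDUBSW: u8 lanes of a times s8 lanes of b,
       adjacent pairs summed with signed saturation to 16 bits. */
    static int16_t sat16(int32_t v) {
        if (v > 32767)  return 32767;
        if (v < -32768) return -32768;
        return (int16_t)v;
    }

    static void pmaddubsw(int16_t d[8], const uint8_t a[16], const int8_t b[16]) {
        for (int i = 0; i < 8; ++i)
            d[i] = sat16((int32_t)a[2*i]*b[2*i] + (int32_t)a[2*i+1]*b[2*i+1]);
    }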
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index 364dda4d..a3394276 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -629,6 +629,20 @@
 // imm16 must be 16-byte aligned (it is shifted right by 4 into the imm12 field)
 #define VSTR128_U12(Qt, Rn, imm16)          EMIT(VMEM_gen(0b00, 0b10, ((uint32_t)((imm16)>>4))&0xfff, Rn, Qt))
 
+#define VMEMUR_vector(size, opc, imm9, Rn, Rt)  ((size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | (imm9)<<12 | (Rn)<<5 | (Rt))
+// signed offset, no alignment required
+#define VLDR8_I9(Vt, Rn, imm9)              EMIT(VMEMUR_vector(0b00, 0b01, (imm9)&0b111111111, Rn, Vt))
+#define VLDR16_I9(Vt, Rn, imm9)             EMIT(VMEMUR_vector(0b01, 0b01, (imm9)&0b111111111, Rn, Vt))
+#define VLDR32_I9(Vt, Rn, imm9)             EMIT(VMEMUR_vector(0b10, 0b01, (imm9)&0b111111111, Rn, Vt))
+#define VLDR64_I9(Vt, Rn, imm9)             EMIT(VMEMUR_vector(0b11, 0b01, (imm9)&0b111111111, Rn, Vt))
+#define VLDR128_I9(Vt, Rn, imm9)            EMIT(VMEMUR_vector(0b00, 0b11, (imm9)&0b111111111, Rn, Vt))
+// signed offset, no alignment required
+#define VSTR8_I9(Vt, Rn, imm9)              EMIT(VMEMUR_vector(0b00, 0b00, (imm9)&0b111111111, Rn, Vt))
+#define VSTR16_I9(Vt, Rn, imm9)             EMIT(VMEMUR_vector(0b01, 0b00, (imm9)&0b111111111, Rn, Vt))
+#define VSTR32_I9(Vt, Rn, imm9)             EMIT(VMEMUR_vector(0b10, 0b00, (imm9)&0b111111111, Rn, Vt))
+#define VSTR64_I9(Vt, Rn, imm9)             EMIT(VMEMUR_vector(0b11, 0b00, (imm9)&0b111111111, Rn, Vt))
+#define VSTR128_I9(Vt, Rn, imm9)            EMIT(VMEMUR_vector(0b00, 0b10, (imm9)&0b111111111, Rn, Vt))
+
 #define VMEMW_gen(size, opc, imm9, op2, Rn, Rt)  ((size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | (imm9)<<12 | (op2)<<10 | 0b01<<10 | (Rn)<<5 | (Rt))
 #define VLDR64_S9_postindex(Rt, Rn, imm9)   EMIT(VMEMW_gen(0b11, 0b01, (imm9)&0x1ff, 0b01, Rn, Rt))
 #define VLDR64_S9_preindex(Rt, Rn, imm9)    EMIT(VMEMW_gen(0b11, 0b01, (imm9)&0x1ff, 0b11, Rn, Rt))
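As a quick sanity check on the new encoder, VLDR64_I9(0, 1, -8) should assemble to LDUR D0, [X1, #-8]. A standalone sketch; the expected constant below is hand-derived from the ARMv8 LDUR (SIMD&FP) encoding, not taken from the patch:

    #include <assert.h>
    #include <stdint.h>

    /* Same field packing as the VMEMUR_vector macro above. */
    #define VMEMUR_vector(size, opc, imm9, Rn, Rt) \
        ((uint32_t)(size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | (imm9)<<12 | (Rn)<<5 | (Rt))

    int main(void) {
        /* VLDR64_I9(V0, X1, -8): size=0b11, opc=0b01, imm9 masked to 9 bits */
        uint32_t op = VMEMUR_vector(0b11, 0b01, (-8)&0b111111111, 1, 0);
        assert(op == 0xFC5F8020u);   /* LDUR D0, [X1, #-8], hand-computed */
        return 0;
    }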
@@ -1243,6 +1257,15 @@
 #define VUMULL2_16(Rd, Rn, Rm)      EMIT(MULL_vector(1, 1, 0b01, Rm, Rn, Rd))
 #define VUMULL2_32(Rd, Rn, Rm)      EMIT(MULL_vector(1, 1, 0b10, Rm, Rn, Rd))
 
+// MUL
+#define MUL_vector(Q, size, Rm, Rn, Rd)     ((Q)<<30 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b10011<<11 | 1<<10 | (Rn)<<5 | (Rd))
+#define VMUL_8(Vd, Vn, Vm)          EMIT(MUL_vector(0, 0b00, Vm, Vn, Vd))
+#define VMUL_16(Vd, Vn, Vm)         EMIT(MUL_vector(0, 0b01, Vm, Vn, Vd))
+#define VMUL_32(Vd, Vn, Vm)         EMIT(MUL_vector(0, 0b10, Vm, Vn, Vd))
+#define VMULQ_8(Vd, Vn, Vm)         EMIT(MUL_vector(1, 0b00, Vm, Vn, Vd))
+#define VMULQ_16(Vd, Vn, Vm)        EMIT(MUL_vector(1, 0b01, Vm, Vn, Vd))
+#define VMULQ_32(Vd, Vn, Vm)        EMIT(MUL_vector(1, 0b10, Vm, Vn, Vd))
+
 // Absolute Difference
 #define AD_vector(Q, U, size, Rm, ac, Rn, Rd)   ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b0111<<12 | (ac)<<11 | 1<<10 | (Rn)<<5 | (Rd))
 // Signed Absolute Difference and accumulate
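One detail worth noting for the PMADDUBSW use below: MUL (vector) keeps only the low bits of each lane product. That is harmless here because an unsigned byte times a signed byte spans -32640..32385, which fits in int16_t. A scalar sketch of one 16-bit lane (illustration only, not box64 code):

    #include <stdint.h>

    /* One lane of VMULQ_16: the result is the product modulo 2^16. */
    static uint16_t mul_lane16(uint16_t a, uint16_t b) {
        return (uint16_t)((uint32_t)a * b);
    }
    /* Worst cases of u8*s8 still fit in int16_t, so the truncation loses nothing:
       255 * -128 = -32640 >= -32768  and  255 * 127 = 32385 <= 32767. */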
@@ -1321,4 +1344,33 @@
 #define MOVIQ_8(Rd, imm8)           EMIT(MOVI_vector(1, 0, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd))
 #define MOVI_8(Rd, imm8)            EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd))
 
+// Shift Left Long (SSHLL/USHLL) and eXtend Long (SXTL/UXTL)
+#define SHLL_vector(Q, U, immh, immb, Rn, Rd)  ((Q)<<30 | (U)<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b10100<<11 | 1<<10 | (Rn)<<5 | (Rd))
+#define USHLL2_8(Vd, Vn, imm)       EMIT(SHLL_vector(1, 1, 0b0001, (imm)&0x7, Vn, Vd))
+#define USHLL_8(Vd, Vn, imm)        EMIT(SHLL_vector(0, 1, 0b0001, (imm)&0x7, Vn, Vd))
+#define SSHLL2_8(Vd, Vn, imm)       EMIT(SHLL_vector(1, 0, 0b0001, (imm)&0x7, Vn, Vd))
+#define SSHLL_8(Vd, Vn, imm)        EMIT(SHLL_vector(0, 0, 0b0001, (imm)&0x7, Vn, Vd))
+#define USHLL2_16(Vd, Vn, imm)      EMIT(SHLL_vector(1, 1, 0b0010|(((imm)>>3)&1), (imm)&0x7, Vn, Vd))
+#define USHLL_16(Vd, Vn, imm)       EMIT(SHLL_vector(0, 1, 0b0010|(((imm)>>3)&1), (imm)&0x7, Vn, Vd))
+#define SSHLL2_16(Vd, Vn, imm)      EMIT(SHLL_vector(1, 0, 0b0010|(((imm)>>3)&1), (imm)&0x7, Vn, Vd))
+#define SSHLL_16(Vd, Vn, imm)       EMIT(SHLL_vector(0, 0, 0b0010|(((imm)>>3)&1), (imm)&0x7, Vn, Vd))
+#define USHLL2_32(Vd, Vn, imm)      EMIT(SHLL_vector(1, 1, 0b0100|(((imm)>>3)&3), (imm)&0x7, Vn, Vd))
+#define USHLL_32(Vd, Vn, imm)       EMIT(SHLL_vector(0, 1, 0b0100|(((imm)>>3)&3), (imm)&0x7, Vn, Vd))
+#define SSHLL2_32(Vd, Vn, imm)      EMIT(SHLL_vector(1, 0, 0b0100|(((imm)>>3)&3), (imm)&0x7, Vn, Vd))
+#define SSHLL_32(Vd, Vn, imm)       EMIT(SHLL_vector(0, 0, 0b0100|(((imm)>>3)&3), (imm)&0x7, Vn, Vd))
+
+#define UXTL_8(Vd, Vn)              USHLL_8(Vd, Vn, 0)
+#define UXTL2_8(Vd, Vn)             USHLL2_8(Vd, Vn, 0)
+#define UXTL_16(Vd, Vn)             USHLL_16(Vd, Vn, 0)
+#define UXTL2_16(Vd, Vn)            USHLL2_16(Vd, Vn, 0)
+#define UXTL_32(Vd, Vn)             USHLL_32(Vd, Vn, 0)
+#define UXTL2_32(Vd, Vn)            USHLL2_32(Vd, Vn, 0)
+
+#define SXTL_8(Vd, Vn)              SSHLL_8(Vd, Vn, 0)
+#define SXTL2_8(Vd, Vn)             SSHLL2_8(Vd, Vn, 0)
+#define SXTL_16(Vd, Vn)             SSHLL_16(Vd, Vn, 0)
+#define SXTL2_16(Vd, Vn)            SSHLL2_16(Vd, Vn, 0)
+#define SXTL_32(Vd, Vn)             SSHLL_32(Vd, Vn, 0)
+#define SXTL2_32(Vd, Vn)            SSHLL2_32(Vd, Vn, 0)
+
 #endif  //__ARM64_EMITTER_H__
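The UXTL/SXTL macros above are defined as USHLL/SSHLL with a zero shift, which is exactly the architectural alias: shifting left by #0 while widening is a plain zero- or sign-extension. A scalar sketch of what UXTL_8 produces (illustration only):

    #include <stdint.h>

    /* UXTL Vd.8H, Vn.8B == USHLL Vd.8H, Vn.8B, #0: zero-extend the low 8 bytes. */
    static void uxtl_8(uint16_t d[8], const uint8_t n[16]) {
        for (int i = 0; i < 8; ++i)
            d[i] = n[i];        /* low half; UXTL2 would use n[i+8] instead */
    }
    /* SXTL/SXTL2 are the same shape, sign-extending int8_t to int16_t. */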
diff --git a/src/dynarec/arm64_printer.c b/src/dynarec/arm64_printer.c
index fd87692d..c47a0279 100755
--- a/src/dynarec/arm64_printer.c
+++ b/src/dynarec/arm64_printer.c
@@ -856,6 +856,13 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         return buff;
     }
 
+    // VMUL
+    if(isMask(opcode, "0Q001110ff1mmmmm100111nnnnnddddd", &a)) {
+        const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"};
+        const char* Vd = Y[((sf)<<1) | a.Q];
+        snprintf(buff, sizeof(buff), "VMUL V%d.%s, V%d.%s, V%d.%s", Rd, Vd, Rn, Vd, Rm, Vd);
+        return buff;
+    }
     // CMP
     if(isMask(opcode, "0Q101110ff1mmmmm100011nnnnnddddd", &a)) {
         const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "2D"};
@@ -1157,7 +1164,9 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
     if(isMask(opcode, "0QU01110ff1000000c1010nnnnnddddd", &a)) {
         const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "??", "???"};
         const char* Vd = Y[(sf<<1) | a.Q];
-        snprintf(buff, sizeof(buff), "%cAD%cLP V%d.%s, V%d.%s", a.U?'U':'S', a.c?'A':'D', Rd, Vd, Rn, Vd);
+        const char* Z[] = {"4H", "8H", "2S", "4S", "1D", "2D", "??", "???"};
+        const char* Va = Z[(sf<<1) | a.Q];
+        snprintf(buff, sizeof(buff), "%cAD%cLP V%d.%s, V%d.%s", a.U?'U':'S', a.c?'A':'D', Rd, Va, Rn, Vd);
         return buff;
     }
     if(isMask(opcode, "0QU01110ff110000001110nnnnnddddd", &a)) {
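The ADDLP fix above matters because source and destination use different arrangements: (S/U)ADDLP adds adjacent lane pairs and widens, so a V.8H source gives a V.4S result. A scalar sketch of the signed 16-to-32-bit case that PMADDUBSW relies on below (illustration only):

    #include <stdint.h>

    /* SADDLP Vd.4S, Vn.8H: add adjacent signed 16-bit pairs into 32-bit lanes. */
    static void saddlp_16(int32_t d[4], const int16_t n[8]) {
        for (int i = 0; i < 4; ++i)
            d[i] = (int32_t)n[2*i] + n[2*i+1];
    }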

@@ -1196,6 +1205,32 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "%s1 {V%d.%s}[%d], %s", a.L?"LD":"ST", Rd, Y[scale], idx, XtSp[Rt]);
         return buff;
     }
+    // LDUR/STUR
+    if(isMask(opcode, "ff111100cL0iiiiiiiii00nnnnnttttt", &a)) {
+        const char* Y[] = {"B", "H", "S", "D", "Q"};
+        int sz = sf;
+        if(sz==0 && a.c)
+            sz = 4;   // size==00 with the opc high bit set selects the 128-bit Q register
+        int offset = signExtend(imm, 9);
+        if(!offset)
+            snprintf(buff, sizeof(buff), "%sUR %s%d, [%s]", a.L?"LD":"ST", Y[sz], Rt, XtSp[Rn]);
+        else
+            snprintf(buff, sizeof(buff), "%sUR %s%d, [%s, %+d]", a.L?"LD":"ST", Y[sz], Rt, XtSp[Rn], offset);
+        return buff;
+    }
+    // LDR/STR vector immediate
+    if(isMask(opcode, "ff111101cLiiiiiiiiiiiinnnnnttttt", &a)) {
+        const char* Y[] = {"B", "H", "S", "D", "Q"};
+        int sz = sf;
+        if(sz==0 && a.c)
+            sz = 4;
+        int offset = imm<<sz;   // the unsigned imm12 is scaled by the access size
+        if(!offset)
+            snprintf(buff, sizeof(buff), "%sR %s%d, [%s]", a.L?"LD":"ST", Y[sz], Rt, XtSp[Rn]);
+        else
+            snprintf(buff, sizeof(buff), "%sR %s%d, [%s, %+d]", a.L?"LD":"ST", Y[sz], Rt, XtSp[Rn], offset);
+        return buff;
+    }
 
     // (S/U)QXT(U)N
     if(isMask(opcode, "0Q101110ff100001001010nnnnnddddd", &a)) {
@@ -1215,6 +1250,25 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         return buff;
     }
 
+    // (S/U)SHLL(2) / (U/S)XTL(2)
+    if(isMask(opcode, "0QU011110hhhhiii101001nnnnnddddd", &a)) {
+        const char* Y[] = {"8B", "16B", "4H", "8H", "2S", "4S", "?", "??"};
+        const char* Z[] = {"8H", "4S", "2D", "?"};
+        int sz = 3;
+        if((a.h&0b1111)==0b0001) sz=0;
+        else if((a.h&0b1110)==0b0010) sz=1;
+        else if((a.h&0b1100)==0b0100) sz=2;
+        int sh=(((a.h)<<3)|(imm)) - (8<<sz);   // shift amount = immh:immb - element width
+        const char* Vd = Y[(sz<<1)|a.Q];
+        const char* Va = Z[sz];
+        if(!sh)
+            snprintf(buff, sizeof(buff), "%cXTL%s V%d.%s, V%d.%s", a.U?'U':'S', a.Q?"2":"", Rd, Va, Rn, Vd);
+        else
+            snprintf(buff, sizeof(buff), "%cSHLL%s V%d.%s, V%d.%s, #%d", a.U?'U':'S', a.Q?"2":"", Rd, Va, Rn, Vd, sh);
+        return buff;
+    }
+
+
 
     snprintf(buff, sizeof(buff), "%08X ???", __builtin_bswap32(opcode));
     return buff;
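The new LDUR/STUR printer above calls signExtend(imm, 9) to turn the raw 9-bit field back into a signed byte offset. box64 defines its own helper elsewhere; a sketch of the standard construction it presumably follows:

    #include <stdint.h>

    /* Sign-extend an sz-bit two's-complement field (sketch of a typical helper). */
    static int signExtend(uint32_t val, int sz) {
        uint32_t sign = 1u << (sz - 1);
        return (int)((val ^ sign) - sign);   /* e.g. signExtend(0x1F8, 9) == -8 */
    }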

diff --git a/src/dynarec/dynarec_arm64_660f.c b/src/dynarec/dynarec_arm64_660f.c
index 98f44979..36071457 100755
--- a/src/dynarec/dynarec_arm64_660f.c
+++ b/src/dynarec/dynarec_arm64_660f.c
@@ -234,6 +234,25 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     VTBLQ1_8(q0, q0, d0);
                     break;
 
+                case 0x04:
+                    INST_NAME("PMADDUBSW Gx,Ex");
+                    nextop = F8;
+                    GETGX(q0);
+                    GETEX(q1, 0);
+                    v0 = fpu_get_scratch(dyn);
+                    v1 = fpu_get_scratch(dyn);
+                    UXTL_8(v0, q0);   // Gx bytes are unsigned, so zero-extend to 16 bits
+                    SXTL_8(v1, q1);   // Ex bytes are signed, so sign-extend
+                    VMULQ_16(v0, v0, v1);
+                    SADDLPQ_16(v1, v0);   // pairwise add the products into 32-bit lanes
+                    UXTL2_8(v0, q0);   // high half of Gx, unsigned
+                    SQXTN_16(q0, v1);   // SQXTN overwrites q0, so its high half was extracted first
+                    SXTL2_8(v1, q1);   // high half of Ex, signed
+                    VMULQ_16(v0, v0, v1);
+                    SADDLPQ_16(v0, v0);
+                    SQXTN2_16(q0, v0);   // saturate the 32-bit sums back to 16 bits
+                    break;
+
                 default:
                     DEFAULT;
             }
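For readers who think in intrinsics rather than raw encodings, the emitted PMADDUBSW sequence corresponds roughly to the following arm_neon.h sketch, an illustration of the same dataflow rather than code from the patch:

    #include <arm_neon.h>

    /* Rough intrinsics equivalent of the sequence above:
       widen, multiply, pairwise-add long, saturating narrow. */
    static int16x8_t pmaddubsw_neon(uint8x16_t gx, int8x16_t ex) {
        int16x8_t lo = vmulq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(gx))),
                                 vmovl_s8(vget_low_s8(ex)));    /* UXTL/SXTL + VMULQ_16 */
        int16x8_t hi = vmulq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(gx))),
                                 vmovl_s8(vget_high_s8(ex)));   /* UXTL2/SXTL2 + VMULQ_16 */
        int32x4_t slo = vpaddlq_s16(lo);                        /* SADDLPQ_16 */
        int32x4_t shi = vpaddlq_s16(hi);
        return vqmovn_high_s32(vqmovn_s32(slo), shi);           /* SQXTN + SQXTN2 */
    }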