about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rwxr-xr-xsrc/dynarec/arm64_emitter.h59
-rwxr-xr-xsrc/dynarec/arm64_printer.c68
-rwxr-xr-xsrc/dynarec/dynarec_arm64_660f.c58
3 files changed, 170 insertions, 15 deletions
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index 08117512..9bf56a5b 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -517,13 +517,13 @@
 // VLDR
 #define VMEM_gen(size, opc, imm12, Rn, Rt)  ((size)<<30 | 0b111<<27 | 1<<26 | 0b01<<24 | (opc)<<22 | (imm12)<<10 | (Rn)<<5 | (Rt))
 // imm15 must be 3-aligned
-#define VLDR64_U12(Dt, Rn, imm15)           EMIT(VMEM_gen(0b11, 0b01, ((imm15)>>3)&0xfff, Rn, Dt))
+#define VLDR64_U12(Dt, Rn, imm15)           EMIT(VMEM_gen(0b11, 0b01, ((uint32_t)((imm15)>>3))&0xfff, Rn, Dt))
 // imm16 must be 4-aligned
-#define VLDR128_U12(Qt, Rn, imm16)          EMIT(VMEM_gen(0b11, 0b11, ((imm16)>>4)&0xfff, Rn, Qt))
-// imm15 must be 3-aligned
-#define VSTR64_U12(Dt, Rn, imm15)           EMIT(VMEM_gen(0b11, 0b00, ((imm15)>>3)&0xfff, Rn, Dt))
+#define VLDR128_U12(Qt, Rn, imm16)          EMIT(VMEM_gen(0b00, 0b11, ((uint32_t)((imm16)>>4))&0xfff, Rn, Qt))
+// imm15 must be 3-aligned
+#define VSTR64_U12(Dt, Rn, imm15)           EMIT(VMEM_gen(0b11, 0b00, ((uint32_t)((imm15)>>3))&0xfff, Rn, Dt))
 // imm16 must be 4-aligned
-#define VSTR128_U12(Qt, Rn, imm16)          EMIT(VMEM_gen(0b11, 0b10, ((imm16)>>4)&0xfff, Rn, Qt))
+#define VSTR128_U12(Qt, Rn, imm16)          EMIT(VMEM_gen(0b00, 0b10, ((uint32_t)((imm16)>>4))&0xfff, Rn, Qt))
 
 #define VMEMW_gen(size, opc, imm9, op2, Rn, Rt)  ((size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | (imm9)<<12 | (op2)<<10 | 0b01<<10 | (Rn)<<5 | (Rt))
 #define VLDR64_S9_postindex(Rt, Rn, imm9)   EMIT(VMEMW_gen(0b11, 0b01, (imm9)&0x1ff, 0b01, Rn, Rt))
@@ -547,5 +547,54 @@
 #define VSTR128_REG(Qt, Rn, Rm)             EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 0, Rn, Dt))
 #define VSTR128_REG_LSL4(Qt, Rn, Rm)        EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 1, Rn, Dt))
 
+// LOGIC
+#define VLOGIC_gen(Q, opc2, Rm, Rn, Rd)     ((Q)<<30 | 1<<29 | 0b01110<<24 | (opc2)<<22 | 1<<21 | (Rm)<<16 | 0b00011<<11 | 1<<10 | (Rn)<<5 | (Rd))
+#define VEORQ(Vd, Vn, Vm)                   EMIT(VLOGIC_gen(1, 0b00, Vm, Vn, Vd))
+#define VEOR(Vd, Vn, Vm)                    EMIT(VLOGIC_gen(0, 0b00, Vm, Vn, Vd))
+
+// FMOV
+#define FMOV_general(sf, type, mode, opcode, Rn, Rd)    ((sf)<<31 | 0b11110<<24 | (type)<<22 | 1<<21 | (mode)<<19 | (opcode)<<16 | (Rn)<<5 | (Rd))
+// 32-bit to single-precision
+#define FMOVSw(Sd, Wn)                      EMIT(FMOV_general(0, 0b00, 0b00, 0b111, Wn, Sd))
+// Single-precision to 32-bit
+#define FMOVwS(Wd, Sn)                      EMIT(FMOV_general(0, 0b00, 0b00, 0b110, Sn, Wd))
+// 64-bit to double-precision
+#define FMOVDx(Dd, Xn)                      EMIT(FMOV_general(1, 0b01, 0b00, 0b111, Xn, Dd))
+// 64-bit to top half of 128-bit
+#define FMOVD1x(Vd, Xn)                     EMIT(FMOV_general(1, 0b10, 0b01, 0b111, Xn, Vd))
+// Double-precision to 64-bit
+#define FMOVxD(Xd, Dn)                      EMIT(FMOV_general(1, 0b01, 0b00, 0b110, Dn, Xd))
+// Top half of 128-bit to 64-bit
+#define FMOVxD1(Xd, Vn)                     EMIT(FMOV_general(1, 0b10, 0b01, 0b110, Vn, Xd))
+
+#define FMOV_register(type, Rn, Rd)         (0b11110<<24 | (type)<<22 | 1<<21 | 0b10000<<10 | (Rn)<<5 | (Rd))
+#define FMOVS(Sd, Sn)                       EMIT(FMOV_register(0b00, Sn, Sd))
+#define FMOVD(Dd, Dn)                       EMIT(FMOV_register(0b01, Dn, Dd))
+
+// VMOV
+#define VMOV_element(imm5, imm4, Rn, Rd)    (1<<30 | 1<<29 | 0b01110000<<21 | (imm5)<<16 | (imm4)<<11 | 1<<10 | (Rn)<<5 | (Rd))
+#define VMOVeB(Vd, i1, Vn, i2)              EMIT(VMOV_element(((i1)<<1) | 1, i2, Vn, Vd))
+#define VMOVeH(Vd, i1, Vn, i2)              EMIT(VMOV_element(((i1)<<2) | 2, i2<<1, Vn, Vd))
+#define VMOVeS(Vd, i1, Vn, i2)              EMIT(VMOV_element(((i1)<<3) | 4, i2<<2, Vn, Vd))
+#define VMOVeD(Vd, i1, Vn, i2)              EMIT(VMOV_element(((i1)<<4) | 8, i2<<3, Vn, Vd))
+
+#define VMOV_from(imm5, Rn, Rd)     (1<<30 | 0<<29 | 0b01110000<<21 | (imm5)<<16 | 0b0011<<11 | 1<<10 | (Rn)<<5 | (Rd))
+#define VMOVQBfrom(Vd, index, Wn)    EMIT(VMOV_from(((index)<<1) | 1, Wn, Vd))
+#define VMOVQHfrom(Vd, index, Wn)    EMIT(VMOV_from(((index)<<2) | 2, Wn, Vd))
+#define VMOVQSfrom(Vd, index, Wn)    EMIT(VMOV_from(((index)<<3) | 4, Wn, Vd))
+#define VMOVQDfrom(Vd, index, Xn)    EMIT(VMOV_from(((index)<<4) | 8, Xn, Vd))
+
+#define UMOV_gen(Q, imm5, Rn, Rd)   ((Q)<<30 | 0b01110000<<21 | (imm5)<<16 | 0b01<<13 | 1<<12 | 1<<11 | 1<<10 | (Rn)<<5 | (Rd))
+#define VMOVQDto(Xd, Vn, index)     EMIT(UMOV_gen(1, ((index)<<4) | 8, Vn, Xd))
+#define VMOVBto(Wd, Vn, index)      EMIT(UMOV_gen(0, ((index)<<1) | 1, Vn, Wd))
+#define VMOVHto(Wd, Vn, index)      EMIT(UMOV_gen(0, ((index)<<2) | 2, Vn, Wd))
+#define VMOVSto(Wd, Vn, index)      EMIT(UMOV_gen(0, ((index)<<3) | 4, Vn, Wd))
+
+// VORR
+#define ORR_vector(Q, Rm, Rn, Rd)   ((Q)<<30 | 0b01110<<24 | 0b10<<22 | 1<<21 | (Rm)<<16 | 0b00011<<11 | 1<<10 | (Rn)<<5 | (Rd))
+#define VORRQ(Vd, Vn, Vm)           EMIT(ORR_vector(1, Vm, Vn, Vd))
+#define VORR(Dd, Dn, Dm)            EMIT(ORR_vector(0, Dm, Dn, Dd))
+#define VMOVQ(Vd, Vn)               EMIT(ORR_vector(1, Vn, Vn, Vd))
+#define VMOV(Dd, Dn)                EMIT(ORR_vector(0, Dn, Dn, Dd))
 
 #endif  //__ARM64_EMITTER_H__
diff --git a/src/dynarec/arm64_printer.c b/src/dynarec/arm64_printer.c
index 00fd4476..ed179df0 100755
--- a/src/dynarec/arm64_printer.c
+++ b/src/dynarec/arm64_printer.c
@@ -16,7 +16,7 @@ static const char* conds[] = {"cEQ", "cNE", "cCS", "cCC", "cMI", "cPL", "cVS", "
 #define abs(A) (((A)<0)?(-(A)):(A))

 

 typedef struct arm64_print_s {

-    int N, S, U, L;

+    int N, S, U, L, Q;

     int t, n, m, d, t2, a;

     int f, c, o, h, p;

     int i, r, s;

@@ -61,6 +61,7 @@ int isMask(uint32_t opcode, const char* mask, arm64_print_t *a)
             case 'S': a->S = (a->S<<1) | v; break;

             case 'U': a->U = (a->U<<1) | v; break;

             case 'L': a->L = (a->L<<1) | v; break;

+            case 'Q': a->Q = (a->Q<<1) | v; break;

             case 't': a->t = (a->t<<1) | v; break;

             case '2': a->t2 = (a->t2<<1) | v; break;

             case 'n': a->n = (a->n<<1) | v; break;

@@ -113,6 +114,7 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
     #define cond a.c

     #define immr a.r

     #define imms a.s

+    #define opc a.c

     if(isMask(opcode, "11010101000000110010000000011111", &a)) {

         snprintf(buff, sizeof(buff), "NOP");

         return buff;

@@ -704,6 +706,70 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         return buff;

     }

 

+    //  ----------- NEON / FPU

+

+    // VORR

+    if(isMask(opcode, "0Q001110101mmmmm000111nnnnnddddd", &a)) {

+        char q = a.Q?'Q':'D';

+        if(Rn==Rm)

+            snprintf(buff, sizeof(buff), "VMOV %c%d, %c%d", q, Rd, q, Rn);

+        else

+            snprintf(buff, sizeof(buff), "VORR %c%d, %c%d, %c%d", q, Rd, q, Rn, q, Rm);

+        return buff;

+    }

+

+    // VEOR

+    if(isMask(opcode, "0Q101110001mmmmm000111nnnnnddddd", &a)) {

+        char q = a.Q?'Q':'D';

+        snprintf(buff, sizeof(buff), "VEOR %c%d, %c%d, %c%d", q, Rd, q, Rn, q, Rm);

+        return buff;

+    }

+

+    // INS

+    if(isMask(opcode, "01101110000rrrrr0ssss1nnnnnddddd", &a)) {

+        char s = '?';

+        int idx1=0, idx2=0;

+        if(immr&1) {s='B'; idx1=(immr)>>1; idx2 = imms; }

+        else if((immr&3)==2) {s='H'; idx1=(immr)>>2; idx2=(imms)>>1;}

+        else if((immr&7)==4) {s='S'; idx1=(immr)>>3; idx2=(imms)>>2;}

+        else if((immr&15)==8) {s='D'; idx1=(immr)>>4; idx2=(imms)>>3;}

+        snprintf(buff, sizeof(buff), "INS V%d.%c[%d], V%d.%c[%d]", Rd, s, idx1, Rn, s, idx2);

+        return buff;

+    }

+    if(isMask(opcode, "01001110000rrrrr000111nnnnnddddd", &a)) {

+        char s = '?', R = 0;

+        int idx1=0;

+        if(immr&1) {s='B'; idx1=(immr)>>1; }

+        else if((immr&3)==2) {s='H'; idx1=(immr)>>2;}

+        else if((immr&7)==4) {s='S'; idx1=(immr)>>3;}

+        else if((immr&15)==8) {s='D'; idx1=(immr)>>4; R=1;}

+        snprintf(buff, sizeof(buff), "INS V%d.%c[%d], %s", Rd, s, idx1, R?Xt[Rn]:Wt[Rn]);

+        return buff;

+    }

+

+    // LDR / STR

+    if(isMask(opcode, "ss111101cciiiiiiiiiiiinnnnnttttt", &a)) {

+        char s = '?';

+        int size=imms;

+        int op=0;

+        if(size==0 && opc==1) {s='B';}

+        else if(size==1 && opc==1) {s='H';}

+        else if(size==2 && opc==1) {s='S';}

+        else if(size==3 && opc==1) {s='D';}

+        else if(size==0 && opc==3) {s='Q'; size = 4;}

+        else if(size==0 && opc==0) {s='B'; op=1;}

+        else if(size==1 && opc==0) {s='H'; op=1;}

+        else if(size==2 && opc==0) {s='S'; op=1;}

+        else if(size==3 && opc==0) {s='D'; op=1;}

+        else if(size==0 && opc==2) {s='Q'; op=1; size = 4;}

+

+        int offset = imm<<size;

+        if(!offset)

+            snprintf(buff, sizeof(buff), "%s %c%d, [%s]", op?"STR":"LDR", s, Rt, XtSp[Rn]);

+        else

+            snprintf(buff, sizeof(buff), "%s %c%d, [%s, %d]", op?"STR":"LDR", s, Rt, XtSp[Rn], offset);

+        return buff;

+    }

 

     snprintf(buff, sizeof(buff), "%08X ???", __builtin_bswap32(opcode));

     return buff;

diff --git a/src/dynarec/dynarec_arm64_660f.c b/src/dynarec/dynarec_arm64_660f.c
index 6dba108f..7dafc606 100755
--- a/src/dynarec/dynarec_arm64_660f.c
+++ b/src/dynarec/dynarec_arm64_660f.c
@@ -23,16 +23,16 @@
 #include "dynarec_arm64_helper.h"

 

 // Get EX as a quad

-#define GETEX(a)                \

-    if(MODREG) {   \

-        a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3));  \

-    } else {                    \

-        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0); \

-        a = fpu_get_scratch_quad(dyn); \

-        VLD1Q_8(a, ed);       \

+#define GETEX(a, D)                                                                                     \

+    if(MODREG) {                                                                                        \

+        a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3));                                         \

+    } else {                                                                                            \

+        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, (1<<4)-1, rex, 0, D);  \

+        a = fpu_get_scratch_quad(dyn);                                                                  \

+        VLDR128_U12(a, ed, fixedaddress);                                                               \

     }

-#define GETGX(a)    \

-    gd = ((nextop&0x38)>>3)+(rex.r<<3);  \

+#define GETGX(a)                        \

+    gd = ((nextop&0x38)>>3)+(rex.r<<3); \

     a = sse_get_reg(dyn, ninst, x1, gd)

 

 uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)

@@ -86,6 +86,46 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         GOCOND(0x40, "CMOV", "Gw, Ew");

         #undef GO

 

+        case 0x6E:

+            INST_NAME("MOVD Gx, Ed");

+            nextop = F8;

+            gd = ((nextop&0x38)>>3)+(rex.r<<3);

+            GETED(0);

+            v0 = sse_get_reg_empty(dyn, ninst, x1, gd);

+            VEORQ(v0, v0, v0); // RAZ vector

+            if(rex.w) {

+                VMOVQDfrom(v0, 0, ed);

+            } else {

+                VMOVQSfrom(v0, 0, ed);

+            }

+            break;

+

+        case 0x7E:

+            INST_NAME("MOVD Ed,Gx");

+            nextop = F8;

+            gd = ((nextop&0x38)>>3)+(rex.r<<3);

+            v0 = sse_get_reg(dyn, ninst, x1, gd);

+            if(rex.w) {

+                if((nextop&0xC0)==0xC0) {

+                    ed = xRAX + (nextop&7) + (rex.b<<3);

+                    VMOVQDto(ed, v0, 0);

+                } else {

+                    VMOVQDto(x2, v0, 0); // to avoid Bus Error, using regular store

+                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0);

+                    STRx_U12(x2, ed, fixedaddress);

+                }

+            } else {

+                if((nextop&0xC0)==0xC0) {

+                    ed = xRAX + (nextop&7) + (rex.b<<3);

+                    VMOVSto(ed, v0, 0);

+                } else {

+                    VMOVSto(x2, v0, 0); // to avoid Bus Error, using regular store

+                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0);

+                    STRw_U12(x2, ed, fixedaddress);

+                }

+            }

+            break;

+

         case 0xA3:

             INST_NAME("BT Ew, Gw");

             SETFLAGS(X_CF, SF_SET);