about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rwxr-xr-x  src/dynarec/arm64_emitter.h  46
-rwxr-xr-x  src/dynarec/dynarec_arm64_660f.c  56
2 files changed, 102 insertions, 0 deletions
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index 069d65d2..9b347af7 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -614,6 +614,21 @@
 // 128-bit register-offset stores; the LSL4 variant sets the scale flag of VMEM_REG_gen.
 // The transfer register is the macro's own Qt parameter.
 #define VSTR128_REG(Qt, Rn, Rm)             EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 0, Rn, Qt))
 #define VSTR128_REG_LSL4(Qt, Rn, Rm)        EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 1, Rn, Qt))
 
+// LD1R, no-offset form: read one element at [base] and copy it into every lane of vt.
+// q: 0 = 64-bit destination vector, 1 = 128-bit destination vector.
+// esize: 0b00/0b01/0b10/0b11 = 8/16/32/64-bit element.
+#define LD1R_gen(q, esize, base, vt)        ((q)<<30 | 0b0011010<<23 | 1<<22 | 0<<21 | 0b110<<13 | (esize)<<10 | (base)<<5 | (vt))
+// 128-bit destination (all 16/8/4/2 lanes filled)
+#define VLDQ1R_8(Vt, Xn)                    EMIT(LD1R_gen(1, 0b00, Xn, Vt))
+#define VLDQ1R_16(Vt, Xn)                   EMIT(LD1R_gen(1, 0b01, Xn, Vt))
+#define VLDQ1R_32(Vt, Xn)                   EMIT(LD1R_gen(1, 0b10, Xn, Vt))
+#define VLDQ1R_64(Vt, Xn)                   EMIT(LD1R_gen(1, 0b11, Xn, Vt))
+// 64-bit destination (only the low 8/4/2 lanes filled)
+#define VLD1R_8(Vt, Xn)                     EMIT(LD1R_gen(0, 0b00, Xn, Vt))
+#define VLD1R_16(Vt, Xn)                    EMIT(LD1R_gen(0, 0b01, Xn, Vt))
+#define VLD1R_32(Vt, Xn)                    EMIT(LD1R_gen(0, 0b10, Xn, Vt))
+
+// LD1 (single structure, no offset): load one element from [base] into a single lane of vt.
+// The lane number is split across the q:s:sz fields, most significant bit first;
+// narrower elements use more of those bits, wider ones reuse the low sz bits as a width marker.
+#define LD1_single(q, opc, s, sz, base, vt)     ((q)<<30 | 0b0011010<<23 | 1<<22 | 0<<21 | (opc)<<13 | (s)<<12 | (sz)<<10 | (base)<<5 | (vt))
+// 8-bit element: lane = q:s:sz (4 bits)
+#define VLD1_8(Vt, lane, Xn)                EMIT(LD1_single(((lane)>>3)&1, 0b000, ((lane)>>2)&1, (lane)&3, Xn, Vt))
+// 16-bit element: lane = q:s:sz<1> (3 bits), sz<0> is 0
+#define VLD1_16(Vt, lane, Xn)               EMIT(LD1_single(((lane)>>2)&1, 0b010, ((lane)>>1)&1, ((lane)&1)<<1, Xn, Vt))
+// 32-bit element: lane = q:s (2 bits), sz is 0b00
+#define VLD1_32(Vt, lane, Xn)               EMIT(LD1_single(((lane)>>1)&1, 0b100, (lane)&1, 0b00, Xn, Vt))
+// 64-bit element: lane = q (1 bit), s is 0 and sz is 0b01
+#define VLD1_64(Vt, lane, Xn)               EMIT(LD1_single((lane)&1, 0b100, 0, 0b01, Xn, Vt))
+
 // LOGIC
 // Encoding builder for vector bitwise logic ops: Q is placed at bit 30, opc2 at bits 23:22,
 // Rm at bits 20:16, Rn at bits 9:5 and Rd at bits 4:0.
 #define VLOGIC_gen(Q, opc2, Rm, Rn, Rd)     ((Q)<<30 | 1<<29 | 0b01110<<24 | (opc2)<<22 | 1<<21 | (Rm)<<16 | 0b00011<<11 | 1<<10 | (Rn)<<5 | (Rd))
 // Emitted with Q=1 and opc2=0b00
 #define VEORQ(Vd, Vn, Vm)                   EMIT(VLOGIC_gen(1, 0b00, Vm, Vn, Vd))
@@ -941,4 +956,35 @@
 #define VZIP1Q_64(Rt, Rn, Rm)       EMIT(ZIP_gen(1, 0b11, Rm, 0, Rn, Rt))
 #define VZIP2Q_64(Rt, Rn, Rm)       EMIT(ZIP_gen(1, 0b11, Rm, 1, Rn, Rt))
 
+// TBL / TBX (vector table lookup)
+// Q: 0 = 8 result bytes, 1 = 16 result bytes. len: number of consecutive table registers minus one.
+// op (bit 12): 0 = TBL (out-of-range index writes 0), 1 = TBX (out-of-range index leaves the byte unchanged).
+#define TBL_gen(Q, Rm, len, op, Rn, Rd) ((Q)<<30 | 0b001110<<24 | (Rm)<<16 | (len)<<13 | (op)<<12 | (Rn)<<5 | (Rd))
+//Use Rm[] to pick from Rn element and store in Rd. Out-of-range element gets 0
+#define VTBLQ1_8(Rd, Rn, Rm)        EMIT(TBL_gen(1, Rm, 0b00, 0, Rn, Rd))
+//Use Rm[] to pick from Rn, Rn+1 element and store in Rd. Out-of-range element gets 0
+#define VTBLQ2_8(Rd, Rn, Rm)        EMIT(TBL_gen(1, Rm, 0b01, 0, Rn, Rd))
+//Use Rm[] to pick from Rn, Rn+1, Rn+2 element and store in Rd. Out-of-range element gets 0
+#define VTBLQ3_8(Rd, Rn, Rm)        EMIT(TBL_gen(1, Rm, 0b10, 0, Rn, Rd))
+//Use Rm[] to pick from Rn, Rn+1, Rn+2, Rn+3 element and store in Rd. Out-of-range element gets 0
+#define VTBLQ4_8(Rd, Rn, Rm)        EMIT(TBL_gen(1, Rm, 0b11, 0, Rn, Rd))
+//Use Rm[] to pick from Rn element and store in Rd. Out-of-range element stays untouched
+#define VTBXQ1_8(Rd, Rn, Rm)        EMIT(TBL_gen(1, Rm, 0b00, 1, Rn, Rd))
+//Use Rm[] to pick from Rn, Rn+1 element and store in Rd. Out-of-range element stays untouched
+#define VTBXQ2_8(Rd, Rn, Rm)        EMIT(TBL_gen(1, Rm, 0b01, 1, Rn, Rd))
+//Use Rm[] to pick from Rn, Rn+1, Rn+2 element and store in Rd. Out-of-range element stays untouched
+#define VTBXQ3_8(Rd, Rn, Rm)        EMIT(TBL_gen(1, Rm, 0b10, 1, Rn, Rd))
+//Use Rm[] to pick from Rn, Rn+1, Rn+2, Rn+3 element and store in Rd. Out-of-range element stays untouched
+#define VTBXQ4_8(Rd, Rn, Rm)        EMIT(TBL_gen(1, Rm, 0b11, 1, Rn, Rd))
+
+// TRN (vector transpose)
+// op (bit 14): 0 = TRN1, 1 = TRN2. size: 0b00/0b01/0b10/0b11 = 8/16/32/64-bit elements.
+#define TRN_gen(Q, size, Rm, op, Rn, Rd)    ((Q)<<30 | 0b001110<<24 | (size)<<22 | (Rm)<<16 | (op)<<14 | 0b10<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
+// TRN1: Rd[2i] = Rn[2i], Rd[2i+1] = Rm[2i]
+#define VTRNQ1_64(Rd, Rn, Rm)       EMIT(TRN_gen(1, 0b11, Rm, 0, Rn, Rd))
+#define VTRNQ1_32(Rd, Rn, Rm)       EMIT(TRN_gen(1, 0b10, Rm, 0, Rn, Rd))
+#define VTRNQ1_16(Rd, Rn, Rm)       EMIT(TRN_gen(1, 0b01, Rm, 0, Rn, Rd))
+#define VTRNQ1_8(Rd, Rn, Rm)        EMIT(TRN_gen(1, 0b00, Rm, 0, Rn, Rd))
+// NOTE(review): TRN1 .2D with Rn==Rm writes {Rn[0], Rn[0]} to Rd, which duplicates lane 0
+// rather than swapping anything - confirm the intended semantics before using this.
+#define VSWP(Rd, Rn)                VTRNQ1_64(Rd, Rn, Rn)
+// TRN2: Rd[2i] = Rn[2i+1], Rd[2i+1] = Rm[2i+1]
+#define VTRNQ2_64(Rd, Rn, Rm)       EMIT(TRN_gen(1, 0b11, Rm, 1, Rn, Rd))
+#define VTRNQ2_32(Rd, Rn, Rm)       EMIT(TRN_gen(1, 0b10, Rm, 1, Rn, Rd))
+#define VTRNQ2_16(Rd, Rn, Rm)       EMIT(TRN_gen(1, 0b01, Rm, 1, Rn, Rd))
+#define VTRNQ2_8(Rd, Rn, Rm)        EMIT(TRN_gen(1, 0b00, Rm, 1, Rn, Rd))
+
 #endif  //__ARM64_EMITTER_H__
diff --git a/src/dynarec/dynarec_arm64_660f.c b/src/dynarec/dynarec_arm64_660f.c
index bdf29b01..4905ee36 100755
--- a/src/dynarec/dynarec_arm64_660f.c
+++ b/src/dynarec/dynarec_arm64_660f.c
@@ -223,6 +223,62 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 VLDR128_U12(v0, ed, fixedaddress);

             }

             break;

+        case 0x70:
+            // PSHUFD: 32-bit lane i of Gx receives lane ((Ib >> (2*i)) & 3) of Ex.
+            INST_NAME("PSHUFD Gx,Ex,Ib");
+            nextop = F8;
+            GETG;
+            i32 = -1;   // lane whose address is currently held in x2 (memory form); -1 = none yet
+            v0 = sse_get_reg(dyn, ninst, x1, gd);
+            if(MODREG) {
+                u8 = F8;
+                v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3));
+                if(u8==0x4E) {
+                    // 0x4E selects lanes {2,3,0,1}: exchange the two 64-bit halves.
+                    if(v0==v1) {
+                        // In-place: read from a copy so the first lane move does not destroy
+                        // the half that the second one still needs.
+                        q1 = fpu_get_scratch(dyn);
+                        VMOVQ(q1, v1);
+                    } else {
+                        q1 = v1;
+                    }
+                    VMOVeD(v0, 0, q1, 1);
+                    VMOVeD(v0, 1, q1, 0);
+                } else {
+                    // swp[n] holds the four byte indices covering 32-bit source lane n,
+                    // ready to be used as TBL selectors.
+                    static const uint32_t swp[4] = {
+                        (0)|(1<<8)|(2<<16)|(3<<24),
+                        (4)|(5<<8)|(6<<16)|(7<<24),
+                        (8)|(9<<8)|(10<<16)|(11<<24),
+                        (12)|(13<<8)|(14<<16)|(15<<24)
+                    };
+                    d0 = fpu_get_scratch(dyn);
+                    // Build the 16-byte selector vector, one 32-bit lane at a time.
+                    MOV32w(x2, swp[(u8>>(0*2))&3]);
+                    MOV32w(x3, swp[(u8>>(1*2))&3]);
+                    VMOVQSfrom(d0, 0, x2);
+                    VMOVQSfrom(d0, 1, x3);
+                    MOV32w(x2, swp[(u8>>(2*2))&3]);
+                    MOV32w(x3, swp[(u8>>(3*2))&3]);
+                    VMOVQSfrom(d0, 2, x2);
+                    VMOVQSfrom(d0, 3, x3);
+                    // TBL reads its table and index registers before writing the result,
+                    // so no copy is needed when v0==v1.
+                    VTBLQ1_8(v0, v1, d0);
+                }
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 1);
+                u8 = F8;
+                if (u8) {
+                    // Load each destination lane straight from the selected source dword.
+                    for (int i=0; i<4; ++i) {
+                        int32_t idx = (u8>>(i*2))&3;
+                        if(idx!=i32) {
+                            // x2 only needs recomputing when the source lane changes
+                            ADDx_U12(x2, ed, idx*4);
+                            i32 = idx;
+                        }
+                        VLD1_32(v0, i, x2);
+                    }
+                } else {
+                    // Ib==0: all four lanes take source lane 0. The Q (128-bit) form is
+                    // required; the 64-bit form would only fill the two low lanes.
+                    VLDQ1R_32(v0, ed);
+                }
+            }
+            break;

 

         case 0x7E:

             INST_NAME("MOVD Ed,Gx");