Diffstat (limited to 'src')
-rwxr-xr-x  src/dynarec/arm64_emitter.h       26
-rwxr-xr-x  src/dynarec/dynarec_arm64_660f.c  16
2 files changed, 41 insertions, 1 deletion
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index e143f4f4..23e45fad 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -1033,7 +1033,7 @@
 // FMAXNM NaN vs Number: number is picked
 #define FMAXNMD(Dd, Dn, Dm)         EMIT(FMINMAX_scalar(0b01, Dm, 0b10, Dn, Dd))
 
-// ZIP
+// ZIP / UZP
 #define ZIP_gen(Q, size, Rm, op, Rn, Rd)    ((Q)<<30 | 0b001110<<24 | (size)<<22 | (Rm)<<16 | (op)<<14 | 0b11<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
 #define VZIP1Q_8(Rt, Rn, Rm)        EMIT(ZIP_gen(1, 0b00, Rm, 0, Rn, Rt))
 #define VZIP2Q_8(Rt, Rn, Rm)        EMIT(ZIP_gen(1, 0b00, Rm, 1, Rn, Rt))
@@ -1050,6 +1050,22 @@
 #define VZIP1Q_64(Rt, Rn, Rm)       EMIT(ZIP_gen(1, 0b11, Rm, 0, Rn, Rt))
 #define VZIP2Q_64(Rt, Rn, Rm)       EMIT(ZIP_gen(1, 0b11, Rm, 1, Rn, Rt))
 
+#define UZP_gen(Q, size, Rm, op, Rn, Rd)    ((Q)<<30 | 0b001110<<24 | (size)<<22 | (Rm)<<16 | (op)<<14 | 0b01<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
+#define VUZP1Q_8(Rt, Rn, Rm)        EMIT(UZP_gen(1, 0b00, Rm, 0, Rn, Rt))
+#define VUZP2Q_8(Rt, Rn, Rm)        EMIT(UZP_gen(1, 0b00, Rm, 1, Rn, Rt))
+#define VUZP1_8(Rt, Rn, Rm)         EMIT(UZP_gen(0, 0b00, Rm, 0, Rn, Rt))
+#define VUZP2_8(Rt, Rn, Rm)         EMIT(UZP_gen(0, 0b00, Rm, 1, Rn, Rt))
+#define VUZP1Q_16(Rt, Rn, Rm)       EMIT(UZP_gen(1, 0b01, Rm, 0, Rn, Rt))
+#define VUZP2Q_16(Rt, Rn, Rm)       EMIT(UZP_gen(1, 0b01, Rm, 1, Rn, Rt))
+#define VUZP1_16(Rt, Rn, Rm)        EMIT(UZP_gen(0, 0b01, Rm, 0, Rn, Rt))
+#define VUZP2_16(Rt, Rn, Rm)        EMIT(UZP_gen(0, 0b01, Rm, 1, Rn, Rt))
+#define VUZP1Q_32(Rt, Rn, Rm)       EMIT(UZP_gen(1, 0b10, Rm, 0, Rn, Rt))
+#define VUZP2Q_32(Rt, Rn, Rm)       EMIT(UZP_gen(1, 0b10, Rm, 1, Rn, Rt))
+#define VUZP1_32(Rt, Rn, Rm)        EMIT(UZP_gen(0, 0b10, Rm, 0, Rn, Rt))
+#define VUZP2_32(Rt, Rn, Rm)        EMIT(UZP_gen(0, 0b10, Rm, 1, Rn, Rt))
+#define VUZP1Q_64(Rt, Rn, Rm)       EMIT(UZP_gen(1, 0b11, Rm, 0, Rn, Rt))
+#define VUZP2Q_64(Rt, Rn, Rm)       EMIT(UZP_gen(1, 0b11, Rm, 1, Rn, Rt))
+
 // TBL
 #define TBL_gen(Q, Rm, len, op, Rn, Rd) ((Q)<<30 | 0b001110<<24 | (Rm)<<16 | (len)<<13 | (op)<<12 | (Rn)<<5 | (Rd))
 //Use Rm[] to pick from Rn element and store in Rd. Out-of-range element gets 0
@@ -1138,5 +1154,13 @@
 #define VCHIQQ_32(Rd, Rn, Rm)       EMIT(CMG_vector(1, 1, 0b10, 0, Rm, Rn, Rd))
 #define VCHIQQ_64(Rd, Rn, Rm)       EMIT(CMG_vector(1, 1, 0b11, 0, Rm, Rn, Rd))
 
+// UMULL / SMULL
+#define MULL_vector(Q, U, size, Rm, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b1100<<12 |(Rn)<<5 |(Rd))
+#define VUMULL_8(Rd, Rn, Rm)        EMIT(MULL_vector(0, 1, 0b00, Rm, Rn, Rd))
+#define VUMULL_16(Rd, Rn, Rm)       EMIT(MULL_vector(0, 1, 0b01, Rm, Rn, Rd))
+#define VUMULL_32(Rd, Rn, Rm)       EMIT(MULL_vector(0, 1, 0b10, Rm, Rn, Rd))
+#define VUMULL2_8(Rd, Rn, Rm)       EMIT(MULL_vector(1, 1, 0b00, Rm, Rn, Rd))
+#define VUMULL2_16(Rd, Rn, Rm)      EMIT(MULL_vector(1, 1, 0b01, Rm, Rn, Rd))
+#define VUMULL2_32(Rd, Rn, Rm)      EMIT(MULL_vector(1, 1, 0b10, Rm, Rn, Rd))
 
 #endif  //__ARM64_EMITTER_H__
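
A plain-C sketch of what the two instruction families added above compute per lane, as a reading aid; the helper names uzp1_4s and umull_2d_2s are hypothetical and not part of box64:

#include <stdint.h>
#include <string.h>

/* UZP1 .4S (what VUZP1Q_32(Rt, Rn, Rm) emits): keep the even-numbered
   32-bit lanes, Vn's pair in the low half, Vm's pair in the high half. */
static void uzp1_4s(uint32_t d[4], const uint32_t n[4], const uint32_t m[4])
{
    uint32_t r[4] = { n[0], n[2], m[0], m[2] };
    memcpy(d, r, sizeof(r));    /* copy via a temporary so Vd may alias Vn/Vm */
}

/* UMULL .2D,.2S,.2S (what VUMULL_32 emits): unsigned widening multiply of
   the two low 32-bit lanes into two 64-bit products; VUMULL2_32 would use
   lanes 2 and 3 of the sources instead. */
static void umull_2d_2s(uint64_t d[2], const uint32_t n[4], const uint32_t m[4])
{
    uint64_t lo = (uint64_t)n[0] * m[0];
    uint64_t hi = (uint64_t)n[1] * m[1];
    d[0] = lo;
    d[1] = hi;
}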
diff --git a/src/dynarec/dynarec_arm64_660f.c b/src/dynarec/dynarec_arm64_660f.c
index 60be939c..ed7123c4 100755
--- a/src/dynarec/dynarec_arm64_660f.c
+++ b/src/dynarec/dynarec_arm64_660f.c
@@ -797,6 +797,22 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             }
             break;
 
+        case 0xF4:
+            INST_NAME("PMULUDQ Gx,Ex");
+            nextop = F8;
+            GETGX(v0);
+            GETEX(v1, 0);
+            q0 = fpu_get_scratch(dyn);
+            VUZP1Q_32(q0, v0, v0);  // A3 A2 A1 A0 -> A2 A0 A2 A0 (even lanes; only the low half is used)
+            if(MODREG) {
+                q1 = fpu_get_scratch(dyn);
+            } else {
+                q1 = v1;
+            }
+            VUZP1Q_32(q1, v1, v1);
+            VUMULL_32(v0, q0, q1);
+            break;
+
         default:
             DEFAULT;
     }
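
PMULUDQ multiplies the unsigned low 32 bits of each 64-bit lane of Gx and Ex and keeps the full 64-bit products, so the added case only needs the even 32-bit lanes of each operand: UZP1 gathers them into the low half of a register, and a single unsigned widening multiply (VUMULL_32) then produces both results. The MODREG test is there because a register Ex means v1 backs a live emulated XMM register and must not be clobbered by the unzip, so a second scratch is taken, whereas a memory Ex was already loaded into a scratch that can be reused. A minimal reference model in C (the name pmuludq_ref is hypothetical):

#include <stdint.h>

/* SSE2 PMULUDQ on two 128-bit operands held as two 64-bit lanes
   (little-endian lane order): each destination lane is the product of the
   unsigned low 32 bits of the corresponding source lanes. This is what the
   UZP1 + UMULL sequence above computes. */
static void pmuludq_ref(uint64_t gx[2], const uint64_t ex[2])
{
    gx[0] = (uint64_t)(uint32_t)gx[0] * (uint32_t)ex[0];
    gx[1] = (uint64_t)(uint32_t)gx[1] * (uint32_t)ex[1];
}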