author    ptitSeb <sebastien.chev@gmail.com>  2021-03-31 20:19:11 +0200
committer ptitSeb <sebastien.chev@gmail.com>  2021-03-31 20:19:11 +0200
commit    2d666965cfeb3fe148dfb8523e57483ea0314b75 (patch)
tree      0cb74d2fb239f233547990549288d39bbf5dc601
parent    444d5c7580a8070752a631933d962d4fe40dedb4 (diff)
[DYNAREC] Added 0F 70 opcode
-rwxr-xr-x  src/dynarec/arm64_emitter.h      1
-rwxr-xr-x  src/dynarec/dynarec_arm64_0f.c  62
2 files changed, 62 insertions(+), 1 deletion(-)
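Note (illustration only, not part of the commit): unprefixed 0F 70 is PSHUFW Gm,Em,Ib, which builds each of the four 16-bit words of the MMX destination from the source word picked by the matching 2-bit field of the immediate. A minimal C sketch of the semantics the dynarec block below implements:

/* Reference-only sketch of PSHUFW semantics (not box64 code). */
#include <stdint.h>

static uint64_t pshufw_ref(uint64_t src, uint8_t imm8)
{
    uint64_t res = 0;
    for (int i = 0; i < 4; ++i) {
        /* output word i = source word selected by bits [2i+1:2i] of imm8 */
        uint64_t w = (src >> (16 * ((imm8 >> (2 * i)) & 3))) & 0xFFFF;
        res |= w << (16 * i);
    }
    return res;
}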
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index 92197a15..9cf9bffa 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -724,6 +724,7 @@
 #define VSSHR_32(Vd, Vn, shift)             EMIT(SHR_vector(0, 0, 0b0100 | (((32-(shift))>>3)&3), (32-(shift))&7, Vn, Vd))
 
 #define EXT_vector(Q, Rm, imm4, Rn, Rd)     ((Q)<<30 | 0b101110<<24 | (Rm)<<16 | (imm4)<<11 | (Rn)<<5 | (Rd))
+#define VEXT_8(Rd, Rn, Rm, index)           EMIT(EXT_vector(0, Rm, index, Rn, Rd))
 #define VEXTQ_8(Rd, Rn, Rm, index)          EMIT(EXT_vector(1, Rm, index, Rn, Rd))
 
 // Shift Left and Insert (not touching lower part of dest)
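For context (illustration only, not part of the commit): the new VEXT_8 macro emits the AArch64 EXT instruction with Q=0, i.e. operating on a single 64-bit lane. EXT concatenates Vm:Vn and extracts 8 bytes starting at the given byte index, so VEXT_8(v0, v0, v0, 4) rotates the register by 4 bytes and swaps its 32-bit halves. A rough C model of that byte-extract behaviour:

/* Rough model of EXT (Q=0) as used by VEXT_8; assumes a little-endian host. */
#include <stdint.h>
#include <string.h>

static uint64_t ext_8b(uint64_t vn, uint64_t vm, unsigned index /* 0..7 */)
{
    uint8_t cat[16];
    memcpy(cat, &vn, 8);          /* Vn supplies the low 8 bytes  */
    memcpy(cat + 8, &vm, 8);      /* Vm supplies the high 8 bytes */
    uint64_t vd;
    memcpy(&vd, cat + index, 8);  /* take 8 bytes starting at 'index' */
    return vd;
}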
diff --git a/src/dynarec/dynarec_arm64_0f.c b/src/dynarec/dynarec_arm64_0f.c
index 82437aa7..aef8d569 100755
--- a/src/dynarec/dynarec_arm64_0f.c
+++ b/src/dynarec/dynarec_arm64_0f.c
@@ -70,6 +70,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
     uint8_t gd, ed;
     uint8_t wback, wb2;
     uint8_t eb1, eb2;
+    uint64_t tmp64u;
     int v0, v1;
     int q0, q1;
     int d0, d1;
@@ -479,7 +480,66 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 VLD1_64(v0, 0, ed);
             }
             break;
-
+        case 0x70:
+            INST_NAME("PSHUFW Gm,Em,Ib");
+            nextop = F8;
+            gd = (nextop&0x38)>>3;
+            if(MODREG) {
+                u8 = F8;
+                v1 = mmx_get_reg(dyn, ninst, x1, (nextop&7));
+                v0 = mmx_get_reg_empty(dyn, ninst, x1, gd);
+                if(u8==0x4E) {
+                    if(v0==v1) {
+                        VEXT_8(v0, v0, v0, 4); // swap upper/lower 32bit halves
+                    } else {
+                        VMOVeS(v0, 0, v1, 1);
+                        VMOVeS(v0, 1, v1, 0);
+                    }
+                } else if(u8==0x00) {
+                    // duplicate lower 16bits to all slots
+                    if(v0!=v1) {
+                        VMOVeH(v0, 0, v1, 0);
+                    }
+                    VMOVeH(v0, 1, v1, 0);
+                    VMOVeS(v0, 1, v0, 0);
+                } else if(v0!=v1) {
+                    VMOVeH(v0, 0, v1, (u8>>(0*2))&3);
+                    VMOVeH(v0, 1, v1, (u8>>(1*2))&3);
+                    VMOVeH(v0, 2, v1, (u8>>(2*2))&3);
+                    VMOVeH(v0, 3, v1, (u8>>(3*2))&3);
+                } else {
+                    uint64_t swp[4] = {
+                        (0)|(1<<8),
+                        (2)|(3<<8),
+                        (4)|(5<<8),
+                        (6)|(7<<8)
+                    };
+                    d0 = fpu_get_scratch(dyn);
+                    tmp64u = swp[(u8>>(0*2))&3] | (swp[(u8>>(1*2))&3]<<16);
+                    tmp64u |= (swp[(u8>>(2*2))&3]<<32) | (swp[(u8>>(3*2))&3]<<48);
+                    MOV64x(x2, tmp64u);
+                    VMOVQDfrom(d0, 0, x2);
+                    VTBL1_8(v0, v1, d0);
+                }
+            } else {
+                v0 = mmx_get_reg_empty(dyn, ninst, x1, gd);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 1);
+                u8 = F8;
+                if (u8) {
+                    i32 = -1;
+                    for (int i=0; i<4; ++i) {
+                        int32_t idx = (u8>>(i*2))&3;
+                        if(idx!=i32) {
+                            ADDx_U12(x2, ed, idx*2);
+                            i32 = idx;
+                        }
+                        VLD1_16(v0, i, x2);
+                    }
+                } else {
+                    VLD1R_16(v0, ed);
+                }
+            }
+            break;
         case 0x71:
             nextop = F8;
             switch((nextop>>3)&7) {