about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author     ptitSeb <sebastien.chev@gmail.com>  2021-03-23 20:12:11 +0100
committer  ptitSeb <sebastien.chev@gmail.com>  2021-03-23 20:12:11 +0100
commit     2487fb4d980003f567ff78bf10ba06a95bd9d1b4 (patch)
tree       37ca8d8dd9f1bb59741c04fa0eca8ed22b46ac1c /src
parent     de0718811e271b8971444fea68f2a04f3d9bca26 (diff)
download   box64-2487fb4d980003f567ff78bf10ba06a95bd9d1b4.tar.gz
           box64-2487fb4d980003f567ff78bf10ba06a95bd9d1b4.zip
[DYNAREC] Added 0F 58/59 opcodes, plus lots of small bugfix (IntoTheBreach sounds good now)
Diffstat (limited to 'src')
-rwxr-xr-x  src/dynarec/arm64_emitter.h            42
-rwxr-xr-x  src/dynarec/arm64_printer.c             8
-rwxr-xr-x  src/dynarec/dynablock.c                 4
-rwxr-xr-x  src/dynarec/dynarec_arm64_00.c          1
-rwxr-xr-x  src/dynarec/dynarec_arm64_0f.c         40
-rw-r--r--  src/dynarec/dynarec_arm64_64.c         11
-rwxr-xr-x  src/dynarec/dynarec_arm64_66.c          1
-rwxr-xr-x  src/dynarec/dynarec_arm64_660f.c       39
-rwxr-xr-x  src/dynarec/dynarec_arm64_67.c         10
-rw-r--r--  src/dynarec/dynarec_arm64_f0.c         18
-rwxr-xr-x  src/dynarec/dynarec_arm64_f20f.c       19
-rwxr-xr-x  src/dynarec/dynarec_arm64_f30f.c       16
-rwxr-xr-x  src/dynarec/dynarec_arm64_functions.c   3
-rwxr-xr-x  src/dynarec/dynarec_arm64_helper.c      4
-rwxr-xr-x  src/dynarec/dynarec_arm64_helper.h      6
-rw-r--r--  src/emu/x64runf0.c                      2
16 files changed, 109 insertions, 115 deletions
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index 47ee286f..f6eef0c8 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -645,28 +645,28 @@
 
 #define SHL_vector(Q, immh, immb, Rn, Rd)   ((Q)<<30 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b01010<<11 | 1<<10 | (Rn)<<5 | (Rd))
 #define VSHLQ_8(Vd, Vn, shift)              EMIT(SHL_vector(1, 0b0001, (shift)&7, Vn, Vd))
-#define VSHLQ_16(Vd, Vn, shift)             EMIT(SHL_vector(1, 0b0010 | ((shift)>>3)&1, (shift)&7, Vn, Vd))
-#define VSHLQ_32(Vd, Vn, shift)             EMIT(SHL_vector(1, 0b0100 | ((shift)>>3)&3, (shift)&7, Vn, Vd))
-#define VSHLQ_64(Vd, Vn, shift)             EMIT(SHL_vector(1, 0b1000 | ((shift)>>3)&7, (shift)&7, Vn, Vd))
+#define VSHLQ_16(Vd, Vn, shift)             EMIT(SHL_vector(1, 0b0010 | (((shift)>>3)&1), (shift)&7, Vn, Vd))
+#define VSHLQ_32(Vd, Vn, shift)             EMIT(SHL_vector(1, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd))
+#define VSHLQ_64(Vd, Vn, shift)             EMIT(SHL_vector(1, 0b1000 | (((shift)>>3)&7), (shift)&7, Vn, Vd))
 #define VSHL_8(Vd, Vn, shift)               EMIT(SHL_vector(0, 0b0001, (shift)&7, Vn, Vd))
 #define VSHL_16(Vd, Vn, shift)              EMIT(SHL_vector(0, 0b0010 | ((shift)>>3)&1, (shift)&7, Vn, Vd))
-#define VSHL_32(Vd, Vn, shift)              EMIT(SHL_vector(0, 0b0100 | ((shift)>>3)&3, (shift)&7, Vn, Vd))
+#define VSHL_32(Vd, Vn, shift)              EMIT(SHL_vector(0, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd))
 
 #define SHR_vector(Q, U, immh, immb, Rn, Rd)  ((Q)<<30 | (U)<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b00000<<11 | 1<<10 | (Rn)<<5 | (Rd))
 #define VSHRQ_8(Vd, Vn, shift)              EMIT(SHR_vector(1, 1, 0b0001, (8-(shift))&7, Vn, Vd))
-#define VSHRQ_16(Vd, Vn, shift)             EMIT(SHR_vector(1, 1, 0b0010 | ((16-(shift))>>3)&1, (16-(shift))&7, Vn, Vd))
-#define VSHRQ_32(Vd, Vn, shift)             EMIT(SHR_vector(1, 1, 0b0100 | ((32-(shift))>>3)&3, (32-(shift))&7, Vn, Vd))
-#define VSHRQ_64(Vd, Vn, shift)             EMIT(SHR_vector(1, 1, 0b1000 | ((64-(shift))>>3)&7, (64-(shift))&7, Vn, Vd))
+#define VSHRQ_16(Vd, Vn, shift)             EMIT(SHR_vector(1, 1, 0b0010 | (((16-(shift))>>3)&1), (16-(shift))&7, Vn, Vd))
+#define VSHRQ_32(Vd, Vn, shift)             EMIT(SHR_vector(1, 1, 0b0100 | (((32-(shift))>>3)&3), (32-(shift))&7, Vn, Vd))
+#define VSHRQ_64(Vd, Vn, shift)             EMIT(SHR_vector(1, 1, 0b1000 | (((64-(shift))>>3)&7), (64-(shift))&7, Vn, Vd))
 #define VSHR_8(Vd, Vn, shift)               EMIT(SHR_vector(0, 1, 0b0001, (8-(shift))&7, Vn, Vd))
-#define VSHR_16(Vd, Vn, shift)              EMIT(SHR_vector(0, 1, 0b0010 | ((16-(shift))>>3)&1, (16-(shift))&7, Vn, Vd))
-#define VSHR_32(Vd, Vn, shift)              EMIT(SHR_vector(0, 1, 0b0100 | ((32-(shift))>>3)&3, (32-(shift))&7, Vn, Vd))
+#define VSHR_16(Vd, Vn, shift)              EMIT(SHR_vector(0, 1, 0b0010 | (((16-(shift))>>3)&1), (16-(shift))&7, Vn, Vd))
+#define VSHR_32(Vd, Vn, shift)              EMIT(SHR_vector(0, 1, 0b0100 | (((32-(shift))>>3)&3), (32-(shift))&7, Vn, Vd))
 #define VSSHRQ_8(Vd, Vn, shift)             EMIT(SHR_vector(1, 0, 0b0001, (8-(shift))&7, Vn, Vd))
-#define VSSHRQ_16(Vd, Vn, shift)            EMIT(SHR_vector(1, 0, 0b0010 | ((16-(shift))>>3)&1, (16-(shift))&7, Vn, Vd))
-#define VSSHRQ_32(Vd, Vn, shift)            EMIT(SHR_vector(1, 0, 0b0100 | ((32-(shift))>>3)&3, (32-(shift))&7, Vn, Vd))
-#define VSSHRQ_64(Vd, Vn, shift)            EMIT(SHR_vector(1, 0, 0b1000 | ((64-(shift))>>3)&7, (64-(shift))&7, Vn, Vd))
+#define VSSHRQ_16(Vd, Vn, shift)            EMIT(SHR_vector(1, 0, 0b0010 | (((16-(shift))>>3)&1), (16-(shift))&7, Vn, Vd))
+#define VSSHRQ_32(Vd, Vn, shift)            EMIT(SHR_vector(1, 0, 0b0100 | (((32-(shift))>>3)&3), (32-(shift))&7, Vn, Vd))
+#define VSSHRQ_64(Vd, Vn, shift)            EMIT(SHR_vector(1, 0, 0b1000 | (((64-(shift))>>3)&7), (64-(shift))&7, Vn, Vd))
 #define VSSHR_8(Vd, Vn, shift)              EMIT(SHR_vector(0, 0, 0b0001, (8-(shift))&7, Vn, Vd))
-#define VSSHR_16(Vd, Vn, shift)             EMIT(SHR_vector(0, 0, 0b0010 | ((16-(shift))>>3)&1, (16-(shift))&7, Vn, Vd))
-#define VSSHR_32(Vd, Vn, shift)             EMIT(SHR_vector(0, 0, 0b0100 | ((32-(shift))>>3)&3, (32-(shift))&7, Vn, Vd))
+#define VSSHR_16(Vd, Vn, shift)             EMIT(SHR_vector(0, 0, 0b0010 | (((16-(shift))>>3)&1), (16-(shift))&7, Vn, Vd))
+#define VSSHR_32(Vd, Vn, shift)             EMIT(SHR_vector(0, 0, 0b0100 | (((32-(shift))>>3)&3), (32-(shift))&7, Vn, Vd))
 
 #define EXT_vector(Q, Rm, imm4, Rn, Rd)     ((Q)<<30 | 0b101110<<24 | (Rm)<<16 | (imm4)<<11 | (Rn)<<5 | (Rd))
 #define VEXTQ_8(Rd, Rn, Rm, index)          EMIT(EXT_vector(1, Rm, index, Rn, Rd))
@@ -675,21 +675,21 @@
 #define SLI_vector(Q, immh, immb, Rn, Rd)   ((Q)<<30 | 1<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b01010<<1 | 1<<10 | (Rn)<<5 | (Rd))
 #define VSLIQ_8(Vd, Vn, shift)              EMIT(VSLI_vector(1, 0b0001, (shift)&7, Vn, Vd))
 #define VSLIQ_16(Vd, Vn, shift)             EMIT(VSLI_vector(1, 0b0010 | ((shift)>>3)&1, (shift)&7, Vn, Vd))
-#define VSLIQ_32(Vd, Vn, shift)             EMIT(VSLI_vector(1, 0b0100 | ((shift)>>3)&3, (shift)&7, Vn, Vd))
-#define VSLIQ_64(Vd, Vn, shift)             EMIT(VSLI_vector(1, 0b1000 | ((shift)>>3)&7, (shift)&7, Vn, Vd))
+#define VSLIQ_32(Vd, Vn, shift)             EMIT(VSLI_vector(1, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd))
+#define VSLIQ_64(Vd, Vn, shift)             EMIT(VSLI_vector(1, 0b1000 | (((shift)>>3)&7), (shift)&7, Vn, Vd))
 #define VSLI_8(Vd, Vn, shift)               EMIT(VSLI_vector(0, 0b0001, (shift)&7, Vn, Vd))
 #define VSLI_16(Vd, Vn, shift)              EMIT(VSLI_vector(0, 0b0010 | ((shift)>>3)&1, (shift)&7, Vn, Vd))
-#define VSLI_32(Vd, Vn, shift)              EMIT(VSLI_vector(0, 0b0100 | ((shift)>>3)&3, (shift)&7, Vn, Vd))
+#define VSLI_32(Vd, Vn, shift)              EMIT(VSLI_vector(0, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd))
 
 // Shift Right and Insert (not touching higher part of dest)
 #define SRI_vector(Q, immh, immb, Rn, Rd)   ((Q)<<30 | 1<<29 | 0b011110<<23 | (immh)<<19 | (immb)<<16 | 0b01000<<1 | 1<<10 | (Rn)<<5 | (Rd))
 #define VSRIQ_8(Vd, Vn, shift)              EMIT(VSRI_vector(1, 0b0001, (shift)&7, Vn, Vd))
 #define VSRIQ_16(Vd, Vn, shift)             EMIT(VSRI_vector(1, 0b0010 | ((shift)>>3)&1, (shift)&7, Vn, Vd))
-#define VSRIQ_32(Vd, Vn, shift)             EMIT(VSRI_vector(1, 0b0100 | ((shift)>>3)&3, (shift)&7, Vn, Vd))
-#define VSRIQ_64(Vd, Vn, shift)             EMIT(VSRI_vector(1, 0b1000 | ((shift)>>3)&7, (shift)&7, Vn, Vd))
+#define VSRIQ_32(Vd, Vn, shift)             EMIT(VSRI_vector(1, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd))
+#define VSRIQ_64(Vd, Vn, shift)             EMIT(VSRI_vector(1, 0b1000 | (((shift)>>3)&7), (shift)&7, Vn, Vd))
 #define VSRI_8(Vd, Vn, shift)               EMIT(VSRI_vector(0, 0b0001, (shift)&7, Vn, Vd))
 #define VSRI_16(Vd, Vn, shift)              EMIT(VSRI_vector(0, 0b0010 | ((shift)>>3)&1, (shift)&7, Vn, Vd))
-#define VSRI_32(Vd, Vn, shift)              EMIT(VSRI_vector(0, 0b0100 | ((shift)>>3)&3, (shift)&7, Vn, Vd))
+#define VSRI_32(Vd, Vn, shift)              EMIT(VSRI_vector(0, 0b0100 | (((shift)>>3)&3), (shift)&7, Vn, Vd))
 
 // Integer MATH
 #define ADDSUB_vector(Q, U, size, Rm, Rn, Rd)   ((Q)<<30 | (U)<<29 | 0b01110<<24 | (size)<<22 | 1<<21 | (Rm)<<16 | 0b10000<<11 | 1<<10 | (Rn)<<5 | (Rd))
@@ -786,7 +786,7 @@
 #define FSUBD(Dd, Dn, Dm)           EMIT(FADDSUB_scalar(0b01, Dm, 1, Dn, Dd))
 
 // MUL
-#define FMUL_vector(Q, sz, Rm, Rn, Rd)  ((Q)<<30 | 1<<29 | 0b01110<<24 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b011<<11 | 1<<10 | (Rn)<<5 | (Rd))
+#define FMUL_vector(Q, sz, Rm, Rn, Rd)  ((Q)<<30 | 1<<29 | 0b01110<<24 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11011<<11 | 1<<10 | (Rn)<<5 | (Rd))
 #define VFMULS(Sd, Sn, Sm)          EMIT(FMUL_vector(0, 0, Sm, Sn, Sd))
 #define VFMULQS(Sd, Sn, Sm)         EMIT(FMUL_vector(1, 0, Sm, Sn, Sd))
 #define VFMULQD(Sd, Sn, Sm)         EMIT(FMUL_vector(1, 1, Sm, Sn, Sd))
diff --git a/src/dynarec/arm64_printer.c b/src/dynarec/arm64_printer.c
index 8d9aedfe..ad36e3c0 100755
--- a/src/dynarec/arm64_printer.c
+++ b/src/dynarec/arm64_printer.c
@@ -91,9 +91,9 @@ int isMask(uint32_t opcode, const char* mask, arm64_print_t *a)
 int64_t signExtend(uint32_t val, int sz)

 {

     int64_t ret = val;

-    if((val>>(sz-1))&1 == 1)

-        val |= (0xffffffffffffffffll<<sz);

-    return val;

+    if((val>>(sz-1))&1)

+        ret |= (0xffffffffffffffffll<<sz);

+    return ret;

 }

 

 const char* arm64_print(uint32_t opcode, uintptr_t addr)

@@ -911,7 +911,7 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         char s = a.Q?'V':'D';

         char d = sf?'D':'S';

         int n = (a.Q && !sf)?4:2;

-        snprintf(buff, sizeof(buff), "VFMUL %c%d.%d%c, %c%d.%d%c, %c%d.%c%d", s, Rd, n, d, s, Rn, n, d, s, Rm, s, d);

+        snprintf(buff, sizeof(buff), "VFMUL %c%d.%d%c, %c%d.%d%c, %c%d.%d%c", s, Rd, n, d, s, Rn, n, d, s, Rm, n, d);

         return buff;

     }

     if(isMask(opcode, "00011110ff1mmmmm000010nnnnnddddd", &a)) {

diff --git a/src/dynarec/dynablock.c b/src/dynarec/dynablock.c
index 79a07027..836d7eb0 100755
--- a/src/dynarec/dynablock.c
+++ b/src/dynarec/dynablock.c
@@ -323,7 +323,7 @@ static dynablock_t* internalDBGetBlock(x64emu_t* emu, uintptr_t addr, uintptr_t
     if(!created)
         return block;   // existing block...
 
-    #if 0
+    #if 1
     if(box64_dynarec_dump)
         pthread_mutex_lock(&my_context->mutex_dyndump);
     #endif
@@ -338,7 +338,7 @@ static dynablock_t* internalDBGetBlock(x64emu_t* emu, uintptr_t addr, uintptr_t
         free(block);
         block = NULL;
     }
-    #if 0
+    #if 1
     if(box64_dynarec_dump)
         pthread_mutex_unlock(&my_context->mutex_dyndump);
     #endif
diff --git a/src/dynarec/dynarec_arm64_00.c b/src/dynarec/dynarec_arm64_00.c
index 2e44b5bf..38c6a447 100755
--- a/src/dynarec/dynarec_arm64_00.c
+++ b/src/dynarec/dynarec_arm64_00.c
@@ -43,6 +43,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
     opcode = F8;
     MAYUSE(eb1);
     MAYUSE(eb2);
+    MAYUSE(wb2);
     MAYUSE(tmp);
     MAYUSE(j32);
 
diff --git a/src/dynarec/dynarec_arm64_0f.c b/src/dynarec/dynarec_arm64_0f.c
index 8905878f..7a9ca5b8 100755
--- a/src/dynarec/dynarec_arm64_0f.c
+++ b/src/dynarec/dynarec_arm64_0f.c
@@ -34,7 +34,7 @@
 

 #define GETEX(a, D)                                             \

     if(MODREG) {                                                \

-        a = sse_get_reg(dyn, ninst, x1, nextop&7+(rex.b<<3));   \

+        a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3));   \

     } else {                                                    \

         addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, 0, 0); \

         a = fpu_get_scratch(dyn);                               \

@@ -42,11 +42,11 @@
     }

 

 #define GETGM(a)            \

-    gd = (nextop&0x38)>>3;  \

+    gd = ((nextop&0x38)>>3);  \

     a = mmx_get_reg(dyn, ninst, x1, gd)

 #define GETEM(a, D)    \

     if(MODREG) {        \

-        a = mmx_get_reg(dyn, ninst, x1, nextop&7); \

+        a = mmx_get_reg(dyn, ninst, x1, (nextop&7)); \

     } else {                                        \

         addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0); \

         a = fpu_get_scratch_double(dyn); \

@@ -63,24 +63,25 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
     uint8_t nextop, u8;

     int32_t i32, i32_, j32;

     uint8_t gd, ed;

-    uint8_t wback, wb1, wb2;

+    uint8_t wback, wb2;

     uint8_t eb1, eb2;

-    uint8_t gb1, gb2;

-    int v0, v1, v2;

+    int v0, v1;

     int q0, q1;

-    int d0, d1;

+    int d0;

     int s0;

     int fixedaddress;

-    int parity;

     MAYUSE(s0);

+    MAYUSE(q0);

     MAYUSE(q1);

-    MAYUSE(v2);

-    MAYUSE(gb2);

-    MAYUSE(gb1);

+    MAYUSE(v0);

+    MAYUSE(v1);

+    MAYUSE(d0);

     MAYUSE(eb2);

     MAYUSE(eb1);

     MAYUSE(wb2);

     MAYUSE(j32);

+    MAYUSE(i32);

+    MAYUSE(u8);

     #if STEP == 3

     //static const int8_t mask_shift8[] = { -7, -6, -5, -4, -3, -2, -1, 0 };

     #endif

@@ -135,7 +136,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             break;

         case 0x16:

             nextop = F8;

-            if((nextop&0xC0)==0xC0) {

+            if(MODREG) {

                 INST_NAME("MOVLHPS Gx,Ex");

                 GETGX(v0);

                 v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3));

@@ -260,7 +261,20 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             GETGX(v0);

             VEORQ(v0, v0, q0);

             break;

-

+        case 0x58:

+            INST_NAME("ADDPS Gx, Ex");

+            nextop = F8;

+            GETEX(q0, 0);

+            GETGX(v0);

+            VFADDQS(v0, v0, q0);

+            break;

+        case 0x59:

+            INST_NAME("MULPS Gx, Ex");

+            nextop = F8;

+            GETEX(q0, 0);

+            GETGX(v0);

+            VFMULQS(v0, v0, q0);

+            break;

         case 0x5A:

             INST_NAME("CVTPS2PD Gx, Ex");

             nextop = F8;

diff --git a/src/dynarec/dynarec_arm64_64.c b/src/dynarec/dynarec_arm64_64.c
index 8854ca3f..d86be338 100644
--- a/src/dynarec/dynarec_arm64_64.c
+++ b/src/dynarec/dynarec_arm64_64.c
@@ -26,16 +26,11 @@
 uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)
 {
     uint8_t opcode = F8;
-    uint8_t nextop, u8;
-    uint32_t u32;
-    int32_t i32, j32;
-    int16_t i16;
-    uint16_t u16;
+    uint8_t nextop;
+    int32_t j32;
     uint8_t gd, ed;
-    uint8_t wback, wb1, wb2, gb1, gb2;
+    uint8_t wback;
     int fixedaddress;
-    MAYUSE(u16);
-    MAYUSE(u8);
     MAYUSE(j32);
 
     while((opcode==0xF2) || (opcode==0xF3)) {
diff --git a/src/dynarec/dynarec_arm64_66.c b/src/dynarec/dynarec_arm64_66.c
index 5637b3f8..91f8d618 100755
--- a/src/dynarec/dynarec_arm64_66.c
+++ b/src/dynarec/dynarec_arm64_66.c
@@ -27,7 +27,6 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
 {

     uint8_t opcode = F8;

     uint8_t nextop, u8;

-    uint32_t u32;

     int32_t i32, j32;

     int16_t i16;

     uint16_t u16;

diff --git a/src/dynarec/dynarec_arm64_660f.c b/src/dynarec/dynarec_arm64_660f.c
index 2221e2db..3429049c 100755
--- a/src/dynarec/dynarec_arm64_660f.c
+++ b/src/dynarec/dynarec_arm64_660f.c
@@ -53,16 +53,15 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
     int v0, v1;

     int q0, q1;

     int d0;

-    int s0;

     int fixedaddress;

-    int parity;

 

     MAYUSE(d0);

+    MAYUSE(q0);

     MAYUSE(q1);

     MAYUSE(eb1);

     MAYUSE(eb2);

     MAYUSE(j32);

-    #if STEP == 3

+    #if 0//STEP == 3

     static const int8_t mask_shift8[] = { -7, -6, -5, -4, -3, -2, -1, 0 };

     #endif

 

@@ -74,13 +73,12 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;

             GETGX(v0);

             if(MODREG) {

-                v1 = sse_get_reg(dyn, ninst, x1, nextop&7);

+                v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3));

+                VMOVeD(v0, 1, v1, 0);

             } else {

-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0);

-                v1 = fpu_get_scratch(dyn);

-                VLDR64_U12(v1, ed, fixedaddress);

+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0);

+                VLD1_64(v0, 1, ed);

             }

-            VMOVeD(v0, 1, v1, 0);

             break;

         case 0x15:

             INST_NAME("UNPCKHPD Gx, Ex");

@@ -88,13 +86,14 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             GETGX(v0);

             VMOVeD(v0, 0, v0, 1);

             if(MODREG) {

-                v1 = sse_get_reg(dyn, ninst, x1, nextop&7);

+                v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3));

+                VMOVeD(v0, 1, v1, 1);

             } else {

-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0);

+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, 0, 0);

                 v1 = fpu_get_scratch(dyn);

-                VLDR64_U12(v1, ed, fixedaddress);

+                ADDx_U12(ed, ed, 8);

+                VLD1_64(v0, 1, ed);

             }

-            VMOVeD(v0, 1, v1, 1);

             break;

 

         case 0x1F:

@@ -106,7 +105,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         case 0x28:

             INST_NAME("MOVAPD Gx,Ex");

             nextop = F8;

-            gd = ((nextop&0x38)>>3) + (rex.r<<3);

+            GETG;

             if(MODREG) {

                 ed = (nextop&7)+(rex.b<<3);

                 v1 = sse_get_reg(dyn, ninst, x1, ed);

@@ -121,7 +120,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         case 0x29:

             INST_NAME("MOVAPD Ex,Gx");

             nextop = F8;

-            gd = ((nextop&0x38)>>3) + (rex.r<<3);

+            GETG;

             v0 = sse_get_reg(dyn, ninst, x1, gd);

             if(MODREG) {

                 ed = (nextop&7)+(rex.b<<3);

@@ -139,8 +138,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             if(opcode==0x2F) {INST_NAME("COMISD Gx, Ex");} else {INST_NAME("UCOMISD Gx, Ex");}

             SETFLAGS(X_ALL, SF_SET);

             nextop = F8;

-            gd = ((nextop&0x38)>>3) + (rex.r<<3);

-            v0 = sse_get_reg(dyn, ninst, x1, gd);

+            GETGX(v0);

             GETEX(q0, 0);

             FCMPD(v0, q0);

             FCOMI(x1, x2);

@@ -416,15 +414,14 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         case 0x7E:

             INST_NAME("MOVD Ed,Gx");

             nextop = F8;

-            gd = ((nextop&0x38)>>3)+(rex.r<<3);

-            v0 = sse_get_reg(dyn, ninst, x1, gd);

+            GETGX(v0);

             if(rex.w) {

                 if(MODREG) {

                     ed = xRAX + (nextop&7) + (rex.b<<3);

                     VMOVQDto(ed, v0, 0);

                 } else {

                     addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, 0, 0);

-                    VSTR64_U12(x2, ed, fixedaddress);

+                    VSTR64_U12(v0, ed, fixedaddress);

                 }

             } else {

                 if(MODREG) {

@@ -432,7 +429,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     VMOVSto(ed, v0, 0);

                 } else {

                     addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, 0, 0);

-                    VSTR32_U12(x2, ed, fixedaddress);

+                    VSTR32_U12(v0, ed, fixedaddress);

                 }

             }

             break;

@@ -569,7 +566,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;

             GETGD;

             GETEW(x1, 0);  // Get EW

-            TSTw_REG(x1, x1);

+            TSTw_REG(x1, x1);   // Don't use CBZ here, as the flag is reused later

             B_MARK(cEQ);

             LSLw(x1, x1, 16);   // put bits on top

             CLZw(x2, x1);       // x2 gets leading 0

diff --git a/src/dynarec/dynarec_arm64_67.c b/src/dynarec/dynarec_arm64_67.c
index 1b691710..f2c7ebe3 100755
--- a/src/dynarec/dynarec_arm64_67.c
+++ b/src/dynarec/dynarec_arm64_67.c
@@ -26,18 +26,10 @@
 uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)

 {

     uint8_t opcode = F8;

-    uint8_t nextop, u8;

     int8_t  i8;

-    uint32_t u32;

     int32_t i32, j32;

-    int16_t i16;

-    uint16_t u16;

-    uint8_t gd, ed;

-    uint8_t wback, wb1;

-    int fixedaddress;

-    MAYUSE(u16);

-    MAYUSE(u8);

     MAYUSE(j32);

+    MAYUSE(i32);

 

     // REX prefix before the 67 are ignored

     rex.rex = 0;

diff --git a/src/dynarec/dynarec_arm64_f0.c b/src/dynarec/dynarec_arm64_f0.c
index a36f2589..7b6991f2 100644
--- a/src/dynarec/dynarec_arm64_f0.c
+++ b/src/dynarec/dynarec_arm64_f0.c
@@ -26,17 +26,15 @@
 uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)
 {
     uint8_t opcode = F8;
-    uint8_t nextop, u8;
-    uint32_t u32;
-    int32_t i32, j32;
-    int16_t i16;
-    uint16_t u16;
+    uint8_t nextop;
+    int32_t j32;
     uint8_t gd, ed;
-    uint8_t wback, wb1, wb2, gb1, gb2;
+    uint8_t wback, wb2, gb1, gb2;
     int64_t i64;
     int fixedaddress;
-    MAYUSE(u16);
-    MAYUSE(u8);
+    MAYUSE(gb1);
+    MAYUSE(gb2);
+    MAYUSE(wb2);
     MAYUSE(j32);
 
     while((opcode==0xF2) || (opcode==0xF3)) {
@@ -67,7 +65,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 }
                 UBFXw(x1, wback, wb2*8, 8);   
                 emit_add8(dyn, ninst, x1, x2, x4, x3);
-                BFIx(wback, ed, wb2*8, 8);
+                BFIx(wback, x1, wb2*8, 8);
             } else {                   
                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, 0, 0);
                 MARKLOCK;
@@ -363,7 +361,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     GETED(0);
                     // No need to LOCK, this is readonly
                     if(opcode==0x81) i64 = F32S; else i64 = F8S;
-                    if(i32) {
+                    if(i64) {
                         MOV64xw(x5, i64);
                         emit_cmp32(dyn, ninst, rex, ed, x5, x3, x4, x6);
                     } else {
diff --git a/src/dynarec/dynarec_arm64_f20f.c b/src/dynarec/dynarec_arm64_f20f.c
index 193f1169..195a3e59 100755
--- a/src/dynarec/dynarec_arm64_f20f.c
+++ b/src/dynarec/dynarec_arm64_f20f.c
@@ -40,23 +40,18 @@
 uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog)

 {

     uint8_t opcode = F8;

-    uint8_t nextop, u8;

-    int32_t i32, j32;

+    uint8_t nextop;

     uint8_t gd, ed;

-    uint8_t wback, wb1;

-    uint8_t eb1, eb2;

-    int v0, v1;

-    int q0, q1;

+    uint8_t wback;

+    int v0;

+    int q0;

     int d0, d1;

-    int s0;

     int fixedaddress;

-    int parity;

 

     MAYUSE(d0);

-    MAYUSE(q1);

-    MAYUSE(eb1);

-    MAYUSE(eb2);

-    MAYUSE(j32);

+    MAYUSE(d1);

+    MAYUSE(q0);

+    MAYUSE(v0);

 

     switch(opcode) {

 

diff --git a/src/dynarec/dynarec_arm64_f30f.c b/src/dynarec/dynarec_arm64_f30f.c
index c7bd738a..574764b8 100755
--- a/src/dynarec/dynarec_arm64_f30f.c
+++ b/src/dynarec/dynarec_arm64_f30f.c
@@ -41,22 +41,18 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
 {

     uint8_t opcode = F8;

     uint8_t nextop, u8;

-    int32_t i32, j32;

     uint8_t gd, ed;

-    uint8_t wback, wb1;

-    uint8_t eb1, eb2;

+    uint8_t wback;

     int v0, v1;

-    int q0, q1;

+    int q0;

     int d0, d1;

-    int s0;

     int fixedaddress;

-    int parity;

 

     MAYUSE(d0);

-    MAYUSE(q1);

-    MAYUSE(eb1);

-    MAYUSE(eb2);

-    MAYUSE(j32);

+    MAYUSE(d1);

+    MAYUSE(q0);

+    MAYUSE(v0);

+    MAYUSE(v1);

 

     switch(opcode) {

 

diff --git a/src/dynarec/dynarec_arm64_functions.c b/src/dynarec/dynarec_arm64_functions.c
index d2ba4f6f..da6175af 100755
--- a/src/dynarec/dynarec_arm64_functions.c
+++ b/src/dynarec/dynarec_arm64_functions.c
@@ -268,7 +268,8 @@ int getedparity(dynarec_arm_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, i
                 return ((sib&0x7)==4 && (sib>>6)>=parity)?1:0;
             }
         } else if((nextop&7)==5) {
-            uint64_t tmp = F32S64+addr+delta;
+            uint64_t tmp = F32S64;
+            tmp+=addr+delta;
             return (tmp&tested)?0:1;
         } else {
             return 0;
diff --git a/src/dynarec/dynarec_arm64_helper.c b/src/dynarec/dynarec_arm64_helper.c
index 6be6efc4..f22cac82 100755
--- a/src/dynarec/dynarec_arm64_helper.c
+++ b/src/dynarec/dynarec_arm64_helper.c
@@ -240,6 +240,7 @@ void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst)
             MOVx_REG(xRIP, reg);
         }
         uintptr_t tbl = getJumpTable64();
+        MAYUSE(tbl);
         TABLE64(x2, tbl);
         UBFXx(x3, xRIP, 48, JMPTABL_SHIFT);
         LDRx_REG_LSL3(x2, x2, x3);
@@ -250,7 +251,8 @@ void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst)
         UBFXx(x3, xRIP, 0, JMPTABL_SHIFT);
         LDRx_REG_LSL3(x3, x2, x3);
     } else {
-        uintptr_t p = getJumpTableAddress64(ip); 
+        uintptr_t p = getJumpTableAddress64(ip);
+        MAYUSE(p);
         TABLE64(x2, p);
         GETIP_(ip);
         LDRx_U12(x3, x2, 0);
diff --git a/src/dynarec/dynarec_arm64_helper.h b/src/dynarec/dynarec_arm64_helper.h
index ff397379..ed940272 100755
--- a/src/dynarec/dynarec_arm64_helper.h
+++ b/src/dynarec/dynarec_arm64_helper.h
@@ -265,6 +265,10 @@
 #define CBNZx_MARK(reg)             \
     j32 = GETMARK-(dyn->arm_size);  \
     CBNZx(reg, j32)
+// Branch to MARK if reg is not 0 (use j32)
+#define CBNZw_MARK(reg)             \
+    j32 = GETMARK-(dyn->arm_size);  \
+    CBNZw(reg, j32)
 // Branch to MARK2 if cond (use j32)
 #define B_MARK2(cond)               \
     j32 = GETMARK2-(dyn->arm_size); \
@@ -440,7 +444,7 @@
     if(A==xRIP) dyn->last_ip = 0
 
 #define SET_DFNONE(S)    if(!dyn->dfnone) {MOVZw(S, d_none); STRw_U12(S, xEmu, offsetof(x64emu_t, df)); dyn->dfnone=1;}
-#define SET_DF(S, N)     if(N) {MOVZw(S, N); STRw_U12(S, xEmu, offsetof(x64emu_t, df)); dyn->dfnone=0;} else SET_DFNONE(S)
+#define SET_DF(S, N)     if((N)!=d_none) {MOVZw(S, (N)); STRw_U12(S, xEmu, offsetof(x64emu_t, df)); dyn->dfnone=0;} else SET_DFNONE(S)
 #define SET_NODF()          dyn->dfnone = 0
 #define SET_DFOK()          dyn->dfnone = 1
 
diff --git a/src/emu/x64runf0.c b/src/emu/x64runf0.c
index b09fd3aa..48eee438 100644
--- a/src/emu/x64runf0.c
+++ b/src/emu/x64runf0.c
@@ -301,7 +301,7 @@ int RunF0(x64emu_t *emu, rex_t rex)
                         case 4: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = and32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break;

                         case 5: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = sub32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break;

                         case 6: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = xor32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break;

-                        case 7:                cmp32(emu, ED->dword[0], tmp32u); break;

+                        case 7:                                                 cmp32(emu, ED->dword[0], tmp64u); break;

                     }

             }

 #else