about summary refs log tree commit diff stats
path: root/src/dynarec/arm64/dynarec_arm64_00.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/dynarec/arm64/dynarec_arm64_00.c')
-rw-r--r--src/dynarec/arm64/dynarec_arm64_00.c492
1 files changed, 358 insertions, 134 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c
index 7696a6ec..0e4dee33 100644
--- a/src/dynarec/arm64/dynarec_arm64_00.c
+++ b/src/dynarec/arm64/dynarec_arm64_00.c
@@ -1,7 +1,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stddef.h>
-#include <pthread.h>
 #include <errno.h>
 #include <signal.h>
 
@@ -26,6 +25,7 @@
 #include "dynarec_arm64_helper.h"
 
 int isSimpleWrapper(wrapper_t fun);
+int isRetX87Wrapper(wrapper_t fun);
 
 uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)
 {
@@ -102,7 +102,25 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             i64 = F32S;
             emit_add32c(dyn, ninst, rex, xRAX, i64, x3, x4, x5);
             break;
-
+        case 0x06:
+            if(rex.is32bits) {
+                INST_NAME("PUSH ES");
+                LDRH_U12(x1, xEmu, offsetof(x64emu_t, segs[_ES]));
+                PUSH1_32(x1);
+            } else {
+                DEFAULT;
+            }
+            break;
+        case 0x07:
+            if(rex.is32bits) {
+                INST_NAME("POP ES");
+                POP1_32(x1);
+                STRH_U12(x1, xEmu, offsetof(x64emu_t, segs[_ES]));
+                STRw_U12(xZR, xEmu, offsetof(x64emu_t, segs_serial[_ES]));
+            } else {
+                DEFAULT;
+            }
+            break;
         case 0x08:
             INST_NAME("OR Eb, Gb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -278,7 +296,25 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             MOV64xw(x2, i64);
             emit_sbb32(dyn, ninst, rex, xRAX, x2, x3, x4);
             break;
-
+        case 0x1E:
+            if(rex.is32bits) {
+                INST_NAME("PUSH DS");
+                LDRH_U12(x1, xEmu, offsetof(x64emu_t, segs[_DS]));
+                PUSH1_32(x1);
+            } else {
+                DEFAULT;
+            }
+            break;
+        case 0x1F:
+            if(rex.is32bits) {
+                INST_NAME("POP DS");
+                POP1_32(x1);
+                STRH_U12(x1, xEmu, offsetof(x64emu_t, segs[_DS]));
+                STRw_U12(xZR, xEmu, offsetof(x64emu_t, segs_serial[_DS]));
+            } else {
+                DEFAULT;
+            }
+            break;
         case 0x20:
             INST_NAME("AND Eb, Gb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -490,6 +526,32 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 emit_cmp32_0(dyn, ninst, rex, xRAX, x3, x4);
             break;
 
+        case 0x40:
+        case 0x41:
+        case 0x42:
+        case 0x43:
+        case 0x44:
+        case 0x45:
+        case 0x46:
+        case 0x47:
+            INST_NAME("INC Reg (32bits)");
+            SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING);
+            gd = xRAX + (opcode&7);
+            emit_inc32(dyn, ninst, rex, gd, x1, x2);
+            break;
+        case 0x48:
+        case 0x49:
+        case 0x4A:
+        case 0x4B:
+        case 0x4C:
+        case 0x4D:
+        case 0x4E:
+        case 0x4F:
+            INST_NAME("DEC Reg (32bits)");
+            SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING);
+            gd = xRAX + (opcode&7);
+            emit_dec32(dyn, ninst, rex, gd, x1, x2);
+            break;
         case 0x50:
         case 0x51:
         case 0x52:
@@ -504,31 +566,33 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 dyn->doublepush = 0;
             } else {
                 gd = xRAX+(opcode&0x07)+(rex.b<<3);
-                if(gd==xRSP) {
-                    MOVx_REG(x1, gd);
-                    gd = x1;
-                }
-                u32 = 0;
-                i32 = 0;
-                do {
-                    rex.rex = u32;
-                    u32 = PK(i32);
-                    i32++;
-                } while(u32>=0x40 && u32<=0x4f);
-                if(!box64_dynarec_test && u32>=0x50 && u32<=0x57 && (dyn->size>(ninst+1) && dyn->insts[ninst+1].pred_sz==1)) {
-                    // double push!
+                u32 = PK(0);
+                i32 = 1;
+                rex.rex = 0;
+                if(!rex.is32bits)
+                    while(u32>=0x40 && u32<=0x4f) {
+                        rex.rex = u32;
+                        u32 = PK(i32);
+                        i32++;
+                    }
+                if(!box64_dynarec_test && u32>=0x50 && u32<=0x57 && (dyn->size>(ninst+1) && dyn->insts[ninst+1].pred_sz==1) && gd != xRSP) {
                     u32= xRAX+(u32&0x07)+(rex.b<<3);
-                    MESSAGE(LOG_DUMP, "DOUBLE PUSH\n");
                     if(u32==xRSP) {
-                        MOVx_REG(x1, u32);
-                        u32 = x1;
+                        PUSH1z(gd);
+                    } else {
+                        // double push!
+                        MESSAGE(LOG_DUMP, "DOUBLE PUSH\n");
+                        PUSH2z(gd, u32);
+                        dyn->doublepush = 1;
                     }
-                    PUSH2(gd, u32);
-                    dyn->doublepush = 1;
-                    SKIPTEST(x1);  // disable test for this OP
                 } else {
-                    PUSH1(gd);
-                }   
+                    if (gd == xRSP) {
+                        MOVz_REG(x1, xRSP);
+                        PUSH1z(x1);
+                    } else {
+                        PUSH1z(gd);
+                    }
+                }
             }
             break;
         case 0x58:
@@ -545,58 +609,88 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 dyn->doublepop = 0;
             } else {
                 gd = xRAX+(opcode&0x07)+(rex.b<<3);
-                u32 = 0;
-                i32 = 0;
-                do {
-                    rex.rex = u32;
-                    u32 = PK(i32);
-                    i32++;
-                } while(u32>=0x40 && u32<=0x4f);
+                u32 = PK(0);
+                i32 = 1;
+                rex.rex = 0;
+                if(!rex.is32bits)
+                    while(u32>=0x40 && u32<=0x4f) {
+                        rex.rex = u32;
+                        u32 = PK(i32);
+                        i32++;
+                    }
                 if(!box64_dynarec_test && (gd!=xRSP) && u32>=0x58 && u32<=0x5f && (dyn->size>(ninst+1) && dyn->insts[ninst+1].pred_sz==1)) {
                     // double pop!
                     u32= xRAX+(u32&0x07)+(rex.b<<3);
                     MESSAGE(LOG_DUMP, "DOUBLE POP\n");
                     if(gd==u32) {
-                        ADDx_U12(xRSP, xRSP, 0x8);
-                        POP1(gd);
+                        ADDz_U12(xRSP, xRSP, rex.is32bits?0x4:0x8);
+                        POP1z(gd);
                     } else {
-                        POP2(gd, (u32==xRSP)?x1:u32);
+                        POP2z(gd, (u32==xRSP)?x1:u32);
                         if(u32==xRSP) {
-                            MOVx_REG(u32, x1);
+                            MOVz_REG(u32, x1);
                         }
                     }
                     dyn->doublepop = 1;
                     SKIPTEST(x1);  // disable test for this OP
                 } else {
                     if(gd == xRSP) {
-                        POP1(x1);
-                        MOVx_REG(gd, x1);
+                        POP1z(x1);
+                        MOVz_REG(gd, x1);
                     } else {
-                        POP1(gd);
+                        POP1z(gd);
                     }
                 }
             }
             break;
+        case 0x60:
+            if(rex.is32bits) {
+                INST_NAME("PUSHAD");
+                MOVw_REG(x1, xRSP);
+                PUSH2_32(xRAX, xRCX);
+                PUSH2_32(xRDX, xRBX);
+                PUSH2_32(x1, xRBP);
+                PUSH2_32(xRSI, xRDI);
+            } else {
+                DEFAULT;
+            }
+            break;
+        case 0x61:
+            if(rex.is32bits) {
+                INST_NAME("POPAD");
+                POP2_32(xRDI, xRSI);
+                POP2_32(xRBP, x1);
+                POP2_32(xRBX, xRDX);
+                POP2_32(xRCX, xRAX);
+            } else {
+                DEFAULT;
+            }
+            break;
 
         case 0x63:
-            INST_NAME("MOVSXD Gd, Ed");
-            nextop = F8;
-            GETGD;
-            if(rex.w) {
-                if(MODREG) {   // reg <= reg
-                    SXTWx(gd, xRAX+(nextop&7)+(rex.b<<3));
-                } else {                    // mem <= reg
-                    SMREAD();
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    LDSW(gd, ed, fixedaddress);
-                }
+            if(rex.is32bits) {
+                // ARPL here
+                DEFAULT;                
             } else {
-                if(MODREG) {   // reg <= reg
-                    MOVw_REG(gd, xRAX+(nextop&7)+(rex.b<<3));
-                } else {                    // mem <= reg
-                    SMREAD();
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    LDW(gd, ed, fixedaddress);
+                INST_NAME("MOVSXD Gd, Ed");
+                nextop = F8;
+                GETGD;
+                if(rex.w) {
+                    if(MODREG) {   // reg <= reg
+                        SXTWx(gd, xRAX+(nextop&7)+(rex.b<<3));
+                    } else {                    // mem <= reg
+                        SMREAD();
+                        addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                        LDSW(gd, ed, fixedaddress);
+                    }
+                } else {
+                    if(MODREG) {   // reg <= reg
+                        MOVw_REG(gd, xRAX+(nextop&7)+(rex.b<<3));
+                    } else {                    // mem <= reg
+                        SMREAD();
+                        addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                        LDW(gd, ed, fixedaddress);
+                    }
                 }
             }
             break;
@@ -619,10 +713,10 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 MESSAGE(LOG_DUMP, "PUSH then RET, using indirect\n");
                 TABLE64(x3, addr-4);
                 LDRSW_U12(x1, x3, 0);
-                PUSH1(x1);
+                PUSH1z(x1);
             } else {
-                MOV64x(x3, i64);
-                PUSH1(x3);
+                MOV64z(x3, i64);
+                PUSH1z(x3);
             }
             break;
         case 0x69:
@@ -661,8 +755,8 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         case 0x6A:
             INST_NAME("PUSH Ib");
             i64 = F8S;
-            MOV64x(x3, i64);
-            PUSH1(x3);
+            MOV64z(x3, i64);
+            PUSH1z(x3);
             break;
         case 0x6B:
             INST_NAME("IMUL Gd, Ed, Ib");
@@ -698,6 +792,18 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             }
             break;
 
+        case 0x6D:
+            INST_NAME("INSD");
+            SETFLAGS(X_ALL, SF_SET);    // Hack to set flags in "don't care" state
+            GETIP(ip);
+            STORE_XEMU_CALL(xRIP);
+            CALL(native_priv, -1);
+            LOAD_XEMU_CALL(xRIP);
+            jump_to_epilog(dyn, 0, xRIP, ninst);
+            *need_epilog = 0;
+            *ok = 0;
+            break;
+
         #define GO(GETFLAGS, NO, YES, F)                                \
             READFLAGS(F);                                               \
             i8 = F8S;                                                   \
@@ -727,7 +833,13 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         GOCOND(0x70, "J", "ib");
 
         #undef GO
-        
+
+        case 0x82:
+            if(!rex.is32bits) {
+                DEFAULT;
+                return ip;
+            }
+            // fallthru
         case 0x80:
             nextop = F8;
             switch((nextop>>3)&7) {
@@ -1053,12 +1165,13 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         case 0x8C:
             INST_NAME("MOV Ed, Seg");
             nextop=F8;
+            u8 = (nextop&0x38)>>3;
             if((nextop&0xC0)==0xC0) {   // reg <= seg
-                LDRH_U12(xRAX+(nextop&7)+(rex.b<<3), xEmu, offsetof(x64emu_t, segs[(nextop&0x38)>>3]));
+                LDRw_U12(xRAX+(nextop&7)+(rex.b<<3), xEmu, offsetof(x64emu_t, segs[u8]));
             } else {                    // mem <= seg
-                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
-                LDRH_U12(x3, xEmu, offsetof(x64emu_t, segs[(nextop&0x38)>>3]));
-                STH(x3, ed, fixedaddress);
+                LDRw_U12(x3, xEmu, offsetof(x64emu_t, segs[u8]));
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
+                STH(x3, wback, fixedaddress);
                 SMWRITE2();
             }
             break;
@@ -1073,7 +1186,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 if(gd!=ed) {    // it's sometimes used as a 3 bytes NOP
                     MOVxw_REG(gd, ed);
                 }
-                else if(!rex.w) {
+                else if(!rex.w && !rex.is32bits) {
                     MOVw_REG(gd, gd);   //truncate the higher 32bits as asked
                 }
             }
@@ -1081,32 +1194,33 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         case 0x8E:
             INST_NAME("MOV Seg,Ew");
             nextop = F8;
+            u8 = (nextop&0x38)>>3;
             if((nextop&0xC0)==0xC0) {
                 ed = xRAX+(nextop&7)+(rex.b<<3);
             } else {
                 SMREAD();
-                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 1, rex, NULL, 0, 0);
-                LDH(x1, ed, fixedaddress);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
+                LDH(x1, wback, fixedaddress);
                 ed = x1;
             }
-            STRw_U12(ed, xEmu, offsetof(x64emu_t, segs[(nextop&0x38)>>3]));
-            STRw_U12(wZR, xEmu, offsetof(x64emu_t, segs_serial[(nextop&0x38)>>3]));
+            STRw_U12(ed, xEmu, offsetof(x64emu_t, segs[u8]));
+            STRw_U12(wZR, xEmu, offsetof(x64emu_t, segs_serial[u8]));
             break;
         case 0x8F:
             INST_NAME("POP Ed");
             nextop = F8;
             if(MODREG) {
-                POP1(xRAX+(nextop&7)+(rex.b<<3));
+                POP1z(xRAX+(nextop&7)+(rex.b<<3));
             } else {
-                POP1(x2); // so this can handle POP [ESP] and maybe some variant too
+                POP1z(x2); // so this can handle POP [ESP] and maybe some variant too
                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
                 if(ed==xRSP) {
-                    STx(x2, ed, fixedaddress);
+                    STz(x2, ed, fixedaddress);
                 } else {
                     // complicated to just allow a segfault that can be recovered correctly
-                    SUBx_U12(xRSP, xRSP, 8);
-                    STx(x2, ed, fixedaddress);
-                    ADDx_U12(xRSP, xRSP, 8);
+                    SUBz_U12(xRSP, xRSP, rex.is32bits?4:8);
+                    STz(x2, ed, fixedaddress);
+                    ADDz_U12(xRSP, xRSP, rex.is32bits?4:8);
                 }
             }
             break;
@@ -1148,27 +1262,33 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         case 0x9C:
             INST_NAME("PUSHF");
             READFLAGS(X_ALL);
-            
-            PUSH1(xFlags);
+            PUSH1z(xFlags);
             break;
         case 0x9D:
             INST_NAME("POPF");
             SETFLAGS(X_ALL, SF_SET);
-            POP1(xFlags);
+            POP1z(xFlags);
             MOV32w(x1, 0x3F7FD7);
             ANDw_REG(xFlags, xFlags, x1);
             ORRw_mask(xFlags, xFlags, 0b011111, 0);   //mask=0x00000002
             SET_DFNONE(x1);
+            if(box64_wine) {    // should this be done all the time?
+                TBZ_NEXT(xFlags, F_TF);
+                MOV64x(x1, addr);
+                STORE_XEMU_CALL(x1);
+                CALL(native_singlestep, -1);
+                BFCw(xFlags, F_TF, 1);
+            }
             break;
         case 0x9E:
             INST_NAME("SAHF");
             SETFLAGS(X_CF|X_PF|X_AF|X_ZF|X_SF, SF_SUBSET);
             MOV32w(x2, 0b11010101);
             BICw_REG(xFlags, xFlags, x2);
-            UBFXx(x1, xRAX, 8, 8);
+            UBFXw(x1, xRAX, 8, 8);
             ANDw_REG(x1, x1, x2);
             ORRw_REG(xFlags, xFlags, x1);
-            SET_DFNONE(x1);	
+            SET_DFNONE(x1);
             break;
         case 0x9F:
             INST_NAME("LAHF");
@@ -1177,28 +1297,40 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             break;
         case 0xA0:
             INST_NAME("MOV AL,Ob");
-            u64 = F64;
-            MOV64x(x1, u64);
+            if(rex.is32bits)
+                u64 = F32;
+            else
+                u64 = F64;
+            MOV64z(x1, u64);
             LDRB_U12(x2, x1, 0);
             BFIx(xRAX, x2, 0, 8);
             break;
         case 0xA1:
             INST_NAME("MOV EAX,Od");
-            u64 = F64;
-            MOV64x(x1, u64);
+            if(rex.is32bits)
+                u64 = F32;
+            else
+                u64 = F64;
+            MOV64z(x1, u64);
             LDRxw_U12(xRAX, x1, 0);
             break;
         case 0xA2:
             INST_NAME("MOV Ob,AL");
-            u64 = F64;
-            MOV64x(x1, u64);
+            if(rex.is32bits)
+                u64 = F32;
+            else
+                u64 = F64;
+            MOV64z(x1, u64);
             STRB_U12(xRAX, x1, 0);
             SMWRITE();
             break;
         case 0xA3:
             INST_NAME("MOV Od,EAX");
-            u64 = F64;
-            MOV64x(x1, u64);
+            if(rex.is32bits)
+                u64 = F32;
+            else
+                u64 = F64;
+            MOV64z(x1, u64);
             STRxw_U12(xRAX, x1, 0);
             SMWRITE();
             break;
@@ -1449,7 +1581,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 break;
             }
             break;
-        
+
 
         case 0xB0:
         case 0xB1:
@@ -1679,7 +1811,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             }
             BARRIER(BARRIER_FLOAT);
             i32 = F16;
-            retn_to_epilog(dyn, ninst, i32);
+            retn_to_epilog(dyn, ninst, rex, i32);
             *need_epilog = 0;
             *ok = 0;
             break;
@@ -1690,7 +1822,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 READFLAGS(X_PEND);  // so instead, force the deferred flags, so it's not too slow, and flags are not lost
             }
             BARRIER(BARRIER_FLOAT);
-            ret_to_epilog(dyn, ninst);
+            ret_to_epilog(dyn, ninst, rex);
             *need_epilog = 0;
             *ok = 0;
             break;
@@ -1706,7 +1838,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     eb2 = (ed&4)>>2;    // L or H
                 } else {
                     eb1 = xRAX+(nextop&7)+(rex.b<<3);
-                    eb2 = 0;            
+                    eb2 = 0;
                 }
                 MOV32w(x3, u8);
                 BFIx(eb1, x3, eb2*8, 8);
@@ -1716,7 +1848,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 if(u8) {
                     MOV32w(x3, u8);
                     ed = x3;
-                } else 
+                } else
                     ed = xZR;
                 STB(ed, wback, fixedaddress);
                 SMWRITELOCK(lock);
@@ -1744,8 +1876,8 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
 
         case 0xC9:
             INST_NAME("LEAVE");
-            MOVx_REG(xRSP, xRBP);
-            POP1(xRBP);
+            MOVz_REG(xRSP, xRBP);
+            POP1z(xRBP);
             break;
 
         case 0xCC:
@@ -1770,6 +1902,9 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     x87_forget(dyn, ninst, x3, x4, 0);
                     sse_purge07cache(dyn, ninst, x3);
                     tmp = isSimpleWrapper(*(wrapper_t*)(addr));
+                    if(isRetX87Wrapper(*(wrapper_t*)(addr)))
+                        // return value will be on the stack, so the stack depth needs to be updated
+                        x87_purgecache(dyn, ninst, 0, x3, x1, x4);
                     if((box64_log<2 && !cycle_log) && tmp) {
                         //GETIP(ip+3+8+8); // read the 0xCC
                         call_n(dyn, ninst, *(void**)(addr+8), tmp);
@@ -1808,6 +1943,48 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 #endif
             }
             break;
+        case 0xCD:
+            u8 = F8;
+            if(box64_wine && u8==0x2D) {
+                INST_NAME("INT 2D");
+                // lets do nothing
+                MESSAGE(LOG_INFO, "INT 2D Windows anti-debug hack\n");
+            } else if (u8==0x80) {
+                INST_NAME("32bits SYSCALL");
+                NOTEST(x1);
+                SMEND();
+                GETIP(addr);
+                STORE_XEMU_CALL(xRIP);
+                CALL_S(x86Syscall, -1);
+                LOAD_XEMU_CALL(xRIP);
+                TABLE64(x3, addr); // expected return address
+                CMPSx_REG(xRIP, x3);
+                B_MARK(cNE);
+                LDRw_U12(w1, xEmu, offsetof(x64emu_t, quit));
+                CBZw_NEXT(w1);
+                MARK;
+                LOAD_XEMU_REM();
+                jump_to_epilog(dyn, 0, xRIP, ninst);
+            } else if(box64_wine && u8==0x29) {
+                INST_NAME("INT 0x29");
+                // __fastfail ignored!
+                MOV32w(x1, 1);
+                STRw_U12(x1, xEmu, offsetof(x64emu_t, quit));
+                jump_to_epilog(dyn, 0, xRIP, ninst);
+                *need_epilog = 0;
+                *ok = 0;
+            } else {
+                INST_NAME("INT n");
+                SETFLAGS(X_ALL, SF_SET);    // Hack to set flags in "don't care" state
+                GETIP(ip);
+                STORE_XEMU_CALL(xRIP);
+                CALL(native_priv, -1);
+                LOAD_XEMU_CALL(xRIP);
+                jump_to_epilog(dyn, 0, xRIP, ninst);
+                *need_epilog = 0;
+                *ok = 0;
+            }
+            break;
 
         case 0xCF:
             INST_NAME("IRET");
@@ -1995,7 +2172,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     MOV64xw(x4, (rex.w?64:32));
                     SUBx_REG(x3, x4, x3);
                     GETED(0);
-                    if(!rex.w && MODREG) {MOVw_REG(ed, ed);}
+                    if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
                     B_NEXT(cEQ);
                     RORxw_REG(ed, ed, x3);
                     WBACK;
@@ -2019,7 +2196,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
                     }
                     GETED(0);
-                    if(!rex.w && MODREG) {MOVw_REG(ed, ed);}
+                    if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
                     B_NEXT(cEQ);
                     RORxw_REG(ed, ed, x3);
                     WBACK;
@@ -2046,7 +2223,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         ANDSw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
                     }
                     GETEDW(x4, x1, 0);
-                    if(!rex.w && MODREG) {MOVw_REG(ed, ed);}
+                    if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
                     B_NEXT(cEQ);
                     CALL_(rex.w?((void*)rcl64):((void*)rcl32), ed, x4);
                     WBACK;
@@ -2062,7 +2239,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         ANDSw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
                     }
                     GETEDW(x4, x1, 0);
-                    if(!rex.w && MODREG) {MOVw_REG(ed, ed);}
+                    if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
                     B_NEXT(cEQ);
                     CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x4);
                     WBACK;
@@ -2077,7 +2254,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
                     }
                     GETED(0);
-                    if(!rex.w && MODREG) {MOVw_REG(ed, ed);}
+                    if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
                     B_NEXT(cEQ);
                     emit_shl32(dyn, ninst, rex, ed, x3, x5, x4);
                     WBACK;
@@ -2091,7 +2268,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
                     }
                     GETED(0);
-                    if(!rex.w && MODREG) {MOVw_REG(ed, ed);}
+                    if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
                     B_NEXT(cEQ);
                     emit_shr32(dyn, ninst, rex, ed, x3, x5, x4);
                     WBACK;
@@ -2105,7 +2282,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
                     }
                     GETED(0);
-                    if(!rex.w && MODREG) {MOVw_REG(ed, ed);}
+                    if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
                     B_NEXT(cEQ);
                     UFLAG_OP12(ed, x3);
                     ASRxw_REG(ed, ed, x3);
@@ -2145,7 +2322,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 CHECK_CACHE()) {                                        \
                 /* out of the block */                                  \
                 i32 = dyn->insts[ninst].epilog-(dyn->native_size);      \
-                if(Z) {CBNZx(xRCX, i32);} else {CBZx(xRCX, i32);};      \
+                if(Z) {CBNZz(xRCX, i32);} else {CBZz(xRCX, i32);};      \
                 if(dyn->insts[ninst].x64.jmp_insts==-1) {               \
                     if(!(dyn->insts[ninst].x64.barrier&BARRIER_FLOAT))  \
                         fpu_purgecache(dyn, ninst, 1, x1, x2, x3);      \
@@ -2158,13 +2335,13 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {                                                    \
                 /* inside the block */                                  \
                 i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size);    \
-                if(Z) {CBZx(xRCX, i32);} else {CBNZx(xRCX, i32);};      \
+                if(Z) {CBZz(xRCX, i32);} else {CBNZz(xRCX, i32);};      \
             }
         case 0xE0:
             INST_NAME("LOOPNZ");
             READFLAGS(X_ZF);
             i8 = F8S;
-            SUBx_U12(xRCX, xRCX, 1);
+            SUBz_U12(xRCX, xRCX, 1);
             TBNZ_NEXT(xFlags, 1<<F_ZF);
             GO(0);
             break;
@@ -2172,14 +2349,14 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("LOOPZ");
             READFLAGS(X_ZF);
             i8 = F8S;
-            SUBx_U12(xRCX, xRCX, 1);
+            SUBz_U12(xRCX, xRCX, 1);
             TBZ_NEXT(xFlags, 1<<F_ZF);
             GO(0);
             break;
         case 0xE2:
             INST_NAME("LOOP");
             i8 = F8S;
-            SUBx_U12(xRCX, xRCX, 1);
+            SUBz_U12(xRCX, xRCX, 1);
             GO(0);
             break;
         case 0xE3:
@@ -2198,9 +2375,9 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 #endif
             }
             #if STEP < 2
-            if(isNativeCall(dyn, addr+i32, &dyn->insts[ninst].natcall, &dyn->insts[ninst].retn))
+            if(!rex.is32bits && isNativeCall(dyn, addr+i32, &dyn->insts[ninst].natcall, &dyn->insts[ninst].retn))
                 tmp = dyn->insts[ninst].pass2choice = 3;
-            else 
+            else
                 tmp = dyn->insts[ninst].pass2choice = 0;
             #else
                 tmp = dyn->insts[ninst].pass2choice;
@@ -2219,10 +2396,13 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     SKIPTEST(x1);    // disable test as this hack dos 2 instructions for 1
                     // calling a native function
                     sse_purge07cache(dyn, ninst, x3);
-                    if((box64_log<2 && !cycle_log) && dyn->insts[ninst].natcall)
+                    if((box64_log<2 && !cycle_log) && dyn->insts[ninst].natcall) {
                         tmp=isSimpleWrapper(*(wrapper_t*)(dyn->insts[ninst].natcall+2));
-                    else
+                    } else
                         tmp=0;
+                    if(dyn->insts[ninst].natcall && isRetX87Wrapper(*(wrapper_t*)(dyn->insts[ninst].natcall+2)))
+                    // return value will be on the stack, so the stack depth needs to be updated
+                        x87_purgecache(dyn, ninst, 0, x3, x1, x4);
                     if((box64_log<2 && !cycle_log) && dyn->insts[ninst].natcall && tmp) {
                         //GETIP(ip+3+8+8); // read the 0xCC
                         call_n(dyn, ninst, *(void**)(dyn->insts[ninst].natcall+2+8), tmp);
@@ -2268,8 +2448,12 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         *need_epilog = 0;
                         *ok = 0;
                     }
-                    TABLE64(x2, addr);
-                    PUSH1(x2);
+                    if(rex.is32bits) {
+                        MOV32w(x2, addr);
+                    } else {
+                        TABLE64(x2, addr);
+                    }
+                    PUSH1z(x2);
                     if(box64_dynarec_callret) {
                         // Push actual return address
                         if(addr < (dyn->start+dyn->isize)) {
@@ -2286,12 +2470,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         *ok = 0;
                         *need_epilog = 0;
                     }
-                    if(addr+i32==0) {   // self modifying code maybe? so use indirect address fetching
-                        TABLE64(x4, addr-4);
-                        LDRx_U12(x4, x4, 0);
-                        jump_to_next(dyn, 0, x4, ninst);
-                    } else
-                        jump_to_next(dyn, addr+i32, 0, ninst);
+                    jump_to_next(dyn, addr+i32, 0, ninst);
                     break;
             }
             break;
@@ -2305,11 +2484,11 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 INST_NAME("JMP Ib");
                 i32 = F8S;
             }
-            JUMP(addr+i32, 0);
+            JUMP((uintptr_t)getAlternate((void*)(addr+i32)), 0);
             if(dyn->insts[ninst].x64.jmp_insts==-1) {
                 // out of the block
                 fpu_purgecache(dyn, ninst, 1, x1, x2, x3);
-                jump_to_next(dyn, addr+i32, 0, ninst);
+                jump_to_next(dyn, (uintptr_t)getAlternate((void*)(addr+i32)), 0, ninst);
             } else {
                 // inside the block
                 CacheTransform(dyn, ninst, CHECK_CACHE(), x1, x2, x3);
@@ -2324,6 +2503,21 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             *ok = 0;
             break;
 
+        case 0xEC:                      /* IN AL, DX */
+        case 0xED:                      /* IN EAX, DX */
+        case 0xEE:                      /* OUT DX, AL */
+        case 0xEF:                      /* OUT DX, EAX */
+            INST_NAME(opcode==0xEC?"IN AL, DX":(opcode==0xED?"IN EAX, DX":(opcode==0xEE?"OUT DX? AL":"OUT DX, EAX")));
+            SETFLAGS(X_ALL, SF_SET);    // Hack to set flags in "don't care" state
+            GETIP(ip);
+            STORE_XEMU_CALL(xRIP);
+            CALL(native_priv, -1);
+            LOAD_XEMU_CALL(xRIP);
+            jump_to_epilog(dyn, 0, xRIP, ninst);
+            *need_epilog = 0;
+            *ok = 0;
+            break;
+
         case 0xF0:
             addr = dynarec64_F0(dyn, addr, ip, ninst, rex, rep, ok, need_epilog);
             break;
@@ -2485,9 +2679,9 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         MOVw_REG(xRAX, x2);
                         MOVw_REG(xRDX, x4);
                     } else {
-                        if(ninst 
-                           && dyn->insts[ninst-1].x64.addr 
-                           && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x31 
+                        if(ninst
+                           && dyn->insts[ninst-1].x64.addr
+                           && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x31
                            && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0xD2) {
                             SET_DFNONE(x2);
                             GETED(0);
@@ -2523,7 +2717,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         MOVw_REG(xRDX, x4);
                     } else {
                         if(ninst && dyn->insts
-                           &&  dyn->insts[ninst-1].x64.addr 
+                           &&  dyn->insts[ninst-1].x64.addr
                            && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x48
                            && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0x99) {
                             SET_DFNONE(x2)
@@ -2567,7 +2761,18 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             SET_DFNONE(x1);
             ORRx_mask(xFlags, xFlags, 1, 0, 0); // xFlags | 1
             break;
-        
+        case 0xFA:                      /* STI */
+        case 0xFB:                      /* CLI */
+            INST_NAME(opcode==0xFA?"CLI":"STI");
+            SETFLAGS(X_ALL, SF_SET);    // Hack to set flags in "don't care" state
+            GETIP(ip);
+            STORE_XEMU_CALL(xRIP);
+            CALL(native_priv, -1);
+            LOAD_XEMU_CALL(xRIP);
+            jump_to_epilog(dyn, 0, xRIP, ninst);
+            *need_epilog = 0;
+            *ok = 0;
+            break;
         case 0xFC:
             INST_NAME("CLD");
             BFCw(xFlags, F_DF, 1);
@@ -2617,7 +2822,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;
                 case 2: // CALL Ed
                     INST_NAME("CALL Ed");
-                    PASS2IF((box64_dynarec_safeflags>1) || 
+                    PASS2IF((box64_dynarec_safeflags>1) ||
                         ((ninst && dyn->insts[ninst-1].x64.set_flags)
                         || ((ninst>1) && dyn->insts[ninst-2].x64.set_flags)), 1)
                     {
@@ -2625,7 +2830,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     } else {
                         SETFLAGS(X_ALL, SF_SET);    //Hack to put flag in "don't care" state
                     }
-                    GETEDx(0);
+                    GETEDz(0);
                     if(box64_dynarec_callret && box64_dynarec_bigblock>1) {
                         BARRIER(BARRIER_FULL);
                     } else {
@@ -2647,22 +2852,41 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         }
                         STPx_S7_preindex(x4, xRIP, xSP, -16);
                     }
-                    PUSH1(xRIP);
+                    PUSH1z(xRIP);
                     jump_to_next(dyn, 0, ed, ninst);
                     break;
                 case 4: // JMP Ed
                     INST_NAME("JMP Ed");
                     READFLAGS(X_PEND);
                     BARRIER(BARRIER_FLOAT);
-                    GETEDx(0);
+                    GETEDz(0);
                     jump_to_next(dyn, 0, ed, ninst);
                     *need_epilog = 0;
                     *ok = 0;
                     break;
+                case 5: // JMP FAR Ed
+                    if(MODREG) {
+                        DEFAULT;
+                    } else {
+                        INST_NAME("JMP FAR Ed");
+                        READFLAGS(X_PEND);
+                        BARRIER(BARRIER_FLOAT);
+                        SMREAD()
+                        addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0, 0, rex, NULL, 0, 0);
+                        LDxw(x1, wback, 0);
+                        ed = x1;
+                        LDH(x3, wback, rex.w?8:4);
+                        STW(x3, xEmu, offsetof(x64emu_t, segs[_CS]));
+                        STW(xZR, xEmu, offsetof(x64emu_t, segs_serial[_CS]));
+                        jump_to_epilog(dyn, 0, ed, ninst);
+                        *need_epilog = 0;
+                        *ok = 0;
+                    }
+                    break;
                 case 6: // Push Ed
                     INST_NAME("PUSH Ed");
-                    GETEDx(0);
-                    PUSH1(ed);
+                    GETEDz(0);
+                    PUSH1z(ed);
                     break;
 
                 default:
@@ -2673,6 +2897,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         default:
             DEFAULT;
     }
- 
+
      return addr;
 }