diff options
Diffstat (limited to 'src/dynarec/arm64/dynarec_arm64_00.c')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_00.c | 492 |
1 files changed, 358 insertions, 134 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c index 7696a6ec..0e4dee33 100644 --- a/src/dynarec/arm64/dynarec_arm64_00.c +++ b/src/dynarec/arm64/dynarec_arm64_00.c @@ -1,7 +1,6 @@ #include <stdio.h> #include <stdlib.h> #include <stddef.h> -#include <pthread.h> #include <errno.h> #include <signal.h> @@ -26,6 +25,7 @@ #include "dynarec_arm64_helper.h" int isSimpleWrapper(wrapper_t fun); +int isRetX87Wrapper(wrapper_t fun); uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) { @@ -102,7 +102,25 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin i64 = F32S; emit_add32c(dyn, ninst, rex, xRAX, i64, x3, x4, x5); break; - + case 0x06: + if(rex.is32bits) { + INST_NAME("PUSH ES"); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, segs[_ES])); + PUSH1_32(x1); + } else { + DEFAULT; + } + break; + case 0x07: + if(rex.is32bits) { + INST_NAME("POP ES"); + POP1_32(x1); + STRH_U12(x1, xEmu, offsetof(x64emu_t, segs[_ES])); + STRw_U12(xZR, xEmu, offsetof(x64emu_t, segs_serial[_ES])); + } else { + DEFAULT; + } + break; case 0x08: INST_NAME("OR Eb, Gb"); SETFLAGS(X_ALL, SF_SET_PENDING); @@ -278,7 +296,25 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOV64xw(x2, i64); emit_sbb32(dyn, ninst, rex, xRAX, x2, x3, x4); break; - + case 0x1E: + if(rex.is32bits) { + INST_NAME("PUSH DS"); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, segs[_DS])); + PUSH1_32(x1); + } else { + DEFAULT; + } + break; + case 0x1F: + if(rex.is32bits) { + INST_NAME("POP DS"); + POP1_32(x1); + STRH_U12(x1, xEmu, offsetof(x64emu_t, segs[_DS])); + STRw_U12(xZR, xEmu, offsetof(x64emu_t, segs_serial[_DS])); + } else { + DEFAULT; + } + break; case 0x20: INST_NAME("AND Eb, Gb"); SETFLAGS(X_ALL, SF_SET_PENDING); @@ -490,6 +526,32 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin emit_cmp32_0(dyn, ninst, rex, xRAX, x3, x4); break; + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + INST_NAME("INC Reg (32bits)"); + SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); + gd = xRAX + (opcode&7); + emit_inc32(dyn, ninst, rex, gd, x1, x2); + break; + case 0x48: + case 0x49: + case 0x4A: + case 0x4B: + case 0x4C: + case 0x4D: + case 0x4E: + case 0x4F: + INST_NAME("DEC Reg (32bits)"); + SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); + gd = xRAX + (opcode&7); + emit_dec32(dyn, ninst, rex, gd, x1, x2); + break; case 0x50: case 0x51: case 0x52: @@ -504,31 +566,33 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin dyn->doublepush = 0; } else { gd = xRAX+(opcode&0x07)+(rex.b<<3); - if(gd==xRSP) { - MOVx_REG(x1, gd); - gd = x1; - } - u32 = 0; - i32 = 0; - do { - rex.rex = u32; - u32 = PK(i32); - i32++; - } while(u32>=0x40 && u32<=0x4f); - if(!box64_dynarec_test && u32>=0x50 && u32<=0x57 && (dyn->size>(ninst+1) && dyn->insts[ninst+1].pred_sz==1)) { - // double push! + u32 = PK(0); + i32 = 1; + rex.rex = 0; + if(!rex.is32bits) + while(u32>=0x40 && u32<=0x4f) { + rex.rex = u32; + u32 = PK(i32); + i32++; + } + if(!box64_dynarec_test && u32>=0x50 && u32<=0x57 && (dyn->size>(ninst+1) && dyn->insts[ninst+1].pred_sz==1) && gd != xRSP) { u32= xRAX+(u32&0x07)+(rex.b<<3); - MESSAGE(LOG_DUMP, "DOUBLE PUSH\n"); if(u32==xRSP) { - MOVx_REG(x1, u32); - u32 = x1; + PUSH1z(gd); + } else { + // double push! + MESSAGE(LOG_DUMP, "DOUBLE PUSH\n"); + PUSH2z(gd, u32); + dyn->doublepush = 1; } - PUSH2(gd, u32); - dyn->doublepush = 1; - SKIPTEST(x1); // disable test for this OP } else { - PUSH1(gd); - } + if (gd == xRSP) { + MOVz_REG(x1, xRSP); + PUSH1z(x1); + } else { + PUSH1z(gd); + } + } } break; case 0x58: @@ -545,58 +609,88 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin dyn->doublepop = 0; } else { gd = xRAX+(opcode&0x07)+(rex.b<<3); - u32 = 0; - i32 = 0; - do { - rex.rex = u32; - u32 = PK(i32); - i32++; - } while(u32>=0x40 && u32<=0x4f); + u32 = PK(0); + i32 = 1; + rex.rex = 0; + if(!rex.is32bits) + while(u32>=0x40 && u32<=0x4f) { + rex.rex = u32; + u32 = PK(i32); + i32++; + } if(!box64_dynarec_test && (gd!=xRSP) && u32>=0x58 && u32<=0x5f && (dyn->size>(ninst+1) && dyn->insts[ninst+1].pred_sz==1)) { // double pop! u32= xRAX+(u32&0x07)+(rex.b<<3); MESSAGE(LOG_DUMP, "DOUBLE POP\n"); if(gd==u32) { - ADDx_U12(xRSP, xRSP, 0x8); - POP1(gd); + ADDz_U12(xRSP, xRSP, rex.is32bits?0x4:0x8); + POP1z(gd); } else { - POP2(gd, (u32==xRSP)?x1:u32); + POP2z(gd, (u32==xRSP)?x1:u32); if(u32==xRSP) { - MOVx_REG(u32, x1); + MOVz_REG(u32, x1); } } dyn->doublepop = 1; SKIPTEST(x1); // disable test for this OP } else { if(gd == xRSP) { - POP1(x1); - MOVx_REG(gd, x1); + POP1z(x1); + MOVz_REG(gd, x1); } else { - POP1(gd); + POP1z(gd); } } } break; + case 0x60: + if(rex.is32bits) { + INST_NAME("PUSHAD"); + MOVw_REG(x1, xRSP); + PUSH2_32(xRAX, xRCX); + PUSH2_32(xRDX, xRBX); + PUSH2_32(x1, xRBP); + PUSH2_32(xRSI, xRDI); + } else { + DEFAULT; + } + break; + case 0x61: + if(rex.is32bits) { + INST_NAME("POPAD"); + POP2_32(xRDI, xRSI); + POP2_32(xRBP, x1); + POP2_32(xRBX, xRDX); + POP2_32(xRCX, xRAX); + } else { + DEFAULT; + } + break; case 0x63: - INST_NAME("MOVSXD Gd, Ed"); - nextop = F8; - GETGD; - if(rex.w) { - if(MODREG) { // reg <= reg - SXTWx(gd, xRAX+(nextop&7)+(rex.b<<3)); - } else { // mem <= reg - SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); - LDSW(gd, ed, fixedaddress); - } + if(rex.is32bits) { + // ARPL here + DEFAULT; } else { - if(MODREG) { // reg <= reg - MOVw_REG(gd, xRAX+(nextop&7)+(rex.b<<3)); - } else { // mem <= reg - SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); - LDW(gd, ed, fixedaddress); + INST_NAME("MOVSXD Gd, Ed"); + nextop = F8; + GETGD; + if(rex.w) { + if(MODREG) { // reg <= reg + SXTWx(gd, xRAX+(nextop&7)+(rex.b<<3)); + } else { // mem <= reg + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); + LDSW(gd, ed, fixedaddress); + } + } else { + if(MODREG) { // reg <= reg + MOVw_REG(gd, xRAX+(nextop&7)+(rex.b<<3)); + } else { // mem <= reg + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); + LDW(gd, ed, fixedaddress); + } } } break; @@ -619,10 +713,10 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MESSAGE(LOG_DUMP, "PUSH then RET, using indirect\n"); TABLE64(x3, addr-4); LDRSW_U12(x1, x3, 0); - PUSH1(x1); + PUSH1z(x1); } else { - MOV64x(x3, i64); - PUSH1(x3); + MOV64z(x3, i64); + PUSH1z(x3); } break; case 0x69: @@ -661,8 +755,8 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0x6A: INST_NAME("PUSH Ib"); i64 = F8S; - MOV64x(x3, i64); - PUSH1(x3); + MOV64z(x3, i64); + PUSH1z(x3); break; case 0x6B: INST_NAME("IMUL Gd, Ed, Ib"); @@ -698,6 +792,18 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } break; + case 0x6D: + INST_NAME("INSD"); + SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state + GETIP(ip); + STORE_XEMU_CALL(xRIP); + CALL(native_priv, -1); + LOAD_XEMU_CALL(xRIP); + jump_to_epilog(dyn, 0, xRIP, ninst); + *need_epilog = 0; + *ok = 0; + break; + #define GO(GETFLAGS, NO, YES, F) \ READFLAGS(F); \ i8 = F8S; \ @@ -727,7 +833,13 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin GOCOND(0x70, "J", "ib"); #undef GO - + + case 0x82: + if(!rex.is32bits) { + DEFAULT; + return ip; + } + // fallthru case 0x80: nextop = F8; switch((nextop>>3)&7) { @@ -1053,12 +1165,13 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0x8C: INST_NAME("MOV Ed, Seg"); nextop=F8; + u8 = (nextop&0x38)>>3; if((nextop&0xC0)==0xC0) { // reg <= seg - LDRH_U12(xRAX+(nextop&7)+(rex.b<<3), xEmu, offsetof(x64emu_t, segs[(nextop&0x38)>>3])); + LDRw_U12(xRAX+(nextop&7)+(rex.b<<3), xEmu, offsetof(x64emu_t, segs[u8])); } else { // mem <= seg - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0); - LDRH_U12(x3, xEmu, offsetof(x64emu_t, segs[(nextop&0x38)>>3])); - STH(x3, ed, fixedaddress); + LDRw_U12(x3, xEmu, offsetof(x64emu_t, segs[u8])); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0); + STH(x3, wback, fixedaddress); SMWRITE2(); } break; @@ -1073,7 +1186,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(gd!=ed) { // it's sometimes used as a 3 bytes NOP MOVxw_REG(gd, ed); } - else if(!rex.w) { + else if(!rex.w && !rex.is32bits) { MOVw_REG(gd, gd); //truncate the higher 32bits as asked } } @@ -1081,32 +1194,33 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0x8E: INST_NAME("MOV Seg,Ew"); nextop = F8; + u8 = (nextop&0x38)>>3; if((nextop&0xC0)==0xC0) { ed = xRAX+(nextop&7)+(rex.b<<3); } else { SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 1, rex, NULL, 0, 0); - LDH(x1, ed, fixedaddress); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0); + LDH(x1, wback, fixedaddress); ed = x1; } - STRw_U12(ed, xEmu, offsetof(x64emu_t, segs[(nextop&0x38)>>3])); - STRw_U12(wZR, xEmu, offsetof(x64emu_t, segs_serial[(nextop&0x38)>>3])); + STRw_U12(ed, xEmu, offsetof(x64emu_t, segs[u8])); + STRw_U12(wZR, xEmu, offsetof(x64emu_t, segs_serial[u8])); break; case 0x8F: INST_NAME("POP Ed"); nextop = F8; if(MODREG) { - POP1(xRAX+(nextop&7)+(rex.b<<3)); + POP1z(xRAX+(nextop&7)+(rex.b<<3)); } else { - POP1(x2); // so this can handle POP [ESP] and maybe some variant too + POP1z(x2); // so this can handle POP [ESP] and maybe some variant too addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0); if(ed==xRSP) { - STx(x2, ed, fixedaddress); + STz(x2, ed, fixedaddress); } else { // complicated to just allow a segfault that can be recovered correctly - SUBx_U12(xRSP, xRSP, 8); - STx(x2, ed, fixedaddress); - ADDx_U12(xRSP, xRSP, 8); + SUBz_U12(xRSP, xRSP, rex.is32bits?4:8); + STz(x2, ed, fixedaddress); + ADDz_U12(xRSP, xRSP, rex.is32bits?4:8); } } break; @@ -1148,27 +1262,33 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0x9C: INST_NAME("PUSHF"); READFLAGS(X_ALL); - - PUSH1(xFlags); + PUSH1z(xFlags); break; case 0x9D: INST_NAME("POPF"); SETFLAGS(X_ALL, SF_SET); - POP1(xFlags); + POP1z(xFlags); MOV32w(x1, 0x3F7FD7); ANDw_REG(xFlags, xFlags, x1); ORRw_mask(xFlags, xFlags, 0b011111, 0); //mask=0x00000002 SET_DFNONE(x1); + if(box64_wine) { // should this be done all the time? + TBZ_NEXT(xFlags, F_TF); + MOV64x(x1, addr); + STORE_XEMU_CALL(x1); + CALL(native_singlestep, -1); + BFCw(xFlags, F_TF, 1); + } break; case 0x9E: INST_NAME("SAHF"); SETFLAGS(X_CF|X_PF|X_AF|X_ZF|X_SF, SF_SUBSET); MOV32w(x2, 0b11010101); BICw_REG(xFlags, xFlags, x2); - UBFXx(x1, xRAX, 8, 8); + UBFXw(x1, xRAX, 8, 8); ANDw_REG(x1, x1, x2); ORRw_REG(xFlags, xFlags, x1); - SET_DFNONE(x1); + SET_DFNONE(x1); break; case 0x9F: INST_NAME("LAHF"); @@ -1177,28 +1297,40 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 0xA0: INST_NAME("MOV AL,Ob"); - u64 = F64; - MOV64x(x1, u64); + if(rex.is32bits) + u64 = F32; + else + u64 = F64; + MOV64z(x1, u64); LDRB_U12(x2, x1, 0); BFIx(xRAX, x2, 0, 8); break; case 0xA1: INST_NAME("MOV EAX,Od"); - u64 = F64; - MOV64x(x1, u64); + if(rex.is32bits) + u64 = F32; + else + u64 = F64; + MOV64z(x1, u64); LDRxw_U12(xRAX, x1, 0); break; case 0xA2: INST_NAME("MOV Ob,AL"); - u64 = F64; - MOV64x(x1, u64); + if(rex.is32bits) + u64 = F32; + else + u64 = F64; + MOV64z(x1, u64); STRB_U12(xRAX, x1, 0); SMWRITE(); break; case 0xA3: INST_NAME("MOV Od,EAX"); - u64 = F64; - MOV64x(x1, u64); + if(rex.is32bits) + u64 = F32; + else + u64 = F64; + MOV64z(x1, u64); STRxw_U12(xRAX, x1, 0); SMWRITE(); break; @@ -1449,7 +1581,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; } break; - + case 0xB0: case 0xB1: @@ -1679,7 +1811,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } BARRIER(BARRIER_FLOAT); i32 = F16; - retn_to_epilog(dyn, ninst, i32); + retn_to_epilog(dyn, ninst, rex, i32); *need_epilog = 0; *ok = 0; break; @@ -1690,7 +1822,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin READFLAGS(X_PEND); // so instead, force the deferred flags, so it's not too slow, and flags are not lost } BARRIER(BARRIER_FLOAT); - ret_to_epilog(dyn, ninst); + ret_to_epilog(dyn, ninst, rex); *need_epilog = 0; *ok = 0; break; @@ -1706,7 +1838,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin eb2 = (ed&4)>>2; // L or H } else { eb1 = xRAX+(nextop&7)+(rex.b<<3); - eb2 = 0; + eb2 = 0; } MOV32w(x3, u8); BFIx(eb1, x3, eb2*8, 8); @@ -1716,7 +1848,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(u8) { MOV32w(x3, u8); ed = x3; - } else + } else ed = xZR; STB(ed, wback, fixedaddress); SMWRITELOCK(lock); @@ -1744,8 +1876,8 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0xC9: INST_NAME("LEAVE"); - MOVx_REG(xRSP, xRBP); - POP1(xRBP); + MOVz_REG(xRSP, xRBP); + POP1z(xRBP); break; case 0xCC: @@ -1770,6 +1902,9 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin x87_forget(dyn, ninst, x3, x4, 0); sse_purge07cache(dyn, ninst, x3); tmp = isSimpleWrapper(*(wrapper_t*)(addr)); + if(isRetX87Wrapper(*(wrapper_t*)(addr))) + // return value will be on the stack, so the stack depth needs to be updated + x87_purgecache(dyn, ninst, 0, x3, x1, x4); if((box64_log<2 && !cycle_log) && tmp) { //GETIP(ip+3+8+8); // read the 0xCC call_n(dyn, ninst, *(void**)(addr+8), tmp); @@ -1808,6 +1943,48 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin #endif } break; + case 0xCD: + u8 = F8; + if(box64_wine && u8==0x2D) { + INST_NAME("INT 2D"); + // lets do nothing + MESSAGE(LOG_INFO, "INT 2D Windows anti-debug hack\n"); + } else if (u8==0x80) { + INST_NAME("32bits SYSCALL"); + NOTEST(x1); + SMEND(); + GETIP(addr); + STORE_XEMU_CALL(xRIP); + CALL_S(x86Syscall, -1); + LOAD_XEMU_CALL(xRIP); + TABLE64(x3, addr); // expected return address + CMPSx_REG(xRIP, x3); + B_MARK(cNE); + LDRw_U12(w1, xEmu, offsetof(x64emu_t, quit)); + CBZw_NEXT(w1); + MARK; + LOAD_XEMU_REM(); + jump_to_epilog(dyn, 0, xRIP, ninst); + } else if(box64_wine && u8==0x29) { + INST_NAME("INT 0x29"); + // __fastfail ignored! + MOV32w(x1, 1); + STRw_U12(x1, xEmu, offsetof(x64emu_t, quit)); + jump_to_epilog(dyn, 0, xRIP, ninst); + *need_epilog = 0; + *ok = 0; + } else { + INST_NAME("INT n"); + SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state + GETIP(ip); + STORE_XEMU_CALL(xRIP); + CALL(native_priv, -1); + LOAD_XEMU_CALL(xRIP); + jump_to_epilog(dyn, 0, xRIP, ninst); + *need_epilog = 0; + *ok = 0; + } + break; case 0xCF: INST_NAME("IRET"); @@ -1995,7 +2172,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOV64xw(x4, (rex.w?64:32)); SUBx_REG(x3, x4, x3); GETED(0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);} B_NEXT(cEQ); RORxw_REG(ed, ed, x3); WBACK; @@ -2019,7 +2196,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f } GETED(0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);} B_NEXT(cEQ); RORxw_REG(ed, ed, x3); WBACK; @@ -2046,7 +2223,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ANDSw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f } GETEDW(x4, x1, 0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);} B_NEXT(cEQ); CALL_(rex.w?((void*)rcl64):((void*)rcl32), ed, x4); WBACK; @@ -2062,7 +2239,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ANDSw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f } GETEDW(x4, x1, 0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);} B_NEXT(cEQ); CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x4); WBACK; @@ -2077,7 +2254,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f } GETED(0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);} B_NEXT(cEQ); emit_shl32(dyn, ninst, rex, ed, x3, x5, x4); WBACK; @@ -2091,7 +2268,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f } GETED(0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);} B_NEXT(cEQ); emit_shr32(dyn, ninst, rex, ed, x3, x5, x4); WBACK; @@ -2105,7 +2282,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f } GETED(0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);} B_NEXT(cEQ); UFLAG_OP12(ed, x3); ASRxw_REG(ed, ed, x3); @@ -2145,7 +2322,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin CHECK_CACHE()) { \ /* out of the block */ \ i32 = dyn->insts[ninst].epilog-(dyn->native_size); \ - if(Z) {CBNZx(xRCX, i32);} else {CBZx(xRCX, i32);}; \ + if(Z) {CBNZz(xRCX, i32);} else {CBZz(xRCX, i32);}; \ if(dyn->insts[ninst].x64.jmp_insts==-1) { \ if(!(dyn->insts[ninst].x64.barrier&BARRIER_FLOAT)) \ fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \ @@ -2158,13 +2335,13 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { \ /* inside the block */ \ i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); \ - if(Z) {CBZx(xRCX, i32);} else {CBNZx(xRCX, i32);}; \ + if(Z) {CBZz(xRCX, i32);} else {CBNZz(xRCX, i32);}; \ } case 0xE0: INST_NAME("LOOPNZ"); READFLAGS(X_ZF); i8 = F8S; - SUBx_U12(xRCX, xRCX, 1); + SUBz_U12(xRCX, xRCX, 1); TBNZ_NEXT(xFlags, 1<<F_ZF); GO(0); break; @@ -2172,14 +2349,14 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("LOOPZ"); READFLAGS(X_ZF); i8 = F8S; - SUBx_U12(xRCX, xRCX, 1); + SUBz_U12(xRCX, xRCX, 1); TBZ_NEXT(xFlags, 1<<F_ZF); GO(0); break; case 0xE2: INST_NAME("LOOP"); i8 = F8S; - SUBx_U12(xRCX, xRCX, 1); + SUBz_U12(xRCX, xRCX, 1); GO(0); break; case 0xE3: @@ -2198,9 +2375,9 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin #endif } #if STEP < 2 - if(isNativeCall(dyn, addr+i32, &dyn->insts[ninst].natcall, &dyn->insts[ninst].retn)) + if(!rex.is32bits && isNativeCall(dyn, addr+i32, &dyn->insts[ninst].natcall, &dyn->insts[ninst].retn)) tmp = dyn->insts[ninst].pass2choice = 3; - else + else tmp = dyn->insts[ninst].pass2choice = 0; #else tmp = dyn->insts[ninst].pass2choice; @@ -2219,10 +2396,13 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SKIPTEST(x1); // disable test as this hack dos 2 instructions for 1 // calling a native function sse_purge07cache(dyn, ninst, x3); - if((box64_log<2 && !cycle_log) && dyn->insts[ninst].natcall) + if((box64_log<2 && !cycle_log) && dyn->insts[ninst].natcall) { tmp=isSimpleWrapper(*(wrapper_t*)(dyn->insts[ninst].natcall+2)); - else + } else tmp=0; + if(dyn->insts[ninst].natcall && isRetX87Wrapper(*(wrapper_t*)(dyn->insts[ninst].natcall+2))) + // return value will be on the stack, so the stack depth needs to be updated + x87_purgecache(dyn, ninst, 0, x3, x1, x4); if((box64_log<2 && !cycle_log) && dyn->insts[ninst].natcall && tmp) { //GETIP(ip+3+8+8); // read the 0xCC call_n(dyn, ninst, *(void**)(dyn->insts[ninst].natcall+2+8), tmp); @@ -2268,8 +2448,12 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin *need_epilog = 0; *ok = 0; } - TABLE64(x2, addr); - PUSH1(x2); + if(rex.is32bits) { + MOV32w(x2, addr); + } else { + TABLE64(x2, addr); + } + PUSH1z(x2); if(box64_dynarec_callret) { // Push actual return address if(addr < (dyn->start+dyn->isize)) { @@ -2286,12 +2470,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin *ok = 0; *need_epilog = 0; } - if(addr+i32==0) { // self modifying code maybe? so use indirect address fetching - TABLE64(x4, addr-4); - LDRx_U12(x4, x4, 0); - jump_to_next(dyn, 0, x4, ninst); - } else - jump_to_next(dyn, addr+i32, 0, ninst); + jump_to_next(dyn, addr+i32, 0, ninst); break; } break; @@ -2305,11 +2484,11 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("JMP Ib"); i32 = F8S; } - JUMP(addr+i32, 0); + JUMP((uintptr_t)getAlternate((void*)(addr+i32)), 0); if(dyn->insts[ninst].x64.jmp_insts==-1) { // out of the block fpu_purgecache(dyn, ninst, 1, x1, x2, x3); - jump_to_next(dyn, addr+i32, 0, ninst); + jump_to_next(dyn, (uintptr_t)getAlternate((void*)(addr+i32)), 0, ninst); } else { // inside the block CacheTransform(dyn, ninst, CHECK_CACHE(), x1, x2, x3); @@ -2324,6 +2503,21 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin *ok = 0; break; + case 0xEC: /* IN AL, DX */ + case 0xED: /* IN EAX, DX */ + case 0xEE: /* OUT DX, AL */ + case 0xEF: /* OUT DX, EAX */ + INST_NAME(opcode==0xEC?"IN AL, DX":(opcode==0xED?"IN EAX, DX":(opcode==0xEE?"OUT DX? AL":"OUT DX, EAX"))); + SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state + GETIP(ip); + STORE_XEMU_CALL(xRIP); + CALL(native_priv, -1); + LOAD_XEMU_CALL(xRIP); + jump_to_epilog(dyn, 0, xRIP, ninst); + *need_epilog = 0; + *ok = 0; + break; + case 0xF0: addr = dynarec64_F0(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); break; @@ -2485,9 +2679,9 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOVw_REG(xRAX, x2); MOVw_REG(xRDX, x4); } else { - if(ninst - && dyn->insts[ninst-1].x64.addr - && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x31 + if(ninst + && dyn->insts[ninst-1].x64.addr + && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x31 && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0xD2) { SET_DFNONE(x2); GETED(0); @@ -2523,7 +2717,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOVw_REG(xRDX, x4); } else { if(ninst && dyn->insts - && dyn->insts[ninst-1].x64.addr + && dyn->insts[ninst-1].x64.addr && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x48 && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0x99) { SET_DFNONE(x2) @@ -2567,7 +2761,18 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SET_DFNONE(x1); ORRx_mask(xFlags, xFlags, 1, 0, 0); // xFlags | 1 break; - + case 0xFA: /* STI */ + case 0xFB: /* CLI */ + INST_NAME(opcode==0xFA?"CLI":"STI"); + SETFLAGS(X_ALL, SF_SET); // Hack to set flags in "don't care" state + GETIP(ip); + STORE_XEMU_CALL(xRIP); + CALL(native_priv, -1); + LOAD_XEMU_CALL(xRIP); + jump_to_epilog(dyn, 0, xRIP, ninst); + *need_epilog = 0; + *ok = 0; + break; case 0xFC: INST_NAME("CLD"); BFCw(xFlags, F_DF, 1); @@ -2617,7 +2822,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 2: // CALL Ed INST_NAME("CALL Ed"); - PASS2IF((box64_dynarec_safeflags>1) || + PASS2IF((box64_dynarec_safeflags>1) || ((ninst && dyn->insts[ninst-1].x64.set_flags) || ((ninst>1) && dyn->insts[ninst-2].x64.set_flags)), 1) { @@ -2625,7 +2830,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { SETFLAGS(X_ALL, SF_SET); //Hack to put flag in "don't care" state } - GETEDx(0); + GETEDz(0); if(box64_dynarec_callret && box64_dynarec_bigblock>1) { BARRIER(BARRIER_FULL); } else { @@ -2647,22 +2852,41 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } STPx_S7_preindex(x4, xRIP, xSP, -16); } - PUSH1(xRIP); + PUSH1z(xRIP); jump_to_next(dyn, 0, ed, ninst); break; case 4: // JMP Ed INST_NAME("JMP Ed"); READFLAGS(X_PEND); BARRIER(BARRIER_FLOAT); - GETEDx(0); + GETEDz(0); jump_to_next(dyn, 0, ed, ninst); *need_epilog = 0; *ok = 0; break; + case 5: // JMP FAR Ed + if(MODREG) { + DEFAULT; + } else { + INST_NAME("JMP FAR Ed"); + READFLAGS(X_PEND); + BARRIER(BARRIER_FLOAT); + SMREAD() + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0, 0, rex, NULL, 0, 0); + LDxw(x1, wback, 0); + ed = x1; + LDH(x3, wback, rex.w?8:4); + STW(x3, xEmu, offsetof(x64emu_t, segs[_CS])); + STW(xZR, xEmu, offsetof(x64emu_t, segs_serial[_CS])); + jump_to_epilog(dyn, 0, ed, ninst); + *need_epilog = 0; + *ok = 0; + } + break; case 6: // Push Ed INST_NAME("PUSH Ed"); - GETEDx(0); - PUSH1(ed); + GETEDz(0); + PUSH1z(ed); break; default: @@ -2673,6 +2897,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin default: DEFAULT; } - + return addr; } |