diff options
Diffstat (limited to 'src')
| -rwxr-xr-x | src/dynarec/arm64/arm64_emitter.h | 25 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_00.c | 229 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_0f.c | 5 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_64.c | 60 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_f20f.c | 5 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_f30f.c | 5 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_functions.c | 4 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_functions.h | 2 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_helper.c | 168 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_helper.h | 25 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_pass0.h | 5 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_pass3.h | 2 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_native.c | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_pass0.h | 5 |
14 files changed, 384 insertions, 158 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h index d480d2d9..52e29dbb 100755 --- a/src/dynarec/arm64/arm64_emitter.h +++ b/src/dynarec/arm64/arm64_emitter.h @@ -130,6 +130,7 @@ } #define MOV64xw(Rd, imm64) if(rex.w) {MOV64x(Rd, imm64);} else {MOV32w(Rd, imm64);} +#define MOV64z(Rd, imm64) if(rex.is32bits) {MOV32w(Rd, imm64);} else {MOV64x(Rd, imm64);} // ADD / SUB @@ -141,6 +142,7 @@ #define ADDSw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 0, 1, 0b00, Rm, 0, Rn, Rd)) #define ADDw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(0, 0, 0, 0b00, Rm, lsl, Rn, Rd)) #define ADDxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 0, 0, 0b00, Rm, 0, Rn, Rd)) +#define ADDz_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.is32bits?0:1, 0, 0, 0b00, Rm, 0, Rn, Rd)) #define ADDSxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 0, 1, 0b00, Rm, 0, Rn, Rd)) #define ADDxw_REG_LSR(Rd, Rn, Rm, lsr) EMIT(ADDSUB_REG_gen(rex.w, 0, 0, 0b01, Rm, lsr, Rn, Rd)) @@ -151,6 +153,7 @@ #define ADDSw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 0, 1, 0b00, (imm12)&0xfff, Rn, Rd)) #define ADDxw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.w, 0, 0, 0b00, (imm12)&0xfff, Rn, Rd)) #define ADDSxw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.w, 0, 1, 0b00, (imm12)&0xfff, Rn, Rd)) +#define ADDz_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.is32bits?0:1, 0, 0, 0b00, (imm12)&0xfff, Rn, Rd)) #define SUBx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 1, 0, 0b00, Rm, 0, Rn, Rd)) #define SUBSx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 1, 1, 0b00, Rm, 0, Rn, Rd)) @@ -160,6 +163,7 @@ #define SUBSw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, 0, Rn, Rd)) #define SUBSw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, lsl, Rn, Rd)) #define SUBxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 1, 0, 0b00, Rm, 0, Rn, Rd)) +#define SUBz_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.is32bits?0:1, 1, 0, 0b00, Rm, 0, Rn, Rd)) #define SUBSxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 1, 1, 0b00, Rm, 0, Rn, Rd)) 
#define CMPSx_REG(Rn, Rm) SUBSx_REG(xZR, Rn, Rm) #define CMPSw_REG(Rn, Rm) SUBSw_REG(wZR, Rn, Rm) @@ -176,6 +180,7 @@ #define SUBw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 1, 0, 0b00, (imm12)&0xfff, Rn, Rd)) #define SUBSw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd)) #define SUBxw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.w, 1, 0, 0b00, (imm12)&0xfff, Rn, Rd)) +#define SUBz_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.is32bits?0:1, 1, 0, 0b00, (imm12)&0xfff, Rn, Rd)) #define SUBSxw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.w, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd)) #define CMPSx_U12(Rn, imm12) SUBSx_U12(xZR, Rn, imm12) #define CMPSw_U12(Rn, imm12) SUBSw_U12(wZR, Rn, imm12) @@ -221,6 +226,7 @@ #define LDRB_U12(Rt, Rn, imm12) EMIT(LD_gen(0b00, 0b01, ((uint32_t)((imm12)))&0xfff, Rn, Rt)) #define LDRH_U12(Rt, Rn, imm12) EMIT(LD_gen(0b01, 0b01, ((uint32_t)((imm12)>>1))&0xfff, Rn, Rt)) #define LDRxw_U12(Rt, Rn, imm12) EMIT(LD_gen((rex.w)?0b11:0b10, 0b01, ((uint32_t)((imm12)>>(2+rex.w)))&0xfff, Rn, Rt)) +#define LDRz_U12(Rt, Rn, imm12) EMIT(LD_gen((rex.is32bits)?0b10:0b11, 0b01, ((uint32_t)((imm12)>>(rex.is32bits?2:3)))&0xfff, Rn, Rt)) #define LDS_gen(size, op1, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b10<<22 | (imm12)<<10 | (Rn)<<5 | (Rt)) #define LDRSW_U12(Rt, Rn, imm12) EMIT(LDS_gen(0b10, 0b01, ((uint32_t)((imm12)>>2))&0xfff, Rn, Rt)) @@ -232,6 +238,7 @@ #define LDRw_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b10, Rm, 0b011, 0, Rn, Rt)) #define LDRw_REG_LSL2(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b10, Rm, 0b011, 1, Rn, Rt)) #define LDRxw_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b10+rex.w, Rm, 0b011, 0, Rn, Rt)) +#define LDRz_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(rex.is32bits?0b10:0b11, Rm, 0b011, 0, Rn, Rt)) #define LDRB_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b00, Rm, 0b011, 0, Rn, Rt)) #define LDRH_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b01, Rm, 0b011, 0, Rn, Rt)) @@ -253,6 +260,7 @@ #define LDURx_I9(Rt, Rn, imm9) EMIT(LDU_gen(0b11, 0b01, imm9, Rn, Rt)) #define 
LDURw_I9(Rt, Rn, imm9) EMIT(LDU_gen(0b10, 0b01, imm9, Rn, Rt)) #define LDURxw_I9(Rt, Rn, imm9) EMIT(LDU_gen((rex.w)?0b11:0b10, 0b01, imm9, Rn, Rt)) +#define LDURz_I9(Rt, Rn, imm9) EMIT(LDU_gen((rex.is32bits)?0b10:0b11, 0b01, imm9, Rn, Rt)) #define LDURH_I9(Rt, Rn, imm9) EMIT(LDU_gen(0b01, 0b01, imm9, Rn, Rt)) #define LDURB_I9(Rt, Rn, imm9) EMIT(LDU_gen(0b00, 0b01, imm9, Rn, Rt)) #define LDURSW_I9(Rt, Rn, imm9) EMIT(LDU_gen(0b10, 0b10, imm9, Rn, Rt)) @@ -264,6 +272,7 @@ #define LDURSBxw_I9(Rt, Rn, imm9) EMIT(LDU_gen(0b00, (rex.w)?0b10:0b11, imm9, Rn, Rt)) #define LDxw(A, B, C) if(unscaled) {LDURxw_I9(A, B, C);} else {LDRxw_U12(A, B, C);} +#define LDz(A, B, C) if(unscaled) {LDURz_I9(A, B, C);} else {LDRz_U12(A, B, C);} #define LDx(A, B, C) if(unscaled) {LDURx_I9(A, B, C);} else {LDRx_U12(A, B, C);} #define LDW(A, B, C) if(unscaled) {LDURw_I9(A, B, C);} else {LDRw_U12(A, B, C);} #define LDH(A, B, C) if(unscaled) {LDURH_I9(A, B, C);} else {LDRH_U12(A, B, C);} @@ -276,6 +285,7 @@ #define LDSBx(A, B, C) if(unscaled) {LDURSBx_I9(A, B, C);} else {LDRSBx_U12(A, B, C);} #define LDSBw(A, B, C) if(unscaled) {LDURSBw_I9(A, B, C);} else {LDRSBw_U12(A, B, C);} #define STxw(A, B, C) if(unscaled) {STURxw_I9(A, B, C);} else {STRxw_U12(A, B, C);} +#define STz(A, B, C) if(unscaled) {STURz_I9(A, B, C);} else {STRz_U12(A, B, C);} #define STx(A, B, C) if(unscaled) {STURx_I9(A, B, C);} else {STRx_U12(A, B, C);} #define STW(A, B, C) if(unscaled) {STURw_I9(A, B, C);} else {STRw_U12(A, B, C);} #define STH(A, B, C) if(unscaled) {STURH_I9(A, B, C);} else {STRH_U12(A, B, C);} @@ -297,11 +307,13 @@ #define STRB_U12(Rt, Rn, imm12) EMIT(ST_gen(0b00, 0b01, ((uint32_t)((imm12)))&0xfff, Rn, Rt)) #define STRH_U12(Rt, Rn, imm12) EMIT(ST_gen(0b01, 0b01, ((uint32_t)((imm12)>>1))&0xfff, Rn, Rt)) #define STRxw_U12(Rt, Rn, imm12) EMIT(ST_gen((rex.w)?0b11:0b10, 0b01, ((uint32_t)((imm12)>>(2+rex.w)))&0xfff, Rn, Rt)) +#define STRz_U12(Rt, Rn, imm12) EMIT(ST_gen((rex.is32bits)?0b10:0b11, 0b01, 
((uint32_t)((imm12)>>(rex.is32bits?2:3)))&0xfff, Rn, Rt)) #define STU_gen(size, opc, imm9, Rn, Rt) ((size)<<30 | 0b111<<27 | (opc)<<22 | ((imm9)&0x1ff)<<12 | (Rn)<<5 | (Rt)) #define STURx_I9(Rt, Rn, imm9) EMIT(STU_gen(0b11, 0b00, imm9, Rn, Rt)) #define STURw_I9(Rt, Rn, imm9) EMIT(STU_gen(0b10, 0b00, imm9, Rn, Rt)) #define STURxw_I9(Rt, Rn, imm9) EMIT(STU_gen((rex.w)?0b11:0b10, 0b00, imm9, Rn, Rt)) +#define STURz_I9(Rt, Rn, imm9) EMIT(STU_gen((rex.is32bits)?0b10:0b11, 0b00, imm9, Rn, Rt)) #define STURH_I9(Rt, Rn, imm9) EMIT(STU_gen(0b01, 0b00, imm9, Rn, Rt)) #define STURB_I9(Rt, Rn, imm9) EMIT(STU_gen(0b00, 0b00, imm9, Rn, Rt)) @@ -344,6 +356,16 @@ #define POP2(reg1, reg2) LDPx_S7_postindex(reg1, reg2, xRSP, 16) #define PUSH2(reg1, reg2) STPx_S7_preindex(reg2, reg1, xRSP, -16) +#define POP1_32(reg) LDRw_S9_postindex(reg, xRSP, 4) +#define PUSH1_32(reg) STRw_S9_preindex(reg, xRSP, -4) +#define POP2_32(reg1, reg2) LDPw_S7_postindex(reg1, reg2, xRSP, 8) +#define PUSH2_32(reg1, reg2) STPw_S7_preindex(reg2, reg1, xRSP, -8) + +#define POP1z(reg) if(rex.is32bits) {POP1_32(reg);} else {POP1(reg);} +#define PUSH1z(reg) if(rex.is32bits) {PUSH1_32(reg);} else {PUSH1(reg);} +#define POP2z(reg1, reg2) if(rex.is32bits) {POP2_32(reg1, reg2);} else {POP2(reg1, reg2);} +#define PUSH2z(reg1, reg2) if(rex.is32bits) {PUSH2_32(reg1, reg2);} else {PUSH2(reg1, reg2);} + // LOAD/STORE Acquire Exclusive #define MEMAX_gen(size, L, Rs, Rn, Rt) ((size)<<30 | 0b001000<<24 | (L)<<22 | (Rs)<<16 | 1<<15 | 0b11111<<10 | (Rn)<<5 | (Rt)) #define LDAXRB(Rt, Rn) EMIT(MEMAX_gen(0b00, 1, 31, Rn, Rt)) @@ -414,9 +436,11 @@ #define CBNZx(Rt, imm19) EMIT(CB_gen(1, 1, ((imm19)>>2)&0x7FFFF, Rt)) #define CBNZw(Rt, imm19) EMIT(CB_gen(0, 1, ((imm19)>>2)&0x7FFFF, Rt)) #define CBNZxw(Rt, imm19) EMIT(CB_gen(rex.w, 1, ((imm19)>>2)&0x7FFFF, Rt)) +#define CBNZz(Rt, imm19) EMIT(CB_gen(rex.is32bits?0:1, 1, ((imm19)>>2)&0x7FFFF, Rt)) #define CBZx(Rt, imm19) EMIT(CB_gen(1, 0, ((imm19)>>2)&0x7FFFF, Rt)) #define CBZw(Rt, 
imm19) EMIT(CB_gen(0, 0, ((imm19)>>2)&0x7FFFF, Rt)) #define CBZxw(Rt, imm19) EMIT(CB_gen(rex.w, 0, ((imm19)>>2)&0x7FFFF, Rt)) +#define CBZz(Rt, imm19) EMIT(CB_gen(rex.is32bits?0:1, 0, ((imm19)>>2)&0x7FFFF, Rt)) #define TB_gen(b5, op, b40, imm14, Rt) ((b5)<<31 | 0b011011<<25 | (op)<<24 | (b40)<<19 | (imm14)<<5 | (Rt)) #define TBZ(Rt, bit, imm16) EMIT(TB_gen(((bit)>>5)&1, 0, (bit)&0x1f, ((imm16)>>2)&0x3FFF, Rt)) @@ -511,6 +535,7 @@ #define MOVx_REG(Rd, Rm) ORRx_REG(Rd, xZR, Rm) #define MOVw_REG(Rd, Rm) ORRw_REG(Rd, xZR, Rm) #define MOVxw_REG(Rd, Rm) ORRxw_REG(Rd, xZR, Rm) +#define MOVz_REG(Rd, Rm) if(rex.is32bits) {MOVw_REG(Rd, Rm);} else {MOVx_REG(Rd, Rm);} #define LSLw_IMM(Rd, Rm, lsl) ORRw_REG_LSL(Rd, xZR, Rm, lsl) #define LSLx_IMM(Rd, Rm, lsl) ORRx_REG_LSL(Rd, xZR, Rm, lsl) #define LSLxw_IMM(Rd, Rm, lsl) ORRxw_REG_LSL(Rd, xZR, Rm, lsl) diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c index 5ba08514..20319c87 100755 --- a/src/dynarec/arm64/dynarec_arm64_00.c +++ b/src/dynarec/arm64/dynarec_arm64_00.c @@ -52,11 +52,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MAYUSE(lock); MAYUSE(cacheupd); - if(rex.is32bits) { - DEFAULT; - return ip; - } - switch(opcode) { case 0x00: INST_NAME("ADD Eb, Gb"); @@ -495,6 +490,32 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin emit_cmp32_0(dyn, ninst, rex, xRAX, x3, x4); break; + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + INST_NAME("INC Reg (32bits)"); + SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); + gd = xRAX + (opcode&7); + emit_inc32(dyn, ninst, rex, gd, x1, x2); + break; + case 0x48: + case 0x49: + case 0x4A: + case 0x4B: + case 0x4C: + case 0x4D: + case 0x4E: + case 0x4F: + INST_NAME("DEC Reg (32bits)"); + SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING); + gd = xRAX + (opcode&7); + emit_dec32(dyn, ninst, rex, gd, x1, x2); + break; case 0x50: case 0x51: case 0x52: 
@@ -519,15 +540,15 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(!box64_dynarec_test && u32>=0x50 && u32<=0x57 && (dyn->size>(ninst+1) && dyn->insts[ninst+1].pred_sz==1) && gd != xRSP) { u32= xRAX+(u32&0x07)+(rex.b<<3); if(u32==xRSP) { - PUSH1(gd); + PUSH1z(gd); } else { // double push! MESSAGE(LOG_DUMP, "DOUBLE PUSH\n"); - PUSH2(gd, u32); + PUSH2z(gd, u32); dyn->doublepush = 1; } } else { - PUSH1(gd); + PUSH1z(gd); } } break; @@ -557,46 +578,51 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin u32= xRAX+(u32&0x07)+(rex.b<<3); MESSAGE(LOG_DUMP, "DOUBLE POP\n"); if(gd==u32) { - ADDx_U12(xRSP, xRSP, 0x8); - POP1(gd); + ADDz_U12(xRSP, xRSP, rex.is32bits?0x4:0x8); + POP1z(gd); } else { - POP2(gd, (u32==xRSP)?x1:u32); + POP2z(gd, (u32==xRSP)?x1:u32); if(u32==xRSP) { - MOVx_REG(u32, x1); + MOVz_REG(u32, x1); } } dyn->doublepop = 1; SKIPTEST(x1); // disable test for this OP } else { if(gd == xRSP) { - POP1(x1); - MOVx_REG(gd, x1); + POP1z(x1); + MOVz_REG(gd, x1); } else { - POP1(gd); + POP1z(gd); } } } break; case 0x63: - INST_NAME("MOVSXD Gd, Ed"); - nextop = F8; - GETGD; - if(rex.w) { - if(MODREG) { // reg <= reg - SXTWx(gd, xRAX+(nextop&7)+(rex.b<<3)); - } else { // mem <= reg - SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); - LDSW(gd, ed, fixedaddress); - } + if(rex.is32bits) { + // ARPL here + DEFAULT; } else { - if(MODREG) { // reg <= reg - MOVw_REG(gd, xRAX+(nextop&7)+(rex.b<<3)); - } else { // mem <= reg - SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); - LDW(gd, ed, fixedaddress); + INST_NAME("MOVSXD Gd, Ed"); + nextop = F8; + GETGD; + if(rex.w) { + if(MODREG) { // reg <= reg + SXTWx(gd, xRAX+(nextop&7)+(rex.b<<3)); + } else { // mem <= reg + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 
0); + LDSW(gd, ed, fixedaddress); + } + } else { + if(MODREG) { // reg <= reg + MOVw_REG(gd, xRAX+(nextop&7)+(rex.b<<3)); + } else { // mem <= reg + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); + LDW(gd, ed, fixedaddress); + } } } break; @@ -619,10 +645,10 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MESSAGE(LOG_DUMP, "PUSH then RET, using indirect\n"); TABLE64(x3, addr-4); LDRSW_U12(x1, x3, 0); - PUSH1(x1); + PUSH1z(x1); } else { - MOV64x(x3, i64); - PUSH1(x3); + MOV64z(x3, i64); + PUSH1z(x3); } break; case 0x69: @@ -661,8 +687,8 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0x6A: INST_NAME("PUSH Ib"); i64 = F8S; - MOV64x(x3, i64); - PUSH1(x3); + MOV64z(x3, i64); + PUSH1z(x3); break; case 0x6B: INST_NAME("IMUL Gd, Ed, Ib"); @@ -1056,7 +1082,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin u8 = (nextop&0x38)>>3; LDRw_U12(x3, xEmu, offsetof(x64emu_t, segs[u8])); if((nextop&0xC0)==0xC0) { // reg <= seg - UXTHw(xRAX+(nextop&7)+(rex.b<<3), x1); + UXTHw(xRAX+(nextop&7)+(rex.b<<3), x3); } else { // mem <= seg addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0); STH(x3, wback, fixedaddress); @@ -1074,7 +1100,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(gd!=ed) { // it's sometimes used as a 3 bytes NOP MOVxw_REG(gd, ed); } - else if(!rex.w) { + else if(!rex.w && !rex.is32bits) { MOVw_REG(gd, gd); //truncate the higher 32bits as asked } } @@ -1098,17 +1124,17 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("POP Ed"); nextop = F8; if(MODREG) { - POP1(xRAX+(nextop&7)+(rex.b<<3)); + POP1z(xRAX+(nextop&7)+(rex.b<<3)); } else { - POP1(x2); // so this can handle POP [ESP] and maybe some variant too + POP1z(x2); // so this can handle POP [ESP] and 
maybe some variant too addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0); if(ed==xRSP) { - STx(x2, ed, fixedaddress); + STz(x2, ed, fixedaddress); } else { // complicated to just allow a segfault that can be recovered correctly - SUBx_U12(xRSP, xRSP, 8); - STx(x2, ed, fixedaddress); - ADDx_U12(xRSP, xRSP, 8); + SUBz_U12(xRSP, xRSP, rex.is32bits?4:8); + STz(x2, ed, fixedaddress); + ADDz_U12(xRSP, xRSP, rex.is32bits?4:8); } } break; @@ -1150,13 +1176,12 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0x9C: INST_NAME("PUSHF"); READFLAGS(X_ALL); - - PUSH1(xFlags); + PUSH1z(xFlags); break; case 0x9D: INST_NAME("POPF"); SETFLAGS(X_ALL, SF_SET); - POP1(xFlags); + POP1z(xFlags); MOV32w(x1, 0x3F7FD7); ANDw_REG(xFlags, xFlags, x1); ORRw_mask(xFlags, xFlags, 0b011111, 0); //mask=0x00000002 @@ -1174,7 +1199,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SETFLAGS(X_CF|X_PF|X_AF|X_ZF|X_SF, SF_SUBSET); MOV32w(x2, 0b11010101); BICw_REG(xFlags, xFlags, x2); - UBFXx(x1, xRAX, 8, 8); + UBFXw(x1, xRAX, 8, 8); ANDw_REG(x1, x1, x2); ORRw_REG(xFlags, xFlags, x1); SET_DFNONE(x1); @@ -1186,28 +1211,40 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 0xA0: INST_NAME("MOV AL,Ob"); - u64 = F64; - MOV64x(x1, u64); + if(rex.is32bits) + u64 = F32; + else + u64 = F64; + MOV64z(x1, u64); LDRB_U12(x2, x1, 0); BFIx(xRAX, x2, 0, 8); break; case 0xA1: INST_NAME("MOV EAX,Od"); - u64 = F64; - MOV64x(x1, u64); + if(rex.is32bits) + u64 = F32; + else + u64 = F64; + MOV64z(x1, u64); LDRxw_U12(xRAX, x1, 0); break; case 0xA2: INST_NAME("MOV Ob,AL"); - u64 = F64; - MOV64x(x1, u64); + if(rex.is32bits) + u64 = F32; + else + u64 = F64; + MOV64z(x1, u64); STRB_U12(xRAX, x1, 0); SMWRITE(); break; case 0xA3: INST_NAME("MOV Od,EAX"); - u64 = F64; - MOV64x(x1, u64); + if(rex.is32bits) + u64 = F32; + else + u64 = F64; + MOV64z(x1, u64); 
STRxw_U12(xRAX, x1, 0); SMWRITE(); break; @@ -1688,7 +1725,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } BARRIER(BARRIER_FLOAT); i32 = F16; - retn_to_epilog(dyn, ninst, i32); + retn_to_epilog(dyn, ninst, rex, i32); *need_epilog = 0; *ok = 0; break; @@ -1699,7 +1736,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin READFLAGS(X_PEND); // so instead, force the deferred flags, so it's not too slow, and flags are not lost } BARRIER(BARRIER_FLOAT); - ret_to_epilog(dyn, ninst); + ret_to_epilog(dyn, ninst, rex); *need_epilog = 0; *ok = 0; break; @@ -1753,8 +1790,8 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0xC9: INST_NAME("LEAVE"); - MOVx_REG(xRSP, xRBP); - POP1(xRBP); + MOVz_REG(xRSP, xRBP); + POP1z(xRBP); break; case 0xCC: @@ -2038,7 +2075,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MOV64xw(x4, (rex.w?64:32)); SUBx_REG(x3, x4, x3); GETED(0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);} B_NEXT(cEQ); RORxw_REG(ed, ed, x3); WBACK; @@ -2062,7 +2099,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f } GETED(0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);} B_NEXT(cEQ); RORxw_REG(ed, ed, x3); WBACK; @@ -2089,7 +2126,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ANDSw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f } GETEDW(x4, x1, 0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);} B_NEXT(cEQ); CALL_(rex.w?((void*)rcl64):((void*)rcl32), ed, x4); WBACK; @@ -2105,7 +2142,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ANDSw_mask(x2, xRCX, 0, 0b00100); //mask=0x00000001f } GETEDW(x4, x1, 
0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);} B_NEXT(cEQ); CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x4); WBACK; @@ -2120,7 +2157,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f } GETED(0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);} B_NEXT(cEQ); emit_shl32(dyn, ninst, rex, ed, x3, x5, x4); WBACK; @@ -2134,7 +2171,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f } GETED(0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);} B_NEXT(cEQ); emit_shr32(dyn, ninst, rex, ed, x3, x5, x4); WBACK; @@ -2148,7 +2185,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ANDSw_mask(x3, xRCX, 0, 0b00100); //mask=0x00000001f } GETED(0); - if(!rex.w && MODREG) {MOVw_REG(ed, ed);} + if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);} B_NEXT(cEQ); UFLAG_OP12(ed, x3); ASRxw_REG(ed, ed, x3); @@ -2188,7 +2225,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin CHECK_CACHE()) { \ /* out of the block */ \ i32 = dyn->insts[ninst].epilog-(dyn->native_size); \ - if(Z) {CBNZx(xRCX, i32);} else {CBZx(xRCX, i32);}; \ + if(Z) {CBNZz(xRCX, i32);} else {CBZz(xRCX, i32);}; \ if(dyn->insts[ninst].x64.jmp_insts==-1) { \ if(!(dyn->insts[ninst].x64.barrier&BARRIER_FLOAT)) \ fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \ @@ -2201,13 +2238,13 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { \ /* inside the block */ \ i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); \ - if(Z) {CBZx(xRCX, i32);} else {CBNZx(xRCX, i32);}; \ + if(Z) {CBZz(xRCX, i32);} else {CBNZz(xRCX, i32);}; \ } case 0xE0: INST_NAME("LOOPNZ"); 
READFLAGS(X_ZF); i8 = F8S; - SUBx_U12(xRCX, xRCX, 1); + SUBz_U12(xRCX, xRCX, 1); TBNZ_NEXT(xFlags, 1<<F_ZF); GO(0); break; @@ -2215,14 +2252,14 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("LOOPZ"); READFLAGS(X_ZF); i8 = F8S; - SUBx_U12(xRCX, xRCX, 1); + SUBz_U12(xRCX, xRCX, 1); TBZ_NEXT(xFlags, 1<<F_ZF); GO(0); break; case 0xE2: INST_NAME("LOOP"); i8 = F8S; - SUBx_U12(xRCX, xRCX, 1); + SUBz_U12(xRCX, xRCX, 1); GO(0); break; case 0xE3: @@ -2241,7 +2278,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin #endif } #if STEP < 2 - if(isNativeCall(dyn, addr+i32, &dyn->insts[ninst].natcall, &dyn->insts[ninst].retn)) + if(!rex.is32bits && isNativeCall(dyn, addr+i32, &dyn->insts[ninst].natcall, &dyn->insts[ninst].retn)) tmp = dyn->insts[ninst].pass2choice = 3; else tmp = dyn->insts[ninst].pass2choice = 0; @@ -2311,8 +2348,12 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin *need_epilog = 0; *ok = 0; } - TABLE64(x2, addr); - PUSH1(x2); + if(rex.is32bits) { + MOV32w(x2, addr); + } else { + TABLE64(x2, addr); + } + PUSH1z(x2); if(box64_dynarec_callret) { // Push actual return address if(addr < (dyn->start+dyn->isize)) { @@ -2329,12 +2370,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin *ok = 0; *need_epilog = 0; } - if(addr+i32==0) { // self modifying code maybe? 
so use indirect address fetching - TABLE64(x4, addr-4); - LDRx_U12(x4, x4, 0); - jump_to_next(dyn, 0, x4, ninst); - } else - jump_to_next(dyn, addr+i32, 0, ninst); + jump_to_next(dyn, addr+i32, 0, ninst); break; } break; @@ -2668,7 +2704,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { SETFLAGS(X_ALL, SF_SET); //Hack to put flag in "don't care" state } - GETEDx(0); + GETEDz(0); if(box64_dynarec_callret && box64_dynarec_bigblock>1) { BARRIER(BARRIER_FULL); } else { @@ -2690,22 +2726,41 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } STPx_S7_preindex(x4, xRIP, xSP, -16); } - PUSH1(xRIP); + PUSH1z(xRIP); jump_to_next(dyn, 0, ed, ninst); break; case 4: // JMP Ed INST_NAME("JMP Ed"); READFLAGS(X_PEND); BARRIER(BARRIER_FLOAT); - GETEDx(0); + GETEDz(0); jump_to_next(dyn, 0, ed, ninst); *need_epilog = 0; *ok = 0; break; + case 5: // JMP FAR Ed + if(MODREG) { + DEFAULT; + } else { + INST_NAME("JMP FAR Ed"); + READFLAGS(X_PEND); + BARRIER(BARRIER_FLOAT); + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, (0xfff<<1)-(rex.w?8:4), (1<<(2+rex.w))-1, rex, NULL, 0, 0); /* absmax bounded by the halfword-load reach so that both the offset and the selector that follows it stay encodable; scaled forms only */ + LDRxw_U12(x1, wback, fixedaddress); + ed = x1; + LDRH_U12(x3, wback, fixedaddress+(rex.w?8:4)); /* 16-bit selector sits right after the 32/64-bit offset; parentheses needed, ?: binds looser than + */ + STRH_U12(x3, xEmu, offsetof(x64emu_t, segs[_CS])); /* halfword store, same as iret_to_epilog */ + STRw_U12(xZR, xEmu, offsetof(x64emu_t, segs_serial[_CS])); + jump_to_epilog(dyn, 0, ed, ninst); + *need_epilog = 0; + *ok = 0; + } + break; case 6: // Push Ed INST_NAME("PUSH Ed"); - GETEDx(0); - PUSH1(ed); + GETEDz(0); + PUSH1z(ed); break; default: diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index c5ebf3d6..b19c6590 100755 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -57,11 +57,6 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin static const int8_t mask_shift8[] = { -7, -6, -5, -4, -3, -2, -1, 0 }; #endif - if(rex.is32bits) { - DEFAULT; 
- return addr; - } - switch(opcode) { case 0x01: diff --git a/src/dynarec/arm64/dynarec_arm64_64.c b/src/dynarec/arm64/dynarec_arm64_64.c index 8990a2f7..81de278b 100644 --- a/src/dynarec/arm64/dynarec_arm64_64.c +++ b/src/dynarec/arm64/dynarec_arm64_64.c @@ -52,11 +52,6 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MAYUSE(v0); MAYUSE(v1); - if(rex.is32bits) { - DEFAULT; - return addr; - } - while((opcode==0xF2) || (opcode==0xF3)) { rep = opcode-0xF1; opcode = F8; @@ -322,26 +317,31 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 0x63: - INST_NAME("MOVSXD Gd, Ed"); - nextop = F8; - GETGD; - if(rex.w) { - if(MODREG) { // reg <= reg - SXTWx(gd, xRAX+(nextop&7)+(rex.b<<3)); - } else { // mem <= reg - grab_segdata(dyn, addr, ninst, x4, seg); - SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0); - LDRSW_REG(gd, ed, x4); - } + if(rex.is32bits) { + // ARPL here + DEFAULT; } else { - if(MODREG) { // reg <= reg - MOVw_REG(gd, xRAX+(nextop&7)+(rex.b<<3)); - } else { // mem <= reg - grab_segdata(dyn, addr, ninst, x4, seg); - SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0); - LDRw_REG(gd, ed, x4); + INST_NAME("MOVSXD Gd, Ed"); + nextop = F8; + GETGD; + if(rex.w) { + if(MODREG) { // reg <= reg + SXTWx(gd, xRAX+(nextop&7)+(rex.b<<3)); + } else { // mem <= reg + grab_segdata(dyn, addr, ninst, x4, seg); + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0); + LDRSW_REG(gd, ed, x4); + } + } else { + if(MODREG) { // reg <= reg + MOVw_REG(gd, xRAX+(nextop&7)+(rex.b<<3)); + } else { // mem <= reg + grab_segdata(dyn, addr, ninst, x4, seg); + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0); + LDRw_REG(gd, ed, x4); + } } } break; @@ -587,7 +587,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* 
dyn, uintptr_t addr, uintptr_t ip, int nin if(gd!=ed) { // it's sometimes used as a 3 bytes NOP MOVxw_REG(gd, ed); } - else if(!rex.w) { + else if(!rex.w && !rex.is32bits) { MOVw_REG(gd, gd); //truncate the higher 32bits as asked } } @@ -1010,7 +1010,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { SETFLAGS(X_ALL, SF_SET); //Hack to put flag in "don't care" state } - GETEDOx(x6, 0); + GETEDOz(x6, 0); if(box64_dynarec_callret && box64_dynarec_bigblock>1) { BARRIER(BARRIER_FULL); } else { @@ -1032,22 +1032,22 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } STPx_S7_preindex(x4, xRIP, xSP, -16); } - PUSH1(xRIP); + PUSH1z(xRIP); jump_to_next(dyn, 0, ed, ninst); break; case 4: // JMP Ed INST_NAME("JMP Ed"); READFLAGS(X_PEND); BARRIER(BARRIER_FLOAT); - GETEDOx(x6, 0); + GETEDOz(x6, 0); jump_to_next(dyn, 0, ed, ninst); *need_epilog = 0; *ok = 0; break; case 6: // Push Ed INST_NAME("PUSH Ed"); - GETEDOx(x6, 0); - PUSH1(ed); + GETEDOz(x6, 0); + PUSH1z(ed); break; default: diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c b/src/dynarec/arm64/dynarec_arm64_f20f.c index e9b2ccc3..17f95f4c 100755 --- a/src/dynarec/arm64/dynarec_arm64_f20f.c +++ b/src/dynarec/arm64/dynarec_arm64_f20f.c @@ -44,11 +44,6 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n MAYUSE(v0); MAYUSE(v1); - if(rex.is32bits) { - DEFAULT; - return addr; - } - switch(opcode) { case 0x10: diff --git a/src/dynarec/arm64/dynarec_arm64_f30f.c b/src/dynarec/arm64/dynarec_arm64_f30f.c index bfa16cd8..5b3aa85e 100755 --- a/src/dynarec/arm64/dynarec_arm64_f30f.c +++ b/src/dynarec/arm64/dynarec_arm64_f30f.c @@ -46,11 +46,6 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n MAYUSE(v1); MAYUSE(j64); - if(rex.is32bits) { - DEFAULT; - return addr; - } - switch(opcode) { case 0x10: diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c 
b/src/dynarec/arm64/dynarec_arm64_functions.c index 2a7c9b34..dfd60eae 100755 --- a/src/dynarec/arm64/dynarec_arm64_functions.c +++ b/src/dynarec/arm64/dynarec_arm64_functions.c @@ -488,10 +488,10 @@ const char* getCacheName(int t, int n) return buff; } -void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name) +void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t rex) { if(box64_dynarec_dump) { - printf_x64_instruction(my_context->dec, &dyn->insts[ninst].x64, name); + printf_x64_instruction(rex.is32bits?my_context->dec32:my_context->dec, &dyn->insts[ninst].x64, name); dynarec_log(LOG_NONE, "%s%p: %d emitted opcodes, inst=%d, barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, sm=%d/%d", (box64_dynarec_dump>1)?"\e[32m":"", (void*)(dyn->native_start+dyn->insts[ninst].address), diff --git a/src/dynarec/arm64/dynarec_arm64_functions.h b/src/dynarec/arm64/dynarec_arm64_functions.h index 86cd0440..950345fa 100755 --- a/src/dynarec/arm64/dynarec_arm64_functions.h +++ b/src/dynarec/arm64/dynarec_arm64_functions.h @@ -43,6 +43,6 @@ int getedparity(dynarec_native_t* dyn, int ninst, uintptr_t addr, uint8_t nextop const char* getCacheName(int t, int n); -void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name); +void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t rex); void print_opcode(dynarec_native_t* dyn, int ninst, uint32_t opcode); #endif //__DYNAREC_ARM_FUNCTIONS_H__ diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c index 243d60a1..00a49905 100755 --- a/src/dynarec/arm64/dynarec_arm64_helper.c +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -27,11 +27,16 @@ #include "dynarec_arm64_functions.h" #include "dynarec_arm64_helper.h" +static uintptr_t geted_32(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int* unscaled, int absmax, uint32_t mask, int* l, int s); + /* setup r2 
to address pointed by ED, also fixaddress is an optionnal delta in the range [-absmax, +absmax], with delta&mask==0 to be added to ed for LDR/STR */ uintptr_t geted(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int* unscaled, int absmax, uint32_t mask, rex_t rex, int *l, int s, int delta) { MAYUSE(dyn); MAYUSE(ninst); MAYUSE(delta); + if(rex.is32bits) + return geted_32(dyn, addr, ninst, nextop, ed, hint, fixaddress, unscaled, absmax, mask, l, s); + int lock = l?((l==LOCK_LOCK)?1:2):0; if(unscaled) *unscaled = 0; @@ -183,6 +188,141 @@ uintptr_t geted(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, u return addr; } +static uintptr_t geted_32(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int* unscaled, int absmax, uint32_t mask, int* l, int s) +{ + MAYUSE(dyn); MAYUSE(ninst); + + int lock = l?((l==LOCK_LOCK)?1:2):0; + if(unscaled) + *unscaled = 0; + if(lock==2) + *l = 0; + uint8_t ret = x2; + uint8_t scratch = x2; + *fixaddress = 0; + if(hint>0) ret = hint; + if(hint>0 && hint<xRAX) scratch = hint; + int absmin = 0; + if(s) absmin=-absmax; + MAYUSE(scratch); + if(!(nextop&0xC0)) { + if((nextop&7)==4) { + uint8_t sib = F8; + int sib_reg = (sib>>3)&7; + if((sib&0x7)==5) { + int64_t tmp = F32S; + if (sib_reg!=4) { + if(tmp && (!((tmp>=absmin) && (tmp<=absmax) && !(tmp&mask))) || !(unscaled && (tmp>-256) && (tmp<256))) { + MOV32w(scratch, tmp); + ADDw_REG_LSL(ret, scratch, xRAX+sib_reg, (sib>>6)); + } else { + LSLw(ret, xRAX+sib_reg, (sib>>6)); + *fixaddress = tmp; + if(unscaled && (tmp>-256) && (tmp<256)) + *unscaled = 1; + } + } else { + switch(lock) { /* absolute 32-bit guest address: zero-extend it, as the MOV32w below and the disp32-only branch do */ + case 1: addLockAddress((uint32_t)tmp); break; + case 2: if(isLockAddress((uint32_t)tmp)) *l=1; break; + } + MOV32w(ret, tmp); + } + } else { + if (sib_reg!=4) { + ADDw_REG_LSL(ret, xRAX+(sib&0x7), xRAX+sib_reg, (sib>>6)); + } else { + ret = xRAX+(sib&0x7); + } + } + } else 
if((nextop&7)==5) { + uint64_t tmp = F32; + MOV32w(ret, tmp); + switch(lock) { + case 1: addLockAddress(tmp); break; + case 2: if(isLockAddress(tmp)) *l=1; break; + } + } else { + ret = xRAX+(nextop&7); + if(ret==hint) { + MOVw_REG(hint, ret); //to clear upper part + } + } + } else { + int64_t i32; + uint8_t sib = 0; + int sib_reg = 0; + if((nextop&7)==4) { + sib = F8; + sib_reg = (sib>>3)&7; + } + if(nextop&0x80) + i32 = F32S; + else + i32 = F8S; + if(i32==0 || ((i32>=absmin) && (i32<=absmax) && !(i32&mask)) || (unscaled && (i32>-256) && (i32<256))) { + *fixaddress = i32; + if(unscaled && (i32>-256) && (i32<256)) + *unscaled = 1; + if((nextop&7)==4) { + if (sib_reg!=4) { + ADDw_REG_LSL(ret, xRAX+(sib&0x07), xRAX+sib_reg, (sib>>6)); + } else { + ret = xRAX+(sib&0x07); + } + } else { + ret = xRAX+(nextop&0x07); + } + } else { + int64_t sub = (i32<0)?1:0; + if(sub) i32 = -i32; + if(i32<0x1000) { + if((nextop&7)==4) { + if (sib_reg!=4) { + ADDw_REG_LSL(scratch, xRAX+(sib&0x07), xRAX+sib_reg, (sib>>6)); + } else { + scratch = xRAX+(sib&0x07); + } + } else + scratch = xRAX+(nextop&0x07); + if(sub) { + SUBw_U12(ret, scratch, i32); + } else { + ADDw_U12(ret, scratch, i32); + } + } else { + MOV32w(scratch, i32); + if((nextop&7)==4) { + if (sib_reg!=4) { + if(sub) { + SUBw_REG(scratch, xRAX+(sib&0x07), scratch); + } else { + ADDw_REG(scratch, scratch, xRAX+(sib&0x07)); + } + ADDw_REG_LSL(ret, scratch, xRAX+sib_reg, (sib>>6)); + } else { + PASS3(int tmp = xRAX+(sib&0x07)); + if(sub) { + SUBw_REG(ret, tmp, scratch); + } else { + ADDw_REG(ret, tmp, scratch); + } + } + } else { + PASS3(int tmp = xRAX+(nextop&0x07)); + if(sub) { + SUBw_REG(ret, tmp, scratch); + } else { + ADDw_REG(ret, tmp, scratch); + } + } + } + } + } + *ed = ret; + return addr; +} + /* setup r2 to address pointed by ED, also fixaddress is an optionnal delta in the range [-absmax, +absmax], with delta&mask==0 to be added to ed for LDR/STR */ uintptr_t geted32(dynarec_arm_t* dyn, uintptr_t addr, int ninst, 
uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int* unscaled, int absmax, uint32_t mask, rex_t rex, int* l, int s, int delta) { @@ -258,7 +398,7 @@ uintptr_t geted32(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, i64 = F32S; else i64 = F8S; - if(i64==0 || ((i64>=absmin) && (i64<=absmax) && !(i64&mask)) || (unscaled && (i64>-256) && (i64>256))) { + if(i64==0 || ((i64>=absmin) && (i64<=absmax) && !(i64&mask)) || (unscaled && (i64>-256) && (i64<256))) { *fixaddress = i64; if(unscaled && (i64>-256) && (i64<256)) *unscaled = 1; @@ -464,12 +604,12 @@ void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst) BLR(x2); // save LR... } -void ret_to_epilog(dynarec_arm_t* dyn, int ninst) +void ret_to_epilog(dynarec_arm_t* dyn, int ninst, rex_t rex) { MAYUSE(dyn); MAYUSE(ninst); MESSAGE(LOG_DUMP, "Ret to epilog\n"); - POP1(xRIP); - MOVx_REG(x1, xRIP); + POP1z(xRIP); + MOVz_REG(x1, xRIP); SMEND(); if(box64_dynarec_callret) { // pop the actual return address for ARM stack @@ -496,18 +636,18 @@ void ret_to_epilog(dynarec_arm_t* dyn, int ninst) CLEARIP(); } -void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n) +void retn_to_epilog(dynarec_arm_t* dyn, int ninst, rex_t rex, int n) { MAYUSE(dyn); MAYUSE(ninst); MESSAGE(LOG_DUMP, "Retn to epilog\n"); - POP1(xRIP); + POP1z(xRIP); if(n>0xfff) { MOV32w(w1, n); - ADDx_REG(xRSP, xRSP, x1); + ADDz_REG(xRSP, xRSP, x1); } else { - ADDx_U12(xRSP, xRSP, n); + ADDz_U12(xRSP, xRSP, n); } - MOVx_REG(x1, xRIP); + MOVz_REG(x1, xRIP); SMEND(); if(box64_dynarec_callret) { // pop the actual return address for ARM stack @@ -546,9 +686,9 @@ void iret_to_epilog(dynarec_arm_t* dyn, int ninst, int is64bits) POP1(x2); POP1(xFlags); } else { - LDRw_S9_postindex(xRIP, xRSP, 4); - LDRw_S9_postindex(x2, xRSP, 4); - LDRw_S9_postindex(xFlags, xRSP, 4); + POP1_32(xRIP); + POP1_32(x2); + POP1_32(xFlags); } // x2 is CS STRH_U12(x2, xEmu, offsetof(x64emu_t, segs[_CS])); @@ -563,8 +703,8 @@ void 
iret_to_epilog(dynarec_arm_t* dyn, int ninst, int is64bits) POP1(x3); //rsp POP1(x2); //ss } else { - LDRw_S9_postindex(x3, xRSP, 4); - LDRw_S9_postindex(x2, xRSP, 4); + POP1_32(x3); //rsp + POP1_32(x2); //ss } // POP SS STRH_U12(x2, xEmu, offsetof(x64emu_t, segs[_SS])); diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h index 878094fe..ad395d13 100755 --- a/src/dynarec/arm64/dynarec_arm64_helper.h +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -80,6 +80,15 @@ LDx(x1, wback, fixedaddress); \ ed = x1; \ } +#define GETEDz(D) if(MODREG) { \ + ed = xRAX+(nextop&7)+(rex.b<<3); \ + wback = 0; \ + } else { \ + SMREAD(); \ + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, D); \ + LDz(x1, wback, fixedaddress); \ + ed = x1; \ + } #define GETEDw(D) if((nextop&0xC0)==0xC0) { \ ed = xEAX+(nextop&7)+(rex.b<<3); \ wback = 0; \ @@ -187,6 +196,16 @@ LDRx_REG(x1, wback, O); \ ed = x1; \ } +//GETEDOz can use r1 for ed, and r2 for wback. 
wback is 0 if ed is xEAX..xEDI +#define GETEDOz(O, D) if(MODREG) { \ + ed = xRAX+(nextop&7)+(rex.b<<3); \ + wback = 0; \ + } else { \ + SMREAD(); \ + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, D); \ + LDRz_REG(x1, wback, O); \ + ed = x1; \ + } #define GETSEDOw(O, D) if((nextop&0xC0)==0xC0) { \ ed = xRAX+(nextop&7)+(rex.b<<3); \ SXTWx(x1, ed); \ @@ -719,7 +738,7 @@ LDP_REGS(R12, R13); \ LDP_REGS(R14, R15) -#define SET_DFNONE(S) if(!dyn->f.dfnone) {MOVZw(S, d_none); STRw_U12(S, xEmu, offsetof(x64emu_t, df)); dyn->f.dfnone=1;} +#define SET_DFNONE(S) if(!dyn->f.dfnone) {STRw_U12(wZR, xEmu, offsetof(x64emu_t, df)); dyn->f.dfnone=1;} #define SET_DF(S, N) if((N)!=d_none) {MOVZw(S, (N)); STRw_U12(S, xEmu, offsetof(x64emu_t, df)); dyn->f.dfnone=0;} else SET_DFNONE(S) #define SET_NODF() dyn->f.dfnone = 0 #define SET_DFOK() dyn->f.dfnone = 1 @@ -998,8 +1017,8 @@ uintptr_t geted16(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, // generic x64 helper void jump_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst); void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst); -void ret_to_epilog(dynarec_arm_t* dyn, int ninst); -void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n); +void ret_to_epilog(dynarec_arm_t* dyn, int ninst, rex_t rex); +void retn_to_epilog(dynarec_arm_t* dyn, int ninst, rex_t rex, int n); void iret_to_epilog(dynarec_arm_t* dyn, int ninst, int is64bits); void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg); void call_n(dynarec_arm_t* dyn, int ninst, void* fnc, int w); diff --git a/src/dynarec/arm64/dynarec_arm64_pass0.h b/src/dynarec/arm64/dynarec_arm64_pass0.h index cd1cdc6a..ec642194 100755 --- a/src/dynarec/arm64/dynarec_arm64_pass0.h +++ b/src/dynarec/arm64/dynarec_arm64_pass0.h @@ -41,8 +41,9 @@ --dyn->size; \ *ok = -1; \ if(box64_dynarec_log>=LOG_INFO || box64_dynarec_dump || box64_dynarec_missing) {\ - 
dynarec_log(LOG_NONE, "%p: Dynarec stopped because of Opcode %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X", \ - (void*)ip, PKip(0), \ + dynarec_log(LOG_NONE, "%p: Dynarec stopped because of %sOpcode %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X", \ + (void*)ip, rex.is32bits?"32bits ":"",\ + PKip(0), \ PKip(1), PKip(2), PKip(3), \ PKip(4), PKip(5), PKip(6), \ PKip(7), PKip(8), PKip(9), \ diff --git a/src/dynarec/arm64/dynarec_arm64_pass3.h b/src/dynarec/arm64/dynarec_arm64_pass3.h index bdc41db9..875e8af9 100755 --- a/src/dynarec/arm64/dynarec_arm64_pass3.h +++ b/src/dynarec/arm64/dynarec_arm64_pass3.h @@ -17,6 +17,6 @@ if(ninst) \ addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst-1].x64.size, dyn->insts[ninst-1].size/4); #define INST_EPILOG -#define INST_NAME(name) inst_name_pass3(dyn, ninst, name) +#define INST_NAME(name) inst_name_pass3(dyn, ninst, name, rex) #define TABLE64(A, V) {int val64offset = Table64(dyn, (V), 3); MESSAGE(LOG_DUMP, " Table64: 0x%lx\n", (V)); LDRx_literal(A, val64offset);} #define FTABLE64(A, V) {mmx87_regs_t v = {.d = V}; int val64offset = Table64(dyn, v.q, 3); MESSAGE(LOG_DUMP, " FTable64: %g\n", v.d); VLDR64_literal(A, val64offset);} diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c index 0d3bd780..29162041 100755 --- a/src/dynarec/dynarec_native.c +++ b/src/dynarec/dynarec_native.c @@ -547,7 +547,7 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int is32bits) { helper.table64 = (uint64_t*)helper.tablestart; // pass 3, emit (log emit native opcode) if(box64_dynarec_dump) { - dynarec_log(LOG_NONE, "%s%04d|Emitting %zu bytes for %u x64 bytes", (box64_dynarec_dump>1)?"\e[01;36m":"", GetTID(), helper.native_size, helper.isize); + dynarec_log(LOG_NONE, "%s%04d|Emitting %zu bytes for %u %s bytes", (box64_dynarec_dump>1)?"\e[01;36m":"", GetTID(), helper.native_size, helper.isize, is32bits?"x86":"x64"); printFunctionAddr(helper.start, " => "); 
dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump>1)?"\e[m":""); } diff --git a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h index fc461f0a..22597270 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass0.h +++ b/src/dynarec/rv64/dynarec_rv64_pass0.h @@ -42,8 +42,9 @@ --dyn->size; \ *ok = -1; \ if(box64_dynarec_log>=LOG_INFO || box64_dynarec_dump || box64_dynarec_missing) {\ - dynarec_log(LOG_NONE, "%p: Dynarec stopped because of Opcode %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X", \ - (void*)ip, PKip(0), \ + dynarec_log(LOG_NONE, "%p: Dynarec stopped because of %sOpcode %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X", \ + (void*)ip, rex.is32bits?"32bits ":"",\ + PKip(0), \ PKip(1), PKip(2), PKip(3), \ PKip(4), PKip(5), PKip(6), \ PKip(7), PKip(8), PKip(9), \ |