From c2a0d7bc32ee6068c8b99e3be67c112678c4d517 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Tue, 16 Mar 2021 14:34:44 +0100 Subject: [DYNAREC] Added a few more opcodes, some arm64_printer opcodes, and made sure it works --- src/dynarec/arm64_emitter.h | 89 ++++++----- src/dynarec/arm64_epilog.S | 44 +++--- src/dynarec/arm64_next.S | 5 +- src/dynarec/arm64_printer.c | 272 +++++++++++++++++++++++++++++++++- src/dynarec/arm64_printer.h | 2 +- src/dynarec/arm64_prolog.S | 2 +- src/dynarec/dynablock.c | 2 +- src/dynarec/dynarec_arm64.c | 33 ++++- src/dynarec/dynarec_arm64_00.c | 94 +++++++++++- src/dynarec/dynarec_arm64_emit_math.c | 13 +- src/dynarec/dynarec_arm64_functions.c | 6 +- src/dynarec/dynarec_arm64_helper.c | 26 ++-- src/dynarec/dynarec_arm64_helper.h | 36 ++++- src/dynarec/dynarec_arm64_pass2.h | 3 +- src/dynarec/dynarec_arm64_pass3.h | 3 +- src/dynarec/dynarec_arm64_private.h | 5 + src/emu/x64emu_private.h | 17 ++- 17 files changed, 540 insertions(+), 112 deletions(-) (limited to 'src') diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h index 84d9cdaf..52b58dfc 100755 --- a/src/dynarec/arm64_emitter.h +++ b/src/dynarec/arm64_emitter.h @@ -124,12 +124,12 @@ #define ADDSw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 0, 1, 0b00, Rm, 0, Rn, Rd)) #define ADDxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 0, 0, 0b00, Rm, 0, Rn, Rd)) -#define ADDSUB_IMM_gen(sf, op, S, shift, imm12, Rn, Rd) ((sf)<<31 | (op)<<30 | (S)<<29 | 0b10001<<24 | 1<<23 | (shift)<<22 | (imm12)<<10 | (Rn)<<5 | (Rd)) -#define ADDx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 0, 0, 0, (imm12)&0xfff, Rn, Rd)) -#define ADDSx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 0, 0, 0, (imm12)&0xfff, Rn, Rd)) -#define ADDw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 0, 0, 0, (imm12)&0xfff, Rn, Rd)) -#define ADDSw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 0, 0, 0, (imm12)&0xfff, Rn, Rd)) -#define ADDxw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.w, 0, 0, 0, (imm12)&0xfff, Rn, Rd)) +#define ADDSUB_IMM_gen(sf, op, S, shift, imm12, Rn, Rd) ((sf)<<31 | (op)<<30 | (S)<<29 | 0b10001<<24 | (shift)<<22 | (imm12)<<10 | (Rn)<<5 | (Rd)) +#define ADDx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 0, 0, 0b00, (imm12)&0xfff, Rn, Rd)) +#define ADDSx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 0, 1, 0b00, (imm12)&0xfff, Rn, Rd)) +#define ADDw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 0, 0, 0b00, (imm12)&0xfff, Rn, Rd)) +#define ADDSw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 0, 1, 0b00, (imm12)&0xfff, Rn, Rd)) +#define ADDxw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.w, 0, 0, 0b00, (imm12)&0xfff, Rn, Rd)) #define SUBx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 1, 0, 0b00, Rm, 0, Rn, Rd)) #define SUBSx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 1, 1, 0b00, Rm, 0, Rn, Rd)) @@ -138,27 +138,33 @@ #define SUBSw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, 0, Rn, Rd)) #define SUBxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 1, 0, 0b00, Rm, 0, Rn, Rd)) #define SUBSxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 1, 1, 0b00, Rm, 0, Rn, Rd)) - -#define SUBx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 1, 0, 0, (imm12)&0xfff, Rn, Rd)) -#define SUBSx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 1, 0, 0, (imm12)&0xfff, Rn, Rd)) -#define SUBw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 1, 0, 0, (imm12)&0xfff, Rn, Rd)) -#define SUBSw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 1, 0, 0, (imm12)&0xfff, Rn, Rd)) -#define SUBxw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.w, 1, 0, 0, (imm12)&0xfff, Rn, Rd)) -#define SUBSxw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.w, 1, 0, 0, (imm12)&0xfff, Rn, Rd)) +#define CMPSx_REG(Rn, Rm) SUBSx_REG(xZR, Rn, Rm) +#define CMPSw_REG(Rn, Rm) SUBSw_REG(wZR, Rn, Rm) +#define CMPSxw_REG(Rn, Rm) SUBSxw_REG(xZR, Rn, Rm) + +#define SUBx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 1, 0, 0b00, (imm12)&0xfff, Rn, Rd)) +#define SUBSx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd)) +#define SUBw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 1, 0, 0b00, (imm12)&0xfff, Rn, Rd)) +#define SUBSw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd)) +#define SUBxw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.w, 1, 0, 0b00, (imm12)&0xfff, Rn, Rd)) +#define SUBSxw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(rex.w, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd)) +#define CMPSx_U12(Rn, imm12) SUBSx_U12(xZR, Rn, imm12) +#define CMPSw_U12(Rn, imm12) SUBSw_U12(wZR, Rn, imm12) +#define CMPSxw_U12(Rn, imm12) SUBSxw_U12(xZR, Rn, imm12) // LDR -#define LDR_gen(size, op1, imm9, op2, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b01<<22 | (imm9)<<12 | (op2)<<10 | 0b01<<10 | (Rn)<<5 | (Rt)) +#define LDR_gen(size, op1, imm9, op2, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b01<<22 | (imm9)<<12 | (op2)<<10 | (Rn)<<5 | (Rt)) #define LDRx_S9_postindex(Rt, Rn, imm9) EMIT(LDR_gen(0b11, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) #define LDRx_S9_preindex(Rt, Rn, imm9) EMIT(LDR_gen(0b11, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt)) #define LDRw_S9_postindex(Rt, Rn, imm9) EMIT(LDR_gen(0b10, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) #define LDRw_S9_preindex(Rt, Rn, imm9) EMIT(LDR_gen(0b10, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt)) -#define LD_gen(size, op1, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b01<<22 | (imm12)<<10 | 0b01<<10 | (Rn)<<5 | (Rt)) -#define LDRx_U12(Rt, Rn, imm12) EMIT(LD_gen(0b11, 0b01, ((uint32_t)(imm12))&0xfff, Rn, Rt)) -#define LDRw_U12(Rt, Rn, imm12) EMIT(LD_gen(0b10, 0b01, ((uint32_t)(imm12))&0xfff, Rn, Rt)) +#define LD_gen(size, op1, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b01<<22 | (imm12)<<10 | (Rn)<<5 | (Rt)) +#define LDRx_U12(Rt, Rn, imm12) EMIT(LD_gen(0b11, 0b01, ((uint32_t)(imm12>>3))&0xfff, Rn, Rt)) +#define LDRw_U12(Rt, Rn, imm12) EMIT(LD_gen(0b10, 0b01, ((uint32_t)(imm12>>2))&0xfff, Rn, Rt)) #define LDRB_U12(Rt, Rn, imm12) EMIT(LD_gen(0b00, 0b01, ((uint32_t)(imm12))&0xfff, Rn, Rt)) -#define LDRH_U12(Rt, Rn, imm12) EMIT(LD_gen(0b01, 0b01, ((uint32_t)(imm12))&0xfff, Rn, Rt)) -#define LDRxw_U12(Rt, Rn, imm12) EMIT(LD_gen((rex.w)?0b11:0b10, 0b01, ((uint32_t)(imm12))&0xfff, Rn, Rt)) +#define LDRH_U12(Rt, Rn, imm12) EMIT(LD_gen(0b01, 0b01, ((uint32_t)(imm12>>1))&0xfff, Rn, Rt)) +#define LDRxw_U12(Rt, Rn, imm12) EMIT(LD_gen((rex.w)?0b11:0b10, 0b01, ((uint32_t)(imm12>>(2+rex.w)))&0xfff, Rn, Rt)) #define LDR_REG_gen(size, Rm, option, S, Rn, Rt) ((size)<<30 | 0b111<<27 | 0b01<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | (0b10)<<10 | (Rn)<<5 | (Rt)) #define LDRx_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b11, Rm, 0b011, 0, Rn, Rt)) @@ -169,23 +175,26 @@ #define LDRB_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b00, Rm, 0b011, 0, Rn, Rt)) #define LDRH_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b01, Rm, 0b011, 0, Rn, Rt)) -#define LDRSH_gen(size, op1, opc, imm9, op2, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | (opc)<<22 | (imm9)<<12 | (op2)<<10 | 0b01<<10 | (Rn)<<5 | (Rt)) -#define LDRSHx_U12(Rt, Rn, imm12) EMIT(LD_gen(0b01, 0b00, 0b10, ((uint32_t)(imm12))&0xfff, Rn, Rt)) -#define LDRSHw_U12(Rt, Rn, imm12) EMIT(LD_gen(0b01, 0b00, 0b11, ((uint32_t)(imm12))&0xfff, Rn, Rt)) +#define LDRSH_gen(size, op1, opc, imm9, op2, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | (opc)<<22 | (imm9)<<12 | (op2)<<10 | (Rn)<<5 | (Rt)) +#define LDRSHx_U12(Rt, Rn, imm12) EMIT(LD_gen(0b01, 0b00, 0b10, ((uint32_t)(imm12>>2))&0xfff, Rn, Rt)) +#define LDRSHw_U12(Rt, Rn, imm12) EMIT(LD_gen(0b01, 0b00, 0b11, ((uint32_t)(imm12>2))&0xfff, Rn, Rt)) + +#define LDR_PC_gen(opc, imm19, Rt) ((opc)<<30 | 0b011<<27 | (imm19)<<5 | (Rt)) +#define LDRx_literal(Rt, imm19) EMIT(LDR_PC_gen(0b01, ((imm19)>>2)&0x7FFFF, Rt)) // STR -#define STR_gen(size, op1, imm9, op2, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b00<<22 | (imm9)<<12 | (op2)<<10 | 0b01<<10 | (Rn)<<5 | (Rt)) +#define STR_gen(size, op1, imm9, op2, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b00<<22 | (imm9)<<12 | (op2)<<10 | (Rn)<<5 | (Rt)) #define STRx_S9_postindex(Rt, Rn, imm9) EMIT(STR_gen(0b11, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) #define STRx_S9_preindex(Rt, Rn, imm9) EMIT(STR_gen(0b11, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt)) #define STRw_S9_postindex(Rt, Rn, imm9) EMIT(STR_gen(0b10, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt)) #define STRw_S9_preindex(Rt, Rn, imm9) EMIT(STR_gen(0b10, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt)) -#define ST_gen(size, op1, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b00<<22 | (imm12)<<10 | 0b01<<10 | (Rn)<<5 | (Rt)) -#define STRx_U12(Rt, Rn, imm12) EMIT(ST_gen(0b11, 0b01, ((uint32_t)(imm12))&0xfff, Rn, Rt)) -#define STRw_U12(Rt, Rn, imm12) EMIT(ST_gen(0b10, 0b01, ((uint32_t)(imm12))&0xfff, Rn, Rt)) +#define ST_gen(size, op1, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b00<<22 | (imm12)<<10 | (Rn)<<5 | (Rt)) +#define STRx_U12(Rt, Rn, imm12) EMIT(ST_gen(0b11, 0b01, ((uint32_t)(imm12>>3))&0xfff, Rn, Rt)) +#define STRw_U12(Rt, Rn, imm12) EMIT(ST_gen(0b10, 0b01, ((uint32_t)(imm12>>2))&0xfff, Rn, Rt)) #define STRB_U12(Rt, Rn, imm12) EMIT(ST_gen(0b00, 0b01, ((uint32_t)(imm12))&0xfff, Rn, Rt)) -#define STRH_U12(Rt, Rn, imm12) EMIT(ST_gen(0b01, 0b01, ((uint32_t)(imm12))&0xfff, Rn, Rt)) -#define STRxw_U12(Rt, Rn, imm12) EMIT(ST_gen((rex.w)?0b11:0b10, 0b01, ((uint32_t)(imm12))&0xfff, Rn, Rt)) +#define STRH_U12(Rt, Rn, imm12) EMIT(ST_gen(0b01, 0b01, ((uint32_t)(imm12>>1))&0xfff, Rn, Rt)) +#define STRxw_U12(Rt, Rn, imm12) EMIT(ST_gen((rex.w)?0b11:0b10, 0b01, ((uint32_t)((imm12)>>(2+rex.w)))&0xfff, Rn, Rt)) #define STR_REG_gen(size, Rm, option, S, Rn, Rt) ((size)<<30 | 0b111<<27 | 0b00<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | (0b10)<<10 | (Rn)<<5 | (Rt)) #define STRx_REG(Rt, Rn, Rm) EMIT(STR_REG_gen(0b11, Rm, 0b011, 0, Rn, Rt)) @@ -206,13 +215,13 @@ #define BLR(Rn) EMIT(BR_gen(0, 0b01, 0, 0, Rn, 0)) #define CB_gen(sf, op, imm19, Rt) ((sf)<<31 | 0b011010<<25 | (op)<<24 | (imm19)<<5 | (Rt)) -#define CBNZx(Rt, imm19) EMIT(CB_gen(1, 1, ((imm19)>>2)&0x80000, Rt)) -#define CBNZw(Rt, imm19) EMIT(CB_gen(0, 1, ((imm19)>>2)&0x80000, Rt)) -#define CBZx(Rt, imm19) EMIT(CB_gen(1, 0, ((imm19)>>2)&0x80000, Rt)) -#define CBZw(Rt, imm19) EMIT(CB_gen(0, 0, ((imm19)>>2)&0x80000, Rt)) +#define CBNZx(Rt, imm19) EMIT(CB_gen(1, 1, ((imm19)>>2)&0x7FFFF, Rt)) +#define CBNZw(Rt, imm19) EMIT(CB_gen(0, 1, ((imm19)>>2)&0x7FFFF, Rt)) +#define CBZx(Rt, imm19) EMIT(CB_gen(1, 0, ((imm19)>>2)&0x7FFFF, Rt)) +#define CBZw(Rt, imm19) EMIT(CB_gen(0, 0, ((imm19)>>2)&0x7FFFF, Rt)) #define Bcond_gen(imm19, cond) (0b0101010<<25 | (imm19)<<5 | (cond)) -#define Bcond(cond, imm19) EMIT(Bcond_gen(((imm19)>>2)&0x80000, cond)) +#define Bcond(cond, imm19) EMIT(Bcond_gen(((imm19)>>2)&0x7FFFF, cond)) // AND / ORR #define LOGIC_gen(sf, opc, N, immr, imms, Rn, Rd) ((sf)<<31 | (opc)<<29 | 0b100100<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn) | Rd) @@ -238,14 +247,14 @@ #define ORNx_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b01, 0b00, 1, Rm, 0, Rn, Rd)) #define ORNw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(0, 0b01, 0b00, 1, Rm, 0, Rn, Rd)) #define ORNxw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(rex.w, 0b01, 0b00, 1, Rm, 0, Rn, Rd)) -#define MOVx(Rm, Rd) ORRx_REG(Rd, xZR, Rm) +#define MOVx(Rd, Rm) ORRx_REG(Rd, xZR, Rm) #define MOVx_LSL(Rm, Rd, lsl) ORRx_REG_LSL(Rd, xZR, Rm, lsl) -#define MOVw(Rm, Rd) ORRw_REG(Rd, xZR, Rm) -#define MOVxw(Rm, Rd) ORRxw_REG(Rd, xZR, Rm) -#define MVNx(Rm, Rd) ORNx_REG(Rd, xZR, Rm) -#define MVNx_LSL(Rm, Rd, lsl) ORNx_REG_LSL(Rd, xZR, Rm, lsl) -#define MVNw(Rm, Rd) ORNw_REG(Rd, xZR, Rm) -#define MVNxw(Rm, Rd) ORNxw_REG(Rd, xZR, Rm) +#define MOVw(Rd, Rm) ORRw_REG(Rd, xZR, Rm) +#define MOVxw(Rd, Rm) ORRxw_REG(Rd, xZR, Rm) +#define MVNx(Rd, Rm) ORNx_REG(Rd, xZR, Rm) +#define MVNx_LSL(Rd, Rm, lsl) ORNx_REG_LSL(Rd, xZR, Rm, lsl) +#define MVNw(Rd, Rm) ORNw_REG(Rd, xZR, Rm) +#define MVNxw(Rd, Rm) ORNxw_REG(Rd, xZR, Rm) #define MOV_frmSP(Rd) ADDx_U12(Rd, xSP, 0) #define MOV_toSP(Rm) ADDx_U12(xSP, Rm, 0) #define BICx(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b00, 0b00, 1, Rm, 0, Rn, Rd)) diff --git a/src/dynarec/arm64_epilog.S b/src/dynarec/arm64_epilog.S index af39c1ba..99723873 100755 --- a/src/dynarec/arm64_epilog.S +++ b/src/dynarec/arm64_epilog.S @@ -9,8 +9,8 @@ .global arm64_epilog arm64_epilog: //update register -> emu - str x10, [x0, (8 * 0)] - str x11, [x0, (8 * 1)] + str x10, [x0, (8 * 0)] + str x11, [x0, (8 * 1)] str x12, [x0, (8 * 2)] str x13, [x0, (8 * 3)] str x14, [x0, (8 * 4)] @@ -26,7 +26,7 @@ arm64_epilog: str x24, [x0, (8 * 14)] str x25, [x0, (8 * 15)] str x26, [x0, (8 * 16)] - str x27, [x0, (8 * 17)] // put back reg value in emu, including EIP (so x25 must be EIP now) + str x27, [x0, (8 * 17)] // put back reg value in emu, including EIP (so x27 must be EIP now) //restore all used register //vpop {d8-d15} ldr x10, [sp, (8 * 0)] @@ -48,7 +48,7 @@ arm64_epilog: ldr x26, [sp, (8 * 16)] ldr x27, [sp, (8 * 17)] add sp, sp, (8 * 18) - ldp lr, fp, [sp, 16]! // saved lr + ldp lr, fp, [sp], 16 // saved lr //end, return... ret @@ -57,24 +57,24 @@ arm64_epilog: arm64_epilog_fast: //restore all used register //vpop {d8-d15} - ldr x8, [sp, (8 * 0)] - ldr x9, [sp, (8 * 1)] - ldr x10, [sp, (8 * 2)] - ldr x11, [sp, (8 * 3)] - ldr x12, [sp, (8 * 4)] - ldr x13, [sp, (8 * 5)] - ldr x14, [sp, (8 * 6)] - ldr x15, [sp, (8 * 7)] - ldr x16, [sp, (8 * 8)] - ldr x17, [sp, (8 * 9)] - ldr x18, [sp, (8 * 10)] - ldr x19, [sp, (8 * 11)] - ldr x20, [sp, (8 * 12)] - ldr x21, [sp, (8 * 13)] - ldr x22, [sp, (8 * 14)] - ldr x23, [sp, (8 * 15)] - ldr x24, [sp, (8 * 16)] - ldr x25, [sp, (8 * 17)] + ldr x10, [sp, (8 * 0)] + ldr x11, [sp, (8 * 1)] + ldr x12, [sp, (8 * 2)] + ldr x13, [sp, (8 * 3)] + ldr x14, [sp, (8 * 4)] + ldr x15, [sp, (8 * 5)] + ldr x16, [sp, (8 * 6)] + ldr x17, [sp, (8 * 7)] + ldr x18, [sp, (8 * 8)] + ldr x19, [sp, (8 * 9)] + ldr x20, [sp, (8 * 10)] + ldr x21, [sp, (8 * 11)] + ldr x22, [sp, (8 * 12)] + ldr x23, [sp, (8 * 13)] + ldr x24, [sp, (8 * 14)] + ldr x25, [sp, (8 * 15)] + ldr x26, [sp, (8 * 16)] + ldr x27, [sp, (8 * 17)] add sp, sp, (8 * 18) ldp lr, fp, [sp, 16]! // saved lr //end, return... diff --git a/src/dynarec/arm64_next.S b/src/dynarec/arm64_next.S index 2410750c..47dfe737 100755 --- a/src/dynarec/arm64_next.S +++ b/src/dynarec/arm64_next.S @@ -11,9 +11,8 @@ .global arm64_next arm64_next: // emu is r0 - // don't put put back reg value in emu, faster but more tricky to debug // IP address is r1 - sub sp, sp, (8 * 11) + sub sp, sp, (8 * 12) str x0, [sp, (8 * 0)] str x1, [sp, (8 * 1)] str x10, [sp, (8 * 2)] @@ -41,7 +40,7 @@ arm64_next: ldr x16, [sp, (8 * 8)] ldr x17, [sp, (8 * 9)] ldr x18, [sp, (8 * 10)] - add sp, sp, (8 * 11) + add sp, sp, (8 * 12) // return offset is jump address br x3 diff --git a/src/dynarec/arm64_printer.c b/src/dynarec/arm64_printer.c index 49539a1a..dcfa3032 100755 --- a/src/dynarec/arm64_printer.c +++ b/src/dynarec/arm64_printer.c @@ -4,11 +4,279 @@ #include #include "arm64_printer.h" +#include "debug.h" -const char* arm64_print(uint32_t opcode) +static const char* Xt[] = {"xEmu", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "xRAX", "xRCX", "xRDX", "xRBX", "xRSP", "xRBP", "xRSI", "xRDI", "xR8", "xR9", "xR10", "xR11", "xR12", "xR13", "xR14", "xR15", "xFlags", "xRIP", "x28", "FP", "LR", "xZR"}; +static const char* XtSp[] = {"xEmu", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "xRAX", "xRCX", "xRDX", "xRBX", "xRSP", "xRBP", "xRSI", "xRDI", "xR8", "xR9", "xR10", "xR11", "xR12", "xR13", "xR14", "xR15", "xFlags", "xRIP", "x28", "FP", "LR", "SP"}; +static const char* Wt[] = {"w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "wEAX", "wECX", "wEDX", "wEBX", "wESP", "wEBP", "wESI", "wEDI", "wR8", "wR9", "wR10", "wR11", "wR12", "wR13", "wR14", "wR15", "wFlags", "w27", "w28", "w29", "w30", "wZR"}; +static const char* WtSp[] = {"w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", "w8", "w9", "wEAX", "wECX", "wEDX", "wEBX", "wESP", "wEBP", "wESI", "wEDI", "wR8", "wR9", "wR10", "wR11", "wR12", "wR13", "wR14", "wR15", "wFlags", "w27", "w28", "w29", "w30", "wSP"}; + +static const char* conds[] = {"cEQ", "cNE", "cCS", "cCC", "cMI", "cPL", "cVS", "cVC", "cHI", "cLS", "cGE", "cLT", "cGT", "cLE", "c__", "inv"}; + +typedef struct arm64_print_s { + int N, S; + int t, n, m, d; + int f, c, o, h; + int i, r, s; + int x, w; +} arm64_print_t; + +int isMask(uint32_t opcode, const char* mask, arm64_print_t *a) +{ + if(strlen(mask)!=32) { + printf_log(LOG_NONE, "Error: printer mask \"%s\" in not len 32 but %ld\n", mask, strlen(mask)); + return 0; + } + memset(a, 0, sizeof(*a)); + int i = 31; + while(*mask) { + uint8_t v = (opcode>>i)&1; + switch(*mask) { + case '0': if(v!=0) return 0; break; + case '1': if(v!=1) return 0; break; + case 'N': a->N = (a->N<<1) | v; break; + case 'S': a->S = (a->S<<1) | v; break; + case 't': a->t = (a->t<<1) | v; break; + case 'n': a->n = (a->n<<1) | v; break; + case 'm': a->m = (a->m<<1) | v; break; + case 'd': a->d = (a->d<<1) | v; break; + case 'f': a->f = (a->f<<1) | v; break; + case 'c': a->c = (a->c<<1) | v; break; + case 'i': a->i = (a->i<<1) | v; break; + case 'r': a->r = (a->r<<1) | v; break; + case 's': a->s = (a->s<<1) | v; break; + case 'o': a->o = (a->o<<1) | v; break; + case 'h': a->h = (a->h<<1) | v; break; + case 'w': a->w = (a->w<<1) | v; break; + case 'x': a->x = (a->x<<1) | v; break; + default: + printf_log(LOG_NONE, "Warning, printer mask use unhandled '%c'\n", *mask); + } + mask++; + --i; + } + + return 1; +} + +int64_t signExtend(uint32_t val, int sz) +{ + int64_t ret = val; + if((val>>(sz-1))&1 == 1) + val |= (0xffffffffffffffffll<>30)&3; + int offset = signExtend(imm, 9); + snprintf(buff, sizeof(buff), "LDR %s, [%s], %d", (size==0b10)?Wt[Rt]:Xt[Rt], XtSp[Rn], offset); + return buff; + } + if(isMask(opcode, "1x111000010iiiiiiiii11nnnnnttttt", &a)) { + int size = (opcode>>30)&3; + int offset = signExtend(imm, 9); + snprintf(buff, sizeof(buff), "LDR %s, [%s, %d]!", (size==0b10)?Wt[Rt]:Xt[Rt], XtSp[Rn], offset); + return buff; + } + if(isMask(opcode, "1x11100101iiiiiiiiiiiinnnnnttttt", &a)) { + int size = (opcode>>30)&3; + int offset = (imm)<>30)&1)?3:2; + int offset = signExtend(imm, 9)<<2; + snprintf(buff, sizeof(buff), "LDR %s, [#%+d]\t;%p", (size==2)?Wt[Rt]:Xt[Rt], offset, (void*)(addr+offset)); + return buff; + } + if(isMask(opcode, "1x111000011mmmmmoooS10nnnnnttttt", &a)) { + int size = (opcode>>30)&3; + const char* extend[] = {"?0", "?1", "UXTW", "LSL", "?4", "?5", "SXTW", "SXTX"}; + int amount = size*a.S; + if(option==3 && !amount) + snprintf(buff, sizeof(buff), "LDR %s, [%s, %s]", (size==2)?Wt[Rt]:Xt[Rt], XtSp[Rn], ((option&1)==0)?Wt[Rm]:Xt[Rm]); + else if(!amount) + if(option==3 && !amount) + snprintf(buff, sizeof(buff), "LDR %s, [%s, %s, %s]", (size==2)?Wt[Rt]:Xt[Rt], XtSp[Rn], ((option&1)==0)?Wt[Rm]:Xt[Rm], extend[option]); + else + snprintf(buff, sizeof(buff), "LDR %s, [%s, %s, %s %d]", (size==2)?Wt[Rt]:Xt[Rt], XtSp[Rn], ((option&1)==0)?Wt[Rm]:Xt[Rm], extend[option], amount); + return buff; + } + if(isMask(opcode, "1x111000000iiiiiiiii01nnnnnttttt", &a)) { + int size = (opcode>>30)&3; + int offset = signExtend(imm, 9); + snprintf(buff, sizeof(buff), "STR %s, [%s], %d", (size==0b10)?Wt[Rt]:Xt[Rt], XtSp[Rn], offset); + return buff; + } + if(isMask(opcode, "1x111000000iiiiiiiii11nnnnnttttt", &a)) { + int size = (opcode>>30)&3; + int offset = signExtend(imm, 9); + snprintf(buff, sizeof(buff), "STR %s, [%s, %d]!", (size==0b10)?Wt[Rt]:Xt[Rt], XtSp[Rn], offset); + return buff; + } + if(isMask(opcode, "1x11100100iiiiiiiiiiiinnnnnttttt", &a)) { + int size = (opcode>>30)&3; + int offset = (imm)<>30)&3; + const char* extend[] = {"?0", "?1", "UXTW", "LSL", "?4", "?5", "SXTW", "SXTX"}; + int amount = size*a.S; + if(option==3 && !amount) + snprintf(buff, sizeof(buff), "STR %s, [%s, %s]", (size==2)?Wt[Rt]:Xt[Rt], XtSp[Rn], ((option&1)==0)?Wt[Rm]:Xt[Rm]); + else if(!amount) + if(option==3 && !amount) + snprintf(buff, sizeof(buff), "STR %s, [%s, %s, %s]", (size==2)?Wt[Rt]:Xt[Rt], XtSp[Rn], ((option&1)==0)?Wt[Rm]:Xt[Rm], extend[option]); + else + snprintf(buff, sizeof(buff), "STR %s, [%s, %s, %s %d]", (size==2)?Wt[Rt]:Xt[Rt], XtSp[Rn], ((option&1)==0)?Wt[Rm]:Xt[Rm], extend[option], amount); + return buff; + } + // --- MOV + if(isMask(opcode, "f0101010000mmmmm00000011111ddddd", &a)) { + snprintf(buff, sizeof(buff), "MOV %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rm]:Wt[Rm]); + return buff; + } + if(isMask(opcode, "f10100101wwiiiiiiiiiiiiiiiiddddd", &a)) { + if(!hw) + snprintf(buff, sizeof(buff), "MOVZ %s, 0x%x", sf?Xt[Rd]:Wt[Rd], imm); + else + snprintf(buff, sizeof(buff), "MOVZ %s, 0x%x LSL %d", sf?Xt[Rd]:Wt[Rd], imm, 16*hw); + return buff; + } + if(isMask(opcode, "f11100101wwiiiiiiiiiiiiiiiiddddd", &a)) { + if(!hw) + snprintf(buff, sizeof(buff), "MOVK %s, 0x%x", sf?Xt[Rd]:Wt[Rd], imm); + else + snprintf(buff, sizeof(buff), "MOVK %s, 0x%x LSL %d", sf?Xt[Rd]:Wt[Rd], imm, 16*hw); + return buff; + } + + // --- MATH + if(isMask(opcode, "f1101011001mmmmmoooiiinnnnn11111", &a)) { + int R = 0; + if(option==0b011 || option==0b111) + R = 1; + const char* extends[] ={"UXTB", "UXTH", "UXTW", "UXTX", "SXTB", "SXTH", "SXTW", "SXTX"}; + if(imm==0 && option==(sf?3:2)) + snprintf(buff, sizeof(buff), "CMP %s, %s", sf?XtSp[Rn]:WtSp[Rn], (sf&&R)?Xt[Rm]:Wt[Rm]); + else + snprintf(buff, sizeof(buff), "CMP %s, %s, %s 0x%x", sf?XtSp[Rn]:WtSp[Rn], (sf&&R)?Xt[Rm]:Wt[Rm], extends[option], imm); + return buff; + } + if(isMask(opcode, "f11100010hiiiiiiiiiiiinnnnn11111", &a)) { + if(shift==0) + snprintf(buff, sizeof(buff), "CMP %s, 0x%x", sf?XtSp[Rn]:WtSp[Rn], imm); + else + snprintf(buff, sizeof(buff), "ADD %s, 0x%x", sf?XtSp[Rn]:WtSp[Rn], imm<<12); + return buff; + } + if(isMask(opcode, "f1101011hh0mmmmmiiiiiinnnnn11111", &a)) { + const char* shifts[] = { "LSL", "LSR", "ASR", "???"}; + if(shift==0 && imm==0) + snprintf(buff, sizeof(buff), "CMP %s, %s", sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm]); + else + snprintf(buff, sizeof(buff), "CMP %s, %s %s %d", sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], shifts[shift], imm); + return buff; + } + if(isMask(opcode, "f0010001hhiiiiiiiiiiiinnnnnddddd", &a)) { + if(shift==0) + snprintf(buff, sizeof(buff), "ADD %s, %s, 0x%x", sf?XtSp[Rd]:WtSp[Rd], sf?XtSp[Rn]:WtSp[Rn], imm); + else if (shift==1) + snprintf(buff, sizeof(buff), "ADD %s, %s, 0x%x", sf?XtSp[Rd]:WtSp[Rd], sf?XtSp[Rn]:WtSp[Rn], imm<<12); + else + snprintf(buff, sizeof(buff), "ADD with unhandled shift %d", shift); + return buff; + } + if(isMask(opcode, "f0110001hhiiiiiiiiiiiinnnnnddddd", &a)) { + if(shift==0) + snprintf(buff, sizeof(buff), "ADDS %s, %s, 0x%x", sf?Xt[Rd]:Wt[Rd], sf?XtSp[Rn]:WtSp[Rn], imm); + else if (shift==1) + snprintf(buff, sizeof(buff), "ADDS %s, %s, 0x%x", sf?Xt[Rd]:Wt[Rd], sf?XtSp[Rn]:WtSp[Rn], imm<<12); + else + snprintf(buff, sizeof(buff), "ADDS with unhandled shift %d", shift); + return buff; + } + if(isMask(opcode, "f0001011hh0mmmmmiiiiiinnnnnddddd", &a)) { + const char* shifts[] = { "LSL", "LSR", "ASR", "???"}; + if(shift==0 && imm==0) + snprintf(buff, sizeof(buff), "ADD %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm]); + else + snprintf(buff, sizeof(buff), "ADD %s, %s, %s %s %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], shifts[shift], imm); + return buff; + } + if(isMask(opcode, "f1010001hhiiiiiiiiiiiinnnnnddddd", &a)) { + if(shift==0) + snprintf(buff, sizeof(buff), "SUB %s, %s, 0x%x", sf?XtSp[Rd]:WtSp[Rd], sf?XtSp[Rn]:WtSp[Rn], imm); + else if (shift==1) + snprintf(buff, sizeof(buff), "SUB %s, %s, 0x%x", sf?XtSp[Rd]:WtSp[Rd], sf?XtSp[Rn]:WtSp[Rn], imm<<12); + else + snprintf(buff, sizeof(buff), "SUB with unhandled shift %d", shift); + return buff; + } + if(isMask(opcode, "f1110001hhiiiiiiiiiiiinnnnnddddd", &a)) { + if(shift==0) + snprintf(buff, sizeof(buff), "SUBS %s, %s, 0x%x", sf?Xt[Rd]:Wt[Rd], sf?XtSp[Rn]:WtSp[Rn], imm); + else if (shift==1) + snprintf(buff, sizeof(buff), "SUBS %s, %s, 0x%x", sf?Xt[Rd]:Wt[Rd], sf?XtSp[Rn]:WtSp[Rn], imm<<12); + else + snprintf(buff, sizeof(buff), "SUBS with unhandled shift %d", shift); + return buff; + } + if(isMask(opcode, "f1001011hh0mmmmmiiiiiinnnnnddddd", &a)) { + const char* shifts[] = { "LSL", "LSR", "ASR", "???"}; + if(shift==0 && imm==0) + snprintf(buff, sizeof(buff), "SUB %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm]); + else + snprintf(buff, sizeof(buff), "SUB %s, %s, %s %s %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], shifts[shift], imm); + return buff; + } + // ---- LOGIC + + // ---- BRANCH / TEST + if(isMask(opcode, "1101011000011111000000nnnnn00000", &a)) { + snprintf(buff, sizeof(buff), "BR %s", Xt[Rn]); + return buff; + } + if(isMask(opcode, "1101011000111111000000nnnnn00000", &a)) { + snprintf(buff, sizeof(buff), "BLR %s", Xt[Rn]); + return buff; + } + if(isMask(opcode, "01010100iiiiiiiiiiiiiiiiiii0cccc", &a)) { + int offset = signExtend(imm, 19)<<2; + snprintf(buff, sizeof(buff), "B.%s #+%d\t; %p", conds[cond], offset, (void*)(addr + offset)); + return buff; + } + if(isMask(opcode, "f0110100iiiiiiiiiiiiiiiiiiittttt", &a)) { + int offset = signExtend(imm, 19)<<2; + snprintf(buff, sizeof(buff), "CBZ %s, #%+d\t; %p", Xt[Rt], offset, (void*)(addr + offset)); + return buff; + } - snprintf(buff, sizeof(buff), "0x%8X ???", opcode); + snprintf(buff, sizeof(buff), "%08X ???", __builtin_bswap32(opcode)); return buff; } \ No newline at end of file diff --git a/src/dynarec/arm64_printer.h b/src/dynarec/arm64_printer.h index 6fe21c33..2475f631 100644 --- a/src/dynarec/arm64_printer.h +++ b/src/dynarec/arm64_printer.h @@ -1,6 +1,6 @@ #ifndef _ARM_PRINTER_H_ #define _ARM_PRINTER_H_ -const char* arm64_print(uint32_t opcode); +const char* arm64_print(uint32_t opcode, uint64_t addr); #endif //_ARM_PRINTER_H_ diff --git a/src/dynarec/arm64_prolog.S b/src/dynarec/arm64_prolog.S index f480f2ea..c1dc3b48 100755 --- a/src/dynarec/arm64_prolog.S +++ b/src/dynarec/arm64_prolog.S @@ -9,7 +9,7 @@ .global arm64_prolog arm64_prolog: //save all 18 used register - stp lr, fp, [sp, 16]! // save lr + stp lr, fp, [sp, -16]! // save lr sub sp, sp, (8 * 18) str x10, [sp, (8 * 0)] str x11, [sp, (8 * 1)] diff --git a/src/dynarec/dynablock.c b/src/dynarec/dynablock.c index 337e1751..06694e06 100755 --- a/src/dynarec/dynablock.c +++ b/src/dynarec/dynablock.c @@ -327,7 +327,7 @@ static dynablock_t* internalDBGetBlock(x64emu_t* emu, uintptr_t addr, uintptr_t pthread_mutex_lock(&my_context->mutex_dyndump); // fill the block block->x64_addr = (void*)addr; - if(FillBlock64(block, filladdr)) { + if(!FillBlock64(block, filladdr)) { void* old = (void*)arm64_lock_xchg(&dynablocks->direct[addr-dynablocks->text], 0); if(old!=block && old) {// put it back in place, strange things are happening here! dynarec_log(LOG_INFO, "Warning, a wild block appeared at %p: %p\n", (void*)addr, old); diff --git a/src/dynarec/dynarec_arm64.c b/src/dynarec/dynarec_arm64.c index 138fdb3a..9918f4a5 100755 --- a/src/dynarec/dynarec_arm64.c +++ b/src/dynarec/dynarec_arm64.c @@ -305,6 +305,26 @@ instsize_t* addInst(instsize_t* insts, size_t* size, size_t* cap, int x64_size, return insts; } +// add a value to etable64 (if needed) and gives back the imm19 to use in LDR_literal +int Table64(dynarec_arm_t *dyn, uint64_t val) +{ + // find the value if already present + int idx = -1; + for(int i=0; itable64size && (idx==-1); ++i) + if(dyn->table64[i] == val) + idx = i; + // not found, add it + if(idx==-1) { + idx = dyn->table64size++; + dyn->table64 = (uint64_t*)realloc(dyn->table64, dyn->table64size * sizeof(uint64_t)); + dyn->table64[idx] = val; + } + // calculate offset + int delta = dyn->tablestart + idx*sizeof(uint64_t) - (uintptr_t)dyn->block; + return delta; +} + + void arm_pass0(dynarec_arm_t* dyn, uintptr_t addr); void arm_pass1(dynarec_arm_t* dyn, uintptr_t addr); void arm_pass2(dynarec_arm_t* dyn, uintptr_t addr); @@ -356,16 +376,18 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr) { // pass 2, instruction size arm_pass2(&helper, addr); // ok, now allocate mapped memory, with executable flag on - int sz = helper.arm_size; + int sz = helper.arm_size + helper.table64size*sizeof(uint64_t); void* p = (void*)AllocDynarecMap(block, sz); if(p==NULL) { dynarec_log(LOG_DEBUG, "AllocDynarecMap(%p, %d) failed, cancelling block\n", block, sz); free(helper.insts); free(helper.next); + free(helper.table64); return NULL; } helper.block = p; helper.arm_start = (uintptr_t)p; + helper.tablestart = helper.arm_start + helper.arm_size; if(helper.sons_size) { helper.sons_x64 = (uintptr_t*)calloc(helper.sons_size, sizeof(uintptr_t)); helper.sons_arm = (void**)calloc(helper.sons_size, sizeof(void*)); @@ -378,8 +400,8 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr) { } helper.arm_size = 0; arm_pass3(&helper, addr); - if(sz!=helper.arm_size) { - printf_log(LOG_NONE, "BOX64: Warning, size difference in block between pass2 (%d) & pass3 (%d)!\n", sz, helper.arm_size); + if(sz!=(helper.arm_size + helper.table64size*8)) { + printf_log(LOG_NONE, "BOX64: Warning, size difference in block between pass2 (%d) & pass3 (%d)!\n", sz, helper.arm_size+helper.table64size*8); uint8_t *dump = (uint8_t*)helper.start; printf_log(LOG_NONE, "Dump of %d x64 opcodes:\n", helper.size); for(int i=0; isize = sz; block->isize = helper.size; block->block = p; diff --git a/src/dynarec/dynarec_arm64_00.c b/src/dynarec/dynarec_arm64_00.c index 17d0a8a9..a5d903af 100755 --- a/src/dynarec/dynarec_arm64_00.c +++ b/src/dynarec/dynarec_arm64_00.c @@ -92,7 +92,11 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0x56: case 0x57: INST_NAME("PUSH reg"); - gd = xRAX+(opcode&0x07)+(rex.r<<3); + gd = xRAX+(opcode&0x07)+(rex.b<<3); + if(gd==xRSP) { + MOVx(x1, gd); + gd = x1; + } PUSH1(gd); break; case 0x58: @@ -104,8 +108,13 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0x5E: case 0x5F: INST_NAME("POP reg"); - gd = xRAX+(opcode&0x07)+(rex.r<<3); - POP1(gd); + gd = xRAX+(opcode&0x07)+(rex.b<<3); + if(gd == xRSP) { + POP1(x1); + MOVx(gd, x1); + } else { + POP1(gd); + } break; case 0x81: @@ -132,9 +141,9 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin nextop=F8; GETGD; if(MODREG) { // reg <= reg - MOVxw(xRAX+(nextop&7), gd); + MOVxw(xRAX+(nextop&7)+(rex.b<<3), gd); } else { // mem <= reg - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 4095, 0, rex, 0, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, 0); STRxw_U12(gd, ed, fixedaddress); } break; @@ -153,6 +162,81 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } break; + case 0xE8: + INST_NAME("CALL Id"); + i32 = F32S; + if(addr+i32==0) { + #if STEP == 3 + printf_log(LOG_INFO, "Warning, CALL to 0x0 at %p (%p)\n", (void*)addr, (void*)(addr-1)); + #endif + } + #if STEP == 0 + if(isNativeCall(dyn, addr+i32, NULL, NULL)) + tmp = 3; + else + tmp = 0; + #elif STEP < 2 + if(isNativeCall(dyn, addr+i32, &dyn->insts[ninst].natcall, &dyn->insts[ninst].retn)) + tmp = dyn->insts[ninst].pass2choice = 3; + else + tmp = dyn->insts[ninst].pass2choice = 0; + #else + tmp = dyn->insts[ninst].pass2choice; + #endif + switch(tmp) { + case 3: + SETFLAGS(X_ALL, SF_SET); // Hack to set flags to "dont'care" state + BARRIER(1); + BARRIER_NEXT(1); + TABLE64(x2, addr); + PUSH1(x2); + MESSAGE(LOG_DUMP, "Native Call to %s (retn=%d)\n", GetNativeName(GetNativeFnc(dyn->insts[ninst].natcall-1)), dyn->insts[ninst].retn); + // calling a native function + x87_forget(dyn, ninst, x3, x4, 0); + TABLE64(xRIP, dyn->insts[ninst].natcall); // read the 0xCC already + STORE_XEMU_REGS(xRIP); + CALL_S(x64Int3, -1); + LOAD_XEMU_REGS(xRIP); + TABLE64(x3, dyn->insts[ninst].natcall); + ADDx_U12(x3, x3, 2+8+8); + CMPSx_REG(xRIP, x3); + B_MARK(cNE); // Not the expected address, exit dynarec block + POP1(xRIP); // pop the return address + if(dyn->insts[ninst].retn) { + ADDx_U12(xRSP, xRSP, dyn->insts[ninst].retn); + } + TABLE64(x3, addr); + CMPSx_REG(xRIP, x3); + B_MARK(cNE); // Not the expected address again + LDRw_U12(w1, xEmu, offsetof(x64emu_t, quit)); + CBZw_NEXT(w1); // not quitting, so lets continue + MARK; + jump_to_epilog(dyn, 0, xRIP, ninst); + break; + default: + if(ninst && dyn->insts && dyn->insts[ninst-1].x64.set_flags) { + READFLAGS(X_PEND); // that's suspicious + } else { + SETFLAGS(X_ALL, SF_SET); // Hack to set flags to "dont'care" state + } + // regular call + BARRIER(1); + BARRIER_NEXT(1); + if(!dyn->insts || ninst==dyn->size-1) { + *need_epilog = 0; + *ok = 0; + } + TABLE64(x2, addr); + PUSH1(x2); + if(addr+i32==0) { // self modifying code maybe? so use indirect address fetching + TABLE64(x4, addr-4); + LDRx_U12(x4, x4, 0); + jump_to_next(dyn, 0, x4, ninst); + } else + jump_to_next(dyn, addr+i32, 0, ninst); + break; + } + break; default: DEFAULT; diff --git a/src/dynarec/dynarec_arm64_emit_math.c b/src/dynarec/dynarec_arm64_emit_math.c index 64a9a00d..1c14991e 100755 --- a/src/dynarec/dynarec_arm64_emit_math.c +++ b/src/dynarec/dynarec_arm64_emit_math.c @@ -159,8 +159,8 @@ void emit_sub32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5) { IFX(X_PEND) { - STRw_U12(s1, xEmu, offsetof(x64emu_t, op1)); - STRw_U12(s2, xEmu, offsetof(x64emu_t, op2)); + STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRxw_U12(s2, xEmu, offsetof(x64emu_t, op2)); SET_DF(s3, d_sub32); } else IFX(X_ALL) { SET_DFNONE(s3); @@ -176,7 +176,7 @@ void emit_sub32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3 SUBxw_REG(s1, s1, s2); } IFX(X_PEND) { - STRx_U12(s1, xEmu, offsetof(x64emu_t, res)); + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); } IFX(X_AF) { ANDxw_REG(s3, s3, s1); // s3 = (~op1 | op2) & res @@ -216,7 +216,12 @@ void emit_sub32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in if(s1==xRSP && (!dyn->insts || dyn->insts[ninst].x64.need_flags==X_PEND)) { // special case when doing math on RSP and only PEND is needed: ignoring it! - SUBxw_U12(s1, s1, c); + if(c>=0 && c<0x1000) { + SUBxw_U12(s1, s1, c); + } else { + MOV64x(s5, c); + SUBxw_REG(s1, s1, s5); + } return; } IFX(X_PEND) { diff --git a/src/dynarec/dynarec_arm64_functions.c b/src/dynarec/dynarec_arm64_functions.c index d3a1e2bb..9ad74131 100755 --- a/src/dynarec/dynarec_arm64_functions.c +++ b/src/dynarec/dynarec_arm64_functions.c @@ -319,12 +319,12 @@ uintptr_t fakeed(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop) int isNativeCall(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn) { #define PK(a) *(uint8_t*)(addr+a) -#define PK64(a) *(uint64_t*)(addr+a) +#define PK32(a) *(int32_t*)(addr+a) if(!addr) return 0; - if(PK(0)==0xff && PK(1)==0x25) { // absolute jump, maybe the GOT - uintptr_t a1 = (PK64(2)); // need to add a check to see if the address is from the GOT ! + if(PK(0)==0xff && PK(1)==0x25) { // "absolute" jump, maybe the GOT (well, RIP relative in fact) + uintptr_t a1 = addr + 6 + (PK32(2)); // need to add a check to see if the address is from the GOT ! addr = *(uintptr_t*)a1; } if(addr<0x10000) // too low, that is suspicious diff --git a/src/dynarec/dynarec_arm64_helper.c b/src/dynarec/dynarec_arm64_helper.c index 9e7239bf..c0a084db 100755 --- a/src/dynarec/dynarec_arm64_helper.c +++ b/src/dynarec/dynarec_arm64_helper.c @@ -63,7 +63,7 @@ uintptr_t geted(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, u } else if((nextop&7)==5) { uint64_t tmp = F32S64; MOV64x(ret, tmp); - MOV64x(xRIP, addr+delta); + TABLE64(xRIP, addr+delta); ADDx_REG(ret, ret, xRIP); } else { ret = xRAX+(nextop&7)+(rex.b<<3); @@ -86,22 +86,22 @@ uintptr_t geted(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, u if (sib_reg!=4) { ADDx_REG_LSL(ret, xRAX+(sib&0x07)+(rex.b<<3), xRAX+sib_reg+(rex.x<<3), (sib>>6)); } else { - ret = xRAX+(sib&0x07)+rex.b<<3; + ret = xRAX+(sib&0x07)+(rex.b<<3); } } else - ret = xRAX+(nextop&0x07)+rex.b<<3; + ret = xRAX+(nextop&0x07)+(rex.b<<3); } else { int64_t sub = (i64<0)?1:0; if(sub) i64 = -i64; if(i64<0x1000) { if((nextop&7)==4) { if (sib_reg!=4) { - ADDx_REG_LSL(scratch, xRAX+(sib&0x07), xRAX+sib_reg, (sib>>6)); + ADDx_REG_LSL(scratch, xRAX+(sib&0x07)+(rex.b<<3), xRAX+sib_reg+(rex.x<<3), (sib>>6)); } else { - scratch = xRAX+(sib&0x07); + scratch = xRAX+(sib&0x07)+(rex.b<<3); } } else - scratch = xRAX+(nextop&0x07); + scratch = xRAX+(nextop&0x07)+(rex.b<<3); if(sub) { SUBx_U12(ret, scratch, i64); } else { @@ -225,9 +225,9 @@ void jump_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst) MOVx(xRIP, reg); } } else { - MOV64x(xRIP, ip); + TABLE64(xRIP, ip); } - MOV64x(x2, (uintptr_t)arm64_epilog); + TABLE64(x2, (uintptr_t)arm64_epilog); BR(x2); } @@ -240,7 +240,7 @@ void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst) MOVx(xRIP, reg); } uintptr_t tbl = getJumpTable64(); - MOV64x(x2, tbl); + TABLE64(x2, tbl); UBFXx(x3, xRIP, 48, JMPTABL_SHIFT); LDRx_REG_LSL3(x2, x2, x3); UBFXx(x3, xRIP, 32, JMPTABL_SHIFT); @@ -250,8 +250,8 @@ void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst) LDRx_REG_UXTW(x3, x2, xRIP); } else { uintptr_t p = getJumpTableAddress64(ip); - MOV64x(x2, p); - MOV64x(xRIP, ip); + TABLE64(x2, p); + TABLE64(xRIP, ip); LDRx_U12(x3, x2, 0); } MOVx(x1, xRIP); @@ -323,7 +323,7 @@ void iret_to_epilog(dynarec_arm_t* dyn, int ninst) void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags) { if(ret!=-2) { - STRx_S9_preindex(xSP, xEmu, -16); // ARM64 stack needs to be 16byte aligned + STRx_S9_preindex(xEmu, xSP, -16); // ARM64 stack needs to be 16byte aligned } fpu_pushcache(dyn, ninst, reg); if(saveflags) { @@ -336,7 +336,7 @@ void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int save MOVx(ret, xEmu); } if(ret!=-2) { - LDRx_S9_postindex(xSP, xEmu, 16); + LDRx_S9_postindex(xEmu, xSP, 16); } if(saveflags) { LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); diff --git a/src/dynarec/dynarec_arm64_helper.h b/src/dynarec/dynarec_arm64_helper.h index d4e5a543..c7f645b0 100755 --- a/src/dynarec/dynarec_arm64_helper.h +++ b/src/dynarec/dynarec_arm64_helper.h @@ -35,7 +35,7 @@ ed = xRAX+(nextop&7)+(rex.b<<3); \ wback = 0; \ } else { \ - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff, 0, rex, 0, D); \ + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, 0, D); \ LDRxw_U12(x1, wback, fixedaddress); \ ed = x1; \ } @@ -43,7 +43,7 @@ ed = xRAX+(nextop&7)+(rex.b<<3); \ wback = 0; \ } else { \ - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff, 0, rex, 0, D); \ + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<3, 3, rex, 0, D); \ LDRx_U12(x1, wback, fixedaddress); \ ed = x1; \ } @@ -51,7 +51,7 @@ ed = xEAX+(nextop&7)+(rex.b<<3); \ wback = 0; \ } else { \ - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff, 0, rex, 0, D); \ + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 2, rex, 0, D); \ LDRw_U12(x1, wback, fixedaddress); \ ed = x1; \ } @@ -84,7 +84,7 @@ #define SBACK(wb) if(wback) {STR_IMM9(wb, wback, fixedaddress);} else {MOV_REG(ed, wb);} //GETEDO can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI #define GETEDO(O) if((nextop&0xC0)==0xC0) { \ - ed = xEAX+(nextop&7); \ + ed = xEAX+(nextop&7)+(rex.b<<3); \ wback = 0; \ } else { \ addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0); \ @@ -246,6 +246,10 @@ #define B_NEXT(cond) \ j32 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->arm_size)):0; \ Bcond(cond, j32) +// Branch to next instruction if reg is 0 (use j32) +#define CBZw_NEXT(reg) \ + j32 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->arm_size)):0; \ + CBZw(reg, j32) // Branch to MARKSEG if cond (use j32) #define B_MARKSEG(cond) \ j32 = GETMARKSEG-(dyn->arm_size); \ @@ -322,6 +326,27 @@ STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); \ if(A) {STRx_U12(A, xEmu, offsetof(x64emu_t, ip));} +#define LOAD_REG(A) LDRx_U12(x##A, xEmu, offsetof(x64emu_t, regs[_##A])) +#define LOAD_XEMU_REGS(A) \ + LOAD_REG(RAX); \ + LOAD_REG(RCX); \ + LOAD_REG(RDX); \ + LOAD_REG(RBX); \ + LOAD_REG(RSP); \ + LOAD_REG(RBP); \ + LOAD_REG(RSI); \ + LOAD_REG(RDI); \ + LOAD_REG(R8); \ + LOAD_REG(R9); \ + LOAD_REG(R10); \ + LOAD_REG(R11); \ + LOAD_REG(R12); \ + LOAD_REG(R13); \ + LOAD_REG(R14); \ + LOAD_REG(R15); \ + LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); \ + if(A) {LDRx_U12(A, xEmu, offsetof(x64emu_t, ip));} + #define SET_DFNONE(S) if(!dyn->dfnone) {MOVZw(S, d_none); STRw_U12(S, xEmu, offsetof(x64emu_t, df)); dyn->dfnone=1;} #define SET_DF(S, N) if(N) {MOVZw(S, N); STRw_U12(S, xEmu, offsetof(x64emu_t, df)); dyn->dfnone=0;} else SET_DFNONE(S) #define SET_NODF() dyn->dfnone = 0 @@ -368,6 +393,9 @@ #ifndef NEW_BARRIER_INST #define NEW_BARRIER_INST #endif +#ifndef TABLE64 +#define TABLE64(A, V) +#endif #if STEP < 2 #define PASS2IF(A, B) if(A) diff --git a/src/dynarec/dynarec_arm64_pass2.h b/src/dynarec/dynarec_arm64_pass2.h index 095fe4f7..f0bf7fbd 100755 --- a/src/dynarec/dynarec_arm64_pass2.h +++ b/src/dynarec/dynarec_arm64_pass2.h @@ -6,4 +6,5 @@ #define NEW_INST if(ninst) {dyn->insts[ninst].address = (dyn->insts[ninst-1].address+dyn->insts[ninst-1].size);} #define INST_EPILOG dyn->insts[ninst].epilog = dyn->arm_size; #define INST_NAME(name) -#define NEW_BARRIER_INST if(ninst) ++dyn->sons_size \ No newline at end of file +#define NEW_BARRIER_INST if(ninst) ++dyn->sons_size +#define TABLE64(A, V) {Table64(dyn, (V)); EMIT(0);} \ No newline at end of file diff --git a/src/dynarec/dynarec_arm64_pass3.h b/src/dynarec/dynarec_arm64_pass3.h index 2f06a608..a94c61c7 100755 --- a/src/dynarec/dynarec_arm64_pass3.h +++ b/src/dynarec/dynarec_arm64_pass3.h @@ -1,7 +1,7 @@ #define INIT #define FINI #define EMIT(A) \ - if(box64_dynarec_dump) {dynarec_log(LOG_NONE, "\t%08x\t%s\n", (uint32_t)(A), arm64_print(A));} \ + if(box64_dynarec_dump) {dynarec_log(LOG_NONE, "\t%08x\t%s\n", (uint32_t)(A), arm64_print(A, (uintptr_t)dyn->block));} \ *(uint32_t*)(dyn->block) = (uint32_t)(A); \ dyn->block += 4; dyn->arm_size += 4;\ dyn->insts[ninst].size2 += 4 @@ -32,3 +32,4 @@ ++dyn->sons_size; \ } +#define TABLE64(A, V) {int val64offset = Table64(dyn, (V)); MESSAGE(LOG_DUMP, " Table64: 0x%lx\n", (V)); LDRx_literal(A, val64offset);} \ No newline at end of file diff --git a/src/dynarec/dynarec_arm64_private.h b/src/dynarec/dynarec_arm64_private.h index 2d82fd35..c375dd10 100755 --- a/src/dynarec/dynarec_arm64_private.h +++ b/src/dynarec/dynarec_arm64_private.h @@ -40,6 +40,9 @@ typedef struct dynarec_arm_s { int fpu_extra_qscratch; // some opcode need an extra quad scratch register int fpu_reg; // x87/sse/mmx reg counter int dfnone; // if defered flags is already set to df_none + uint64_t *table64; // table of 64bits value + int table64size;// size of table (will be appended at end of executable code) + uintptr_t tablestart; uintptr_t* next; // variable array of "next" jump address int next_sz; int next_cap; @@ -53,4 +56,6 @@ uintptr_t get_closest_next(dynarec_arm_t *dyn, uintptr_t addr); int is_nops(dynarec_arm_t *dyn, uintptr_t addr, int n); int is_instructions(dynarec_arm_t *dyn, uintptr_t addr, int n); +int Table64(dynarec_arm_t *dyn, uint64_t val); // add a value to etable64 (if needed) and gives back the imm19 to use in LDR_literal + #endif //__DYNAREC_ARM_PRIVATE_H_ \ No newline at end of file diff --git a/src/emu/x64emu_private.h b/src/emu/x64emu_private.h index 05570965..144a6f33 100755 --- a/src/emu/x64emu_private.h +++ b/src/emu/x64emu_private.h @@ -30,14 +30,15 @@ typedef struct x64emu_s { x87flags_t sw; uint32_t top; // top is part of sw, but it's faster to have it separatly int fpu_stack; + uint32_t mxcsr; fpu_round_t round; fpu_ld_t fpu_ld[8]; // for long double emulation / 80bits fld fst fpu_ll_t fpu_ll[8]; // for 64bits fild / fist sequence fpu_p_reg_t p_regs[8]; // sse sse_regs_t xmm[16]; - uint32_t mxcsr; // defered flags + int dummy1; // to align on 64bits with df defered_flags_t df; uint64_t op1; uint64_t op2; @@ -50,6 +51,13 @@ typedef struct x64emu_s { uint32_t segs[6]; // only 32bits value? uintptr_t segs_offs[6]; // computed offset associate with segment uint64_t segs_serial[6]; // are seg offset clean (not 0) or does they need to be re-computed (0)? For GS, serial need to be the same as context->sel_serial + // parent context + box64context_t *context; + // cpu helpers + reg64_t zero; + reg64_t *sbiidx[16]; + // scratch stack, used for alignement of double and 64bits ints on arm. 200 elements should be enough + uint64_t scratch[200]; // emu control int quit; int error; @@ -58,13 +66,6 @@ typedef struct x64emu_s { int exit; int quitonlongjmp; // quit if longjmp is called int longjmp; // if quit because of longjmp - // parent context - box64context_t *context; - // cpu helpers - reg64_t zero; - reg64_t *sbiidx[16]; - // scratch stack, used for alignement of double and 64bits ints on arm. 200 elements should be enough - uint64_t scratch[200]; // local stack, do be deleted when emu is freed void* stack2free; // this is the stack to free (can be NULL) void* init_stack; // initial stack (owned or not) -- cgit 1.4.1