From bac90cfe573465c8020b36da9dbd1665cc6ef185 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Thu, 18 Mar 2021 11:33:49 +0100 Subject: Added C1 opcodes anda few bugfix (no changes in vvvvvv and IntoTheBreach issues) --- src/dynarec/arm64_emitter.h | 84 +++++- src/dynarec/arm64_printer.c | 77 +++++- src/dynarec/dynarec_arm64_00.c | 98 +++++++ src/dynarec/dynarec_arm64_emit_shift.c | 461 +++++++++++++++++++++++++++++++++ src/dynarec/dynarec_arm64_helper.c | 12 +- src/dynarec/dynarec_arm64_helper.h | 42 +-- 6 files changed, 738 insertions(+), 36 deletions(-) create mode 100755 src/dynarec/dynarec_arm64_emit_shift.c (limited to 'src') diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h index 738035f5..0d182425 100755 --- a/src/dynarec/arm64_emitter.h +++ b/src/dynarec/arm64_emitter.h @@ -131,6 +131,7 @@ #define ADDSw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 0, 1, 0b00, Rm, 0, Rn, Rd)) #define ADDxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 0, 0, 0b00, Rm, 0, Rn, Rd)) #define ADDSxw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(rex.w, 0, 1, 0b00, Rm, 0, Rn, Rd)) +#define ADDxw_REG_LSR(Rd, Rn, Rm, lsr) EMIT(ADDSUB_REG_gen(rex.w, 0, 0, 0b01, Rm, lsr, Rn, Rd)) #define ADDSUB_IMM_gen(sf, op, S, shift, imm12, Rn, Rd) ((sf)<<31 | (op)<<30 | (S)<<29 | 0b10001<<24 | (shift)<<22 | (imm12)<<10 | (Rn)<<5 | (Rd)) #define ADDx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 0, 0, 0b00, (imm12)&0xfff, Rn, Rd)) @@ -214,6 +215,29 @@ #define STRB_REG(Rt, Rn, Rm) EMIT(STR_REG_gen(0b00, Rm, 0b011, 0, Rn, Rt)) #define STRH_REG(Rt, Rn, Rm) EMIT(STR_REG_gen(0b01, Rm, 0b011, 0, Rn, Rt)) +// LOAD/STORE PAIR +#define MEMPAIR_gen(size, L, op2, imm7, Rt2, Rn, Rt) ((size)<<31 | 0b101<<27 | (op2)<<23 | (L)<<22 | (imm7)<<15 | (Rt2)<<10 | (Rn)<<5 | (Rt)) + +#define LDPx_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 1, 0b01, (imm>>3)&0x7f, Rt2, Rn, Rt)) +#define LDPw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 1, 0b01, (imm>>2)&0x7f, Rt2, Rn, Rt)) +#define LDPxw_S7_postindex(Rt, Rt2, Rn, 
imm) EMIT(MEMPAIR_gen(rex.w, 1, 0b01, ((imm)>>(2+rex.w))&0x7f, Rt2, Rn, Rt)) // xw variants mask to the 7-bit field like the x/w ones, so a negative offset cannot set bits above imm7 +#define LDPx_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 1, 0b11, (imm>>3)&0x7f, Rt2, Rn, Rt)) +#define LDPw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 1, 0b11, (imm>>2)&0x7f, Rt2, Rn, Rt)) +#define LDPxw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 1, 0b11, ((imm)>>(2+rex.w))&0x7f, Rt2, Rn, Rt)) +#define LDPx_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 1, 0b10, (imm>>3)&0x7f, Rt2, Rn, Rt)) +#define LDPw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 1, 0b10, (imm>>2)&0x7f, Rt2, Rn, Rt)) +#define LDPxw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 1, 0b10, ((imm)>>(2+rex.w))&0x7f, Rt2, Rn, Rt)) + +#define STPx_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 0, 0b01, (imm>>3)&0x7f, Rt2, Rn, Rt)) +#define STPw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 0, 0b01, (imm>>2)&0x7f, Rt2, Rn, Rt)) +#define STPxw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 0, 0b01, ((imm)>>(2+rex.w))&0x7f, Rt2, Rn, Rt)) +#define STPx_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 0, 0b11, (imm>>3)&0x7f, Rt2, Rn, Rt)) +#define STPw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 0, 0b11, (imm>>2)&0x7f, Rt2, Rn, Rt)) +#define STPxw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 0, 0b11, ((imm)>>(2+rex.w))&0x7f, Rt2, Rn, Rt)) +#define STPx_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 0, 0b10, (imm>>3)&0x7f, Rt2, Rn, Rt)) +#define STPw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 0, 0b10, (imm>>2)&0x7f, Rt2, Rn, Rt)) +#define STPxw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 0, 0b10, ((imm)>>(2+rex.w))&0x7f, Rt2, Rn, Rt)) + // PUSH / POP helper #define POP1(reg) LDRx_S9_postindex(reg, xRSP, 8) #define PUSH1(reg) STRx_S9_preindex(reg, xRSP, -8) @@ -232,6 +256,11 @@ #define Bcond_gen(imm19, cond) (0b0101010<<25 | (imm19)<<5 | (cond)) #define Bcond(cond, imm19) EMIT(Bcond_gen(((imm19)>>2)&0x7FFFF, cond)) +#define B_gen(imm26) (0b000101<<26 | (imm26)) +#define B(imm26)
EMIT(B_gen(((imm26)>>2)&0x3ffffff)) + +#define NOP EMIT(0b11010101000000110010000000011111) + // AND / ORR #define LOGIC_gen(sf, opc, N, immr, imms, Rn, Rd) ((sf)<<31 | (opc)<<29 | 0b100100<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | Rd) // logic to get the mask is ... convoluted... list of possible value there: https://gist.github.com/dinfuehr/51a01ac58c0b23e4de9aac313ed6a06a @@ -241,8 +270,11 @@ #define ANDSw_mask(Rd, Rn, immr, imms) EMIT(LOGIC_gen(0, 0b11, 0, immr, imms, Rn, Rd)) #define ORRx_mask(Rd, Rn, N, immr, imms) EMIT(LOGIC_gen(1, 0b01, N, immr, imms, Rn, Rd)) #define ORRw_mask(Rd, Rn, immr, imms) EMIT(LOGIC_gen(0, 0b01, 0, immr, imms, Rn, Rd)) +#define EORx_mask(Rd, Rn, N, immr, imms) EMIT(LOGIC_gen(1, 0b10, N, immr, imms, Rn, Rd)) +#define EORw_mask(Rd, Rn, immr, imms) EMIT(LOGIC_gen(0, 0b10, 0, immr, imms, Rn, Rd)) #define TSTx_mask(Rn, immr, imms) ANDSx_mask(xZR, Rn, immr, imms) #define TSTw_mask(Rn, immr, imms) ANDSw_mask(wZR, Rn, immr, imms) +#define TSTxw_mask(Rn, immr, imms) ANDSxw_mask(xZR, Rn, immr, imms) #define LOGIC_REG_gen(sf, opc, shift, N, Rm, imm6, Rn, Rd) ((sf)<<31 | (opc)<<29 | 0b01010<<24 | (shift)<<22 | (N)<<21 | (Rm)<<16 | (imm6)<<10 | (Rn)<<5 | (Rd)) #define ANDx_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b00, 0b00, 0, Rm, 0, Rn, Rd)) @@ -265,6 +297,9 @@ #define EORx_REG_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(1, 0b10, 0b00, 0, Rm, lsl, Rn, Rd)) #define EORw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(0, 0b10, 0b00, 0, Rm, lsl, Rn, Rd)) #define EORxw_REG_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(rex.w, 0b10, 0b00, 0, Rm, lsl, Rn, Rd)) +#define EORx_REG_LSR(Rd, Rn, Rm, lsr) EMIT(LOGIC_REG_gen(1, 0b10, 0b01, 0, Rm, lsr, Rn, Rd)) +#define EORw_REG_LSR(Rd, Rn, Rm, lsr) EMIT(LOGIC_REG_gen(0, 0b10, 0b01, 0, Rm, lsr, Rn, Rd)) +#define EORxw_REG_LSR(Rd, Rn, Rm, lsr) EMIT(LOGIC_REG_gen(rex.w, 0b10, 0b01, 0, Rm, lsr, Rn, Rd)) #define MOVx_REG(Rd, Rm) ORRx_REG(Rd, xZR, Rm) #define MOVw_REG(Rd, Rm) ORRw_REG(Rd, xZR, Rm) #define MOVxw_REG(Rd, 
Rm) ORRxw_REG(Rd, xZR, Rm) @@ -295,9 +330,9 @@ #define BFIx(Rd, Rn, lsb, width) BFMx(Rd, Rn, ((-lsb)%64)&0x3f, (width)-1) #define BFIw(Rd, Rn, lsb, width) BFMw(Rd, Rn, ((-lsb)%32)&0x1f, (width)-1) #define BFIxw(Rd, Rn, lsb, width) if(rex.w) {BFIx(Rd, Rn, lsb, width);} else {BFIw(Rd, Rn, lsb, width);} -#define BFCx(Rd, Rn, lsb, width) BFMx(Rd, xZR, ((-lsb)%64)&0x3f, (width)-1) -#define BFCw(Rd, Rn, lsb, width) BFMw(Rd, xZR, ((-lsb)%32)&0x1f, (width)-1) -#define BFCxw(Rd, Rn, lsb, width) if(rex.w) {BFCx(Rd, Rn, lsb, width);} else {BFCw(Rd, Rn, lsb, width);} +#define BFCx(Rd, lsb, width) BFMx(Rd, xZR, ((-lsb)%64)&0x3f, (width)-1) +#define BFCw(Rd, lsb, width) BFMw(Rd, xZR, ((-lsb)%32)&0x1f, (width)-1) +#define BFCxw(Rd, lsb, width) BFMxw(Rd, xZR, rex.w?(((-lsb)%64)&0x3f):(((-lsb)%32)&0x1f), (width)-1) // UBFX #define UBFM_gen(sf, N, immr, imms, Rn, Rd) ((sf)<<31 | 0b10<<29 | 0b100110<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | (Rd)) @@ -315,12 +350,45 @@ #define LSRxw(Rd, Rn, shift) EMIT(UBFM_gen(rex.w, rex.w, shift, (rex.w)?63:31, Rn, Rd)) #define LSLx(Rd, Rn, lsl) UBFMx(Rd, Rn, ((-(lsl))%64)&63, 63-(lsl)) #define LSLw(Rd, Rn, lsl) UBFMw(Rd, Rn, ((-(lsl))%32)&31, 31-(lsl)) +#define LSLxw(Rd, Rn, lsl) UBFMxw(Rd, Rn, rex.w?(((-(lsl))%64)&63):(((-(lsl))%32)&31), (rex.w?63:31)-(lsl)) + +// SBFM +#define SBFM_gen(sf, N, immr, imms, Rn, Rd) ((sf)<<31 | 0b00<<29 | 0b100110<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | (Rd)) +#define SBFMx(Rd, Rn, immr, imms) EMIT(SBFM_gen(1, 1, immr, imms, Rn, Rd)) +#define SBFMw(Rd, Rn, immr, imms) EMIT(SBFM_gen(0, 0, immr, imms, Rn, Rd)) +#define SBFMxw(Rd, Rn, immr, imms) EMIT(SBFM_gen(rex.w, rex.w, immr, imms, Rn, Rd)) +#define SBFXx(Rd, Rn, lsb, width) SBFMx(Rd, Rn, lsb, lsb+width-1) +#define SBFXw(Rd, Rn, lsb, width) SBFMw(Rd, Rn, lsb, lsb+width-1) +#define SBFXxw(Rd, Rn, lsb, width) SBFMxw(Rd, Rn, lsb, lsb+width-1) +#define SXTBx(Rd, Rn) SBFMx(Rd, Rn, 0, 7) +#define SXTBw(Rd, Rn) SBFMw(Rd, Rn, 0, 7) +#define SXTHx(Rd, 
Rn) SBFMx(Rd, Rn, 0, 15) +#define SXTHw(Rd, Rn) SBFMw(Rd, Rn, 0, 15) +#define SXTWx(Rd, Rn) SBFMx(Rd, Rn, 0, 31) +#define ASRx(Rd, Rn, shift) SBFMx(Rd, Rn, shift, 63) +#define ASRw(Rd, Rn, shift) SBFMw(Rd, Rn, shift, 31) +#define ASRxw(Rd, Rn, shift) SBFMxw(Rd, Rn, shift, rex.w?63:31) + +// EXTR +#define EXTR_gen(sf, N, Rm, imms, Rn, Rd) ((sf)<<31 | 0b00<<29 | 0b100111<<23 | (N)<<22 | (Rm)<<16 | (imms)<<10 | (Rn)<<5 | (Rd)) +#define EXTRx(Rd, Rn, Rm, lsb) EMIT(EXTR_gen(1, 1, Rm, lsb, Rn, Rd)) +#define EXTRw(Rd, Rn, Rm, lsb) EMIT(EXTR_gen(0, 0, Rm, lsb, Rn, Rd)) +#define EXTRxw(Rd, Rn, Rm, lsb) EMIT(EXTR_gen(rex.w, rex.w, Rm, lsb, Rn, Rd)) +#define RORx(Rd, Rn, lsb) EMIT(EXTR_gen(1, 1, Rn, lsb, Rn, Rd)) +#define RORw(Rd, Rn, lsb) EMIT(EXTR_gen(0, 0, Rn, lsb, Rn, Rd)) +#define RORxw(Rd, Rn, lsb) EMIT(EXTR_gen(rex.w, rex.w, Rn, lsb, Rn, Rd)) + +// LSRV / LSLV +#define LS_V_gen(sf, Rm, op2, Rn, Rd) ((sf)<<31 | 0b11010110<<21 | (Rm)<<16 | 0b0010<<12 | (op2)<<10 | (Rn)<<5 | (Rd)) +#define LSRx_REG(Rd, Rn, Rm) EMIT(LS_V_gen(1, Rm, 0b01, Rn, Rd)) +#define LSRw_REG(Rd, Rn, Rm) EMIT(LS_V_gen(0, Rm, 0b01, Rn, Rd)) +#define LSRxw_REG(Rd, Rn, Rm) EMIT(LS_V_gen(rex.w, Rm, 0b01, Rn, Rd)) + +#define LSLx_REG(Rd, Rn, Rm) EMIT(LS_V_gen(1, Rm, 0b00, Rn, Rd)) +#define LSLw_REG(Rd, Rn, Rm) EMIT(LS_V_gen(0, Rm, 0b00, Rn, Rd)) +#define LSLxw_REG(Rd, Rn, Rm) EMIT(LS_V_gen(rex.w, Rm, 0b00, Rn, Rd)) + -// LSRV -#define LSRV_gen(sf, Rm, op2, Rn, Rd) ((sf)<<31 | 0b11010110<<21 | (Rm)<<16 | 0b0010<<12 | (op2)<<10 | (Rn)<<5 | (Rd)) -#define LSRx_REG(Rd, Rn, Rm) EMIT(LSRV_gen(1, Rm, 0b01, Rn, Rd)) -#define LSRw_REG(Rd, Rn, Rm) EMIT(LSRV_gen(0, Rm, 0b01, Rn, Rd)) -#define LSRxw_REG(Rd, Rn, Rm) EMIT(LSRV_gen(rex.w, Rm, 0b01, Rn, Rd)) // MRS #define MRS_gen(L, o0, op1, CRn, CRm, op2, Rt) (0b1101010100<<22 | (L)<<21 | 1<<20 | (o0)<<19 | (op1)<<16 | (CRn)<<12 | (CRm)<<8 | (op2)<<5 | (Rt)) diff --git a/src/dynarec/arm64_printer.c b/src/dynarec/arm64_printer.c index 3137d33a..082260b9 100755 --- 
a/src/dynarec/arm64_printer.c +++ b/src/dynarec/arm64_printer.c @@ -17,7 +17,7 @@ static const char* conds[] = {"cEQ", "cNE", "cCS", "cCC", "cMI", "cPL", "cVS", " typedef struct arm64_print_s { int N, S; - int t, n, m, d; + int t, n, m, d, t2; int f, c, o, h; int i, r, s; int x, w; @@ -60,6 +60,7 @@ int isMask(uint32_t opcode, const char* mask, arm64_print_t *a) case 'N': a->N = (a->N<<1) | v; break; case 'S': a->S = (a->S<<1) | v; break; case 't': a->t = (a->t<<1) | v; break; + case '2': a->t2 = (a->t2<<1) | v; break; case 'n': a->n = (a->n<<1) | v; break; case 'm': a->m = (a->m<<1) | v; break; case 'd': a->d = (a->d<<1) | v; break; @@ -96,6 +97,7 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr) arm64_print_t a; #define Rn a.n #define Rt a.t + #define Rt2 a.t2 #define Rm a.m #define Rd a.d #define sf a.f @@ -106,7 +108,47 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr) #define cond a.c #define immr a.r #define imms a.s + if(isMask(opcode, "11010101000000110010000000011111", &a)) { + snprintf(buff, sizeof(buff), "NOP"); + return buff; + } // --- LDR / STR + if(isMask(opcode, "f010100011iiiiiii22222nnnnnttttt", &a)) { + int offset = signExtend(imm, 7)<<(2+sf); // the pair masks capture 7 'i' bits, so the sign bit is bit 6 + snprintf(buff, sizeof(buff), "LDP %s, %s, [%s], %s0x%x", sf?Xt[Rt]:Wt[Rt], sf?Xt[Rt2]:Wt[Rt2], XtSp[Rn], (offset<0)?"-":"", abs(offset)); + return buff; + } + if(isMask(opcode, "f010100111iiiiiii22222nnnnnttttt", &a)) { + int offset = signExtend(imm, 7)<<(2+sf); + snprintf(buff, sizeof(buff), "LDP %s, %s, [%s, %s0x%x]!", sf?Xt[Rt]:Wt[Rt], sf?Xt[Rt2]:Wt[Rt2], XtSp[Rn], (offset<0)?"-":"", abs(offset)); + return buff; + } + if(isMask(opcode, "f010100101iiiiiii22222nnnnnttttt", &a)) { + int offset = signExtend(imm, 7)<<(2+sf); + if(!offset) + snprintf(buff, sizeof(buff), "LDP %s, %s, [%s]", sf?Xt[Rt]:Wt[Rt], sf?Xt[Rt2]:Wt[Rt2], XtSp[Rn]); + else + snprintf(buff, sizeof(buff), "LDP %s, %s, [%s, %s0x%x]", sf?Xt[Rt]:Wt[Rt], sf?Xt[Rt2]:Wt[Rt2], XtSp[Rn], (offset<0)?"-":"", abs(offset)); +
return buff; + } + if(isMask(opcode, "f010100010iiiiiii22222nnnnnttttt", &a)) { + int offset = signExtend(imm, 7)<<(2+sf); // the pair masks capture 7 'i' bits, so the sign bit is bit 6 + snprintf(buff, sizeof(buff), "STP %s, %s, [%s], %s0x%x", sf?Xt[Rt]:Wt[Rt], sf?Xt[Rt2]:Wt[Rt2], XtSp[Rn], (offset<0)?"-":"", abs(offset)); + return buff; + } + if(isMask(opcode, "f010100110iiiiiii22222nnnnnttttt", &a)) { + int offset = signExtend(imm, 7)<<(2+sf); + snprintf(buff, sizeof(buff), "STP %s, %s, [%s, %s0x%x]!", sf?Xt[Rt]:Wt[Rt], sf?Xt[Rt2]:Wt[Rt2], XtSp[Rn], (offset<0)?"-":"", abs(offset)); + return buff; + } + if(isMask(opcode, "f010100100iiiiiii22222nnnnnttttt", &a)) { + int offset = signExtend(imm, 7)<<(2+sf); + if(!offset) + snprintf(buff, sizeof(buff), "STP %s, %s, [%s]", sf?Xt[Rt]:Wt[Rt], sf?Xt[Rt2]:Wt[Rt2], XtSp[Rn]); + else + snprintf(buff, sizeof(buff), "STP %s, %s, [%s, %s0x%x]", sf?Xt[Rt]:Wt[Rt], sf?Xt[Rt2]:Wt[Rt2], XtSp[Rn], (offset<0)?"-":"", abs(offset)); + return buff; + } if(isMask(opcode, "1x111000010iiiiiiiii01nnnnnttttt", &a)) { int size = (opcode>>30)&3; int offset = signExtend(imm, 9); @@ -387,12 +429,38 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr) return buff; } + if(isMask(opcode, "f00100110Nrrrrrrssssssnnnnnddddd", &a)) { + if(sf && imms==0b111111) + snprintf(buff, sizeof(buff), "ASR %s, %s, %d", Xt[Rd], Xt[Rn], immr); + else if(!sf && imms==0b011111) + snprintf(buff, sizeof(buff), "ASR %s, %s, %d", Wt[Rd], Wt[Rn], immr); + else if(immr==0 && imms==0b000111) + snprintf(buff, sizeof(buff), "SXTB %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn]); + else if(immr==0 && imms==0b001111) + snprintf(buff, sizeof(buff), "SXTH %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn]); + else if(sf && immr==0 && imms==0b011111) + snprintf(buff, sizeof(buff), "SXTW %s, %s", Xt[Rd], Xt[Rn]); + else if(imms>=immr) + snprintf(buff, sizeof(buff), "SBFX %s, %s, %d, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], immr, imms-immr+1); + else + snprintf(buff, sizeof(buff), "SBFM %s, %s, %d, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], immr,
imms); + return buff; + } + + if(isMask(opcode, "f00100111N0mmmmmssssssnnnnnddddd", &a)) { + if(Rn==Rm) + snprintf(buff, sizeof(buff), "ROR %s, %s, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], imms); + else + snprintf(buff, sizeof(buff), "EXTR %s, %s, %s, %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], imms); + return buff; + } + if(isMask(opcode, "f01100110Nrrrrrrssssssnnnnnddddd", &a)) { if(imms>3)&7) { + case 0: + INST_NAME("ROL Ed, Ib"); + SETFLAGS(X_OF|X_CF, SF_SUBSET); + GETED(1); + u8 = (F8)&(rex.w?0x3f:0x1f); + emit_rol32c(dyn, ninst, rex, ed, u8, x3, x4); + if(u8) { WBACK; } + break; + case 1: + INST_NAME("ROR Ed, Ib"); + SETFLAGS(X_OF|X_CF, SF_SUBSET); + GETED(1); + u8 = (F8)&(rex.w?0x3f:0x1f); + emit_ror32c(dyn, ninst, rex, ed, u8, x3, x4); + if(u8) { WBACK; } + break; + case 2: + INST_NAME("RCL Ed, Ib"); + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SET); + GETEDW(x4, x1, 1); + u8 = F8; + MOV32w(x2, u8); + CALL_(rex.w?((void*)rcl64):((void*)rcl32), ed, x4); + WBACK; + break; + case 3: + INST_NAME("RCR Ed, Ib"); + READFLAGS(X_CF); + SETFLAGS(X_OF|X_CF, SF_SET); + GETEDW(x4, x1, 1); + u8 = F8; + MOV32w(x2, u8); + CALL_(rex.w?((void*)rcr64):((void*)rcr32), ed, x4); + WBACK; + break; + case 4: + case 6: + INST_NAME("SHL Ed, Ib"); + SETFLAGS(X_ALL, SF_SET); // some flags are left undefined + GETED(1); + u8 = (F8)&(rex.w?0x3f:0x1f); + emit_shl32c(dyn, ninst, rex, ed, u8, x3, x4); + WBACK; + break; + case 5: + INST_NAME("SHR Ed, Ib"); + SETFLAGS(X_ALL, SF_SET); // some flags are left undefined + GETED(1); + u8 = (F8)&(rex.w?0x3f:0x1f); + emit_shr32c(dyn, ninst, rex, ed, u8, x3, x4); + if(u8) { + WBACK; + } + break; + case 7: + INST_NAME("SAR Ed, Ib"); + SETFLAGS(X_ALL, SF_SET); // some flags are left undefined + GETED(1); + u8 = (F8)&(rex.w?0x3f:0x1f); + emit_sar32c(dyn, ninst, rex, ed, u8, x3, x4); + if(u8) { + WBACK; + } + break; + } + break; case 0xC2: INST_NAME("RETN"); //SETFLAGS(X_ALL, SF_SET); // Hack, set all flags (to an unknown state...) 
@@ -423,6 +493,34 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; } break; + case 0xE9: + case 0xEB: + BARRIER(1); + if(opcode==0xE9) { + INST_NAME("JMP Id"); + i32 = F32S; + } else { + INST_NAME("JMP Ib"); + i32 = F8S; + } + JUMP(addr+i32); + if(dyn->insts) { + PASS2IF(dyn->insts[ninst].x64.jmp_insts==-1, 1) { + // out of the block + jump_to_next(dyn, addr+i32, 0, ninst); + } else { + // inside the block + tmp = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->arm_size); + if(tmp==4) { + NOP; + } else { + B(tmp); + } + } + } + *need_epilog = 0; + *ok = 0; + break; case 0xFF: nextop = F8; diff --git a/src/dynarec/dynarec_arm64_emit_shift.c b/src/dynarec/dynarec_arm64_emit_shift.c new file mode 100755 index 00000000..fef4b4e5 --- /dev/null +++ b/src/dynarec/dynarec_arm64_emit_shift.c @@ -0,0 +1,461 @@ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynarec_arm64.h" +#include "dynarec_arm64_private.h" +#include "arm64_printer.h" +#include "../tools/bridge_private.h" + +#include "dynarec_arm64_functions.h" +#include "dynarec_arm64_helper.h" + +// emit SHL32 instruction, from s1 , shift s2, store result in s1 using s3 and s4 as scratch. 
s3 can be same as s2 +void emit_shl32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) +{ + int32_t j32; + MAYUSE(j32); + + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRxw_U12(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, rex.w?d_shl64:d_shl32); + } else IFX(X_ALL) { + SET_DFNONE(s4); + } + IFX(F_OF) { + CMPSxw_U12(s2, 0); + IFX(F_OF) { + Bcond(cNE, +8); + BFCx(xFlags, F_OF, 1); + } + IFX(X_PEND) { + Bcond(cNE, +8); + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + B_NEXT(cEQ); + } + IFX(X_CF | X_OF) { + MOV32w(s4, rex.w?64:32); + SUBxw_REG(s4, s4, s2); + LSRxw_REG(s4, s1, s4); + BFIw(xFlags, s4, F_CF, 1); + } + LSLxw_REG(s1, s1, s2); + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_ZF) { + TSTw_REG(s1, s1); + BFCx(xFlags, F_ZF, 1); + Bcond(cNE, +8); + ORRw_mask(xFlags, xFlags, 0b011010, 0); // mask=0x40 + } + IFX(X_SF) { + LSRxw(s4, s1, (rex.w)?63:31); + BFIx(xFlags, s4, F_SF, 1); + } + IFX(X_OF) { + CMPSxw_U12(s2, 1); // if s3==1 + IFX(X_SF) {} else {LSRxw(s4, s1, (rex.w)?63:31);} + BFCw(xFlags, F_OF, 1); + Bcond(cNE, +12); + EORxw_REG(s4, s4, xFlags); // CF is set if OF is asked + BFIw(xFlags, s4, F_OF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit SHL32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch +void emit_shl32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4) +{ + IFX(X_PEND) { + MOV32w(s3, c); + STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, rex.w?d_shl64:d_shl32); + } else IFX(X_ALL) { + SET_DFNONE(s4); + } + if(c==0) { + IFX(F_OF) { + BFCx(xFlags, F_OF, 1); + } + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + return; + } + IFX(X_CF) { + LSRxw(s3, s1, (rex.w?64:32)-c); + BFIxw(xFlags, s3, F_CF, 1); + } + LSLxw(s1, s1, c); + + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_ZF) { 
+ TSTw_REG(s1, s1); + BFCx(xFlags, F_ZF, 1); + Bcond(cNE, +8); + ORRw_mask(xFlags, xFlags, 0b011010, 0); // mask=0x40 + } + IFX(X_SF) { + LSRxw(s4, s1, (rex.w)?63:31); + BFIx(xFlags, s4, F_SF, 1); + } + IFX(X_OF) { + if(c==1) { + IFX(X_SF) {} else {LSRxw(s4, s1, (rex.w)?63:31);} + Bcond(cNE, +12); + EORxw_REG(s4, s4, xFlags); // CF is set if OF is asked + BFIw(xFlags, s4, F_OF, 1); + } else { + BFCw(xFlags, F_OF, 1); + } + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit SHR32 instruction, from s1 , s2, store result in s1 using s3 and s4 as scratch, s2 can be same as s3 +void emit_shr32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4) +{ + int32_t j32; + MAYUSE(j32); + + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRxw_U12(s2, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, rex.w?d_shr64:d_shr32); + } else IFX(X_ALL) { + SET_DFNONE(s4); + } + IFX(X_ALL) { + CMPSxw_U12(s2, 0); //if(!c) + IFX(X_PEND) { + Bcond(cNE, +12); + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + B_NEXT(cEQ); + } + LSRxw_REG(s1, s1, s2); + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_ZF) { + TSTw_REG(s1, s1); + BFCx(xFlags, F_ZF, 1); + Bcond(cNE, +8); + ORRw_mask(xFlags, xFlags, 0b011010, 0); // mask=0x40 + } + IFX(X_CF) { + SUBxw_U12(s3, s2, 1); + LSRxw_REG(s3, s1, s3); + BFIw(xFlags, s3, 0, 1); + } + IFX(X_SF) { + LSRxw(s4, s1, (rex.w)?63:31); + BFIx(xFlags, s4, F_SF, 1); + } + IFX(X_OF) { + CMPSxw_U12(s2, 1); // if s3==1 + Bcond(cNE, 4+3*4); + if(rex.w) { + LSRx(s4, s1, 62); + } else { + LSRw(s4, s1, 30); + } + EORw_mask(s4, s4, 0, 0); // CF is set if OF is asked + BFIw(xFlags, s4, F_OF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit SHR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch +void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4) +{ + IFX(X_PEND) { + MOV32w(s3, c); + STRxw_U12(s1, xEmu, 
offsetof(x64emu_t, op1)); + STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, rex.w?d_shr64:d_shr32); + } else IFX(X_ALL) { + SET_DFNONE(s4); + } + if(!c) { + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + return; + } + IFX(X_CF) { // CF is the last bit shifted out: take bit c-1 of s1 before s1 is shifted + LSRxw(s3, s1, c-1); + BFIw(xFlags, s3, 0, 1); + } + LSRxw(s1, s1, c); + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_ZF) { + TSTw_REG(s1, s1); + BFCx(xFlags, F_ZF, 1); + Bcond(cNE, +8); + ORRw_mask(xFlags, xFlags, 0b011010, 0); // mask=0x40 + } + IFX(X_SF) { + LSRxw(s4, s1, (rex.w)?63:31); + BFIx(xFlags, s4, F_SF, 1); + } + IFX(X_OF) { + if(c==1) { + LSRxw(s4, s1, rex.w?62:30); + EORw_mask(s4, s4, 0, 0); + BFIw(xFlags, s4, F_OF, 1); + } + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit SAR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch +void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4) +{ + IFX(X_PEND) { + MOV32w(s3, c); + STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); + STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, rex.w?d_sar64:d_sar32); + } else IFX(X_ALL) { + SET_DFNONE(s4); + } + if(!c) { + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + return; + } + IFX(X_CF) { // CF is the last bit shifted out: take bit c-1 of s1 before s1 is shifted + ASRxw(s3, s1, c-1); + BFIw(xFlags, s3, 0, 1); + } + ASRxw(s1, s1, c); + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_ZF) { + TSTw_REG(s1, s1); + BFCx(xFlags, F_ZF, 1); + Bcond(cNE, +8); + ORRw_mask(xFlags, xFlags, 0b011010, 0); // mask=0x40 + } + IFX(X_SF) { + LSRxw(s4, s1, (rex.w)?63:31); + BFIx(xFlags, s4, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} + +// emit ROL32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch +void emit_rol32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4) +{ + IFX(X_PEND) { + MOV32w(s3, c); + STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2));
+ SET_DF(s4, d_rol32); + } else IFX(X_ALL) { + SET_DFNONE(s4); + } + if(!c) { + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + return; + } + RORxw(s1, s1, (rex.w?64:32)-c); + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_CF) { + BFIw(xFlags, s1, F_CF, 1); + } + IFX(X_OF) { + if(c==1) { + ADDxw_REG_LSR(s3, s1, s1, rex.w?63:31); + BFIw(xFlags, s3, F_OF, 1); + } + } +} + +// emit ROR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch +void emit_ror32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4) +{ + IFX(X_PEND) { + MOV32w(s3, c); + STRxw_U12(s3, xEmu, offsetof(x64emu_t, op2)); + SET_DF(s4, rex.w?d_ror64:d_ror32); + } else IFX(X_ALL) { + SET_DFNONE(s4); + } + if(!c) { + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + return; + } + RORxw(s1, s1, c); + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_CF) { + LSRxw(s3, s1, rex.w?63:31); + BFIw(xFlags, s3, F_CF, 1); + } + IFX(X_OF) { + if(c==1) { + LSRxw(s3, s1, rex.w?62:30); + EORxw_REG_LSR(s3, s3, s3, 1); + BFIw(xFlags, s3, F_OF, 1); // insert from s3, where the xor of the two top result bits was just computed + } + } +} + +// emit SHRD32 instruction, from s1, fill s2 , constant c, store result in s1 using s3 and s4 as scratch +//void emit_shrd32c(dynarec_arm_t* dyn, int ninst, int s1, int s2, int32_t c, int s3, int s4) +//{ +// c&=0x1f; +// IFX(X_PEND) { +// MOVW(s3, c); +// STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1)); +// STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2)); +// // same flags calc as shr32 +// SET_DF(s4, d_shr32); +// } else IFX(X_ALL) { +// SET_DFNONE(s4); +// } +// if(!c) { +// IFX(X_PEND) { +// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); +// } +// return; +// } +// IFX(X_CF) { +// MOVS_REG_LSR_IMM5(s1, s1, c); +// } else { +// MOV_REG_LSR_IMM5(s1, s1, c); +// } +// IFX(X_ZF|X_CF) { +// BIC_IMM8(xFlags, xFlags, (1<insts) {dyn->insts[ninst].mark = (uintptr_t)dyn->arm_size;} #define GETMARK
((dyn->insts)?dyn->insts[ninst].mark:(dyn->arm_size+4)) @@ -388,7 +388,7 @@ j32 = (GETMARKF)-(dyn->arm_size); \ CBZw(x3, j32); \ } \ - CALL_(UpdateFlags, -1); \ + CALL_(UpdateFlags, -1, 0); \ MARKF; \ dyn->state_flags = SF_SET; \ SET_DFOK(); \ @@ -610,7 +610,7 @@ void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst); void ret_to_epilog(dynarec_arm_t* dyn, int ninst); void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n); void iret_to_epilog(dynarec_arm_t* dyn, int ninst); -void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags); +void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg); //void grab_fsdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg); //void grab_tlsdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg); //void emit_cmp8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); @@ -673,13 +673,13 @@ void emit_dec32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4 //void emit_neg32(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); //void emit_neg16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); //void emit_neg8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); -//void emit_shl32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); -//void emit_shl32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); -//void emit_shr32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4); -//void emit_shr32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); -//void emit_sar32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); -//void emit_rol32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); -//void emit_ror32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +void emit_shl32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +void emit_shl32c(dynarec_arm_t* dyn, int ninst, rex_t 
rex, int s1, int32_t c, int s3, int s4); +void emit_shr32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4); +void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4); +void emit_rol32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4); +void emit_ror32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int32_t c, int s3, int s4); //void emit_shrd32c(dynarec_arm_t* dyn, int ninst, int s1, int s2, int32_t c, int s3, int s4); //void emit_shld32c(dynarec_arm_t* dyn, int ninst, int s1, int s2, int32_t c, int s3, int s4); -- cgit 1.4.1