diff options
Diffstat (limited to 'src')
 src/dynarec/arm64_emitter.h           (-rwxr-xr-x) | 62
 src/dynarec/arm64_printer.c           (-rwxr-xr-x) | 27
 src/dynarec/dynarec_arm64_00.c        (-rwxr-xr-x) | 86
 src/dynarec/dynarec_arm64_emit_math.c (-rwxr-xr-x) | 91
 src/dynarec/dynarec_arm64_helper.c    (-rwxr-xr-x) | 20
 src/dynarec/dynarec_arm64_helper.h    (-rwxr-xr-x) | 18
 src/dynarec/dynarec_arm64_pass.c      (-rwxr-xr-x) |  4
 src/emu/x64primop.c                   (-rwxr-xr-x) |  8
8 files changed, 228 insertions(+), 88 deletions(-)
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h index 091544df..100b0d74 100755 --- a/src/dynarec/arm64_emitter.h +++ b/src/dynarec/arm64_emitter.h @@ -159,6 +159,12 @@ #define CMPSx_REG(Rn, Rm) SUBSx_REG(xZR, Rn, Rm) #define CMPSw_REG(Rn, Rm) SUBSw_REG(wZR, Rn, Rm) #define CMPSxw_REG(Rn, Rm) SUBSxw_REG(xZR, Rn, Rm) +#define NEGx_REG(Rd, Rm) SUBx_REG(Rd, xZR, Rm); +#define NEGw_REG(Rd, Rm) SUBw_REG(Rd, wZR, Rm); +#define NEGxw_REG(Rd, Rm) SUBxw_REG(Rd, xZR, Rm); +#define NEGSx_REG(Rd, Rm) SUBSx_REG(Rd, xZR, Rm); +#define NEGSw_REG(Rd, Rm) SUBSw_REG(Rd, wZR, Rm); +#define NEGSxw_REG(Rd, Rm) SUBSxw_REG(Rd, xZR, Rm); #define SUBx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 1, 0, 0b00, (imm12)&0xfff, Rn, Rd)) #define SUBSx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd)) @@ -247,25 +253,25 @@ // LOAD/STORE PAIR #define MEMPAIR_gen(size, L, op2, imm7, Rt2, Rn, Rt) ((size)<<31 | 0b101<<27 | (op2)<<23 | (L)<<22 | (imm7)<<15 | (Rt2)<<10 | (Rn)<<5 | (Rt)) -#define LDPx_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 1, 0b01, (imm>>3)&0x7f, Rt2, Rn, Rt)) -#define LDPw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 1, 0b01, (imm>>2)&0x7f, Rt2, Rn, Rt)) -#define LDPxw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 1, 0b01, (imm>>(2+rex.w)), Rt2, Rn, Rt)) -#define LDPx_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 1, 0b11, (imm>>3)&0x7f, Rt2, Rn, Rt)) -#define LDPw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 1, 0b11, (imm>>2)&0x7f, Rt2, Rn, Rt)) -#define LDPxw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 1, 0b11, (imm>>(2+rex.w)), Rt2, Rn, Rt)) -#define LDPx_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 1, 0b10, (imm>>3)&0x7f, Rt2, Rn, Rt)) -#define LDPw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 1, 0b10, (imm>>2)&0x7f, Rt2, Rn, Rt)) -#define LDPxw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 1, 0b10, (imm>>(2+rex.w)), Rt2, Rn, Rt)) - -#define STPx_S7_postindex(Rt, Rt2, Rn, imm) 
EMIT(MEMPAIR_gen(1, 0, 0b01, (imm>>3)&0x7f, Rt2, Rn, Rt)) -#define STPw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 0, 0b01, (imm>>2)&0x7f, Rt2, Rn, Rt)) -#define STPxw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 0, 0b01, (imm>>(2+rex.w)), Rt2, Rn, Rt)) -#define STPx_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 0, 0b11, (imm>>3)&0x7f, Rt2, Rn, Rt)) -#define STPw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 0, 0b11, (imm>>2)&0x7f, Rt2, Rn, Rt)) -#define STPxw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 0, 0b11, (imm>>(2+rex.w)), Rt2, Rn, Rt)) -#define STPx_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 0, 0b10, (imm>>3)&0x7f, Rt2, Rn, Rt)) -#define STPw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 0, 0b10, (imm>>2)&0x7f, Rt2, Rn, Rt)) -#define STPxw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 0, 0b10, (imm>>(2+rex.w)), Rt2, Rn, Rt)) +#define LDPx_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 1, 0b01, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) +#define LDPw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 1, 0b01, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) +#define LDPxw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 1, 0b01, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) +#define LDPx_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 1, 0b11, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) +#define LDPw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 1, 0b11, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) +#define LDPxw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 1, 0b11, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) +#define LDPx_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 1, 0b10, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) +#define LDPw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 1, 0b10, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) +#define LDPxw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 1, 0b10, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) + +#define STPx_S7_postindex(Rt, Rt2, 
Rn, imm) EMIT(MEMPAIR_gen(1, 0, 0b01, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) +#define STPw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 0, 0b01, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) +#define STPxw_S7_postindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 0, 0b01, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) +#define STPx_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 0, 0b11, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) +#define STPw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 0, 0b11, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) +#define STPxw_S7_preindex(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 0, 0b11, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) +#define STPx_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(1, 0, 0b10, (((uint32_t)(imm))>>3)&0x7f, Rt2, Rn, Rt)) +#define STPw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(0, 0, 0b10, (((uint32_t)(imm))>>2)&0x7f, Rt2, Rn, Rt)) +#define STPxw_S7_offset(Rt, Rt2, Rn, imm) EMIT(MEMPAIR_gen(rex.w, 0, 0b10, (((uint32_t)(imm))>>(2+rex.w)), Rt2, Rn, Rt)) // PUSH / POP helper #define POP1(reg) LDRx_S9_postindex(reg, xRSP, 8) @@ -457,6 +463,24 @@ #define LSLw_REG(Rd, Rn, Rm) EMIT(LS_V_gen(0, Rm, 0b00, Rn, Rd)) #define LSLxw_REG(Rd, Rn, Rm) EMIT(LS_V_gen(rex.w, Rm, 0b00, Rn, Rd)) +// UMULL / SMULL +#define MADDL_gen(U, Rm, o0, Ra, Rn, Rd) (1<<31 | 0b11011<<24 | (U)<<23 | 0b01<<21 | (Rm)<<16 | (o0)<<15 | (Ra)<<10 | (Rn)<<5 | (Rd)) +#define UMADDL(Xd, Wn, Wm, Xa) EMIT(MADDL_gen(1, Wm, 0, Xa, Wn, Xd)) +#define UMULL(Xd, Wn, Wm) UMADDL(Xd, Wn, Wm, xZR) +#define SMADDL(Xd, Wn, Wm, Xa) EMIT(MADDL_gen(0, Wm, 0, Xa, Wn, Xd)) +#define SMULL(Xd, Wn, Wm) SMADDL(Xd, Wn, Wm, xZR) + +#define MULH_gen(U, Rm, Rn, Rd) (1<<31 | 0b11011<<24 | (U)<<23 | 0b10<<21 | (Rm)<<16 | 0b11111<<10 | (Rn)<<5 | (Rd)) +#define UMULH(Xd, Xn, Xm) EMIT(MULH_gen(1, Xm, Xn, Xd)) +#define SMULH(Xd, Xn, Xm) EMIT(MULH_gen(0, Xm, Xn, Xd)) + +#define MADD_gen(sf, Rm, Ra, Rn, Rd) ((sf)<<31 | 0b11011<<24 | (Rm)<<16 | (Ra)<<10 | (Rn)<<5 | (Rd)) +#define 
MADDx(Rd, Rn, Rm, Ra) EMIT(MADD_gen(1, Rm, Ra, Rn, Rd)) +#define MADDw(Rd, Rn, Rm, Ra) EMIT(MADD_gen(0, Rm, Ra, Rn, Rd)) +#define MADDxw(Rd, Rn, Rm, Ra) EMIT(MADD_gen(rex.w, Rm, Ra, Rn, Rd)) +#define MULx(Rd, Rn, Rm) MADDx(Rd, Rn, Rm, xZR) +#define MULw(Rd, Rn, Rm) MADDw(Rd, Rn, Rm, xZR) +#define MULxw(Rd, Rn, Rm) MADDxw(Rd, Rn, Rm, xZR) // MRS diff --git a/src/dynarec/arm64_printer.c b/src/dynarec/arm64_printer.c index b54d4b9e..57024b6b 100755 --- a/src/dynarec/arm64_printer.c +++ b/src/dynarec/arm64_printer.c @@ -16,8 +16,8 @@ static const char* conds[] = {"cEQ", "cNE", "cCS", "cCC", "cMI", "cPL", "cVS", " #define abs(A) (((A)<0)?(-(A)):(A)) typedef struct arm64_print_s { - int N, S; - int t, n, m, d, t2; + int N, S, U; + int t, n, m, d, t2, a; int f, c, o, h; int i, r, s; int x, w; @@ -59,10 +59,12 @@ int isMask(uint32_t opcode, const char* mask, arm64_print_t *a) case '1': if(v!=1) return 0; break; case 'N': a->N = (a->N<<1) | v; break; case 'S': a->S = (a->S<<1) | v; break; + case 'U': a->U = (a->U<<1) | v; break; case 't': a->t = (a->t<<1) | v; break; case '2': a->t2 = (a->t2<<1) | v; break; case 'n': a->n = (a->n<<1) | v; break; case 'm': a->m = (a->m<<1) | v; break; + case 'a': a->a = (a->a<<1) | v; break; case 'd': a->d = (a->d<<1) | v; break; case 'f': a->f = (a->f<<1) | v; break; case 'c': a->c = (a->c<<1) | v; break; @@ -100,6 +102,7 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr) #define Rt2 a.t2 #define Rm a.m #define Rd a.d + #define Ra a.a #define sf a.f #define imm a.i #define option a.o @@ -590,6 +593,26 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr) return buff; } + // MULL ADD + if(isMask(opcode, "10011011U01mmmmm0aaaaannnnnddddd", &a)) { + if(Ra==31) + snprintf(buff, sizeof(buff), "%cMULL %s, %s, %s", a.U?'U':'S', Xt[Rd], Wt[Rn], Wt[Rm]); + else + snprintf(buff, sizeof(buff), "%cMADDL %s, %s, %s, %s", a.U?'U':'S', Xt[Rd], Wt[Rn], Wt[Rm], Xt[Ra]); + return buff; + } + if(isMask(opcode, 
"10011011U10mmmmm011111nnnnnddddd", &a)) { + snprintf(buff, sizeof(buff), "%cMULH %s, %s, %s", a.U?'U':'S', Xt[Rd], Wt[Rn], Wt[Rm]); + return buff; + } + if(isMask(opcode, "f0011011000mmmmm0aaaaannnnnddddd", &a)) { + if(Ra==31) + snprintf(buff, sizeof(buff), "MUL %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm]); + else + snprintf(buff, sizeof(buff), "MADD %s, %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], sf?Xt[Ra]:Wt[Ra]); + return buff; + } + snprintf(buff, sizeof(buff), "%08X ???", __builtin_bswap32(opcode)); return buff; } \ No newline at end of file diff --git a/src/dynarec/dynarec_arm64_00.c b/src/dynarec/dynarec_arm64_00.c index ac3e470c..02a67cf1 100755 --- a/src/dynarec/dynarec_arm64_00.c +++ b/src/dynarec/dynarec_arm64_00.c @@ -676,9 +676,9 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin x87_forget(dyn, ninst, x3, x4, 0); sse_purge07cache(dyn, ninst, x3); GETIP(ip+1); // read the 0xCC - STORE_XEMU_MINIMUM(xRIP); + STORE_XEMU_CALL(xRIP); CALL_S(x64Int3, -1); - LOAD_XEMU_MINIMUM(xRIP); + LOAD_XEMU_CALL(xRIP); addr+=8+8; TABLE64(x3, addr); // expected return address CMPSx_REG(xRIP, x3); @@ -1030,9 +1030,9 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin x87_forget(dyn, ninst, x3, x4, 0); sse_purge07cache(dyn, ninst, x3); GETIP_(dyn->insts[ninst].natcall); // read the 0xCC already - STORE_XEMU_MINIMUM(xRIP); + STORE_XEMU_CALL(xRIP); CALL_S(x64Int3, -1); - LOAD_XEMU_MINIMUM(xRIP); + LOAD_XEMU_CALL(xRIP); TABLE64(x3, dyn->insts[ninst].natcall); ADDx_U12(x3, x3, 2+8+8); CMPSx_REG(xRIP, x3); @@ -1102,6 +1102,84 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin *ok = 0; break; + case 0xF7: + nextop = F8; + switch((nextop>>3)&7) { + case 0: + case 1: + INST_NAME("TEST Ed, Id"); + SETFLAGS(X_ALL, SF_SET); + GETEDH(x1, 4); + i64 = F32S; + MOV64xw(x2, i64); + emit_test32(dyn, ninst, rex, ed, x2, x3, x4); + break; + case 2: + 
INST_NAME("NOT Ed"); + GETED(4); + MVNxw_REG(ed, ed); + WBACK; + break; + case 3: + INST_NAME("NEG Ed"); + SETFLAGS(X_ALL, SF_SET); + GETED(0); + emit_neg32(dyn, ninst, rex, ed, x3, x4); + WBACK; + break; + case 4: + INST_NAME("MUL EAX, Ed"); + SETFLAGS(X_ALL, SF_PENDING); + UFLAG_DF(x2, rex.w?d_mul64:d_mul32); + GETED(0); + if(rex.w) { + if(ed==xRDX) gd=x3; else gd=xRDX; + UMULH(gd, xRAX, ed); + MULx(xRAX, xRAX, ed); + if(gd!=xRDX) {MOVx_REG(xRDX, gd);} + } else { + UMULL(xRDX, xRAX, ed); //64 <- 32x32 + MOVw_REG(xRAX, xRDX); + LSRx(xRDX, xRDX, 32); + } + UFLAG_RES(xRAX); + UFLAG_OP1(xRDX); + break; + case 5: + INST_NAME("IMUL EAX, Ed"); + SETFLAGS(X_ALL, SF_PENDING); + UFLAG_DF(x2, rex.w?d_imul64:d_imul32); + GETED(0); + if(rex.w) { + if(ed==xRDX) gd=x3; else gd=xRDX; + SMULH(gd, xRAX, ed); + MULx(xRAX, xRAX, ed); + if(gd!=xRDX) {MOVx_REG(xRDX, gd);} + } else { + SMULL(xRDX, xRAX, ed); //64 <- 32x32 + MOVw_REG(xRAX, xRDX); + LSRx(xRDX, xRDX, 32); + } + UFLAG_RES(xRAX); + UFLAG_OP1(xRDX); + break; + case 6: + INST_NAME("DIV Ed"); + SETFLAGS(X_ALL, SF_SET); + GETEDH(x1, 0); + if(ed!=x1) {MOVxw_REG(x1, ed);} + CALL(rex.w?((void*)div64):((void*)div32), -1); + break; + case 7: + INST_NAME("IDIV Ed"); + SETFLAGS(X_ALL, SF_SET); + GETEDH(x1, 0); + if(ed!=x1) {MOVxw_REG(x1, ed);} + CALL(rex.w?((void*)idiv64):((void*)idiv32), -1); + break; + } + break; + case 0xFF: nextop = F8; switch((nextop>>3)&7) { diff --git a/src/dynarec/dynarec_arm64_emit_math.c b/src/dynarec/dynarec_arm64_emit_math.c index d3931ccc..d76ded05 100755 --- a/src/dynarec/dynarec_arm64_emit_math.c +++ b/src/dynarec/dynarec_arm64_emit_math.c @@ -1733,52 +1733,51 @@ void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4, in //} // emit NEG32 instruction, from s1, store result in s1 using s3 and s4 as scratch -//void emit_neg32(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4) -//{ -// IFX(X_PEND) { -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1)); -// SET_DF(s3, d_neg32); -// } 
else IFX(X_ALL) { -// SET_DFNONE(s3); -// } -// IFX(X_ZF|X_CF) { -// BIC_IMM8(xFlags, xFlags, (1<<F_ZF)|(1<<F_CF), 0); -// } -// IFX(X_CF) { -// TSTS_REG_LSL_IMM5(s1, s1, 0); -// ORR_IMM8_COND(cNE, xFlags, xFlags, 1<<F_CF, 0); -// } -// IFX(X_AF) { -// MOV_REG_LSL_IMM5(s3, s1, 0); -// } -// IFX(X_ZF|X_OF) { -// RSBS_IMM8(s1, s1, 0); -// } else { -// RSB_IMM8(s1, s1, 0); -// } -// IFX(X_PEND) { -// STR_IMM9(s1, xEmu, offsetof(x64emu_t, res)); -// } -// IFX(X_ZF) { -// ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0); -// } -// IFX(X_OF) { -// ORR_IMM8_COND(cVS, xFlags, xFlags, 0b10, 0x0b); -// BIC_IMM8_COND(cVC, xFlags, xFlags, 0b10, 0x0b); -// } -// IFX(X_AF) { -// ORR_REG_LSL_IMM5(s3, s3, s1, 0); // bc = op1 | res -// MOV_REG_LSR_IMM5(s4, s3, 3); -// BFI(xFlags, s4, F_AF, 1); // AF: bc & 0x08 -// } -// IFX(X_SF) { -// MOV_REG_LSR_IMM5(s3, s1, 31); -// BFI(xFlags, s3, F_SF, 1); -// } -// IFX(X_PF) { -// emit_pf(dyn, ninst, s1, s3, s4); -// } -//} +void emit_neg32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4) +{ + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1)); + SET_DF(s3, rex.w?d_neg64:d_neg32); + } else IFX(X_ALL) { + SET_DFNONE(s3); + } + IFX(X_CF) { + TSTxw_REG(s1, s1); + CSETw(s4, cNE); + BFIw(xFlags, s4, F_CF, 1); + } + IFX(X_AF) { + MOVxw_REG(s3, s1); + } + IFX(X_ZF|X_OF) { + NEGSxw_REG(s1, s1); + } else { + NEGxw_REG(s1, s1); + } + IFX(X_PEND) { + STRxw_U12(s1, xEmu, offsetof(x64emu_t, res)); + } + IFX(X_ZF) { + CSETw(s4, cEQ); + BFIw(xFlags, s4, F_ZF, 1); + } + IFX(X_OF) { + CSETw(s4, cVS); + BFIw(xFlags, s4, F_OF, 1); + } + IFX(X_AF) { + ORRxw_REG(s3, s3, s1); // bc = op1 | res + LSRxw(s4, s3, 3); + BFIw(xFlags, s4, F_AF, 1); // AF: bc & 0x08 + } + IFX(X_SF) { + LSRxw(s3, s1, rex.w?63:31); + BFIw(xFlags, s3, F_SF, 1); + } + IFX(X_PF) { + emit_pf(dyn, ninst, s1, s3, s4); + } +} // emit NEG16 instruction, from s1, store result in s1 using s3 and s4 as scratch //void emit_neg16(dynarec_arm_t* dyn, int ninst, int s1, int s3, 
int s4) diff --git a/src/dynarec/dynarec_arm64_helper.c b/src/dynarec/dynarec_arm64_helper.c index 24aca546..efcc1bd4 100755 --- a/src/dynarec/dynarec_arm64_helper.c +++ b/src/dynarec/dynarec_arm64_helper.c @@ -333,6 +333,11 @@ void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int save savereg = 7; if(ret!=-2) { STPx_S7_preindex(xEmu, savereg, xSP, -16); // ARM64 stack needs to be 16byte aligned + STPx_S7_offset(xRAX, xRCX, xEmu, offsetof(x64emu_t, regs[_AX])); // x9..x15, x16,x17,x18 those needs to be saved by caller + STPx_S7_offset(xRDX, xRBX, xEmu, offsetof(x64emu_t, regs[_DX])); // but x18 is R8 wich is lost, so it's fine to not save it? + STPx_S7_offset(xRSP, xRBP, xEmu, offsetof(x64emu_t, regs[_SP])); + STPx_S7_offset(xRSI, xRDI, xEmu, offsetof(x64emu_t, regs[_SI])); + STRx_U12(xR8, xEmu, offsetof(x64emu_t, regs[_R8])); } fpu_pushcache(dyn, ninst, reg); if(saveflags) { @@ -346,6 +351,21 @@ void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int save } if(ret!=-2) { LDPx_S7_postindex(xEmu, savereg, xSP, 16); + #define GO(A, B) if(ret==x##A) { \ + LDRx_U12(x##B, xEmu, offsetof(x64emu_t, regs[_##B])); \ + } else if(ret==x##B) { \ + LDRx_U12(x##A, xEmu, offsetof(x64emu_t, regs[_##A])); \ + } else { \ + LDPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A])); \ + } + GO(RAX, RCX); + GO(RDX, RBX); + GO(RSP, RBP); + GO(RSI, RDI); + #undef GO + if(ret!=xR8) { + LDRx_U12(xR8, xEmu, offsetof(x64emu_t, regs[_R8])); + } } if(saveflags) { LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); diff --git a/src/dynarec/dynarec_arm64_helper.h b/src/dynarec/dynarec_arm64_helper.h index b3e08d7d..ac26870e 100755 --- a/src/dynarec/dynarec_arm64_helper.h +++ b/src/dynarec/dynarec_arm64_helper.h @@ -382,16 +382,12 @@ STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); \ if(A) {STRx_U12(A, xEmu, offsetof(x64emu_t, ip));} -#define LOAD_XEMU_MINIMUM(A) \ - LOAD_REG(RAX); \ - LOAD_REG(RCX); \ - LOAD_REG(RDX); \ - LOAD_REG(RBX); \ - 
LOAD_REG(RSP); \ - LOAD_REG(RBP); \ - LOAD_REG(RSI); \ - LOAD_REG(RDI); \ - LOAD_REG(R8); \ +#define STORE_XEMU_CALL(A) \ + STORE_REG(R9); \ + STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); \ + if(A) {STRx_U12(A, xEmu, offsetof(x64emu_t, ip));} + +#define LOAD_XEMU_CALL(A) \ LOAD_REG(R9); \ LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); \ if(A) {LDRx_U12(A, xEmu, offsetof(x64emu_t, ip)); if(A==xRIP) dyn->last_ip = 0;} @@ -691,7 +687,7 @@ void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4 void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5); //void emit_sbb16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4); //void emit_sbb16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); -//void emit_neg32(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); +void emit_neg32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4); //void emit_neg16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); //void emit_neg8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4); void emit_shl32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); diff --git a/src/dynarec/dynarec_arm64_pass.c b/src/dynarec/dynarec_arm64_pass.c index e7adb758..8f7af07c 100755 --- a/src/dynarec/dynarec_arm64_pass.c +++ b/src/dynarec/dynarec_arm64_pass.c @@ -56,10 +56,10 @@ void arm_pass(dynarec_arm_t* dyn, uintptr_t addr) fpu_reflectcache(dyn, ninst, x1, x2, x3); GETIP(ip); MOVx_REG(x1, xRIP); - STORE_XEMU_REGS(xRIP); + STORE_XEMU_CALL(xRIP); MOV32w(x2, 1); CALL(PrintTrace, -1); - LOAD_XEMU_REGS(xRIP); + LOAD_XEMU_CALL(xRIP); MESSAGE(LOG_DUMP, "----------\n"); } } diff --git a/src/emu/x64primop.c b/src/emu/x64primop.c index cc9422e7..441518ac 100755 --- a/src/emu/x64primop.c +++ b/src/emu/x64primop.c @@ -1384,8 +1384,8 @@ void idiv32(x64emu_t *emu, uint32_t s) SET_FLAG(F_ZF); CONDITIONAL_SET_FLAG(PARITY(mod & 0xff), F_PF); - R_EAX = (uint32_t)quot; 
- R_EDX = (uint32_t)mod; + R_RAX = (uint32_t)quot; + R_RDX = (uint32_t)mod; } void idiv64(x64emu_t *emu, uint64_t s) @@ -1485,8 +1485,8 @@ void div32(x64emu_t *emu, uint32_t s) SET_FLAG(F_ZF); CONDITIONAL_SET_FLAG(PARITY(mod & 0xff), F_PF); - R_EAX = (uint32_t)div; - R_EDX = (uint32_t)mod; + R_RAX = (uint32_t)div; + R_RDX = (uint32_t)mod; } void div64(x64emu_t *emu, uint64_t s) |