diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2021-06-28 13:19:56 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2021-06-28 13:19:56 +0200 |
| commit | 99ba7707f63d906f5e22a44ae2cbbc8dbd5e3f3e (patch) | |
| tree | a02f388c9f30cdeec9e54bb3118c02cef672bab4 /src | |
| parent | ea1ef4e602af2f0e92181b96cff5ffa66186972f (diff) | |
| download | box64-99ba7707f63d906f5e22a44ae2cbbc8dbd5e3f3e.tar.gz box64-99ba7707f63d906f5e22a44ae2cbbc8dbd5e3f3e.zip | |
[DYNAREC] Added 0xCF (IRET) opcode and optimized native calls
Diffstat (limited to 'src')
| -rwxr-xr-x | src/dynarec/arm64_epilog.S | 3 | ||||
| -rwxr-xr-x | src/dynarec/arm64_next.S | 9 | ||||
| -rwxr-xr-x | src/dynarec/arm64_prolog.S | 3 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_00.c | 87 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_helper.c | 62 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_helper.h | 8 | ||||
| -rwxr-xr-x | src/emu/x64emu_private.h | 7 |
7 files changed, 127 insertions, 52 deletions
diff --git a/src/dynarec/arm64_epilog.S b/src/dynarec/arm64_epilog.S index 4b73803a..c4dd0043 100755 --- a/src/dynarec/arm64_epilog.S +++ b/src/dynarec/arm64_epilog.S @@ -18,6 +18,9 @@ arm64_epilog: stp x22, x23, [x0, (8 * 12)] stp x24, x25, [x0, (8 * 14)] stp x26, x27, [x0, (8 * 16)] // put back reg value in emu, including EIP (so x27 must be EIP now) + // and the 4 first SSE regs too + stp q0, q1, [x0, (8 * 18)] + stp q2, q3, [x0, (8 * 22)] //restore all used register //vpop {d8-d15} ldp x19, x20, [sp, (8 * 0)] diff --git a/src/dynarec/arm64_next.S b/src/dynarec/arm64_next.S index 834c1a89..3baae986 100755 --- a/src/dynarec/arm64_next.S +++ b/src/dynarec/arm64_next.S @@ -12,13 +12,16 @@ arm64_next: // emu is r0 // IP address is r1 - sub sp, sp, (8 * 12) + sub sp, sp, (8 * 12 + 16 * 4) stp x0, x1, [sp, (8 * 0)] stp x10, x11, [sp, (8 * 2)] stp x12, x13, [sp, (8 * 4)] stp x14, x15, [sp, (8 * 6)] stp x16, x17, [sp, (8 * 8)] str x18, [sp, (8 * 10)] + stp q0, q1, [sp, (8 * 12)] + stp q2, q3, [sp, (8 * 16)] + mov x2, lr // "from" is in lr, so put in x2 // call the function bl LinkNext @@ -31,7 +34,9 @@ arm64_next: ldp x14, x15, [sp, (8 * 6)] ldp x16, x17, [sp, (8 * 8)] ldr x18, [sp, (8 * 10)] - add sp, sp, (8 * 12) + ldp q0, q1, [sp, (8 * 12)] + ldp q2, q3, [sp, (8 * 16)] + add sp, sp, (8 * 12 + 16 * 4) // return offset is jump address br x3 diff --git a/src/dynarec/arm64_prolog.S b/src/dynarec/arm64_prolog.S index 21961e5b..5c6a92cd 100755 --- a/src/dynarec/arm64_prolog.S +++ b/src/dynarec/arm64_prolog.S @@ -31,5 +31,8 @@ arm64_prolog: ldp x22, x23, [x0, (8 * 12)] ldp x24, x25, [x0, (8 * 14)] ldp x26, x27, [x0, (8 * 16)] + // grab 4 first SSE regs too + ldp q0, q1, [x0, (8 * 18)] + ldp q2, q3, [x0, (8 * 22)] //jump to function br x1 diff --git a/src/dynarec/dynarec_arm64_00.c b/src/dynarec/dynarec_arm64_00.c index df621729..31605840 100755 --- a/src/dynarec/dynarec_arm64_00.c +++ b/src/dynarec/dynarec_arm64_00.c @@ -24,6 +24,8 @@ #include "dynarec_arm64_functions.h" 
#include "dynarec_arm64_helper.h" +int isSimpleWrapper(wrapper_t fun); + uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) { uint8_t nextop, opcode; @@ -1556,19 +1558,25 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MESSAGE(LOG_DUMP, "Native Call to %s\n", GetNativeName(GetNativeFnc(ip))); x87_forget(dyn, ninst, x3, x4, 0); sse_purge07cache(dyn, ninst, x3); - GETIP(ip+1); // read the 0xCC - STORE_XEMU_CALL(xRIP); - CALL_S(x64Int3, -1); - LOAD_XEMU_CALL(xRIP); - addr+=8+8; - TABLE64(x3, addr); // expected return address - CMPSx_REG(xRIP, x3); - B_MARK(cNE); - LDRw_U12(w1, xEmu, offsetof(x64emu_t, quit)); - CBZw_NEXT(w1); - MARK; - LOAD_XEMU_REM(); - jump_to_epilog(dyn, 0, xRIP, ninst); + if(box64_log<2 && isSimpleWrapper(*(wrapper_t*)(addr))) { + //GETIP(ip+3+8+8); // read the 0xCC + call_n(dyn, ninst, *(void**)(addr+8)); + addr+=8+8; + } else { + GETIP(ip+1); // read the 0xCC + STORE_XEMU_CALL(xRIP); + CALL_S(x64Int3, -1); + LOAD_XEMU_CALL(xRIP); + addr+=8+8; + TABLE64(x3, addr); // expected return address + CMPSx_REG(xRIP, x3); + B_MARK(cNE); + LDRw_U12(w1, xEmu, offsetof(x64emu_t, quit)); + CBZw_NEXT(w1); + MARK; + LOAD_XEMU_REM(); + jump_to_epilog(dyn, 0, xRIP, ninst); + } } } else { #if 1 @@ -1587,6 +1595,15 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin #endif } break; + + case 0xCF: + INST_NAME("IRET"); + SETFLAGS(X_ALL, SF_SET); // Not a hack, EFLAGS are restored + BARRIER(2); + iret_to_epilog(dyn, ninst, rex.w); + *need_epilog = 0; + *ok = 0; + break; case 0xD0: case 0xD2: // TODO: Jump if CL is 0 nextop = F8; @@ -1979,26 +1996,32 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MESSAGE(LOG_DUMP, "Native Call to %s (retn=%d)\n", GetNativeName(GetNativeFnc(dyn->insts[ninst].natcall-1)), dyn->insts[ninst].retn); // calling a native function sse_purge07cache(dyn, ninst, 
x3); - GETIP_(dyn->insts[ninst].natcall); // read the 0xCC already - STORE_XEMU_CALL(xRIP); - CALL_S(x64Int3, -1); - LOAD_XEMU_CALL(xRIP); - TABLE64(x3, dyn->insts[ninst].natcall); - ADDx_U12(x3, x3, 2+8+8); - CMPSx_REG(xRIP, x3); - B_MARK(cNE); // Not the expected address, exit dynarec block - POP1(xRIP); // pop the return address - if(dyn->insts[ninst].retn) { - ADDx_U12(xRSP, xRSP, dyn->insts[ninst].retn); + if(box64_log<2 && dyn->insts && isSimpleWrapper(*(wrapper_t*)(dyn->insts[ninst].natcall+2))) { + //GETIP(ip+3+8+8); // read the 0xCC + call_n(dyn, ninst, *(void**)(dyn->insts[ninst].natcall+2+8)); + POP1(xRIP); // pop the return address + } else { + GETIP_(dyn->insts[ninst].natcall); // read the 0xCC already + STORE_XEMU_CALL(xRIP); + CALL_S(x64Int3, -1); + LOAD_XEMU_CALL(xRIP); + TABLE64(x3, dyn->insts[ninst].natcall); + ADDx_U12(x3, x3, 2+8+8); + CMPSx_REG(xRIP, x3); + B_MARK(cNE); // Not the expected address, exit dynarec block + POP1(xRIP); // pop the return address + if(dyn->insts[ninst].retn) { + ADDx_U12(xRSP, xRSP, dyn->insts[ninst].retn); + } + TABLE64(x3, addr); + CMPSx_REG(xRIP, x3); + B_MARK(cNE); // Not the expected address again + LDRw_U12(w1, xEmu, offsetof(x64emu_t, quit)); + CBZw_NEXT(w1); // not quitting, so lets continue + MARK; + LOAD_XEMU_REM(); // load remaining register, has they have changed + jump_to_epilog(dyn, 0, xRIP, ninst); } - TABLE64(x3, addr); - CMPSx_REG(xRIP, x3); - B_MARK(cNE); // Not the expected address again - LDRw_U12(w1, xEmu, offsetof(x64emu_t, quit)); - CBZw_NEXT(w1); // not quitting, so lets continue - MARK; - LOAD_XEMU_REM(); // load remaining register, has they have changed - jump_to_epilog(dyn, 0, xRIP, ninst); break; default: if(ninst && dyn->insts && dyn->insts[ninst-1].x64.set_flags) { diff --git a/src/dynarec/dynarec_arm64_helper.c b/src/dynarec/dynarec_arm64_helper.c index 3365cc6b..add6f1d1 100755 --- a/src/dynarec/dynarec_arm64_helper.c +++ b/src/dynarec/dynarec_arm64_helper.c @@ -450,8 +450,9 @@ void 
retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n) BLR(x2); // save LR } -void iret_to_epilog(dynarec_arm_t* dyn, int ninst) +void iret_to_epilog(dynarec_arm_t* dyn, int ninst, int is64bits) { + #warning TODO: is64bits MAYUSE(ninst); MESSAGE(LOG_DUMP, "IRet to epilog\n"); // POP IP @@ -480,7 +481,7 @@ void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int save if(saveflags) { STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); } - fpu_pushcache(dyn, ninst, reg); + fpu_pushcache(dyn, ninst, reg, 0); if(ret!=-2) { STPx_S7_preindex(xEmu, savereg, xSP, -16); // ARM64 stack needs to be 16byte aligned STPx_S7_offset(xRAX, xRCX, xEmu, offsetof(x64emu_t, regs[_AX])); // x9..x15, x16,x17,x18 those needs to be saved by caller @@ -510,13 +511,48 @@ void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int save GO(R8, R9); #undef GO } - fpu_popcache(dyn, ninst, reg); + fpu_popcache(dyn, ninst, reg, 0); if(saveflags) { LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); } SET_NODF(); } +void call_n(dynarec_arm_t* dyn, int ninst, void* fnc) +{ + MAYUSE(fnc); + STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); + fpu_pushcache(dyn, ninst, x3, 1); + // x9..x15, x16,x17,x18 those needs to be saved by caller + // RDI, RSI, RDX, RCX, R8, R9 are used for function call + STPx_S7_preindex(xEmu, xRBX, xSP, -16); // ARM64 stack needs to be 16byte aligned + STPx_S7_offset(xRSP, xRBP, xEmu, offsetof(x64emu_t, regs[_SP])); + STPx_S7_offset(xRSI, xRDI, xEmu, offsetof(x64emu_t, regs[_SI])); + // prepare regs for native call + MOVx_REG(0, xRDI); + MOVx_REG(x1, xRSI); + MOVx_REG(x2, xRDX); + MOVx_REG(x3, xRCX); + MOVx_REG(x4, xR8); + MOVx_REG(x5, xR9); + // native call + TABLE64(16, (uintptr_t)fnc); // using x16 as scratch regs for call address + BLR(16); + // put return value in x86 regs + MOVx_REG(xRAX, 0); + MOVx_REG(xRDX, x1); + // all done, restore all regs + LDPx_S7_postindex(xEmu, xRBX, xSP, 16); + #define GO(A, B) LDPx_S7_offset(x##A, x##B, xEmu, 
offsetof(x64emu_t, regs[_##A])) + GO(RSP, RBP); + GO(RSI, RDI); + #undef GO + + fpu_popcache(dyn, ninst, x3, 1); + LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); + SET_NODF(); +} + void grab_segdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg, int segment) { (void)addr; @@ -1025,7 +1061,7 @@ static void sse_reset(dynarec_arm_t* dyn, int ninst) (void)ninst; #if STEP > 1 for (int i=0; i<16; ++i) - dyn->ssecache[i] = -1; + dyn->ssecache[i] = (i<4)?i:-1; #else (void)dyn; #endif @@ -1065,7 +1101,7 @@ void sse_purge07cache(dynarec_arm_t* dyn, int ninst, int s1) (void) ninst; (void)s1; #if STEP > 1 int old = -1; - for (int i=0; i<8; ++i) + for (int i=4; i<8; ++i) if(dyn->ssecache[i]!=-1) { if (old==-1) { MESSAGE(LOG_DUMP, "\tPurge XMM0..7 Cache ------\n"); @@ -1089,7 +1125,7 @@ static void sse_purgecache(dynarec_arm_t* dyn, int ninst, int s1) (void) ninst; (void)s1; #if STEP > 1 int old = -1; - for (int i=0; i<16; ++i) + for (int i=4; i<16; ++i) if(dyn->ssecache[i]!=-1) { if (old==-1) { MESSAGE(LOG_DUMP, "\tPurge SSE Cache ------\n"); @@ -1121,19 +1157,20 @@ static void sse_reflectcache(dynarec_arm_t* dyn, int ninst, int s1) } #endif -void fpu_pushcache(dynarec_arm_t* dyn, int ninst, int s1) +void fpu_pushcache(dynarec_arm_t* dyn, int ninst, int s1, int not03) { (void) ninst; (void)s1; #if STEP > 1 + int start = not03?4:0; // only SSE regs needs to be push back to xEmu int n=0; - for (int i=0; i<16; i++) + for (int i=start; i<16; i++) if(dyn->ssecache[i]!=-1) ++n; if(!n) return; MESSAGE(LOG_DUMP, "\tPush XMM Cache (%d)------\n", n); - for (int i=0; i<16; ++i) + for (int i=start; i<16; ++i) if(dyn->ssecache[i]!=-1) { VSTR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i])); } @@ -1143,19 +1180,20 @@ void fpu_pushcache(dynarec_arm_t* dyn, int ninst, int s1) #endif } -void fpu_popcache(dynarec_arm_t* dyn, int ninst, int s1) +void fpu_popcache(dynarec_arm_t* dyn, int ninst, int s1, int not03) { (void) ninst; (void)s1; #if STEP > 1 + int start = 
not03?4:0; // only SSE regs needs to be pop back from xEmu int n=0; - for (int i=0; i<16; i++) + for (int i=start; i<16; i++) if(dyn->ssecache[i]!=-1) ++n; if(!n) return; MESSAGE(LOG_DUMP, "\tPop XMM Cache (%d)------\n", n); - for (int i=0; i<16; ++i) + for (int i=start; i<16; ++i) if(dyn->ssecache[i]!=-1) { VLDR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i])); } diff --git a/src/dynarec/dynarec_arm64_helper.h b/src/dynarec/dynarec_arm64_helper.h index aae92a2d..48bc1f1e 100755 --- a/src/dynarec/dynarec_arm64_helper.h +++ b/src/dynarec/dynarec_arm64_helper.h @@ -635,6 +635,7 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr); #define retn_to_epilog STEPNAME(retn_to_epilog) #define iret_to_epilog STEPNAME(iret_to_epilog) #define call_c STEPNAME(call_c) +#define call_n STEPNAME(call_n) #define grab_segdata STEPNAME(grab_segdata) #define emit_cmp8 STEPNAME(emit_cmp8) #define emit_cmp16 STEPNAME(emit_cmp16) @@ -751,8 +752,9 @@ void jump_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst); void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst); void ret_to_epilog(dynarec_arm_t* dyn, int ninst); void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n); -void iret_to_epilog(dynarec_arm_t* dyn, int ninst); +void iret_to_epilog(dynarec_arm_t* dyn, int ninst, int is64bits); void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg); +void call_n(dynarec_arm_t* dyn, int ninst, void* fnc); void grab_segdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg, int segment); void emit_cmp8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); void emit_cmp16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); @@ -878,8 +880,8 @@ void x87_purgecache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3); #ifdef HAVE_TRACE void fpu_reflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3); #endif -void fpu_pushcache(dynarec_arm_t* dyn, int ninst, 
int s1); -void fpu_popcache(dynarec_arm_t* dyn, int ninst, int s1); +void fpu_pushcache(dynarec_arm_t* dyn, int ninst, int s1, int not03); +void fpu_popcache(dynarec_arm_t* dyn, int ninst, int s1, int not03); uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); diff --git a/src/emu/x64emu_private.h b/src/emu/x64emu_private.h index 70d9576b..73fe8a7a 100755 --- a/src/emu/x64emu_private.h +++ b/src/emu/x64emu_private.h @@ -30,7 +30,8 @@ typedef struct x64emu_s { reg64_t regs[16]; x64flags_t eflags; reg64_t ip; - uintptr_t old_ip; + // sse + sse_regs_t xmm[16]; // fpu / mmx mmx87_regs_t mmx87[8]; uint16_t cw,cw_mask_all; @@ -42,8 +43,8 @@ typedef struct x64emu_s { fpu_ld_t fpu_ld[8]; // for long double emulation / 80bits fld fst fpu_ll_t fpu_ll[8]; // for 64bits fild / fist sequence fpu_p_reg_t p_regs[8]; - // sse - sse_regs_t xmm[16]; + // old ip + uintptr_t old_ip; // defered flags int dummy1; // to align on 64bits with df defered_flags_t df; |