diff options
| author | Yang Liu <numbksco@gmail.com> | 2024-03-03 01:10:44 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-03-02 18:10:44 +0100 |
| commit | a86f5972398dcebcfd718fccc1a956d7045c503b (patch) | |
| tree | 00abf5c0acd76f7f3e2c2cd475d8986b38ba63ca | |
| parent | b72d3a77e91b4a80576c97b3da5cae34ef072c2d (diff) | |
| download | box64-a86f5972398dcebcfd718fccc1a956d7045c503b.tar.gz box64-a86f5972398dcebcfd718fccc1a956d7045c503b.zip | |
[LA64_DYNAREC] Added CC native call support, fixed call_c (#1312)
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_00.c | 50 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.c | 50 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.h | 81 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_epilog.S | 8 |
4 files changed, 163 insertions, 26 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c index f4562f4e..7a5eb0f6 100644 --- a/src/dynarec/la64/dynarec_la64_00.c +++ b/src/dynarec/la64/dynarec_la64_00.c @@ -314,6 +314,56 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni *need_epilog = 0; *ok = 0; break; + case 0xCC: + SETFLAGS(X_ALL, SF_SET); + SKIPTEST(x1); + if (PK(0) == 'S' && PK(1) == 'C') { + addr += 2; + BARRIER(BARRIER_FLOAT); + INST_NAME("Special Box64 instruction"); + if (PK64(0) == 0) { + addr += 8; + MESSAGE(LOG_DEBUG, "Exit x64 Emu\n"); + MOV64x(x1, 1); + ST_W(x1, xEmu, offsetof(x64emu_t, quit)); + *ok = 0; + *need_epilog = 1; + } else { + MESSAGE(LOG_DUMP, "Native Call to %s\n", GetNativeName(GetNativeFnc(ip))); + x87_forget(dyn, ninst, x3, x4, 0); + sse_purge07cache(dyn, ninst, x3); + + // FIXME: Even the basic support of isSimpleWrapper is disabled for now. + + GETIP(ip + 1); // read the 0xCC + STORE_XEMU_CALL(x3); + ADDI_D(x1, xEmu, (uint32_t)offsetof(x64emu_t, ip)); // setup addr as &emu->ip + CALL_S(x64Int3, -1); + LOAD_XEMU_CALL(); + addr += 8 + 8; + TABLE64(x3, addr); // expected return address + BNE_MARK(xRIP, x3); + LD_W(w1, xEmu, offsetof(x64emu_t, quit)); + CBZ_NEXT(w1); + MARK; + jump_to_epilog_fast(dyn, 0, xRIP, ninst); + } + } else { + if (!box64_ignoreint3) { + INST_NAME("INT 3"); + // check if TRAP signal is handled + LD_D(x1, xEmu, offsetof(x64emu_t, context)); + MOV64x(x2, offsetof(box64context_t, signals[SIGTRAP])); + ADD_D(x2, x2, x1); + LD_D(x3, x2, 0); + CBZ_NEXT(x3); + GETIP(ip); + STORE_XEMU_CALL(x3); + CALL(native_int3, -1); + LOAD_XEMU_CALL(); + } + } + break; case 0xFF: nextop = F8; switch ((nextop >> 3) & 7) { diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c index ba18e76d..f8cd8de1 100644 --- a/src/dynarec/la64/dynarec_la64_helper.c +++ b/src/dynarec/la64/dynarec_la64_helper.c @@ -340,6 +340,25 @@ void jump_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst) BR(x2); } +void jump_to_epilog_fast(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst) +{ + MAYUSE(dyn); + MAYUSE(ip); + MAYUSE(ninst); + MESSAGE(LOG_DUMP, "Jump to epilog\n"); + + if (reg) { + if (reg != xRIP) { + MV(xRIP, reg); + } + } else { + GETIP_(ip); + } + TABLE64(x2, (uintptr_t)la64_epilog_fast); + SMEND(); + BR(x2); +} + void jump_to_next(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst, int is32bits) { MAYUSE(dyn); @@ -465,14 +484,15 @@ void call_c(dynarec_la64_t* dyn, int ninst, void* fnc, int reg, int ret, int sav ADDI_D(xSP, xSP, -16); // RV64 stack needs to be 16byte aligned ST_D(xEmu, xSP, 0); ST_D(savereg, xSP, 8); - // x5..x8, x10..x17, x28..x31 those needs to be saved by caller + // $r4..$r20 needs to be saved by caller STORE_REG(RAX); STORE_REG(RCX); STORE_REG(RDX); - STORE_REG(R12); - STORE_REG(R13); - STORE_REG(R14); - STORE_REG(R15); + STORE_REG(RBX); + STORE_REG(RSP); + STORE_REG(RBP); + STORE_REG(RSI); + STORE_REG(RDI); ST_D(xRIP, xEmu, offsetof(x64emu_t, ip)); } TABLE64(reg, (uintptr_t)fnc); @@ -489,10 +509,11 @@ void call_c(dynarec_la64_t* dyn, int ninst, void* fnc, int reg, int ret, int sav GO(RAX); GO(RCX); GO(RDX); - GO(R12); - GO(R13); - GO(R14); - GO(R15); + GO(RBX); + GO(RSP); + GO(RBP); + GO(RSI); + GO(RDI); if (ret != xRIP) LD_D(xRIP, xEmu, offsetof(x64emu_t, ip)); #undef GO @@ -509,6 +530,17 @@ void call_c(dynarec_la64_t* dyn, int ninst, void* fnc, int reg, int ret, int sav dyn->last_ip = 0; } +void x87_forget(dynarec_la64_t* dyn, int ninst, int s1, int s2, int st) +{ + // TODO +} + +// purge the SSE cache for XMM0..XMM7 (to use before function native call) +void sse_purge07cache(dynarec_la64_t* dyn, int ninst, int s1) +{ + // TODO +} + void fpu_pushcache(dynarec_la64_t* dyn, int ninst, int s1, int not07) { // TODO diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index f790ca9d..9bd7ef04 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ -222,6 +222,22 @@ #define MARKLOCK dyn->insts[ninst].marklock = dyn->native_size #define GETMARKLOCK dyn->insts[ninst].marklock +#define Bxx_gen(OP, M, reg1, reg2) \ + j64 = GET##M - dyn->native_size; \ + B##OP(reg1, reg2, j64) + +#define BxxZ_gen(OP, M, reg1, reg2) \ + j64 = GET##M - dyn->native_size; \ + B##OP##Z(reg1, j64) + +// Branch to MARK if reg1!=reg2 (use j64) +#define BNE_MARK(reg1, reg2) Bxx_gen(NE, MARK, reg1, reg2) + +// Branch to NEXT if reg1==0 (use j64) +#define CBZ_NEXT(reg1) \ + j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \ + BEQZ(reg1, j64) + #define IFX(A) if ((dyn->insts[ninst].x64.gen_flags & (A))) #define IFX_PENDOR0 if ((dyn->insts[ninst].x64.gen_flags & (X_PEND) || !dyn->insts[ninst].x64.gen_flags)) #define IFXX(A) if ((dyn->insts[ninst].x64.gen_flags == (A))) @@ -231,6 +247,22 @@ #define STORE_REG(A) ST_D(x##A, xEmu, offsetof(x64emu_t, regs[_##A])) #define LOAD_REG(A) LD_D(x##A, xEmu, offsetof(x64emu_t, regs[_##A])) +// Need to also store current value of some register, as they may be used by functions like setjmp +#define STORE_XEMU_CALL(s0) \ + STORE_REG(RBX); \ + STORE_REG(RDX); \ + STORE_REG(RSP); \ + STORE_REG(RBP); \ + STORE_REG(RDI); \ + STORE_REG(RSI); \ + STORE_REG(R8); \ + STORE_REG(R9); \ + STORE_REG(R10); \ + STORE_REG(R11); + +#define LOAD_XEMU_CALL() + + #define SET_DFNONE() \ if (!dyn->f.dfnone) { \ ST_W(xZR, xEmu, offsetof(x64emu_t, df)); \ @@ -389,6 +421,7 @@ #define MODREG ((nextop & 0xC0) == 0xC0) void la64_epilog(void); +void la64_epilog_fast(void); void* la64_next(x64emu_t* emu, uintptr_t addr); #ifndef STEPNAME @@ -401,24 +434,29 @@ void* la64_next(x64emu_t* emu, uintptr_t addr); #define dynarec64_00 STEPNAME(dynarec64_00) -#define geted STEPNAME(geted) -#define geted32 STEPNAME(geted32) -#define jump_to_epilog STEPNAME(jump_to_epilog) -#define jump_to_next STEPNAME(jump_to_next) -#define ret_to_epilog STEPNAME(ret_to_epilog) -#define call_c STEPNAME(call_c) -#define emit_test32 STEPNAME(emit_test32) -#define emit_add32 STEPNAME(emit_add32) -#define emit_add32c STEPNAME(emit_add32c) -#define emit_add8 STEPNAME(emit_add8) -#define emit_add8c STEPNAME(emit_add8c) -#define emit_sub32 STEPNAME(emit_sub32) -#define emit_sub32c STEPNAME(emit_sub32c) -#define emit_sub8 STEPNAME(emit_sub8) -#define emit_sub8c STEPNAME(emit_sub8c) +#define geted STEPNAME(geted) +#define geted32 STEPNAME(geted32) +#define jump_to_epilog STEPNAME(jump_to_epilog) +#define jump_to_epilog_fast STEPNAME(jump_to_epilog_fast) +#define jump_to_next STEPNAME(jump_to_next) +#define ret_to_epilog STEPNAME(ret_to_epilog) +#define call_c STEPNAME(call_c) +#define emit_test32 STEPNAME(emit_test32) +#define emit_add32 STEPNAME(emit_add32) +#define emit_add32c STEPNAME(emit_add32c) +#define emit_add8 STEPNAME(emit_add8) +#define emit_add8c STEPNAME(emit_add8c) +#define emit_sub32 STEPNAME(emit_sub32) +#define emit_sub32c STEPNAME(emit_sub32c) +#define emit_sub8 STEPNAME(emit_sub8) +#define emit_sub8c STEPNAME(emit_sub8c) #define emit_pf STEPNAME(emit_pf) + +#define x87_forget STEPNAME(x87_forget) +#define sse_purge07cache STEPNAME(sse_purge07cache) + #define fpu_pushcache STEPNAME(fpu_pushcache) #define fpu_popcache STEPNAME(fpu_popcache) #define fpu_reset_cache STEPNAME(fpu_reset_cache) @@ -437,6 +475,7 @@ uintptr_t geted32(dynarec_la64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop // generic x64 helper void jump_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst); +void jump_to_epilog_fast(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst); void jump_to_next(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst, int is32bits); void ret_to_epilog(dynarec_la64_t* dyn, int ninst, rex_t rex); void call_c(dynarec_la64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg); @@ -464,6 +503,13 @@ void fpu_unreflectcache(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3); void fpu_pushcache(dynarec_la64_t* dyn, int ninst, int s1, int not07); void fpu_popcache(dynarec_la64_t* dyn, int ninst, int s1, int not07); +// refresh a value from the cache ->emu and then forget the cache (nothing done if value is not cached) +void x87_forget(dynarec_la64_t* dyn, int ninst, int s1, int s2, int st); + +// SSE/SSE2 helpers +// purge the XMM0..XMM7 cache (before function call) +void sse_purge07cache(dynarec_la64_t* dyn, int ninst, int s1); + void CacheTransform(dynarec_la64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3); #if STEP < 2 @@ -572,6 +618,11 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni ST_W(xZR, xEmu, offsetof(x64emu_t, test.clean)); \ } +#define SKIPTEST(s1) \ + if (box64_dynarec_test) { \ + ST_W(xZR, xEmu, offsetof(x64emu_t, test.clean)); \ + } + #define GOTEST(s1, s2) \ if (box64_dynarec_test) { \ MOV32w(s2, 1); \ diff --git a/src/dynarec/la64/la64_epilog.S b/src/dynarec/la64/la64_epilog.S index 97f5e9ac..14f5dc4e 100644 --- a/src/dynarec/la64/la64_epilog.S +++ b/src/dynarec/la64/la64_epilog.S @@ -7,8 +7,10 @@ .align 4 .global la64_epilog +.global la64_epilog_fast + la64_epilog: - //update register -> emu + // update register -> emu st.d $r12, $r4, (8 * 0) st.d $r13, $r4, (8 * 1) st.d $r14, $r4, (8 * 2) @@ -26,7 +28,9 @@ la64_epilog: st.d $r29, $r4, (8 * 14) st.d $r30, $r4, (8 * 15) st.d $r31, $r4, (8 * 16) // xFlags - st.d $r20, $r4, (8 * 17) // put back reg value in emu, including EIP (so x27 must be EIP now) + st.d $r20, $r4, (8 * 17) // put back reg value in emu, including EIP (so $r20 must be EIP now) + // fallback to epilog_fast now, just restoring saved regs +la64_epilog_fast: addi.d $sp, $r22, 0 // restore save sp from xSavedSP // restore all used register ld.d $r1, $sp, (8 * 0) // load ra |