diff options
Diffstat (limited to 'src')
| -rwxr-xr-x | src/dynarec/dynarec_arm64_00.c | 5 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_helper.c | 29 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_helper.h | 31 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_private.h | 10 |
4 files changed, 64 insertions, 11 deletions
diff --git a/src/dynarec/dynarec_arm64_00.c b/src/dynarec/dynarec_arm64_00.c index a5d903af..90ce3874 100755 --- a/src/dynarec/dynarec_arm64_00.c +++ b/src/dynarec/dynarec_arm64_00.c @@ -193,10 +193,11 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MESSAGE(LOG_DUMP, "Native Call to %s (retn=%d)\n", GetNativeName(GetNativeFnc(dyn->insts[ninst].natcall-1)), dyn->insts[ninst].retn); // calling a native function x87_forget(dyn, ninst, x3, x4, 0); + sse_purge07cache(dyn, ninst, x3); TABLE64(xRIP, dyn->insts[ninst].natcall); // read the 0xCC already - STORE_XEMU_REGS(xRIP); + STORE_XEMU_MINIMUM(xRIP); CALL_S(x64Int3, -1); - LOAD_XEMU_REGS(xRIP); + LOAD_XEMU_MINIMUM(xRIP); TABLE64(x3, dyn->insts[ninst].natcall); ADDx_U12(x3, x3, 2+8+8); CMPSx_REG(xRIP, x3); diff --git a/src/dynarec/dynarec_arm64_helper.c b/src/dynarec/dynarec_arm64_helper.c index c0a084db..2ddb3e99 100755 --- a/src/dynarec/dynarec_arm64_helper.c +++ b/src/dynarec/dynarec_arm64_helper.c @@ -812,7 +812,7 @@ static void mmx_reflectcache(dynarec_arm_t* dyn, int ninst, int s1) static void sse_reset(dynarec_arm_t* dyn, int ninst) { #if STEP > 1 - for (int i=0; i<8; ++i) + for (int i=0; i<16; ++i) dyn->ssecache[i] = -1; #endif } @@ -841,14 +841,35 @@ int sse_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a) return 0; #endif } -// purge the SSE cache only(needs 3 scratch registers) -static void sse_purgecache(dynarec_arm_t* dyn, int ninst, int s1) +// purge the SSE cache for XMM0..XMM7 (to use before function native call) +void sse_purge07cache(dynarec_arm_t* dyn, int ninst, int s1) { #if STEP > 1 int old = -1; for (int i=0; i<8; ++i) if(dyn->ssecache[i]!=-1) { if (old==-1) { + MESSAGE(LOG_DUMP, "\tPurge XMM0..7 Cache ------\n"); + ++old; + } + VSTR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i])); + fpu_free_reg_quad(dyn, dyn->ssecache[i]); + dyn->ssecache[i] = -1; + } + if(old!=-1) { + MESSAGE(LOG_DUMP, "\t------ Purge XMM0..7 Cache\n"); + } +#endif +} + +// purge the SSE cache only +static void sse_purgecache(dynarec_arm_t* dyn, int ninst, int s1) +{ +#if STEP > 1 + int old = -1; + for (int i=0; i<16; ++i) + if(dyn->ssecache[i]!=-1) { + if (old==-1) { MESSAGE(LOG_DUMP, "\tPurge SSE Cache ------\n"); ++old; } @@ -865,7 +886,7 @@ static void sse_purgecache(dynarec_arm_t* dyn, int ninst, int s1) static void sse_reflectcache(dynarec_arm_t* dyn, int ninst, int s1) { #if STEP > 1 - for (int i=0; i<8; ++i) + for (int i=0; i<16; ++i) if(dyn->ssecache[i]!=-1) { VSTR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i])); } diff --git a/src/dynarec/dynarec_arm64_helper.h b/src/dynarec/dynarec_arm64_helper.h index c7f645b0..68021614 100755 --- a/src/dynarec/dynarec_arm64_helper.h +++ b/src/dynarec/dynarec_arm64_helper.h @@ -347,6 +347,34 @@ LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); \ if(A) {LDRx_U12(A, xEmu, offsetof(x64emu_t, ip));} +#define STORE_XEMU_MINIMUM(A) \ + STORE_REG(RAX); \ + STORE_REG(RCX); \ + STORE_REG(RDX); \ + STORE_REG(RBX); \ + STORE_REG(RSP); \ + STORE_REG(RBP); \ + STORE_REG(RSI); \ + STORE_REG(RDI); \ + STORE_REG(R8); \ + STORE_REG(R9); \ + STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); \ + if(A) {STRx_U12(A, xEmu, offsetof(x64emu_t, ip));} + +#define LOAD_XEMU_MINIMUM(A) \ + LOAD_REG(RAX); \ + LOAD_REG(RCX); \ + LOAD_REG(RDX); \ + LOAD_REG(RBX); \ + LOAD_REG(RSP); \ + LOAD_REG(RBP); \ + LOAD_REG(RSI); \ + LOAD_REG(RDI); \ + LOAD_REG(R8); \ + LOAD_REG(R9); \ + LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); \ + if(A) {LDRx_U12(A, xEmu, offsetof(x64emu_t, ip));} + #define SET_DFNONE(S) if(!dyn->dfnone) {MOVZw(S, d_none); STRw_U12(S, xEmu, offsetof(x64emu_t, df)); dyn->dfnone=1;} #define SET_DF(S, N) if(N) {MOVZw(S, N); STRw_U12(S, xEmu, offsetof(x64emu_t, df)); dyn->dfnone=0;} else SET_DFNONE(S) #define SET_NODF() dyn->dfnone = 0 @@ -535,6 +563,7 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr); #define mmx_get_reg_empty STEPNAME(mmx_get_reg_empty) #define sse_get_reg STEPNAME(sse_get_reg) #define sse_get_reg_empty STEPNAME(sse_get_reg_empty) +#define sse_purge07cache STEPNAME(sse_purge07cache) #define fpu_pushcache STEPNAME(fpu_pushcache) #define fpu_popcache STEPNAME(fpu_popcache) @@ -669,6 +698,8 @@ int mmx_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a); int sse_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a); // get neon register for a SSE reg, but don't try to synch it if it needed to be created int sse_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a); +// purge the XMM0..XMM7 cache (before function call) +void sse_purge07cache(dynarec_arm_t* dyn, int ninst, int s1); // common coproc helpers // reset the cache diff --git a/src/dynarec/dynarec_arm64_private.h b/src/dynarec/dynarec_arm64_private.h index c375dd10..cb81b101 100755 --- a/src/dynarec/dynarec_arm64_private.h +++ b/src/dynarec/dynarec_arm64_private.h @@ -30,11 +30,11 @@ typedef struct dynarec_arm_s { uintptr_t arm_start; // start of the arm code int arm_size; // size of emitted arm code int state_flags;// actual state for on-demand flags - int x87cache[8];// cache status for the 8 x87 register behind the fpu stack - int x87reg[8]; // reg used for x87cache entry - int mmxcache[8];// cache status for the 8 MMX registers - int ssecache[8];// cache status for the 8 SSE(2) registers - int fpuused[24];// all 8..31 Q reg from fpu, used by x87, sse and mmx + int8_t x87cache[8];// cache status for the 8 x87 register behind the fpu stack + int8_t x87reg[8]; // reg used for x87cache entry + int8_t mmxcache[8];// cache status for the 8 MMX registers + int8_t ssecache[16];// cache status for the 16 SSE(2) registers + int8_t fpuused[24];// all 8..31 Q reg from fpu, used by x87, sse and mmx int x87stack; // cache stack counter int fpu_scratch;// scratch counter int fpu_extra_qscratch; // some opcode need an extra quad scratch register |