diff options
Diffstat (limited to 'src')
| -rwxr-xr-x | src/dynarec/arm64_epilog.S | 12 | ||||
| -rwxr-xr-x | src/dynarec/arm64_prolog.S | 6 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_functions.c | 56 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_functions.h | 24 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_helper.c | 64 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_private.h | 3 |
6 files changed, 83 insertions, 82 deletions
diff --git a/src/dynarec/arm64_epilog.S b/src/dynarec/arm64_epilog.S index eef0d906..4b73803a 100755 --- a/src/dynarec/arm64_epilog.S +++ b/src/dynarec/arm64_epilog.S @@ -25,7 +25,11 @@ arm64_epilog: ldp x23, x24, [sp, (8 * 4)] ldp x25, x26, [sp, (8 * 6)] ldr x27, [sp, (8 * 8)] - add sp, sp, (8 * 10) + ldp d8, d9, [sp, (8 *10)] + ldp d10, d11, [sp, (8 *12)] + ldp d12, d13, [sp, (8 *14)] + ldp d14, d15, [sp, (8 *16)] + add sp, sp, (8 * 18) ldp lr, fp, [sp], 16 // saved lr //end, return... ret @@ -40,7 +44,11 @@ arm64_epilog_fast: ldp x23, x24, [sp, (8 * 4)] ldp x25, x26, [sp, (8 * 6)] ldr x27, [sp, (8 * 8)] - add sp, sp, (8 * 10) + ldp d8, d9, [sp, (8 *10)] + ldp d10, d11, [sp, (8 *12)] + ldp d12, d13, [sp, (8 *14)] + ldp d14, d15, [sp, (8 *16)] + add sp, sp, (8 * 18) ldp lr, fp, [sp], 16 // saved lr //end, return... ret diff --git a/src/dynarec/arm64_prolog.S b/src/dynarec/arm64_prolog.S index a583c081..21961e5b 100755 --- a/src/dynarec/arm64_prolog.S +++ b/src/dynarec/arm64_prolog.S @@ -10,12 +10,16 @@ arm64_prolog: //save all 18 used register stp lr, fp, [sp, -16]! // save lr - sub sp, sp, (8 * 10) + sub sp, sp, (8 * 18) stp x19, x20, [sp, (8 * 0)] stp x21, x22, [sp, (8 * 2)] stp x23, x24, [sp, (8 * 4)] stp x25, x26, [sp, (8 * 6)] str x27, [sp, (8 * 8)] + stp d8, d9, [sp, (8 *10)] + stp d10, d11, [sp, (8 *12)] + stp d12, d13, [sp, (8 *14)] + stp d14, d15, [sp, (8 *16)] //vpush {d8-d15} // save NEON regs? //setup emu -> register ldp x10, x11, [x0, (8 * 0)] diff --git a/src/dynarec/dynarec_arm64_functions.c b/src/dynarec/dynarec_arm64_functions.c index 9ad74131..b6bb5e3c 100755 --- a/src/dynarec/dynarec_arm64_functions.c +++ b/src/dynarec/dynarec_arm64_functions.c @@ -183,68 +183,68 @@ void arm_fprem1(x64emu_t* emu) emu->sw.f.F87_C3 = ((tmp32s>>1)&1); emu->sw.f.F87_C1 = ((tmp32s>>2)&1); } - +#define XMM0 0 +#define XMM8 16 +#define X870 8 +#define EMM0 8 +#define SCRATCH0 24 // Get a FPU single scratch reg int fpu_get_scratch_single(dynarec_arm_t* dyn) { - return dyn->fpu_scratch++; // return an Sx + return SCRATCH0 + dyn->fpu_scratch++; // return an Sx } // Get a FPU double scratch reg int fpu_get_scratch_double(dynarec_arm_t* dyn) { - return dyn->fpu_scratch++; // return an Dx (same as Sx) + return SCRATCH0 + dyn->fpu_scratch++; // return an Dx (same as Sx) } // Get a FPU quad scratch reg int fpu_get_scratch_quad(dynarec_arm_t* dyn) { - return dyn->fpu_scratch++; // return an Qx (same as Dx or Sx) + return SCRATCH0 + dyn->fpu_scratch++; // return an Qx (same as Dx or Sx) } // Reset scratch regs counter void fpu_reset_scratch(dynarec_arm_t* dyn) { dyn->fpu_scratch = 0; - if(dyn->fpu_extra_qscratch) { - fpu_free_reg_quad(dyn, dyn->fpu_extra_qscratch); - dyn->fpu_extra_qscratch = 0; - } } -#define FPUFIRST 8 -// Get a FPU double reg -int fpu_get_reg_double(dynarec_arm_t* dyn) +// Get a x87 double reg +int fpu_get_reg_x87(dynarec_arm_t* dyn) { - // TODO: check upper limit? - int i=0; + int i=X870; while (dyn->fpuused[i]) ++i; dyn->fpuused[i] = 1; - return i+FPUFIRST; // return a Dx + return i; // return a Dx } // Free a FPU double reg -void fpu_free_reg_double(dynarec_arm_t* dyn, int reg) +void fpu_free_reg(dynarec_arm_t* dyn, int reg) { // TODO: check upper limit? - int i=reg-FPUFIRST; - dyn->fpuused[i] = 0; + dyn->fpuused[reg] = 0; } -// Get a FPU quad reg -int fpu_get_reg_quad(dynarec_arm_t* dyn) +// Get an MMX double reg +int fpu_get_reg_emm(dynarec_arm_t* dyn, int emm) { - int i=0; - while (dyn->fpuused[i]) ++i; - dyn->fpuused[i] = 1; - return i+FPUFIRST; // return a Qx, it's the same as Dx on aarch64 + dyn->fpuused[EMM0 + emm] = 1; + return EMM0 + emm; } -// Free a FPU quad reg -void fpu_free_reg_quad(dynarec_arm_t* dyn, int reg) +// Get an XMM quad reg +int fpu_get_reg_xmm(dynarec_arm_t* dyn, int xmm) { - int i=reg-FPUFIRST; - dyn->fpuused[i] = 0; + if(xmm>7) { + dyn->fpuused[XMM8 + xmm - 8] = 1; + return XMM8 + xmm - 8; + } else { + dyn->fpuused[XMM0 + xmm] = 1; + return XMM0 + xmm; + } } // Reset fpu regs counter void fpu_reset_reg(dynarec_arm_t* dyn) { dyn->fpu_reg = 0; - for (int i=0; i<24; ++i) + for (int i=0; i<32; ++i) dyn->fpuused[i]=0; } diff --git a/src/dynarec/dynarec_arm64_functions.h b/src/dynarec/dynarec_arm64_functions.h index d932aa4d..0d6a02a5 100755 --- a/src/dynarec/dynarec_arm64_functions.h +++ b/src/dynarec/dynarec_arm64_functions.h @@ -31,22 +31,18 @@ void arm_fprem1(x64emu_t* emu); void arm_ud(x64emu_t* emu); -// Get an FPU single scratch reg -int fpu_get_scratch_single(dynarec_arm_t* dyn); -// Get an FPU double scratch reg -int fpu_get_scratch_double(dynarec_arm_t* dyn); -// Get an FPU quad scratch reg -int fpu_get_scratch_quad(dynarec_arm_t* dyn); +// Get an FPU scratch reg +int fpu_get_scratch(dynarec_arm_t* dyn); // Reset scratch regs counter void fpu_reset_scratch(dynarec_arm_t* dyn); -// Get an FPU double reg -int fpu_get_reg_double(dynarec_arm_t* dyn); -// Free a FPU double reg -void fpu_free_reg_double(dynarec_arm_t* dyn, int reg); -// Get an FPU quad reg -int fpu_get_reg_quad(dynarec_arm_t* dyn); -// Free a FPU quad reg -void fpu_free_reg_quad(dynarec_arm_t* dyn, int reg); +// Get an x87 double reg +int fpu_get_reg_x87(dynarec_arm_t* dyn); +// Get an MMX double reg +int fpu_get_reg_emm(dynarec_arm_t* dyn, int emm); +// Get an XMM quad reg +int fpu_get_reg_xmm(dynarec_arm_t* dyn, int xmm); +// Free a FPU/MMX/XMM reg +void fpu_free_reg(dynarec_arm_t* dyn, int reg); // Reset fpu regs counter void fpu_reset_reg(dynarec_arm_t* dyn); diff --git a/src/dynarec/dynarec_arm64_helper.c b/src/dynarec/dynarec_arm64_helper.c index efcc1bd4..ae106539 100755 --- a/src/dynarec/dynarec_arm64_helper.c +++ b/src/dynarec/dynarec_arm64_helper.c @@ -461,7 +461,7 @@ int x87_do_push(dynarec_arm_t* dyn, int ninst) ++dyn->x87cache[i]; else if(ret==-1) { dyn->x87cache[i] = 0; - ret=dyn->x87reg[i]=fpu_get_reg_double(dyn); + ret=dyn->x87reg[i]=fpu_get_reg_x87(dyn); } return ret; #else @@ -489,7 +489,7 @@ void x87_do_pop(dynarec_arm_t* dyn, int ninst) if(dyn->x87cache[i]!=-1) { --dyn->x87cache[i]; if(dyn->x87cache[i]==-1) { - fpu_free_reg_double(dyn, dyn->x87reg[i]); + fpu_free_reg(dyn, dyn->x87reg[i]); dyn->x87reg[i] = -1; } } @@ -555,7 +555,7 @@ static void x87_purgecache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3 ADDw_U12(s3, s2, dyn->x87cache[i]); ANDw_mask(s3, s3, 0b011111, 1); // (emu->top + st)&7 VSTR64_REG_LSL3(dyn->x87reg[i], s1, s3); - fpu_free_reg_double(dyn, dyn->x87reg[i]); + fpu_free_reg(dyn, dyn->x87reg[i]); dyn->x87reg[i] = -1; dyn->x87cache[i] = -1; } @@ -604,7 +604,7 @@ int x87_get_cache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) ret = i; // found, setup and grab the value dyn->x87cache[ret] = st; - dyn->x87reg[ret] = fpu_get_reg_double(dyn); + dyn->x87reg[ret] = fpu_get_reg_x87(dyn); ADDx_U12(s1, xEmu, offsetof(x64emu_t, mmx87)); LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); int a = st - dyn->x87stack; @@ -683,7 +683,7 @@ void x87_forget(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) VLDR64_REG_LSL3(dyn->x87reg[ret], s1, s2); MESSAGE(LOG_DUMP, "\t--------x87 Cache for ST%d\n", st); // and forget that cache - fpu_free_reg_double(dyn, dyn->x87reg[ret]); + fpu_free_reg(dyn, dyn->x87reg[ret]); dyn->x87cache[ret] = -1; dyn->x87reg[ret] = -1; #endif @@ -720,7 +720,7 @@ void x87_reget_st(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) ret = i; // found, setup and grab the value dyn->x87cache[ret] = st; - dyn->x87reg[ret] = fpu_get_reg_double(dyn); + dyn->x87reg[ret] = fpu_get_reg_x87(dyn); ADDx_U12(s1, xEmu, offsetof(x64emu_t, mmx87)); LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); int a = st - dyn->x87stack; @@ -785,7 +785,7 @@ int mmx_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a) #if STEP > 1 if(dyn->mmxcache[a]!=-1) return dyn->mmxcache[a]; - int ret = dyn->mmxcache[a] = fpu_get_reg_double(dyn); + int ret = dyn->mmxcache[a] = fpu_get_reg_emm(dyn, a); VLDR64_U12(ret, xEmu, offsetof(x64emu_t, mmx87[a])); return ret; #else @@ -798,7 +798,7 @@ int mmx_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a) #if STEP > 1 if(dyn->mmxcache[a]!=-1) return dyn->mmxcache[a]; - int ret = dyn->mmxcache[a] = fpu_get_reg_double(dyn); + int ret = dyn->mmxcache[a] = fpu_get_reg_emm(dyn, a); return ret; #else return 0; @@ -816,7 +816,7 @@ static void mmx_purgecache(dynarec_arm_t* dyn, int ninst, int s1) ++old; } VSTR64_U12(dyn->mmxcache[i], xEmu, offsetof(x64emu_t, mmx87[i])); - fpu_free_reg_double(dyn, dyn->mmxcache[i]); + fpu_free_reg(dyn, dyn->mmxcache[i]); dyn->mmxcache[i] = -1; } if(old!=-1) { @@ -851,7 +851,7 @@ int sse_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a) #if STEP > 1 if(dyn->ssecache[a]!=-1) return dyn->ssecache[a]; - int ret = dyn->ssecache[a] = fpu_get_reg_quad(dyn); + int ret = dyn->ssecache[a] = fpu_get_reg_xmm(dyn, a); VLDR128_U12(ret, xEmu, offsetof(x64emu_t, xmm[a])); return ret; #else @@ -864,7 +864,7 @@ int sse_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a) #if STEP > 1 if(dyn->ssecache[a]!=-1) return dyn->ssecache[a]; - int ret = dyn->ssecache[a] = fpu_get_reg_quad(dyn); + int ret = dyn->ssecache[a] = fpu_get_reg_xmm(dyn, a); return ret; #else return 0; @@ -882,7 +882,7 @@ void sse_purge07cache(dynarec_arm_t* dyn, int ninst, int s1) ++old; } VSTR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i])); - fpu_free_reg_quad(dyn, dyn->ssecache[i]); + fpu_free_reg(dyn, dyn->ssecache[i]); dyn->ssecache[i] = -1; } if(old!=-1) { @@ -903,7 +903,7 @@ static void sse_purgecache(dynarec_arm_t* dyn, int ninst, int s1) ++old; } VSTR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i])); - fpu_free_reg_quad(dyn, dyn->ssecache[i]); + fpu_free_reg(dyn, dyn->ssecache[i]); dyn->ssecache[i] = -1; } if(old!=-1) { @@ -926,44 +926,38 @@ static void sse_reflectcache(dynarec_arm_t* dyn, int ninst, int s1) void fpu_pushcache(dynarec_arm_t* dyn, int ninst, int s1) { #if STEP > 1 - // only need to push 16-31... + // only SSE regs needs to be push back to xEmu int n=0; - for (int i=8; i<32; i++) - if(dyn->fpuused[i-8]) + for (int i=0; i<16; i++) + if(dyn->ssecache[i]!=-1) ++n; if(!n) return; - MESSAGE(LOG_DUMP, "\tPush FPU Cache (%d)------\n", n); - SUBx_U12(xSP, xSP, n*16); - MOV_frmSP(s1); - for (int i=8; i<32; ++i) { - if(dyn->fpuused[i-8]) { - VSTR128_S9_postindex(i, s1, 16); + MESSAGE(LOG_DUMP, "\tPush XMM Cache (%d)------\n", n); + for (int i=0; i<16; ++i) + if(dyn->ssecache[i]!=-1) { + VSTR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i])); } - } - MESSAGE(LOG_DUMP, "\t------- Push FPU Cache (%d)\n", n); + MESSAGE(LOG_DUMP, "\t------- Push XMM Cache (%d)\n", n); #endif } void fpu_popcache(dynarec_arm_t* dyn, int ninst, int s1) { #if STEP > 1 - // we need to push 8-31 (because on 8..15 only low part is preserved) + // only SSE regs needs to be pop back from xEmu int n=0; - for (int i=8; i<32; i++) - if(dyn->fpuused[i-8]) + for (int i=16; i<32; i++) + if(dyn->ssecache[i]!=-1) ++n; if(!n) return; - MESSAGE(LOG_DUMP, "\tPop FPU Cache (%d)------\n", n); - MOV_frmSP(s1); - for (int i=8; i<32; ++i) { - if(dyn->fpuused[i-8]) { - VLDR128_S9_postindex(i, s1, 16); + MESSAGE(LOG_DUMP, "\tPop XMM Cache (%d)------\n", n); + for (int i=0; i<16; ++i) + if(dyn->ssecache[i]!=-1) { + VLDR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i])); } - } - ADDx_U12(xSP, xSP, n*16); - MESSAGE(LOG_DUMP, "\t------- Pop FPU Cache (%d)\n", n); + MESSAGE(LOG_DUMP, "\t------- Pop XMM Cache (%d)\n", n); #endif } diff --git a/src/dynarec/dynarec_arm64_private.h b/src/dynarec/dynarec_arm64_private.h index 413e7edf..9cadbc63 100755 --- a/src/dynarec/dynarec_arm64_private.h +++ b/src/dynarec/dynarec_arm64_private.h @@ -35,10 +35,9 @@ typedef struct dynarec_arm_s { int8_t x87reg[8]; // reg used for x87cache entry int8_t mmxcache[8];// cache status for the 8 MMX registers int8_t ssecache[16];// cache status for the 16 SSE(2) registers - int8_t fpuused[24];// all 8..31 Q reg from fpu, used by x87, sse and mmx + int8_t fpuused[32];// all 8..31 Q reg from fpu, used by x87, sse and mmx int x87stack; // cache stack counter int fpu_scratch;// scratch counter - int fpu_extra_qscratch; // some opcode need an extra quad scratch register int fpu_reg; // x87/sse/mmx reg counter int dfnone; // if defered flags is already set to df_none uint64_t *table64; // table of 64bits value |