diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-02-11 16:12:46 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-02-11 16:12:46 +0100 |
| commit | 5e6af3753292b8da43d4fbee186a78f3e5068141 (patch) | |
| tree | 3cd5ec700844143a309d9448bc650f9d35738c91 /src | |
| parent | c0184f926dd98792f313194d3b80a92f4fe1c04a (diff) | |
| download | box64-5e6af3753292b8da43d4fbee186a78f3e5068141.tar.gz box64-5e6af3753292b8da43d4fbee186a78f3e5068141.zip | |
[ARM64_DYNAREC] Improved FFREE handling (fixing gameplay of Serious Sam 2, and probably some other games too)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_d9.c | 2 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_dd.c | 9 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.c | 119 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 3 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_private.h | 1 |
5 files changed, 115 insertions, 19 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_d9.c b/src/dynarec/arm64/dynarec_arm64_d9.c index f0fa18ac..49ab9fe0 100644 --- a/src/dynarec/arm64/dynarec_arm64_d9.c +++ b/src/dynarec/arm64/dynarec_arm64_d9.c @@ -162,7 +162,7 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } // load tag ADDx_U12(x1, xEmu, offsetof(x64emu_t, p_regs)); - LDRw_REG_LSL2(x3, x1, x2); + LDRw_REG_LSL2(x3, x1, x4); CMPSw_U12(x3, 0b11); // empty MOV32w(x3, 0b100000100000000); CSELx(x4, x3, x4, cEQ); // empty: C3,C2,C0 = 101 diff --git a/src/dynarec/arm64/dynarec_arm64_dd.c b/src/dynarec/arm64/dynarec_arm64_dd.c index 42a6f634..e349053c 100644 --- a/src/dynarec/arm64/dynarec_arm64_dd.c +++ b/src/dynarec/arm64/dynarec_arm64_dd.c @@ -52,14 +52,7 @@ uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0xC7: INST_NAME("FFREE STx"); #if 1 - x87_forget(dyn, ninst, x1, x2, nextop&7); - // empty tags - MOVZw(x3, 0b11); - ADDx_U12(x1, xEmu, offsetof(x64emu_t, p_regs)); - LDRw_U12(x2, xEmu, offsetof(x64emu_t, top)); - ADDw_U12(x2, x2, nextop&7); - ANDw_mask(x2, x2, 0, 2); // mask=7 - STRw_REG_LSL2(x3, x1, x2); + x87_free(dyn, ninst, x1, x2, x3, nextop&7); #else MESSAGE(LOG_DUMP, "Need Optimization\n"); x87_purgecache(dyn, ninst, 0, x1, x2, x3); diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c index 88008fe8..efeb883e 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.c +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -853,8 +853,10 @@ void grab_segdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg, int se // x87 stuffs static void x87_reset(dynarec_arm_t* dyn) { - for (int i=0; i<8; ++i) + for (int i=0; i<8; ++i) { dyn->n.x87cache[i] = -1; + dyn->n.freed[i] = -1; + } dyn->n.x87stack = 0; dyn->n.stack = 0; dyn->n.stack_next = 0; @@ -967,7 +969,9 @@ int x87_do_push(dynarec_arm_t* dyn, int ninst, int s1, int t) ||(dyn->n.neoncache[j].t == NEON_CACHE_ST_I64)) 
++dyn->n.neoncache[j].n; int ret = -1; - for(int i=0; i<8; ++i) + for(int i=0; i<8; ++i) { + if(dyn->n.freed[i]!=-1) + ++dyn->n.freed[i]; if(dyn->n.x87cache[i]!=-1) ++dyn->n.x87cache[i]; else if(ret==-1) { @@ -975,6 +979,7 @@ int x87_do_push(dynarec_arm_t* dyn, int ninst, int s1, int t) ret=dyn->n.x87reg[i]=fpu_get_reg_x87(dyn, t, 0); dyn->n.neoncache[ret].t = X87_ST0; } + } return ret; } void x87_do_push_empty(dynarec_arm_t* dyn, int ninst, int s1) @@ -991,20 +996,17 @@ void x87_do_push_empty(dynarec_arm_t* dyn, int ninst, int s1) ||(dyn->n.neoncache[j].t == NEON_CACHE_ST_F) ||(dyn->n.neoncache[j].t == NEON_CACHE_ST_I64)) ++dyn->n.neoncache[j].n; - for(int i=0; i<8; ++i) + for(int i=0; i<8; ++i) { + if(dyn->n.freed[i]!=-1) + ++dyn->n.freed[i]; if(dyn->n.x87cache[i]!=-1) ++dyn->n.x87cache[i]; + } if(s1) x87_stackcount(dyn, ninst, s1); } -void x87_do_pop(dynarec_arm_t* dyn, int ninst, int s1) +void static internal_x87_dopop(dynarec_arm_t* dyn) { - if(dyn->n.mmxcount) - mmx_purgecache(dyn, ninst, 0, s1); - dyn->n.x87stack-=1; - dyn->n.stack_next-=1; - dyn->n.stack_pop+=1; - // move all regs in cache, poping ST0 for(int i=0; i<8; ++i) if(dyn->n.x87cache[i]!=-1) { --dyn->n.x87cache[i]; @@ -1014,6 +1016,32 @@ void x87_do_pop(dynarec_arm_t* dyn, int ninst, int s1) } } } +int static internal_x87_dofree(dynarec_arm_t* dyn) +{ + int ret = 0; + for(int i=0; i<8; ++i) + if(dyn->n.freed[i]!=-1) { + --dyn->n.freed[i]; + if(dyn->n.freed[i]<=0) { + MESSAGE(LOG_DUMP, "\t--------x87 FREED ST0, poping 1 more\n"); + dyn->n.freed[i] = -1; + ret = 1; + } + } + return ret; +} +void x87_do_pop(dynarec_arm_t* dyn, int ninst, int s1) +{ + if(dyn->n.mmxcount) + mmx_purgecache(dyn, ninst, 0, s1); + do { + dyn->n.x87stack-=1; + dyn->n.stack_next-=1; + dyn->n.stack_pop+=1; + // move all regs in cache, poping ST0 + internal_x87_dopop(dyn); + } while(internal_x87_dofree(dyn)); +} static int x87_is_stcached(dynarec_arm_t* dyn, int st) { for (int i=0; i<8; ++i) @@ -1110,6 +1138,8 @@ void 
x87_purgecache(dynarec_arm_t* dyn, int ninst, int next, int s1, int s2, int } if(!next) { dyn->n.stack_next = 0; + for(int i=0; i<8; ++i) + dyn->n.freed[i] = -1; #if STEP < 2 // refresh the cached valued, in case it's a purge outside a instruction dyn->insts[ninst].n.barrier = 1; @@ -1359,6 +1389,75 @@ void x87_reget_st(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) MESSAGE(LOG_DUMP, "\t-------x87 Cache for ST%d\n", st); } +void x87_free(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int st) +{ + int ret = -1; + for (int i=0; (i<8) && (ret==-1); ++i) + if(dyn->n.x87cache[i] == st) + ret = i; + MESSAGE(LOG_DUMP, "\tFFREE%s x87 Cache for ST%d\n", (ret!=-1)?" (and Forget)":"", st); + if(ret!=-1) { + const int reg = dyn->n.x87reg[ret]; + #if STEP == 1 + if(dyn->n.neoncache[reg].t==NEON_CACHE_ST_F || dyn->n.neoncache[reg].t==NEON_CACHE_ST_I64) + neoncache_promote_double(dyn, ninst, st); + #endif + // prepare offset to fpu => s1 + ADDx_U12(s1, xEmu, offsetof(x64emu_t, x87)); + // Get top + LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); + // Update + int ast = st - dyn->n.x87stack; + if(ast) { + if(ast>0) { + ADDw_U12(s2, s2, ast); + } else { + SUBw_U12(s2, s2, -ast); + } + ANDw_mask(s2, s2, 0, 2); //mask=7 // (emu->top + i)&7 + } + if(dyn->n.neoncache[reg].t==NEON_CACHE_ST_F) { + FCVT_D_S(31, reg); + VSTR64_REG_LSL3(31, s1, s2); + } else if(dyn->n.neoncache[reg].t==NEON_CACHE_ST_I64) { + SCVTFDD(31, reg); + VSTR64_REG_LSL3(31, s1, s2); + } else { + VSTR64_REG_LSL3(reg, s1, s2); + } + // and forget that cache + fpu_free_reg(dyn, reg); + dyn->n.neoncache[reg].v = 0; + dyn->n.x87cache[ret] = -1; + dyn->n.x87reg[ret] = -1; + } else { + // Get top + LDRw_U12(s2, xEmu, offsetof(x64emu_t, top)); + // Update + int ast = st - dyn->n.x87stack; + if(ast) { + if(ast>0) { + ADDw_U12(s2, s2, ast); + } else { + SUBw_U12(s2, s2, -ast); + } + ANDw_mask(s2, s2, 0, 2); //mask=7 // (emu->top + i)&7 + } + } + // mark as free + ADDx_U12(s1, xEmu, offsetof(x64emu_t, p_regs)); + 
MOVZw(s3, 0b11); + STRw_REG_LSL2(s3, s1, s2); + // add mark in the freed array + for(int i=0; i<8; ++i) + if(dyn->n.freed[i]==-1) { + dyn->n.freed[i]=st; + MESSAGE(LOG_DUMP, "\t--------x87 Marked ST%d as Freed\n", st); + break; + } + MESSAGE(LOG_DUMP, "\t--------x87 FFREE for ST%d\n", st); +} + void x87_swapreg(dynarec_arm_t* dyn, int ninst, int s1, int s2, int a, int b) { int i1, i2, i3; diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h index 918b8b2a..fa7c63b0 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.h +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -1090,6 +1090,7 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr); #define x87_get_neoncache STEPNAME(x87_get_neoncache) #define x87_get_st STEPNAME(x87_get_st) #define x87_get_st_empty STEPNAME(x87_get_st) +#define x87_free STEPNAME(x87_free) #define x87_forget STEPNAME(x87_forget) #define x87_reget_st STEPNAME(x87_reget_st) #define x87_stackcount STEPNAME(x87_stackcount) @@ -1253,6 +1254,8 @@ int x87_get_neoncache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int a); int x87_get_st(dynarec_arm_t* dyn, int ninst, int s1, int s2, int a, int t); // get vfpu register for a x87 reg, create the entry if needed. 
Do not fetch the Stx if not already in cache int x87_get_st_empty(dynarec_arm_t* dyn, int ninst, int s1, int s2, int a, int t); +// Free st, using the FFREE opcode (so it's freed but stack is not moved) +void x87_free(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int st); // refresh a value from the cache ->emu and then forget the cache (nothing done if value is not cached) void x87_forget(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st); // refresh the cache value from emu diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h index 1a7b387e..f7e7b008 100644 --- a/src/dynarec/arm64/dynarec_arm64_private.h +++ b/src/dynarec/arm64/dynarec_arm64_private.h @@ -46,6 +46,7 @@ typedef struct neoncache_s { // fpu cache int8_t x87cache[8]; // cache status for the 8 x87 register behind the fpu stack int8_t x87reg[8]; // reg used for x87cache entry + int8_t freed[8]; // set when FFREE is used, -1 else int8_t mmxcache[8]; // cache status for the 8 MMX registers sse_cache_t ssecache[16]; // cache status for the 16 SSE(2) registers int8_t fpuused[24]; // all 0..24 double reg from fpu, used by x87, sse and mmx |