diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-06-09 14:51:41 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-06-09 14:51:41 +0200 |
| commit | 06df8c7bede707331777b88c2fa78e2056e8193d (patch) | |
| tree | 6201f082228362057657858086c695b2b4b2431a /src | |
| parent | 950241d52f7fb777b8afa7a253c88f3de65530b8 (diff) | |
| download | box64-06df8c7bede707331777b88c2fa78e2056e8193d.tar.gz box64-06df8c7bede707331777b88c2fa78e2056e8193d.zip | |
[DYNAREC] Another fix for YMM Zero'd upper reg tracking
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_functions.c | 10 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.c | 24 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_pass0.h | 4 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_private.h | 2 | ||||
| -rw-r--r-- | src/dynarec/dynarec_native.c | 48 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_pass0.h | 4 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_private.h | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_pass0.h | 4 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_private.h | 2 |
9 files changed, 55 insertions, 45 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c index 269197b3..468b11d5 100644 --- a/src/dynarec/arm64/dynarec_arm64_functions.c +++ b/src/dynarec/arm64/dynarec_arm64_functions.c @@ -407,7 +407,7 @@ int fpuCacheNeedsTransform(dynarec_arm_t* dyn, int ninst) { if(!i2) { // just purge if(dyn->insts[ninst].n.stack_next) return 1; - if(dyn->insts[ninst].ymm_zero) + if(dyn->insts[ninst].ymm0_out) return 1; for(int i=0; i<32 && !ret; ++i) if(dyn->insts[ninst].n.neoncache[i].v) { // there is something at ninst for i @@ -424,7 +424,7 @@ int fpuCacheNeedsTransform(dynarec_arm_t* dyn, int ninst) { if(dyn->insts[ninst].n.stack_next != dyn->insts[i2].n.stack-dyn->insts[i2].n.stack_push) { return 1; } - if(dyn->insts[ninst].ymm_zero && (dyn->insts[ninst].ymm_zero&~dyn->insts[i2].ymm_zero)) + if(dyn->insts[ninst].ymm0_out && (dyn->insts[ninst].ymm0_out&~dyn->insts[i2].ymm0_in)) return 1; neoncache_t cache_i2 = dyn->insts[i2].n; neoncacheUnwind(&cache_i2); @@ -673,12 +673,12 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r dynarec_log(LOG_NONE, " V%d:%s", ii, getCacheName(dyn->n.neoncache[ii].t, dyn->n.neoncache[ii].n)); dynarec_log(LOG_NONE, "->%s", getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); } - dynarec_log(LOG_NONE, ")%s", (box64_dynarec_dump>1)?"\e[32m":""); + dynarec_log(LOG_NONE, ")%s", (box64_dynarec_dump>1)?"\e[0;32m":""); } if(dyn->insts[ninst].n.ymm_used) dynarec_log(LOG_NONE, " ymmUsed=%04x", dyn->insts[ninst].n.ymm_used); - if(dyn->ymm_zero || dyn->insts[ninst].ymm0_add || dyn->insts[ninst].ymm0_sub) - dynarec_log(LOG_NONE, " ymm0=%04x(+%04x-%04x)", dyn->ymm_zero, dyn->insts[ninst].ymm0_add ,dyn->insts[ninst].ymm0_sub); + if(dyn->ymm_zero || dyn->insts[ninst].ymm0_add || dyn->insts[ninst].ymm0_sub || dyn->insts[ninst].ymm0_out) + dynarec_log(LOG_NONE, " ymm0=(%04x/%04x+%04x-%04x=%04x)", dyn->ymm_zero, dyn->insts[ninst].ymm0_in, dyn->insts[ninst].ymm0_add ,dyn->insts[ninst].ymm0_sub, dyn->insts[ninst].ymm0_out); if(dyn->insts[ninst].purge_ymm) dynarec_log(LOG_NONE, " purgeYmm=%04x", dyn->insts[ninst].purge_ymm); if(dyn->mmx87 || dyn->scratchs) diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c index 4eedadd1..42f09b21 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.c +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -1797,19 +1797,24 @@ int ymm_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a, int forwrite, int if((dyn->n.neoncache[i].t==NEON_CACHE_YMMR || dyn->n.neoncache[i].t==NEON_CACHE_YMMW) && dyn->n.neoncache[i].n==a) { if(forwrite) { dyn->n.neoncache[i].t = NEON_CACHE_YMMW; - dyn->ymm_zero&=~(1<<a); } + dyn->ymm_zero&=~(1<<a); + #if STEP == 0 + dyn->insts[ninst].ymm0_sub |= (1<<a); + #endif return i; } // nope, grab a new one int ret = fpu_get_reg_ymm(dyn, ninst, forwrite?NEON_CACHE_YMMW:NEON_CACHE_YMMR, a, k1, k2, k3); if(dyn->ymm_zero&(1<<a)) { VEORQ(ret, ret, ret); - if(forwrite) - dyn->ymm_zero&=~(1<<a); + dyn->ymm_zero&=~(1<<a); } else { VLDR128_U12(ret, xEmu, offsetof(x64emu_t, ymm[a])); } + #if STEP == 0 + dyn->insts[ninst].ymm0_sub |= (1<<a); + #endif return ret; } // get neon register for a YMM reg, but don't try to synch it if it needed to be created @@ -1820,12 +1825,18 @@ int ymm_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a, int k1, int if((dyn->n.neoncache[i].t==NEON_CACHE_YMMR || dyn->n.neoncache[i].t==NEON_CACHE_YMMW) && dyn->n.neoncache[i].n==a) { dyn->n.neoncache[i].t = NEON_CACHE_YMMW; dyn->ymm_zero&=~(1<<a); + #if STEP == 0 + dyn->insts[ninst].ymm0_sub |= (1<<a); + #endif return i; } // nope, grab a new one int ret = fpu_get_reg_ymm(dyn, ninst, NEON_CACHE_YMMW, a, k1, k2, k3); if(dyn->ymm_zero&(1<<a)) dyn->ymm_zero&=~(1<<a); + #if STEP == 0 + dyn->insts[ninst].ymm0_sub |= (1<<a); + #endif return ret; } @@ -1842,6 +1853,9 @@ void ymm_mark_zero(dynarec_arm_t* dyn, int ninst, int a) } dyn->n.neoncache[i].v = 0; // forget it! } + #if STEP == 0 + dyn->insts[ninst].ymm0_add |= (1<<a); + #endif avx_mark_zero(dyn, ninst, a); } @@ -2457,10 +2471,10 @@ void fpu_reset_cache(dynarec_arm_t* dyn, int ninst, int reset_n) #if STEP > 1 // for STEP 2 & 3, just need to refrest with current, and undo the changes (push & swap) dyn->n = dyn->insts[ninst].n; - dyn->ymm_zero = dyn->insts[ninst].ymm_zero; + dyn->ymm_zero = dyn->insts[ninst].ymm0_out; #else dyn->n = dyn->insts[reset_n].n; - dyn->ymm_zero = dyn->insts[reset_n].ymm_zero; + dyn->ymm_zero = dyn->insts[reset_n].ymm0_out; #endif neoncacheUnwind(&dyn->n); #if STEP == 0 diff --git a/src/dynarec/arm64/dynarec_arm64_pass0.h b/src/dynarec/arm64/dynarec_arm64_pass0.h index 8b2fc6fb..510dd4ab 100644 --- a/src/dynarec/arm64/dynarec_arm64_pass0.h +++ b/src/dynarec/arm64/dynarec_arm64_pass0.h @@ -26,14 +26,12 @@ dyn->n.combined1 = dyn->n.combined2 = 0;\ dyn->n.swapped = 0; dyn->n.barrier = 0; \ dyn->insts[ninst].f_entry = dyn->f; \ - dyn->insts[ninst].ymm_zero = dyn->ymm_zero;\ + dyn->insts[ninst].ymm0_in = dyn->ymm_zero;\ if(ninst) {dyn->insts[ninst-1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst-1].x64.addr;} #define INST_EPILOG \ dyn->insts[ninst].f_exit = dyn->f; \ dyn->insts[ninst].n = dyn->n; \ - dyn->insts[ninst].ymm0_add = dyn->ymm_zero&~dyn->insts[ninst].ymm_zero; \ - dyn->insts[ninst].ymm0_sub = dyn->insts[ninst].ymm_zero&~dyn->ymm_zero; \ dyn->insts[ninst].ymm0_out = dyn->ymm_zero;\ dyn->insts[ninst].x64.has_next = (ok>0)?1:0; #define INST_NAME(name) diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h index db205a3b..7ce3f1c4 100644 --- a/src/dynarec/arm64/dynarec_arm64_private.h +++ b/src/dynarec/arm64/dynarec_arm64_private.h @@ -81,8 +81,8 @@ typedef struct instruction_arm64_s { int pass2choice;// value for choices that are fixed on pass2 for pass3 uintptr_t natcall; uint16_t retn; - uint16_t ymm_zero; // bitmap of ymm to zero at purge uint16_t purge_ymm; // need to purge some ymm + uint16_t ymm0_in; // bitmap of ymm to zero at purge uint16_t ymm0_add; // the ymm0 added by the opcode uint16_t ymm0_sub; // the ymm0 removed by the opcode uint16_t ymm0_out; // the ymmm0 at th end of the opcode diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c index 161e577e..f3e60590 100644 --- a/src/dynarec/dynarec_native.c +++ b/src/dynarec/dynarec_native.c @@ -400,27 +400,29 @@ static int updateNeed(dynarec_native_t* dyn, int ninst, uint8_t need) { return ninst; } -// ypdate Ymm0 and Purge_ymm0. -static int updateYmm0(dynarec_native_t* dyn, int ninst, uint16_t mask) { - while (ninst<dyn->size) { - uint16_t ymm0 = mask&~dyn->insts[ninst].purge_ymm; // current ymm0 - uint16_t to_purge = dyn->insts[ninst].ymm_zero & ~ymm0; // the new to purge - uint16_t ymm0_out = (mask|dyn->insts[ninst].ymm0_add)&~dyn->insts[ninst].ymm0_sub; // ymm0 at the output - //check if need to recurse further - int ok = (ymm0==dyn->insts[ninst].ymm_zero) && (!to_purge) && (ymm0_out==dyn->insts[ninst].ymm0_out); - if(ok && dyn->insts[ninst].x64.has_next) - ok = (dyn->insts[ninst+1].ymm_zero==(ymm0_out&~dyn->insts[ninst+1].purge_ymm)); - if(ok && dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts!=-1) - ok = (dyn->insts[dyn->insts[ninst].x64.jmp_insts].ymm_zero==(ymm0_out&~dyn->insts[dyn->insts[ninst].x64.jmp_insts].purge_ymm)); - if(ok) - return ninst+1; - dyn->insts[ninst].ymm_zero = ymm0; - dyn->insts[ninst].purge_ymm |= to_purge; - dyn->insts[ninst].ymm0_out = ymm0_out; - if(dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts!=-1) - updateYmm0(dyn, dyn->insts[ninst].x64.jmp_insts, ymm0_out); - if(!dyn->insts[ninst].x64.has_next) - return ninst+1; +// update Ymm0 and Purge_ymm0. +static int updateYmm0(dynarec_native_t* dyn, int ninst) { + int ok = 1; + while (ok && ninst<dyn->size) { + uint16_t ymm0 = dyn->insts[ninst].ymm0_in; // entry ymm0 + ymm0&=~dyn->insts[ninst].purge_ymm; // entry after purge + uint16_t ymm0_out = (ymm0|dyn->insts[ninst].ymm0_add)&~dyn->insts[ninst].ymm0_sub; // ymm0 after the opcode + ok = dyn->insts[ninst].x64.has_next; // continue? + if(ok) ok = (dyn->insts[ninst].ymm0_in!=ymm0) || (dyn->insts[ninst+1].ymm0_in!=ymm0_out); // continue if there has been any change... + if(ok) dyn->insts[ninst+1].ymm0_in=ymm0_out; // make the change + dyn->insts[ninst].ymm0_out = ymm0_out; // update ymm0_out + dyn->insts[ninst].ymm0_in = ymm0; // write purged ymm0, as it's done at the entry + int jmp = (dyn->insts[ninst].x64.jmp)?dyn->insts[ninst].x64.jmp_insts:-1; + if(jmp!=-1) { + // check if a purge is needed at jump point + ymm0_out&=~dyn->insts[jmp].purge_ymm; + uint16_t ymm0_jmp = dyn->insts[jmp].ymm0_in; + uint16_t to_purge = ymm0_jmp&~ymm0_out; // if there are too many ymm0 at jump point + if(to_purge) { + dyn->insts[jmp].purge_ymm|=to_purge; + updateYmm0(dyn, jmp); + } + } ++ninst; } return ninst; @@ -600,14 +602,14 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit pos = updateNeed(&helper, pos, 0); pos = 0; while(pos<helper.size) - pos = updateYmm0(&helper, pos, helper.insts[pos].ymm_zero); + pos = updateYmm0(&helper, pos); // remove fpu stuff on non-executed code for(int i=1; i<helper.size-1; ++i) if(!helper.insts[i].pred_sz) { int ii = i; while(ii<helper.size && !helper.insts[ii].pred_sz) { fpu_reset_ninst(&helper, ii++); - helper.insts[ii].ymm0_sub = helper.insts[ii].ymm0_add = helper.insts[ii].ymm0_out = helper.insts[ii].purge_ymm = 0; + helper.insts[ii].ymm0_in = helper.insts[ii].ymm0_sub = helper.insts[ii].ymm0_add = helper.insts[ii].ymm0_out = helper.insts[ii].purge_ymm = 0; } i = ii; } diff --git a/src/dynarec/la64/dynarec_la64_pass0.h b/src/dynarec/la64/dynarec_la64_pass0.h index 62f65853..0ea40a86 100644 --- a/src/dynarec/la64/dynarec_la64_pass0.h +++ b/src/dynarec/la64/dynarec_la64_pass0.h @@ -30,14 +30,12 @@ dyn->lsx.combined1 = dyn->lsx.combined2 = 0; \ dyn->lsx.swapped = 0; \ dyn->lsx.barrier = 0; \ - dyn->insts[ninst].ymm_zero = dyn->ymm_zero; \ + dyn->insts[ninst].ymm0_in = dyn->ymm_zero; \ dyn->insts[ninst].f_entry = dyn->f; \ if (ninst) { dyn->insts[ninst - 1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst - 1].x64.addr; } #define INST_EPILOG \ dyn->insts[ninst].f_exit = dyn->f; \ dyn->insts[ninst].lsx = dyn->lsx; \ - dyn->insts[ninst].ymm0_add = dyn->ymm_zero&~dyn->insts[ninst].ymm_zero; \ - dyn->insts[ninst].ymm0_sub = dyn->insts[ninst].ymm_zero&~dyn->ymm_zero; \ dyn->insts[ninst].ymm0_out = dyn->ymm_zero;\ dyn->insts[ninst].x64.has_next = (ok > 0) ? 1 : 0; #define INST_NAME(name) diff --git a/src/dynarec/la64/dynarec_la64_private.h b/src/dynarec/la64/dynarec_la64_private.h index 9fa7618b..2dddd155 100644 --- a/src/dynarec/la64/dynarec_la64_private.h +++ b/src/dynarec/la64/dynarec_la64_private.h @@ -80,8 +80,8 @@ typedef struct instruction_la64_s { int pass2choice;// value for choices that are fixed on pass2 for pass3 uintptr_t natcall; uint16_t retn; - uint16_t ymm_zero; // bitmap of ymm to zero at purge uint16_t purge_ymm; // need to purge some ymm + uint16_t ymm0_in; // bitmap of ymm to zero at purge uint16_t ymm0_add; // the ymm0 added by the opcode uint16_t ymm0_sub; // the ymm0 removed by the opcode uint16_t ymm0_out; // the ymmm0 at th end of the opcode diff --git a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h index 174bb092..04857e8c 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass0.h +++ b/src/dynarec/rv64/dynarec_rv64_pass0.h @@ -28,14 +28,12 @@ dyn->e.swapped = 0; dyn->e.barrier = 0; \ for(int i=0; i<16; ++i) dyn->e.olds[i].v = 0;\ dyn->insts[ninst].f_entry = dyn->f; \ - dyn->insts[ninst].ymm_zero = dyn->ymm_zero;\ + dyn->insts[ninst].ymm0_in = dyn->ymm_zero;\ if(ninst) {dyn->insts[ninst-1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst-1].x64.addr;} #define INST_EPILOG \ dyn->insts[ninst].f_exit = dyn->f; \ dyn->insts[ninst].e = dyn->e; \ - dyn->insts[ninst].ymm0_add = dyn->ymm_zero&~dyn->insts[ninst].ymm_zero; \ - dyn->insts[ninst].ymm0_sub = dyn->insts[ninst].ymm_zero&~dyn->ymm_zero; \ dyn->insts[ninst].ymm0_out = dyn->ymm_zero;\ dyn->insts[ninst].x64.has_next = (ok>0)?1:0; #define INST_NAME(name) diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h index 1b32b7fe..612e331c 100644 --- a/src/dynarec/rv64/dynarec_rv64_private.h +++ b/src/dynarec/rv64/dynarec_rv64_private.h @@ -90,8 +90,8 @@ typedef struct instruction_rv64_s { int pass2choice;// value for choices that are fixed on pass2 for pass3 uintptr_t natcall; uint16_t retn; - uint16_t ymm_zero; // bitmap of ymm to zero at purge uint16_t purge_ymm; // need to purge some ymm + uint16_t ymm0_in; // bitmap of ymm to zero at purge uint16_t ymm0_add; // the ymm0 added by the opcode uint16_t ymm0_sub; // the ymm0 removed by the opcode uint16_t ymm0_out; // the ymmm0 at th end of the opcode |