diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_functions.c | 1 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.c | 6 | ||||
| -rw-r--r-- | src/dynarec/dynarec_native.c | 16 | ||||
| -rw-r--r-- | src/dynarec/dynarec_native_pass.c | 19 |
4 files changed, 25 insertions, 17 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c index 8bf313a5..607b8a93 100644 --- a/src/dynarec/arm64/dynarec_arm64_functions.c +++ b/src/dynarec/arm64/dynarec_arm64_functions.c @@ -743,6 +743,7 @@ void fpu_reset(dynarec_arm_t* dyn) mmx_reset(&dyn->n); sse_reset(&dyn->n); fpu_reset_reg(dyn); + dyn->ymm_zero = 0; } void fpu_reset_ninst(dynarec_arm_t* dyn, int ninst) diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c index f22a4a46..db317039 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.c +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -2323,7 +2323,7 @@ static void flagsCacheTransform(dynarec_arm_t* dyn, int ninst, int s1) int jmp = dyn->insts[ninst].x64.jmp_insts; if(jmp<0) return; - if(dyn->f.dfnone) // flags are fully known, nothing we can do more + if(dyn->f.dfnone || dyn->insts[jmp].f_exit.dfnone_here) // flags are fully known, nothing we can do more return; MESSAGE(LOG_DUMP, "\tFlags fetch ---- ninst=%d -> %d\n", ninst, jmp); int go = (dyn->insts[jmp].f_entry.dfnone && !dyn->f.dfnone)?1:0; @@ -2475,11 +2475,11 @@ void fpu_reset_cache(dynarec_arm_t* dyn, int ninst, int reset_n) // for STEP 2 & 3, just need to refrest with current, and undo the changes (push & swap) dyn->n = dyn->insts[ninst].n; dyn->ymm_zero = dyn->insts[ninst].ymm0_in; + neoncacheUnwind(&dyn->n); #else dyn->n = dyn->insts[reset_n].n; - dyn->ymm_zero = dyn->insts[reset_n].ymm0_in; + dyn->ymm_zero = dyn->insts[reset_n].ymm0_out; #endif - neoncacheUnwind(&dyn->n); #if STEP == 0 if(box64_dynarec_dump) dynarec_log(LOG_NONE, "New x87stack=%d\n", dyn->n.x87stack); #endif diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c index f3e60590..040ce70a 100644 --- a/src/dynarec/dynarec_native.c +++ b/src/dynarec/dynarec_native.c @@ -416,12 +416,15 @@ static int updateYmm0(dynarec_native_t* dyn, int ninst) { if(jmp!=-1) { // check if a purge is needed at jump point ymm0_out&=~dyn->insts[jmp].purge_ymm; + ok = (dyn->insts[jmp].pred_sz==1) && (dyn->insts[jmp].ymm0_in!=ymm0_out); + if(dyn->insts[jmp].pred_sz==1) + dyn->insts[jmp].ymm0_in = ymm0_out; uint16_t ymm0_jmp = dyn->insts[jmp].ymm0_in; uint16_t to_purge = ymm0_jmp&~ymm0_out; // if there are too many ymm0 at jump point - if(to_purge) { + if(to_purge) dyn->insts[jmp].purge_ymm|=to_purge; + if(to_purge || ok) updateYmm0(dyn, jmp); - } } ++ninst; } @@ -600,19 +603,20 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit int pos = helper.size; while (pos>=0) pos = updateNeed(&helper, pos, 0); - pos = 0; - while(pos<helper.size) - pos = updateYmm0(&helper, pos); // remove fpu stuff on non-executed code for(int i=1; i<helper.size-1; ++i) if(!helper.insts[i].pred_sz) { int ii = i; while(ii<helper.size && !helper.insts[ii].pred_sz) { - fpu_reset_ninst(&helper, ii++); + fpu_reset_ninst(&helper, ii); helper.insts[ii].ymm0_in = helper.insts[ii].ymm0_sub = helper.insts[ii].ymm0_add = helper.insts[ii].ymm0_out = helper.insts[ii].purge_ymm = 0; + ++ii; } i = ii; } + pos = 0; + while(pos<helper.size) + pos = updateYmm0(&helper, pos); // pass 1, float optimizations, first pass for flags diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c index a308e264..916d47b7 100644 --- a/src/dynarec/dynarec_native_pass.c +++ b/src/dynarec/dynarec_native_pass.c @@ -108,6 +108,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int else if(ninst && (dyn->insts[ninst].pred_sz>1 || (dyn->insts[ninst].pred_sz==1 && dyn->insts[ninst].pred[0]!=ninst-1))) dyn->last_ip = 0; // reset IP if some jump are coming here #endif + dyn->f.dfnone_here = 0; NEW_INST; MESSAGE(LOG_DUMP, "New Instruction x64:%p, native:%p\n", (void*)addr, (void*)dyn->block); #if STEP == 0 @@ -191,23 +192,25 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int if(!ok && !need_epilog && (addr < (dyn->start+dyn->isize))) { ok = 1; // we use the 1st predecessor here - int ii = ninst+1; - if(ii<dyn->size && !dyn->insts[ii].x64.alive) { - while(ii<dyn->size && !dyn->insts[ii].x64.alive) { + if((ninst+1)<dyn->size && !dyn->insts[ninst+1].x64.alive) { + // reset fpu value... + dyn->f.dfnone = 0; + dyn->f.pending = 0; + fpu_reset(dyn); + while((ninst+1)<dyn->size && !dyn->insts[ninst+1].x64.alive) { // may need to skip opcodes to advance ++ninst; NEW_INST; MESSAGE(LOG_DEBUG, "Skipping unused opcode\n"); INST_NAME("Skipped opcode"); + addr += dyn->insts[ninst].x64.size; INST_EPILOG; - addr += dyn->insts[ii].x64.size; - ++ii; } } - if((dyn->insts[ii].x64.barrier&BARRIER_FULL)==BARRIER_FULL) + if((dyn->insts[ninst+1].x64.barrier&BARRIER_FULL)==BARRIER_FULL) reset_n = -2; // hack to say Barrier! else { - reset_n = getNominalPred(dyn, ii); // may get -1 if no predecessor are available + reset_n = getNominalPred(dyn, ninst+1); // may get -1 if no predecessor are available if(reset_n==-1) { reset_n = -2; if(!dyn->insts[ninst].x64.has_callret) { @@ -226,7 +229,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int if(dyn->forward) { if(dyn->forward_to == addr && !need_epilog && ok>=0) { // we made it! - reset_n = get_first_jump(dyn, addr); + reset_n = dyn->forward_ninst; if(box64_dynarec_dump) dynarec_log(LOG_NONE, "Forward extend block for %d bytes %s%p -> %p (ninst %d - %d)\n", dyn->forward_to-dyn->forward, dyn->insts[dyn->forward_ninst].x64.has_callret?"(opt. call) ":"", (void*)dyn->forward, (void*)dyn->forward_to, reset_n, ninst); if(dyn->insts[dyn->forward_ninst].x64.has_callret && !dyn->insts[dyn->forward_ninst].x64.has_next) dyn->insts[dyn->forward_ninst].x64.has_next = 1; // this block actually continue |