diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-07-07 10:37:51 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-07-07 10:37:51 +0200 |
| commit | 14b0323bf64ba28b081effb78ef7d3897fd5d64d (patch) | |
| tree | 724d37afadc393dc6ffd994449cafa8f57212f86 /src | |
| parent | b4828477794a8e69a96f0ca7991ad0e619d1b2a3 (diff) | |
| download | box64-14b0323bf64ba28b081effb78ef7d3897fd5d64d.tar.gz box64-14b0323bf64ba28b081effb78ef7d3897fd5d64d.zip | |
[ARM64_DYNAREC] Reworked ymm0 propagation
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_functions.c | 10 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_functions.h | 1 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.c | 8 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_pass0.h | 2 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_pass1.h | 2 | ||||
| -rw-r--r-- | src/dynarec/dynarec_arch.h | 3 | ||||
| -rw-r--r-- | src/dynarec/dynarec_native.c | 100 | ||||
| -rw-r--r-- | src/dynarec/dynarec_native_pass.c | 2 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_pass0.h | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_pass0.h | 2 | ||||
| -rw-r--r-- | src/include/dynarec_native.h | 1 |
11 files changed, 83 insertions, 50 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c index afb1ed6b..8fd0929e 100644 --- a/src/dynarec/arm64/dynarec_arm64_functions.c +++ b/src/dynarec/arm64/dynarec_arm64_functions.c @@ -688,6 +688,8 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r dynarec_log(LOG_NONE, ", jmp=%d", dyn->insts[ninst].x64.jmp_insts); if(dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts==-1) dynarec_log(LOG_NONE, ", jmp=out"); + if(dyn->insts[ninst].x64.has_callret) + dynarec_log(LOG_NONE, ", callret"); if(dyn->last_ip) dynarec_log(LOG_NONE, ", last_ip=%p", (void*)dyn->last_ip); for(int ii=0; ii<32; ++ii) { @@ -789,14 +791,6 @@ void fpu_reset_ninst(dynarec_native_t* dyn, int ninst) } -void arm64_fpu_reset(dynarec_native_t* dyn, int ninst, int step) -{ - if(step<2) { - dyn->insts[ninst].ymm0_in = 0; - dyn->insts[ninst].ymm0_out = 0; - } -} - int fpu_is_st_freed(dynarec_native_t* dyn, int ninst, int st) { return (dyn->n.tags&(0b11<<(st*2)))?1:0; diff --git a/src/dynarec/arm64/dynarec_arm64_functions.h b/src/dynarec/arm64/dynarec_arm64_functions.h index 0af490e4..b6c95904 100644 --- a/src/dynarec/arm64/dynarec_arm64_functions.h +++ b/src/dynarec/arm64/dynarec_arm64_functions.h @@ -69,7 +69,6 @@ void print_opcode(dynarec_native_t* dyn, int ninst, uint32_t opcode); // reset the cache void fpu_reset(dynarec_native_t* dyn); void fpu_reset_ninst(dynarec_native_t* dyn, int ninst); -void arm64_fpu_reset(dynarec_native_t* dyn, int ninst, int step); // is st freed int fpu_is_st_freed(dynarec_native_t* dyn, int ninst, int st); diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c index 136c0f8c..04fa97f6 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.c +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -1806,6 +1806,8 @@ int ymm_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a, int forwrite, int return i; } // nope, grab a new one + if(dyn->ymm_zero&(1<<a)) + forwrite = 1; // if the reg was zero, then it will need to be write back int ret = fpu_get_reg_ymm(dyn, ninst, forwrite?NEON_CACHE_YMMW:NEON_CACHE_YMMR, a, k1, k2, k3); if(dyn->ymm_zero&(1<<a)) { VEORQ(ret, ret, ret); @@ -2365,6 +2367,12 @@ void fpu_reflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3) x87_reflectcache(dyn, ninst, s1, s2, s3); mmx_reflectcache(dyn, ninst, s1); //sse_reflectcache(dyn, ninst, s1); // no need, it's pushed/unpushed during call + // but ymm0 needs to be pushed + if(dyn->ymm_zero) { + ADDx_U12(s1, xEmu, offsetof(x64emu_t, ymm[0])); + for(int i=0; i<16; ++i) + STPx_S7_offset(xZR, xZR, s1, 16*i); + } } void fpu_unreflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3) diff --git a/src/dynarec/arm64/dynarec_arm64_pass0.h b/src/dynarec/arm64/dynarec_arm64_pass0.h index 510dd4ab..7d4c0c2d 100644 --- a/src/dynarec/arm64/dynarec_arm64_pass0.h +++ b/src/dynarec/arm64/dynarec_arm64_pass0.h @@ -26,13 +26,11 @@ dyn->n.combined1 = dyn->n.combined2 = 0;\ dyn->n.swapped = 0; dyn->n.barrier = 0; \ dyn->insts[ninst].f_entry = dyn->f; \ - dyn->insts[ninst].ymm0_in = dyn->ymm_zero;\ if(ninst) {dyn->insts[ninst-1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst-1].x64.addr;} #define INST_EPILOG \ dyn->insts[ninst].f_exit = dyn->f; \ dyn->insts[ninst].n = dyn->n; \ - dyn->insts[ninst].ymm0_out = dyn->ymm_zero;\ dyn->insts[ninst].x64.has_next = (ok>0)?1:0; #define INST_NAME(name) #define DEFAULT \ diff --git a/src/dynarec/arm64/dynarec_arm64_pass1.h b/src/dynarec/arm64/dynarec_arm64_pass1.h index ab1f5fc4..6cf92feb 100644 --- a/src/dynarec/arm64/dynarec_arm64_pass1.h +++ b/src/dynarec/arm64/dynarec_arm64_pass1.h @@ -5,12 +5,10 @@ #define NEW_INST \ dyn->insts[ninst].f_entry = dyn->f; \ dyn->n.combined1 = dyn->n.combined2 = 0;\ - dyn->insts[ninst].ymm0_in = dyn->ymm_zero;\ dyn->n.swapped = 0; dyn->n.barrier = 0 #define INST_EPILOG \ dyn->insts[ninst].n = dyn->n; \ - dyn->insts[ninst].ymm0_out = dyn->ymm_zero;\ dyn->insts[ninst].f_exit = dyn->f #define INST_NAME(name) diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h index 351d9fcd..f89125a7 100644 --- a/src/dynarec/dynarec_arch.h +++ b/src/dynarec/dynarec_arch.h @@ -17,7 +17,6 @@ #include "arm64/dynarec_arm64_functions.h" // Limit here is defined by LD litteral, that is 19bits #define MAXBLOCK_SIZE ((1<<19)-200) -#define ARM_FPU_RESET() arm64_fpu_reset(dyn, ninst, STEP) #elif defined(LA64) #define instruction_native_t instruction_la64_t @@ -34,7 +33,6 @@ #include "la64/dynarec_la64_functions.h" // Limit here is unconditionnal jump, that is signed 28bits #define MAXBLOCK_SIZE ((1 << 27) - 200) -#define ARM_FPU_RESET() #elif defined(RV64) #define instruction_native_t instruction_rv64_t @@ -51,7 +49,6 @@ #include "rv64/dynarec_rv64_functions.h" // Limit here is unconditionnal jump, that is signed 21bits #define MAXBLOCK_SIZE ((1<<20)-200) -#define ARM_FPU_RESET() #else #error Unsupported platform #endif diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c index 15ecdce8..a233c690 100644 --- a/src/dynarec/dynarec_native.c +++ b/src/dynarec/dynarec_native.c @@ -405,35 +405,81 @@ static int updateNeed(dynarec_native_t* dyn, int ninst, uint8_t need) { return ninst; } -// update Ymm0 and Purge_ymm0. -static int updateYmm0(dynarec_native_t* dyn, int ninst) { +static void updateYmm0s(dynarec_native_t* dyn, int ninst, int max_ninst_reached) { + int can_incr = ninst == max_ninst_reached; // Are we the top-level call? int ok = 1; - while (ok && ninst<dyn->size) { - uint16_t ymm0 = dyn->insts[ninst].ymm0_in; // entry ymm0 - ymm0&=~dyn->insts[ninst].purge_ymm; // entry after purge - uint16_t ymm0_out = (ymm0|dyn->insts[ninst].ymm0_add)&~dyn->insts[ninst].ymm0_sub; // ymm0 after the opcode - ok = dyn->insts[ninst].x64.has_next; // continue? - if(ok) ok = (dyn->insts[ninst].ymm0_in!=ymm0) || (dyn->insts[ninst+1].ymm0_in!=ymm0_out); // continue if there has been any change... - if(ok) dyn->insts[ninst+1].ymm0_in=ymm0_out; // make the change - dyn->insts[ninst].ymm0_out = ymm0_out; // update ymm0_out - dyn->insts[ninst].ymm0_in = ymm0; // write purged ymm0, as it's done at the entry - int jmp = (dyn->insts[ninst].x64.jmp)?dyn->insts[ninst].x64.jmp_insts:-1; - if(jmp!=-1) { - // check if a purge is needed at jump point - ymm0_out&=~dyn->insts[jmp].purge_ymm; - ok = (dyn->insts[jmp].pred_sz==1) && (dyn->insts[jmp].ymm0_in!=ymm0_out); - if(dyn->insts[jmp].pred_sz==1) - dyn->insts[jmp].ymm0_in = ymm0_out; - uint16_t ymm0_jmp = dyn->insts[jmp].ymm0_in; - uint16_t to_purge = ymm0_jmp&~ymm0_out; // if there are too many ymm0 at jump point - if(to_purge) - dyn->insts[jmp].purge_ymm|=to_purge; - if(to_purge || ok) - updateYmm0(dyn, jmp); + while ((can_incr || ok) && ninst<dyn->size) { + //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "update ninst=%d (%d): can_incr=%d\n", ninst, max_ninst_reached, can_incr); + uint16_t new_purge_ymm, new_ymm0_in, new_ymm0_out; + + if (ninst && dyn->insts[ninst].pred_sz && dyn->insts[ninst].x64.alive) { + uint16_t ymm0_union = 0, ymm0_inter = (uint16_t)-1; // The union of the empty set is empty, the intersection is the universe + for (int i = 0; i < dyn->insts[ninst].pred_sz; ++i) { + int pred = dyn->insts[ninst].pred[i]; + //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "\twith pred[%d] = %d", i, pred); + if (pred >= max_ninst_reached) { + //if(box64_dynarec_dump) dynarec_log(LOG_NONE, " (skipped)\n"); + continue; + } + + int pred_out = dyn->insts[pred].x64.has_callret ? 0 : dyn->insts[pred].ymm0_out; + //if(box64_dynarec_dump) dynarec_log(LOG_NONE, " ~> %04X\n", pred_out); + ymm0_union |= pred_out; + ymm0_inter &= pred_out; + } + //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "\t=> %04X,%04X\n", ymm0_union, ymm0_inter); + // Notice the default values yield something coherent here (if all pred are after ninst) + new_purge_ymm = ymm0_union & ~ymm0_inter; + new_ymm0_in = ymm0_inter; + new_ymm0_out = (ymm0_inter | dyn->insts[ninst].ymm0_add) & ~dyn->insts[ninst].ymm0_sub; + + if ((dyn->insts[ninst].purge_ymm != new_purge_ymm) || (dyn->insts[ninst].ymm0_in != new_ymm0_in) || (dyn->insts[ninst].ymm0_out != new_ymm0_out)) { + // Need to update self and next(s) + dyn->insts[ninst].purge_ymm = new_purge_ymm; + dyn->insts[ninst].ymm0_in = new_ymm0_in; + dyn->insts[ninst].ymm0_out = new_ymm0_out; + + if (can_incr) { + // We always have ninst == max_ninst_reached when can_incr == 1 + ++max_ninst_reached; + } else { + // We need to stop here if the opcode has no "real" next or if we reached the ninst of the toplevel + ok = (max_ninst_reached - 1 != ninst) && dyn->insts[ninst].x64.has_next && !dyn->insts[ninst].x64.has_callret; + } + + int jmp = (dyn->insts[ninst].x64.jmp)?dyn->insts[ninst].x64.jmp_insts:-1; + if((jmp!=-1) && (jmp < max_ninst_reached)) { + //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "\t! jump to %d\n", jmp); + // The jump goes before the last instruction reached, update the destination + // If this is the top level call, this means the jump goes backward (jmp != ninst) + // Otherwise, since we don't update all instructions, we may miss the update (don't use jmp < ninst) + updateYmm0s(dyn, jmp, max_ninst_reached); + } + } else { + if (can_incr) { + // We always have ninst == max_ninst_reached when can_incr == 1 + ++max_ninst_reached; + + // Also update jumps to before (they are skipped otherwise) + int jmp = (dyn->insts[ninst].x64.jmp)?dyn->insts[ninst].x64.jmp_insts:-1; + if((jmp!=-1) && (jmp < max_ninst_reached)) { + //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "\t! jump to %d\n", jmp); + updateYmm0s(dyn, jmp, max_ninst_reached); + } + } else { + // We didn't update anything, we can leave + ok = 0; + } + } + } else if (can_incr) { + // We always have ninst == max_ninst_reached when can_incr == 1 + ++max_ninst_reached; + } else { + // We didn't update anything, we can leave + ok = 0; } ++ninst; } - return ninst; } void* current_helper = NULL; @@ -628,9 +674,7 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit CancelBlock64(0); return CreateEmptyBlock(block, addr); } - pos = 0; - while(pos<helper.size) - pos = updateYmm0(&helper, pos); + updateYmm0s(&helper, 0, 0); // pass 1, float optimizations, first pass for flags diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c index 14f80103..2ebc89cc 100644 --- a/src/dynarec/dynarec_native_pass.c +++ b/src/dynarec/dynarec_native_pass.c @@ -89,14 +89,12 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int dyn->f.dfnone = 0; dyn->f.pending = 0; fpu_reset(dyn); - ARM_FPU_RESET(); } else { fpu_reset_cache(dyn, ninst, reset_n); dyn->f = dyn->insts[reset_n].f_exit; if(dyn->insts[ninst].x64.barrier&BARRIER_FLOAT) { MESSAGE(LOG_DEBUG, "Apply Barrier Float\n"); fpu_reset(dyn); - ARM_FPU_RESET(); } if(dyn->insts[ninst].x64.barrier&BARRIER_FLAGS) { MESSAGE(LOG_DEBUG, "Apply Barrier Flags\n"); diff --git a/src/dynarec/la64/dynarec_la64_pass0.h b/src/dynarec/la64/dynarec_la64_pass0.h index 0ea40a86..99a897a6 100644 --- a/src/dynarec/la64/dynarec_la64_pass0.h +++ b/src/dynarec/la64/dynarec_la64_pass0.h @@ -30,13 +30,11 @@ dyn->lsx.combined1 = dyn->lsx.combined2 = 0; \ dyn->lsx.swapped = 0; \ dyn->lsx.barrier = 0; \ - dyn->insts[ninst].ymm0_in = dyn->ymm_zero; \ dyn->insts[ninst].f_entry = dyn->f; \ if (ninst) { dyn->insts[ninst - 1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst - 1].x64.addr; } #define INST_EPILOG \ dyn->insts[ninst].f_exit = dyn->f; \ dyn->insts[ninst].lsx = dyn->lsx; \ - dyn->insts[ninst].ymm0_out = dyn->ymm_zero;\ dyn->insts[ninst].x64.has_next = (ok > 0) ? 1 : 0; #define INST_NAME(name) #define DEFAULT \ diff --git a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h index 04857e8c..3ee1685f 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass0.h +++ b/src/dynarec/rv64/dynarec_rv64_pass0.h @@ -28,13 +28,11 @@ dyn->e.swapped = 0; dyn->e.barrier = 0; \ for(int i=0; i<16; ++i) dyn->e.olds[i].v = 0;\ dyn->insts[ninst].f_entry = dyn->f; \ - dyn->insts[ninst].ymm0_in = dyn->ymm_zero;\ if(ninst) {dyn->insts[ninst-1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst-1].x64.addr;} #define INST_EPILOG \ dyn->insts[ninst].f_exit = dyn->f; \ dyn->insts[ninst].e = dyn->e; \ - dyn->insts[ninst].ymm0_out = dyn->ymm_zero;\ dyn->insts[ninst].x64.has_next = (ok>0)?1:0; #define INST_NAME(name) #define DEFAULT \ diff --git a/src/include/dynarec_native.h b/src/include/dynarec_native.h index 3bda443c..dd5218f6 100644 --- a/src/include/dynarec_native.h +++ b/src/include/dynarec_native.h @@ -5,6 +5,7 @@ typedef struct dynablock_s dynablock_t; typedef struct x64emu_s x64emu_t; typedef struct instsize_s instsize_t; + //#define USE_CUSTOM_MEM #ifdef USE_CUSTOM_MEM #define dynaMalloc customMalloc |