diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-06-18 15:58:57 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-06-18 15:58:57 +0200 |
| commit | a4cb62d6d60c83c8684101f19156d1bb9f5ceb81 (patch) | |
| tree | c0502dbfa5fe770aeda22cfd6ad0a20637c461c3 /src | |
| parent | e4062c6031e0e8c5910cc8edd48f2ba572a2ccef (diff) | |
| download | box64-a4cb62d6d60c83c8684101f19156d1bb9f5ceb81.tar.gz box64-a4cb62d6d60c83c8684101f19156d1bb9f5ceb81.zip | |
[ARM64_DYNAREC] Restored a better way to handle ymm registers, now that the tracking is improved
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_functions.c | 12 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.c | 28 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_private.h | 2 |
3 files changed, 18 insertions, 24 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c index 468b11d5..cad5427a 100644 --- a/src/dynarec/arm64/dynarec_arm64_functions.c +++ b/src/dynarec/arm64/dynarec_arm64_functions.c @@ -32,7 +32,7 @@ int fpu_get_scratch(dynarec_arm_t* dyn, int ninst) int ret = SCRATCH0 + dyn->n.fpu_scratch++; if(dyn->n.neoncache[ret].t==NEON_CACHE_YMMR || dyn->n.neoncache[ret].t==NEON_CACHE_YMMW) { // should only happens in step 0... - dyn->scratchs |= (1<<(dyn->n.fpu_scratch-1)); // mark as not free + dyn->insts[ninst].purge_ymm |= (1<<dyn->n.neoncache[ret].n); // mark as purged dyn->n.neoncache[ret].v = 0; // reset it } return ret; @@ -43,12 +43,12 @@ int fpu_get_double_scratch(dynarec_arm_t* dyn, int ninst) int ret = SCRATCH0 + dyn->n.fpu_scratch; if(dyn->n.neoncache[ret].t==NEON_CACHE_YMMR || dyn->n.neoncache[ret].t==NEON_CACHE_YMMW) { // should only happens in step 0... - dyn->scratchs |= (1<<(dyn->n.fpu_scratch)); // mark as not free + dyn->insts[ninst].purge_ymm |= (1<<dyn->n.neoncache[ret].n); // mark as purged dyn->n.neoncache[ret].v = 0; // reset it } if(dyn->n.neoncache[ret+1].t==NEON_CACHE_YMMR || dyn->n.neoncache[ret+1].t==NEON_CACHE_YMMW) { // should only happens in step 0... - dyn->scratchs |= (1<<(dyn->n.fpu_scratch+1)); // mark as not free + dyn->insts[ninst].purge_ymm |= (1<<dyn->n.neoncache[ret+1].n); // mark as purged dyn->n.neoncache[ret+1].v = 0; // reset it } dyn->n.fpu_scratch+=2; @@ -67,7 +67,7 @@ int fpu_get_reg_x87(dynarec_arm_t* dyn, int ninst, int t, int n) while (dyn->n.fpuused[i]) ++i; if(dyn->n.neoncache[i].t==NEON_CACHE_YMMR || dyn->n.neoncache[i].t==NEON_CACHE_YMMW) { // should only happens in step 0... 
- dyn->mmx87 |= (1<<(i-1-X870)); // mark as purged + dyn->insts[ninst].purge_ymm |= (1<<dyn->n.neoncache[i].n); // mark as purged dyn->n.neoncache[i].v = 0; // reset it } dyn->n.fpuused[i] = 1; @@ -92,7 +92,7 @@ int fpu_get_reg_emm(dynarec_arm_t* dyn, int ninst, int emm) int ret = EMM0 + emm; if(dyn->n.neoncache[ret].t==NEON_CACHE_YMMR || dyn->n.neoncache[ret].t==NEON_CACHE_YMMW) { // should only happens in step 0... - dyn->mmx87 |= (1<<emm); // mark as purged + dyn->insts[ninst].purge_ymm |= (1<<dyn->n.neoncache[ret].n); // mark as purged dyn->n.neoncache[ret].v = 0; // reset it } dyn->n.fpuused[ret] = 1; @@ -681,8 +681,6 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r dynarec_log(LOG_NONE, " ymm0=(%04x/%04x+%04x-%04x=%04x)", dyn->ymm_zero, dyn->insts[ninst].ymm0_in, dyn->insts[ninst].ymm0_add ,dyn->insts[ninst].ymm0_sub, dyn->insts[ninst].ymm0_out); if(dyn->insts[ninst].purge_ymm) dynarec_log(LOG_NONE, " purgeYmm=%04x", dyn->insts[ninst].purge_ymm); - if(dyn->mmx87 || dyn->scratchs) - dynarec_log(LOG_NONE, " mask=%04x-%04x", dyn->mmx87, dyn->scratchs); if(dyn->n.stack || dyn->insts[ninst].n.stack_next || dyn->insts[ninst].n.x87stack) dynarec_log(LOG_NONE, " X87:%d/%d(+%d/-%d)%d", dyn->n.stack, dyn->insts[ninst].n.stack_next, dyn->insts[ninst].n.stack_push, dyn->insts[ninst].n.stack_pop, dyn->insts[ninst].n.x87stack); if(dyn->insts[ninst].n.combined1 || dyn->insts[ninst].n.combined2) diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c index 049cc36d..8d2961d8 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.c +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -2602,22 +2602,21 @@ int fpu_get_reg_ymm(dynarec_arm_t* dyn, int ninst, int t, int ymm, int k1, int k i = EMM0; // first pass see if a slot is free in EMM/x87 slots for(int j=0; j<8; ++j) { - if(!dyn->n.fpuused[i+j] && !(dyn->mmx87&(1<<j))) { + if(!dyn->n.fpuused[i+j]) { int ret = internal_mark_ymm(dyn, t, ymm, i+j); if(ret>=0) 
return ret; } } // no slot in the emm space, look for scratch space in reverse i = SCRATCH0; - for(int j=7; j>=dyn->n.fpu_scratch; --j) - if(!(dyn->scratchs&(1<<j))) { - int ret = internal_mark_ymm(dyn, t, ymm, i+j); - if(ret>=0) return ret; + for(int j=7; j>=dyn->n.fpu_scratch; --j) { + int ret = internal_mark_ymm(dyn, t, ymm, i+j); + if(ret>=0) return ret; } // no free slot, needs to purge a value... First loop on the YMMR, they are easier to purge i = EMM0; for(int j=0; j<8; ++j) { - if(!dyn->n.fpuused[i+j] && !(dyn->mmx87&(1<<j))) { + if(!dyn->n.fpuused[i+j]) { // should a test be done to check if ymm is already in the purge list? if(!is_ymm_to_keep(dyn, i+j, k1, k2, k3) && (dyn->n.neoncache[i+j].t==NEON_CACHE_YMMR)) { dyn->n.neoncache[i+j].v = 0; @@ -2627,18 +2626,17 @@ int fpu_get_reg_ymm(dynarec_arm_t* dyn, int ninst, int t, int ymm, int k1, int k } } i = SCRATCH0; - for(int j=dyn->n.fpu_scratch; j<8; ++j) - if(!(dyn->scratchs&(1<<j))) { - if(!is_ymm_to_keep(dyn, i+j, k1, k2, k3) && (dyn->n.neoncache[i+j].t==NEON_CACHE_YMMR)) { - dyn->n.neoncache[i+j].v = 0; - int ret = internal_mark_ymm(dyn, t, ymm, i+j); - if(ret>=0) return ret; - } + for(int j=dyn->n.fpu_scratch; j<8; ++j) { + if(!is_ymm_to_keep(dyn, i+j, k1, k2, k3) && (dyn->n.neoncache[i+j].t==NEON_CACHE_YMMR)) { + dyn->n.neoncache[i+j].v = 0; + int ret = internal_mark_ymm(dyn, t, ymm, i+j); + if(ret>=0) return ret; + } } // make space in the scratch area for(int j=dyn->n.fpu_scratch; j<8; ++j) { // should a test be done to check if ymm is already in the purge list? - if(!(dyn->scratchs&(1<<j)) && !is_ymm_to_keep(dyn, i+j, k1, k2, k3)) { + if(!is_ymm_to_keep(dyn, i+j, k1, k2, k3)) { // Save the reg and recycle it VSTR128_U12(i+j, xEmu, offsetof(x64emu_t, ymm[dyn->n.neoncache[i+j].n])); dyn->n.neoncache[i+j].v = 0; @@ -2649,7 +2647,7 @@ int fpu_get_reg_ymm(dynarec_arm_t* dyn, int ninst, int t, int ymm, int k1, int k // last resort, go back in the EMM area... 
i = EMM0; for(int j=7; j>=0; --j) { - if(!dyn->n.fpuused[i+j] && !(dyn->mmx87&(1<<j))) { + if(!dyn->n.fpuused[i+j]) { // should a test be done to check if ymm is already in the purge list? if((dyn->n.neoncache[i+j].t==NEON_CACHE_YMMW) && !is_ymm_to_keep(dyn, i+j, k1, k2, k3)) { VSTR128_U12(i+j, xEmu, offsetof(x64emu_t, ymm[dyn->n.neoncache[i+j].n])); diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h index 7ce3f1c4..963ecb9d 100644 --- a/src/dynarec/arm64/dynarec_arm64_private.h +++ b/src/dynarec/arm64/dynarec_arm64_private.h @@ -133,8 +133,6 @@ typedef struct dynarec_arm_s { uint8_t doublepop; uint8_t always_test; uint8_t abort; // abort the creation of the block - uint8_t scratchs; // mask of the 8 scratch neon register globaly used in the dynablock - uint8_t mmx87; // mask of the 8 mmx/x87 neon register globaly used in the dynablock } dynarec_arm_t; void add_next(dynarec_arm_t *dyn, uintptr_t addr); |