diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2025-05-28 18:19:52 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2025-05-28 18:19:52 +0200 |
| commit | a0693590bd9c84844bfa767ffe51d7da916df3d5 (patch) | |
| tree | 218cac4164dab2c5ec13f661754977cda4fe7723 | |
| parent | f1df65d88f05f9712229f6c9323ed20551171286 (diff) | |
| download | box64-a0693590bd9c84844bfa767ffe51d7da916df3d5.tar.gz box64-a0693590bd9c84844bfa767ffe51d7da916df3d5.zip | |
[ARM64_DYNAREC] More optimisation of unused XMM/YMM purge
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_functions.c | 21 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.c | 17 |
2 files changed, 28 insertions, 10 deletions
```diff
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c
index 38cc2355..1e0ea0f9 100644
--- a/src/dynarec/arm64/dynarec_arm64_functions.c
+++ b/src/dynarec/arm64/dynarec_arm64_functions.c
@@ -454,11 +454,13 @@ int fpuCacheNeedsTransform(dynarec_arm_t* dyn, int ninst) {
             return 1;
         for(int i=0; i<32 && !ret; ++i)
             if(dyn->insts[ninst].n.neoncache[i].v) { // there is something at ninst for i
+                int t = dyn->insts[ninst].n.neoncache[i].t;
+                int n = dyn->insts[ninst].n.neoncache[i].n;
                 if(!(
-                (dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_F
-                || dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_D
-                || dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_I64)
-                && dyn->insts[ninst].n.neoncache[i].n<dyn->insts[ninst].n.stack_pop))
+                (t==NEON_CACHE_ST_F
+                || t==NEON_CACHE_ST_D
+                || t==NEON_CACHE_ST_I64)
+                && n<dyn->insts[ninst].n.stack_pop))
                     ret = 1;
             }
         return ret;
@@ -474,15 +476,20 @@ int fpuCacheNeedsTransform(dynarec_arm_t* dyn, int ninst) {
     for(int i=0; i<32; ++i) {
         if(dyn->insts[ninst].n.neoncache[i].v) { // there is something at ninst for i
+            int t = dyn->insts[ninst].n.neoncache[i].t;
+            int n = dyn->insts[ninst].n.neoncache[i].n;
             if(!cache_i2.neoncache[i].v) { // but there is nothing at i2 for i
+                if(((t==NEON_CACHE_XMMR) || (t==NEON_CACHE_XMMW)) && (cache_i2.xmm_unneeded&(1<<n))) { /* nothing*/}
+                else if(((t==NEON_CACHE_YMMR) || (t==NEON_CACHE_YMMW)) && (cache_i2.ymm_unneeded&(1<<n))) { /* nothing*/}
+                else
                 ret = 1;
             } else if(dyn->insts[ninst].n.neoncache[i].v!=cache_i2.neoncache[i].v) { // there is something different
-                if(dyn->insts[ninst].n.neoncache[i].n!=cache_i2.neoncache[i].n) { // not the same x64 reg
+                if(n!=cache_i2.neoncache[i].n) { // not the same x64 reg
                     ret = 1;
                 }
-                else if(dyn->insts[ninst].n.neoncache[i].t == NEON_CACHE_XMMR && cache_i2.neoncache[i].t == NEON_CACHE_XMMW)
+                else if((t == NEON_CACHE_XMMR) && cache_i2.neoncache[i].t == NEON_CACHE_XMMW)
                     {/* nothing */ }
-                else if(dyn->insts[ninst].n.neoncache[i].t == NEON_CACHE_YMMR && cache_i2.neoncache[i].t == NEON_CACHE_YMMW)
+                else if((t == NEON_CACHE_YMMR) && cache_i2.neoncache[i].t == NEON_CACHE_YMMW)
                     {/* nothing */ }
                 else
                     ret = 1;
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index db67dd4f..a3c8f027 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -1811,7 +1811,7 @@ static void sse_purgecache(dynarec_arm_t* dyn, int ninst, int next, int s1)
             if(next) dyn->n.xmm_used |= (1<<i);
             if(dyn->n.ssecache[i].write) {
                 if (old==-1) {
-                    MESSAGE(LOG_DUMP, "\tPurge %sSSE Cache ------\n", next?"locally ":"");
+                    MESSAGE(LOG_DUMP, "\tPurge %sSSE Cache ------\n", next?"localy ":"");
                     ++old;
                 }
                 VSTR128_U12(dyn->n.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));
@@ -2314,8 +2314,19 @@ static void fpuCacheTransform(dynarec_arm_t* dyn, int ninst, int s1, int s2, int
     neoncacheUnwind(&cache_i2);
     if(!cache_i2.stack) {
-        int purge = 1;
-        for (int i=0; i<24 && purge; ++i)
+        int purge = 0; // default to purge if there is any regs that are not needed at jump
+        // but first check if there is regs that can be discarded because unneeded at jump point
+        for(int i=0; i<32 && !purge; ++i) {
+            if(dyn->insts[ninst].n.neoncache[i].v) {
+                int t = dyn->insts[ninst].n.neoncache[i].t;
+                int n = dyn->insts[ninst].n.neoncache[i].n;
+                if(((t==NEON_CACHE_XMMR) || (t==NEON_CACHE_XMMW)) && (cache_i2.xmm_unneeded&(1<<n))) {/* nothing */}
+                else if(((t==NEON_CACHE_YMMR) || (t==NEON_CACHE_YMMW)) && (cache_i2.ymm_unneeded&(1<<n))) {/* nothing */}
+                else ++purge;
+            }
+        }
+        // Now check if there is any regs at jump point
+        for (int i=0; i<32 && purge; ++i)
             if(cache_i2.neoncache[i].v)
                 purge = 0;
         if(purge) {
```