diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_functions.c | 21 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.c | 17 |
2 files changed, 28 insertions, 10 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c index 38cc2355..1e0ea0f9 100644 --- a/src/dynarec/arm64/dynarec_arm64_functions.c +++ b/src/dynarec/arm64/dynarec_arm64_functions.c @@ -454,11 +454,13 @@ int fpuCacheNeedsTransform(dynarec_arm_t* dyn, int ninst) { return 1; for(int i=0; i<32 && !ret; ++i) if(dyn->insts[ninst].n.neoncache[i].v) { // there is something at ninst for i + int t = dyn->insts[ninst].n.neoncache[i].t; + int n = dyn->insts[ninst].n.neoncache[i].n; if(!( - (dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_F - || dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_D - || dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_I64) - && dyn->insts[ninst].n.neoncache[i].n<dyn->insts[ninst].n.stack_pop)) + (t==NEON_CACHE_ST_F + || t==NEON_CACHE_ST_D + || t==NEON_CACHE_ST_I64) + && n<dyn->insts[ninst].n.stack_pop)) ret = 1; } return ret; @@ -474,15 +476,20 @@ int fpuCacheNeedsTransform(dynarec_arm_t* dyn, int ninst) { for(int i=0; i<32; ++i) { if(dyn->insts[ninst].n.neoncache[i].v) { // there is something at ninst for i + int t = dyn->insts[ninst].n.neoncache[i].t; + int n = dyn->insts[ninst].n.neoncache[i].n; if(!cache_i2.neoncache[i].v) { // but there is nothing at i2 for i + if(((t==NEON_CACHE_XMMR) || (t==NEON_CACHE_XMMW)) && (cache_i2.xmm_unneeded&(1<<n))) { /* nothing*/} + else if(((t==NEON_CACHE_YMMR) || (t==NEON_CACHE_YMMW)) && (cache_i2.ymm_unneeded&(1<<n))) { /* nothing*/} + else ret = 1; } else if(dyn->insts[ninst].n.neoncache[i].v!=cache_i2.neoncache[i].v) { // there is something different - if(dyn->insts[ninst].n.neoncache[i].n!=cache_i2.neoncache[i].n) { // not the same x64 reg + if(n!=cache_i2.neoncache[i].n) { // not the same x64 reg ret = 1; } - else if(dyn->insts[ninst].n.neoncache[i].t == NEON_CACHE_XMMR && cache_i2.neoncache[i].t == NEON_CACHE_XMMW) + else if((t == NEON_CACHE_XMMR) && cache_i2.neoncache[i].t == NEON_CACHE_XMMW) {/* nothing */ } - else 
if(dyn->insts[ninst].n.neoncache[i].t == NEON_CACHE_YMMR && cache_i2.neoncache[i].t == NEON_CACHE_YMMW) + else if((t == NEON_CACHE_YMMR) && cache_i2.neoncache[i].t == NEON_CACHE_YMMW) {/* nothing */ } else ret = 1; diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c index db67dd4f..a3c8f027 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.c +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -1811,7 +1811,7 @@ static void sse_purgecache(dynarec_arm_t* dyn, int ninst, int next, int s1) if(next) dyn->n.xmm_used |= (1<<i); if(dyn->n.ssecache[i].write) { if (old==-1) { - MESSAGE(LOG_DUMP, "\tPurge %sSSE Cache ------\n", next?"locally ":""); + MESSAGE(LOG_DUMP, "\tPurge %sSSE Cache ------\n", next?"localy ":""); ++old; } VSTR128_U12(dyn->n.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i])); @@ -2314,8 +2314,19 @@ static void fpuCacheTransform(dynarec_arm_t* dyn, int ninst, int s1, int s2, int neoncacheUnwind(&cache_i2); if(!cache_i2.stack) { - int purge = 1; - for (int i=0; i<24 && purge; ++i) + int purge = 0; // default to purge if there is any regs that are not needed at jump + // but first check if there is regs that can be discarded because unneeded at jump point + for(int i=0; i<32 && !purge; ++i) { + if(dyn->insts[ninst].n.neoncache[i].v) { + int t = dyn->insts[ninst].n.neoncache[i].t; + int n = dyn->insts[ninst].n.neoncache[i].n; + if(((t==NEON_CACHE_XMMR) || (t==NEON_CACHE_XMMW)) && (cache_i2.xmm_unneeded&(1<<n))) {/* nothing */} + else if(((t==NEON_CACHE_YMMR) || (t==NEON_CACHE_YMMW)) && (cache_i2.ymm_unneeded&(1<<n))) {/* nothing */} + else ++purge; + } + } + // Now check if there is any regs at jump point + for (int i=0; i<32 && purge; ++i) if(cache_i2.neoncache[i].v) purge = 0; if(purge) { |