about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2025-05-28 18:19:52 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2025-05-28 18:19:52 +0200
commit a0693590bd9c84844bfa767ffe51d7da916df3d5 (patch)
tree 218cac4164dab2c5ec13f661754977cda4fe7723
parent f1df65d88f05f9712229f6c9323ed20551171286 (diff)
download box64-a0693590bd9c84844bfa767ffe51d7da916df3d5.tar.gz
box64-a0693590bd9c84844bfa767ffe51d7da916df3d5.zip
[ARM64_DYNAREC] More optimisation of unused XMM/YMM purge
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_functions.c 21
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_helper.c 17
2 files changed, 28 insertions, 10 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c
index 38cc2355..1e0ea0f9 100644
--- a/src/dynarec/arm64/dynarec_arm64_functions.c
+++ b/src/dynarec/arm64/dynarec_arm64_functions.c
@@ -454,11 +454,13 @@ int fpuCacheNeedsTransform(dynarec_arm_t* dyn, int ninst) {
             return 1;
         for(int i=0; i<32 && !ret; ++i)
             if(dyn->insts[ninst].n.neoncache[i].v) {       // there is something at ninst for i
+                int t = dyn->insts[ninst].n.neoncache[i].t;
+                int n = dyn->insts[ninst].n.neoncache[i].n;
                 if(!(
-                (dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_F
-                || dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_D
-                || dyn->insts[ninst].n.neoncache[i].t==NEON_CACHE_ST_I64)
-                && dyn->insts[ninst].n.neoncache[i].n<dyn->insts[ninst].n.stack_pop))
+                (t==NEON_CACHE_ST_F
+                || t==NEON_CACHE_ST_D
+                || t==NEON_CACHE_ST_I64)
+                && n<dyn->insts[ninst].n.stack_pop))
                     ret = 1;
             }
         return ret;
@@ -474,15 +476,20 @@ int fpuCacheNeedsTransform(dynarec_arm_t* dyn, int ninst) {
 
     for(int i=0; i<32; ++i) {
         if(dyn->insts[ninst].n.neoncache[i].v) {       // there is something at ninst for i
+            int t = dyn->insts[ninst].n.neoncache[i].t;
+            int n = dyn->insts[ninst].n.neoncache[i].n;
             if(!cache_i2.neoncache[i].v) {    // but there is nothing at i2 for i
+                if(((t==NEON_CACHE_XMMR) || (t==NEON_CACHE_XMMW)) && (cache_i2.xmm_unneeded&(1<<n))) { /* nothing*/}
+                else if(((t==NEON_CACHE_YMMR) || (t==NEON_CACHE_YMMW)) && (cache_i2.ymm_unneeded&(1<<n))) { /* nothing*/}
+                else 
                 ret = 1;
             } else if(dyn->insts[ninst].n.neoncache[i].v!=cache_i2.neoncache[i].v) {  // there is something different
-                if(dyn->insts[ninst].n.neoncache[i].n!=cache_i2.neoncache[i].n) {   // not the same x64 reg
+                if(n!=cache_i2.neoncache[i].n) {   // not the same x64 reg
                     ret = 1;
                 }
-                else if(dyn->insts[ninst].n.neoncache[i].t == NEON_CACHE_XMMR && cache_i2.neoncache[i].t == NEON_CACHE_XMMW)
+                else if((t == NEON_CACHE_XMMR) && cache_i2.neoncache[i].t == NEON_CACHE_XMMW)
                     {/* nothing */ }
-                else if(dyn->insts[ninst].n.neoncache[i].t == NEON_CACHE_YMMR && cache_i2.neoncache[i].t == NEON_CACHE_YMMW)
+                else if((t == NEON_CACHE_YMMR) && cache_i2.neoncache[i].t == NEON_CACHE_YMMW)
                     {/* nothing */ }
                 else
                     ret = 1;
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index db67dd4f..a3c8f027 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -1811,7 +1811,7 @@ static void sse_purgecache(dynarec_arm_t* dyn, int ninst, int next, int s1)
             if(next) dyn->n.xmm_used |= (1<<i);
             if(dyn->n.ssecache[i].write) {
                 if (old==-1) {
-                    MESSAGE(LOG_DUMP, "\tPurge %sSSE Cache ------\n", next?"locally ":"");
+                    MESSAGE(LOG_DUMP, "\tPurge %sSSE Cache ------\n", next?"localy ":"");
                     ++old;
                 }
                 VSTR128_U12(dyn->n.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));
@@ -2314,8 +2314,19 @@ static void fpuCacheTransform(dynarec_arm_t* dyn, int ninst, int s1, int s2, int
     neoncacheUnwind(&cache_i2);
 
     if(!cache_i2.stack) {
-        int purge = 1;
-        for (int i=0; i<24 && purge; ++i)
+        int purge = 0;  // default to purge if there is any regs that are not needed at jump
+        // but first check if there is regs that can be discarded because unneeded at jump point
+        for(int i=0; i<32 && !purge; ++i) {
+            if(dyn->insts[ninst].n.neoncache[i].v) {
+                int t = dyn->insts[ninst].n.neoncache[i].t;
+                int n = dyn->insts[ninst].n.neoncache[i].n;
+                if(((t==NEON_CACHE_XMMR) || (t==NEON_CACHE_XMMW)) && (cache_i2.xmm_unneeded&(1<<n))) {/* nothing */}
+                else if(((t==NEON_CACHE_YMMR) || (t==NEON_CACHE_YMMW)) && (cache_i2.ymm_unneeded&(1<<n))) {/* nothing */}
+                else ++purge;
+            }
+        }
+        // Now check if there is any regs at jump point
+        for (int i=0; i<32 && purge; ++i)
             if(cache_i2.neoncache[i].v)
                 purge = 0;
         if(purge) {