about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2024-06-18 15:23:46 +0200
committerptitSeb <sebastien.chev@gmail.com>2024-06-18 15:23:46 +0200
commit08844f31c948bde6c537dee07626c01d24607c5c (patch)
treed2cb3e60dc5c8c81569c13a1289ddb4211d5b91b /src
parent4dfdba001bdbd3fefad4a152c447edeadb6c4dc5 (diff)
downloadbox64-08844f31c948bde6c537dee07626c01d24607c5c.tar.gz
box64-08844f31c948bde6c537dee07626c01d24607c5c.zip
[ARM64_DYNAREC] Better tracking of used ymm (seems redundant with ymm0_sub)
Diffstat (limited to 'src')
-rw-r--r--src/dynarec/arm64/dynarec_arm64_helper.c29
1 file changed, 16 insertions, 13 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index 3d5065ec..049cc36d 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -1799,6 +1799,7 @@ int ymm_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a, int forwrite, int
                 dyn->n.neoncache[i].t = NEON_CACHE_YMMW;
             }
             dyn->ymm_zero&=~(1<<a);
+            dyn->n.ymm_used|=(1<<a);
             #if STEP == 0
             dyn->insts[ninst].ymm0_sub |= (1<<a);
             #endif
@@ -1825,6 +1826,7 @@ int ymm_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a, int k1, int
         if((dyn->n.neoncache[i].t==NEON_CACHE_YMMR || dyn->n.neoncache[i].t==NEON_CACHE_YMMW) && dyn->n.neoncache[i].n==a) {
             dyn->n.neoncache[i].t = NEON_CACHE_YMMW;
             dyn->ymm_zero&=~(1<<a);
+            dyn->n.ymm_used|=(1<<a);
             #if STEP == 0
             dyn->insts[ninst].ymm0_sub |= (1<<a);
             #endif
@@ -2189,6 +2191,20 @@ static void fpuCacheTransform(dynarec_arm_t* dyn, int ninst, int s1, int s2, int
     int s1_val = 0;
     int s2_val = 0;
     // unload every uneeded cache
+    // ymm0 first
+    s3_top = 1;
+    uint16_t to_purge = dyn->ymm_zero&~dyn->insts[i2].ymm0_in;
+    if(dyn->ymm_zero && (dyn->insts[i2].purge_ymm|to_purge)) {
+        MESSAGE(LOG_DUMP, "\t- YMM Zero %04x / %04x\n", dyn->ymm_zero, (dyn->insts[i2].purge_ymm|to_purge));
+        for(int i=0; i<16; ++i)
+            if(is_avx_zero(dyn, ninst, i) && (dyn->insts[i2].purge_ymm|to_purge)&(1<<i)) {
+                if(s3_top) {
+                    ADDx_U12(s3, xEmu,offsetof(x64emu_t, ymm[0]));
+                    s3_top = 0;
+                }
+                STPx_S7_offset(xZR, xZR, s3, i*16);
+            }
+    }
     // check SSE first, than MMX, in order, to optimise successive memory write
     for(int i=0; i<16; ++i) {
         int j=findCacheSlot(dyn, ninst, NEON_CACHE_XMMW, i, &cache);
@@ -2267,19 +2283,6 @@ static void fpuCacheTransform(dynarec_arm_t* dyn, int ninst, int s1, int s2, int
             }
         }
     }
-    // ymm0
-    s3_top = 1;
-    uint16_t to_purge = dyn->ymm_zero&~dyn->insts[i2].ymm0_in;
-    if(dyn->ymm_zero && (dyn->insts[i2].purge_ymm|to_purge)) {
-        for(int i=0; i<16; ++i)
-            if(is_avx_zero(dyn, ninst, i) && (dyn->insts[i2].purge_ymm|to_purge)&(1<<i)) {
-                if(s3_top) {
-                    ADDx_U12(s3, xEmu,offsetof(x64emu_t, ymm[0]));
-                    s3_top = 0;
-                }
-                STPx_S7_offset(xZR, xZR, s3, i*16);
-            }
-    }
     if(stack_cnt != cache_i2.stack) {
         MESSAGE(LOG_DUMP, "\t    - adjust stack count %d -> %d -\n", stack_cnt, cache_i2.stack);
         int a = stack_cnt - cache_i2.stack;