about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2024-06-09 14:51:41 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2024-06-09 14:51:41 +0200
commit 06df8c7bede707331777b88c2fa78e2056e8193d (patch)
tree 6201f082228362057657858086c695b2b4b2431a /src
parent 950241d52f7fb777b8afa7a253c88f3de65530b8 (diff)
download box64-06df8c7bede707331777b88c2fa78e2056e8193d.tar.gz
download box64-06df8c7bede707331777b88c2fa78e2056e8193d.zip
[DYNAREC] Another fix for YMM Zero'd upper reg tracking
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_functions.c 10
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_helper.c 24
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_pass0.h 4
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_private.h 2
-rw-r--r-- src/dynarec/dynarec_native.c 48
-rw-r--r-- src/dynarec/la64/dynarec_la64_pass0.h 4
-rw-r--r-- src/dynarec/la64/dynarec_la64_private.h 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_pass0.h 4
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_private.h 2
9 files changed, 55 insertions, 45 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c
index 269197b3..468b11d5 100644
--- a/src/dynarec/arm64/dynarec_arm64_functions.c
+++ b/src/dynarec/arm64/dynarec_arm64_functions.c
@@ -407,7 +407,7 @@ int fpuCacheNeedsTransform(dynarec_arm_t* dyn, int ninst) {
     if(!i2) { // just purge
         if(dyn->insts[ninst].n.stack_next)
             return 1;
-        if(dyn->insts[ninst].ymm_zero)
+        if(dyn->insts[ninst].ymm0_out)
             return 1;
         for(int i=0; i<32 && !ret; ++i)
             if(dyn->insts[ninst].n.neoncache[i].v) {       // there is something at ninst for i
@@ -424,7 +424,7 @@ int fpuCacheNeedsTransform(dynarec_arm_t* dyn, int ninst) {
     if(dyn->insts[ninst].n.stack_next != dyn->insts[i2].n.stack-dyn->insts[i2].n.stack_push) {
         return 1;
     }
-    if(dyn->insts[ninst].ymm_zero && (dyn->insts[ninst].ymm_zero&~dyn->insts[i2].ymm_zero))
+    if(dyn->insts[ninst].ymm0_out && (dyn->insts[ninst].ymm0_out&~dyn->insts[i2].ymm0_in))
         return 1;
     neoncache_t cache_i2 = dyn->insts[i2].n;
     neoncacheUnwind(&cache_i2);
@@ -673,12 +673,12 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r
                 dynarec_log(LOG_NONE, " V%d:%s", ii, getCacheName(dyn->n.neoncache[ii].t, dyn->n.neoncache[ii].n));
                 dynarec_log(LOG_NONE, "->%s", getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n));
             }
-            dynarec_log(LOG_NONE, ")%s", (box64_dynarec_dump>1)?"\e[32m":"");
+            dynarec_log(LOG_NONE, ")%s", (box64_dynarec_dump>1)?"\e[0;32m":"");
         }
         if(dyn->insts[ninst].n.ymm_used)
             dynarec_log(LOG_NONE, " ymmUsed=%04x", dyn->insts[ninst].n.ymm_used);
-        if(dyn->ymm_zero || dyn->insts[ninst].ymm0_add || dyn->insts[ninst].ymm0_sub)
-            dynarec_log(LOG_NONE, " ymm0=%04x(+%04x-%04x)", dyn->ymm_zero, dyn->insts[ninst].ymm0_add ,dyn->insts[ninst].ymm0_sub);
+        if(dyn->ymm_zero || dyn->insts[ninst].ymm0_add || dyn->insts[ninst].ymm0_sub || dyn->insts[ninst].ymm0_out)
+            dynarec_log(LOG_NONE, " ymm0=(%04x/%04x+%04x-%04x=%04x)", dyn->ymm_zero, dyn->insts[ninst].ymm0_in, dyn->insts[ninst].ymm0_add ,dyn->insts[ninst].ymm0_sub, dyn->insts[ninst].ymm0_out);
         if(dyn->insts[ninst].purge_ymm)
             dynarec_log(LOG_NONE, " purgeYmm=%04x", dyn->insts[ninst].purge_ymm);
         if(dyn->mmx87 || dyn->scratchs)
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index 4eedadd1..42f09b21 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -1797,19 +1797,24 @@ int ymm_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a, int forwrite, int
         if((dyn->n.neoncache[i].t==NEON_CACHE_YMMR || dyn->n.neoncache[i].t==NEON_CACHE_YMMW) && dyn->n.neoncache[i].n==a) {
             if(forwrite) {
                 dyn->n.neoncache[i].t = NEON_CACHE_YMMW;
-                dyn->ymm_zero&=~(1<<a);
             }
+            dyn->ymm_zero&=~(1<<a);
+            #if STEP == 0
+            dyn->insts[ninst].ymm0_sub |= (1<<a);
+            #endif
             return i;
         }
     // nope, grab a new one
     int ret =  fpu_get_reg_ymm(dyn, ninst, forwrite?NEON_CACHE_YMMW:NEON_CACHE_YMMR, a, k1, k2, k3);
     if(dyn->ymm_zero&(1<<a)) {
         VEORQ(ret, ret, ret);
-        if(forwrite)
-            dyn->ymm_zero&=~(1<<a);
+        dyn->ymm_zero&=~(1<<a);
     } else {
         VLDR128_U12(ret, xEmu, offsetof(x64emu_t, ymm[a]));
     }
+    #if STEP == 0
+    dyn->insts[ninst].ymm0_sub |= (1<<a);
+    #endif
     return ret;
 }
 // get neon register for a YMM reg, but don't try to synch it if it needed to be created
@@ -1820,12 +1825,18 @@ int ymm_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a, int k1, int
         if((dyn->n.neoncache[i].t==NEON_CACHE_YMMR || dyn->n.neoncache[i].t==NEON_CACHE_YMMW) && dyn->n.neoncache[i].n==a) {
             dyn->n.neoncache[i].t = NEON_CACHE_YMMW;
             dyn->ymm_zero&=~(1<<a);
+            #if STEP == 0
+            dyn->insts[ninst].ymm0_sub |= (1<<a);
+            #endif
             return i;
         }
     // nope, grab a new one
     int ret =  fpu_get_reg_ymm(dyn, ninst, NEON_CACHE_YMMW, a, k1, k2, k3);
     if(dyn->ymm_zero&(1<<a))
         dyn->ymm_zero&=~(1<<a);
+    #if STEP == 0
+    dyn->insts[ninst].ymm0_sub |= (1<<a);
+    #endif
     return ret;
 }
 
@@ -1842,6 +1853,9 @@ void ymm_mark_zero(dynarec_arm_t* dyn, int ninst, int a)
             }
             dyn->n.neoncache[i].v = 0;  // forget it!
         }
+    #if STEP == 0
+    dyn->insts[ninst].ymm0_add |= (1<<a);
+    #endif
     avx_mark_zero(dyn, ninst, a);
 }
 
@@ -2457,10 +2471,10 @@ void fpu_reset_cache(dynarec_arm_t* dyn, int ninst, int reset_n)
     #if STEP > 1
     // for STEP 2 & 3, just need to refrest with current, and undo the changes (push & swap)
     dyn->n = dyn->insts[ninst].n;
-    dyn->ymm_zero = dyn->insts[ninst].ymm_zero;
+    dyn->ymm_zero = dyn->insts[ninst].ymm0_out;
     #else
     dyn->n = dyn->insts[reset_n].n;
-    dyn->ymm_zero = dyn->insts[reset_n].ymm_zero;
+    dyn->ymm_zero = dyn->insts[reset_n].ymm0_out;
     #endif
     neoncacheUnwind(&dyn->n);
     #if STEP == 0
diff --git a/src/dynarec/arm64/dynarec_arm64_pass0.h b/src/dynarec/arm64/dynarec_arm64_pass0.h
index 8b2fc6fb..510dd4ab 100644
--- a/src/dynarec/arm64/dynarec_arm64_pass0.h
+++ b/src/dynarec/arm64/dynarec_arm64_pass0.h
@@ -26,14 +26,12 @@
         dyn->n.combined1 = dyn->n.combined2 = 0;\
         dyn->n.swapped = 0; dyn->n.barrier = 0; \
         dyn->insts[ninst].f_entry = dyn->f;     \
-        dyn->insts[ninst].ymm_zero = dyn->ymm_zero;\
+        dyn->insts[ninst].ymm0_in = dyn->ymm_zero;\
         if(ninst) {dyn->insts[ninst-1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst-1].x64.addr;}
 
 #define INST_EPILOG                             \
         dyn->insts[ninst].f_exit = dyn->f;      \
         dyn->insts[ninst].n = dyn->n;           \
-        dyn->insts[ninst].ymm0_add = dyn->ymm_zero&~dyn->insts[ninst].ymm_zero; \
-        dyn->insts[ninst].ymm0_sub = dyn->insts[ninst].ymm_zero&~dyn->ymm_zero; \
         dyn->insts[ninst].ymm0_out = dyn->ymm_zero;\
         dyn->insts[ninst].x64.has_next = (ok>0)?1:0;
 #define INST_NAME(name) 
diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h
index db205a3b..7ce3f1c4 100644
--- a/src/dynarec/arm64/dynarec_arm64_private.h
+++ b/src/dynarec/arm64/dynarec_arm64_private.h
@@ -81,8 +81,8 @@ typedef struct instruction_arm64_s {
     int                 pass2choice;// value for choices that are fixed on pass2 for pass3
     uintptr_t           natcall;
     uint16_t            retn;
-    uint16_t            ymm_zero;   // bitmap of ymm to zero at purge
     uint16_t            purge_ymm;  // need to purge some ymm
+    uint16_t            ymm0_in;    // bitmap of ymm to zero at purge
     uint16_t            ymm0_add;   // the ymm0 added by the opcode
     uint16_t            ymm0_sub;   // the ymm0 removed by the opcode
     uint16_t            ymm0_out;   // the ymmm0 at th end of the opcode
diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c
index 161e577e..f3e60590 100644
--- a/src/dynarec/dynarec_native.c
+++ b/src/dynarec/dynarec_native.c
@@ -400,27 +400,29 @@ static int updateNeed(dynarec_native_t* dyn, int ninst, uint8_t need) {
     return ninst;
 }
 
-// ypdate Ymm0 and Purge_ymm0.
-static int updateYmm0(dynarec_native_t* dyn, int ninst, uint16_t mask) {
-    while (ninst<dyn->size) {
-        uint16_t ymm0 = mask&~dyn->insts[ninst].purge_ymm; // current ymm0
-        uint16_t to_purge = dyn->insts[ninst].ymm_zero & ~ymm0; // the new to purge
-        uint16_t ymm0_out = (mask|dyn->insts[ninst].ymm0_add)&~dyn->insts[ninst].ymm0_sub; // ymm0 at the output
-        //check if need to recurse further
-        int ok = (ymm0==dyn->insts[ninst].ymm_zero) && (!to_purge) && (ymm0_out==dyn->insts[ninst].ymm0_out);
-        if(ok && dyn->insts[ninst].x64.has_next)
-            ok = (dyn->insts[ninst+1].ymm_zero==(ymm0_out&~dyn->insts[ninst+1].purge_ymm));
-        if(ok && dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts!=-1)
-            ok = (dyn->insts[dyn->insts[ninst].x64.jmp_insts].ymm_zero==(ymm0_out&~dyn->insts[dyn->insts[ninst].x64.jmp_insts].purge_ymm));
-        if(ok)
-            return ninst+1;
-        dyn->insts[ninst].ymm_zero = ymm0;
-        dyn->insts[ninst].purge_ymm |= to_purge;
-        dyn->insts[ninst].ymm0_out = ymm0_out;
-        if(dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts!=-1)
-            updateYmm0(dyn, dyn->insts[ninst].x64.jmp_insts, ymm0_out);
-        if(!dyn->insts[ninst].x64.has_next)
-            return ninst+1;
+// update Ymm0 and Purge_ymm0.
+static int updateYmm0(dynarec_native_t* dyn, int ninst) {
+    int ok = 1;
+    while (ok && ninst<dyn->size) {
+        uint16_t ymm0 = dyn->insts[ninst].ymm0_in; // entry ymm0
+        ymm0&=~dyn->insts[ninst].purge_ymm; // entry after purge
+        uint16_t ymm0_out = (ymm0|dyn->insts[ninst].ymm0_add)&~dyn->insts[ninst].ymm0_sub;  // ymm0 after the opcode
+        ok = dyn->insts[ninst].x64.has_next;    // continue?
+        if(ok) ok = (dyn->insts[ninst].ymm0_in!=ymm0) || (dyn->insts[ninst+1].ymm0_in!=ymm0_out); // continue if there has been any change...
+        if(ok) dyn->insts[ninst+1].ymm0_in=ymm0_out;   // make the change
+        dyn->insts[ninst].ymm0_out = ymm0_out;  // update ymm0_out
+        dyn->insts[ninst].ymm0_in = ymm0;  // write purged ymm0, as it's done at the entry
+        int jmp = (dyn->insts[ninst].x64.jmp)?dyn->insts[ninst].x64.jmp_insts:-1;
+        if(jmp!=-1) {
+            // check if a purge is needed at jump point
+            ymm0_out&=~dyn->insts[jmp].purge_ymm;
+            uint16_t ymm0_jmp = dyn->insts[jmp].ymm0_in;
+            uint16_t to_purge = ymm0_jmp&~ymm0_out; // if there are too many ymm0 at jump point
+            if(to_purge) {
+                dyn->insts[jmp].purge_ymm|=to_purge;
+                updateYmm0(dyn, jmp);
+            }
+        }
         ++ninst;
     }
     return ninst;
@@ -600,14 +602,14 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
         pos = updateNeed(&helper, pos, 0);
     pos = 0;
     while(pos<helper.size)
-        pos = updateYmm0(&helper, pos, helper.insts[pos].ymm_zero);
+        pos = updateYmm0(&helper, pos);
     // remove fpu stuff on non-executed code
     for(int i=1; i<helper.size-1; ++i)
         if(!helper.insts[i].pred_sz) {
             int ii = i;
             while(ii<helper.size && !helper.insts[ii].pred_sz) {
                 fpu_reset_ninst(&helper, ii++);
-                helper.insts[ii].ymm0_sub = helper.insts[ii].ymm0_add = helper.insts[ii].ymm0_out = helper.insts[ii].purge_ymm = 0;
+                helper.insts[ii].ymm0_in = helper.insts[ii].ymm0_sub = helper.insts[ii].ymm0_add = helper.insts[ii].ymm0_out = helper.insts[ii].purge_ymm = 0;
             }
             i = ii;
         }
diff --git a/src/dynarec/la64/dynarec_la64_pass0.h b/src/dynarec/la64/dynarec_la64_pass0.h
index 62f65853..0ea40a86 100644
--- a/src/dynarec/la64/dynarec_la64_pass0.h
+++ b/src/dynarec/la64/dynarec_la64_pass0.h
@@ -30,14 +30,12 @@
     dyn->lsx.combined1 = dyn->lsx.combined2 = 0; \
     dyn->lsx.swapped = 0;                        \
     dyn->lsx.barrier = 0;                        \
-    dyn->insts[ninst].ymm_zero = dyn->ymm_zero;  \
+    dyn->insts[ninst].ymm0_in = dyn->ymm_zero;   \
     dyn->insts[ninst].f_entry = dyn->f;          \
     if (ninst) { dyn->insts[ninst - 1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst - 1].x64.addr; }
 #define INST_EPILOG                    \
     dyn->insts[ninst].f_exit = dyn->f; \
     dyn->insts[ninst].lsx = dyn->lsx;  \
-    dyn->insts[ninst].ymm0_add = dyn->ymm_zero&~dyn->insts[ninst].ymm_zero; \
-    dyn->insts[ninst].ymm0_sub = dyn->insts[ninst].ymm_zero&~dyn->ymm_zero; \
     dyn->insts[ninst].ymm0_out = dyn->ymm_zero;\
     dyn->insts[ninst].x64.has_next = (ok > 0) ? 1 : 0;
 #define INST_NAME(name)
diff --git a/src/dynarec/la64/dynarec_la64_private.h b/src/dynarec/la64/dynarec_la64_private.h
index 9fa7618b..2dddd155 100644
--- a/src/dynarec/la64/dynarec_la64_private.h
+++ b/src/dynarec/la64/dynarec_la64_private.h
@@ -80,8 +80,8 @@ typedef struct instruction_la64_s {
     int                 pass2choice;// value for choices that are fixed on pass2 for pass3
     uintptr_t           natcall;
     uint16_t            retn;
-    uint16_t            ymm_zero;   // bitmap of ymm to zero at purge
     uint16_t            purge_ymm;  // need to purge some ymm
+    uint16_t            ymm0_in;    // bitmap of ymm to zero at purge
     uint16_t            ymm0_add;   // the ymm0 added by the opcode
     uint16_t            ymm0_sub;   // the ymm0 removed by the opcode
     uint16_t            ymm0_out;   // the ymmm0 at th end of the opcode
diff --git a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h
index 174bb092..04857e8c 100644
--- a/src/dynarec/rv64/dynarec_rv64_pass0.h
+++ b/src/dynarec/rv64/dynarec_rv64_pass0.h
@@ -28,14 +28,12 @@
         dyn->e.swapped = 0; dyn->e.barrier = 0; \
         for(int i=0; i<16; ++i) dyn->e.olds[i].v = 0;\
         dyn->insts[ninst].f_entry = dyn->f;     \
-        dyn->insts[ninst].ymm_zero = dyn->ymm_zero;\
+        dyn->insts[ninst].ymm0_in = dyn->ymm_zero;\
         if(ninst) {dyn->insts[ninst-1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst-1].x64.addr;}
 
 #define INST_EPILOG                             \
         dyn->insts[ninst].f_exit = dyn->f;      \
         dyn->insts[ninst].e = dyn->e;           \
-        dyn->insts[ninst].ymm0_add = dyn->ymm_zero&~dyn->insts[ninst].ymm_zero; \
-        dyn->insts[ninst].ymm0_sub = dyn->insts[ninst].ymm_zero&~dyn->ymm_zero; \
         dyn->insts[ninst].ymm0_out = dyn->ymm_zero;\
         dyn->insts[ninst].x64.has_next = (ok>0)?1:0;
 #define INST_NAME(name) 
diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h
index 1b32b7fe..612e331c 100644
--- a/src/dynarec/rv64/dynarec_rv64_private.h
+++ b/src/dynarec/rv64/dynarec_rv64_private.h
@@ -90,8 +90,8 @@ typedef struct instruction_rv64_s {
     int                 pass2choice;// value for choices that are fixed on pass2 for pass3
     uintptr_t           natcall;
     uint16_t            retn;
-    uint16_t            ymm_zero;   // bitmap of ymm to zero at purge
     uint16_t            purge_ymm;  // need to purge some ymm
+    uint16_t            ymm0_in;    // bitmap of ymm to zero at purge
     uint16_t            ymm0_add;   // the ymm0 added by the opcode
     uint16_t            ymm0_sub;   // the ymm0 removed by the opcode
     uint16_t            ymm0_out;   // the ymmm0 at th end of the opcode