about summary refs log tree commit diff stats
path: root/src/dynarec/dynarec_native.c
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2024-06-05 10:44:31 +0200
committerptitSeb <sebastien.chev@gmail.com>2024-06-05 10:44:31 +0200
commitba411303e951cb51766d42a15be59e2b9d5e67ec (patch)
treed348eb0b9c0561bde343f686ef6e308476a2e9a5 /src/dynarec/dynarec_native.c
parent8848bc2e7f404c72396392b307ee6c3494392488 (diff)
downloadbox64-ba411303e951cb51766d42a15be59e2b9d5e67ec.tar.gz
box64-ba411303e951cb51766d42a15be59e2b9d5e67ec.zip
[DYNAREC] Improved handling of the Ymm0 attribute
Diffstat (limited to 'src/dynarec/dynarec_native.c')
-rw-r--r--src/dynarec/dynarec_native.c37
1 files changed, 30 insertions, 7 deletions
diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c
index 649ba1a1..a9086625 100644
--- a/src/dynarec/dynarec_native.c
+++ b/src/dynarec/dynarec_native.c
@@ -357,7 +357,7 @@ static void fillPredecessors(dynarec_native_t* dyn)
     }
 }
 
-// updateNeed goes backward, from last instruction to top
+// updateNeed for the current block. recursive function that goes backward
 static int updateNeed(dynarec_native_t* dyn, int ninst, uint8_t need) {
     while (ninst>=0) {
         // need pending but instruction is only a subset: remove pend and use an X_ALL instead
@@ -400,6 +400,32 @@ static int updateNeed(dynarec_native_t* dyn, int ninst, uint8_t need) {
     return ninst;
 }
 
+// update Ymm0 and Purge_ymm0. goes forward from ninst, recursing into jump targets
+static int updateYmm0(dynarec_native_t* dyn, int ninst, uint16_t mask) {
+    while (ninst<dyn->size) {
+        uint16_t ymm0 = mask&~dyn->insts[ninst].purge_ymm; // current ymm0
+        uint16_t to_purge = dyn->insts[ninst].ymm_zero & ~ymm0; // the new to purge
+        uint16_t ymm0_out = (mask|dyn->insts[ninst].ymm0_add)&~dyn->insts[ninst].ymm0_sub; // ymm0 at the output
+        //check if need to recurse further
+        int ok = (ymm0==dyn->insts[ninst].ymm_zero) && (!to_purge) && (ymm0_out==dyn->insts[ninst].ymm0_out);
+        if(ok && dyn->insts[ninst].x64.has_next)
+            ok = (dyn->insts[ninst+1].ymm_zero==(ymm0_out&~dyn->insts[ninst+1].purge_ymm));
+        if(ok && dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts!=-1)
+            ok = (dyn->insts[dyn->insts[ninst].x64.jmp_insts].ymm_zero==(ymm0_out&~dyn->insts[dyn->insts[ninst].x64.jmp_insts].purge_ymm));
+        if(ok)
+            return ninst+1;
+        dyn->insts[ninst].ymm_zero = ymm0;
+        dyn->insts[ninst].purge_ymm |= to_purge;
+        dyn->insts[ninst].ymm0_out = ymm0_out;
+        if(dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts!=-1)
+            updateYmm0(dyn, dyn->insts[ninst].x64.jmp_insts, ymm0_out);
+        if(!dyn->insts[ninst].x64.has_next)
+            return ninst+1;
+        ++ninst;
+    }
+    return ninst;
+}
+
 void* current_helper = NULL;
 static int static_jmps[MAX_INSTS+2];
 static uintptr_t static_next[MAX_INSTS+2];
@@ -556,12 +582,6 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
                 if(!helper.insts[i].barrier_maybe)
                     helper.insts[k].x64.barrier |= BARRIER_FULL;
                 helper.insts[i].x64.jmp_insts = k;
-                if(helper.insts[i].ymm_zero || helper.insts[k].ymm_zero) {
-                    // move to pureg the reg that are present in k (jump to) but not in i (jump from)
-                    uint16_t to_purge = helper.insts[k].ymm_zero & ~helper.insts[i].ymm_zero;
-                    helper.insts[k].purge_ymm |= to_purge;
-                    helper.insts[k].ymm_zero &= ~to_purge;
-                }
             }
         }
     }
@@ -578,6 +598,9 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
     int pos = helper.size;
     while (pos>=0)
         pos = updateNeed(&helper, pos, 0);
+    pos = 0;
+    while(pos<helper.size)
+        pos = updateYmm0(&helper, pos, helper.insts[pos].ymm_zero);
     // remove fpu stuff on non-executed code
     for(int i=1; i<helper.size-1; ++i)
         if(!helper.insts[i].pred_sz) {