about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2024-06-18 15:58:57 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2024-06-18 15:58:57 +0200
commit a4cb62d6d60c83c8684101f19156d1bb9f5ceb81 (patch)
tree c0502dbfa5fe770aeda22cfd6ad0a20637c461c3 /src
parent e4062c6031e0e8c5910cc8edd48f2ba572a2ccef (diff)
download box64-a4cb62d6d60c83c8684101f19156d1bb9f5ceb81.tar.gz
box64-a4cb62d6d60c83c8684101f19156d1bb9f5ceb81.zip
[ARM64_DYNAREC] Restored a better way to handle ymm registers, now that the tracking is improved
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_functions.c | 12
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_helper.c | 28
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_private.h | 2
3 files changed, 18 insertions, 24 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c
index 468b11d5..cad5427a 100644
--- a/src/dynarec/arm64/dynarec_arm64_functions.c
+++ b/src/dynarec/arm64/dynarec_arm64_functions.c
@@ -32,7 +32,7 @@ int fpu_get_scratch(dynarec_arm_t* dyn, int ninst)
     int ret = SCRATCH0 + dyn->n.fpu_scratch++;
     if(dyn->n.neoncache[ret].t==NEON_CACHE_YMMR || dyn->n.neoncache[ret].t==NEON_CACHE_YMMW) {
         // should only happens in step 0...
-        dyn->scratchs |= (1<<(dyn->n.fpu_scratch-1)); // mark as not free
+        dyn->insts[ninst].purge_ymm |= (1<<dyn->n.neoncache[ret].n); // mark as purged
         dyn->n.neoncache[ret].v = 0; // reset it
     }
     return ret;
@@ -43,12 +43,12 @@ int fpu_get_double_scratch(dynarec_arm_t* dyn, int ninst)
     int ret = SCRATCH0 + dyn->n.fpu_scratch;
     if(dyn->n.neoncache[ret].t==NEON_CACHE_YMMR || dyn->n.neoncache[ret].t==NEON_CACHE_YMMW) {
         // should only happens in step 0...
-        dyn->scratchs |= (1<<(dyn->n.fpu_scratch)); // mark as not free
+        dyn->insts[ninst].purge_ymm |= (1<<dyn->n.neoncache[ret].n); // mark as purged
         dyn->n.neoncache[ret].v = 0; // reset it
     }
     if(dyn->n.neoncache[ret+1].t==NEON_CACHE_YMMR || dyn->n.neoncache[ret+1].t==NEON_CACHE_YMMW) {
         // should only happens in step 0...
-        dyn->scratchs |= (1<<(dyn->n.fpu_scratch+1)); // mark as not free
+        dyn->insts[ninst].purge_ymm |= (1<<dyn->n.neoncache[ret+1].n); // mark as purged
         dyn->n.neoncache[ret+1].v = 0; // reset it
     }
     dyn->n.fpu_scratch+=2;
@@ -67,7 +67,7 @@ int fpu_get_reg_x87(dynarec_arm_t* dyn, int ninst, int t, int n)
     while (dyn->n.fpuused[i]) ++i;
     if(dyn->n.neoncache[i].t==NEON_CACHE_YMMR || dyn->n.neoncache[i].t==NEON_CACHE_YMMW) {
         // should only happens in step 0...
-        dyn->mmx87 |= (1<<(i-1-X870)); // mark as purged
+        dyn->insts[ninst].purge_ymm |= (1<<dyn->n.neoncache[i].n); // mark as purged
         dyn->n.neoncache[i].v = 0; // reset it
     }
     dyn->n.fpuused[i] = 1;
@@ -92,7 +92,7 @@ int fpu_get_reg_emm(dynarec_arm_t* dyn, int ninst, int emm)
     int ret = EMM0 + emm;
     if(dyn->n.neoncache[ret].t==NEON_CACHE_YMMR || dyn->n.neoncache[ret].t==NEON_CACHE_YMMW) {
         // should only happens in step 0...
-        dyn->mmx87 |= (1<<emm); // mark as purged
+        dyn->insts[ninst].purge_ymm |= (1<<dyn->n.neoncache[ret].n); // mark as purged
         dyn->n.neoncache[ret].v = 0; // reset it
     }
     dyn->n.fpuused[ret] = 1;
@@ -681,8 +681,6 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r
             dynarec_log(LOG_NONE, " ymm0=(%04x/%04x+%04x-%04x=%04x)", dyn->ymm_zero, dyn->insts[ninst].ymm0_in, dyn->insts[ninst].ymm0_add ,dyn->insts[ninst].ymm0_sub, dyn->insts[ninst].ymm0_out);
         if(dyn->insts[ninst].purge_ymm)
             dynarec_log(LOG_NONE, " purgeYmm=%04x", dyn->insts[ninst].purge_ymm);
-        if(dyn->mmx87 || dyn->scratchs)
-            dynarec_log(LOG_NONE, " mask=%04x-%04x", dyn->mmx87, dyn->scratchs);
         if(dyn->n.stack || dyn->insts[ninst].n.stack_next || dyn->insts[ninst].n.x87stack)
             dynarec_log(LOG_NONE, " X87:%d/%d(+%d/-%d)%d", dyn->n.stack, dyn->insts[ninst].n.stack_next, dyn->insts[ninst].n.stack_push, dyn->insts[ninst].n.stack_pop, dyn->insts[ninst].n.x87stack);
         if(dyn->insts[ninst].n.combined1 || dyn->insts[ninst].n.combined2)
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index 049cc36d..8d2961d8 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -2602,22 +2602,21 @@ int fpu_get_reg_ymm(dynarec_arm_t* dyn, int ninst, int t, int ymm, int k1, int k
     i = EMM0;
     // first pass see if a slot is free in EMM/x87 slots
     for(int j=0; j<8; ++j) {
-        if(!dyn->n.fpuused[i+j] && !(dyn->mmx87&(1<<j))) {
+        if(!dyn->n.fpuused[i+j]) {
             int ret = internal_mark_ymm(dyn, t, ymm, i+j);
             if(ret>=0) return ret;
         }
     }
     // no slot in the emm space, look for scratch space in reverse
     i = SCRATCH0;
-    for(int j=7; j>=dyn->n.fpu_scratch; --j) 
-        if(!(dyn->scratchs&(1<<j))) {
-            int ret = internal_mark_ymm(dyn, t, ymm, i+j);
-            if(ret>=0) return ret;
+    for(int j=7; j>=dyn->n.fpu_scratch; --j) {
+        int ret = internal_mark_ymm(dyn, t, ymm, i+j);
+        if(ret>=0) return ret;
     }
     // no free slot, needs to purge a value... First loop on the YMMR, they are easier to purge
     i = EMM0;
     for(int j=0; j<8; ++j) {
-        if(!dyn->n.fpuused[i+j] && !(dyn->mmx87&(1<<j))) {
+        if(!dyn->n.fpuused[i+j]) {
             // should a test be done to check if ymm is already in the purge list?
             if(!is_ymm_to_keep(dyn, i+j, k1, k2, k3) && (dyn->n.neoncache[i+j].t==NEON_CACHE_YMMR)) {
                 dyn->n.neoncache[i+j].v = 0;
@@ -2627,18 +2626,17 @@ int fpu_get_reg_ymm(dynarec_arm_t* dyn, int ninst, int t, int ymm, int k1, int k
         }
     }
     i = SCRATCH0;
-    for(int j=dyn->n.fpu_scratch; j<8; ++j) 
-        if(!(dyn->scratchs&(1<<j))) {
-            if(!is_ymm_to_keep(dyn, i+j, k1, k2, k3) && (dyn->n.neoncache[i+j].t==NEON_CACHE_YMMR)) {
-                dyn->n.neoncache[i+j].v = 0;
-                int ret = internal_mark_ymm(dyn, t, ymm, i+j);
-                if(ret>=0) return ret;
-            }
+    for(int j=dyn->n.fpu_scratch; j<8; ++j) {
+        if(!is_ymm_to_keep(dyn, i+j, k1, k2, k3) && (dyn->n.neoncache[i+j].t==NEON_CACHE_YMMR)) {
+            dyn->n.neoncache[i+j].v = 0;
+            int ret = internal_mark_ymm(dyn, t, ymm, i+j);
+            if(ret>=0) return ret;
+        }
     }
     // make space in the scratch area
     for(int j=dyn->n.fpu_scratch; j<8; ++j) {
             // should a test be done to check if ymm is already in the purge list?
-            if(!(dyn->scratchs&(1<<j)) && !is_ymm_to_keep(dyn, i+j, k1, k2, k3)) {
+            if(!is_ymm_to_keep(dyn, i+j, k1, k2, k3)) {
                 // Save the reg and recycle it
                 VSTR128_U12(i+j, xEmu, offsetof(x64emu_t, ymm[dyn->n.neoncache[i+j].n]));
                 dyn->n.neoncache[i+j].v = 0;
@@ -2649,7 +2647,7 @@ int fpu_get_reg_ymm(dynarec_arm_t* dyn, int ninst, int t, int ymm, int k1, int k
     // last resort, go back in the EMM area...
     i = EMM0;
     for(int j=7; j>=0; --j) {
-        if(!dyn->n.fpuused[i+j] && !(dyn->mmx87&(1<<j))) {
+        if(!dyn->n.fpuused[i+j]) {
             // should a test be done to check if ymm is already in the purge list?
             if((dyn->n.neoncache[i+j].t==NEON_CACHE_YMMW) && !is_ymm_to_keep(dyn, i+j, k1, k2, k3)) {
                 VSTR128_U12(i+j, xEmu, offsetof(x64emu_t, ymm[dyn->n.neoncache[i+j].n]));
diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h
index 7ce3f1c4..963ecb9d 100644
--- a/src/dynarec/arm64/dynarec_arm64_private.h
+++ b/src/dynarec/arm64/dynarec_arm64_private.h
@@ -133,8 +133,6 @@ typedef struct dynarec_arm_s {
     uint8_t             doublepop;
     uint8_t             always_test;
     uint8_t             abort;      // abort the creation of the block
-    uint8_t             scratchs;   // mask of the 8 scratch neon register globaly used in the dynablock
-    uint8_t             mmx87;      // mask of the 8 mmx/x87 neon register globaly used in the dynablock
 } dynarec_arm_t;
 
 void add_next(dynarec_arm_t *dyn, uintptr_t addr);