about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2024-03-29 11:05:44 +0100
committer ptitSeb <sebastien.chev@gmail.com> 2024-03-29 11:05:53 +0100
commit 68d710cd942105a09606c66edd6b8ea0db15d5d5 (patch)
tree 90cd2d2ee1fecde69fb1cdf18ee65f9ef47fede4 /src
parent da64a3eeab26ddc1ebbe2d38054adce959375714 (diff)
download box64-68d710cd942105a09606c66edd6b8ea0db15d5d5.tar.gz
box64-68d710cd942105a09606c66edd6b8ea0db15d5d5.zip
[ARM64_DYNAREC] Fixed some issues with x87 stack management
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_helper.c 48
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_helper.h 2
-rw-r--r-- src/dynarec/dynarec_native_pass.c 5
3 files changed, 31 insertions, 24 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index 690daeec..69850eb6 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -1096,29 +1096,34 @@ void x87_purgecache(dynarec_arm_t* dyn, int ninst, int next, int s1, int s2, int
         // loop all cache entries
         for (int i=0; i<8; ++i)
             if(dyn->n.x87cache[i]!=-1) {
+                int st = dyn->n.x87cache[i]+dyn->n.stack_pop;
                 #if STEP == 1
                 if(!next) {   // don't force promotion here
                     // pre-apply pop, because purge happens in-between
-                    neoncache_promote_double(dyn, ninst, dyn->n.x87cache[i]+dyn->n.stack_pop);
+                    neoncache_promote_double(dyn, ninst, st);
                 }
                 #endif
                 #if STEP == 3
-                if(!next && neoncache_get_st_f(dyn, ninst, dyn->n.x87cache[i])>=0) {
-                    MESSAGE(LOG_DUMP, "Warning, incoherency with purged ST%d cache\n", dyn->n.x87cache[i]);
+                if(!next && neoncache_get_current_st(dyn, ninst, st)!=NEON_CACHE_ST_D) {
+                    MESSAGE(LOG_DUMP, "Warning, incoherency with purged ST%d cache\n", st);
                 }
                 #endif
-                ADDw_U12(s3, s2, dyn->n.x87cache[i]);
+                ADDw_U12(s3, s2, dyn->n.x87cache[i]);   // unadjusted count, as it's relative to real top
                 ANDw_mask(s3, s3, 0, 2); //mask=7   // (emu->top + st)&7
-                if(next) {
-                    // need to check if a ST_F need local promotion
-                    if(neoncache_get_st_f(dyn, ninst, dyn->n.x87cache[i])>=0) {
-                        FCVT_D_S(0, dyn->n.x87reg[i]);
-                        VSTR64_REG_LSL3(0, s1, s3);    // save the value
-                    } else {
+                switch(neoncache_get_current_st(dyn, ninst, st)) {
+                    case NEON_CACHE_ST_D:
                         VSTR64_REG_LSL3(dyn->n.x87reg[i], s1, s3);    // save the value
-                    }
-                } else {
-                    VSTR64_REG_LSL3(dyn->n.x87reg[i], s1, s3);
+                        break;
+                    case NEON_CACHE_ST_F:
+                        FCVT_D_S(SCRATCH, dyn->n.x87reg[i]);
+                        VSTR64_REG_LSL3(SCRATCH, s1, s3);    // save the value
+                        break;
+                    case NEON_CACHE_ST_I64:
+                        SCVTFDD(SCRATCH, dyn->n.x87reg[i]);
+                        VSTR64_REG_LSL3(SCRATCH, s1, s3);    // save the value
+                        break;
+                }
+                if(!next) {
                     fpu_free_reg(dyn, dyn->n.x87reg[i]);
                     dyn->n.x87reg[i] = -1;
                     dyn->n.x87cache[i] = -1;
@@ -1313,11 +1318,11 @@ void x87_forget(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st)
         ANDw_mask(s2, s2, 0, 2); //mask=7    // (emu->top + i)&7
     }
     if(dyn->n.neoncache[reg].t==NEON_CACHE_ST_F) {
-        FCVT_D_S(31, reg);
-        VSTR64_REG_LSL3(31, s1, s2);
+        FCVT_D_S(SCRATCH, reg);
+        VSTR64_REG_LSL3(SCRATCH, s1, s2);
     } else if(dyn->n.neoncache[reg].t==NEON_CACHE_ST_I64) {
-        SCVTFDD(31, reg);
-        VSTR64_REG_LSL3(31, s1, s2);
+        SCVTFDD(SCRATCH, reg);
+        VSTR64_REG_LSL3(SCRATCH, s1, s2);
     } else {
         VSTR64_REG_LSL3(reg, s1, s2);
     }
@@ -1407,11 +1412,11 @@ void x87_free(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int st)
             ANDw_mask(s2, s2, 0, 2); //mask=7    // (emu->top + i)&7
         }
         if(dyn->n.neoncache[reg].t==NEON_CACHE_ST_F) {
-            FCVT_D_S(31, reg);
-            VSTR64_REG_LSL3(31, s1, s2);
+            FCVT_D_S(SCRATCH, reg);
+            VSTR64_REG_LSL3(SCRATCH, s1, s2);
         } else if(dyn->n.neoncache[reg].t==NEON_CACHE_ST_I64) {
-            SCVTFDD(31, reg);
-            VSTR64_REG_LSL3(31, s1, s2);
+            SCVTFDD(SCRATCH, reg);
+            VSTR64_REG_LSL3(SCRATCH, s1, s2);
         } else {
             VSTR64_REG_LSL3(reg, s1, s2);
         }
@@ -1792,7 +1797,6 @@ static void swapCache(dynarec_arm_t* dyn, int ninst, int i, int j, neoncache_t *
     MESSAGE(LOG_DUMP, "\t  - Swapping %d <-> %d\n", i, j);
     // There is no VSWP in Arm64 NEON to swap 2 register contents!
     // so use a scratch...
-    #define SCRATCH 31
     if(quad) {
         VMOVQ(SCRATCH, i);
         VMOVQ(i, j);
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index e1e94f88..fa624c1e 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -1313,6 +1313,8 @@ void arm64_move64(dynarec_arm_t* dyn, int ninst, int reg, uint64_t val);
 
 #define neoncache_st_coherency STEPNAME(neoncache_st_coherency)
 int neoncache_st_coherency(dynarec_arm_t* dyn, int ninst, int a, int b);
+// scratch fpu regs for convertions
+#define SCRATCH 31
 
 #if STEP == 0
 #define ST_IS_F(A)          0
diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c
index 24ceda70..67d78ff6 100644
--- a/src/dynarec/dynarec_native_pass.c
+++ b/src/dynarec/dynarec_native_pass.c
@@ -76,6 +76,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int
             break;
         }
         #endif
+        fpu_propagate_stack(dyn, ninst);
         ip = addr;
         if (reset_n!=-1) {
             dyn->last_ip = 0;
@@ -103,7 +104,6 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int
         else if(ninst && (dyn->insts[ninst].pred_sz>1 || (dyn->insts[ninst].pred_sz==1 && dyn->insts[ninst].pred[0]!=ninst-1)))
             dyn->last_ip = 0;   // reset IP if some jump are coming here
         #endif
-        fpu_propagate_stack(dyn, ninst);
         NEW_INST;
         #if STEP == 0
         if(ninst && dyn->insts[ninst-1].x64.barrier_next) {
@@ -170,8 +170,9 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int
             next = dyn->insts[ninst].x64.jmp_insts;
         #endif
         if(dyn->insts[ninst].x64.has_next && dyn->insts[next].x64.barrier) {
-            if(dyn->insts[next].x64.barrier&BARRIER_FLOAT)
+            if(dyn->insts[next].x64.barrier&BARRIER_FLOAT) {
                 fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
+            }
             if(dyn->insts[next].x64.barrier&BARRIER_FLAGS) {
                 dyn->f.pending = 0;
                 dyn->f.dfnone = 0;