about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2024-06-02 13:12:06 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2024-06-02 13:12:06 +0200
commit eb209abe6237223a9af643593fcea04d223d4afe (patch)
tree 08a10d4a91fee849acb90cafc77e58c8e0938121
parent 13551fd410f8484373f3ad73282828ca8713bb3f (diff)
download box64-eb209abe6237223a9af643593fcea04d223d4afe.tar.gz
box64-eb209abe6237223a9af643593fcea04d223d4afe.zip
[ARM64_DYNAREC] Don't use a fixed scratch register for x87 conversion, as it might conflict with YMM handling
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_functions.c 2
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_helper.c 50
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_helper.h 1
3 files changed, 36 insertions, 17 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c
index e2e0f1e0..febec38b 100644
--- a/src/dynarec/arm64/dynarec_arm64_functions.c
+++ b/src/dynarec/arm64/dynarec_arm64_functions.c
@@ -70,6 +70,8 @@ void fpu_free_reg(dynarec_arm_t* dyn, int reg)
     dyn->n.fpuused[reg] = 0;
     if(dyn->n.neoncache[reg].t!=NEON_CACHE_ST_F && dyn->n.neoncache[reg].t!=NEON_CACHE_ST_D && dyn->n.neoncache[reg].t!=NEON_CACHE_ST_I64)
         dyn->n.neoncache[reg].v = 0;
+    if(dyn->n.fpu_scratch && reg==SCRATCH0+dyn->n.fpu_scratch-1)
+        --dyn->n.fpu_scratch;
 }
 // Get an MMX double reg
 int fpu_get_reg_emm(dynarec_arm_t* dyn, int ninst, int emm)
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index 20304660..6da4607d 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -1119,12 +1119,20 @@ void x87_purgecache(dynarec_arm_t* dyn, int ninst, int next, int s1, int s2, int
                         VSTR64_REG_LSL3(dyn->n.x87reg[i], s1, s3);    // save the value
                         break;
                     case NEON_CACHE_ST_F:
-                        FCVT_D_S(SCRATCH, dyn->n.x87reg[i]);
-                        VSTR64_REG_LSL3(SCRATCH, s1, s3);    // save the value
+                        {
+                            int scratch = fpu_get_scratch(dyn, ninst);
+                            FCVT_D_S(scratch, dyn->n.x87reg[i]);
+                            VSTR64_REG_LSL3(scratch, s1, s3);    // save the value
+                            fpu_free_reg(dyn, scratch);
+                        }
                         break;
                     case NEON_CACHE_ST_I64:
-                        SCVTFDD(SCRATCH, dyn->n.x87reg[i]);
-                        VSTR64_REG_LSL3(SCRATCH, s1, s3);    // save the value
+                        {
+                            int scratch = fpu_get_scratch(dyn, ninst);
+                            SCVTFDD(scratch, dyn->n.x87reg[i]);
+                            VSTR64_REG_LSL3(scratch, s1, s3);    // save the value
+                            fpu_free_reg(dyn, scratch);
+                        }
                         break;
                 }
                 if(!next) {
@@ -1342,11 +1350,15 @@ void x87_forget(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st)
         ANDw_mask(s2, s2, 0, 2); //mask=7    // (emu->top + i)&7
     }
     if(dyn->n.neoncache[reg].t==NEON_CACHE_ST_F) {
-        FCVT_D_S(SCRATCH, reg);
-        VSTR64_REG_LSL3(SCRATCH, s1, s2);
+        int scratch = fpu_get_scratch(dyn, ninst);
+        FCVT_D_S(scratch, reg);
+        VSTR64_REG_LSL3(scratch, s1, s2);
+        fpu_free_reg(dyn, scratch);
     } else if(dyn->n.neoncache[reg].t==NEON_CACHE_ST_I64) {
-        SCVTFDD(SCRATCH, reg);
-        VSTR64_REG_LSL3(SCRATCH, s1, s2);
+        int scratch = fpu_get_scratch(dyn, ninst);
+        SCVTFDD(scratch, reg);
+        VSTR64_REG_LSL3(scratch, s1, s2);
+        fpu_free_reg(dyn, scratch);
     } else {
         VSTR64_REG_LSL3(reg, s1, s2);
     }
@@ -1436,11 +1448,15 @@ void x87_free(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int st)
             ANDw_mask(s2, s2, 0, 2); //mask=7    // (emu->top + i)&7
         }
         if(dyn->n.neoncache[reg].t==NEON_CACHE_ST_F) {
-            FCVT_D_S(SCRATCH, reg);
-            VSTR64_REG_LSL3(SCRATCH, s1, s2);
+            int scratch = fpu_get_scratch(dyn, ninst);
+            FCVT_D_S(scratch, reg);
+            VSTR64_REG_LSL3(scratch, s1, s2);
+            fpu_free_reg(dyn, scratch);
         } else if(dyn->n.neoncache[reg].t==NEON_CACHE_ST_I64) {
-            SCVTFDD(SCRATCH, reg);
-            VSTR64_REG_LSL3(SCRATCH, s1, s2);
+            int scratch = fpu_get_scratch(dyn, ninst);
+            SCVTFDD(scratch, reg);
+            VSTR64_REG_LSL3(scratch, s1, s2);
+            fpu_free_reg(dyn, scratch);
         } else {
             VSTR64_REG_LSL3(reg, s1, s2);
         }
@@ -1965,15 +1981,17 @@ static void swapCache(dynarec_arm_t* dyn, int ninst, int i, int j, neoncache_t *
     MESSAGE(LOG_DUMP, "\t  - Swapping %d <-> %d\n", i, j);
     // There is no VSWP in Arm64 NEON to swap 2 register contents!
     // so use a scratch...
+    int scratch = fpu_get_scratch(dyn, ninst);
     if(quad) {
-        VMOVQ(SCRATCH, i);
+        VMOVQ(scratch, i);
         VMOVQ(i, j);
-        VMOVQ(j, SCRATCH);
+        VMOVQ(j, scratch);
     } else {
-        VMOV(SCRATCH, i);
+        VMOV(scratch, i);
         VMOV(i, j);
-        VMOV(j, SCRATCH);
+        VMOV(j, scratch);
     }
+    fpu_free_reg(dyn, scratch);
     tmp.v = cache->neoncache[i].v;
     cache->neoncache[i].v = cache->neoncache[j].v;
     cache->neoncache[j].v = tmp.v;
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 0252a052..7471ba55 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -1500,7 +1500,6 @@ void arm64_move64(dynarec_arm_t* dyn, int ninst, int reg, uint64_t val);
 #define neoncache_st_coherency STEPNAME(neoncache_st_coherency)
 int neoncache_st_coherency(dynarec_arm_t* dyn, int ninst, int a, int b);
 // scratch fpu regs for convertions
-#define SCRATCH 31
 
 #if STEP == 0
 #define ST_IS_F(A)          0