about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2024-06-08 11:52:00 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2024-06-08 11:52:00 +0200
commit 4c285d4d8a13a13b7fd618884eb983b332740fa7 (patch)
tree a0596ac160df98a051b967b44a46d7ff0224ef1c /src
parent cd5fbc1492546ea2233363226721d6a6065ce33c (diff)
download box64-4c285d4d8a13a13b7fd618884eb983b332740fa7.tar.gz
box64-4c285d4d8a13a13b7fd618884eb983b332740fa7.zip
[ARM64_DYNAREC] Fixed an issue when purging a YMM register that is used in the same opcode
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_functions.c 1
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_helper.c 7
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_helper.h 7
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_private.h 1
4 files changed, 16 insertions, 0 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c
index 49adddf1..7248ef69 100644
--- a/src/dynarec/arm64/dynarec_arm64_functions.c
+++ b/src/dynarec/arm64/dynarec_arm64_functions.c
@@ -41,6 +41,7 @@ int fpu_get_scratch(dynarec_arm_t* dyn, int ninst)
 void fpu_reset_scratch(dynarec_arm_t* dyn)
 {
     dyn->n.fpu_scratch = 0;
+    dyn->ymm_used = 0;
 }
 // Get a x87 double reg
 int fpu_get_reg_x87(dynarec_arm_t* dyn, int ninst, int t, int n)
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index f1a67270..528035de 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -1834,6 +1834,12 @@ void ymm_mark_zero(dynarec_arm_t* dyn, int ninst, int a)
     // look if already exist
     for(int i=0; i<32; ++i)
         if((dyn->n.neoncache[i].t==NEON_CACHE_YMMR || dyn->n.neoncache[i].t==NEON_CACHE_YMMW) && dyn->n.neoncache[i].n==a) {
+            if(dyn->ymm_used&(1<<a)) {
+                // special case, the reg was just added in the opcode and cannot be marked as 0, so just RAZ it now
+                dyn->n.neoncache[i].t = NEON_CACHE_YMMW;
+                VEORQ(i, i, i);
+                return;
+            }
             dyn->n.neoncache[i].v = 0;  // forget it!
         }
     avx_mark_zero(dyn, ninst, a);
@@ -2553,6 +2559,7 @@ void avx_purge_ymm(dynarec_arm_t* dyn, int ninst, uint16_t mask, int s1)
 int fpu_get_reg_ymm(dynarec_arm_t* dyn, int ninst, int t, int ymm, int k1, int k2, int k3)
 {
     int i = -1;
+    dyn->ymm_used|=(1<<ymm);
     #if STEP >1
     // check the cached neoncache, it should be exact
     // look for it
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 62d3ec0f..96c13450 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -566,6 +566,13 @@
     else                                                                                        \
         VLDR128_U12(ey, ed, fixedaddress+16);                                                   \
 
+// Get written EY
+#define GETEYw(ey)                                                                              \
+    if(MODREG)                                                                                  \
+        ey = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 1, -1, -1, -1);                 \
+    else                                                                                        \
+        VLDR128_U12(ey, ed, fixedaddress+16);                                                   \
+
 // Get empty EY and non-writen GY
 #define GETGYEY_empty(gy, ey)                                                                   \
     gy = ymm_get_reg(dyn, ninst, x1, gd, 0, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1, -1);       \
diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h
index a2e32e80..07b66a39 100644
--- a/src/dynarec/arm64/dynarec_arm64_private.h
+++ b/src/dynarec/arm64/dynarec_arm64_private.h
@@ -126,6 +126,7 @@ typedef struct dynarec_arm_s {
     int32_t             forward_size;   // size at the forward point
     int                 forward_ninst;  // ninst at the forward point
     uint16_t            ymm_zero;   // bitmap of ymm to zero at purge
+    uint16_t            ymm_used;   // mask of the ymm regs used in this opcode
     uint8_t             smwrite;    // for strongmem model emulation
     uint8_t             smread;
     uint8_t             doublepush;