about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2024-07-02 10:34:37 +0200
committerptitSeb <sebastien.chev@gmail.com>2024-07-02 10:34:37 +0200
commit4b0b3fc98ae4a1e848765e0cd48f958a13fc683d (patch)
tree8f6be5a089600eb79972f340f01491189e512ec1 /src
parentc6afd44c1c91485c278376622871975d6d20f176 (diff)
downloadbox64-4b0b3fc98ae4a1e848765e0cd48f958a13fc683d.tar.gz
box64-4b0b3fc98ae4a1e848765e0cd48f958a13fc683d.zip
[DYNAREC] Improved cache coherency and internal jump handling, [ARM64_DYNAREC] Improved YMM register tracking
Diffstat (limited to 'src')
-rw-r--r--src/dynarec/arm64/dynarec_arm64_functions.c30
-rw-r--r--src/dynarec/arm64/dynarec_arm64_helper.c2
-rw-r--r--src/dynarec/arm64/dynarec_arm64_private.h4
-rw-r--r--src/dynarec/dynarec_native.c16
-rw-r--r--src/dynarec/dynarec_native_pass.c4
5 files changed, 52 insertions, 4 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c
index 607b8a93..ea60745c 100644
--- a/src/dynarec/arm64/dynarec_arm64_functions.c
+++ b/src/dynarec/arm64/dynarec_arm64_functions.c
@@ -61,6 +61,9 @@ void fpu_reset_scratch(dynarec_arm_t* dyn)
 {
     dyn->n.fpu_scratch = 0;
     dyn->n.ymm_used = 0;
+    dyn->n.ymm_regs = 0;
+    dyn->n.ymm_write = 0;
+    dyn->n.ymm_removed = 0;
 }
 // Get a x87 double reg
 int fpu_get_reg_x87(dynarec_arm_t* dyn, int ninst, int t, int n)
@@ -83,6 +86,15 @@ void fpu_free_reg(dynarec_arm_t* dyn, int reg)
 {
     // TODO: check upper limit?
     dyn->n.fpuused[reg] = 0;
+    if(dyn->n.neoncache[reg].t==NEON_CACHE_YMMR || dyn->n.neoncache[reg].t==NEON_CACHE_YMMW) {
+        dyn->n.ymm_removed |= 1<<dyn->n.neoncache[reg].n;
+        if(dyn->n.neoncache[reg].t==NEON_CACHE_YMMW)
+            dyn->n.ymm_write |= 1<<dyn->n.neoncache[reg].n;
+        if(reg>SCRATCH0)
+            dyn->n.ymm_regs |= (8LL+reg-SCRATCH0)<<(dyn->n.neoncache[reg].n*4);
+        else
+            dyn->n.ymm_regs |= ((uint64_t)(reg-EMM0))<<(dyn->n.neoncache[reg].n*4);
+    }
     if(dyn->n.neoncache[reg].t!=NEON_CACHE_ST_F && dyn->n.neoncache[reg].t!=NEON_CACHE_ST_D && dyn->n.neoncache[reg].t!=NEON_CACHE_ST_I64)
         dyn->n.neoncache[reg].v = 0;
     if(dyn->n.fpu_scratch && reg==SCRATCH0+dyn->n.fpu_scratch-1)
@@ -560,6 +572,24 @@ void neoncacheUnwind(neoncache_t* cache)
             cache->fpuused[i] = 0;
         }
     }
+    // add back removed YMM
+    if(cache->ymm_removed) {
+        for(int i=0; i<16; ++i)
+            if(cache->ymm_removed&(1<<i)) {
+                int reg = cache->ymm_regs>>(i*4)&15;
+                if(reg>7)
+                    reg = reg - 8 + SCRATCH0;
+                else
+                    reg = reg + EMM0;
+                if(cache->neoncache[reg].v)
+                    printf_log(LOG_INFO, "Warning, recreating YMM%d on non empty slot %s", i, getCacheName(cache->neoncache[reg].t, cache->neoncache[reg].n));
+                cache->neoncache[reg].t = (cache->ymm_write&(1<<i))?NEON_CACHE_YMMW:NEON_CACHE_YMMR;
+                cache->neoncache[reg].n = i;
+            }
+        cache->ymm_regs = 0;
+        cache->ymm_write = cache->ymm_removed = 0;
+    }
+    cache->ymm_used = 0;
 }
 
 #define F8      *(uint8_t*)(addr++)
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index b333e5b9..32de5146 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -2481,7 +2481,7 @@ void fpu_reset_cache(dynarec_arm_t* dyn, int ninst, int reset_n)
     dyn->ymm_zero = dyn->insts[reset_n].ymm0_out;
     #endif
     #if STEP == 0
-    if(box64_dynarec_dump) dynarec_log(LOG_NONE, "New x87stack=%d\n", dyn->n.x87stack);
+    if(box64_dynarec_dump && dyn->n.x87stack) dynarec_log(LOG_NONE, "New x87stack=%d at ResetCache in inst %d with %d\n", dyn->n.x87stack, ninst, reset_n);
         #endif
     #if defined(HAVE_TRACE) && (STEP>2)
     if(box64_dynarec_dump && 0) //disable for now, need more work
diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h
index b86f1b11..c1015abd 100644
--- a/src/dynarec/arm64/dynarec_arm64_private.h
+++ b/src/dynarec/arm64/dynarec_arm64_private.h
@@ -59,6 +59,9 @@ typedef struct neoncache_s {
     int8_t              fpu_scratch;    // scratch counter
     int8_t              fpu_reg;        // x87/sse/mmx reg counter
     uint16_t            ymm_used;       // mask of the ymm regs used in this opcode
+    uint64_t            ymm_regs;       // 4bits (0-15) position of 16 ymmXX regs removed
+    uint16_t            ymm_write;      // 1 bit per ymmXX reg removed while holding a pending write
+    uint16_t            ymm_removed;    // 1 bit per ymmXX reg that was removed
 } neoncache_t;
 
 typedef struct flagcache_s {
@@ -140,6 +143,7 @@ void add_next(dynarec_arm_t *dyn, uintptr_t addr);
 uintptr_t get_closest_next(dynarec_arm_t *dyn, uintptr_t addr);
 void add_jump(dynarec_arm_t *dyn, int ninst);
 int get_first_jump(dynarec_arm_t *dyn, int next);
+int get_first_jump_addr(dynarec_arm_t *dyn, uintptr_t next);
 int is_nops(dynarec_arm_t *dyn, uintptr_t addr, int n);
 int is_instructions(dynarec_arm_t *dyn, uintptr_t addr, int n);
 
diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c
index 0bfa80ee..15ecdce8 100644
--- a/src/dynarec/dynarec_native.c
+++ b/src/dynarec/dynarec_native.c
@@ -101,6 +101,11 @@ void add_jump(dynarec_native_t *dyn, int ninst) {
     dyn->jmps[dyn->jmp_sz++] = ninst;
 }
 int get_first_jump(dynarec_native_t *dyn, int next) {
+    if(next<0 || next>dyn->size)
+        return -2;
+    return get_first_jump_addr(dyn, dyn->insts[next].x64.addr);
+}
+int get_first_jump_addr(dynarec_native_t *dyn, uintptr_t next) {
     for(int i=0; i<dyn->jmp_sz; ++i)
         if(dyn->insts[dyn->jmps[i]].x64.jmp == next)
             return dyn->jmps[i];
@@ -612,6 +617,17 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
             }
             i = ii;
         }
+    // remove trailing dead code
+    while(helper.size && !helper.insts[helper.size-1].x64.alive) {
+        helper.isize-=helper.insts[helper.size-1].x64.size;
+        --helper.size;
+    }
+    if(!helper.size) {
+        // NULL block after removing dead code, how is that possible?
+        dynarec_log(LOG_INFO, "Warning, null-sized dynarec block after trimming dead code (%p)\n", (void*)addr);
+        CancelBlock64(0);
+        return CreateEmptyBlock(block, addr);
+    }
     pos = 0;
     while(pos<helper.size)
         pos = updateYmm0(&helper, pos);
diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c
index 7a885c76..2ebc89cc 100644
--- a/src/dynarec/dynarec_native_pass.c
+++ b/src/dynarec/dynarec_native_pass.c
@@ -171,8 +171,6 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int
         fpu_reset_scratch(dyn);
         int next = ninst+1;
         #if STEP > 0
-        if(!dyn->insts[ninst].x64.has_next && dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts!=-1)
-            next = dyn->insts[ninst].x64.jmp_insts;
         if(dyn->insts[ninst].x64.has_next && dyn->insts[next].x64.barrier) {
             if(dyn->insts[next].x64.barrier&BARRIER_FLOAT) {
                 fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
@@ -228,7 +226,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int
         if(dyn->forward) {
             if(dyn->forward_to == addr && !need_epilog && ok>=0) {
                 // we made it!
-                reset_n = get_first_jump(dyn, addr);
+                reset_n = get_first_jump_addr(dyn, addr);
                 if(box64_dynarec_dump) dynarec_log(LOG_NONE, "Forward extend block for %d bytes %s%p -> %p (ninst %d - %d)\n", dyn->forward_to-dyn->forward, dyn->insts[dyn->forward_ninst].x64.has_callret?"(opt. call) ":"", (void*)dyn->forward, (void*)dyn->forward_to, reset_n, ninst);
                 if(dyn->insts[dyn->forward_ninst].x64.has_callret && !dyn->insts[dyn->forward_ninst].x64.has_next)
                     dyn->insts[dyn->forward_ninst].x64.has_next = 1;  // this block actually continue