about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2023-12-13 13:05:38 +0100
committer ptitSeb <sebastien.chev@gmail.com> 2023-12-13 13:05:38 +0100
commit 88575ebc5d0df38c709f199e85030fa5435a1a4a (patch)
tree 5c4e32ac6eb266f3085bb3a9c2583ce9a5031a78 /src
parent d8a19e98327e723588bfac16d6d96e8730c30199 (diff)
download box64-88575ebc5d0df38c709f199e85030fa5435a1a4a.tar.gz
box64-88575ebc5d0df38c709f199e85030fa5435a1a4a.zip
[DYNAREC] Made callret default, improved callret efficiency, and dynarec speed on larger blocks
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_00.c 6
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_64.c 4
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_pass0.h 4
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_private.h 5
-rw-r--r-- src/dynarec/dynarec_native.c 85
-rw-r--r-- src/dynarec/dynarec_native_pass.c 10
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00_3.c 1
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_pass0.h 4
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_private.h 5
-rw-r--r-- src/main.c 4
10 files changed, 91 insertions, 37 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c
index 9967f784..697ff0d9 100644
--- a/src/dynarec/arm64/dynarec_arm64_00.c
+++ b/src/dynarec/arm64/dynarec_arm64_00.c
@@ -2678,7 +2678,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             switch(tmp) {
                 case 3:
                     SETFLAGS(X_ALL, SF_SET);    // Hack to set flags to "dont'care" state
-                    //BARRIER_NEXT(BARRIER_FULL);
                     if(dyn->last_ip && (addr-dyn->last_ip<0x1000)) {
                         ADDx_U12(x2, xRIP, addr-dyn->last_ip);
                     } else {
@@ -2734,9 +2733,9 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         SETFLAGS(X_ALL, SF_SET);    // Hack to set flags to "dont'care" state
                     }
                     // regular call
-                    //BARRIER_NEXT(1);
                     if(box64_dynarec_callret && box64_dynarec_bigblock>1) {
                         BARRIER(BARRIER_FULL);
+                        BARRIER_NEXT(BARRIER_FULL);
                     } else {
                         BARRIER(BARRIER_FLOAT);
                         *need_epilog = 0;
@@ -2752,7 +2751,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         SET_HASCALLRET();
                         // Push actual return address
                         if(addr < (dyn->start+dyn->isize)) {
-                            BARRIER_NEXT(BARRIER_FULL);
                             // there is a next...
                             j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0;
                             ADR_S20(x4, j64);
@@ -3149,6 +3147,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     GETEDz(0);
                     if(box64_dynarec_callret && box64_dynarec_bigblock>1) {
                         BARRIER(BARRIER_FULL);
+                        BARRIER_NEXT(BARRIER_FULL);
                     } else {
                         BARRIER(BARRIER_FLOAT);
                         *need_epilog = 0;
@@ -3160,7 +3159,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         // Push actual return address
                         if(addr < (dyn->start+dyn->isize)) {
                             // there is a next...
-                            BARRIER_NEXT(BARRIER_FULL);
                             j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0;
                             ADR_S20(x4, j64);
                             MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2);
diff --git a/src/dynarec/arm64/dynarec_arm64_64.c b/src/dynarec/arm64/dynarec_arm64_64.c
index bfb5702f..1c8343bf 100644
--- a/src/dynarec/arm64/dynarec_arm64_64.c
+++ b/src/dynarec/arm64/dynarec_arm64_64.c
@@ -1121,6 +1121,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     GETEDOz(x6, 0);
                     if(box64_dynarec_callret && box64_dynarec_bigblock>1) {
                         BARRIER(BARRIER_FULL);
+                        BARRIER_NEXT(BARRIER_FULL);
                     } else {
                         BARRIER(BARRIER_FLOAT);
                         *need_epilog = 0;
@@ -1128,12 +1129,15 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }
                     GETIP_(addr);
                     if(box64_dynarec_callret) {
+                        SET_HASCALLRET();
                         // Push actual return address
                         if(addr < (dyn->start+dyn->isize)) {
                             // there is a next...
                             j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0;
                             ADR_S20(x4, j64);
+                            MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2);
                         } else {
+                            MESSAGE(LOG_NONE, "\tCALLRET set return to Jmptable(%p)\n", (void*)addr);
                             j64 = getJumpTableAddress64(addr);
                             TABLE64(x4, j64);
                             LDRx_U12(x4, x4, 0);
diff --git a/src/dynarec/arm64/dynarec_arm64_pass0.h b/src/dynarec/arm64/dynarec_arm64_pass0.h
index 6e2dd09e..918bb3c1 100644
--- a/src/dynarec/arm64/dynarec_arm64_pass0.h
+++ b/src/dynarec/arm64/dynarec_arm64_pass0.h
@@ -16,9 +16,9 @@
         dyn->f.pending=(B)&SF_SET_PENDING;      \
         dyn->f.dfnone=((B)&SF_SET)?1:0;
 #define EMIT(A)     
-#define JUMP(A, C)         add_next(dyn, (uintptr_t)A); SMEND(); dyn->insts[ninst].x64.jmp = A; dyn->insts[ninst].x64.jmp_cond = C
+#define JUMP(A, C)         add_jump(dyn, ninst); add_next(dyn, (uintptr_t)A); SMEND(); dyn->insts[ninst].x64.jmp = A; dyn->insts[ninst].x64.jmp_cond = C
 #define BARRIER(A)      if(A!=BARRIER_MAYBE) {fpu_purgecache(dyn, ninst, 0, x1, x2, x3); dyn->insts[ninst].x64.barrier = A;} else dyn->insts[ninst].barrier_maybe = 1
-#define BARRIER_NEXT(A) dyn->insts[ninst+1].x64.barrier_next = A
+#define BARRIER_NEXT(A) dyn->insts[ninst].x64.barrier_next = A
 #define SET_HASCALLRET()    dyn->insts[ninst].x64.has_callret = 1
 #define NEW_INST \
         ++dyn->size;                            \
diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h
index acbbe2e8..0e5c0e9b 100644
--- a/src/dynarec/arm64/dynarec_arm64_private.h
+++ b/src/dynarec/arm64/dynarec_arm64_private.h
@@ -102,6 +102,9 @@ typedef struct dynarec_arm_s {
     uintptr_t*          next;       // variable array of "next" jump address
     int                 next_sz;
     int                 next_cap;
+    int*                jmps;       // variable array of jump instructions
+    int                 jmp_sz;
+    int                 jmp_cap;
     int*                predecessor;// single array of all predecessor
     dynablock_t*        dynablock;
     instsize_t*         instsize;
@@ -119,6 +122,8 @@ typedef struct dynarec_arm_s {
 
 void add_next(dynarec_arm_t *dyn, uintptr_t addr);
 uintptr_t get_closest_next(dynarec_arm_t *dyn, uintptr_t addr);
+void add_jump(dynarec_arm_t *dyn, int ninst);
+int get_first_jump(dynarec_arm_t *dyn, int next);
 int is_nops(dynarec_arm_t *dyn, uintptr_t addr, int n);
 int is_instructions(dynarec_arm_t *dyn, uintptr_t addr, int n);
 
diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c
index 98486d55..a68b5c4f 100644
--- a/src/dynarec/dynarec_native.c
+++ b/src/dynarec/dynarec_native.c
@@ -94,6 +94,21 @@ uintptr_t get_closest_next(dynarec_native_t *dyn, uintptr_t addr) {
     }
     return best;
 }
+void add_jump(dynarec_native_t *dyn, int ninst) {   // append instruction index ninst to the dyn->jmps array
+    // grow the array by 64 slots once it is full
+    if(dyn->jmp_sz == dyn->jmp_cap) {
+        dyn->jmp_cap += 64;
+        dyn->jmps = (int*)dynaRealloc(dyn->jmps, dyn->jmp_cap*sizeof(int));  // NOTE(review): result is not checked here; presumably dynaRealloc cannot return NULL -- confirm
+    }
+    dyn->jmps[dyn->jmp_sz++] = ninst;   // store ninst in the next free slot and bump the count
+}
+int get_first_jump(dynarec_native_t *dyn, int next) {   // instruction index of the first recorded jump whose x64.jmp equals next, or -2 when there is none
+    for(int i=0; i<dyn->jmp_sz; ++i)    // jmps[] is scanned in the order the jumps were recorded by add_jump
+        if(dyn->insts[dyn->jmps[i]].x64.jmp == next)    // NOTE(review): the caller appears to pass an address; an int parameter may truncate it -- confirm the prototype
+            return dyn->jmps[i];    // jmps[] stores instruction indexes: return that index, not the slot number i
+    return -2;
+}
+
 #define PK(A) (*((uint8_t*)(addr+(A))))
 int is_nops(dynarec_native_t *dyn, uintptr_t addr, int n)
 {
@@ -392,6 +407,7 @@ void CancelBlock64(int need_lock)
     current_helper = NULL;
     if(helper) {
         dynaFree(helper->next);
+        dynaFree(helper->jmps);
         dynaFree(helper->insts);
         dynaFree(helper->predecessor);
         if(helper->table64 && (helper->table64!=(uint64_t*)helper->tablestart))
@@ -471,10 +487,6 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
     helper.insts = (instruction_native_t*)dynaCalloc(helper.cap, sizeof(instruction_native_t));
     // pass 0, addresses, x64 jump addresses, overall size of the block
     uintptr_t end = native_pass0(&helper, addr, alternate, is32bits);
-    // no need for next anymore
-    dynaFree(helper.next);
-    helper.next_sz = helper.next_cap = 0;
-    helper.next = NULL;
     // basic checks
     if(!helper.size) {
         dynarec_log(LOG_INFO, "Warning, null-sized dynarec block (%p)\n", (void*)addr);
@@ -493,26 +505,57 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
     // compute hash signature
     uint32_t hash = X31_hash_code((void*)addr, end-addr);
     // calculate barriers
-    for(int i=0; i<helper.size; ++i)
-        if(helper.insts[i].x64.jmp) {
-            uintptr_t j = helper.insts[i].x64.jmp;
-            if(j<start || j>=end || j==helper.insts[i].x64.addr) {
-                if(j==helper.insts[i].x64.addr) // if there is a loop on some opcode, make the block "always to tested"
-                    helper.always_test = 1;
-                helper.insts[i].x64.jmp_insts = -1;
-                helper.insts[i].x64.need_after |= X_PEND;
-            } else {
-                // find jump address instruction
-                int k=-1;
-                for(int i2=0; i2<helper.size && k==-1; ++i2) {
-                    if(helper.insts[i2].x64.addr==j)
-                        k=i2;
+    for(int ii=0; ii<helper.jmp_sz; ++ii) {
+        int i = helper.jmps[ii];
+        uintptr_t j = helper.insts[i].x64.jmp;
+        if(j<start || j>=end || j==helper.insts[i].x64.addr) {
+            if(j==helper.insts[i].x64.addr) // if there is a loop on some opcode, make the block "always to tested"
+                helper.always_test = 1;
+            helper.insts[i].x64.jmp_insts = -1;
+            helper.insts[i].x64.need_after |= X_PEND;
+        } else {
+            // find jump address instruction
+            int k=-1;
+            int search = ((j>=helper.insts[0].x64.addr) && j<helper.insts[0].x64.addr+helper.isize)?1:0;
+            int imin = 0;
+            int imax = helper.size;
+            int i2 = helper.size/2;
+            // dichotomy search
+            while(search) {
+                if(helper.insts[i2].x64.addr == j) {
+                    k = i2;
+                    search = 0;
+                } else if(helper.insts[i2].x64.addr>j) {
+                    imax = i2;
+                    i2 = (imax+imin)/2;
+                } else {
+                    imin = i2;
+                    i2 = (imax+imin)/2;
+                }
+                if(search && (imax-imin)<2) {
+                    search = 0;
+                    if(helper.insts[imin].x64.addr==j)
+                        k = imin;
+                    else if(helper.insts[imax].x64.addr==j)
+                        k = imax;
                 }
-                if(k!=-1 && !helper.insts[i].barrier_maybe)
-                    helper.insts[k].x64.barrier |= BARRIER_FULL;
-                helper.insts[i].x64.jmp_insts = k;
             }
+            /*for(int i2=0; i2<helper.size && k==-1; ++i2) {
+                if(helper.insts[i2].x64.addr==j)
+                    k=i2;
+            }*/
+            if(k!=-1 && !helper.insts[i].barrier_maybe)
+                helper.insts[k].x64.barrier |= BARRIER_FULL;
+            helper.insts[i].x64.jmp_insts = k;
         }
+    }
+    // no need for next and jmps anymore
+    dynaFree(helper.next);
+    helper.next_sz = helper.next_cap = 0;
+    helper.next = NULL;
+    dynaFree(helper.jmps);
+    helper.jmp_sz = helper.jmp_cap = 0;
+    helper.jmps = NULL;
     // fill predecessors with the jump address
     fillPredecessors(&helper);
 
diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c
index e67a9848..5686e322 100644
--- a/src/dynarec/dynarec_native_pass.c
+++ b/src/dynarec/dynarec_native_pass.c
@@ -77,9 +77,11 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int
             dyn->last_ip = 0;   // reset IP if some jump are coming here
         fpu_propagate_stack(dyn, ninst);
         NEW_INST;
+        #if STEP == 0
         if(ninst && dyn->insts[ninst-1].x64.barrier_next) {
             BARRIER(dyn->insts[ninst-1].x64.barrier_next);
         }
+        #endif
         if(!ninst) {
             GOTEST(x1, x2);
         }
@@ -214,12 +216,8 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int
                         dyn->insts[ninst].x64.has_next = 1;  // this block actually continue
                     } else {
                         // need to find back that instruction to copy the caches, as previous version cannot be used anymore
-                        reset_n = -2;
-                        for(int ii=0; ii<ninst; ++ii)
-                            if(dyn->insts[ii].x64.jmp == next) {
-                                reset_n = ii;
-                                ii=ninst;
-                            }
+                        // and pred table is not ready yet
+                        reset_n = get_first_jump(dyn, next);
                     }
                     if(box64_dynarec_dump) dynarec_log(LOG_NONE, "Extend block %p, %s%p -> %p (ninst=%d, jump from %d)\n", dyn, dyn->insts[ninst].x64.has_callret?"(opt. call) ":"", (void*)addr, (void*)next, ninst, dyn->insts[ninst].x64.has_callret?ninst:reset_n);
                 } else if(next && (next-addr)<box64_dynarec_forward && (getProtection(next)&PROT_READ)/*box64_dynarec_bigblock>=stopblock*/) {
diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c
index 9cdedfe7..ec59707d 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_3.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_3.c
@@ -767,7 +767,6 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         SETFLAGS(X_ALL, SF_SET);    // Hack to set flags to "dont'care" state
                     }
                     // regular call
-                    //BARRIER_NEXT(1);
                     if(box64_dynarec_callret && box64_dynarec_bigblock>1) {
                         BARRIER(BARRIER_FULL);
                     } else {
diff --git a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h
index 8854a7f7..89c4214e 100644
--- a/src/dynarec/rv64/dynarec_rv64_pass0.h
+++ b/src/dynarec/rv64/dynarec_rv64_pass0.h
@@ -16,9 +16,9 @@
         dyn->f.pending=(B)&SF_SET_PENDING;      \
         dyn->f.dfnone=((B)&SF_SET)?1:0;
 #define EMIT(A)     
-#define JUMP(A, C)         add_next(dyn, (uintptr_t)A); dyn->insts[ninst].x64.jmp = A; dyn->insts[ninst].x64.jmp_cond = C
+#define JUMP(A, C)         add_jump(dyn, ninst); add_next(dyn, (uintptr_t)A); dyn->insts[ninst].x64.jmp = A; dyn->insts[ninst].x64.jmp_cond = C
 #define BARRIER(A)      if(A!=BARRIER_MAYBE) {fpu_purgecache(dyn, ninst, 0, x1, x2, x3); dyn->insts[ninst].x64.barrier = A;} else dyn->insts[ninst].barrier_maybe = 1
-#define BARRIER_NEXT(A) dyn->insts[ninst+1].x64.barrier_next = A
+#define BARRIER_NEXT(A) dyn->insts[ninst].x64.barrier_next = A
 #define SET_HASCALLRET()    dyn->insts[ninst].x64.has_callret = 1
 #define NEW_INST \
         ++dyn->size;                            \
diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h
index b7058c9a..907de0a3 100644
--- a/src/dynarec/rv64/dynarec_rv64_private.h
+++ b/src/dynarec/rv64/dynarec_rv64_private.h
@@ -112,6 +112,9 @@ typedef struct dynarec_rv64_s {
     uintptr_t*          next;       // variable array of "next" jump address
     int                 next_sz;
     int                 next_cap;
+    int*                jmps;       // variable array of jump instructions
+    int                 jmp_sz;
+    int                 jmp_cap;
     int*                predecessor;// single array of all predecessor
     dynablock_t*        dynablock;
     instsize_t*         instsize;
@@ -132,6 +135,8 @@ typedef struct dynarec_rv64_s {
 
 void add_next(dynarec_rv64_t *dyn, uintptr_t addr);
 uintptr_t get_closest_next(dynarec_rv64_t *dyn, uintptr_t addr);
+void add_jump(dynarec_rv64_t *dyn, int ninst);
+int get_first_jump(dynarec_rv64_t *dyn, int next);
 int is_nops(dynarec_rv64_t *dyn, uintptr_t addr, int n);
 int is_instructions(dynarec_rv64_t *dyn, uintptr_t addr, int n);
 
diff --git a/src/main.c b/src/main.c
index 383df52e..26185aeb 100644
--- a/src/main.c
+++ b/src/main.c
@@ -60,7 +60,7 @@ int box64_dynarec_x87double = 0;
 int box64_dynarec_fastnan = 1;
 int box64_dynarec_fastround = 1;
 int box64_dynarec_safeflags = 1;
-int box64_dynarec_callret = 0;
+int box64_dynarec_callret = 1;
 int box64_dynarec_hotpage = 0;
 int box64_dynarec_fastpage = 0;
 int box64_dynarec_bleeding_edge = 1;
@@ -656,6 +656,8 @@ void LoadLogEnv()
         }
         if(box64_dynarec_callret)
             printf_log(LOG_INFO, "Dynarec will optimize CALL/RET\n");
+        else
+            printf_log(LOG_INFO, "Dynarec will not optimize CALL/RET\n");
     }
     p = getenv("BOX64_DYNAREC_BLEEDING_EDGE");
     if(p) {