about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: ptitSeb <sebastien.chev@gmail.com> 2022-11-29 18:48:59 +0100
committer: ptitSeb <sebastien.chev@gmail.com> 2022-11-29 18:48:59 +0100
commit: c08268d0ae1977ebaaf8ecde7216461525b08238 (patch)
tree: 43b9edebd7594b120880bf765453fe51d8104d33 /src
parent: 0e4d031054b7980edc806ae131ea8ce565632dbb (diff)
download: box64-c08268d0ae1977ebaaf8ecde7216461525b08238.tar.gz
download: box64-c08268d0ae1977ebaaf8ecde7216461525b08238.zip
[DYNAREC] Added BOX64_DYNAREC_CALLRET option, in rcfile too (faster handling of CALL/RET opcode, not compatible with JIT/Dynarec)
Diffstat (limited to 'src')
-rwxr-xr-x  src/dynarec/arm64/arm64_emitter.h  1
-rwxr-xr-x  src/dynarec/arm64/arm64_epilog.S  6
-rwxr-xr-x  src/dynarec/arm64/arm64_next.S  1
-rwxr-xr-x  src/dynarec/arm64/arm64_prolog.S  9
-rwxr-xr-x  src/dynarec/arm64/dynarec_arm64_00.c  44
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_64.c  20
-rwxr-xr-x  src/dynarec/arm64/dynarec_arm64_helper.c  28
-rwxr-xr-x  src/include/debug.h  1
-rwxr-xr-x  src/main.c  10
-rw-r--r--  src/tools/rcfile.c  2
10 files changed, 107 insertions, 15 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index db4ef096..a119f14c 100755
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -24,6 +24,7 @@
 #define xR15    25
 #define xFlags  26
 #define xRIP    27
+#define xSavedSP 28
 // 32bits version
 #define wEAX    xRAX
 #define wECX    xRCX
diff --git a/src/dynarec/arm64/arm64_epilog.S b/src/dynarec/arm64/arm64_epilog.S
index f2704efd..2e186265 100755
--- a/src/dynarec/arm64/arm64_epilog.S
+++ b/src/dynarec/arm64/arm64_epilog.S
@@ -19,12 +19,13 @@ arm64_epilog:
     stp     x24, x25, [x0, (8 * 14)]
     stp     x26, x27, [x0, (8 * 16)] // put back reg value in emu, including EIP (so x27 must be EIP now)
     //restore all used register
+    add     sp, x28, 0
     //vpop     {d8-d15}
     ldp     x19, x20, [sp, (8 * 0)]
     ldp     x21, x22, [sp, (8 * 2)]
     ldp     x23, x24, [sp, (8 * 4)]
     ldp     x25, x26, [sp, (8 * 6)]
-    ldr     x27, [sp, (8 * 8)]
+    ldp     x27, x28, [sp, (8 * 8)]
     ldp     d8, d9,   [sp, (8 *10)]
     ldp     d10, d11, [sp, (8 *12)]
     ldp     d12, d13, [sp, (8 *14)]
@@ -38,12 +39,13 @@ arm64_epilog:
 .global arm64_epilog_fast
 arm64_epilog_fast:
     //restore all used register
+    add     sp, x28, 0
     //vpop     {d8-d15}
     ldp     x19, x20, [sp, (8 * 0)]
     ldp     x21, x22, [sp, (8 * 2)]
     ldp     x23, x24, [sp, (8 * 4)]
     ldp     x25, x26, [sp, (8 * 6)]
-    ldr     x27, [sp, (8 * 8)]
+    ldp     x27, x28, [sp, (8 * 8)]
     ldp     d8, d9,   [sp, (8 *10)]
     ldp     d10, d11, [sp, (8 *12)]
     ldp     d12, d13, [sp, (8 *14)]
diff --git a/src/dynarec/arm64/arm64_next.S b/src/dynarec/arm64/arm64_next.S
index 837dfb12..f96a50d7 100755
--- a/src/dynarec/arm64/arm64_next.S
+++ b/src/dynarec/arm64/arm64_next.S
@@ -9,6 +9,7 @@
 .extern LinkNext
 
 .global arm64_next
+
 arm64_next:
     // emu is r0
     // IP address is r1
diff --git a/src/dynarec/arm64/arm64_prolog.S b/src/dynarec/arm64/arm64_prolog.S
index 12998b47..5635fde2 100755
--- a/src/dynarec/arm64/arm64_prolog.S
+++ b/src/dynarec/arm64/arm64_prolog.S
@@ -6,6 +6,8 @@
 .text
 .align 4
 
+.extern arm64_next
+
 .global arm64_prolog
 arm64_prolog:
     //save all 18 used register
@@ -15,7 +17,7 @@ arm64_prolog:
     stp     x21, x22, [sp, (8 * 2)]
     stp     x23, x24, [sp, (8 * 4)]
     stp     x25, x26, [sp, (8 * 6)]
-    str     x27, [sp, (8 * 8)]
+    stp     x27, X28, [sp, (8 * 8)]
     stp     d8, d9,   [sp, (8 *10)]
     stp     d10, d11, [sp, (8 *12)]
     stp     d12, d13, [sp, (8 *14)]
@@ -31,5 +33,10 @@ arm64_prolog:
     ldp     x22, x23, [x0, (8 * 12)]
     ldp     x24, x25, [x0, (8 * 14)]
     ldp     x26, x27, [x0, (8 * 16)]
+    // Push a jump next on the stack
+    adr     x28, arm64_next
+    stp     x28, xzr, [sp, -16]!
+    // Save old xSP in x28
+    add     x28, sp, 16
     //jump to function
     br       x1
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c
index 8a3c1a2e..d5928ef5 100755
--- a/src/dynarec/arm64/dynarec_arm64_00.c
+++ b/src/dynarec/arm64/dynarec_arm64_00.c
@@ -18,6 +18,7 @@
 #include "emu/x64run_private.h"
 #include "x64trace.h"
 #include "dynarec_native.h"
+#include "custommem.h"
 
 #include "arm64_printer.h"
 #include "dynarec_arm64_private.h"
@@ -2154,12 +2155,29 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         SETFLAGS(X_ALL, SF_SET);    // Hack to set flags to "dont'care" state
                     }
                     // regular call
-                    BARRIER(BARRIER_FULL);
                     //BARRIER_NEXT(1);
-                    *need_epilog = 0;
-                    *ok = 0;
+                    if(box64_dynarec_callret && box64_dynarec_bigblock>1) {
+                        BARRIER(BARRIER_FULL);
+                    } else {
+                        BARRIER(BARRIER_FLOAT);
+                        *need_epilog = 0;
+                        *ok = 0;
+                    }
                     TABLE64(x2, addr);
                     PUSH1(x2);
+                    if(box64_dynarec_callret) {
+                        // Push actual return address
+                        if(addr < (dyn->start+dyn->isize)) {
+                            // there is a next...
+                            j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0;
+                            ADR_S20(x4, j64);
+                        } else {
+                            j64 = getJumpTableAddress64(addr);
+                            TABLE64(x4, j64);
+                            LDRx_U12(x4, x4, 0);
+                        }
+                        STPx_S7_preindex(x4, x2, xSP, -16);
+                    }
                     if(addr+i32==0) {   // self modifying code maybe? so use indirect address fetching
                         TABLE64(x4, addr-4);
                         LDRx_U12(x4, x4, 0);
@@ -2487,13 +2505,27 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         SETFLAGS(X_ALL, SF_SET);    //Hack to put flag in "don't care" state
                     }
                     GETEDx(0);
-                    BARRIER(BARRIER_FLOAT);
-                    //BARRIER_NEXT(BARRIER_FULL);
-                    if(!dyn->insts || ninst==dyn->size-1) {
+                    if(box64_dynarec_callret && box64_dynarec_bigblock>1) {
+                        BARRIER(BARRIER_FULL);
+                    } else {
+                        BARRIER(BARRIER_FLOAT);
                         *need_epilog = 0;
                         *ok = 0;
                     }
                     GETIP_(addr);
+                    if(box64_dynarec_callret) {
+                        // Push actual return address
+                        if(addr < (dyn->start+dyn->isize)) {
+                            // there is a next...
+                            j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0;
+                            ADR_S20(x4, j64);
+                        } else {
+                            j64 = getJumpTableAddress64(addr);
+                            TABLE64(x4, j64);
+                            LDRx_U12(x4, x4, 0);
+                        }
+                        STPx_S7_preindex(x4, xRIP, xSP, -16);
+                    }
                     PUSH1(xRIP);
                     jump_to_next(dyn, 0, ed, ninst);
                     break;
diff --git a/src/dynarec/arm64/dynarec_arm64_64.c b/src/dynarec/arm64/dynarec_arm64_64.c
index 6d4ac7d6..23507d1f 100644
--- a/src/dynarec/arm64/dynarec_arm64_64.c
+++ b/src/dynarec/arm64/dynarec_arm64_64.c
@@ -16,6 +16,7 @@
 #include "emu/x64run_private.h"
 #include "x64trace.h"
 #include "dynarec_native.h"
+#include "custommem.h"
 
 #include "arm64_printer.h"
 #include "dynarec_arm64_private.h"
@@ -970,12 +971,27 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         SETFLAGS(X_ALL, SF_SET);    //Hack to put flag in "don't care" state
                     }
                     GETEDOx(x6, 0);
-                    BARRIER(BARRIER_FLOAT);
-                    if(!dyn->insts || ninst==dyn->size-1) {
+                    if(box64_dynarec_callret && box64_dynarec_bigblock>1) {
+                        BARRIER(BARRIER_FULL);
+                    } else {
+                        BARRIER(BARRIER_FLOAT);
                         *need_epilog = 0;
                         *ok = 0;
                     }
                     GETIP_(addr);
+                    if(box64_dynarec_callret) {
+                        // Push actual return address
+                        if(addr < (dyn->start+dyn->isize)) {
+                            // there is a next...
+                            j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0;
+                            ADR_S20(x4, j64);
+                        } else {
+                            j64 = getJumpTableAddress64(addr);
+                            TABLE64(x4, j64);
+                            LDRx_U12(x4, x4, 0);
+                        }
+                        STPx_S7_preindex(x4, xRIP, xSP, -16);
+                    }
                     PUSH1(xRIP);
                     jump_to_next(dyn, 0, ed, ninst);
                     break;
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index d3d37a0c..5fecf8a2 100755
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -436,6 +436,18 @@ void ret_to_epilog(dynarec_arm_t* dyn, int ninst)
     MAYUSE(dyn); MAYUSE(ninst);
     MESSAGE(LOG_DUMP, "Ret to epilog\n");
     POP1(xRIP);
+    MOVx_REG(x1, xRIP);
+    SMEND();
+    if(box64_dynarec_callret) {
+        // pop the actual return address for ARM stack
+        LDPx_S7_offset(x2, x6, xSP, 0);
+        CBZx(x6, 5*4);
+        ADDx_U12(xSP, xSP, 16);
+        SUBx_REG(x6, x6, xRIP); // is it the right address?
+        CBNZx(x6, 2*4);
+        BLR(x2);
+        // not the correct return address, regular jump
+    }
     uintptr_t tbl = getJumpTable64();
     MOV64x(x2, tbl);
     UBFXx(x3, xRIP, 48, JMPTABL_SHIFT);
@@ -446,8 +458,6 @@ void ret_to_epilog(dynarec_arm_t* dyn, int ninst)
     LDRx_REG_LSL3(x2, x2, x3);
     UBFXx(x3, xRIP, 0, JMPTABL_SHIFT);
     LDRx_REG_LSL3(x2, x2, x3);
-    MOVx_REG(x1, xRIP);
-    SMEND();
     BLR(x2); // save LR
     CLEARIP();
 }
@@ -463,6 +473,18 @@ void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n)
     } else {
         ADDx_U12(xRSP, xRSP, n);
     }
+    MOVx_REG(x1, xRIP);
+    SMEND();
+    if(box64_dynarec_callret) {
+        // pop the actual return address for ARM stack
+        LDPx_S7_offset(x2, x6, xSP, 0);
+        CBZx(x6, 5*4);
+        ADDx_U12(xSP, xSP, 16);
+        SUBx_REG(x6, x6, xRIP); // is it the right address?
+        CBNZx(x6, 2*4);
+        BLR(x2);
+        // not the correct return address, regular jump
+    }
     uintptr_t tbl = getJumpTable64();
     MOV64x(x2, tbl);
     UBFXx(x3, xRIP, 48, JMPTABL_SHIFT);
@@ -473,8 +495,6 @@ void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n)
     LDRx_REG_LSL3(x2, x2, x3);
     UBFXx(x3, xRIP, 0, JMPTABL_SHIFT);
     LDRx_REG_LSL3(x2, x2, x3);
-    MOVx_REG(x1, xRIP);
-    SMEND();
     BLR(x2); // save LR
     CLEARIP();
 }
diff --git a/src/include/debug.h b/src/include/debug.h
index 5a6aec91..3b45911d 100755
--- a/src/include/debug.h
+++ b/src/include/debug.h
@@ -19,6 +19,7 @@ extern int box64_dynarec_strongmem;
 extern int box64_dynarec_fastnan;
 extern int box64_dynarec_x87double;
 extern int box64_dynarec_safeflags;
+extern int box64_dynarec_callret;
 #ifdef ARM64
 extern int arm64_asimd;
 extern int arm64_aes;
diff --git a/src/main.c b/src/main.c
index 6cc2e6d4..da01c4e5 100755
--- a/src/main.c
+++ b/src/main.c
@@ -51,6 +51,7 @@ int box64_dynarec_strongmem = 0;
 int box64_dynarec_x87double = 0;
 int box64_dynarec_fastnan = 1;
 int box64_dynarec_safeflags = 1;
+int box64_dynarec_callret = 0;
 uintptr_t box64_nodynarec_start = 0;
 uintptr_t box64_nodynarec_end = 0;
 #ifdef ARM64
@@ -480,6 +481,15 @@ void LoadLogEnv()
         else
             printf_log(LOG_INFO, "Dynarec will play %s safe with x64 flags\n", (box64_dynarec_safeflags==1)?"moderatly":"it");
     }
+    p = getenv("BOX64_DYNAREC_CALLRET");    // only "0" or "1" is accepted; anything else keeps the current value
+    if(p) {
+        if(strlen(p)==1) {
+            if(p[0]>='0' && p[0]<='1')
+                box64_dynarec_callret = p[0]-'0';
+        }
+        if(box64_dynarec_callret)   // log only when the CALL/RET optimization is actually enabled
+            printf_log(LOG_INFO, "Dynarec will optimize CALL/RET\n");
+    }
     p = getenv("BOX64_NODYNAREC");
     if(p) {
         if (strchr(p,'-')) {
diff --git a/src/tools/rcfile.c b/src/tools/rcfile.c
index f3406301..93ef8081 100644
--- a/src/tools/rcfile.c
+++ b/src/tools/rcfile.c
@@ -75,6 +75,7 @@ ENTRYINT(BOX64_DYNAREC_STRONGMEM, box64_dynarec_strongmem, 0, 2, 2) \
 ENTRYBOOL(BOX64_DYNAREC_X87DOUBLE, box64_dynarec_x87double)         \
 ENTRYBOOL(BOX64_DYNAREC_FASTNAN, box64_dynarec_fastnan)             \
 ENTRYINT(BOX64_DYNAREC_SAFEFLAGS, box64_dynarec_safeflags, 0, 2, 2) \
+ENTRYBOOL(BOX64_DYNAREC_CALLRET, box64_dynarec_callret)             \
 ENTRYSTRING_(BOX64_NODYNAREC, box64_nodynarec)                      \
 
 #else
@@ -87,6 +88,7 @@ IGNORE(BOX64_DYNAREC_STRONGMEM)                                     \
 IGNORE(BOX64_DYNAREC_X87DOUBLE)                                     \
 IGNORE(BOX64_DYNAREC_FASTNAN)                                       \
 IGNORE(BOX64_DYNAREC_SAFEFLAGS)                                     \
+IGNORE(BOX64_DYNAREC_CALLRET)                                       \
 IGNORE(BOX64_NODYNAREC)                                             \
 
 #endif