about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2024-01-05 23:55:25 +0800
committer GitHub <noreply@github.com> 2024-01-05 16:55:25 +0100
commit ade0aa770aabe99f5f54b228208059818ae08a89 (patch)
tree e9aa48d6d60d1db62cbdf2bf74d31f0c01368ccc /src
parent a5f2f3b3f1ecdd8651d1c4687a5ab6c0c3cf6546 (diff)
download box64-ade0aa770aabe99f5f54b228208059818ae08a89.tar.gz
box64-ade0aa770aabe99f5f54b228208059818ae08a89.zip
[DYNAREC_RV64] Added CALL/RET optimization (#1183)
* [DYNAREC_RV64] Added CALL/RET optimization

* More hacks on the call/ret optimization

* Small fixes, but still not working

* More fixes

* More fixes
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/dynarec.c 13
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00_3.c 36
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.c 39
-rw-r--r-- src/dynarec/rv64/rv64_epilog.S 3
-rw-r--r-- src/dynarec/rv64/rv64_prolog.S 8
-rw-r--r-- src/emu/x64emu_private.h 6
-rw-r--r-- src/libtools/signals.c 41
7 files changed, 91 insertions, 55 deletions
diff --git a/src/dynarec/dynarec.c b/src/dynarec/dynarec.c
index a46997f8..6fef6935 100644
--- a/src/dynarec/dynarec.c
+++ b/src/dynarec/dynarec.c
@@ -129,16 +129,22 @@ void DynaRun(x64emu_t* emu)
     JUMPBUFF jmpbuf[1] = {0};
     int skip = 0;
     JUMPBUFF *old_jmpbuf = emu->jmpbuf;
+    #ifdef RV64
+    uintptr_t old_savesp = emu->xSPSave;
+    #endif
     emu->flags.jmpbuf_ready = 0;
 
     while(!(emu->quit)) {
         if(!emu->jmpbuf || (emu->flags.need_jmpbuf && emu->jmpbuf!=jmpbuf)) {
             emu->jmpbuf = jmpbuf;
+            #ifdef RV64
+            emu->old_savedsp = emu->xSPSave;
+            #endif
             emu->flags.jmpbuf_ready = 1;
             #ifdef ANDROID
-            if((skip=sigsetjmp(*(JUMPBUFF*)emu->jmpbuf, 1))) 
+            if((skip=sigsetjmp(*(JUMPBUFF*)emu->jmpbuf, 1)))
             #else
-            if((skip=sigsetjmp(emu->jmpbuf, 1))) 
+            if((skip=sigsetjmp(emu->jmpbuf, 1)))
             #endif
             {
                 printf_log(LOG_DEBUG, "Setjmp DynaRun, fs=0x%x\n", emu->segs[_FS]);
@@ -192,4 +198,7 @@ void DynaRun(x64emu_t* emu)
     }
     // clear the setjmp
     emu->jmpbuf = old_jmpbuf;
+    #ifdef RV64
+    emu->xSPSave = old_savesp;
+    #endif
 }
diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c
index 2f55ab16..6d1296bd 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_3.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_3.c
@@ -880,22 +880,25 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         TABLE64(x2, addr);
                     }
                     PUSH1z(x2);
-                    // TODO: Add support for CALLRET optim
-                    /*if(box64_dynarec_callret) {
+                    if(box64_dynarec_callret) {
+                        SET_HASCALLRET();
                         // Push actual return address
                         if(addr < (dyn->start+dyn->isize)) {
                             // there is a next...
                             j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0;
-                            ADR_S20(x4, j64);
+                            AUIPC(x4, ((j64 + 0x800) >> 12) & 0xfffff);
+                            ADDI(x4, x4, j64 & 0xfff);
+                            MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2);
                         } else {
+                            MESSAGE(LOG_NONE, "\tCALLRET set return to Jmptable(%p)\n", (void*)addr);
                             j64 = getJumpTableAddress64(addr);
                             TABLE64(x4, j64);
-                            LDR(x4, x4, 0);
+                            LD(x4, x4, 0);
                         }
-                        PUSH1(x4);
-                        PUSH1(x2);
-                    } else */ //CALLRET optim disable for now.
-                    {
+                        ADDI(xSP, xSP, -16);
+                        SD(x4, xSP, 0);
+                        SD(x2, xSP, 8);
+                    } else {
                         *ok = 0;
                         *need_epilog = 0;
                     }
@@ -1249,20 +1252,25 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         *ok = 0;
                     }
                     GETIP_(addr);
-                    // TODO: Add suport for CALLRET optim
-                    /*if(box64_dynarec_callret) {
+                    if(box64_dynarec_callret) {
+                        SET_HASCALLRET();
                         // Push actual return address
                         if(addr < (dyn->start+dyn->isize)) {
                             // there is a next...
                             j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0;
-                            ADR_S20(x4, j64);
+                            AUIPC(x4, ((j64 + 0x800) >> 12) & 0xfffff);
+                            ADDI(x4, x4, j64 & 0xfff);
+                            MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2);
                         } else {
+                            MESSAGE(LOG_NONE, "\tCALLRET set return to Jmptable(%p)\n", (void*)addr);
                             j64 = getJumpTableAddress64(addr);
                             TABLE64(x4, j64);
-                            LDRx_U12(x4, x4, 0);
+                            LD(x4, x4, 0);
                         }
-                        STPx_S7_preindex(x4, xRIP, xSP, -16);
-                    }*/
+                        ADDI(xSP, xSP, -16);
+                        SD(x4, xSP, 0);
+                        SD(xRIP, xSP, 8);
+                    }
                     PUSH1z(xRIP);
                     jump_to_next(dyn, 0, ed, ninst);
                     break;
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index 7ef65dc8..2e887b84 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -527,16 +527,18 @@ void ret_to_epilog(dynarec_rv64_t* dyn, int ninst, rex_t rex)
     POP1z(xRIP);
     MVz(x1, xRIP);
     SMEND();
-    /*if(box64_dynarec_callret) {
+    if (box64_dynarec_callret) {
         // pop the actual return address from RV64 stack
-        LDPx_S7_offset(x2, x6, xSP, 0);
-        CBZx(x6, 5*4);
-        ADDx_U12(xSP, xSP, 16);
-        SUBx_REG(x6, x6, xRIP); // is it the right address?
-        CBNZx(x6, 2*4);
-        BLR(x2);
-        // not the correct return address, regular jump
-    }*/
+        LD(x2, xSP, 0);     // native addr
+        LD(x6, xSP, 8);     // x86 addr
+        ADDI(xSP, xSP, 16); // pop
+        BNE(x6, xRIP, 2*4); // is it the right address?
+        JALR(x2);
+        // not the correct return address, regular jump, but purge the stack first, it's unsync now...
+        LD(xSP, xEmu, offsetof(x64emu_t, xSPSave));
+        ADDI(xSP, xSP, -16);
+    }
+
     uintptr_t tbl = getJumpTable64();
     MOV64x(x3, tbl);
     SRLI(x2, xRIP, JMPTABL_START3);
@@ -581,16 +583,17 @@ void retn_to_epilog(dynarec_rv64_t* dyn, int ninst, rex_t rex, int n)
     }
     MVz(x1, xRIP);
     SMEND();
-    /*if(box64_dynarec_callret) {
+    if (box64_dynarec_callret) {
         // pop the actual return address from RV64 stack
-        LDPx_S7_offset(x2, x6, xSP, 0);
-        CBZx(x6, 5*4);
-        ADDx_U12(xSP, xSP, 16);
-        SUBx_REG(x6, x6, xRIP); // is it the right address?
-        CBNZx(x6, 2*4);
-        BLR(x2);
-        // not the correct return address, regular jump
-    }*/
+        LD(x2, xSP, 0);     // native addr
+        LD(x6, xSP, 8);     // x86 addr
+        ADDI(xSP, xSP, 16); // pop
+        BNE(x6, xRIP, 2*4); // is it the right address?
+        JALR(x2);
+        // not the correct return address, regular jump, but purge the stack first, it's unsync now...
+        LD(xSP, xEmu, offsetof(x64emu_t, xSPSave));
+        ADDI(xSP, xSP, -16);
+    }
     uintptr_t tbl = getJumpTable64();
     MOV64x(x3, tbl);
     SRLI(x2, xRIP, JMPTABL_START3);
diff --git a/src/dynarec/rv64/rv64_epilog.S b/src/dynarec/rv64/rv64_epilog.S
index 17dc117f..820dff02 100644
--- a/src/dynarec/rv64/rv64_epilog.S
+++ b/src/dynarec/rv64/rv64_epilog.S
@@ -37,6 +37,9 @@ rv64_epilog:
     sd      x7, 136(a0)     // put back reg value in emu, including EIP (so x7 must be EIP now)
     // fallback to epilog_fast now, just restoring saved regs
 rv64_epilog_fast:
+    ld      sp, 552(a0) // restore saved sp from emu->xSPSave, see rv64_prolog
+    ld      x9, -8(sp)
+    sd      x9, 552(a0) // put back old value
     ld      ra, (sp)  // save ra
     ld      x8, 8(sp) // save fp
     ld      x18, (2*8)(sp)
diff --git a/src/dynarec/rv64/rv64_prolog.S b/src/dynarec/rv64/rv64_prolog.S
index 96a85d3b..9a780bd6 100644
--- a/src/dynarec/rv64/rv64_prolog.S
+++ b/src/dynarec/rv64/rv64_prolog.S
@@ -6,8 +6,6 @@
 .text
 .align 4
 
-.extern rv64_next
-
 .global rv64_prolog
 rv64_prolog:
     //save all 18 used register
@@ -59,6 +57,12 @@ rv64_prolog:
     srli    x5, x8, 11-5
     andi    x5, x5, 1<<5
     or      x8, x8, x5
+    ld      x5, 552(a0) // grab an old value of emu->xSPSave
+    sd      sp, 552(a0) // save current sp to emu->xSPSave
+    // push sentinel onto the stack
+    sd      x5, -16(sp)
+    sd      zero, -8(sp)
+    addi    sp, sp, -16
     // setup xMASK
     xori    x5, x0, -1
     srli    x5, x5, 32
diff --git a/src/emu/x64emu_private.h b/src/emu/x64emu_private.h
index aa6584a7..0c994e59 100644
--- a/src/emu/x64emu_private.h
+++ b/src/emu/x64emu_private.h
@@ -67,6 +67,9 @@ typedef struct x64emu_s {
 	x87control_t cw;
     uint16_t    dummy_cw;   // align...
     mmxcontrol_t mxcsr;
+    #ifdef RV64         // it would be better to use a dedicated register for this like arm64 xSavedSP, but we're running out of free registers.
+    uintptr_t xSPSave;  // sp base value of current dynarec frame, used by call/ret optimization to reset the stack on a mismatch.
+    #endif
     fpu_ld_t    fpu_ld[8]; // for long double emulation / 80bits fld fst
     fpu_ll_t    fpu_ll[8]; // for 64bits fild / fist sequence
 	fpu_p_reg_t p_regs[8];
@@ -114,6 +117,9 @@ typedef struct x64emu_s {
     void*       init_stack; // initial stack (owned or not)
     uint32_t    size_stack; // stack size (owned or not)
     JUMPBUFF*   jmpbuf;
+    #ifdef RV64
+    uintptr_t   old_savedsp;
+    #endif
 
     x64_ucontext_t *uc_link; // to handle setcontext
 
diff --git a/src/libtools/signals.c b/src/libtools/signals.c
index cc9c4968..117242c7 100644
--- a/src/libtools/signals.c
+++ b/src/libtools/signals.c
@@ -304,11 +304,11 @@ uint64_t RunFunctionHandler(int* exit, int dynarec, x64_ucontext_t* sigcontext,
     if(box64_dynarec_test)
         emu->test.test = 0;
     #endif
-    
+
     /*SetFS(emu, default_fs);*/
     for (int i=0; i<6; ++i)
         emu->segs_serial[i] = 0;
-        
+
     if(nargs>6)
         R_RSP -= (nargs-6)*sizeof(void*);   // need to push in reverse order
 
@@ -333,7 +333,7 @@ uint64_t RunFunctionHandler(int* exit, int dynarec, x64_ucontext_t* sigcontext,
     emu->flags.quitonlongjmp = 2;
     int old_cs = R_CS;
     R_CS = 0x33;
-    
+
     emu->eflags.x64 &= ~(1<<F_TF); // this one needs to cleared
 
     if(dynarec)
@@ -821,7 +821,7 @@ void my_sigactionhandler_oldcode(int32_t sig, int simple, siginfo_t* info, void
     // get that actual ESP first!
     x64emu_t *emu = thread_get_emu();
     uintptr_t frame = R_RSP;
-#if defined(DYNAREC) 
+#if defined(DYNAREC)
 #if defined(ARM64)
     dynablock_t* db = (dynablock_t*)cur_db;//FindDynablockFromNativeAddress(pc);
     ucontext_t *p = (ucontext_t *)ucntx;
@@ -1149,6 +1149,9 @@ void my_sigactionhandler_oldcode(int32_t sig, int simple, siginfo_t* info, void
             if(Locks & is_dyndump_locked)
                 CancelBlock64(1);
             #endif
+            #ifdef RV64
+            emu->xSPSave = emu->old_savedsp;
+            #endif
             #ifdef ANDROID
             siglongjmp(*emu->jmpbuf, 1);
             #else
@@ -1213,8 +1216,8 @@ static pthread_mutex_t mutex_dynarec_prot = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER
 #define lock_signal()     mutex_lock(&mutex_dynarec_prot)
 #define unlock_signal()   mutex_unlock(&mutex_dynarec_prot)
 #else   // USE_SIGNAL_MUTEX
-#define lock_signal()     
-#define unlock_signal()   
+#define lock_signal()
+#define unlock_signal()
 #endif
 
 extern int box64_quit;
@@ -1616,14 +1619,14 @@ exit(-1);
             uint32_t hash = 0;
             if(db)
                 hash = X31_hash_code(db->x64_addr, db->x64_size);
-            printf_log(log_minimum, "%04d|%s @%p (%s) (x64pc=%p/%s:\"%s\", rsp=%p, stack=%p:%p own=%p fp=%p), for accessing %p (code=%d/prot=%x), db=%p(%p:%p/%p:%p/%s:%s, hash:%x/%x) handler=%p", 
-                GetTID(), signame, pc, name, (void*)x64pc, elfname?elfname:"???", x64name?x64name:"???", rsp, 
-                emu->init_stack, emu->init_stack+emu->size_stack, emu->stack2free, (void*)R_RBP, 
-                addr, info->si_code, 
-                prot, db, db?db->block:0, db?(db->block+db->size):0, 
-                db?db->x64_addr:0, db?(db->x64_addr+db->x64_size):0, 
-                getAddrFunctionName((uintptr_t)(db?db->x64_addr:0)), 
-                (db?getNeedTest((uintptr_t)db->x64_addr):0)?"need_stest":"clean", db?db->hash:0, hash, 
+            printf_log(log_minimum, "%04d|%s @%p (%s) (x64pc=%p/%s:\"%s\", rsp=%p, stack=%p:%p own=%p fp=%p), for accessing %p (code=%d/prot=%x), db=%p(%p:%p/%p:%p/%s:%s, hash:%x/%x) handler=%p",
+                GetTID(), signame, pc, name, (void*)x64pc, elfname?elfname:"???", x64name?x64name:"???", rsp,
+                emu->init_stack, emu->init_stack+emu->size_stack, emu->stack2free, (void*)R_RBP,
+                addr, info->si_code,
+                prot, db, db?db->block:0, db?(db->block+db->size):0,
+                db?db->x64_addr:0, db?(db->x64_addr+db->x64_size):0,
+                getAddrFunctionName((uintptr_t)(db?db->x64_addr:0)),
+                (db?getNeedTest((uintptr_t)db->x64_addr):0)?"need_stest":"clean", db?db->hash:0, hash,
                 (void*)my_context->signals[sig]);
 #if defined(ARM64)
             if(db) {
@@ -1797,7 +1800,7 @@ EXPORT sighandler_t my_signal(x64emu_t* emu, int signum, sighandler_t handler)
         newact.sa_sigaction = my_sigactionhandler;
         sigaction(signum, &newact, &oldact);
         return oldact.sa_handler;
-    } else 
+    } else
         return signal(signum, handler);
 }
 EXPORT sighandler_t my___sysv_signal(x64emu_t* emu, int signum, sighandler_t handler) __attribute__((alias("my_signal")));
@@ -1810,7 +1813,7 @@ int EXPORT my_sigaction(x64emu_t* emu, int signum, const x64_sigaction_t *act, x
         errno = EINVAL;
         return -1;
     }
-    
+
     if(signum==SIGSEGV && emu->context->no_sigsegv)
         return 0;
 
@@ -1867,7 +1870,7 @@ int EXPORT my_syscall_rt_sigaction(x64emu_t* emu, int signum, const x64_sigactio
         errno = EINVAL;
         return -1;
     }
-    
+
     if(signum==SIGSEGV && emu->context->no_sigsegv)
         return 0;
     // TODO, how to handle sigsetsize>4?!
@@ -2099,7 +2102,7 @@ EXPORT int my_makecontext(x64emu_t* emu, void* ucp, void* fnc, int32_t argc, int
     --rsp;
     *rsp = my_context->exit_bridge;
     u->uc_mcontext.gregs[X64_RSP] = (uintptr_t)rsp;
-    
+
     return 0;
 }
 
@@ -2118,7 +2121,7 @@ static void atfork_child_dynarec_prot(void)
     #ifdef USE_CUSTOM_MUTEX
     native_lock_store(&mutex_dynarec_prot, 0);
     #else
-    pthread_mutex_t tmp = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP; 
+    pthread_mutex_t tmp = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP;
     memcpy(&mutex_dynarec_prot, &tmp, sizeof(mutex_dynarec_prot));
     #endif
 }