diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-01-05 23:55:25 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-01-05 16:55:25 +0100 |
| commit | ade0aa770aabe99f5f54b228208059818ae08a89 (patch) | |
| tree | e9aa48d6d60d1db62cbdf2bf74d31f0c01368ccc /src | |
| parent | a5f2f3b3f1ecdd8651d1c4687a5ab6c0c3cf6546 (diff) | |
| download | box64-ade0aa770aabe99f5f54b228208059818ae08a89.tar.gz box64-ade0aa770aabe99f5f54b228208059818ae08a89.zip | |
[DYNAREC_RV64] Added CALL/RET optimization (#1183)
* [DYNAREC_RV64] Added CALL/RET optimization
* More hacks on the call/ret optimization
* Small fixes, but still not working
* More fixes
* More fixes
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/dynarec.c | 13 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00_3.c | 36 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 39 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_epilog.S | 3 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_prolog.S | 8 | ||||
| -rw-r--r-- | src/emu/x64emu_private.h | 6 | ||||
| -rw-r--r-- | src/libtools/signals.c | 41 |
7 files changed, 91 insertions, 55 deletions
diff --git a/src/dynarec/dynarec.c b/src/dynarec/dynarec.c index a46997f8..6fef6935 100644 --- a/src/dynarec/dynarec.c +++ b/src/dynarec/dynarec.c @@ -129,16 +129,22 @@ void DynaRun(x64emu_t* emu) JUMPBUFF jmpbuf[1] = {0}; int skip = 0; JUMPBUFF *old_jmpbuf = emu->jmpbuf; + #ifdef RV64 + uintptr_t old_savesp = emu->xSPSave; + #endif emu->flags.jmpbuf_ready = 0; while(!(emu->quit)) { if(!emu->jmpbuf || (emu->flags.need_jmpbuf && emu->jmpbuf!=jmpbuf)) { emu->jmpbuf = jmpbuf; + #ifdef RV64 + emu->old_savedsp = emu->xSPSave; + #endif emu->flags.jmpbuf_ready = 1; #ifdef ANDROID - if((skip=sigsetjmp(*(JUMPBUFF*)emu->jmpbuf, 1))) + if((skip=sigsetjmp(*(JUMPBUFF*)emu->jmpbuf, 1))) #else - if((skip=sigsetjmp(emu->jmpbuf, 1))) + if((skip=sigsetjmp(emu->jmpbuf, 1))) #endif { printf_log(LOG_DEBUG, "Setjmp DynaRun, fs=0x%x\n", emu->segs[_FS]); @@ -192,4 +198,7 @@ void DynaRun(x64emu_t* emu) } // clear the setjmp emu->jmpbuf = old_jmpbuf; + #ifdef RV64 + emu->xSPSave = old_savesp; + #endif } diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c index 2f55ab16..6d1296bd 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_3.c +++ b/src/dynarec/rv64/dynarec_rv64_00_3.c @@ -880,22 +880,25 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int TABLE64(x2, addr); } PUSH1z(x2); - // TODO: Add support for CALLRET optim - /*if(box64_dynarec_callret) { + if(box64_dynarec_callret) { + SET_HASCALLRET(); // Push actual return address if(addr < (dyn->start+dyn->isize)) { // there is a next... 
j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; - ADR_S20(x4, j64); + AUIPC(x4, ((j64 + 0x800) >> 12) & 0xfffff); + ADDI(x4, x4, j64 & 0xfff); + MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2); } else { + MESSAGE(LOG_NONE, "\tCALLRET set return to Jmptable(%p)\n", (void*)addr); j64 = getJumpTableAddress64(addr); TABLE64(x4, j64); - LDR(x4, x4, 0); + LD(x4, x4, 0); } - PUSH1(x4); - PUSH1(x2); - } else */ //CALLRET optim disable for now. - { + ADDI(xSP, xSP, -16); + SD(x4, xSP, 0); + SD(x2, xSP, 8); + } else { *ok = 0; *need_epilog = 0; } @@ -1249,20 +1252,25 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int *ok = 0; } GETIP_(addr); - // TODO: Add suport for CALLRET optim - /*if(box64_dynarec_callret) { + if(box64_dynarec_callret) { + SET_HASCALLRET(); // Push actual return address if(addr < (dyn->start+dyn->isize)) { // there is a next... j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; - ADR_S20(x4, j64); + AUIPC(x4, ((j64 + 0x800) >> 12) & 0xfffff); + ADDI(x4, x4, j64 & 0xfff); + MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2); } else { + MESSAGE(LOG_NONE, "\tCALLRET set return to Jmptable(%p)\n", (void*)addr); j64 = getJumpTableAddress64(addr); TABLE64(x4, j64); - LDRx_U12(x4, x4, 0); + LD(x4, x4, 0); } - STPx_S7_preindex(x4, xRIP, xSP, -16); - }*/ + ADDI(xSP, xSP, -16); + SD(x4, xSP, 0); + SD(xRIP, xSP, 8); + } PUSH1z(xRIP); jump_to_next(dyn, 0, ed, ninst); break; diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index 7ef65dc8..2e887b84 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -527,16 +527,18 @@ void ret_to_epilog(dynarec_rv64_t* dyn, int ninst, rex_t rex) POP1z(xRIP); MVz(x1, xRIP); SMEND(); - /*if(box64_dynarec_callret) { + if (box64_dynarec_callret) { // pop the actual return address from RV64 stack - LDPx_S7_offset(x2, x6, xSP, 0); - CBZx(x6, 5*4); - 
ADDx_U12(xSP, xSP, 16); - SUBx_REG(x6, x6, xRIP); // is it the right address? - CBNZx(x6, 2*4); - BLR(x2); - // not the correct return address, regular jump - }*/ + LD(x2, xSP, 0); // native addr + LD(x6, xSP, 8); // x86 addr + ADDI(xSP, xSP, 16); // pop + BNE(x6, xRIP, 2*4); // is it the right address? + JALR(x2); + // not the correct return address, regular jump, but purge the stack first, it's unsync now... + LD(xSP, xEmu, offsetof(x64emu_t, xSPSave)); + ADDI(xSP, xSP, -16); + } + uintptr_t tbl = getJumpTable64(); MOV64x(x3, tbl); SRLI(x2, xRIP, JMPTABL_START3); @@ -581,16 +583,17 @@ void retn_to_epilog(dynarec_rv64_t* dyn, int ninst, rex_t rex, int n) } MVz(x1, xRIP); SMEND(); - /*if(box64_dynarec_callret) { + if (box64_dynarec_callret) { // pop the actual return address from RV64 stack - LDPx_S7_offset(x2, x6, xSP, 0); - CBZx(x6, 5*4); - ADDx_U12(xSP, xSP, 16); - SUBx_REG(x6, x6, xRIP); // is it the right address? - CBNZx(x6, 2*4); - BLR(x2); - // not the correct return address, regular jump - }*/ + LD(x2, xSP, 0); // native addr + LD(x6, xSP, 8); // x86 addr + ADDI(xSP, xSP, 16); // pop + BNE(x6, xRIP, 2*4); // is it the right address? + JALR(x2); + // not the correct return address, regular jump, but purge the stack first, it's unsync now... 
+ LD(xSP, xEmu, offsetof(x64emu_t, xSPSave)); + ADDI(xSP, xSP, -16); + } uintptr_t tbl = getJumpTable64(); MOV64x(x3, tbl); SRLI(x2, xRIP, JMPTABL_START3); diff --git a/src/dynarec/rv64/rv64_epilog.S b/src/dynarec/rv64/rv64_epilog.S index 17dc117f..820dff02 100644 --- a/src/dynarec/rv64/rv64_epilog.S +++ b/src/dynarec/rv64/rv64_epilog.S @@ -37,6 +37,9 @@ rv64_epilog: sd x7, 136(a0) // put back reg value in emu, including EIP (so x7 must be EIP now) // fallback to epilog_fast now, just restoring saved regs rv64_epilog_fast: + ld sp, 552(a0) // restore saved sp from emu->xSPSave, see rv64_prolog + ld x9, -8(sp) + sd x9, 552(a0) // put back old value ld ra, (sp) // save ra ld x8, 8(sp) // save fp ld x18, (2*8)(sp) diff --git a/src/dynarec/rv64/rv64_prolog.S b/src/dynarec/rv64/rv64_prolog.S index 96a85d3b..9a780bd6 100644 --- a/src/dynarec/rv64/rv64_prolog.S +++ b/src/dynarec/rv64/rv64_prolog.S @@ -6,8 +6,6 @@ .text .align 4 -.extern rv64_next - .global rv64_prolog rv64_prolog: //save all 18 used register @@ -59,6 +57,12 @@ rv64_prolog: srli x5, x8, 11-5 andi x5, x5, 1<<5 or x8, x8, x5 + ld x5, 552(a0) // grab an old value of emu->xSPSave + sd sp, 552(a0) // save current sp to emu->xSPSave + // push sentinel onto the stack + sd x5, -16(sp) + sd zero, -8(sp) + addi sp, sp, -16 // setup xMASK xori x5, x0, -1 srli x5, x5, 32 diff --git a/src/emu/x64emu_private.h b/src/emu/x64emu_private.h index aa6584a7..0c994e59 100644 --- a/src/emu/x64emu_private.h +++ b/src/emu/x64emu_private.h @@ -67,6 +67,9 @@ typedef struct x64emu_s { x87control_t cw; uint16_t dummy_cw; // align... mmxcontrol_t mxcsr; + #ifdef RV64 // it would be better to use a dedicated register for this like arm64 xSavedSP, but we're running of of free registers. + uintptr_t xSPSave; // sp base value of current dynarec frame, used by call/ret optimization to reset stack when unmatch. 
+ #endif fpu_ld_t fpu_ld[8]; // for long double emulation / 80bits fld fst fpu_ll_t fpu_ll[8]; // for 64bits fild / fist sequence fpu_p_reg_t p_regs[8]; @@ -114,6 +117,9 @@ typedef struct x64emu_s { void* init_stack; // initial stack (owned or not) uint32_t size_stack; // stack size (owned or not) JUMPBUFF* jmpbuf; + #ifdef RV64 + uintptr_t old_savedsp; + #endif x64_ucontext_t *uc_link; // to handle setcontext diff --git a/src/libtools/signals.c b/src/libtools/signals.c index cc9c4968..117242c7 100644 --- a/src/libtools/signals.c +++ b/src/libtools/signals.c @@ -304,11 +304,11 @@ uint64_t RunFunctionHandler(int* exit, int dynarec, x64_ucontext_t* sigcontext, if(box64_dynarec_test) emu->test.test = 0; #endif - + /*SetFS(emu, default_fs);*/ for (int i=0; i<6; ++i) emu->segs_serial[i] = 0; - + if(nargs>6) R_RSP -= (nargs-6)*sizeof(void*); // need to push in reverse order @@ -333,7 +333,7 @@ uint64_t RunFunctionHandler(int* exit, int dynarec, x64_ucontext_t* sigcontext, emu->flags.quitonlongjmp = 2; int old_cs = R_CS; R_CS = 0x33; - + emu->eflags.x64 &= ~(1<<F_TF); // this one needs to cleared if(dynarec) @@ -821,7 +821,7 @@ void my_sigactionhandler_oldcode(int32_t sig, int simple, siginfo_t* info, void // get that actual ESP first! 
x64emu_t *emu = thread_get_emu(); uintptr_t frame = R_RSP; -#if defined(DYNAREC) +#if defined(DYNAREC) #if defined(ARM64) dynablock_t* db = (dynablock_t*)cur_db;//FindDynablockFromNativeAddress(pc); ucontext_t *p = (ucontext_t *)ucntx; @@ -1149,6 +1149,9 @@ void my_sigactionhandler_oldcode(int32_t sig, int simple, siginfo_t* info, void if(Locks & is_dyndump_locked) CancelBlock64(1); #endif + #ifdef RV64 + emu->xSPSave = emu->old_savedsp; + #endif #ifdef ANDROID siglongjmp(*emu->jmpbuf, 1); #else @@ -1213,8 +1216,8 @@ static pthread_mutex_t mutex_dynarec_prot = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER #define lock_signal() mutex_lock(&mutex_dynarec_prot) #define unlock_signal() mutex_unlock(&mutex_dynarec_prot) #else // USE_SIGNAL_MUTEX -#define lock_signal() -#define unlock_signal() +#define lock_signal() +#define unlock_signal() #endif extern int box64_quit; @@ -1616,14 +1619,14 @@ exit(-1); uint32_t hash = 0; if(db) hash = X31_hash_code(db->x64_addr, db->x64_size); - printf_log(log_minimum, "%04d|%s @%p (%s) (x64pc=%p/%s:\"%s\", rsp=%p, stack=%p:%p own=%p fp=%p), for accessing %p (code=%d/prot=%x), db=%p(%p:%p/%p:%p/%s:%s, hash:%x/%x) handler=%p", - GetTID(), signame, pc, name, (void*)x64pc, elfname?elfname:"???", x64name?x64name:"???", rsp, - emu->init_stack, emu->init_stack+emu->size_stack, emu->stack2free, (void*)R_RBP, - addr, info->si_code, - prot, db, db?db->block:0, db?(db->block+db->size):0, - db?db->x64_addr:0, db?(db->x64_addr+db->x64_size):0, - getAddrFunctionName((uintptr_t)(db?db->x64_addr:0)), - (db?getNeedTest((uintptr_t)db->x64_addr):0)?"need_stest":"clean", db?db->hash:0, hash, + printf_log(log_minimum, "%04d|%s @%p (%s) (x64pc=%p/%s:\"%s\", rsp=%p, stack=%p:%p own=%p fp=%p), for accessing %p (code=%d/prot=%x), db=%p(%p:%p/%p:%p/%s:%s, hash:%x/%x) handler=%p", + GetTID(), signame, pc, name, (void*)x64pc, elfname?elfname:"???", x64name?x64name:"???", rsp, + emu->init_stack, emu->init_stack+emu->size_stack, emu->stack2free, (void*)R_RBP, + addr, 
info->si_code, + prot, db, db?db->block:0, db?(db->block+db->size):0, + db?db->x64_addr:0, db?(db->x64_addr+db->x64_size):0, + getAddrFunctionName((uintptr_t)(db?db->x64_addr:0)), + (db?getNeedTest((uintptr_t)db->x64_addr):0)?"need_stest":"clean", db?db->hash:0, hash, (void*)my_context->signals[sig]); #if defined(ARM64) if(db) { @@ -1797,7 +1800,7 @@ EXPORT sighandler_t my_signal(x64emu_t* emu, int signum, sighandler_t handler) newact.sa_sigaction = my_sigactionhandler; sigaction(signum, &newact, &oldact); return oldact.sa_handler; - } else + } else return signal(signum, handler); } EXPORT sighandler_t my___sysv_signal(x64emu_t* emu, int signum, sighandler_t handler) __attribute__((alias("my_signal"))); @@ -1810,7 +1813,7 @@ int EXPORT my_sigaction(x64emu_t* emu, int signum, const x64_sigaction_t *act, x errno = EINVAL; return -1; } - + if(signum==SIGSEGV && emu->context->no_sigsegv) return 0; @@ -1867,7 +1870,7 @@ int EXPORT my_syscall_rt_sigaction(x64emu_t* emu, int signum, const x64_sigactio errno = EINVAL; return -1; } - + if(signum==SIGSEGV && emu->context->no_sigsegv) return 0; // TODO, how to handle sigsetsize>4?! @@ -2099,7 +2102,7 @@ EXPORT int my_makecontext(x64emu_t* emu, void* ucp, void* fnc, int32_t argc, int --rsp; *rsp = my_context->exit_bridge; u->uc_mcontext.gregs[X64_RSP] = (uintptr_t)rsp; - + return 0; } @@ -2118,7 +2121,7 @@ static void atfork_child_dynarec_prot(void) #ifdef USE_CUSTOM_MUTEX native_lock_store(&mutex_dynarec_prot, 0); #else - pthread_mutex_t tmp = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP; + pthread_mutex_t tmp = PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP; memcpy(&mutex_dynarec_prot, &tmp, sizeof(mutex_dynarec_prot)); #endif } |