diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2023-12-13 13:05:38 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2023-12-13 13:05:38 +0100 |
| commit | 88575ebc5d0df38c709f199e85030fa5435a1a4a (patch) | |
| tree | 5c4e32ac6eb266f3085bb3a9c2583ce9a5031a78 /src | |
| parent | d8a19e98327e723588bfac16d6d96e8730c30199 (diff) | |
| download | box64-88575ebc5d0df38c709f199e85030fa5435a1a4a.tar.gz box64-88575ebc5d0df38c709f199e85030fa5435a1a4a.zip | |
[DYNAREC] Made callret default, improved callret efficiency, and dynarec speed on larger blocks
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_00.c | 6 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_64.c | 4 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_pass0.h | 4 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_private.h | 5 | ||||
| -rw-r--r-- | src/dynarec/dynarec_native.c | 85 | ||||
| -rw-r--r-- | src/dynarec/dynarec_native_pass.c | 10 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00_3.c | 1 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_pass0.h | 4 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_private.h | 5 | ||||
| -rw-r--r-- | src/main.c | 4 |
10 files changed, 91 insertions, 37 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c index 9967f784..697ff0d9 100644 --- a/src/dynarec/arm64/dynarec_arm64_00.c +++ b/src/dynarec/arm64/dynarec_arm64_00.c @@ -2678,7 +2678,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin switch(tmp) { case 3: SETFLAGS(X_ALL, SF_SET); // Hack to set flags to "dont'care" state - //BARRIER_NEXT(BARRIER_FULL); if(dyn->last_ip && (addr-dyn->last_ip<0x1000)) { ADDx_U12(x2, xRIP, addr-dyn->last_ip); } else { @@ -2734,9 +2733,9 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SETFLAGS(X_ALL, SF_SET); // Hack to set flags to "dont'care" state } // regular call - //BARRIER_NEXT(1); if(box64_dynarec_callret && box64_dynarec_bigblock>1) { BARRIER(BARRIER_FULL); + BARRIER_NEXT(BARRIER_FULL); } else { BARRIER(BARRIER_FLOAT); *need_epilog = 0; @@ -2752,7 +2751,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SET_HASCALLRET(); // Push actual return address if(addr < (dyn->start+dyn->isize)) { - BARRIER_NEXT(BARRIER_FULL); // there is a next... j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; ADR_S20(x4, j64); @@ -3149,6 +3147,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin GETEDz(0); if(box64_dynarec_callret && box64_dynarec_bigblock>1) { BARRIER(BARRIER_FULL); + BARRIER_NEXT(BARRIER_FULL); } else { BARRIER(BARRIER_FLOAT); *need_epilog = 0; @@ -3160,7 +3159,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin // Push actual return address if(addr < (dyn->start+dyn->isize)) { // there is a next... - BARRIER_NEXT(BARRIER_FULL); j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; ADR_S20(x4, j64); MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2); diff --git a/src/dynarec/arm64/dynarec_arm64_64.c b/src/dynarec/arm64/dynarec_arm64_64.c index bfb5702f..1c8343bf 100644 --- a/src/dynarec/arm64/dynarec_arm64_64.c +++ b/src/dynarec/arm64/dynarec_arm64_64.c @@ -1121,6 +1121,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin GETEDOz(x6, 0); if(box64_dynarec_callret && box64_dynarec_bigblock>1) { BARRIER(BARRIER_FULL); + BARRIER_NEXT(BARRIER_FULL); } else { BARRIER(BARRIER_FLOAT); *need_epilog = 0; @@ -1128,12 +1129,15 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } GETIP_(addr); if(box64_dynarec_callret) { + SET_HASCALLRET(); // Push actual return address if(addr < (dyn->start+dyn->isize)) { // there is a next... j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; ADR_S20(x4, j64); + MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2); } else { + MESSAGE(LOG_NONE, "\tCALLRET set return to Jmptable(%p)\n", (void*)addr); j64 = getJumpTableAddress64(addr); TABLE64(x4, j64); LDRx_U12(x4, x4, 0); diff --git a/src/dynarec/arm64/dynarec_arm64_pass0.h b/src/dynarec/arm64/dynarec_arm64_pass0.h index 6e2dd09e..918bb3c1 100644 --- a/src/dynarec/arm64/dynarec_arm64_pass0.h +++ b/src/dynarec/arm64/dynarec_arm64_pass0.h @@ -16,9 +16,9 @@ dyn->f.pending=(B)&SF_SET_PENDING; \ dyn->f.dfnone=((B)&SF_SET)?1:0; #define EMIT(A) -#define JUMP(A, C) add_next(dyn, (uintptr_t)A); SMEND(); dyn->insts[ninst].x64.jmp = A; dyn->insts[ninst].x64.jmp_cond = C +#define JUMP(A, C) add_jump(dyn, ninst); add_next(dyn, (uintptr_t)A); SMEND(); dyn->insts[ninst].x64.jmp = A; dyn->insts[ninst].x64.jmp_cond = C #define BARRIER(A) if(A!=BARRIER_MAYBE) {fpu_purgecache(dyn, ninst, 0, x1, x2, x3); dyn->insts[ninst].x64.barrier = A;} else dyn->insts[ninst].barrier_maybe = 1 -#define BARRIER_NEXT(A) dyn->insts[ninst+1].x64.barrier_next = A +#define BARRIER_NEXT(A) dyn->insts[ninst].x64.barrier_next = A #define SET_HASCALLRET() dyn->insts[ninst].x64.has_callret = 1 #define NEW_INST \ ++dyn->size; \ diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h index acbbe2e8..0e5c0e9b 100644 --- a/src/dynarec/arm64/dynarec_arm64_private.h +++ b/src/dynarec/arm64/dynarec_arm64_private.h @@ -102,6 +102,9 @@ typedef struct dynarec_arm_s { uintptr_t* next; // variable array of "next" jump address int next_sz; int next_cap; + int* jmps; // variable array of jump instructions + int jmp_sz; + int jmp_cap; int* predecessor;// single array of all predecessor dynablock_t* dynablock; instsize_t* instsize; @@ -119,6 +122,8 @@ typedef struct dynarec_arm_s { void add_next(dynarec_arm_t *dyn, uintptr_t addr); uintptr_t get_closest_next(dynarec_arm_t *dyn, uintptr_t addr); +void add_jump(dynarec_arm_t *dyn, int ninst); +int get_first_jump(dynarec_arm_t *dyn, int next); int is_nops(dynarec_arm_t *dyn, uintptr_t addr, int n); int is_instructions(dynarec_arm_t *dyn, uintptr_t addr, int n); diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c index 98486d55..a68b5c4f 100644 --- a/src/dynarec/dynarec_native.c +++ b/src/dynarec/dynarec_native.c @@ -94,6 +94,21 @@ uintptr_t get_closest_next(dynarec_native_t *dyn, uintptr_t addr) { } return best; } +void add_jump(dynarec_native_t *dyn, int ninst) { + // add slots + if(dyn->jmp_sz == dyn->jmp_cap) { + dyn->jmp_cap += 64; + dyn->jmps = (int*)dynaRealloc(dyn->jmps, dyn->jmp_cap*sizeof(int)); + } + dyn->jmps[dyn->jmp_sz++] = ninst; +} +int get_first_jump(dynarec_native_t *dyn, int next) { + for(int i=0; i<dyn->jmp_sz; ++i) + if(dyn->insts[dyn->jmps[i]].x64.jmp == next) + return i; + return -2; +} + #define PK(A) (*((uint8_t*)(addr+(A)))) int is_nops(dynarec_native_t *dyn, uintptr_t addr, int n) { @@ -392,6 +407,7 @@ void CancelBlock64(int need_lock) current_helper = NULL; if(helper) { dynaFree(helper->next); + dynaFree(helper->jmps); dynaFree(helper->insts); dynaFree(helper->predecessor); if(helper->table64 && (helper->table64!=(uint64_t*)helper->tablestart)) @@ -471,10 +487,6 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit helper.insts = (instruction_native_t*)dynaCalloc(helper.cap, sizeof(instruction_native_t)); // pass 0, addresses, x64 jump addresses, overall size of the block uintptr_t end = native_pass0(&helper, addr, alternate, is32bits); - // no need for next anymore - dynaFree(helper.next); - helper.next_sz = helper.next_cap = 0; - helper.next = NULL; // basic checks if(!helper.size) { dynarec_log(LOG_INFO, "Warning, null-sized dynarec block (%p)\n", (void*)addr); @@ -493,26 +505,57 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit // compute hash signature uint32_t hash = X31_hash_code((void*)addr, end-addr); // calculate barriers - for(int i=0; i<helper.size; ++i) - if(helper.insts[i].x64.jmp) { - uintptr_t j = helper.insts[i].x64.jmp; - if(j<start || j>=end || j==helper.insts[i].x64.addr) { - if(j==helper.insts[i].x64.addr) // if there is a loop on some opcode, make the block "always to tested" - helper.always_test = 1; - helper.insts[i].x64.jmp_insts = -1; - helper.insts[i].x64.need_after |= X_PEND; - } else { - // find jump address instruction - int k=-1; - for(int i2=0; i2<helper.size && k==-1; ++i2) { - if(helper.insts[i2].x64.addr==j) - k=i2; + for(int ii=0; ii<helper.jmp_sz; ++ii) { + int i = helper.jmps[ii]; + uintptr_t j = helper.insts[i].x64.jmp; + if(j<start || j>=end || j==helper.insts[i].x64.addr) { + if(j==helper.insts[i].x64.addr) // if there is a loop on some opcode, make the block "always to tested" + helper.always_test = 1; + helper.insts[i].x64.jmp_insts = -1; + helper.insts[i].x64.need_after |= X_PEND; + } else { + // find jump address instruction + int k=-1; + int search = ((j>=helper.insts[0].x64.addr) && j<helper.insts[0].x64.addr+helper.isize)?1:0; + int imin = 0; + int imax = helper.size; + int i2 = helper.size/2; + // dichotomy search + while(search) { + if(helper.insts[i2].x64.addr == j) { + k = i2; + search = 0; + } else if(helper.insts[i2].x64.addr>j) { + imax = i2; + i2 = (imax+imin)/2; + } else { + imin = i2; + i2 = (imax+imin)/2; + } + if(search && (imax-imin)<2) { + search = 0; + if(helper.insts[imin].x64.addr==j) + k = imin; + else if(helper.insts[imax].x64.addr==j) + k = imax; } - if(k!=-1 && !helper.insts[i].barrier_maybe) - helper.insts[k].x64.barrier |= BARRIER_FULL; - helper.insts[i].x64.jmp_insts = k; } + /*for(int i2=0; i2<helper.size && k==-1; ++i2) { + if(helper.insts[i2].x64.addr==j) + k=i2; + }*/ + if(k!=-1 && !helper.insts[i].barrier_maybe) + helper.insts[k].x64.barrier |= BARRIER_FULL; + helper.insts[i].x64.jmp_insts = k; } + } + // no need for next and jmps anymore + dynaFree(helper.next); + helper.next_sz = helper.next_cap = 0; + helper.next = NULL; + dynaFree(helper.jmps); + helper.jmp_sz = helper.jmp_cap = 0; + helper.jmps = NULL; // fill predecessors with the jump address fillPredecessors(&helper); diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c index e67a9848..5686e322 100644 --- a/src/dynarec/dynarec_native_pass.c +++ b/src/dynarec/dynarec_native_pass.c @@ -77,9 +77,11 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int dyn->last_ip = 0; // reset IP if some jump are coming here fpu_propagate_stack(dyn, ninst); NEW_INST; + #if STEP == 0 if(ninst && dyn->insts[ninst-1].x64.barrier_next) { BARRIER(dyn->insts[ninst-1].x64.barrier_next); } + #endif if(!ninst) { GOTEST(x1, x2); } @@ -214,12 +216,8 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int dyn->insts[ninst].x64.has_next = 1; // this block actually continue } else { // need to find back that instruction to copy the caches, as previous version cannot be used anymore - reset_n = -2; - for(int ii=0; ii<ninst; ++ii) - if(dyn->insts[ii].x64.jmp == next) { - reset_n = ii; - ii=ninst; - } + // and pred table is not ready yet + reset_n = get_first_jump(dyn, next); } if(box64_dynarec_dump) dynarec_log(LOG_NONE, "Extend block %p, %s%p -> %p (ninst=%d, jump from %d)\n", dyn, dyn->insts[ninst].x64.has_callret?"(opt. call) ":"", (void*)addr, (void*)next, ninst, dyn->insts[ninst].x64.has_callret?ninst:reset_n); } else if(next && (next-addr)<box64_dynarec_forward && (getProtection(next)&PROT_READ)/*box64_dynarec_bigblock>=stopblock*/) { diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c index 9cdedfe7..ec59707d 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_3.c +++ b/src/dynarec/rv64/dynarec_rv64_00_3.c @@ -767,7 +767,6 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SETFLAGS(X_ALL, SF_SET); // Hack to set flags to "dont'care" state } // regular call - //BARRIER_NEXT(1); if(box64_dynarec_callret && box64_dynarec_bigblock>1) { BARRIER(BARRIER_FULL); } else { diff --git a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h index 8854a7f7..89c4214e 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass0.h +++ b/src/dynarec/rv64/dynarec_rv64_pass0.h @@ -16,9 +16,9 @@ dyn->f.pending=(B)&SF_SET_PENDING; \ dyn->f.dfnone=((B)&SF_SET)?1:0; #define EMIT(A) -#define JUMP(A, C) add_next(dyn, (uintptr_t)A); dyn->insts[ninst].x64.jmp = A; dyn->insts[ninst].x64.jmp_cond = C +#define JUMP(A, C) add_jump(dyn, ninst); add_next(dyn, (uintptr_t)A); dyn->insts[ninst].x64.jmp = A; dyn->insts[ninst].x64.jmp_cond = C #define BARRIER(A) if(A!=BARRIER_MAYBE) {fpu_purgecache(dyn, ninst, 0, x1, x2, x3); dyn->insts[ninst].x64.barrier = A;} else dyn->insts[ninst].barrier_maybe = 1 -#define BARRIER_NEXT(A) dyn->insts[ninst+1].x64.barrier_next = A +#define BARRIER_NEXT(A) dyn->insts[ninst].x64.barrier_next = A #define SET_HASCALLRET() dyn->insts[ninst].x64.has_callret = 1 #define NEW_INST \ ++dyn->size; \ diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h index b7058c9a..907de0a3 100644 --- a/src/dynarec/rv64/dynarec_rv64_private.h +++ b/src/dynarec/rv64/dynarec_rv64_private.h @@ -112,6 +112,9 @@ typedef struct dynarec_rv64_s { uintptr_t* next; // variable array of "next" jump address int next_sz; int next_cap; + int* jmps; // variable array of jump instructions + int jmp_sz; + int jmp_cap; int* predecessor;// single array of all predecessor dynablock_t* dynablock; instsize_t* instsize; @@ -132,6 +135,8 @@ typedef struct dynarec_rv64_s { void add_next(dynarec_rv64_t *dyn, uintptr_t addr); uintptr_t get_closest_next(dynarec_rv64_t *dyn, uintptr_t addr); +void add_jump(dynarec_rv64_t *dyn, int ninst); +int get_first_jump(dynarec_rv64_t *dyn, int next); int is_nops(dynarec_rv64_t *dyn, uintptr_t addr, int n); int is_instructions(dynarec_rv64_t *dyn, uintptr_t addr, int n); diff --git a/src/main.c b/src/main.c index 383df52e..26185aeb 100644 --- a/src/main.c +++ b/src/main.c @@ -60,7 +60,7 @@ int box64_dynarec_x87double = 0; int box64_dynarec_fastnan = 1; int box64_dynarec_fastround = 1; int box64_dynarec_safeflags = 1; -int box64_dynarec_callret = 0; +int box64_dynarec_callret = 1; int box64_dynarec_hotpage = 0; int box64_dynarec_fastpage = 0; int box64_dynarec_bleeding_edge = 1; @@ -656,6 +656,8 @@ void LoadLogEnv() } if(box64_dynarec_callret) printf_log(LOG_INFO, "Dynarec will optimize CALL/RET\n"); + else + printf_log(LOG_INFO, "Dynarec will not optimize CALL/RET\n"); } p = getenv("BOX64_DYNAREC_BLEEDING_EDGE"); if(p) { |