diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2025-06-08 14:16:45 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2025-06-08 14:16:45 +0200 |
| commit | 685afa230291d64f350afbfdfa8fc82536d99f82 (patch) | |
| tree | d079b3e3118023ec4651e43280d3f64602e79d58 /src | |
| parent | d3f0d1c30f14fd789fc747e3704286259026fbc9 (diff) | |
| download | box64-685afa230291d64f350afbfdfa8fc82536d99f82.tar.gz box64-685afa230291d64f350afbfdfa8fc82536d99f82.zip | |
[DYNAREC] Modified JumpTable slightly so that jumps within the 32-bit and 48-bit address spaces can be done with only 2 and 3 memory fetches, respectively (todo: RV64 and LA64 handling of 48 bits)
Diffstat (limited to 'src')
| -rw-r--r-- | src/custommem.c | 57 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.c | 124 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.c | 18 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 64 | ||||
| -rw-r--r-- | src/include/custommem.h | 9 |
5 files changed, 180 insertions, 92 deletions
diff --git a/src/custommem.c b/src/custommem.c index 6e99bcdd..96e1130e 100644 --- a/src/custommem.c +++ b/src/custommem.c @@ -31,12 +31,23 @@ static mmaplist_t *mmaplist = NULL; static rbtree_t *rbt_dynmem = NULL; static uint64_t jmptbl_allocated = 0, jmptbl_allocated1 = 0, jmptbl_allocated2 = 0, jmptbl_allocated3 = 0; +#if JMPTABL_SHIFTMAX != 16 +#error Incorect value for jumptable shift max that should be 16 +#endif #ifdef JMPTABL_SHIFT4 +#if JMPTABL_SHIFT3 != 16 +#error Incorect value for jumptable shift3 that should be 16 +#endif static uint64_t jmptbl_allocated4 = 0; static uintptr_t**** box64_jmptbl4[1<<JMPTABL_SHIFT4]; static uintptr_t*** box64_jmptbldefault3[1<<JMPTABL_SHIFT3]; +static uintptr_t*** box64_jmptbl_48[1<<JMPTABL_SHIFT3]; #else +#if JMPTABL_SHIFT2 != 16 +#error Incorect value for jumptable shift2 that should be 16 +#endif static uintptr_t*** box64_jmptbl3[1<<JMPTABL_SHIFT3]; +static uintptr_t** box64_jmptbl_48[1<<JMPTABL_SHIFT2]; #endif static uintptr_t** box64_jmptbldefault2[1<<JMPTABL_SHIFT2]; static uintptr_t* box64_jmptbldefault1[1<<JMPTABL_SHIFT1]; @@ -1281,7 +1292,7 @@ int cleanDBFromAddressRange(uintptr_t addr, size_t size, int destroy) } #ifdef JMPTABL_SHIFT4 -static uintptr_t *create_jmptbl(uintptr_t idx0, uintptr_t idx1, uintptr_t idx2, uintptr_t idx3, uintptr_t idx4) +static uintptr_t *create_jmptbl(int for32bits, uintptr_t idx0, uintptr_t idx1, uintptr_t idx2, uintptr_t idx3, uintptr_t idx4) { if(box64_jmptbl4[idx4] == box64_jmptbldefault3) { uintptr_t**** tbl = (uintptr_t****)customMalloc((1<<JMPTABL_SHIFT3)*sizeof(uintptr_t***)); @@ -1309,6 +1320,7 @@ static uintptr_t *create_jmptbl(uintptr_t idx0, uintptr_t idx1, uintptr_t idx2, } #endif } + if(for32bits) return NULL; if(box64_jmptbl4[idx4][idx3][idx2] == box64_jmptbldefault1) { uintptr_t** tbl = (uintptr_t**)customMalloc((1<<JMPTABL_SHIFT1)*sizeof(uintptr_t*)); for(int i=0; i<(1<<JMPTABL_SHIFT1); ++i) @@ -1338,7 +1350,7 @@ static uintptr_t *create_jmptbl(uintptr_t idx0, 
uintptr_t idx1, uintptr_t idx2, return &box64_jmptbl4[idx4][idx3][idx2][idx1][idx0]; } #else -static uintptr_t *create_jmptbl(uintptr_t idx0, uintptr_t idx1, uintptr_t idx2, uintptr_t idx3) +static uintptr_t *create_jmptbl(int for32bits, uintptr_t idx0, uintptr_t idx1, uintptr_t idx2, uintptr_t idx3) { if(box64_jmptbl3[idx3] == box64_jmptbldefault2) { uintptr_t*** tbl = (uintptr_t***)customMalloc((1<<JMPTABL_SHIFT2)*sizeof(uintptr_t**)); @@ -1366,6 +1378,7 @@ static uintptr_t *create_jmptbl(uintptr_t idx0, uintptr_t idx1, uintptr_t idx2, } #endif } + if(for32bits) return NULL; if(box64_jmptbl3[idx3][idx2][idx1] == box64_jmptbldefault0) { uintptr_t* tbl = (uintptr_t*)customMalloc((1<<JMPTABL_SHIFT0)*sizeof(uintptr_t)); for(int i=0; i<(1<<JMPTABL_SHIFT0); ++i) @@ -1396,9 +1409,9 @@ int addJumpTableIfDefault64(void* addr, void* jmp) idx0 = (((uintptr_t)addr) )&JMPTABLE_MASK0; #ifdef JMPTABL_SHIFT4 - return (native_lock_storeifref(create_jmptbl(idx0, idx1, idx2, idx3, idx4), jmp, native_next)==jmp)?1:0; + return (native_lock_storeifref(create_jmptbl(0, idx0, idx1, idx2, idx3, idx4), jmp, native_next)==jmp)?1:0; #else - return (native_lock_storeifref(create_jmptbl(idx0, idx1, idx2, idx3), jmp, native_next)==jmp)?1:0; + return (native_lock_storeifref(create_jmptbl(0, idx0, idx1, idx2, idx3), jmp, native_next)==jmp)?1:0; #endif } void setJumpTableDefault64(void* addr) @@ -1456,9 +1469,9 @@ int setJumpTableIfRef64(void* addr, void* jmp, void* ref) idx1 = (((uintptr_t)addr)>>JMPTABL_START1)&JMPTABLE_MASK1; idx0 = (((uintptr_t)addr) )&JMPTABLE_MASK0; #ifdef JMPTABL_SHIFT4 - return (native_lock_storeifref(create_jmptbl(idx0, idx1, idx2, idx3, idx4), jmp, ref)==jmp)?1:0; + return (native_lock_storeifref(create_jmptbl(0, idx0, idx1, idx2, idx3, idx4), jmp, ref)==jmp)?1:0; #else - return (native_lock_storeifref(create_jmptbl(idx0, idx1, idx2, idx3), jmp, ref)==jmp)?1:0; + return (native_lock_storeifref(create_jmptbl(0, idx0, idx1, idx2, idx3), jmp, ref)==jmp)?1:0; #endif } int 
isJumpTableDefault64(void* addr) @@ -1491,13 +1504,19 @@ uintptr_t getJumpTable64() return (uintptr_t)box64_jmptbl3; #endif } +uintptr_t getJumpTable48() +{ + return (uintptr_t)box64_jmptbl_48; +} uintptr_t getJumpTable32() { #ifdef JMPTABL_SHIFT4 + create_jmptbl(1, 0, 0, 0, 0, 0); return (uintptr_t)box64_jmptbl4[0][0]; #else - return (uintptr_t)box64_jmptbl3[0]; + create_jmptbl(1, 0, 0, 0, 0); + return (uintptr_t)box64_jmptbl3[0][0]; #endif } @@ -1512,9 +1531,9 @@ uintptr_t getJumpTableAddress64(uintptr_t addr) idx1 = ((addr)>>JMPTABL_START1)&JMPTABLE_MASK1; idx0 = ((addr) )&JMPTABLE_MASK0; #ifdef JMPTABL_SHIFT4 - return (uintptr_t)create_jmptbl(idx0, idx1, idx2, idx3, idx4); + return (uintptr_t)create_jmptbl(0, idx0, idx1, idx2, idx3, idx4); #else - return (uintptr_t)create_jmptbl(idx0, idx1, idx2, idx3); + return (uintptr_t)create_jmptbl(0, idx0, idx1, idx2, idx3); #endif } @@ -2242,11 +2261,17 @@ void init_custommem_helper(box64context_t* ctx) #ifdef JMPTABL_SHIFT4 for(int i=0; i<(1<<JMPTABL_SHIFT4); ++i) box64_jmptbl4[i] = box64_jmptbldefault3; - for(int i=0; i<(1<<JMPTABL_SHIFT3); ++i) + for(int i=0; i<(1<<JMPTABL_SHIFT3); ++i) { box64_jmptbldefault3[i] = box64_jmptbldefault2; + box64_jmptbl_48[i] = box64_jmptbldefault2; + } + box64_jmptbl4[0] = box64_jmptbl_48; #else - for(int i=0; i<(1<<JMPTABL_SHIFT3); ++i) - box64_jmptbl3[i] = box64_jmptbldefault2; + for(int i=0; i<(1<<JMPTABL_SHIFT3); ++i) { + box64_jmptbl3[i] = box64_jmptbldefault2; + box64_jmptbl_48[i] = box64_jmptbldefault1; + } + box64_jmptbl3[0] = box64_jmptbl_48; #endif for(int i=0; i<(1<<JMPTABL_SHIFT2); ++i) box64_jmptbldefault2[i] = box64_jmptbldefault1; @@ -2336,10 +2361,14 @@ void fini_custommem_helper(box64context_t *ctx) } customFree(box64_jmptbl3[i3][i2]); } - customFree(box64_jmptbl3[i3]); + #ifndef JMPTABL_SHIFT4 + if(i3) + #endif + customFree(box64_jmptbl3[i3]); } #ifdef JMPTABL_SHIFT4 - customFree(box64_jmptbl4[i4]); + if(i4) + customFree(box64_jmptbl4[i4]); } #endif } diff --git 
a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c index 7f78e4de..088129e7 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.c +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -579,23 +579,39 @@ void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst, int is32 MOVx_REG(xRIP, reg); } NOTEST(x2); - uintptr_t tbl = is32bits?getJumpTable32():getJumpTable64(); - MAYUSE(tbl); - MOV64x(x3, tbl); if(!is32bits) { + // check higher 48bits + LSRx_IMM(x2, xRIP, 48); + CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block); + // load table + uintptr_t tbl = getJumpTable48(); // this is a static value, so will be a low address + MOV64x(x3, tbl); #ifdef JMPTABL_SHIFT4 - UBFXx(x2, xRIP, JMPTABL_START4, JMPTABL_SHIFT4); + UBFXx(x2, xRIP, JMPTABL_START3, JMPTABL_SHIFT3); LDRx_REG_LSL3(x3, x3, x2); #endif - UBFXx(x2, xRIP, JMPTABL_START3, JMPTABL_SHIFT3); + UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2); + LDRx_REG_LSL3(x3, x3, x2); + UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1); + LDRx_REG_LSL3(x3, x3, x2); + UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0); + LDRx_REG_LSL3(x2, x3, x2); + } else { + // check higher 32bits disabled + //LSRx_IMM(x2, xRIP, 32); + //CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block); + // load table + uintptr_t tbl = getJumpTable32(); // this will not be a low address + TABLE64(x3, tbl); + #ifdef JMPTABL_SHIFT4 + UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2); LDRx_REG_LSL3(x3, x3, x2); + #endif + UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1); + LDRx_REG_LSL3(x3, x3, x2); + UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0); + LDRx_REG_LSL3(x2, x3, x2); } - UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2); - LDRx_REG_LSL3(x3, x3, x2); - UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1); - LDRx_REG_LSL3(x3, x3, x2); - UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0); - LDRx_REG_LSL3(x2, x3, x2); } else { NOTEST(x2); uintptr_t p = getJumpTableAddress64(ip); @@ -636,23 +652,40 @@ void 
ret_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int ninst, rex_t rex) // not the correct return address, regular jump, but purge the stack first, it's unsync now... SUBx_U12(xSP, xSavedSP, 16); } - uintptr_t tbl = rex.is32bits?getJumpTable32():getJumpTable64(); NOTEST(x2); - MOV64x(x2, tbl); if(!rex.is32bits) { + // check higher 48bits + LSRx_IMM(x2, xRIP, 48); + CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block); + // load table + uintptr_t tbl = getJumpTable48(); + MOV64x(x3, tbl); #ifdef JMPTABL_SHIFT4 - UBFXx(x3, xRIP, JMPTABL_START4, JMPTABL_SHIFT4); - LDRx_REG_LSL3(x2, x2, x3); + UBFXx(x2, xRIP, JMPTABL_START3, JMPTABL_SHIFT3); + LDRx_REG_LSL3(x3, x3, x2); #endif - UBFXx(x3, xRIP, JMPTABL_START3, JMPTABL_SHIFT3); - LDRx_REG_LSL3(x2, x2, x3); - } - UBFXx(x3, xRIP, JMPTABL_START2, JMPTABL_SHIFT2); - LDRx_REG_LSL3(x2, x2, x3); - UBFXx(x3, xRIP, JMPTABL_START1, JMPTABL_SHIFT1); - LDRx_REG_LSL3(x2, x2, x3); - UBFXx(x3, xRIP, JMPTABL_START0, JMPTABL_SHIFT0); - LDRx_REG_LSL3(x2, x2, x3); + UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2); + LDRx_REG_LSL3(x3, x3, x2); + UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1); + LDRx_REG_LSL3(x3, x3, x2); + UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0); + LDRx_REG_LSL3(x2, x3, x2); + } else { + // check higher 32bits disabled + //LSRx_IMM(x2, xRIP, 32); + //CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block); + // load table + uintptr_t tbl = getJumpTable32(); + TABLE64(x3, tbl); + #ifdef JMPTABL_SHIFT4 + UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2); + LDRx_REG_LSL3(x3, x3, x2); + #endif + UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1); + LDRx_REG_LSL3(x3, x3, x2); + UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0); + LDRx_REG_LSL3(x2, x3, x2); + } #ifdef HAVE_TRACE BLR(x2); #else @@ -683,23 +716,40 @@ void retn_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int ninst, rex_t rex, int // not the correct return address, regular jump SUBx_U12(xSP, xSavedSP, 16); } - uintptr_t tbl = 
rex.is32bits?getJumpTable32():getJumpTable64(); NOTEST(x2); - MOV64x(x2, tbl); if(!rex.is32bits) { + // check higher 48bits + LSRx_IMM(x2, xRIP, 48); + CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block); + // load table + uintptr_t tbl = getJumpTable48(); + MOV64x(x3, tbl); #ifdef JMPTABL_SHIFT4 - UBFXx(x3, xRIP, JMPTABL_START4, JMPTABL_SHIFT4); - LDRx_REG_LSL3(x2, x2, x3); + UBFXx(x2, xRIP, JMPTABL_START3, JMPTABL_SHIFT3); + LDRx_REG_LSL3(x3, x3, x2); #endif - UBFXx(x3, xRIP, JMPTABL_START3, JMPTABL_SHIFT3); - LDRx_REG_LSL3(x2, x2, x3); - } - UBFXx(x3, xRIP, JMPTABL_START2, JMPTABL_SHIFT2); - LDRx_REG_LSL3(x2, x2, x3); - UBFXx(x3, xRIP, JMPTABL_START1, JMPTABL_SHIFT1); - LDRx_REG_LSL3(x2, x2, x3); - UBFXx(x3, xRIP, JMPTABL_START0, JMPTABL_SHIFT0); - LDRx_REG_LSL3(x2, x2, x3); + UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2); + LDRx_REG_LSL3(x3, x3, x2); + UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1); + LDRx_REG_LSL3(x3, x3, x2); + UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0); + LDRx_REG_LSL3(x2, x3, x2); + } else { + // check higher 32bits disbaled + //LSRx_IMM(x2, xRIP, 32); + //CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block); + // load table + uintptr_t tbl = getJumpTable32(); + TABLE64(x3, tbl); + #ifdef JMPTABL_SHIFT4 + UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2); + LDRx_REG_LSL3(x3, x3, x2); + #endif + UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1); + LDRx_REG_LSL3(x3, x3, x2); + UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0); + LDRx_REG_LSL3(x2, x3, x2); + } #ifdef HAVE_TRACE BLR(x2); #else diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c index b6fc31c2..97ed2d97 100644 --- a/src/dynarec/la64/dynarec_la64_helper.c +++ b/src/dynarec/la64/dynarec_la64_helper.c @@ -547,10 +547,10 @@ void jump_to_next(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst, int is3 BSTRPICK_D(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3); ALSL_D(x3, x2, x3, 3); LD_D(x3, x3, 0); + 
BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); + ALSL_D(x3, x2, x3, 3); + LD_D(x3, x3, 0); } - BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); - ALSL_D(x3, x2, x3, 3); - LD_D(x3, x3, 0); BSTRPICK_D(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1); ALSL_D(x3, x2, x3, 3); LD_D(x3, x3, 0); @@ -601,10 +601,10 @@ void ret_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int ninst, rex_t rex) BSTRPICK_D(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3); ALSL_D(x3, x2, x3, 3); LD_D(x3, x3, 0); + BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); + ALSL_D(x3, x2, x3, 3); + LD_D(x3, x3, 0); } - BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); - ALSL_D(x3, x2, x3, 3); - LD_D(x3, x3, 0); BSTRPICK_D(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1); ALSL_D(x3, x2, x3, 3); LD_D(x3, x3, 0); @@ -646,10 +646,10 @@ void retn_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int ninst, rex_t rex, int BSTRPICK_D(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3); ALSL_D(x3, x2, x3, 3); LD_D(x3, x3, 0); + BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); + ALSL_D(x3, x2, x3, 3); + LD_D(x3, x3, 0); } - BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); - ALSL_D(x3, x2, x3, 3); - LD_D(x3, x3, 0); BSTRPICK_D(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1); ALSL_D(x3, x2, x3, 3); LD_D(x3, x3, 0); diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index e9ba7119..1f4ef25b 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -605,10 +605,10 @@ void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst, int is3 TH_EXTU(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3); TH_ADDSL(x3, x3, x2, 3); LD(x3, x3, 0); + TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, 
JMPTABL_START2); + TH_ADDSL(x3, x3, x2, 3); + LD(x3, x3, 0); } - TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); - TH_ADDSL(x3, x3, x2, 3); - LD(x3, x3, 0); TH_EXTU(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1); TH_ADDSL(x3, x3, x2, 3); LD(x3, x3, 0); @@ -625,14 +625,16 @@ void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst, int is3 ADD(x3, x3, x2); } LD(x3, x3, 0); // could be LR_D(x3, x3, 1, 1); for better safety - } - MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask - SRLI(x2, xRIP, JMPTABL_START2 - 3); - AND(x2, x2, x4); - ADD(x3, x3, x2); - LD(x3, x3, 0); // LR_D(x3, x3, 1, 1); - if (JMPTABLE_MASK2 != JMPTABLE_MASK1) { - MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask + MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask + SRLI(x2, xRIP, JMPTABL_START2 - 3); + AND(x2, x2, x4); + ADD(x3, x3, x2); + LD(x3, x3, 0); // LR_D(x3, x3, 1, 1); + if (JMPTABLE_MASK2 != JMPTABLE_MASK1) { + MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask + } + } else { + MOV64x(x4, JMPTABLE_MASK1 << 3); } SRLI(x2, xRIP, JMPTABL_START1 - 3); AND(x2, x2, x4); @@ -695,9 +697,9 @@ void ret_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int ninst, rex_t rex) if (!rex.is32bits) { TH_EXTU(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3); TH_LRD(x3, x3, x2, 3); + TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); + TH_LRD(x3, x3, x2, 3); } - TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); - TH_LRD(x3, x3, x2, 3); TH_EXTU(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1); TH_LRD(x3, x3, x2, 3); TH_EXTU(x2, xRIP, JMPTABL_START0 + JMPTABL_SHIFT0 - 1, JMPTABL_START0); @@ -707,13 +709,15 @@ void ret_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int ninst, rex_t rex) SRLI(x2, xRIP, JMPTABL_START3); ADDSL(x3, x3, x2, 3, x2); LD(x3, x3, 0); - } - MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask - SRLI(x2, xRIP, JMPTABL_START2 - 3); - AND(x2, x2, x4); - ADD(x3, x3, x2); - LD(x3, x3, 0); - if 
(JMPTABLE_MASK2 != JMPTABLE_MASK1) { + MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask + SRLI(x2, xRIP, JMPTABL_START2 - 3); + AND(x2, x2, x4); + ADD(x3, x3, x2); + LD(x3, x3, 0); + if (JMPTABLE_MASK2 != JMPTABLE_MASK1) { + MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask + } + } else { MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask } SRLI(x2, xRIP, JMPTABL_START1 - 3); @@ -772,9 +776,9 @@ void retn_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int ninst, rex_t rex, int if (!rex.is32bits) { TH_EXTU(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3); TH_LRD(x3, x3, x2, 3); + TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); + TH_LRD(x3, x3, x2, 3); } - TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); - TH_LRD(x3, x3, x2, 3); TH_EXTU(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT2 - 1, JMPTABL_START1); TH_LRD(x3, x3, x2, 3); TH_EXTU(x2, xRIP, JMPTABL_START0 + JMPTABL_SHIFT0 - 1, JMPTABL_START0); @@ -784,13 +788,15 @@ void retn_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int ninst, rex_t rex, int SRLI(x2, xRIP, JMPTABL_START3); ADDSL(x3, x3, x2, 3, x2); LD(x3, x3, 0); - } - MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask - SRLI(x2, xRIP, JMPTABL_START2 - 3); - AND(x2, x2, x4); - ADD(x3, x3, x2); - LD(x3, x3, 0); - if (JMPTABLE_MASK2 != JMPTABLE_MASK1) { + MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask + SRLI(x2, xRIP, JMPTABL_START2 - 3); + AND(x2, x2, x4); + ADD(x3, x3, x2); + LD(x3, x3, 0); + if (JMPTABLE_MASK2 != JMPTABLE_MASK1) { + MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask + } + } else { MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask } SRLI(x2, xRIP, JMPTABL_START1 - 3); diff --git a/src/include/custommem.h b/src/include/custommem.h index 4bfcc98a..d7b79296 100644 --- a/src/include/custommem.h +++ b/src/include/custommem.h @@ -47,13 +47,15 @@ void setJumpTableDefault64(void* addr); void setJumpTableDefaultRef64(void* addr, void* jmp); int isJumpTableDefault64(void* addr); uintptr_t getJumpTable64(void); +uintptr_t 
getJumpTable48(void); uintptr_t getJumpTable32(void); uintptr_t getJumpTableAddress64(uintptr_t addr); uintptr_t getJumpAddress64(uintptr_t addr); #ifdef SAVE_MEM +#define JMPTABL_SHIFTMAX JMPTABL_SHIFT4 #define JMPTABL_SHIFT4 16 -#define JMPTABL_SHIFT3 14 +#define JMPTABL_SHIFT3 16 #define JMPTABL_SHIFT2 12 #define JMPTABL_SHIFT1 12 #define JMPTABL_SHIFT0 10 @@ -68,10 +70,11 @@ uintptr_t getJumpAddress64(uintptr_t addr); #define JMPTABLE_MASK1 ((1<<JMPTABL_SHIFT1)-1) #define JMPTABLE_MASK0 ((1<<JMPTABL_SHIFT0)-1) #else +#define JMPTABL_SHIFTMAX JMPTABL_SHIFT3 #define JMPTABL_SHIFT3 16 -#define JMPTABL_SHIFT2 18 +#define JMPTABL_SHIFT2 16 #define JMPTABL_SHIFT1 18 -#define JMPTABL_SHIFT0 12 +#define JMPTABL_SHIFT0 14 #define JMPTABL_START3 (JMPTABL_START2+JMPTABL_SHIFT2) #define JMPTABL_START2 (JMPTABL_START1+JMPTABL_SHIFT1) #define JMPTABL_START1 (JMPTABL_START0+JMPTABL_SHIFT0) |