diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2025-06-08 14:16:45 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2025-06-08 14:16:45 +0200 |
| commit | 685afa230291d64f350afbfdfa8fc82536d99f82 (patch) | |
| tree | d079b3e3118023ec4651e43280d3f64602e79d58 /src/dynarec | |
| parent | d3f0d1c30f14fd789fc747e3704286259026fbc9 (diff) | |
| download | box64-685afa230291d64f350afbfdfa8fc82536d99f82.tar.gz box64-685afa230291d64f350afbfdfa8fc82536d99f82.zip | |
[DYNAREC] Modified JumpTable slightly so 32bits and 48bits address space jumps can be done with only 2 and 3 memory fetches respectively (todo: RV64 and LA64 handling of 48bits)
Diffstat (limited to 'src/dynarec')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.c | 124 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.c | 18 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 64 |
3 files changed, 131 insertions, 75 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c index 7f78e4de..088129e7 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.c +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -579,23 +579,39 @@ void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst, int is32 MOVx_REG(xRIP, reg); } NOTEST(x2); - uintptr_t tbl = is32bits?getJumpTable32():getJumpTable64(); - MAYUSE(tbl); - MOV64x(x3, tbl); if(!is32bits) { + // check higher 48bits + LSRx_IMM(x2, xRIP, 48); + CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block); + // load table + uintptr_t tbl = getJumpTable48(); // this is a static value, so will be a low address + MOV64x(x3, tbl); #ifdef JMPTABL_SHIFT4 - UBFXx(x2, xRIP, JMPTABL_START4, JMPTABL_SHIFT4); + UBFXx(x2, xRIP, JMPTABL_START3, JMPTABL_SHIFT3); LDRx_REG_LSL3(x3, x3, x2); #endif - UBFXx(x2, xRIP, JMPTABL_START3, JMPTABL_SHIFT3); + UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2); + LDRx_REG_LSL3(x3, x3, x2); + UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1); + LDRx_REG_LSL3(x3, x3, x2); + UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0); + LDRx_REG_LSL3(x2, x3, x2); + } else { + // check higher 32bits disabled + //LSRx_IMM(x2, xRIP, 32); + //CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block); + // load table + uintptr_t tbl = getJumpTable32(); // this will not be a low address + TABLE64(x3, tbl); + #ifdef JMPTABL_SHIFT4 + UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2); LDRx_REG_LSL3(x3, x3, x2); + #endif + UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1); + LDRx_REG_LSL3(x3, x3, x2); + UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0); + LDRx_REG_LSL3(x2, x3, x2); } - UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2); - LDRx_REG_LSL3(x3, x3, x2); - UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1); - LDRx_REG_LSL3(x3, x3, x2); - UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0); - LDRx_REG_LSL3(x2, x3, x2); } else { NOTEST(x2); uintptr_t p = getJumpTableAddress64(ip); @@ -636,23 +652,40 @@ 
void ret_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int ninst, rex_t rex) // not the correct return address, regular jump, but purge the stack first, it's unsync now... SUBx_U12(xSP, xSavedSP, 16); } - uintptr_t tbl = rex.is32bits?getJumpTable32():getJumpTable64(); NOTEST(x2); - MOV64x(x2, tbl); if(!rex.is32bits) { + // check higher 48bits + LSRx_IMM(x2, xRIP, 48); + CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block); + // load table + uintptr_t tbl = getJumpTable48(); + MOV64x(x3, tbl); #ifdef JMPTABL_SHIFT4 - UBFXx(x3, xRIP, JMPTABL_START4, JMPTABL_SHIFT4); - LDRx_REG_LSL3(x2, x2, x3); + UBFXx(x2, xRIP, JMPTABL_START3, JMPTABL_SHIFT3); + LDRx_REG_LSL3(x3, x3, x2); #endif - UBFXx(x3, xRIP, JMPTABL_START3, JMPTABL_SHIFT3); - LDRx_REG_LSL3(x2, x2, x3); - } - UBFXx(x3, xRIP, JMPTABL_START2, JMPTABL_SHIFT2); - LDRx_REG_LSL3(x2, x2, x3); - UBFXx(x3, xRIP, JMPTABL_START1, JMPTABL_SHIFT1); - LDRx_REG_LSL3(x2, x2, x3); - UBFXx(x3, xRIP, JMPTABL_START0, JMPTABL_SHIFT0); - LDRx_REG_LSL3(x2, x2, x3); + UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2); + LDRx_REG_LSL3(x3, x3, x2); + UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1); + LDRx_REG_LSL3(x3, x3, x2); + UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0); + LDRx_REG_LSL3(x2, x3, x2); + } else { + // check higher 32bits disabled + //LSRx_IMM(x2, xRIP, 32); + //CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block); + // load table + uintptr_t tbl = getJumpTable32(); + TABLE64(x3, tbl); + #ifdef JMPTABL_SHIFT4 + UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2); + LDRx_REG_LSL3(x3, x3, x2); + #endif + UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1); + LDRx_REG_LSL3(x3, x3, x2); + UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0); + LDRx_REG_LSL3(x2, x3, x2); + } #ifdef HAVE_TRACE BLR(x2); #else @@ -683,23 +716,40 @@ void retn_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int ninst, rex_t rex, int // not the correct return address, regular jump SUBx_U12(xSP, xSavedSP, 16); } - uintptr_t tbl = 
rex.is32bits?getJumpTable32():getJumpTable64(); NOTEST(x2); - MOV64x(x2, tbl); if(!rex.is32bits) { + // check higher 48bits + LSRx_IMM(x2, xRIP, 48); + CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block); + // load table + uintptr_t tbl = getJumpTable48(); + MOV64x(x3, tbl); #ifdef JMPTABL_SHIFT4 - UBFXx(x3, xRIP, JMPTABL_START4, JMPTABL_SHIFT4); - LDRx_REG_LSL3(x2, x2, x3); + UBFXx(x2, xRIP, JMPTABL_START3, JMPTABL_SHIFT3); + LDRx_REG_LSL3(x3, x3, x2); #endif - UBFXx(x3, xRIP, JMPTABL_START3, JMPTABL_SHIFT3); - LDRx_REG_LSL3(x2, x2, x3); - } - UBFXx(x3, xRIP, JMPTABL_START2, JMPTABL_SHIFT2); - LDRx_REG_LSL3(x2, x2, x3); - UBFXx(x3, xRIP, JMPTABL_START1, JMPTABL_SHIFT1); - LDRx_REG_LSL3(x2, x2, x3); - UBFXx(x3, xRIP, JMPTABL_START0, JMPTABL_SHIFT0); - LDRx_REG_LSL3(x2, x2, x3); + UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2); + LDRx_REG_LSL3(x3, x3, x2); + UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1); + LDRx_REG_LSL3(x3, x3, x2); + UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0); + LDRx_REG_LSL3(x2, x3, x2); + } else { + // check higher 32bits disabled + //LSRx_IMM(x2, xRIP, 32); + //CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block); + // load table + uintptr_t tbl = getJumpTable32(); + TABLE64(x3, tbl); + #ifdef JMPTABL_SHIFT4 + UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2); + LDRx_REG_LSL3(x3, x3, x2); + #endif + UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1); + LDRx_REG_LSL3(x3, x3, x2); + UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0); + LDRx_REG_LSL3(x2, x3, x2); + } #ifdef HAVE_TRACE BLR(x2); #else diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c index b6fc31c2..97ed2d97 100644 --- a/src/dynarec/la64/dynarec_la64_helper.c +++ b/src/dynarec/la64/dynarec_la64_helper.c @@ -547,10 +547,10 @@ void jump_to_next(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst, int is3 BSTRPICK_D(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3); ALSL_D(x3, x2, x3, 3); LD_D(x3, x3, 0); + 
BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); + ALSL_D(x3, x2, x3, 3); + LD_D(x3, x3, 0); } - BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); - ALSL_D(x3, x2, x3, 3); - LD_D(x3, x3, 0); BSTRPICK_D(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1); ALSL_D(x3, x2, x3, 3); LD_D(x3, x3, 0); @@ -601,10 +601,10 @@ void ret_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int ninst, rex_t rex) BSTRPICK_D(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3); ALSL_D(x3, x2, x3, 3); LD_D(x3, x3, 0); + BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); + ALSL_D(x3, x2, x3, 3); + LD_D(x3, x3, 0); } - BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); - ALSL_D(x3, x2, x3, 3); - LD_D(x3, x3, 0); BSTRPICK_D(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1); ALSL_D(x3, x2, x3, 3); LD_D(x3, x3, 0); @@ -646,10 +646,10 @@ void retn_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int ninst, rex_t rex, int BSTRPICK_D(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3); ALSL_D(x3, x2, x3, 3); LD_D(x3, x3, 0); + BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); + ALSL_D(x3, x2, x3, 3); + LD_D(x3, x3, 0); } - BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); - ALSL_D(x3, x2, x3, 3); - LD_D(x3, x3, 0); BSTRPICK_D(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1); ALSL_D(x3, x2, x3, 3); LD_D(x3, x3, 0); diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index e9ba7119..1f4ef25b 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -605,10 +605,10 @@ void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst, int is3 TH_EXTU(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3); TH_ADDSL(x3, x3, x2, 3); LD(x3, x3, 0); + TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, 
JMPTABL_START2); + TH_ADDSL(x3, x3, x2, 3); + LD(x3, x3, 0); } - TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); - TH_ADDSL(x3, x3, x2, 3); - LD(x3, x3, 0); TH_EXTU(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1); TH_ADDSL(x3, x3, x2, 3); LD(x3, x3, 0); @@ -625,14 +625,16 @@ void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst, int is3 ADD(x3, x3, x2); } LD(x3, x3, 0); // could be LR_D(x3, x3, 1, 1); for better safety - } - MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask - SRLI(x2, xRIP, JMPTABL_START2 - 3); - AND(x2, x2, x4); - ADD(x3, x3, x2); - LD(x3, x3, 0); // LR_D(x3, x3, 1, 1); - if (JMPTABLE_MASK2 != JMPTABLE_MASK1) { - MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask + MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask + SRLI(x2, xRIP, JMPTABL_START2 - 3); + AND(x2, x2, x4); + ADD(x3, x3, x2); + LD(x3, x3, 0); // LR_D(x3, x3, 1, 1); + if (JMPTABLE_MASK2 != JMPTABLE_MASK1) { + MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask + } + } else { + MOV64x(x4, JMPTABLE_MASK1 << 3); } SRLI(x2, xRIP, JMPTABL_START1 - 3); AND(x2, x2, x4); @@ -695,9 +697,9 @@ void ret_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int ninst, rex_t rex) if (!rex.is32bits) { TH_EXTU(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3); TH_LRD(x3, x3, x2, 3); + TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); + TH_LRD(x3, x3, x2, 3); } - TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); - TH_LRD(x3, x3, x2, 3); TH_EXTU(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1); TH_LRD(x3, x3, x2, 3); TH_EXTU(x2, xRIP, JMPTABL_START0 + JMPTABL_SHIFT0 - 1, JMPTABL_START0); @@ -707,13 +709,15 @@ void ret_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int ninst, rex_t rex) SRLI(x2, xRIP, JMPTABL_START3); ADDSL(x3, x3, x2, 3, x2); LD(x3, x3, 0); - } - MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask - SRLI(x2, xRIP, JMPTABL_START2 - 3); - AND(x2, x2, x4); - ADD(x3, x3, x2); - LD(x3, x3, 0); - if 
(JMPTABLE_MASK2 != JMPTABLE_MASK1) { + MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask + SRLI(x2, xRIP, JMPTABL_START2 - 3); + AND(x2, x2, x4); + ADD(x3, x3, x2); + LD(x3, x3, 0); + if (JMPTABLE_MASK2 != JMPTABLE_MASK1) { + MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask + } + } else { MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask } SRLI(x2, xRIP, JMPTABL_START1 - 3); @@ -772,9 +776,9 @@ void retn_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int ninst, rex_t rex, int if (!rex.is32bits) { TH_EXTU(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3); TH_LRD(x3, x3, x2, 3); + TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); + TH_LRD(x3, x3, x2, 3); } - TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); - TH_LRD(x3, x3, x2, 3); TH_EXTU(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT2 - 1, JMPTABL_START1); TH_LRD(x3, x3, x2, 3); TH_EXTU(x2, xRIP, JMPTABL_START0 + JMPTABL_SHIFT0 - 1, JMPTABL_START0); @@ -784,13 +788,15 @@ void retn_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int ninst, rex_t rex, int SRLI(x2, xRIP, JMPTABL_START3); ADDSL(x3, x3, x2, 3, x2); LD(x3, x3, 0); - } - MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask - SRLI(x2, xRIP, JMPTABL_START2 - 3); - AND(x2, x2, x4); - ADD(x3, x3, x2); - LD(x3, x3, 0); - if (JMPTABLE_MASK2 != JMPTABLE_MASK1) { + MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask + SRLI(x2, xRIP, JMPTABL_START2 - 3); + AND(x2, x2, x4); + ADD(x3, x3, x2); + LD(x3, x3, 0); + if (JMPTABLE_MASK2 != JMPTABLE_MASK1) { + MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask + } + } else { MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask } SRLI(x2, xRIP, JMPTABL_START1 - 3); |