author     ptitSeb <sebastien.chev@gmail.com>    2025-06-08 14:16:45 +0200
committer  ptitSeb <sebastien.chev@gmail.com>    2025-06-08 14:16:45 +0200
commit     685afa230291d64f350afbfdfa8fc82536d99f82 (patch)
tree       d079b3e3118023ec4651e43280d3f64602e79d58 /src/dynarec
parent     d3f0d1c30f14fd789fc747e3704286259026fbc9 (diff)
download   box64-685afa230291d64f350afbfdfa8fc82536d99f82.tar.gz
           box64-685afa230291d64f350afbfdfa8fc82536d99f82.zip
[DYNAREC] Modified the JumpTable slightly so 32-bit and 48-bit address-space jumps can be done with only 2 and 3 memory fetches respectively (todo: RV64 and LA64 handling of 48 bits)
Diffstat (limited to 'src/dynarec')
-rw-r--r--   src/dynarec/arm64/dynarec_arm64_helper.c   124
-rw-r--r--   src/dynarec/la64/dynarec_la64_helper.c      18
-rw-r--r--   src/dynarec/rv64/dynarec_rv64_helper.c      64
3 files changed, 131 insertions, 75 deletions
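
The change replaces the single getJumpTable64() walk with two shorter ones: 64-bit code first checks that bits 48-63 of RIP are clear (anything above that falls back to the generic jmp_next path) and then walks a 48-bit table rooted at getJumpTable48() in three chained fetches, while 32-bit code uses a dedicated root from getJumpTable32() and only two fetches. A minimal C sketch of that lookup, assuming the default (non-JMPTABL_SHIFT4) layout; the slice widths, helper names and roots below are illustrative, not the real box64 definitions:

    #include <stdint.h>
    #include <stddef.h>

    /* Illustrative split of a 48-bit guest address into three 16-bit slices,
     * each indexing one table level (values are for illustration only). */
    #define TBL_START2 32
    #define TBL_START1 16
    #define TBL_START0  0
    #define TBL_WIDTH  16

    static inline uintptr_t slice(uintptr_t rip, int start)
    {
        return (rip >> start) & (((uintptr_t)1 << TBL_WIDTH) - 1);
    }

    /* 48-bit lookup: three chained memory fetches after loading the root. */
    static void* lookup48(void* const* root48, uintptr_t rip)
    {
        if (rip >> 48)
            return NULL;          /* would take the generic jmp_next path instead */
        void* const* l1 = (void* const*)root48[slice(rip, TBL_START2)];
        void* const* l0 = (void* const*)l1[slice(rip, TBL_START1)];
        return l0[slice(rip, TBL_START0)];
    }

    /* 32-bit lookup: a dedicated root and only two chained fetches. */
    static void* lookup32(void* const* root32, uintptr_t rip)
    {
        void* const* l0 = (void* const*)root32[slice(rip, TBL_START1)];
        return l0[slice(rip, TBL_START0)];
    }
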
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index 7f78e4de..088129e7 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -579,23 +579,39 @@ void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst, int is32
             MOVx_REG(xRIP, reg);
         }
         NOTEST(x2);
-        uintptr_t tbl = is32bits?getJumpTable32():getJumpTable64();
-        MAYUSE(tbl);
-        MOV64x(x3, tbl);
         if(!is32bits) {
+            // check higher 48bits
+            LSRx_IMM(x2, xRIP, 48);
+            CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block);
+            // load table
+            uintptr_t tbl = getJumpTable48();   // this is a static value, so will be a low address
+            MOV64x(x3, tbl);
             #ifdef JMPTABL_SHIFT4
-            UBFXx(x2, xRIP, JMPTABL_START4, JMPTABL_SHIFT4);
+            UBFXx(x2, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
             LDRx_REG_LSL3(x3, x3, x2);
             #endif
-            UBFXx(x2, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
+            UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
+            LDRx_REG_LSL3(x3, x3, x2);
+            UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
+            LDRx_REG_LSL3(x3, x3, x2);
+            UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0);
+            LDRx_REG_LSL3(x2, x3, x2);
+        } else {
+            // check higher 32bits disabled
+            //LSRx_IMM(x2, xRIP, 32);
+            //CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block);
+            // load table
+            uintptr_t tbl = getJumpTable32();   // this will not be a low address
+            TABLE64(x3, tbl);
+            #ifdef JMPTABL_SHIFT4
+            UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
             LDRx_REG_LSL3(x3, x3, x2);
+            #endif
+            UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
+            LDRx_REG_LSL3(x3, x3, x2);
+            UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0);
+            LDRx_REG_LSL3(x2, x3, x2);
         }
-        UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
-        LDRx_REG_LSL3(x3, x3, x2);
-        UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
-        LDRx_REG_LSL3(x3, x3, x2);
-        UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0);
-        LDRx_REG_LSL3(x2, x3, x2);
     } else {
         NOTEST(x2);
         uintptr_t p = getJumpTableAddress64(ip);
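
For reference on the ARM64 sequence above: UBFXx(dst, src, lsb, width) extracts an unsigned bit field and LDRx_REG_LSL3(dst, base, index) loads the 64-bit word at base + (index << 3), so every table level costs exactly one extract plus one load, and the final level writes into x2, which the epilogue then branches through. Rough C models of the two primitives, assuming the macros map directly onto their ARM64 namesakes with the argument order the call sites suggest:

    #include <stdint.h>

    static inline uint64_t ubfx64(uint64_t src, unsigned lsb, unsigned width)
    {
        /* UBFXx(dst, src, lsb, width): unsigned bit-field extract */
        return (src >> lsb) & ((UINT64_C(1) << width) - 1);
    }

    static inline uint64_t ldr_lsl3(uint64_t base, uint64_t index)
    {
        /* LDRx_REG_LSL3(dst, base, index): load from base + (index << 3) */
        return *(const uint64_t*)(uintptr_t)(base + (index << 3));
    }
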
@@ -636,23 +652,40 @@ void ret_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int ninst, rex_t rex)
         // not the correct return address, regular jump, but purge the stack first, it's unsync now...
         SUBx_U12(xSP, xSavedSP, 16);
     }
-    uintptr_t tbl = rex.is32bits?getJumpTable32():getJumpTable64();
     NOTEST(x2);
-    MOV64x(x2, tbl);
     if(!rex.is32bits) {
+        // check higher 48bits
+        LSRx_IMM(x2, xRIP, 48);
+        CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block);
+        // load table
+        uintptr_t tbl = getJumpTable48();
+        MOV64x(x3, tbl);
         #ifdef JMPTABL_SHIFT4
-        UBFXx(x3, xRIP, JMPTABL_START4, JMPTABL_SHIFT4);
-        LDRx_REG_LSL3(x2, x2, x3);
+        UBFXx(x2, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
+        LDRx_REG_LSL3(x3, x3, x2);
         #endif
-        UBFXx(x3, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
-        LDRx_REG_LSL3(x2, x2, x3);
-    }
-    UBFXx(x3, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
-    LDRx_REG_LSL3(x2, x2, x3);
-    UBFXx(x3, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
-    LDRx_REG_LSL3(x2, x2, x3);
-    UBFXx(x3, xRIP, JMPTABL_START0, JMPTABL_SHIFT0);
-    LDRx_REG_LSL3(x2, x2, x3);
+        UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
+        LDRx_REG_LSL3(x3, x3, x2);
+        UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
+        LDRx_REG_LSL3(x3, x3, x2);
+        UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0);
+        LDRx_REG_LSL3(x2, x3, x2);
+    } else {
+        // check higher 32bits disabled
+        //LSRx_IMM(x2, xRIP, 32);
+        //CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block);
+        // load table
+        uintptr_t tbl = getJumpTable32();
+        TABLE64(x3, tbl);
+        #ifdef JMPTABL_SHIFT4
+        UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
+        LDRx_REG_LSL3(x3, x3, x2);
+        #endif
+        UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
+        LDRx_REG_LSL3(x3, x3, x2);
+        UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0);
+        LDRx_REG_LSL3(x2, x3, x2);
+    }
     #ifdef HAVE_TRACE
     BLR(x2);
     #else
@@ -683,23 +716,40 @@ void retn_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int ninst, rex_t rex, int
         // not the correct return address, regular jump
         SUBx_U12(xSP, xSavedSP, 16);
     }
-    uintptr_t tbl = rex.is32bits?getJumpTable32():getJumpTable64();
     NOTEST(x2);
-    MOV64x(x2, tbl);
     if(!rex.is32bits) {
+        // check higher 48bits
+        LSRx_IMM(x2, xRIP, 48);
+        CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block);
+        // load table
+        uintptr_t tbl = getJumpTable48();
+        MOV64x(x3, tbl);
         #ifdef JMPTABL_SHIFT4
-        UBFXx(x3, xRIP, JMPTABL_START4, JMPTABL_SHIFT4);
-        LDRx_REG_LSL3(x2, x2, x3);
+        UBFXx(x2, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
+        LDRx_REG_LSL3(x3, x3, x2);
         #endif
-        UBFXx(x3, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
-        LDRx_REG_LSL3(x2, x2, x3);
-    }
-    UBFXx(x3, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
-    LDRx_REG_LSL3(x2, x2, x3);
-    UBFXx(x3, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
-    LDRx_REG_LSL3(x2, x2, x3);
-    UBFXx(x3, xRIP, JMPTABL_START0, JMPTABL_SHIFT0);
-    LDRx_REG_LSL3(x2, x2, x3);
+        UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
+        LDRx_REG_LSL3(x3, x3, x2);
+        UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
+        LDRx_REG_LSL3(x3, x3, x2);
+        UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0);
+        LDRx_REG_LSL3(x2, x3, x2);
+    } else {
+        // check higher 32bits disabled
+        //LSRx_IMM(x2, xRIP, 32);
+        //CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block);
+        // load table
+        uintptr_t tbl = getJumpTable32();
+        TABLE64(x3, tbl);
+        #ifdef JMPTABL_SHIFT4
+        UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
+        LDRx_REG_LSL3(x3, x3, x2);
+        #endif
+        UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
+        LDRx_REG_LSL3(x3, x3, x2);
+        UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0);
+        LDRx_REG_LSL3(x2, x3, x2);
+    }
     #ifdef HAVE_TRACE
     BLR(x2);
     #else
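
A note on how the table roots are materialized in these ARM64 helpers: the 48-bit root is loaded with MOV64x and annotated as a low address, while the 32-bit root uses TABLE64. If MOV64x builds the immediate out of 16-bit chunks (MOVZ plus one MOVK per additional nonzero chunk, as its name suggests) and TABLE64 is a single load from the block's literal table, then a low static address is cheap to synthesize inline, whereas an arbitrary pointer is not; this is an assumption about the emitter macros, but it matches the comments in the diff. A small sketch of that chunk-counting argument:

    #include <stdint.h>

    /* Instructions needed to build a 64-bit immediate, assuming one MOVZ/MOVK
     * per nonzero 16-bit chunk (and at least one instruction for zero). */
    static int mov64_cost(uint64_t imm)
    {
        int n = 0;
        for (int shift = 0; shift < 64; shift += 16)
            if ((imm >> shift) & 0xFFFF)
                n++;
        return n ? n : 1;
    }

    /* mov64_cost(0x0000000000412340) == 2: low static address, cheap inline   */
    /* mov64_cost(0x00007f41d2c01000) == 3: heap-like pointer, one literal load wins */
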
diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c
index b6fc31c2..97ed2d97 100644
--- a/src/dynarec/la64/dynarec_la64_helper.c
+++ b/src/dynarec/la64/dynarec_la64_helper.c
@@ -547,10 +547,10 @@ void jump_to_next(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst, int is3
             BSTRPICK_D(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3);
             ALSL_D(x3, x2, x3, 3);
             LD_D(x3, x3, 0);
+            BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
+            ALSL_D(x3, x2, x3, 3);
+            LD_D(x3, x3, 0);
         }
-        BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
-        ALSL_D(x3, x2, x3, 3);
-        LD_D(x3, x3, 0);
         BSTRPICK_D(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1);
         ALSL_D(x3, x2, x3, 3);
         LD_D(x3, x3, 0);
@@ -601,10 +601,10 @@ void ret_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int ninst, rex_t rex)
         BSTRPICK_D(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3);
         ALSL_D(x3, x2, x3, 3);
         LD_D(x3, x3, 0);
+        BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
+        ALSL_D(x3, x2, x3, 3);
+        LD_D(x3, x3, 0);
     }
-    BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
-    ALSL_D(x3, x2, x3, 3);
-    LD_D(x3, x3, 0);
     BSTRPICK_D(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1);
     ALSL_D(x3, x2, x3, 3);
     LD_D(x3, x3, 0);
@@ -646,10 +646,10 @@ void retn_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int ninst, rex_t rex, int
         BSTRPICK_D(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3);
         ALSL_D(x3, x2, x3, 3);
         LD_D(x3, x3, 0);
+        BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
+        ALSL_D(x3, x2, x3, 3);
+        LD_D(x3, x3, 0);
     }
-    BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
-    ALSL_D(x3, x2, x3, 3);
-    LD_D(x3, x3, 0);
     BSTRPICK_D(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1);
     ALSL_D(x3, x2, x3, 3);
     LD_D(x3, x3, 0);
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index e9ba7119..1f4ef25b 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -605,10 +605,10 @@ void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst, int is3
                 TH_EXTU(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3);
                 TH_ADDSL(x3, x3, x2, 3);
                 LD(x3, x3, 0);
+                TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
+                TH_ADDSL(x3, x3, x2, 3);
+                LD(x3, x3, 0);
             }
-            TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
-            TH_ADDSL(x3, x3, x2, 3);
-            LD(x3, x3, 0);
             TH_EXTU(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1);
             TH_ADDSL(x3, x3, x2, 3);
             LD(x3, x3, 0);
@@ -625,14 +625,16 @@ void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst, int is3
                     ADD(x3, x3, x2);
                 }
                 LD(x3, x3, 0); // could be LR_D(x3, x3, 1, 1); for better safety
-            }
-            MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask
-            SRLI(x2, xRIP, JMPTABL_START2 - 3);
-            AND(x2, x2, x4);
-            ADD(x3, x3, x2);
-            LD(x3, x3, 0); // LR_D(x3, x3, 1, 1);
-            if (JMPTABLE_MASK2 != JMPTABLE_MASK1) {
-                MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask
+                MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask
+                SRLI(x2, xRIP, JMPTABL_START2 - 3);
+                AND(x2, x2, x4);
+                ADD(x3, x3, x2);
+                LD(x3, x3, 0); // LR_D(x3, x3, 1, 1);
+                if (JMPTABLE_MASK2 != JMPTABLE_MASK1) {
+                    MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask
+                }
+            } else {
+                MOV64x(x4, JMPTABLE_MASK1 << 3);
             }
             SRLI(x2, xRIP, JMPTABL_START1 - 3);
             AND(x2, x2, x4);
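
On RV64 without the XThead bit-field extension (the path above), each level instead loads a pre-shifted mask: RIP is shifted right by START - 3 so the extracted field is already multiplied by the 8-byte entry size, then masked with MASK << 3 to give a byte offset directly, and the mask register is reused for the next level when JMPTABLE_MASK2 equals JMPTABLE_MASK1. One level of that walk, sketched in C under the assumption that JMPTABLE_MASKn == (1 << JMPTABL_SHIFTn) - 1:

    #include <stdint.h>

    /* One mask-based table level: (rip >> (start - 3)) & (mask << 3) equals
     * ((rip >> start) & mask) << 3, i.e. the level index pre-scaled to bytes. */
    static uint64_t next_level(uint64_t table, uint64_t rip, int start, uint64_t mask)
    {
        uint64_t off = (rip >> (start - 3)) & (mask << 3);   /* SRLI + AND */
        return *(const uint64_t*)(uintptr_t)(table + off);   /* ADD + LD   */
    }
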
@@ -695,9 +697,9 @@ void ret_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int ninst, rex_t rex)
         if (!rex.is32bits) {
             TH_EXTU(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3);
             TH_LRD(x3, x3, x2, 3);
+            TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
+            TH_LRD(x3, x3, x2, 3);
         }
-        TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
-        TH_LRD(x3, x3, x2, 3);
         TH_EXTU(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1);
         TH_LRD(x3, x3, x2, 3);
         TH_EXTU(x2, xRIP, JMPTABL_START0 + JMPTABL_SHIFT0 - 1, JMPTABL_START0);
@@ -707,13 +709,15 @@ void ret_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int ninst, rex_t rex)
             SRLI(x2, xRIP, JMPTABL_START3);
             ADDSL(x3, x3, x2, 3, x2);
             LD(x3, x3, 0);
-        }
-        MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask
-        SRLI(x2, xRIP, JMPTABL_START2 - 3);
-        AND(x2, x2, x4);
-        ADD(x3, x3, x2);
-        LD(x3, x3, 0);
-        if (JMPTABLE_MASK2 != JMPTABLE_MASK1) {
+            MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask
+            SRLI(x2, xRIP, JMPTABL_START2 - 3);
+            AND(x2, x2, x4);
+            ADD(x3, x3, x2);
+            LD(x3, x3, 0);
+            if (JMPTABLE_MASK2 != JMPTABLE_MASK1) {
+                MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask
+            }
+        } else {
             MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask
         }
         SRLI(x2, xRIP, JMPTABL_START1 - 3);
@@ -772,9 +776,9 @@ void retn_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int ninst, rex_t rex, int
         if (!rex.is32bits) {
             TH_EXTU(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3);
             TH_LRD(x3, x3, x2, 3);
+            TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
+            TH_LRD(x3, x3, x2, 3);
         }
-        TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
-        TH_LRD(x3, x3, x2, 3);
        TH_EXTU(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1);
         TH_LRD(x3, x3, x2, 3);
         TH_EXTU(x2, xRIP, JMPTABL_START0 + JMPTABL_SHIFT0 - 1, JMPTABL_START0);
@@ -784,13 +788,15 @@ void retn_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int ninst, rex_t rex, int
             SRLI(x2, xRIP, JMPTABL_START3);
             ADDSL(x3, x3, x2, 3, x2);
             LD(x3, x3, 0);
-        }
-        MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask
-        SRLI(x2, xRIP, JMPTABL_START2 - 3);
-        AND(x2, x2, x4);
-        ADD(x3, x3, x2);
-        LD(x3, x3, 0);
-        if (JMPTABLE_MASK2 != JMPTABLE_MASK1) {
+            MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask
+            SRLI(x2, xRIP, JMPTABL_START2 - 3);
+            AND(x2, x2, x4);
+            ADD(x3, x3, x2);
+            LD(x3, x3, 0);
+            if (JMPTABLE_MASK2 != JMPTABLE_MASK1) {
+                MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask
+            }
+        } else {
             MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask
         }
         SRLI(x2, xRIP, JMPTABL_START1 - 3);