author     ptitSeb <sebastien.chev@gmail.com>  2025-06-08 14:16:45 +0200
committer  ptitSeb <sebastien.chev@gmail.com>  2025-06-08 14:16:45 +0200
commit     685afa230291d64f350afbfdfa8fc82536d99f82 (patch)
tree       d079b3e3118023ec4651e43280d3f64602e79d58 /src
parent     d3f0d1c30f14fd789fc747e3704286259026fbc9 (diff)
download   box64-685afa230291d64f350afbfdfa8fc82536d99f82.tar.gz
           box64-685afa230291d64f350afbfdfa8fc82536d99f82.zip
[DYNAREC] Modified JumpTable slightly so 32-bit and 48-bit address space jumps can be done with only 2 and 3 memory fetches respectively (todo: RV64 and LA64 handling of 48 bits)
Diffstat (limited to 'src')
-rw-r--r--  src/custommem.c                            57
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_helper.c  124
-rw-r--r--  src/dynarec/la64/dynarec_la64_helper.c     18
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_helper.c     64
-rw-r--r--  src/include/custommem.h                     9
5 files changed, 180 insertions, 92 deletions
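
For orientation, here is a minimal C sketch (not part of the patch) of how the multi-level jump table resolves a guest RIP after this change. The shift values are taken from the default, non-SAVE_MEM branch of src/include/custommem.h as modified below; the lookup helpers are hypothetical stand-ins for the sequences emitted by the dynarec backends. A full 64-bit lookup still walks four dependent loads, while the new static box64_jmptbl_48 table lets 48-bit addresses start one level down (3 loads), and the table returned by getJumpTable32() serves 32-bit addresses with 2.

#include <stdint.h>

/* default (non-SAVE_MEM) layout after this patch; JMPTABL_START0 == 0 is
 * implied by the unshifted idx0 computation in custommem.c */
#define JMPTABL_SHIFT3 16
#define JMPTABL_SHIFT2 16
#define JMPTABL_SHIFT1 18
#define JMPTABL_SHIFT0 14
#define JMPTABL_START0 0
#define JMPTABL_START1 (JMPTABL_START0 + JMPTABL_SHIFT0)   /* 14 */
#define JMPTABL_START2 (JMPTABL_START1 + JMPTABL_SHIFT1)   /* 32 */
#define JMPTABL_START3 (JMPTABL_START2 + JMPTABL_SHIFT2)   /* 48 */
#define IDX(a, start, shift) (((a) >> (start)) & ((1ull << (shift)) - 1))

/* full 64-bit lookup: four dependent loads, one per level */
static uintptr_t lookup64(uintptr_t ****jmptbl3, uintptr_t rip)
{
    uintptr_t ***l2 = jmptbl3[IDX(rip, JMPTABL_START3, JMPTABL_SHIFT3)]; /* load 1 */
    uintptr_t  **l1 = l2[IDX(rip, JMPTABL_START2, JMPTABL_SHIFT2)];      /* load 2 */
    uintptr_t   *l0 = l1[IDX(rip, JMPTABL_START1, JMPTABL_SHIFT1)];      /* load 3 */
    return            l0[IDX(rip, JMPTABL_START0, JMPTABL_SHIFT0)];      /* load 4 */
}

/* 48-bit path: box64_jmptbl_48 is a static array, so its base is an
 * immediate and only three dependent loads remain; addresses with any of
 * the top 16 bits set take the slow path instead */
static uintptr_t lookup48(uintptr_t ***jmptbl_48, uintptr_t rip)
{
    if (rip >> 48) return 0;                                              /* slow path */
    uintptr_t **l1 = jmptbl_48[IDX(rip, JMPTABL_START2, JMPTABL_SHIFT2)]; /* load 1 */
    uintptr_t  *l0 = l1[IDX(rip, JMPTABL_START1, JMPTABL_SHIFT1)];        /* load 2 */
    return           l0[IDX(rip, JMPTABL_START0, JMPTABL_SHIFT0)];        /* load 3 */
}

/* 32-bit path: getJumpTable32() hands back a next-to-last-level table
 * (box64_jmptbl3[0][0] in this configuration), so two loads are enough */
static uintptr_t lookup32(uintptr_t **tbl32, uint32_t eip)
{
    uintptr_t *l0 = tbl32[IDX(eip, JMPTABL_START1, JMPTABL_SHIFT1)];      /* load 1 */
    return          l0[IDX(eip, JMPTABL_START0, JMPTABL_SHIFT0)];         /* load 2 */
}

Since box64_jmptbl_48 is itself static and installed as box64_jmptbl3[0], the level-3 load can be skipped whenever the top 16 address bits are known to be zero; the new LSR #48 / CBNZ guard in the ARM64 helper enforces exactly that before using the 48-bit table.
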
diff --git a/src/custommem.c b/src/custommem.c
index 6e99bcdd..96e1130e 100644
--- a/src/custommem.c
+++ b/src/custommem.c
@@ -31,12 +31,23 @@
 static mmaplist_t          *mmaplist = NULL;
 static rbtree_t            *rbt_dynmem = NULL;
 static uint64_t jmptbl_allocated = 0, jmptbl_allocated1 = 0, jmptbl_allocated2 = 0, jmptbl_allocated3 = 0;
+#if JMPTABL_SHIFTMAX != 16
+#error Incorrect value for jumptable shift max, which should be 16
+#endif
 #ifdef JMPTABL_SHIFT4
+#if JMPTABL_SHIFT3 != 16
+#error Incorrect value for jumptable shift3, which should be 16
+#endif
 static uint64_t jmptbl_allocated4 = 0;
 static uintptr_t****       box64_jmptbl4[1<<JMPTABL_SHIFT4];
 static uintptr_t***        box64_jmptbldefault3[1<<JMPTABL_SHIFT3];
+static uintptr_t***        box64_jmptbl_48[1<<JMPTABL_SHIFT3];
 #else
+#if JMPTABL_SHIFT2 != 16
+#error Incorrect value for jumptable shift2, which should be 16
+#endif
 static uintptr_t***        box64_jmptbl3[1<<JMPTABL_SHIFT3];
+static uintptr_t**         box64_jmptbl_48[1<<JMPTABL_SHIFT2];
 #endif
 static uintptr_t**         box64_jmptbldefault2[1<<JMPTABL_SHIFT2];
 static uintptr_t*          box64_jmptbldefault1[1<<JMPTABL_SHIFT1];
@@ -1281,7 +1292,7 @@ int cleanDBFromAddressRange(uintptr_t addr, size_t size, int destroy)
 }
 
 #ifdef JMPTABL_SHIFT4
-static uintptr_t *create_jmptbl(uintptr_t idx0, uintptr_t idx1, uintptr_t idx2, uintptr_t idx3, uintptr_t idx4)
+static uintptr_t *create_jmptbl(int for32bits, uintptr_t idx0, uintptr_t idx1, uintptr_t idx2, uintptr_t idx3, uintptr_t idx4)
 {
     if(box64_jmptbl4[idx4] == box64_jmptbldefault3) {
         uintptr_t**** tbl = (uintptr_t****)customMalloc((1<<JMPTABL_SHIFT3)*sizeof(uintptr_t***));
@@ -1309,6 +1320,7 @@ static uintptr_t *create_jmptbl(uintptr_t idx0, uintptr_t idx1, uintptr_t idx2,
         }
 #endif
     }
+    if(for32bits) return NULL;
     if(box64_jmptbl4[idx4][idx3][idx2] == box64_jmptbldefault1) {
         uintptr_t** tbl = (uintptr_t**)customMalloc((1<<JMPTABL_SHIFT1)*sizeof(uintptr_t*));
         for(int i=0; i<(1<<JMPTABL_SHIFT1); ++i)
@@ -1338,7 +1350,7 @@ static uintptr_t *create_jmptbl(uintptr_t idx0, uintptr_t idx1, uintptr_t idx2,
     return &box64_jmptbl4[idx4][idx3][idx2][idx1][idx0];
 }
 #else
-static uintptr_t *create_jmptbl(uintptr_t idx0, uintptr_t idx1, uintptr_t idx2, uintptr_t idx3)
+static uintptr_t *create_jmptbl(int for32bits, uintptr_t idx0, uintptr_t idx1, uintptr_t idx2, uintptr_t idx3)
 {
     if(box64_jmptbl3[idx3] == box64_jmptbldefault2) {
         uintptr_t*** tbl = (uintptr_t***)customMalloc((1<<JMPTABL_SHIFT2)*sizeof(uintptr_t**));
@@ -1366,6 +1378,7 @@ static uintptr_t *create_jmptbl(uintptr_t idx0, uintptr_t idx1, uintptr_t idx2,
         }
 #endif
     }
+    if(for32bits) return NULL;
     if(box64_jmptbl3[idx3][idx2][idx1] == box64_jmptbldefault0) {
         uintptr_t* tbl = (uintptr_t*)customMalloc((1<<JMPTABL_SHIFT0)*sizeof(uintptr_t));
         for(int i=0; i<(1<<JMPTABL_SHIFT0); ++i)
@@ -1396,9 +1409,9 @@ int addJumpTableIfDefault64(void* addr, void* jmp)
     idx0 = (((uintptr_t)addr)                )&JMPTABLE_MASK0;
 
     #ifdef JMPTABL_SHIFT4
-    return (native_lock_storeifref(create_jmptbl(idx0, idx1, idx2, idx3, idx4), jmp, native_next)==jmp)?1:0;
+    return (native_lock_storeifref(create_jmptbl(0, idx0, idx1, idx2, idx3, idx4), jmp, native_next)==jmp)?1:0;
     #else
-    return (native_lock_storeifref(create_jmptbl(idx0, idx1, idx2, idx3), jmp, native_next)==jmp)?1:0;
+    return (native_lock_storeifref(create_jmptbl(0, idx0, idx1, idx2, idx3), jmp, native_next)==jmp)?1:0;
     #endif
 }
 void setJumpTableDefault64(void* addr)
@@ -1456,9 +1469,9 @@ int setJumpTableIfRef64(void* addr, void* jmp, void* ref)
     idx1 = (((uintptr_t)addr)>>JMPTABL_START1)&JMPTABLE_MASK1;
     idx0 = (((uintptr_t)addr)    )&JMPTABLE_MASK0;
     #ifdef JMPTABL_SHIFT4
-    return (native_lock_storeifref(create_jmptbl(idx0, idx1, idx2, idx3, idx4), jmp, ref)==jmp)?1:0;
+    return (native_lock_storeifref(create_jmptbl(0, idx0, idx1, idx2, idx3, idx4), jmp, ref)==jmp)?1:0;
     #else
-    return (native_lock_storeifref(create_jmptbl(idx0, idx1, idx2, idx3), jmp, ref)==jmp)?1:0;
+    return (native_lock_storeifref(create_jmptbl(0, idx0, idx1, idx2, idx3), jmp, ref)==jmp)?1:0;
     #endif
 }
 int isJumpTableDefault64(void* addr)
@@ -1491,13 +1504,19 @@ uintptr_t getJumpTable64()
     return (uintptr_t)box64_jmptbl3;
     #endif
 }
+uintptr_t getJumpTable48()
+{
+    return (uintptr_t)box64_jmptbl_48;
+}
 
 uintptr_t getJumpTable32()
 {
     #ifdef JMPTABL_SHIFT4
+    create_jmptbl(1, 0, 0, 0, 0, 0);
     return (uintptr_t)box64_jmptbl4[0][0];
     #else
-    return (uintptr_t)box64_jmptbl3[0];
+    create_jmptbl(1, 0, 0, 0, 0);
+    return (uintptr_t)box64_jmptbl3[0][0];
     #endif
 }
 
@@ -1512,9 +1531,9 @@ uintptr_t getJumpTableAddress64(uintptr_t addr)
     idx1 = ((addr)>>JMPTABL_START1)&JMPTABLE_MASK1;
     idx0 = ((addr)                )&JMPTABLE_MASK0;
     #ifdef JMPTABL_SHIFT4
-    return (uintptr_t)create_jmptbl(idx0, idx1, idx2, idx3, idx4);
+    return (uintptr_t)create_jmptbl(0, idx0, idx1, idx2, idx3, idx4);
     #else
-    return (uintptr_t)create_jmptbl(idx0, idx1, idx2, idx3);
+    return (uintptr_t)create_jmptbl(0, idx0, idx1, idx2, idx3);
     #endif
 }
 
@@ -2242,11 +2261,17 @@ void init_custommem_helper(box64context_t* ctx)
         #ifdef JMPTABL_SHIFT4
         for(int i=0; i<(1<<JMPTABL_SHIFT4); ++i)
             box64_jmptbl4[i] = box64_jmptbldefault3;
-        for(int i=0; i<(1<<JMPTABL_SHIFT3); ++i)
+        for(int i=0; i<(1<<JMPTABL_SHIFT3); ++i) {
             box64_jmptbldefault3[i] = box64_jmptbldefault2;
+            box64_jmptbl_48[i] = box64_jmptbldefault2;
+        }
+        box64_jmptbl4[0] = box64_jmptbl_48;
         #else
-        for(int i=0; i<(1<<JMPTABL_SHIFT3); ++i)
-            box64_jmptbl3[i] = box64_jmptbldefault2;
+        for(int i=0; i<(1<<JMPTABL_SHIFT3); ++i) {
+            box64_jmptbl3[i] = box64_jmptbldefault2;
+            box64_jmptbl_48[i] = box64_jmptbldefault1;
+        }
+        box64_jmptbl3[0] = box64_jmptbl_48;
         #endif
         for(int i=0; i<(1<<JMPTABL_SHIFT2); ++i)
             box64_jmptbldefault2[i] = box64_jmptbldefault1;
@@ -2336,10 +2361,14 @@ void fini_custommem_helper(box64context_t *ctx)
                             }
                         customFree(box64_jmptbl3[i3][i2]);
                     }
-                customFree(box64_jmptbl3[i3]);
+                #ifndef JMPTABL_SHIFT4
+                if(i3)
+                #endif
+                    customFree(box64_jmptbl3[i3]);
             }
         #ifdef JMPTABL_SHIFT4
-                customFree(box64_jmptbl4[i4]);
+                if(i4)
+                    customFree(box64_jmptbl4[i4]);
             }
         #endif
     }
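
The init/fini changes above rest on an aliasing invariant that is easy to miss when reading the hunks in isolation: slot 0 of the top-level table is pointed at the static box64_jmptbl_48 array, so the teardown loop must not customFree() that slot (hence the new if(i3)/if(i4) guards). A minimal, self-contained sketch of that shape (an illustration, not code from the patch: the lower levels and the allocator are elided, malloc/free stand in for customMalloc/customFree, and the sizes use the default JMPTABL_SHIFT values):

#include <stdint.h>
#include <stdlib.h>

#define N3 (1 << 16)    /* 1 << JMPTABL_SHIFT3 */
#define N2 (1 << 16)    /* 1 << JMPTABL_SHIFT2 */

static uintptr_t **jmptbl_48[N2];       /* static 48-bit table: never freed    */
static uintptr_t ***jmptbl3[N3];        /* top level; slot 0 aliases jmptbl_48 */
static uintptr_t **jmptbldefault2[N2];  /* shared "still default" level        */

static void init_tables(void)
{
    for (int i = 0; i < N3; ++i)
        jmptbl3[i] = jmptbldefault2;
    jmptbl3[0] = jmptbl_48;             /* low 48 bits resolved without the first load */
}

static void fini_tables(void)
{
    for (int i3 = 0; i3 < N3; ++i3) {
        if (jmptbl3[i3] == jmptbldefault2)
            continue;                   /* never allocated, nothing to free */
        /* ... free the lower-level tables reachable from jmptbl3[i3] ... */
        if (i3)                         /* slot 0 is the static array above */
            free(jmptbl3[i3]);
    }
}

int main(void)
{
    init_tables();
    fini_tables();
    return 0;
}
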
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index 7f78e4de..088129e7 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -579,23 +579,39 @@ void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst, int is32
             MOVx_REG(xRIP, reg);
         }
         NOTEST(x2);
-        uintptr_t tbl = is32bits?getJumpTable32():getJumpTable64();
-        MAYUSE(tbl);
-        MOV64x(x3, tbl);
         if(!is32bits) {
+            // check higher 48bits
+            LSRx_IMM(x2, xRIP, 48);
+            CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block);
+            // load table
+            uintptr_t tbl = getJumpTable48();   // this is a static value, so will be a low address
+            MOV64x(x3, tbl);
             #ifdef JMPTABL_SHIFT4
-            UBFXx(x2, xRIP, JMPTABL_START4, JMPTABL_SHIFT4);
+            UBFXx(x2, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
             LDRx_REG_LSL3(x3, x3, x2);
             #endif
-            UBFXx(x2, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
+            UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
+            LDRx_REG_LSL3(x3, x3, x2);
+            UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
+            LDRx_REG_LSL3(x3, x3, x2);
+            UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0);
+            LDRx_REG_LSL3(x2, x3, x2);
+        } else {
+            // check higher 32bits disabled
+            //LSRx_IMM(x2, xRIP, 32);
+            //CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block);
+            // load table
+            uintptr_t tbl = getJumpTable32();   // this will not be a low address
+            TABLE64(x3, tbl);
+            #ifdef JMPTABL_SHIFT4
+            UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
             LDRx_REG_LSL3(x3, x3, x2);
+            #endif
+            UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
+            LDRx_REG_LSL3(x3, x3, x2);
+            UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0);
+            LDRx_REG_LSL3(x2, x3, x2);
         }
-        UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
-        LDRx_REG_LSL3(x3, x3, x2);
-        UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
-        LDRx_REG_LSL3(x3, x3, x2);
-        UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0);
-        LDRx_REG_LSL3(x2, x3, x2);
     } else {
         NOTEST(x2);
         uintptr_t p = getJumpTableAddress64(ip);
@@ -636,23 +652,40 @@ void ret_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int ninst, rex_t rex)
         // not the correct return address, regular jump, but purge the stack first, it's unsync now...
         SUBx_U12(xSP, xSavedSP, 16);
     }
-    uintptr_t tbl = rex.is32bits?getJumpTable32():getJumpTable64();
     NOTEST(x2);
-    MOV64x(x2, tbl);
     if(!rex.is32bits) {
+        // check higher 48bits
+        LSRx_IMM(x2, xRIP, 48);
+        CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block);
+        // load table
+        uintptr_t tbl = getJumpTable48();
+        MOV64x(x3, tbl);
         #ifdef JMPTABL_SHIFT4
-        UBFXx(x3, xRIP, JMPTABL_START4, JMPTABL_SHIFT4);
-        LDRx_REG_LSL3(x2, x2, x3);
+        UBFXx(x2, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
+        LDRx_REG_LSL3(x3, x3, x2);
         #endif
-        UBFXx(x3, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
-        LDRx_REG_LSL3(x2, x2, x3);
-    }
-    UBFXx(x3, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
-    LDRx_REG_LSL3(x2, x2, x3);
-    UBFXx(x3, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
-    LDRx_REG_LSL3(x2, x2, x3);
-    UBFXx(x3, xRIP, JMPTABL_START0, JMPTABL_SHIFT0);
-    LDRx_REG_LSL3(x2, x2, x3);
+        UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
+        LDRx_REG_LSL3(x3, x3, x2);
+        UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
+        LDRx_REG_LSL3(x3, x3, x2);
+        UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0);
+        LDRx_REG_LSL3(x2, x3, x2);
+    } else {
+        // check higher 32bits disabled
+        //LSRx_IMM(x2, xRIP, 32);
+        //CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block);
+        // load table
+        uintptr_t tbl = getJumpTable32();
+        TABLE64(x3, tbl);
+        #ifdef JMPTABL_SHIFT4
+        UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
+        LDRx_REG_LSL3(x3, x3, x2);
+        #endif
+        UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
+        LDRx_REG_LSL3(x3, x3, x2);
+        UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0);
+        LDRx_REG_LSL3(x2, x3, x2);
+    }
     #ifdef HAVE_TRACE
     BLR(x2);
     #else
@@ -683,23 +716,40 @@ void retn_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int ninst, rex_t rex, int
         // not the correct return address, regular jump
         SUBx_U12(xSP, xSavedSP, 16);
     }
-    uintptr_t tbl = rex.is32bits?getJumpTable32():getJumpTable64();
     NOTEST(x2);
-    MOV64x(x2, tbl);
     if(!rex.is32bits) {
+        // check higher 48bits
+        LSRx_IMM(x2, xRIP, 48);
+        CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block);
+        // load table
+        uintptr_t tbl = getJumpTable48();
+        MOV64x(x3, tbl);
         #ifdef JMPTABL_SHIFT4
-        UBFXx(x3, xRIP, JMPTABL_START4, JMPTABL_SHIFT4);
-        LDRx_REG_LSL3(x2, x2, x3);
+        UBFXx(x2, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
+        LDRx_REG_LSL3(x3, x3, x2);
         #endif
-        UBFXx(x3, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
-        LDRx_REG_LSL3(x2, x2, x3);
-    }
-    UBFXx(x3, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
-    LDRx_REG_LSL3(x2, x2, x3);
-    UBFXx(x3, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
-    LDRx_REG_LSL3(x2, x2, x3);
-    UBFXx(x3, xRIP, JMPTABL_START0, JMPTABL_SHIFT0);
-    LDRx_REG_LSL3(x2, x2, x3);
+        UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
+        LDRx_REG_LSL3(x3, x3, x2);
+        UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
+        LDRx_REG_LSL3(x3, x3, x2);
+        UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0);
+        LDRx_REG_LSL3(x2, x3, x2);
+    } else {
+        // check higher 32bits disabled
+        //LSRx_IMM(x2, xRIP, 32);
+        //CBNZw(x2, (intptr_t)dyn->jmp_next - (intptr_t)dyn->block);
+        // load table
+        uintptr_t tbl = getJumpTable32();
+        TABLE64(x3, tbl);
+        #ifdef JMPTABL_SHIFT4
+        UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
+        LDRx_REG_LSL3(x3, x3, x2);
+        #endif
+        UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
+        LDRx_REG_LSL3(x3, x3, x2);
+        UBFXx(x2, xRIP, JMPTABL_START0, JMPTABL_SHIFT0);
+        LDRx_REG_LSL3(x2, x3, x2);
+    }
     #ifdef HAVE_TRACE
     BLR(x2);
     #else
diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c
index b6fc31c2..97ed2d97 100644
--- a/src/dynarec/la64/dynarec_la64_helper.c
+++ b/src/dynarec/la64/dynarec_la64_helper.c
@@ -547,10 +547,10 @@ void jump_to_next(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst, int is3
             BSTRPICK_D(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3);
             ALSL_D(x3, x2, x3, 3);
             LD_D(x3, x3, 0);
+            BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
+            ALSL_D(x3, x2, x3, 3);
+            LD_D(x3, x3, 0);
         }
-        BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
-        ALSL_D(x3, x2, x3, 3);
-        LD_D(x3, x3, 0);
         BSTRPICK_D(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1);
         ALSL_D(x3, x2, x3, 3);
         LD_D(x3, x3, 0);
@@ -601,10 +601,10 @@ void ret_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int ninst, rex_t rex)
         BSTRPICK_D(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3);
         ALSL_D(x3, x2, x3, 3);
         LD_D(x3, x3, 0);
+        BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
+        ALSL_D(x3, x2, x3, 3);
+        LD_D(x3, x3, 0);
     }
-    BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
-    ALSL_D(x3, x2, x3, 3);
-    LD_D(x3, x3, 0);
     BSTRPICK_D(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1);
     ALSL_D(x3, x2, x3, 3);
     LD_D(x3, x3, 0);
@@ -646,10 +646,10 @@ void retn_to_epilog(dynarec_la64_t* dyn, uintptr_t ip, int ninst, rex_t rex, int
         BSTRPICK_D(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3);
         ALSL_D(x3, x2, x3, 3);
         LD_D(x3, x3, 0);
+        BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
+        ALSL_D(x3, x2, x3, 3);
+        LD_D(x3, x3, 0);
     }
-    BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
-    ALSL_D(x3, x2, x3, 3);
-    LD_D(x3, x3, 0);
     BSTRPICK_D(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1);
     ALSL_D(x3, x2, x3, 3);
     LD_D(x3, x3, 0);
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index e9ba7119..1f4ef25b 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -605,10 +605,10 @@ void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst, int is3
                 TH_EXTU(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3);
                 TH_ADDSL(x3, x3, x2, 3);
                 LD(x3, x3, 0);
+                TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
+                TH_ADDSL(x3, x3, x2, 3);
+                LD(x3, x3, 0);
             }
-            TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
-            TH_ADDSL(x3, x3, x2, 3);
-            LD(x3, x3, 0);
             TH_EXTU(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1);
             TH_ADDSL(x3, x3, x2, 3);
             LD(x3, x3, 0);
@@ -625,14 +625,16 @@ void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst, int is3
                     ADD(x3, x3, x2);
                 }
                 LD(x3, x3, 0); // could be LR_D(x3, x3, 1, 1); for better safety
-            }
-            MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask
-            SRLI(x2, xRIP, JMPTABL_START2 - 3);
-            AND(x2, x2, x4);
-            ADD(x3, x3, x2);
-            LD(x3, x3, 0); // LR_D(x3, x3, 1, 1);
-            if (JMPTABLE_MASK2 != JMPTABLE_MASK1) {
-                MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask
+                MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask
+                SRLI(x2, xRIP, JMPTABL_START2 - 3);
+                AND(x2, x2, x4);
+                ADD(x3, x3, x2);
+                LD(x3, x3, 0); // LR_D(x3, x3, 1, 1);
+                if (JMPTABLE_MASK2 != JMPTABLE_MASK1) {
+                    MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask
+                }
+            } else {
+                MOV64x(x4, JMPTABLE_MASK1 << 3);
             }
             SRLI(x2, xRIP, JMPTABL_START1 - 3);
             AND(x2, x2, x4);
@@ -695,9 +697,9 @@ void ret_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int ninst, rex_t rex)
         if (!rex.is32bits) {
             TH_EXTU(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3);
             TH_LRD(x3, x3, x2, 3);
+            TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
+            TH_LRD(x3, x3, x2, 3);
         }
-        TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
-        TH_LRD(x3, x3, x2, 3);
         TH_EXTU(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1);
         TH_LRD(x3, x3, x2, 3);
         TH_EXTU(x2, xRIP, JMPTABL_START0 + JMPTABL_SHIFT0 - 1, JMPTABL_START0);
@@ -707,13 +709,15 @@ void ret_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int ninst, rex_t rex)
             SRLI(x2, xRIP, JMPTABL_START3);
             ADDSL(x3, x3, x2, 3, x2);
             LD(x3, x3, 0);
-        }
-        MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask
-        SRLI(x2, xRIP, JMPTABL_START2 - 3);
-        AND(x2, x2, x4);
-        ADD(x3, x3, x2);
-        LD(x3, x3, 0);
-        if (JMPTABLE_MASK2 != JMPTABLE_MASK1) {
+            MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask
+            SRLI(x2, xRIP, JMPTABL_START2 - 3);
+            AND(x2, x2, x4);
+            ADD(x3, x3, x2);
+            LD(x3, x3, 0);
+            if (JMPTABLE_MASK2 != JMPTABLE_MASK1) {
+                MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask
+            }
+        } else {
             MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask
         }
         SRLI(x2, xRIP, JMPTABL_START1 - 3);
@@ -772,9 +776,9 @@ void retn_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int ninst, rex_t rex, int
         if (!rex.is32bits) {
             TH_EXTU(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3);
             TH_LRD(x3, x3, x2, 3);
+            TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
+            TH_LRD(x3, x3, x2, 3);
         }
-        TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
-        TH_LRD(x3, x3, x2, 3);
         TH_EXTU(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT2 - 1, JMPTABL_START1);
         TH_LRD(x3, x3, x2, 3);
         TH_EXTU(x2, xRIP, JMPTABL_START0 + JMPTABL_SHIFT0 - 1, JMPTABL_START0);
@@ -784,13 +788,15 @@ void retn_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int ninst, rex_t rex, int
             SRLI(x2, xRIP, JMPTABL_START3);
             ADDSL(x3, x3, x2, 3, x2);
             LD(x3, x3, 0);
-        }
-        MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask
-        SRLI(x2, xRIP, JMPTABL_START2 - 3);
-        AND(x2, x2, x4);
-        ADD(x3, x3, x2);
-        LD(x3, x3, 0);
-        if (JMPTABLE_MASK2 != JMPTABLE_MASK1) {
+            MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask
+            SRLI(x2, xRIP, JMPTABL_START2 - 3);
+            AND(x2, x2, x4);
+            ADD(x3, x3, x2);
+            LD(x3, x3, 0);
+            if (JMPTABLE_MASK2 != JMPTABLE_MASK1) {
+                MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask
+            }
+        } else {
             MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask
         }
         SRLI(x2, xRIP, JMPTABL_START1 - 3);
diff --git a/src/include/custommem.h b/src/include/custommem.h
index 4bfcc98a..d7b79296 100644
--- a/src/include/custommem.h
+++ b/src/include/custommem.h
@@ -47,13 +47,15 @@ void setJumpTableDefault64(void* addr);
 void setJumpTableDefaultRef64(void* addr, void* jmp);
 int isJumpTableDefault64(void* addr);
 uintptr_t getJumpTable64(void);
+uintptr_t getJumpTable48(void);
 uintptr_t getJumpTable32(void);
 uintptr_t getJumpTableAddress64(uintptr_t addr);
 uintptr_t getJumpAddress64(uintptr_t addr);
 
 #ifdef SAVE_MEM
+#define JMPTABL_SHIFTMAX   JMPTABL_SHIFT4
 #define JMPTABL_SHIFT4 16
-#define JMPTABL_SHIFT3 14
+#define JMPTABL_SHIFT3 16
 #define JMPTABL_SHIFT2 12
 #define JMPTABL_SHIFT1 12
 #define JMPTABL_SHIFT0 10
@@ -68,10 +70,11 @@ uintptr_t getJumpAddress64(uintptr_t addr);
 #define JMPTABLE_MASK1 ((1<<JMPTABL_SHIFT1)-1)
 #define JMPTABLE_MASK0 ((1<<JMPTABL_SHIFT0)-1)
 #else
+#define JMPTABL_SHIFTMAX   JMPTABL_SHIFT3
 #define JMPTABL_SHIFT3 16
-#define JMPTABL_SHIFT2 18
+#define JMPTABL_SHIFT2 16
 #define JMPTABL_SHIFT1 18
-#define JMPTABL_SHIFT0 12
+#define JMPTABL_SHIFT0 14
 #define JMPTABL_START3 (JMPTABL_START2+JMPTABL_SHIFT2)
 #define JMPTABL_START2 (JMPTABL_START1+JMPTABL_SHIFT1)
 #define JMPTABL_START1 (JMPTABL_START0+JMPTABL_SHIFT0)
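
As a quick sanity check on the new default (non-SAVE_MEM) shift values, the four levels now tile the 64-bit address space so that the fast-path guards in the ARM64 helper line up with bit 48, and the whole 32-bit space falls inside a single level-2 slot. A throwaway check (assuming JMPTABL_START0 is 0, as the unshifted idx0 computation in custommem.c implies):

#include <assert.h>

#define JMPTABL_SHIFT3 16
#define JMPTABL_SHIFT2 16
#define JMPTABL_SHIFT1 18
#define JMPTABL_SHIFT0 14
#define JMPTABL_START0 0
#define JMPTABL_START1 (JMPTABL_START0 + JMPTABL_SHIFT0)
#define JMPTABL_START2 (JMPTABL_START1 + JMPTABL_SHIFT1)
#define JMPTABL_START3 (JMPTABL_START2 + JMPTABL_SHIFT2)

int main(void)
{
    /* the four levels cover exactly 64 bits: 14 + 18 + 16 + 16 */
    assert(JMPTABL_START3 + JMPTABL_SHIFT3 == 64);
    /* level 3 is selected by bits 48..63, which is exactly what the
       LSRx_IMM(x2, xRIP, 48) / CBNZw guard tests before using the 48-bit table */
    assert(JMPTABL_START3 == 48);
    /* level 2 starts at bit 32, so any 32-bit address has idx2 == 0 and the
       table returned by getJumpTable32() only needs idx1 and idx0 */
    assert(JMPTABL_START2 == 32);
    return 0;
}
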