about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2024-02-04 11:34:29 +0100
committer ptitSeb <sebastien.chev@gmail.com> 2024-02-04 11:34:29 +0100
commit 0c41d9810fcc4f67902988b9f7d0a15d83d6568a (patch)
tree a2273828df0c76ab55115d5cef51080dd6433031
parent 165961f27e264164dd62eea0164d16d9d436a8a5 (diff)
download box64-0c41d9810fcc4f67902988b9f7d0a15d83d6568a.tar.gz
box64-0c41d9810fcc4f67902988b9f7d0a15d83d6568a.zip
[ARM64_DYNAREC][32BITS] Small optim on jump table for 32bits access (1 less read, or 2 in SAVE_MEM configuration)
-rw-r--r-- src/custommem.c 9
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_00.c 12
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_0f.c 2
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_64.c 4
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_67.c 2
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_helper.c 50
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_helper.h 2
-rw-r--r-- src/dynarec/dynarec_native_pass.c 2
-rw-r--r-- src/include/custommem.h 1
9 files changed, 50 insertions, 34 deletions
diff --git a/src/custommem.c b/src/custommem.c
index faac9740..6138a63e 100644
--- a/src/custommem.c
+++ b/src/custommem.c
@@ -960,6 +960,15 @@ uintptr_t getJumpTable64()
     #endif
 }
 
+uintptr_t getJumpTable32()
+{
+    #ifdef JMPTABL_SHIFT4
+    return (uintptr_t)box64_jmptbl4[0][0];
+    #else
+    return (uintptr_t)box64_jmptbl3[0];
+    #endif
+}
+
 uintptr_t getJumpTableAddress64(uintptr_t addr)
 {
     uintptr_t idx3, idx2, idx1, idx0;
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c
index a269e5cf..bcfced0b 100644
--- a/src/dynarec/arm64/dynarec_arm64_00.c
+++ b/src/dynarec/arm64/dynarec_arm64_00.c
@@ -923,7 +923,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 if(dyn->insts[ninst].x64.jmp_insts==-1) {               \
                     if(!(dyn->insts[ninst].x64.barrier&BARRIER_FLOAT))  \
                         fpu_purgecache(dyn, ninst, 1, x1, x2, x3);      \
-                    jump_to_next(dyn, addr+i8, 0, ninst);               \
+                    jump_to_next(dyn, addr+i8, 0, ninst, rex.is32bits); \
                 } else {                                                \
                     CacheTransform(dyn, ninst, cacheupd, x1, x2, x3);   \
                     i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size);\
@@ -2756,7 +2756,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 if(dyn->insts[ninst].x64.jmp_insts==-1) {               \
                     if(!(dyn->insts[ninst].x64.barrier&BARRIER_FLOAT))  \
                         fpu_purgecache(dyn, ninst, 1, x1, x2, x3);      \
-                    jump_to_next(dyn, addr+i8, 0, ninst);               \
+                    jump_to_next(dyn, addr+i8, 0, ninst, rex.is32bits); \
                 } else {                                                \
                     CacheTransform(dyn, ninst, cacheupd, x1, x2, x3);   \
                     i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size);    \
@@ -2917,7 +2917,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         *ok = 0;
                         *need_epilog = 0;
                     }
-                    jump_to_next(dyn, addr+i32, 0, ninst);
+                    jump_to_next(dyn, addr+i32, 0, ninst, rex.is32bits);
                     break;
             }
             break;
@@ -2940,7 +2940,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             if(dyn->insts[ninst].x64.jmp_insts==-1) {
                 // out of the block
                 fpu_purgecache(dyn, ninst, 1, x1, x2, x3);
-                jump_to_next(dyn, (uintptr_t)getAlternate((void*)j64), 0, ninst);
+                jump_to_next(dyn, (uintptr_t)getAlternate((void*)j64), 0, ninst, rex.is32bits);
             } else {
                 // inside the block
                 CacheTransform(dyn, ninst, CHECK_CACHE(), x1, x2, x3);
@@ -3327,14 +3327,14 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         STPx_S7_preindex(x4, xRIP, xSP, -16);
                     }
                     PUSH1z(xRIP);
-                    jump_to_next(dyn, 0, ed, ninst);
+                    jump_to_next(dyn, 0, ed, ninst, rex.is32bits);
                     break;
                 case 4: // JMP Ed
                     INST_NAME("JMP Ed");
                     READFLAGS(X_PEND);
                     BARRIER(BARRIER_FLOAT);
                     GETEDz(0);
-                    jump_to_next(dyn, 0, ed, ninst);
+                    jump_to_next(dyn, 0, ed, ninst, rex.is32bits);
                     *need_epilog = 0;
                     *ok = 0;
                     break;
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 04573bd4..e3ac683e 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -1523,7 +1523,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 if(dyn->insts[ninst].x64.jmp_insts==-1) {               \
                     if(!(dyn->insts[ninst].x64.barrier&BARRIER_FLOAT))  \
                         fpu_purgecache(dyn, ninst, 1, x1, x2, x3);      \
-                    jump_to_next(dyn, addr+i32_, 0, ninst);             \
+                    jump_to_next(dyn, addr+i32_, 0, ninst, rex.is32bits); \
                 } else {                                                \
                     CacheTransform(dyn, ninst, cacheupd, x1, x2, x3);   \
                     i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size);    \
diff --git a/src/dynarec/arm64/dynarec_arm64_64.c b/src/dynarec/arm64/dynarec_arm64_64.c
index 33f5d315..8d4f3bfa 100644
--- a/src/dynarec/arm64/dynarec_arm64_64.c
+++ b/src/dynarec/arm64/dynarec_arm64_64.c
@@ -1155,14 +1155,14 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         STPx_S7_preindex(x4, xRIP, xSP, -16);
                     }
                     PUSH1z(xRIP);
-                    jump_to_next(dyn, 0, ed, ninst);
+                    jump_to_next(dyn, 0, ed, ninst, rex.is32bits);
                     break;
                 case 4: // JMP Ed
                     INST_NAME("JMP Ed");
                     READFLAGS(X_PEND);
                     BARRIER(BARRIER_FLOAT);
                     GETEDOz(x6, 0);
-                    jump_to_next(dyn, 0, ed, ninst);
+                    jump_to_next(dyn, 0, ed, ninst, rex.is32bits);
                     *need_epilog = 0;
                     *ok = 0;
                     break;
diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c
index cde17af2..c7d80a6f 100644
--- a/src/dynarec/arm64/dynarec_arm64_67.c
+++ b/src/dynarec/arm64/dynarec_arm64_67.c
@@ -1014,7 +1014,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 if(dyn->insts[ninst].x64.jmp_insts==-1) {               \
                     if(!(dyn->insts[ninst].x64.barrier&BARRIER_FLOAT))  \
                         fpu_purgecache(dyn, ninst, 1, x1, x2, x3);      \
-                    jump_to_next(dyn, addr+i8, 0, ninst);               \
+                    jump_to_next(dyn, addr+i8, 0, ninst, rex.is32bits); \
                 } else {                                                \
                     CacheTransform(dyn, ninst, cacheupd, x1, x2, x3);   \
                     i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size);\
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index 54f29546..cf737369 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -564,7 +564,7 @@ void jump_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst)
     BR(x2);
 }
 
-void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst)
+void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst, int is32bits)
 {
     MAYUSE(dyn); MAYUSE(ninst);
     MESSAGE(LOG_DUMP, "Jump to next\n");
@@ -575,15 +575,17 @@ void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst)
             MOVx_REG(xRIP, reg);
         }
         NOTEST(x2);
-        uintptr_t tbl = getJumpTable64();
+        uintptr_t tbl = is32bits?getJumpTable32():getJumpTable64();
         MAYUSE(tbl);
         TABLE64(x3, tbl);
-        #ifdef JMPTABL_SHIFT4
-        UBFXx(x2, xRIP, JMPTABL_START4, JMPTABL_SHIFT4);
-        LDRx_REG_LSL3(x3, x3, x2);
-        #endif
-        UBFXx(x2, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
-        LDRx_REG_LSL3(x3, x3, x2);
+        if(!is32bits) {
+            #ifdef JMPTABL_SHIFT4
+            UBFXx(x2, xRIP, JMPTABL_START4, JMPTABL_SHIFT4);
+            LDRx_REG_LSL3(x3, x3, x2);
+            #endif
+            UBFXx(x2, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
+            LDRx_REG_LSL3(x3, x3, x2);
+        }
         UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
         LDRx_REG_LSL3(x3, x3, x2);
         UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
@@ -624,15 +626,17 @@ void ret_to_epilog(dynarec_arm_t* dyn, int ninst, rex_t rex)
         // not the correct return address, regular jump, but purge the stack first, it's unsync now...
         SUBx_U12(xSP, xSavedSP, 16);
     }
-    uintptr_t tbl = getJumpTable64();
+    uintptr_t tbl = rex.is32bits?getJumpTable32():getJumpTable64();
     NOTEST(x2);
     MOV64x(x2, tbl);
-    #ifdef JMPTABL_SHIFT4
-    UBFXx(x3, xRIP, JMPTABL_START4, JMPTABL_SHIFT4);
-    LDRx_REG_LSL3(x2, x2, x3);
-    #endif
-    UBFXx(x3, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
-    LDRx_REG_LSL3(x2, x2, x3);
+    if(!rex.is32bits) {
+        #ifdef JMPTABL_SHIFT4
+        UBFXx(x3, xRIP, JMPTABL_START4, JMPTABL_SHIFT4);
+        LDRx_REG_LSL3(x2, x2, x3);
+        #endif
+        UBFXx(x3, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
+        LDRx_REG_LSL3(x2, x2, x3);
+    }
     UBFXx(x3, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
     LDRx_REG_LSL3(x2, x2, x3);
     UBFXx(x3, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
@@ -665,15 +669,17 @@ void retn_to_epilog(dynarec_arm_t* dyn, int ninst, rex_t rex, int n)
         // not the correct return address, regular jump
         SUBx_U12(xSP, xSavedSP, 16);
     }
-    uintptr_t tbl = getJumpTable64();
+    uintptr_t tbl = rex.is32bits?getJumpTable32():getJumpTable64();
     NOTEST(x2);
     MOV64x(x2, tbl);
-    #ifdef JMPTABL_SHIFT4
-    UBFXx(x3, xRIP, JMPTABL_START4, JMPTABL_SHIFT4);
-    LDRx_REG_LSL3(x2, x2, x3);
-    #endif
-    UBFXx(x3, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
-    LDRx_REG_LSL3(x2, x2, x3);
+    if(!rex.is32bits) {
+        #ifdef JMPTABL_SHIFT4
+        UBFXx(x3, xRIP, JMPTABL_START4, JMPTABL_SHIFT4);
+        LDRx_REG_LSL3(x2, x2, x3);
+        #endif
+        UBFXx(x3, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
+        LDRx_REG_LSL3(x2, x2, x3);
+    }
     UBFXx(x3, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
     LDRx_REG_LSL3(x2, x2, x3);
     UBFXx(x3, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index f66f4e08..99894327 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -1126,7 +1126,7 @@ uintptr_t geted16(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop,
 
 // generic x64 helper
 void jump_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst);
-void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst);
+void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst, int is32bits);
 void ret_to_epilog(dynarec_arm_t* dyn, int ninst, rex_t rex);
 void retn_to_epilog(dynarec_arm_t* dyn, int ninst, rex_t rex, int n);
 void iret_to_epilog(dynarec_arm_t* dyn, int ninst, int is64bits);
diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c
index c03b3155..acea77e0 100644
--- a/src/dynarec/dynarec_native_pass.c
+++ b/src/dynarec/dynarec_native_pass.c
@@ -315,7 +315,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int
             ++ninst;
             NOTEST(x3);
             fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
-            jump_to_next(dyn, addr, 0, ninst);
+            jump_to_next(dyn, addr, 0, ninst, rex.is32bits);
             ok=0; need_epilog=0;
         }
     }
diff --git a/src/include/custommem.h b/src/include/custommem.h
index b80eba47..997afe85 100644
--- a/src/include/custommem.h
+++ b/src/include/custommem.h
@@ -37,6 +37,7 @@ void setJumpTableDefault64(void* addr);
 void setJumpTableDefaultRef64(void* addr, void* jmp);
 int isJumpTableDefault64(void* addr);
 uintptr_t getJumpTable64(void);
+uintptr_t getJumpTable32(void);
 uintptr_t getJumpTableAddress64(uintptr_t addr);
 uintptr_t getJumpAddress64(uintptr_t addr);