about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2024-08-29 05:05:02 +0800
committer GitHub <noreply@github.com> 2024-08-28 23:05:02 +0200
commit d9e5f8183f78c5f3035d3abf12e367cf05a1d4b5 (patch)
tree 3f6b060675c9270a669296593f7bf4f2fad5d3fd /src
parent abbaf9b593725d5f00d0482d0fc28310046cfd27 (diff)
download box64-d9e5f8183f78c5f3035d3abf12e367cf05a1d4b5.tar.gz
box64-d9e5f8183f78c5f3035d3abf12e367cf05a1d4b5.zip
[RV64_DYNAREC] Optimized jump_to_next using XTheadBb instructions (#1768)
* [RV64_DYNAREC] Optimized jump_to_next using XTheadBb instructions

* fixed some bad bad typos
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.c | 74
-rw-r--r-- src/dynarec/rv64/rv64_emitter.h | 4
2 files changed, 53 insertions, 25 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index 9810cf64..f46a8690 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -498,34 +498,62 @@ void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst, int is3
         if(reg!=xRIP) {
             MV(xRIP, reg);
         }
+        NOTEST(x2);
         uintptr_t tbl = is32bits?getJumpTable32():getJumpTable64();
         MAYUSE(tbl);
         TABLE64(x3, tbl);
-        if(!is32bits) {
-            SRLI(x2, xRIP, JMPTABL_START3);
-            if(rv64_zba) SH3ADD(x3, x2, x3); else {SLLI(x2, x2, 3); ADD(x3, x3, x2);}
-            LD(x3, x3, 0); // could be LR_D(x3, x3, 1, 1); for better safety
-        }
-        MOV64x(x4, JMPTABLE_MASK2<<3);    // x4 = mask
-        SRLI(x2, xRIP, JMPTABL_START2-3);
-        AND(x2, x2, x4);
-        ADD(x3, x3, x2);
-        LD(x3, x3, 0); //LR_D(x3, x3, 1, 1);
-        if(JMPTABLE_MASK2!=JMPTABLE_MASK1) {
-            MOV64x(x4, JMPTABLE_MASK1<<3);    // x4 = mask
-        }
-        SRLI(x2, xRIP, JMPTABL_START1-3);
-        AND(x2, x2, x4);
-        ADD(x3, x3, x2);
-        LD(x3, x3, 0); //LR_D(x3, x3, 1, 1);
-        if(JMPTABLE_MASK0<2048) {
-            ANDI(x2, xRIP, JMPTABLE_MASK0);
+        if (rv64_xtheadbb) {
+            if (!is32bits) {
+                TH_EXTU(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3);
+                TH_ADDSL(x3, x3, x2, 3);
+                LD(x3, x3, 0);
+            }
+            TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
+            TH_ADDSL(x3, x3, x2, 3);
+            LD(x3, x3, 0);
+            TH_EXTU(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1);
+            TH_ADDSL(x3, x3, x2, 3);
+            LD(x3, x3, 0);
+            TH_EXTU(x2, xRIP, JMPTABL_START0 + JMPTABL_SHIFT0 - 1, JMPTABL_START0);
+            TH_ADDSL(x3, x3, x2, 3);
+            LD(x2, x3, 0);
         } else {
-            MOV64x(x4, JMPTABLE_MASK0); // x4 = mask
-            AND(x2, xRIP, x4);
+            if (!is32bits) {
+                SRLI(x2, xRIP, JMPTABL_START3);
+                if (rv64_zba)
+                    SH3ADD(x3, x2, x3);
+                else {
+                    SLLI(x2, x2, 3);
+                    ADD(x3, x3, x2);
+                }
+                LD(x3, x3, 0); // could be LR_D(x3, x3, 1, 1); for better safety
+            }
+            MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask
+            SRLI(x2, xRIP, JMPTABL_START2 - 3);
+            AND(x2, x2, x4);
+            ADD(x3, x3, x2);
+            LD(x3, x3, 0); // LR_D(x3, x3, 1, 1);
+            if (JMPTABLE_MASK2 != JMPTABLE_MASK1) {
+                MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask
+            }
+            SRLI(x2, xRIP, JMPTABL_START1 - 3);
+            AND(x2, x2, x4);
+            ADD(x3, x3, x2);
+            LD(x3, x3, 0); // LR_D(x3, x3, 1, 1);
+            if (JMPTABLE_MASK0 < 2048) {
+                ANDI(x2, xRIP, JMPTABLE_MASK0);
+            } else {
+                MOV64x(x4, JMPTABLE_MASK0); // x4 = mask
+                AND(x2, xRIP, x4);
+            }
+            if (rv64_zba)
+                SH3ADD(x3, x2, x3);
+            else {
+                SLLI(x2, x2, 3);
+                ADD(x3, x3, x2);
+            }
+            LD(x2, x3, 0); // LR_D(x2, x3, 1, 1);
         }
-        if(rv64_zba) SH3ADD(x3, x2, x3); else {SLLI(x2, x2, 3); ADD(x3, x3, x2);}
-        LD(x2, x3, 0); //LR_D(x2, x3, 1, 1);
     } else {
         uintptr_t p = getJumpTableAddress64(ip);
         MAYUSE(p);
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 3f76135a..c17bb165 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -1028,11 +1028,11 @@ f28–31  ft8–11  FP temporaries                  Caller
 
 // Extract and sign-extend bits.
 // reg[rd] := sign_extend(reg[rs1][imm1:imm2])
-#define TH_EXT(rd, rs1, imm1, imm2) EMIT(I_type((((imm1) & 0x1f) << 6) | ((imm2) & 0x1f), rs1, 0b010, rd, 0b0001011))
+#define TH_EXT(rd, rs1, imm1, imm2) EMIT(I_type((((imm1) & 0x3f) << 6) | ((imm2) & 0x3f), rs1, 0b010, rd, 0b0001011))
 
 // Extract and zero-extend bits.
 // reg[rd] := zero_extend(reg[rs1][imm1:imm2])
-#define TH_EXTU(rd, rs1, imm1, imm2) EMIT(I_type((((imm1) & 0x1f) << 6) | ((imm2) & 0x1f), rs1, 0b011, rd, 0b0001011))
+#define TH_EXTU(rd, rs1, imm1, imm2) EMIT(I_type((((imm1) & 0x3f) << 6) | ((imm2) & 0x3f), rs1, 0b011, rd, 0b0001011))
 
 // Find first '0'-bit
 // for i=xlen..0: