about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: Yang Liu <liuyang22@iscas.ac.cn> 2025-05-26 17:18:21 +0800
committer: GitHub <noreply@github.com> 2025-05-26 11:18:21 +0200
commit: 4c234b041814be28ca26a2dd1719a48185dc95c9 (patch)
tree: 4a53d886ed09cb314d4396fbbcb4914af56a25a3 /src
parent: abd1ebb425d23e693847b5796ab207453e181bd6 (diff)
download: box64-4c234b041814be28ca26a2dd1719a48185dc95c9.tar.gz
box64-4c234b041814be28ca26a2dd1719a48185dc95c9.zip
[RV64_DYNAREC] Improved ret_to_epilog with xtheadmemidx (#2670)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.c 138
-rw-r--r-- src/dynarec/rv64/rv64_emitter.h 6
2 files changed, 83 insertions, 61 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index a6a047ca..6299de8d 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -691,44 +691,53 @@ void ret_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int ninst, rex_t rex)
     }
 
     uintptr_t tbl = rex.is32bits ? getJumpTable32() : getJumpTable64();
+    NOTEST(x2);
     MOV64x(x3, tbl);
-    if (!rex.is32bits) {
-        SRLI(x2, xRIP, JMPTABL_START3);
+    if (rv64_xtheadbb && rv64_xtheadmemidx) {
+        if (!rex.is32bits) {
+            TH_EXTU(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3);
+            TH_LRD(x3, x3, x2, 3);
+        }
+        TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
+        TH_LRD(x3, x3, x2, 3);
+        TH_EXTU(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1);
+        TH_LRD(x3, x3, x2, 3);
+        TH_EXTU(x2, xRIP, JMPTABL_START0 + JMPTABL_SHIFT0 - 1, JMPTABL_START0);
+        TH_LRD(x2, x3, x2, 3);
+    } else {
+        if (!rex.is32bits) {
+            SRLI(x2, xRIP, JMPTABL_START3);
+            ADDSL(x3, x3, x2, 3, x2);
+            LD(x3, x3, 0);
+        }
+        MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask
+        SRLI(x2, xRIP, JMPTABL_START2 - 3);
+        AND(x2, x2, x4);
+        ADD(x3, x3, x2);
+        LD(x3, x3, 0);
+        if (JMPTABLE_MASK2 != JMPTABLE_MASK1) {
+            MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask
+        }
+        SRLI(x2, xRIP, JMPTABL_START1 - 3);
+        AND(x2, x2, x4);
+        ADD(x3, x3, x2);
+        LD(x3, x3, 0);
+        if (JMPTABLE_MASK0 < 2048) {
+            ANDI(x2, xRIP, JMPTABLE_MASK0);
+        } else {
+            if (JMPTABLE_MASK1 != JMPTABLE_MASK0) {
+                MOV64x(x4, JMPTABLE_MASK0); // x4 = mask
+            }
+            AND(x2, xRIP, x4);
+        }
         if (rv64_zba)
             SH3ADD(x3, x2, x3);
         else {
             SLLI(x2, x2, 3);
             ADD(x3, x3, x2);
         }
-        LD(x3, x3, 0);
-    }
-    MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask
-    SRLI(x2, xRIP, JMPTABL_START2 - 3);
-    AND(x2, x2, x4);
-    ADD(x3, x3, x2);
-    LD(x3, x3, 0);
-    if (JMPTABLE_MASK2 != JMPTABLE_MASK1) {
-        MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask
-    }
-    SRLI(x2, xRIP, JMPTABL_START1 - 3);
-    AND(x2, x2, x4);
-    ADD(x3, x3, x2);
-    LD(x3, x3, 0);
-    if (JMPTABLE_MASK0 < 2048) {
-        ANDI(x2, xRIP, JMPTABLE_MASK0);
-    } else {
-        if (JMPTABLE_MASK1 != JMPTABLE_MASK0) {
-            MOV64x(x4, JMPTABLE_MASK0); // x4 = mask
-        }
-        AND(x2, xRIP, x4);
-    }
-    if (rv64_zba)
-        SH3ADD(x3, x2, x3);
-    else {
-        SLLI(x2, x2, 3);
-        ADD(x3, x3, x2);
+        LD(x2, x3, 0);
     }
-    LD(x2, x3, 0);
     BR(x2);
     CLEARIP();
 }
@@ -759,44 +768,53 @@ void retn_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int ninst, rex_t rex, int
         ADDI(xSP, xSP, -16);
     }
     uintptr_t tbl = rex.is32bits ? getJumpTable32() : getJumpTable64();
+    NOTEST(x2);
     MOV64x(x3, tbl);
-    if (!rex.is32bits) {
-        SRLI(x2, xRIP, JMPTABL_START3);
+    if (rv64_xtheadbb && rv64_xtheadmemidx) {
+        if (!rex.is32bits) {
+            TH_EXTU(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3);
+            TH_LRD(x3, x3, x2, 3);
+        }
+        TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
+        TH_LRD(x3, x3, x2, 3);
+        TH_EXTU(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1);
+        TH_LRD(x3, x3, x2, 3);
+        TH_EXTU(x2, xRIP, JMPTABL_START0 + JMPTABL_SHIFT0 - 1, JMPTABL_START0);
+        TH_LRD(x2, x3, x2, 3);
+    } else {
+        if (!rex.is32bits) {
+            SRLI(x2, xRIP, JMPTABL_START3);
+            ADDSL(x3, x3, x2, 3, x2);
+            LD(x3, x3, 0);
+        }
+        MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask
+        SRLI(x2, xRIP, JMPTABL_START2 - 3);
+        AND(x2, x2, x4);
+        ADD(x3, x3, x2);
+        LD(x3, x3, 0);
+        if (JMPTABLE_MASK2 != JMPTABLE_MASK1) {
+            MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask
+        }
+        SRLI(x2, xRIP, JMPTABL_START1 - 3);
+        AND(x2, x2, x4);
+        ADD(x3, x3, x2);
+        LD(x3, x3, 0);
+        if (JMPTABLE_MASK0 < 2048) {
+            ANDI(x2, xRIP, JMPTABLE_MASK0);
+        } else {
+            if (JMPTABLE_MASK1 != JMPTABLE_MASK0) {
+                MOV64x(x4, JMPTABLE_MASK0); // x4 = mask
+            }
+            AND(x2, xRIP, x4);
+        }
         if (rv64_zba)
             SH3ADD(x3, x2, x3);
         else {
             SLLI(x2, x2, 3);
             ADD(x3, x3, x2);
         }
-        LD(x3, x3, 0);
-    }
-    MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask
-    SRLI(x2, xRIP, JMPTABL_START2 - 3);
-    AND(x2, x2, x4);
-    ADD(x3, x3, x2);
-    LD(x3, x3, 0);
-    if (JMPTABLE_MASK2 != JMPTABLE_MASK1) {
-        MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask
-    }
-    SRLI(x2, xRIP, JMPTABL_START1 - 3);
-    AND(x2, x2, x4);
-    ADD(x3, x3, x2);
-    LD(x3, x3, 0);
-    if (JMPTABLE_MASK0 < 2048) {
-        ANDI(x2, xRIP, JMPTABLE_MASK0);
-    } else {
-        if (JMPTABLE_MASK1 != JMPTABLE_MASK0) {
-            MOV64x(x4, JMPTABLE_MASK0); // x4 = mask
-        }
-        AND(x2, xRIP, x4);
-    }
-    if (rv64_zba)
-        SH3ADD(x3, x2, x3);
-    else {
-        SLLI(x2, x2, 3);
-        ADD(x3, x3, x2);
+        LD(x2, x3, 0);
     }
-    LD(x2, x3, 0);
     BR(x2);
     CLEARIP();
 }
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 27ae0427..995e4bff 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -1309,6 +1309,11 @@
 // mem[rs1+7:rs1] := rd
 #define TH_SDIB(rd, rs1, imm5, imm2) EMIT(I_type(0b011010000000 | (((imm2) & 0b11) << 5) | ((imm5) & 0x1f), rs1, 0b101, rd, 0b0001011))
 
+// Load indexed double-word.
+// addr := rs1 + (rs2 << imm2)
+// rd := sign_extend(mem[addr+7:addr])
+#define TH_LRD(rd, rs1, rs2, imm2) EMIT(R_type(0b0110000 | ((imm2) & 0b11), rs2, rs1, 0b100, rd, 0b0001011))
+
 // TODO
 // th.lbib rd, (rs1), imm5, imm2 Load indexed byte
 // th.lbuia rd, (rs1), imm5, imm2 Load indexed unsigned byte
@@ -1333,7 +1338,6 @@
 // th.lrhu rd, rs1, rs2, imm2 Load indexed unsigned half-word
 // th.lrw rd, rs1, rs2, imm2 Load indexed word
 // th.lrwu rd, rs1, rs2, imm2 Load indexed unsigned word
-// th.lrd rd, rs1, rs2, imm2 Load indexed double-word
 // th.srb rd, rs1, rs2, imm2 Store indexed byte
 // th.srh rd, rs1, rs2, imm2 Store indexed half-word
 // th.srw rd, rs1, rs2, imm2 Store indexed word