diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2025-05-26 17:18:21 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-05-26 11:18:21 +0200 |
| commit | 4c234b041814be28ca26a2dd1719a48185dc95c9 (patch) | |
| tree | 4a53d886ed09cb314d4396fbbcb4914af56a25a3 /src | |
| parent | abd1ebb425d23e693847b5796ab207453e181bd6 (diff) | |
| download | box64-4c234b041814be28ca26a2dd1719a48185dc95c9.tar.gz box64-4c234b041814be28ca26a2dd1719a48185dc95c9.zip | |
[RV64_DYNAREC] Improved ret_to_epilog with xtheadmemidx (#2670)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 138 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 6 |
2 files changed, 83 insertions, 61 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index a6a047ca..6299de8d 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -691,44 +691,53 @@ void ret_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int ninst, rex_t rex) } uintptr_t tbl = rex.is32bits ? getJumpTable32() : getJumpTable64(); + NOTEST(x2); MOV64x(x3, tbl); - if (!rex.is32bits) { - SRLI(x2, xRIP, JMPTABL_START3); + if (rv64_xtheadbb && rv64_xtheadmemidx) { + if (!rex.is32bits) { + TH_EXTU(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3); + TH_LRD(x3, x3, x2, 3); + } + TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); + TH_LRD(x3, x3, x2, 3); + TH_EXTU(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1); + TH_LRD(x3, x3, x2, 3); + TH_EXTU(x2, xRIP, JMPTABL_START0 + JMPTABL_SHIFT0 - 1, JMPTABL_START0); + TH_LRD(x2, x3, x2, 3); + } else { + if (!rex.is32bits) { + SRLI(x2, xRIP, JMPTABL_START3); + ADDSL(x3, x3, x2, 3, x2); + LD(x3, x3, 0); + } + MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask + SRLI(x2, xRIP, JMPTABL_START2 - 3); + AND(x2, x2, x4); + ADD(x3, x3, x2); + LD(x3, x3, 0); + if (JMPTABLE_MASK2 != JMPTABLE_MASK1) { + MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask + } + SRLI(x2, xRIP, JMPTABL_START1 - 3); + AND(x2, x2, x4); + ADD(x3, x3, x2); + LD(x3, x3, 0); + if (JMPTABLE_MASK0 < 2048) { + ANDI(x2, xRIP, JMPTABLE_MASK0); + } else { + if (JMPTABLE_MASK1 != JMPTABLE_MASK0) { + MOV64x(x4, JMPTABLE_MASK0); // x4 = mask + } + AND(x2, xRIP, x4); + } if (rv64_zba) SH3ADD(x3, x2, x3); else { SLLI(x2, x2, 3); ADD(x3, x3, x2); } - LD(x3, x3, 0); - } - MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask - SRLI(x2, xRIP, JMPTABL_START2 - 3); - AND(x2, x2, x4); - ADD(x3, x3, x2); - LD(x3, x3, 0); - if (JMPTABLE_MASK2 != JMPTABLE_MASK1) { - MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask - } - SRLI(x2, xRIP, JMPTABL_START1 - 3); - AND(x2, x2, x4); - ADD(x3, x3, x2); - LD(x3, x3, 0); 
- if (JMPTABLE_MASK0 < 2048) { - ANDI(x2, xRIP, JMPTABLE_MASK0); - } else { - if (JMPTABLE_MASK1 != JMPTABLE_MASK0) { - MOV64x(x4, JMPTABLE_MASK0); // x4 = mask - } - AND(x2, xRIP, x4); - } - if (rv64_zba) - SH3ADD(x3, x2, x3); - else { - SLLI(x2, x2, 3); - ADD(x3, x3, x2); + LD(x2, x3, 0); } - LD(x2, x3, 0); BR(x2); CLEARIP(); } @@ -759,44 +768,53 @@ void retn_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int ninst, rex_t rex, int ADDI(xSP, xSP, -16); } uintptr_t tbl = rex.is32bits ? getJumpTable32() : getJumpTable64(); + NOTEST(x2); MOV64x(x3, tbl); - if (!rex.is32bits) { - SRLI(x2, xRIP, JMPTABL_START3); + if (rv64_xtheadbb && rv64_xtheadmemidx) { + if (!rex.is32bits) { + TH_EXTU(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3); + TH_LRD(x3, x3, x2, 3); + } + TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); + TH_LRD(x3, x3, x2, 3); + TH_EXTU(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT2 - 1, JMPTABL_START1); + TH_LRD(x3, x3, x2, 3); + TH_EXTU(x2, xRIP, JMPTABL_START0 + JMPTABL_SHIFT0 - 1, JMPTABL_START0); + TH_LRD(x2, x3, x2, 3); + } else { + if (!rex.is32bits) { + SRLI(x2, xRIP, JMPTABL_START3); + ADDSL(x3, x3, x2, 3, x2); + LD(x3, x3, 0); + } + MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask + SRLI(x2, xRIP, JMPTABL_START2 - 3); + AND(x2, x2, x4); + ADD(x3, x3, x2); + LD(x3, x3, 0); + if (JMPTABLE_MASK2 != JMPTABLE_MASK1) { + MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask + } + SRLI(x2, xRIP, JMPTABL_START1 - 3); + AND(x2, x2, x4); + ADD(x3, x3, x2); + LD(x3, x3, 0); + if (JMPTABLE_MASK0 < 2048) { + ANDI(x2, xRIP, JMPTABLE_MASK0); + } else { + if (JMPTABLE_MASK1 != JMPTABLE_MASK0) { + MOV64x(x4, JMPTABLE_MASK0); // x4 = mask + } + AND(x2, xRIP, x4); + } if (rv64_zba) SH3ADD(x3, x2, x3); else { SLLI(x2, x2, 3); ADD(x3, x3, x2); } - LD(x3, x3, 0); - } - MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask - SRLI(x2, xRIP, JMPTABL_START2 - 3); - AND(x2, x2, x4); - ADD(x3, x3, x2); - LD(x3, x3, 0); - if (JMPTABLE_MASK2 != JMPTABLE_MASK1) { - 
MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask - } - SRLI(x2, xRIP, JMPTABL_START1 - 3); - AND(x2, x2, x4); - ADD(x3, x3, x2); - LD(x3, x3, 0); - if (JMPTABLE_MASK0 < 2048) { - ANDI(x2, xRIP, JMPTABLE_MASK0); - } else { - if (JMPTABLE_MASK1 != JMPTABLE_MASK0) { - MOV64x(x4, JMPTABLE_MASK0); // x4 = mask - } - AND(x2, xRIP, x4); - } - if (rv64_zba) - SH3ADD(x3, x2, x3); - else { - SLLI(x2, x2, 3); - ADD(x3, x3, x2); + LD(x2, x3, 0); } - LD(x2, x3, 0); BR(x2); CLEARIP(); } diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index 27ae0427..995e4bff 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -1309,6 +1309,11 @@ // mem[rs1+7:rs1] := rd #define TH_SDIB(rd, rs1, imm5, imm2) EMIT(I_type(0b011010000000 | (((imm2) & 0b11) << 5) | ((imm5) & 0x1f), rs1, 0b101, rd, 0b0001011)) +// Load indexed double-word. +// addr := rs1 + (rs2 << imm2) +// rd := sign_extend(mem[addr+7:addr]) +#define TH_LRD(rd, rs1, rs2, imm2) EMIT(R_type(0b0110000 | ((imm2) & 0b11), rs2, rs1, 0b100, rd, 0b0001011)) + // TODO // th.lbib rd, (rs1), imm5, imm2 Load indexed byte // th.lbuia rd, (rs1), imm5, imm2 Load indexed unsigned byte @@ -1333,7 +1338,6 @@ // th.lrhu rd, rs1, rs2, imm2 Load indexed unsigned half-word // th.lrw rd, rs1, rs2, imm2 Load indexed word // th.lrwu rd, rs1, rs2, imm2 Load indexed unsigned word -// th.lrd rd, rs1, rs2, imm2 Load indexed double-word // th.srb rd, rs1, rs2, imm2 Store indexed byte // th.srh rd, rs1, rs2, imm2 Store indexed half-word // th.srw rd, rs1, rs2, imm2 Store indexed word |