diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-08-29 05:05:02 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-08-28 23:05:02 +0200 |
| commit | d9e5f8183f78c5f3035d3abf12e367cf05a1d4b5 (patch) | |
| tree | 3f6b060675c9270a669296593f7bf4f2fad5d3fd /src | |
| parent | abbaf9b593725d5f00d0482d0fc28310046cfd27 (diff) | |
| download | box64-d9e5f8183f78c5f3035d3abf12e367cf05a1d4b5.tar.gz box64-d9e5f8183f78c5f3035d3abf12e367cf05a1d4b5.zip | |
[RV64_DYNAREC] Optimized jump_to_next using XTheadBb instructions (#1768)
* [RV64_DYNAREC] Optimized jump_to_next using XTheadBb instructions
* fixed some bad bad typos
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 74 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 4 |
2 files changed, 53 insertions, 25 deletions
```diff
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index 9810cf64..f46a8690 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -498,34 +498,62 @@ void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst, int is3
         if(reg!=xRIP) {
             MV(xRIP, reg);
         }
+        NOTEST(x2);
         uintptr_t tbl = is32bits?getJumpTable32():getJumpTable64();
         MAYUSE(tbl);
         TABLE64(x3, tbl);
-        if(!is32bits) {
-            SRLI(x2, xRIP, JMPTABL_START3);
-            if(rv64_zba) SH3ADD(x3, x2, x3); else {SLLI(x2, x2, 3); ADD(x3, x3, x2);}
-            LD(x3, x3, 0); // could be LR_D(x3, x3, 1, 1); for better safety
-        }
-        MOV64x(x4, JMPTABLE_MASK2<<3); // x4 = mask
-        SRLI(x2, xRIP, JMPTABL_START2-3);
-        AND(x2, x2, x4);
-        ADD(x3, x3, x2);
-        LD(x3, x3, 0); //LR_D(x3, x3, 1, 1);
-        if(JMPTABLE_MASK2!=JMPTABLE_MASK1) {
-            MOV64x(x4, JMPTABLE_MASK1<<3); // x4 = mask
-        }
-        SRLI(x2, xRIP, JMPTABL_START1-3);
-        AND(x2, x2, x4);
-        ADD(x3, x3, x2);
-        LD(x3, x3, 0); //LR_D(x3, x3, 1, 1);
-        if(JMPTABLE_MASK0<2048) {
-            ANDI(x2, xRIP, JMPTABLE_MASK0);
+        if (rv64_xtheadbb) {
+            if (!is32bits) {
+                TH_EXTU(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3);
+                TH_ADDSL(x3, x3, x2, 3);
+                LD(x3, x3, 0);
+            }
+            TH_EXTU(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2);
+            TH_ADDSL(x3, x3, x2, 3);
+            LD(x3, x3, 0);
+            TH_EXTU(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1);
+            TH_ADDSL(x3, x3, x2, 3);
+            LD(x3, x3, 0);
+            TH_EXTU(x2, xRIP, JMPTABL_START0 + JMPTABL_SHIFT0 - 1, JMPTABL_START0);
+            TH_ADDSL(x3, x3, x2, 3);
+            LD(x2, x3, 0);
         } else {
-            MOV64x(x4, JMPTABLE_MASK0); // x4 = mask
-            AND(x2, xRIP, x4);
+            if (!is32bits) {
+                SRLI(x2, xRIP, JMPTABL_START3);
+                if (rv64_zba)
+                    SH3ADD(x3, x2, x3);
+                else {
+                    SLLI(x2, x2, 3);
+                    ADD(x3, x3, x2);
+                }
+                LD(x3, x3, 0); // could be LR_D(x3, x3, 1, 1); for better safety
+            }
+            MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask
+            SRLI(x2, xRIP, JMPTABL_START2 - 3);
+            AND(x2, x2, x4);
+            ADD(x3, x3, x2);
+            LD(x3, x3, 0); // LR_D(x3, x3, 1, 1);
+            if (JMPTABLE_MASK2 != JMPTABLE_MASK1) {
+                MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask
+            }
+            SRLI(x2, xRIP, JMPTABL_START1 - 3);
+            AND(x2, x2, x4);
+            ADD(x3, x3, x2);
+            LD(x3, x3, 0); // LR_D(x3, x3, 1, 1);
+            if (JMPTABLE_MASK0 < 2048) {
+                ANDI(x2, xRIP, JMPTABLE_MASK0);
+            } else {
+                MOV64x(x4, JMPTABLE_MASK0); // x4 = mask
+                AND(x2, xRIP, x4);
+            }
+            if (rv64_zba)
+                SH3ADD(x3, x2, x3);
+            else {
+                SLLI(x2, x2, 3);
+                ADD(x3, x3, x2);
+            }
+            LD(x2, x3, 0); // LR_D(x2, x3, 1, 1);
         }
-        if(rv64_zba) SH3ADD(x3, x2, x3); else {SLLI(x2, x2, 3); ADD(x3, x3, x2);}
-        LD(x2, x3, 0); //LR_D(x2, x3, 1, 1);
     } else {
         uintptr_t p = getJumpTableAddress64(ip);
         MAYUSE(p);
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 3f76135a..c17bb165 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -1028,11 +1028,11 @@ f28–31 ft8–11 FP temporaries Caller
 
 // Extract and sign-extend bits.
 // reg[rd] := sign_extend(reg[rs1][imm1:imm2])
-#define TH_EXT(rd, rs1, imm1, imm2) EMIT(I_type((((imm1) & 0x1f) << 6) | ((imm2) & 0x1f), rs1, 0b010, rd, 0b0001011))
+#define TH_EXT(rd, rs1, imm1, imm2) EMIT(I_type((((imm1) & 0x3f) << 6) | ((imm2) & 0x3f), rs1, 0b010, rd, 0b0001011))
 
 // Extract and zero-extend bits.
 // reg[rd] := zero_extend(reg[rs1][imm1:imm2])
-#define TH_EXTU(rd, rs1, imm1, imm2) EMIT(I_type((((imm1) & 0x1f) << 6) | ((imm2) & 0x1f), rs1, 0b011, rd, 0b0001011))
+#define TH_EXTU(rd, rs1, imm1, imm2) EMIT(I_type((((imm1) & 0x3f) << 6) | ((imm2) & 0x3f), rs1, 0b011, rd, 0b0001011))
 
 // Find first '0'-bit
 // for i=xlen..0:
```