diff options
| author | Yang Liu <numbksco@gmail.com> | 2024-03-05 00:48:59 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-03-04 17:48:59 +0100 |
| commit | 1c10ae443dface55e243ee3ed11fcdd3b636153d (patch) | |
| tree | acc290e34785497500e25da8cafae0346f00e8b4 | |
| parent | d44f3d9baee8a0f7ce16bb3027f5a666a262aa07 (diff) | |
| download | box64-1c10ae443dface55e243ee3ed11fcdd3b636153d.tar.gz box64-1c10ae443dface55e243ee3ed11fcdd3b636153d.zip | |
[LA64_DYNAREC] Utilizing bitmanip instructions because they're great (#1324)
* [LA64_DYNAREC] Added bitmanip instructions to emitter and printer
* [LA64_DYNAREC] Utilizing bitmanip instructions because they're great
* Review
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_00.c | 30 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.c | 70 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_helper.h | 47 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_emitter.h | 45 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_printer.c | 20 |
5 files changed, 86 insertions, 126 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c index 26c300d4..87c6a989 100644 --- a/src/dynarec/la64/dynarec_la64_00.c +++ b/src/dynarec/la64/dynarec_la64_00.c @@ -94,9 +94,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni u8 = F8; ANDI(x1, xRAX, 0xff); emit_add8c(dyn, ninst, x1, u8, x3, x4, x5); - ADDI_W(x3, xZR, 0xf00); - AND(xRAX, xRAX, x3); - OR(xRAX, xRAX, x1); + BSTRINS_D(xRAX, x1, 7, 0); break; case 0x05: INST_NAME("ADD EAX, Id"); @@ -163,9 +161,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni u8 = F8; ANDI(x1, xRAX, 0xff); emit_sub8c(dyn, ninst, x1, u8, x2, x3, x4, x5); - ADDI_W(x3, xZR, 0xf00); - AND(xRAX, xRAX, x3); - OR(xRAX, xRAX, x1); + BSTRINS_D(xRAX, x1, 7, 0); break; case 0x2D: INST_NAME("SUB EAX, Id"); @@ -489,26 +485,8 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni eb1 = TO_LA64((nextop & 7) + (rex.b << 3)); eb2 = 0; } - if (eb2) { - // load a mask to x3 (ffffffffffff00ff) - LU12I_W(x3, 0xffff0); - ORI(x3, x3, 0xff); - // apply mask - AND(eb1, eb1, x3); - if (u8) { - if ((u8 << 8) < 2048) { - ADDI_D(x4, xZR, u8 << 8); - } else { - ADDI_D(x4, xZR, u8); - SLLI_D(x4, x4, 8); - } - OR(eb1, eb1, x4); - } - } else { - ADDI_W(x3, xZR, 0xf00); // mask ffffffffffffff00 - AND(eb1, eb1, x3); - ORI(eb1, eb1, u8); - } + MOV32w(x3, u8); + BSTRINS_D(eb1, x3, eb2 * 8 + 7, eb2 * 8); } else { // mem <= u8 addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, &lock, 0, 1); u8 = F8; diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c index ea0f7d11..03934883 100644 --- a/src/dynarec/la64/dynarec_la64_helper.c +++ b/src/dynarec/la64/dynarec_la64_helper.c @@ -369,37 +369,26 @@ void jump_to_next(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst, int is3 if (reg != xRIP) { MV(xRIP, reg); } + NOTEST(x2); uintptr_t tbl = is32bits ? 
getJumpTable32() : getJumpTable64(); MAYUSE(tbl); TABLE64(x3, tbl); if (!is32bits) { - SRLI_D(x2, xRIP, JMPTABL_START3); - ALSL_D(x3, x2, x3, 2); - LD_D(x3, x3, 0); // could be LR_D(x3, x3, 1, 1); for better safety + BSTRPICK_D(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3); + ALSL_D(x3, x2, x3, 3); + LD_D(x3, x3, 0); } - MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask - SRLI_D(x2, xRIP, JMPTABL_START2 - 3); - AND(x2, x2, x4); - ADD_D(x3, x3, x2); - LD_D(x3, x3, 0); // LR_D(x3, x3, 1, 1); - if (JMPTABLE_MASK2 != JMPTABLE_MASK1) { - MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask - } - SRLI_D(x2, xRIP, JMPTABL_START1 - 3); - AND(x2, x2, x4); - ADD_D(x3, x3, x2); - LD_D(x3, x3, 0); // LR_D(x3, x3, 1, 1); - if (JMPTABLE_MASK0 < 2048) { - ANDI(x2, xRIP, JMPTABLE_MASK0); - } else { - if (JMPTABLE_MASK1 != JMPTABLE_MASK0) { - MOV64x(x4, JMPTABLE_MASK0); // x4 = mask - } - AND(x2, xRIP, x4); - } - ALSL_D(x3, x2, x3, 2); - LD_D(x2, x3, 0); // LR_D(x2, x3, 1, 1); + BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); + ALSL_D(x3, x2, x3, 3); + LD_D(x3, x3, 0); + BSTRPICK_D(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1); + ALSL_D(x3, x2, x3, 3); + LD_D(x3, x3, 0); + BSTRPICK_D(x2, xRIP, JMPTABL_START0 + JMPTABL_SHIFT0 - 1, JMPTABL_START0); + ALSL_D(x3, x2, x3, 3); + LD_D(x2, x3, 0); } else { + NOTEST(x2); uintptr_t p = getJumpTableAddress64(ip); MAYUSE(p); TABLE64(x3, p); @@ -439,33 +428,18 @@ void ret_to_epilog(dynarec_la64_t* dyn, int ninst, rex_t rex) uintptr_t tbl = rex.is32bits ? 
getJumpTable32() : getJumpTable64(); MOV64x(x3, tbl); if (!rex.is32bits) { - SRLI_D(x2, xRIP, JMPTABL_START3); - SLLI_D(x2, x2, 3); - ADD_D(x3, x3, x2); + BSTRPICK_D(x2, xRIP, JMPTABL_START3 + JMPTABL_SHIFT3 - 1, JMPTABL_START3); + ALSL_D(x3, x2, x3, 3); LD_D(x3, x3, 0); } - MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask - SRLI_D(x2, xRIP, JMPTABL_START2 - 3); - AND(x2, x2, x4); - ADD_D(x3, x3, x2); + BSTRPICK_D(x2, xRIP, JMPTABL_START2 + JMPTABL_SHIFT2 - 1, JMPTABL_START2); + ALSL_D(x3, x2, x3, 3); LD_D(x3, x3, 0); - if (JMPTABLE_MASK2 != JMPTABLE_MASK1) { - MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask - } - SRLI_D(x2, xRIP, JMPTABL_START1 - 3); - AND(x2, x2, x4); - ADD_D(x3, x3, x2); + BSTRPICK_D(x2, xRIP, JMPTABL_START1 + JMPTABL_SHIFT1 - 1, JMPTABL_START1); + ALSL_D(x3, x2, x3, 3); LD_D(x3, x3, 0); - if (JMPTABLE_MASK0 < 2048) { - ANDI(x2, xRIP, JMPTABLE_MASK0); - } else { - if (JMPTABLE_MASK1 != JMPTABLE_MASK0) { - MOV64x(x4, JMPTABLE_MASK0); // x4 = mask - } - AND(x2, xRIP, x4); - } - SLLI_D(x2, x2, 3); - ADD_D(x3, x3, x2); + BSTRPICK_D(x2, xRIP, JMPTABL_START0 + JMPTABL_SHIFT0 - 1, JMPTABL_START0); + ALSL_D(x3, x2, x3, 3); LD_D(x2, x3, 0); BR(x2); // save LR CLEARIP(); diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h index e3c0c874..ea2b155f 100644 --- a/src/dynarec/la64/dynarec_la64_helper.h +++ b/src/dynarec/la64/dynarec_la64_helper.h @@ -134,12 +134,7 @@ wb2 = (wback >> 2) * 8; \ wback = TO_LA64((wback & 3)); \ } \ - if (wb2) { \ - MV(i, wback); \ - SRLI_D(i, i, wb2); \ - ANDI(i, i, 0xff); \ - } else \ - ANDI(i, wback, 0xff); \ + BSTRPICK_D(i, wback, wb2 + 7, wb2); \ wb1 = 0; \ ed = i; \ } else { \ @@ -161,42 +156,18 @@ gb1 = TO_LA64((gd & 3)); \ } \ gd = i; \ - if (gb2) { \ - MV(gd, gb1); \ - SRLI_D(gd, gd, 8); \ - ANDI(gd, gd, 0xff); \ - } else \ - ANDI(gd, gb1, 0xff); + BSTRPICK_D(gd, gb1, gb2 + 7, gb2); // Write gb (gd) back to original register / memory, using s1 as scratch -#define GBBACK(s1) \ - if (gb2) 
{ \ - MOV64x(s1, 0xffffffffffff00ffLL); \ - AND(gb1, gb1, s1); \ - SLLI_D(s1, gd, 8); \ - OR(gb1, gb1, s1); \ - } else { \ - ADDI_W(s1, xZR, 0xf00); \ - AND(gb1, gb1, s1); \ - OR(gb1, gb1, gd); \ - } +#define GBBACK(s1) BSTRINS_D(gb1, gd, gb2 + 7, gb2); // Write eb (ed) back to original register / memory, using s1 as scratch -#define EBBACK(s1, c) \ - if (wb1) { \ - SUB_D(ed, wback, fixedaddress); \ - SMWRITE(); \ - } else if (wb2) { \ - MOV64x(s1, 0xffffffffffff00ffLL); \ - AND(wback, wback, s1); \ - if (c) { ANDI(ed, ed, 0xff); } \ - SLLI_D(s1, ed, 8); \ - OR(wback, wback, s1); \ - } else { \ - ADDI_W(s1, xZR, 0xf00); \ - AND(wback, wback, s1); \ - if (c) { ANDI(ed, ed, 0xff); } \ - OR(wback, wback, ed); \ +#define EBBACK(s1, c) \ + if (wb1) { \ + SUB_D(ed, wback, fixedaddress); \ + SMWRITE(); \ + } else { \ + BSTRINS_D(wback, ed, wb2 + 7, wb2); \ } // CALL will use x6 for the call address. Return value can be put in ret (unless ret is -1) diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h index a53ccd47..c72178ce 100644 --- a/src/dynarec/la64/la64_emitter.h +++ b/src/dynarec/la64/la64_emitter.h @@ -113,9 +113,6 @@ f24-f31 fs0-fs7 Static registers Callee #define SPLIT20(A) (((A) + 0x800) >> 12) #define SPLIT12(A) ((A) & 0xfff) -// ZERO the upper part -#define ZEROUP(r) AND(r, r, xMASK); - // Standard formats #define type_4R(opc, ra, rk, rj, rd) ((opc) << 20 | (ra) << 15 | (rk) << 10 | (rj) << 5 | (rd)) #define type_3R(opc, rk, rj, rd) ((opc) << 15 | (rk) << 10 | (rj) << 5 | (rd)) @@ -131,10 +128,10 @@ f24-f31 fs0-fs7 Static registers Callee #define type_I26(opc, imm26) ((opc) << 26 | ((imm26) & 0xFFFF) << 10 | ((imm26 >> 16) & 0x3FF)) // Made-up formats not found in the spec. 
-#define type_2RI3(opc, imm3, rj, rd) ((opc) << 13 | ((imm3) & 0x7 ) << 10 | (rj) << 5 | (rd)) -#define type_2RI4(opc, imm4, rj, rd) ((opc) << 14 | ((imm4) & 0xF ) << 10 | (rj) << 5 | (rd)) -#define type_2RI5(opc, imm5, rj, rd) ((opc) << 15 | ((imm5) & 0x1F) << 10 | (rj) << 5 | (rd)) -#define type_2RI6(opc, imm6, rj, rd) ((opc) << 16 | ((imm6) & 0x3F) << 10 | (rj) << 5 | (rd)) +#define type_2RI3(opc, imm3, rj, rd) ((opc) << 13 | ((imm3) & 0x7 ) << 10 | (rj) << 5 | (rd)) +#define type_2RI4(opc, imm4, rj, rd) ((opc) << 14 | ((imm4) & 0xF ) << 10 | (rj) << 5 | (rd)) +#define type_2RI5(opc, imm5, rj, rd) ((opc) << 15 | ((imm5) & 0x1F) << 10 | (rj) << 5 | (rd)) +#define type_2RI6(opc, imm6, rj, rd) ((opc) << 16 | ((imm6) & 0x3F) << 10 | (rj) << 5 | (rd)) // tmp = GR[rj][31:0] + GR[rk][31:0] // Gr[rd] = SignExtend(tmp[31:0], GRLEN) @@ -159,15 +156,15 @@ f24-f31 fs0-fs7 Static registers Callee // GR[rd] = tmp[63:0] #define ADDU16I_D(rd, rj, imm16) EMIT(type_2RI16(0b000100, imm16, rj, rd)) -// tmp = (GR[rj][31:0] << (imm2 + 1)) + GR[rk][31:0] +// tmp = (GR[rj][31:0] << imm) + GR[rk][31:0] // GR[rd] = SignExtend(tmp[31:0], GRLEN) -#define ALSL_W(rd, rj, rk, imm2) EMIT(type_3RI2(0b000000000000010, imm2, rk, rj, rd)) -// tmp = (GR[rj][31:0] << (imm2 + 1)) + GR[rk][31:0] +#define ALSL_W(rd, rj, rk, imm) EMIT(type_3RI2(0b000000000000010, (imm - 1), rk, rj, rd)) +// tmp = (GR[rj][31:0] << imm) + GR[rk][31:0] // GR[rd] = ZeroExtend(tmp[31:0], GRLEN) -#define ALSL_WU(rd, rj, rk, imm2) EMIT(type_3RI2(0b000000000000011, imm2, rk, rj, rd)) -// tmp = (GR[rj][63:0] << (imm2 + 1)) + GR[rk][63:0] +#define ALSL_WU(rd, rj, rk, imm) EMIT(type_3RI2(0b000000000000011, (imm - 1), rk, rj, rd)) +// tmp = (GR[rj][63:0] << imm) + GR[rk][63:0] // GR[rd] = tmp[63:0] -#define ALSL_D(rd, rj, rk, imm2) EMIT(type_3RI2(0b000000000010110, imm2, rk, rj, rd)) +#define ALSL_D(rd, rj, rk, imm) EMIT(type_3RI2(0b000000000010110, (imm - 1), rk, rj, rd)) // GR[rd] = SignExtend({imm20, 12'b0}, GRLEN) #define 
LU12I_W(rd, imm20) EMIT(type_1RI20(0b0001010, imm20, rd)) @@ -264,9 +261,29 @@ f24-f31 fs0-fs7 Static registers Callee ADD_D(rd, rs1, scratch); \ } - #define SEXT_W(rd, rs1) SLLI_W(rd, rs1, 0) +// bstr32[31:msbw+1] = GR[rd][31: msbw+1] +// bstr32[msbw:lsbw] = GR[rj][msbw-lsbw:0] +// bstr32[lsbw-1:0] = GR[rd][lsbw-1:0] +// GR[rd] = SignExtend(bstr32[31:0], GRLEN) +#define BSTRINS_W(rd, rj, msbw5, lsbw5) EMIT(type_2RI12(0b0000000001, 0b100000000000 | (msbw5 & 0x1F) << 6 | (lsbw5 & 0x1F), rj, rd)) + +// GR[rd][63:msbd+1] = GR[rd][63:msbd+1] +// GR[rd][msbd:lsbd] = GR[rj][msbd-lsbd:0] +// GR[rd][lsbd-1:0] = GR[rd][lsbd-1:0] +#define BSTRINS_D(rd, rj, msbd6, lsbd6) EMIT(type_2RI12(0b0000000010, (msbd6 & 0x3F) << 6 | (lsbd6 & 0x3F), rj, rd)) + +// bstr32[31:0] = ZeroExtend(GR[rj][msbw:lsbw], 32) +// GR[rd] = SignExtend(bstr32[31:0], GRLEN) +#define BSTRPICK_W(rd, rj, msbw5, lsbw5) EMIT(type_2RI12(0b0000000001, 0b100000100000 | (msbw5 & 0x1F) << 6 | (lsbw5 & 0x1F), rj, rd)) + +// GR[rd] = ZeroExtend(GR[rj][msbd:lsbd], 64) +#define BSTRPICK_D(rd, rj, msbd6, lsbd6) EMIT(type_2RI12(0b0000000011, (msbd6 & 0x3F) << 6 | (lsbd6 & 0x3F), rj, rd)) + +// ZERO the upper part +#define ZEROUP(rd) BSTRINS_D(rd, xZR, 63, 32); + // if GR[rj] == GR[rd]: // PC = PC + SignExtend({imm16, 2'b0}, GRLEN) #define BEQ(rj, rd, imm18) EMIT(type_2RI16(0b010110, ((imm18)>>2), rj, rd)) diff --git a/src/dynarec/la64/la64_printer.c b/src/dynarec/la64/la64_printer.c index cdb9a4c6..385be055 100644 --- a/src/dynarec/la64/la64_printer.c +++ b/src/dynarec/la64/la64_printer.c @@ -255,6 +255,26 @@ const char* la64_print(uint32_t opcode, uintptr_t addr) snprintf(buff, sizeof(buff), "%-15s %s, %s, %u", "ROTRI.W", Xt[Rd], Xt[Rj], imm); return buff; } + // BSTRINS.W + if(isMask(opcode, "00000000011uuuuu0iiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s, %u, %u", "BSTRINS.W", Xt[Rd], Xt[Rj], imm_up, imm); + return buff; + } + // BSTRINS.D + if(isMask(opcode, "0000000010uuuuuuiiiiiijjjjjddddd", 
&a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s, %u, %u", "BSTRINS.D", Xt[Rd], Xt[Rj], imm_up, imm); + return buff; + } + // BSTRPICK.W + if(isMask(opcode, "00000000011uuuuu1iiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s, %u, %u", "BSTRPICK.W", Xt[Rd], Xt[Rj], imm_up, imm); + return buff; + } + // BSTRPICK.D + if(isMask(opcode, "0000000011uuuuuuiiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s, %u, %u", "BSTRPICK.D", Xt[Rd], Xt[Rj], imm_up, imm); + return buff; + } // BEQ if(isMask(opcode, "010110iiiiiiiiiiiiiiiijjjjjddddd", &a)) { snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "BEQ", Xt[Rd], Xt[Rj], imm << 2); |