diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2023-09-27 03:45:33 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-09-26 21:45:33 +0200 |
| commit | 684160f17847b3220ea49a7a4c0e214ce15ffb8d (patch) | |
| tree | 3e276a8d9f6a7f3038fe1ee0911878e6bf62ce85 /src | |
| parent | e3f193b3824cb46a147414b2efdcbcee6715abdb (diff) | |
| download | box64-684160f17847b3220ea49a7a4c0e214ce15ffb8d.tar.gz box64-684160f17847b3220ea49a7a4c0e214ce15ffb8d.zip | |
[RV64_DYNAREC] Added more support for XTheadBs extension (#993)
* [RV64_DYNAREC] Added more support for XTheadBs extension
* Revert: FF0 is not CLZ
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 34 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f30f.c | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 45 |
3 files changed, 39 insertions, 42 deletions
```diff
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index 227ffda5..d7e4eeb8 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -1009,9 +1009,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 LDxw(x1, x3, fixedaddress);
                 ed = x1;
             }
-            ANDI(x2, gd, rex.w ? 0x3f : 0x1f);
-            SRL(x4, ed, x2);
-            ANDI(x4, x4, 1);
+            BEXT(x4, ed, gd, x2);
             ANDI(xFlags, xFlags, ~1); // F_CF is 1
             OR(xFlags, xFlags, x4);
             break;
@@ -1043,13 +1041,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 ed = x1;
                 wback = x3;
             }
-            if (rex.w) {
-                ANDI(x2, gd, 0x3f);
-            } else {
-                ANDI(x2, gd, 0x1f);
-            }
-            SRL(x4, ed, x2);
-            ANDI(x4, x4, 1); // F_CF is 1
+            BEXT(x4, ed, gd, x2);
             ANDI(xFlags, xFlags, ~1);
             OR(xFlags, xFlags, x4);
             ADDI(x3, xZR, 1);
@@ -1194,13 +1186,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 ed = x1;
                 wback = x3;
             }
-            if (rex.w) {
-                ANDI(x2, gd, 0x3f);
-            } else {
-                ANDI(x2, gd, 0x1f);
-            }
-            SRL(x4, ed, x2);
-            ANDI(x4, x4, 1); // F_CF is 1
+            BEXT(x4, ed, gd, x2); // F_CF is 1
             ANDI(xFlags, xFlags, ~1);
             OR(xFlags, xFlags, x4);
             ADDI(x5, xZR, 1);
@@ -1260,8 +1246,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     GETED(1);
                     u8 = F8;
                     u8 &= rex.w ? 0x3f : 0x1f;
-                    SRLIxw(x3, ed, u8);
-                    ANDI(x3, x3, 1); // F_CF is 1
+                    BEXTI(x3, ed, u8); // F_CF is 1
                     ANDI(xFlags, xFlags, ~1);
                     OR(xFlags, xFlags, x3);
                     break;
@@ -1326,8 +1311,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     GETED(1);
                     u8 = F8;
                     u8 &= rex.w ? 0x3f : 0x1f;
-                    SRLIxw(x3, ed, u8);
-                    ANDI(x3, x3, 1); // F_CF is 1
+                    BEXTI(x3, ed, u8); // F_CF is 1
                     ANDI(xFlags, xFlags, ~1);
                     OR(xFlags, xFlags, x3);
                     if (u8 <= 10) {
@@ -1363,13 +1347,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 ed = x1;
                 wback = x3;
             }
-            if (rex.w) {
-                ANDI(x2, gd, 0x3f);
-            } else {
-                ANDI(x2, gd, 0x1f);
-            }
-            SRL(x4, ed, x2);
-            ANDI(x4, x4, 1); // F_CF is 1
+            BEXT(x4, ed, gd, x2); // F_CF is 1
             ANDI(xFlags, xFlags, ~1);
             OR(xFlags, xFlags, x4);
             ADDI(x3, xZR, 1);
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c
index 9007e46e..1b57e9ed 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f.c
@@ -416,8 +416,6 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             MARK;
             if (rv64_zbb) {
                 CLZxw(gd, ed);
-            } else if (rv64_xtheadbb) {
-                TH_FF0(gd, ed);
             } else {
                 if (ed != gd)
                     u8 = gd;
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 63e62ad1..e7608781 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -841,10 +841,10 @@ f28–31 ft8–11 FP temporaries Caller
 #define BCLR(rd, rs1, rs2) EMIT(R_type(0b0100100, rs2, rs1, 0b001, rd, 0b0110011))
 // Single-bit Clear (Immediate)
 #define BCLI(rd, rs1, imm) EMIT(R_type(0b0100100, imm, rs1, 0b001, rd, 0b0010011))
-// Single-bit Extreact (Register)
-#define BEXT(rd, rs1, rs2) EMIT(R_type(0b0100100, rs2, rs1, 0b101, rd, 0b0110011))
+// Single-bit Extract (Register)
+#define BEXT_(rd, rs1, rs2) EMIT(R_type(0b0100100, rs2, rs1, 0b101, rd, 0b0110011))
 // Single-bit Extract (Immediate)
-#define BEXTI(rd, rs1, imm) EMIT(R_type(0b0100100, imm, rs1, 0b101, rd, 0b0010011))
+#define BEXTI_(rd, rs1, imm) EMIT(R_type(0b0100100, imm, rs1, 0b101, rd, 0b0010011))
 // Single-bit Invert (Register)
 #define BINV(rd, rs1, rs2) EMIT(R_type(0b0110100, rs2, rs1, 0b001, rd, 0b0110011))
 // Single-bit Invert (Immediate)
@@ -854,6 +854,27 @@ f28–31 ft8–11 FP temporaries Caller
 // Single-bit Set (Immediate)
 #define BSETI(rd, rs1, imm) EMIT(R_type(0b0010100, imm, rs1, 0b001, rd, 0b0010011))
 
+// Single-bit Extract (Register), s0 can be the same as rs2
+#define BEXT(rd, rs1, rs2, s0) \
+    if (rv64_zbs) \
+        BEXT_(rd, rs1, rs2); \
+    else { \
+        ANDI(s0, rs2, rex.w ? 0x3f : 0x1f); \
+        SRL(rd, rs1, s0); \
+        ANDI(rd, rd, 1); \
+    }
+
+// Single-bit Extract (Immediate)
+#define BEXTI(rd, rs1, imm) \
+    if (rv64_zbs) \
+        BEXTI_(rd, rs1, imm); \
+    else if (rv64_xtheadbs) \
+        TH_TST(rd, rs1, imm); \
+    else { \
+        SRLIxw(rd, rs1, imm); \
+        ANDI(rd, rd, 1); \
+    }
+
 /// THead vendor extension
 /// https://github.com/T-head-Semi/thead-extension-spec/releases
 
@@ -861,7 +882,7 @@ f28–31 ft8–11 FP temporaries Caller
 
 // Add a shifted operand to a second operand.
 // reg[rd] := reg[rs1] + (reg[rs2] << imm2)
-#define TH_ADDSL(rd, rs1, rs2, imm2) EMIT(R_type(imm2 & 0b11, rs2, rs1, 0b001, rd, 0b0001011))
+#define TH_ADDSL(rd, rs1, rs2, imm2) EMIT(R_type((imm2)&0b11, rs2, rs1, 0b001, rd, 0b0001011))
 
 // XTheadBb - Basic bit-manipulation
 
@@ -874,20 +895,20 @@ f28–31 ft8–11 FP temporaries Caller
 
 // Perform a cyclic right shift.
 // reg[rd] := (reg[rs1] >> imm6) | (reg[rs1] << (xlen - imm6))
-#define TH_SRRI(rd, rs1, imm6) EMIT(I_type(0b000100000000 | (imm6 & 0x3f), rs1, 0b001, rd, 0b0001011))
+#define TH_SRRI(rd, rs1, imm6) EMIT(I_type(0b000100000000 | ((imm6)&0x3f), rs1, 0b001, rd, 0b0001011))
 
 // Perform a cyclic right shift on word operand.
 // data := zext.w(reg[rs1])
 // reg[rd] := (data >> imm5) | (data << (32 - imm5))
-#define TH_SRRIW(rd, rs1, imm5) EMIT(I_type(0b000101000000 | (imm5 & 0x1f), rs1, 0b001, rd, 0b0001011))
+#define TH_SRRIW(rd, rs1, imm5) EMIT(I_type(0b000101000000 | ((imm5)&0x1f), rs1, 0b001, rd, 0b0001011))
 
 // Extract and sign-extend bits.
 // reg[rd] := sign_extend(reg[rs1][imm1:imm2])
-#define TH_EXT(rd, rs1, imm1, imm2) EMIT(I_type(((imm1 & 0x1f) << 6) | (imm2 & 0x1f), rs1, 0b010, rd, 0b0001011))
+#define TH_EXT(rd, rs1, imm1, imm2) EMIT(I_type((((imm1)&0x1f) << 6) | ((imm2)&0x1f), rs1, 0b010, rd, 0b0001011))
 
 // Extract and zero-extend bits.
 // reg[rd] := zero_extend(reg[rs1][imm1:imm2])
-#define TH_EXTU(rd, rs1, imm1, imm2) EMIT(I_type(((imm1 & 0x1f) << 6) | (imm2 & 0x1f), rs1, 0b011, rd, 0b0001011))
+#define TH_EXTU(rd, rs1, imm1, imm2) EMIT(I_type((((imm1)&0x1f) << 6) | ((imm2)&0x1f), rs1, 0b011, rd, 0b0001011))
 
 // Find first '0'-bit
 // for i=xlen..0:
@@ -932,7 +953,7 @@ f28–31 ft8–11 FP temporaries Caller
 // rd := 1
 // else
 // rd := 0
-#define TH_TST(rd, rs1, imm6) EMIT(I_type(0b100010000000 | (imm6 & 0x3f), rs1, 0b001, rd, 0b0001011))
+#define TH_TST(rd, rs1, imm6) EMIT(I_type(0b100010000000 | ((imm6)&0x3f), rs1, 0b001, rd, 0b0001011))
 
 // XTheadCondMov - Conditional move
 
@@ -952,7 +973,7 @@ f28–31 ft8–11 FP temporaries Caller
 // Load indexed byte, increment address after loading.
 // rd := sign_extend(mem[rs1])
 // rs1 := rs1 + (sign_extend(imm5) << imm2)
-#define TH_LBIA(rd, rs1, imm5, imm2) EMIT(I_type(0b000110000000 | ((imm2 & 0b11) << 5) | (imm5 & 0x1f), rs1, 0b100, rd, 0b0001011))
+#define TH_LBIA(rd, rs1, imm5, imm2) EMIT(I_type(0b000110000000 | (((imm2)&0b11) << 5) | ((imm5)&0x1f), rs1, 0b100, rd, 0b0001011))
 
 // TODO
 // th.lbib rd, (rs1), imm5, imm2 Load indexed byte
@@ -1006,7 +1027,7 @@ f28–31 ft8–11 FP temporaries Caller
 // addr := rs1 + (zero_extend(imm2) << 4)
 // rd1 := mem[addr+7:addr]
 // rd2 := mem[addr+15:addr+8]
-#define TH_LDD(rd1, rd2, rs1, imm2) EMIT(R_type(0b1111100 | (imm2 & 0b11), rd2, rs1, 0b100, rd1, 0b0001011))
+#define TH_LDD(rd1, rd2, rs1, imm2) EMIT(R_type(0b1111100 | ((imm2)&0b11), rd2, rs1, 0b100, rd1, 0b0001011))
 
 // TODO
 // th.lwd rd1, rd2, (rs1), imm2, 3 Load two signed 32-bit values
@@ -1019,7 +1040,7 @@ f28–31 ft8–11 FP temporaries Caller
 // Load indexed double-precision floating point value.
 // addr := rs1 + (rs2 << imm2)
 // rd := fmem[addr+7:addr]
-#define TH_FLRD(rd, rs1, rs2, imm2) EMIT(R_type(0b0110000 | (imm2 & 0b11), rs2, rs1, 0b110, rd, 0b0001011))
+#define TH_FLRD(rd, rs1, rs2, imm2) EMIT(R_type(0b0110000 | ((imm2)&0b11), rs2, rs1, 0b110, rd, 0b0001011))
 
 // TODO
 // th.flrw rd, rs1, rs2, imm2 Load indexed float
```