diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2023-05-01 11:36:02 +0000 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2023-05-01 11:36:02 +0000 |
| commit | 505ac0021dd69d56f5467302b6514c8435caee82 (patch) | |
| tree | 7d1dda73bdb2a289d1f6e03c49137041cd351b30 /src | |
| parent | c020154a2f3e6bbf8e566395bacc2190770269a1 (diff) | |
| download | box64-505ac0021dd69d56f5467302b6514c8435caee82.tar.gz box64-505ac0021dd69d56f5467302b6514c8435caee82.zip | |
[RV64_DYNAREC] Added Zbb path for 0F BC/BD and 66 0F BC/BD opcodes
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 82 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f30f.c | 85 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 22 |
3 files changed, 105 insertions, 84 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index 3c6b4567..e5e44a12 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -1058,14 +1058,18 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             ORI(xFlags, xFlags, 1<<F_ZF);
             B_NEXT_nocond;
             MARK;
-            NEG(x2, ed);
-            AND(x2, x2, ed);
-            TABLE64(x3, 0x03f79d71b4ca8b09ULL);
-            MUL(x2, x2, x3);
-            SRLI(x2, x2, 64-6);
-            TABLE64(x1, (uintptr_t)&deBruijn64tab);
-            ADD(x1, x1, x2);
-            LBU(gd, x1, 0);
+            if(rv64_zbb) {
+                CTZxw(gd, ed);
+            } else {
+                NEG(x2, ed);
+                AND(x2, x2, ed);
+                TABLE64(x3, 0x03f79d71b4ca8b09ULL);
+                MUL(x2, x2, x3);
+                SRLI(x2, x2, 64-6);
+                TABLE64(x1, (uintptr_t)&deBruijn64tab);
+                ADD(x1, x1, x2);
+                LBU(gd, x1, 0);
+            }
             ANDI(xFlags, xFlags, ~(1<<F_ZF));
             break;
         case 0xBD:
@@ -1084,37 +1088,43 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             B_NEXT_nocond;
             MARK;
             ANDI(xFlags, xFlags, ~(1<<F_ZF));
-            if(ed!=gd)
-                u8 = gd;
-            else
-                u8 = x1;
-            ADDI(u8, xZR, 0);
-            if(rex.w) {
-                MV(x2, ed);
-                SRLI(x3, x2, 32);
+            if(rv64_zbb) {
+                MOV32w(x1, rex.w?63:31);
+                CLZxw(gd, ed);
+                SUB(gd, x1, gd);
+            } else {
+                if(ed!=gd)
+                    u8 = gd;
+                else
+                    u8 = x1;
+                ADDI(u8, xZR, 0);
+                if(rex.w) {
+                    MV(x2, ed);
+                    SRLI(x3, x2, 32);
+                    BEQZ(x3, 4+2*4);
+                    ADDI(u8, u8, 32);
+                    MV(x2, x3);
+                } else {
+                    AND(x2, ed, xMASK);
+                }
+                SRLI(x3, x2, 16);
                 BEQZ(x3, 4+2*4);
-                ADDI(u8, u8, 32);
+                ADDI(u8, u8, 16);
                 MV(x2, x3);
-            } else {
-                AND(x2, ed, xMASK);
+                SRLI(x3, x2, 8);
+                BEQZ(x3, 4+2*4);
+                ADDI(u8, u8, 8);
+                MV(x2, x3);
+                SRLI(x3, x2, 4);
+                BEQZ(x3, 4+2*4);
+                ADDI(u8, u8, 4);
+                MV(x2, x3);
+                ANDI(x2, x2, 0b1111);
+                TABLE64(x3, (uintptr_t)&lead0tab);
+                ADD(x3, x3, x2);
+                LBU(x2, x3, 0);
+                ADD(gd, u8, x2);
             }
-            SRLI(x3, x2, 16);
-            BEQZ(x3, 4+2*4);
-            ADDI(u8, u8, 16);
-            MV(x2, x3);
-            SRLI(x3, x2, 8);
-            BEQZ(x3, 4+2*4);
-            ADDI(u8, u8, 8);
-            MV(x2, x3);
-            SRLI(x3, x2, 4);
-            BEQZ(x3, 4+2*4);
-            ADDI(u8, u8, 4);
-            MV(x2, x3);
-            ANDI(x2, x2, 0b1111);
-            TABLE64(x3, (uintptr_t)&lead0tab);
-            ADD(x3, x3, x2);
-            LBU(x2, x3, 0);
-            ADD(gd, u8, x2);
             break;
         case 0xBE:
             INST_NAME("MOVSX Gd, Eb");
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c
index 489d5ca0..77b8bf2d 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f.c
@@ -284,21 +284,24 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 AND(x4, ed, xMASK);
                 ed = x4;
             }
-            BNE_MARK(ed, xZR);
             ANDI(xFlags, xFlags, ~((1<<F_ZF) | (1<<F_CF)));
+            BNE_MARK(ed, xZR);
             ORI(xFlags, xFlags, 1<<F_CF);
             MOV32w(gd, rex.w?64:32);
             B_NEXT_nocond;
             MARK;
-            NEG(x2, ed);
-            AND(x2, x2, ed);
-            TABLE64(x3, 0x03f79d71b4ca8b09ULL);
-            MUL(x2, x2, x3);
-            SRLI(x2, x2, 64-6);
-            TABLE64(x1, (uintptr_t)&deBruijn64tab);
-            ADD(x1, x1, x2);
-            LBU(gd, x1, 0);
-            ANDI(xFlags, xFlags, ~((1<<F_ZF) | (1<<F_CF)));
+            if(rv64_zbb) {
+                CTZxw(gd, ed);
+            } else {
+                NEG(x2, ed);
+                AND(x2, x2, ed);
+                TABLE64(x3, 0x03f79d71b4ca8b09ULL);
+                MUL(x2, x2, x3);
+                SRLI(x2, x2, 64-6);
+                TABLE64(x1, (uintptr_t)&deBruijn64tab);
+                ADD(x1, x1, x2);
+                LBU(gd, x1, 0);
+            }
             BNE(gd, xZR, 4+4);
             ORI(xFlags, xFlags, 1<<F_ZF);
             break;
@@ -319,38 +322,42 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             ORI(xFlags, xFlags, 1<<F_CF);
             B_NEXT_nocond;
             MARK;
-            if(ed!=gd)
-                u8 = gd;
-            else
-                u8 = x1;
-            ADDI(u8, xZR, rex.w?63:31);
-            if(rex.w) {
-                MV(x2, ed);
-                SRLI(x3, x2, 32);
+            if(rv64_zbb) {
+                CLZxw(gd, ed);
+            } else {
+                if(ed!=gd)
+                    u8 = gd;
+                else
+                    u8 = x1;
+                ADDI(u8, xZR, rex.w?63:31);
+                if(rex.w) {
+                    MV(x2, ed);
+                    SRLI(x3, x2, 32);
+                    BEQZ(x3, 4+2*4);
+                    SUBI(u8, u8, 32);
+                    MV(x2, x3);
+                } else {
+                    AND(x2, ed, xMASK);
+                }
+                SRLI(x3, x2, 16);
                 BEQZ(x3, 4+2*4);
-                SUBI(u8, u8, 32);
+                SUBI(u8, u8, 16);
                 MV(x2, x3);
-            } else {
-                AND(x2, ed, xMASK);
+                SRLI(x3, x2, 8);
+                BEQZ(x3, 4+2*4);
+                SUBI(u8, u8, 8);
+                MV(x2, x3);
+                SRLI(x3, x2, 4);
+                BEQZ(x3, 4+2*4);
+                SUBI(u8, u8, 4);
+                MV(x2, x3);
+                ANDI(x2, x2, 0b1111);
+                TABLE64(x3, (uintptr_t)&lead0tab);
+                ADD(x3, x3, x2);
+                LBU(x2, x3, 0);
+                SUB(gd, u8, x2);
+                MARK2;
             }
-            SRLI(x3, x2, 16);
-            BEQZ(x3, 4+2*4);
-            SUBI(u8, u8, 16);
-            MV(x2, x3);
-            SRLI(x3, x2, 8);
-            BEQZ(x3, 4+2*4);
-            SUBI(u8, u8, 8);
-            MV(x2, x3);
-            SRLI(x3, x2, 4);
-            BEQZ(x3, 4+2*4);
-            SUBI(u8, u8, 4);
-            MV(x2, x3);
-            ANDI(x2, x2, 0b1111);
-            TABLE64(x3, (uintptr_t)&lead0tab);
-            ADD(x3, x3, x2);
-            LBU(x2, x3, 0);
-            SUB(gd, u8, x2);
-            MARK2;
             ANDI(xFlags, xFlags, ~((1<<F_ZF) | (1<<F_CF)));
             BNE(gd, xZR, 4+4);
             ORI(xFlags, xFlags, 1<<F_ZF);
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index eec49a09..7ff2d4db 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -528,6 +528,10 @@ f28–31 ft8–11 FP temporaries Caller
 #define SH3ADDUW(rd, rs1, rs2) EMIT(R_type(0b0010000, rs2, rs1, 0b110, rd, 0b0111011))
 // Shift left unsigned word (immediate)
 #define SLLIUW(rd, rs1, imm) EMIT(R_type(0b0000100, imm, rs1, 0b001, rd, 0b0011011))
+// Shift left by 1,2 or 3 and add (rd = X(rs2) + X(rs1)<<x)
+#define SHxADD(rd, rs1, x, rs2) EMIT(R_type(0b0010000, rs2, rs1, (x)<<1, rd, 0b0110011))
+// Shift unsigned word left by 1,2 or 3 and add (rd = X(rs2) + Wz(rs1)<<x)
+#define SHxADDUW(rd, rs1, x, rs2) EMIT(R_type(0b0010000, rs2, rs1, (x)<<1, rd, 0b0111011))
 
 //Zbb
 // AND with reverted operand (rs1 & ~rs2)
@@ -537,23 +541,23 @@ f28–31 ft8–11 FP temporaries Caller
 // Exclusive NOR (~(rs1 ^ rs2))
 #define XNOR(rd, rs1, rs2) EMIT(R_type(0b0100000, rs2, rs1, 0b100, rd, 0b0110011))
 // Count leading zero bits
-#define CLZ(rd, rs) EMIT(R_type(0b0110000, 0b00000, rs1, 0b001, rd, 0b0010011))
+#define CLZ(rd, rs) EMIT(R_type(0b0110000, 0b00000, rs, 0b001, rd, 0b0010011))
 // Count leading zero bits in word
-#define CLZW(rd, rs) EMIT(R_type(0b0110000, 0b00000, rs1, 0b001, rd, 0b0011011))
+#define CLZW(rd, rs) EMIT(R_type(0b0110000, 0b00000, rs, 0b001, rd, 0b0011011))
 // Count leading zero bits
-#define CLZxw(rd, rs) EMIT(R_type(0b0110000, 0b00000, rs1, 0b001, rd, rex.w?0b0010011:0b0011011))
+#define CLZxw(rd, rs) EMIT(R_type(0b0110000, 0b00000, rs, 0b001, rd, rex.w?0b0010011:0b0011011))
 // Count trailing zero bits
-#define CTZ(rd, rs) EMIT(R_type(0b0110000, 0b00001, rs1, 0b001, rd, 0b0010011))
+#define CTZ(rd, rs) EMIT(R_type(0b0110000, 0b00001, rs, 0b001, rd, 0b0010011))
 // Count trailing zero bits in word
-#define CTZW(rd, rs) EMIT(R_type(0b0110000, 0b00001, rs1, 0b001, rd, 0b0011011))
+#define CTZW(rd, rs) EMIT(R_type(0b0110000, 0b00001, rs, 0b001, rd, 0b0011011))
 // Count trailing zero bits
-#define CTZxw(rd, rs) EMIT(R_type(0b0110000, 0b00001, rs1, 0b001, rd, rex.w?0b0010011:0b0011011))
+#define CTZxw(rd, rs) EMIT(R_type(0b0110000, 0b00001, rs, 0b001, rd, rex.w?0b0010011:0b0011011))
 // Count set bits
-#define CPOP(rd, rs) EMIT(R_type(0b0110000, 0b00010, rs1, 0b001, rd, 0b0010011))
+#define CPOP(rd, rs) EMIT(R_type(0b0110000, 0b00010, rs, 0b001, rd, 0b0010011))
 // Count set bits in word
-#define CPOPW(rd, rs) EMIT(R_type(0b0110000, 0b00010, rs1, 0b001, rd, 0b0011011))
+#define CPOPW(rd, rs) EMIT(R_type(0b0110000, 0b00010, rs, 0b001, rd, 0b0011011))
 // Count set bits
-#define CPOPxw(rd, rs) EMIT(R_type(0b0110000, 0b00010, rs1, 0b001, rd, rex.w?0b0010011:0b0011011))
+#define CPOPxw(rd, rs) EMIT(R_type(0b0110000, 0b00010, rs, 0b001, rd, rex.w?0b0010011:0b0011011))
 // Maximum
 #define MAX(rd, rs1, rs2) EMIT(R_type(0b0000101, rs2, rs1, 0b110, rd, 0b0110011))
 // Unisgned maximum