diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2025-05-23 19:40:13 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-05-23 13:40:13 +0200 |
| commit | d40f51bdc53e07972a03284e0521d001035c996a (patch) | |
| tree | 28b6e03872e3d0f5e8e9e587d3712788682963e6 /src | |
| parent | b78ffcfad8a2c7235f3f959576417aa902a5a822 (diff) | |
| download | box64-d40f51bdc53e07972a03284e0521d001035c996a.tar.gz box64-d40f51bdc53e07972a03284e0521d001035c996a.zip | |
[RV64_DYNAREC] Optimized some opcodes with xtheadbb (#2663)
* [RV64_DYNAREC] Optimized *EXTH macros with xtheadbb
* fix
* [RV64_DYNAREC] Optimized scalar version of PCMPEQB with xtheadbb
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 25 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 21 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 30 |
3 files changed, 47 insertions, 29 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index b2f8c720..9429b701 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -1666,14 +1666,23 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("PCMPEQB Gm,Em"); nextop = F8; GETGM(); - GETEM(x2, 0, 7); - for (int i = 0; i < 8; ++i) { - LBU(x3, gback, gdoffset + i); - LBU(x4, wback, fixedaddress + i); - SUB(x3, x3, x4); - SEQZ(x3, x3); - NEG(x3, x3); - SB(x3, gback, gdoffset + i); + if (rv64_xtheadbb) { + GETEM(x2, 0, 0); + LD(x3, gback, gdoffset); + LD(x4, wback, fixedaddress); + XOR(x3, x3, x4); + TH_TSTNBZ(x3, x3); + SD(x3, gback, gdoffset); + } else { + GETEM(x2, 0, 7); + for (int i = 0; i < 8; ++i) { + LBU(x3, gback, gdoffset + i); + LBU(x4, wback, fixedaddress + i); + SUB(x3, x3, x4); + SEQZ(x3, x3); + NEG(x3, x3); + SB(x3, gback, gdoffset + i); + } } break; case 0x75: diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c index a3dc7999..bd40bb4b 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f.c +++ b/src/dynarec/rv64/dynarec_rv64_660f.c @@ -1058,14 +1058,19 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("PCMPEQB Gx,Ex"); nextop = F8; GETGX(); - GETEX(x2, 0, 15); - for (int i = 0; i < 16; ++i) { - LBU(x3, gback, gdoffset + i); - LBU(x4, wback, fixedaddress + i); - SUB(x3, x3, x4); - SEQZ(x3, x3); - NEG(x3, x3); - SB(x3, gback, gdoffset + i); + if (rv64_xtheadbb) { + GETEX(x2, 0, 8); + SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4); TH_TSTNBZ(x3, x3);); + } else { + GETEX(x2, 0, 15); + for (int i = 0; i < 16; ++i) { + LBU(x3, gback, gdoffset + i); + LBU(x4, wback, fixedaddress + i); + SUB(x3, x3, x4); + SEQZ(x3, x3); + NEG(x3, x3); + SB(x3, gback, gdoffset + i); + } } break; case 0x75: diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index 4f656cfc..38cda033 100644 --- 
a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -934,22 +934,26 @@ // Sign-extend half-word #define SEXTH_(rd, rs) EMIT(R_type(0b0110000, 0b00101, rs, 0b001, rd, 0b0010011)) // Sign-extend half-word -#define SEXTH(rd, rs) \ - if (rv64_zbb) \ - SEXTH_(rd, rs); \ - else { \ - SLLI(rd, rs, 48); \ - SRAI(rd, rd, 48); \ +#define SEXTH(rd, rs) \ + if (rv64_zbb) \ + SEXTH_(rd, rs); \ + else if (rv64_xtheadbb) \ + TH_EXT(rd, rs, 15, 0); \ + else { \ + SLLI(rd, rs, 48); \ + SRAI(rd, rd, 48); \ } // Zero-extend half-word #define ZEXTH_(rd, rs) EMIT(R_type(0b0000100, 0b00000, rs, 0b100, rd, 0b0111011)) // Zero-extend half-word -#define ZEXTH(rd, rs) \ - if (rv64_zbb) \ - ZEXTH_(rd, rs); \ - else { \ - SLLI(rd, rs, 48); \ - SRLI(rd, rd, 48); \ +#define ZEXTH(rd, rs) \ + if (rv64_zbb) \ + ZEXTH_(rd, rs); \ + else if (rv64_xtheadbb) \ + TH_EXTU(rd, rs, 15, 0); \ + else { \ + SLLI(rd, rs, 48); \ + SRLI(rd, rd, 48); \ } // Insert low 16bits in rs to low 16bits of rd @@ -1174,7 +1178,7 @@ // reg[rd][i] := 0xff // else // reg[rd][i] := 0 -#define TH_TSTNBZ(rd, rs1) EMIT(I_type(0b1000000000000, rs1, 0b001, rd, 0b0001011)) +#define TH_TSTNBZ(rd, rs1) EMIT(I_type(0b100000000000, rs1, 0b001, rd, 0b0001011)) // XTheadBs - Single-bit instructions |