diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2025-05-23 22:33:02 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-05-23 16:33:02 +0200 |
| commit | b7ddb92c80b45d5945a3339f8f7a36ee787e0901 (patch) | |
| tree | 61cdd68bc5cae881ea13c4e2e60e15e2f0bd0d1c /src | |
| parent | d40f51bdc53e07972a03284e0521d001035c996a (diff) | |
| download | box64-b7ddb92c80b45d5945a3339f8f7a36ee787e0901.tar.gz box64-b7ddb92c80b45d5945a3339f8f7a36ee787e0901.zip | |
[RV64_DYNAREC] Optimized CLZ macro with xtheadbb (#2664)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 4 |
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 84 |
2 files changed, 48 insertions, 40 deletions
```diff
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index bd40bb4b..4ecf1d7d 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -1426,8 +1426,8 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 B_NEXT_nocond;
                 MARK;
                 ANDI(xFlags, xFlags, ~(1 << F_ZF));
-                CLZxw(gd, ed, 0, x1, x2, x6);
-                ADDI(x1, xZR, rex.w ? 63 : 31);
+                CLZxw(gd, ed, 1, x1, x2, x6);
+                ADDI(x1, xZR, 63);
                 SUB(gd, x1, gd);
                 GWBACK;
                 break;
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 38cda033..9dd094f7 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -851,44 +851,52 @@
 // Count leading zero bits in word
 #define CLZW(rd, rs) EMIT(R_type(0b0110000, 0b00000, rs, 0b001, rd, 0b0011011))
 // Count leading zero bits
-#define CLZxw(rd, rs, x, s1, s2, s3) \
-    if (rv64_zbb) { \
-        if (x) \
-            CLZ(rd, rs); \
-        else \
-            CLZW(rd, rs); \
-    } else { \
-        if (rs != rd) \
-            u8 = rd; \
-        else \
-            u8 = s1; \
-        ADDI(u8, xZR, x ? 63 : 31); \
-        if (x) { \
-            MV(s2, rs); \
-            SRLI(s3, s2, 32); \
-            BEQZ(s3, 4 + 2 * 4); \
-            SUBI(u8, u8, 32); \
-            MV(s2, s3); \
-        } else { \
-            ZEXTW2(s2, rs); \
-        } \
-        SRLI(s3, s2, 16); \
-        BEQZ(s3, 4 + 2 * 4); \
-        SUBI(u8, u8, 16); \
-        MV(s2, s3); \
-        SRLI(s3, s2, 8); \
-        BEQZ(s3, 4 + 2 * 4); \
-        SUBI(u8, u8, 8); \
-        MV(s2, s3); \
-        SRLI(s3, s2, 4); \
-        BEQZ(s3, 4 + 2 * 4); \
-        SUBI(u8, u8, 4); \
-        MV(s2, s3); \
-        ANDI(s2, s2, 0b1111); \
-        TABLE64(s3, (uintptr_t) & lead0tab); \
-        ADD(s3, s3, s2); \
-        LBU(s2, s3, 0); \
-        SUB(rd, u8, s2); \
+#define CLZxw(rd, rs, x, s1, s2, s3) \
+    if (rv64_zbb) { \
+        if (x) \
+            CLZ(rd, rs); \
+        else \
+            CLZW(rd, rs); \
+    } else if (rv64_xtheadbb) { \
+        if (x) { \
+            TH_FF1(rd, rs); \
+        } else { \
+            ZEXTW2(rd, rs); \
+            TH_FF1(rd, rd); \
+            SUBI(rd, rd, 32); \
+        } \
+    } else { \
+        if (rs != rd) \
+            u8 = rd; \
+        else \
+            u8 = s1; \
+        ADDI(u8, xZR, x ? 63 : 31); \
+        if (x) { \
+            MV(s2, rs); \
+            SRLI(s3, s2, 32); \
+            BEQZ(s3, 4 + 2 * 4); \
+            SUBI(u8, u8, 32); \
+            MV(s2, s3); \
+        } else { \
+            ZEXTW2(s2, rs); \
+        } \
+        SRLI(s3, s2, 16); \
+        BEQZ(s3, 4 + 2 * 4); \
+        SUBI(u8, u8, 16); \
+        MV(s2, s3); \
+        SRLI(s3, s2, 8); \
+        BEQZ(s3, 4 + 2 * 4); \
+        SUBI(u8, u8, 8); \
+        MV(s2, s3); \
+        SRLI(s3, s2, 4); \
+        BEQZ(s3, 4 + 2 * 4); \
+        SUBI(u8, u8, 4); \
+        MV(s2, s3); \
+        ANDI(s2, s2, 0b1111); \
+        TABLE64(s3, (uintptr_t)&lead0tab); \
+        ADD(s3, s3, s2); \
+        LBU(s2, s3, 0); \
+        SUB(rd, u8, s2); \
     }
 
 // Count trailing zero bits
```