diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2025-05-20 20:45:50 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-05-20 14:45:50 +0200 |
| commit | e1e3122a1fe834ca12344e0f8a51b995d887b350 (patch) | |
| tree | ad6991f23d8e675351023dbebb1b4110d4635b99 /src | |
| parent | 4f88b2bdd77d7e61c79f6ca632cbfaccc5371a6f (diff) | |
| download | box64-e1e3122a1fe834ca12344e0f8a51b995d887b350.tar.gz box64-e1e3122a1fe834ca12344e0f8a51b995d887b350.zip | |
[RV64_DYNAREC] Improved POPCNT and fixed some scratch conflicts (#2651)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00_2.c | 4 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 8 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_66.c | 4 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f0.c | 4 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f30f.c | 105 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 4 |
6 files changed, 95 insertions, 34 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c index 34c805d9..ffc4d43a 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_2.c +++ b/src/dynarec/rv64/dynarec_rv64_00_2.c @@ -329,9 +329,9 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int if (MODREG) { GETGD; GETED(0); - MVxw(x1, gd); + MVxw(x3, gd); MVxw(gd, ed); - MVxw(ed, x1); + MVxw(ed, x3); } else { GETGD; addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0); diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index ae66e1b2..82f8f632 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -2257,7 +2257,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni B_NEXT_nocond; MARK; // gd is undefined if ed is all zeros, don't worry. - CTZxw(gd, ed, rex.w, x1, x2); + CTZxw(gd, ed, rex.w, x3, x5); ANDI(xFlags, xFlags, ~(1 << F_ZF)); break; case 0xBD: @@ -2276,9 +2276,9 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni B_NEXT_nocond; MARK; ANDI(xFlags, xFlags, ~(1 << F_ZF)); - CLZxw(gd, ed, rex.w, x1, x2, x3); - ADDI(x1, xZR, rex.w ? 63 : 31); - SUB(gd, x1, gd); + CLZxw(gd, ed, rex.w, x3, x5, x7); + ADDI(x3, xZR, rex.w ? 
63 : 31); + SUB(gd, x3, gd); break; case 0xBE: INST_NAME("MOVSX Gd, Eb"); diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c index c054c3ba..f7134d6e 100644 --- a/src/dynarec/rv64/dynarec_rv64_66.c +++ b/src/dynarec/rv64/dynarec_rv64_66.c @@ -623,9 +623,9 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (MODREG) { GETGD; GETED(0); - MV(x1, gd); + MV(x5, gd); INSHz(gd, ed, x3, x4, 1, 1); - INSHz(ed, x1, x3, x4, 0, 1); + INSHz(ed, x5, x3, x4, 0, 1); } else { GETGD; addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0); diff --git a/src/dynarec/rv64/dynarec_rv64_f0.c b/src/dynarec/rv64/dynarec_rv64_f0.c index f51c3bb9..84816cea 100644 --- a/src/dynarec/rv64/dynarec_rv64_f0.c +++ b/src/dynarec/rv64/dynarec_rv64_f0.c @@ -788,9 +788,9 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (MODREG) { GETGD; GETED(0); - MV(x1, gd); + MV(x5, gd); MV(gd, ed); - MV(ed, x1); + MV(ed, x5); } else { SMDMB(); GETGD; diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c index 16dc167f..19a8b4b9 100644 --- a/src/dynarec/rv64/dynarec_rv64_f30f.c +++ b/src/dynarec/rv64/dynarec_rv64_f30f.c @@ -469,26 +469,87 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int if (rv64_zbb) { CPOPxw(gd, ed); } else { - TABLE64(x1, 0x5555555555555555uLL); - SRLI(x5, ed, 1); - AND(x5, x5, x1); - SUB(x5, ed, x5); - TABLE64(x3, 0x3333333333333333uLL); - SRLI(x1, x5, 2); - AND(x1, x1, x3); - AND(x5, x5, x3); - ADD(x5, x5, x1); - TABLE64(x3, 0x0F0F0F0F0F0F0F0FuLL); - SRLI(x1, x5, 4); - ADD(x5, x5, x1); - AND(x5, x5, x3); - SRLI(x1, x5, 32); - ADDW(x5, x5, x1); - SRLIW(x1, x5, 16); - ADDW(x5, x5, x1); - SRLIW(x1, x5, 8); - ADDW(x5, x5, x1); - ANDI(gd, x5, 0x7F); + if (rex.w) { + // x7 = 0x5555555555555555 + LUI(x7, 0x55555); + ADDIW(x7, x7, 0x555); + SLLI(x6, x7, 32); + ADD(x7, x7, x6); + + // v = v - ((v >> 
1) & x7) + SRLI(x5, ed, 1); + AND(x5, x5, x7); + SUB(x5, ed, x5); + + // x3 = 0x3333333333333333 + LUI(x3, 0x33333); + ADDIW(x3, x3, 0x333); + SLLI(x6, x3, 32); + ADD(x3, x3, x6); + + // v = (v & x3) + ((v >> 2) & x3); + SRLI(x7, x5, 2); + AND(x7, x7, x3); + AND(x5, x5, x3); + ADD(x5, x5, x7); + + // x3 = 0x0F0F0F0F0F0F0F0F + LUI(x3, 0xF0F1); + ADDIW(x3, x3, 0xF0F); + SLLI(x6, x3, 32); + ADD(x3, x3, x6); + + // v = (v + (v >> 4)) & x3 + SRLI(x7, x5, 4); + ADD(x5, x5, x7); + AND(x5, x5, x3); + + // x3 = 0x0101010101010101 + LUI(x3, 0x1010); + ADDIW(x3, x3, 0x101); + SLLI(x6, x3, 32); + ADD(x3, x3, x6); + + // count = (v * x3) >> 56 + MUL(gd, x5, x3); + SRLI(gd, gd, 56); + } else { + // x7 = 0x55555555uLL + LUI(x7, 0x55555); + ADDIW(x7, x7, 0x555); + + // v = v - ((v >> 1) & x7) + SRLI(x5, ed, 1); + AND(x5, x5, x7); + SUB(x5, ed, x5); + + // x3 = 0x33333333uLL + LUI(x3, 0x33333); + ADDIW(x3, x3, 0x333); + + // v = (v & x3) + ((v >> 2) & x3); + SRLI(x7, x5, 2); + AND(x7, x7, x3); + AND(x5, x5, x3); + ADD(x5, x5, x7); + + // x3 = 0x0F0F0F0FuLL + LUI(x3, 0xF0F1); + ADDIW(x3, x3, 0xF0F); + + // v = (v + (v >> 4)) & x3 + SRLI(x7, x5, 4); + ADD(x5, x5, x7); + AND(x5, x5, x3); + + // x3 = 0x01010101uLL + LUI(x3, 0x1010); + ADDIW(x3, x3, 0x101); + + // count = (v * x3) >> 24 + MULW(gd, x5, x3); + SRLIW(gd, gd, 24); + } } break; case 0xBC: @@ -508,7 +569,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int MOV32w(gd, rex.w ? 
64 : 32); B_NEXT_nocond; MARK; - CTZxw(gd, ed, rex.w, x1, x2); + CTZxw(gd, ed, rex.w, x3, x5); BNE(gd, xZR, 4 + 4); ORI(xFlags, xFlags, 1 << F_ZF); break; @@ -529,7 +590,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ORI(xFlags, xFlags, 1 << F_CF); B_NEXT_nocond; MARK; - CLZxw(gd, ed, rex.w, x1, x2, x3); + CLZxw(gd, ed, rex.w, x5, x2, x3); ANDI(xFlags, xFlags, ~((1 << F_ZF) | (1 << F_CF))); BNE(gd, xZR, 4 + 4); ORI(xFlags, xFlags, 1 << F_ZF); diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h index c1dc6fc7..4f656cfc 100644 --- a/src/dynarec/rv64/rv64_emitter.h +++ b/src/dynarec/rv64/rv64_emitter.h @@ -862,8 +862,8 @@ u8 = rd; \ else \ u8 = s1; \ - ADDI(u8, xZR, rex.w ? 63 : 31); \ - if (rex.w) { \ + ADDI(u8, xZR, x ? 63 : 31); \ + if (x) { \ MV(s2, rs); \ SRLI(s3, s2, 32); \ BEQZ(s3, 4 + 2 * 4); \ |