diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-03-06 16:58:04 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-03-06 09:58:04 +0100 |
| commit | 584b258241b12f90d575ab43d9602ee9a5566d3d (patch) | |
| tree | 886d74ea299f3f25672aa8810e32054fea37c052 /src | |
| parent | fb13ce2bafc05697e84b095326ba95b3f5b0ca8b (diff) | |
| download | box64-584b258241b12f90d575ab43d9602ee9a5566d3d.tar.gz box64-584b258241b12f90d575ab43d9602ee9a5566d3d.zip | |
[RV64_DYNAREC] Added 66 0F 38 61 PCMPESTRI opcode and some refactors too (#1337)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 54 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f.c | 76 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f30f.c | 50 | ||||
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 61 |
4 files changed, 108 insertions, 133 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index 8b9eb63c..bfe29a2c 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -1677,18 +1677,8 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             ORI(xFlags, xFlags, 1 << F_ZF);
             B_NEXT_nocond;
             MARK;
-            if (rv64_zbb) {
-                CTZxw(gd, ed);
-            } else {
-                NEG(x2, ed);
-                AND(x2, x2, ed);
-                TABLE64(x3, 0x03f79d71b4ca8b09ULL);
-                MUL(x2, x2, x3);
-                SRLI(x2, x2, 64 - 6);
-                TABLE64(x1, (uintptr_t)&deBruijn64tab);
-                ADD(x1, x1, x2);
-                LBU(gd, x1, 0);
-            }
+            // gd is undefined if ed is all zeros, don't worry.
+            CTZxw(gd, ed, rex.w, x1, x2);
             ANDI(xFlags, xFlags, ~(1 << F_ZF));
             break;
         case 0xBD:
@@ -1707,43 +1697,9 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             B_NEXT_nocond;
             MARK;
             ANDI(xFlags, xFlags, ~(1 << F_ZF));
-            if (rv64_zbb) {
-                MOV32w(x1, rex.w ? 63 : 31);
-                CLZxw(gd, ed);
-                SUB(gd, x1, gd);
-            } else {
-                if (ed != gd)
-                    u8 = gd;
-                else
-                    u8 = x1;
-                ADDI(u8, xZR, 0);
-                if (rex.w) {
-                    MV(x2, ed);
-                    SRLI(x3, x2, 32);
-                    BEQZ(x3, 4 + 2 * 4);
-                    ADDI(u8, u8, 32);
-                    MV(x2, x3);
-                } else {
-                    AND(x2, ed, xMASK);
-                }
-                SRLI(x3, x2, 16);
-                BEQZ(x3, 4 + 2 * 4);
-                ADDI(u8, u8, 16);
-                MV(x2, x3);
-                SRLI(x3, x2, 8);
-                BEQZ(x3, 4 + 2 * 4);
-                ADDI(u8, u8, 8);
-                MV(x2, x3);
-                SRLI(x3, x2, 4);
-                BEQZ(x3, 4 + 2 * 4);
-                ADDI(u8, u8, 4);
-                MV(x2, x3);
-                ANDI(x2, x2, 0b1111);
-                TABLE64(x3, (uintptr_t)&lead0tab);
-                ADD(x3, x3, x2);
-                LBU(x2, x3, 0);
-                ADD(gd, u8, x2);
-            }
+            CLZxw(gd, ed, rex.w, x1, x2, x3);
+            ADDI(x1, xZR, rex.w ? 63 : 31);
+            SUB(gd, x1, gd);
             break;
         case 0xBE:
             INST_NAME("MOVSX Gd, Eb");
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index 2ace7fd4..e1553d69 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -21,6 +21,7 @@
 #include "dynarec_rv64_private.h"
 #include "dynarec_rv64_functions.h"
 #include "dynarec_rv64_helper.h"
+#include "emu/x64compstrings.h"
 
 uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog)
 {
@@ -898,6 +899,42 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         SW(x3, gback, gdoffset + i * 4);
                     }
                     break;
+                case 0x61:
+                    INST_NAME("PCMPESTRI Gx, Ex, Ib");
+                    SETFLAGS(X_ALL, SF_SET);
+                    nextop = F8;
+                    GETG;
+                    sse_reflect_reg(dyn, ninst, gd);
+                    ADDI(x3, xEmu, offsetof(x64emu_t, xmm[gd]));
+                    if (MODREG) {
+                        ed = (nextop & 7) + (rex.b << 3);
+                        sse_reflect_reg(dyn, ninst, ed);
+                        ADDI(x1, xEmu, offsetof(x64emu_t, xmm[ed]));
+                    } else {
+                        addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 1);
+                        if (wback != x1) {
+                            MV(x1, wback);
+                        }
+                    }
+                    // prepare rest arguments; the value tested and bit-scanned below is read from x1
+                    MV(x2, xRDX);
+                    MV(x4, xRAX);
+                    u8 = F8;
+                    MOV32w(x5, u8);
+                    CALL(sse42_compare_string_explicit_len, x1);
+                    ZEROUP(x1);
+                    BNEZ_MARK(x1);
+                    MOV32w(xRCX, (u8 & 1) ? 8 : 16);
+                    B_NEXT_nocond;
+                    MARK;
+                    if (u8 & 0b1000000) {
+                        CLZxw(xRCX, x1, 0, x2, x3, x4);
+                        ADDI(x2, xZR, 31);
+                        SUB(xRCX, x2, xRCX);
+                    } else {
+                        CTZxw(xRCX, x1, 0, x2, x3);
+                    }
+                    break;
                 case 0xDB:
                     INST_NAME("AESIMC Gx, Ex"); // AES-NI
                     nextop = F8;
@@ -2397,18 +2434,8 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             ORI(xFlags, xFlags, 1 << F_ZF);
             B_NEXT_nocond;
             MARK;
-            if (rv64_zbb) {
-                CTZxw(gd, ed);
-            } else {
-                NEG(x2, ed);
-                AND(x2, x2, ed);
-                TABLE64(x3, 0x03f79d71b4ca8b09ULL);
-                MUL(x2, x2, x3);
-                SRLI(x2, x2, 64 - 6);
-                TABLE64(x1, (uintptr_t)&deBruijn64tab);
-                ADD(x1, x1, x2);
-                LBU(gd, x1, 0);
-            }
+            // gd is undefined if ed is all zeros, don't worry.
+            CTZxw(gd, ed, 0, x1, x2);
             ANDI(xFlags, xFlags, ~(1 << F_ZF));
             GWBACK;
             break;
@@ -2424,28 +2451,9 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             B_NEXT_nocond;
             MARK;
             ANDI(xFlags, xFlags, ~(1 << F_ZF));
-            if (rv64_zbb) {
-                MOV32w(x1, rex.w ? 63 : 31);
-                CLZxw(gd, ed);
-                SUB(gd, x1, gd);
-            } else {
-                u8 = gd;
-                ADDI(u8, xZR, 0);
-                AND(x2, ed, xMASK);
-                SRLI(x3, x2, 8);
-                BEQZ(x3, 4 + 2 * 4);
-                ADDI(u8, u8, 8);
-                MV(x2, x3);
-                SRLI(x3, x2, 4);
-                BEQZ(x3, 4 + 2 * 4);
-                ADDI(u8, u8, 4);
-                MV(x2, x3);
-                ANDI(x2, x2, 0b1111);
-                TABLE64(x3, (uintptr_t)&lead0tab);
-                ADD(x3, x3, x2);
-                LBU(x2, x3, 0);
-                ADD(gd, u8, x2);
-            }
+            CLZxw(gd, ed, 0, x1, x2, x3);
+            ADDI(x1, xZR, 31);
+            SUB(gd, x1, gd);
             GWBACK;
             break;
         case 0xBE:
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c
index 45a6a2cf..bebcc551 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f.c
@@ -408,18 +408,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             MOV32w(gd, rex.w ? 64 : 32);
             B_NEXT_nocond;
             MARK;
-            if (rv64_zbb) {
-                CTZxw(gd, ed);
-            } else {
-                NEG(x2, ed);
-                AND(x2, x2, ed);
-                TABLE64(x3, 0x03f79d71b4ca8b09ULL);
-                MUL(x2, x2, x3);
-                SRLI(x2, x2, 64 - 6);
-                TABLE64(x1, (uintptr_t)&deBruijn64tab);
-                ADD(x1, x1, x2);
-                LBU(gd, x1, 0);
-            }
+            CTZxw(gd, ed, rex.w, x1, x2);
             BNE(gd, xZR, 4 + 4);
             ORI(xFlags, xFlags, 1 << F_ZF);
             break;
@@ -440,42 +429,7 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             ORI(xFlags, xFlags, 1 << F_CF);
             B_NEXT_nocond;
             MARK;
-            if (rv64_zbb) {
-                CLZxw(gd, ed);
-            } else {
-                if (ed != gd)
-                    u8 = gd;
-                else
-                    u8 = x1;
-                ADDI(u8, xZR, rex.w ? 63 : 31);
-                if (rex.w) {
-                    MV(x2, ed);
-                    SRLI(x3, x2, 32);
-                    BEQZ(x3, 4 + 2 * 4);
-                    SUBI(u8, u8, 32);
-                    MV(x2, x3);
-                } else {
-                    AND(x2, ed, xMASK);
-                }
-                SRLI(x3, x2, 16);
-                BEQZ(x3, 4 + 2 * 4);
-                SUBI(u8, u8, 16);
-                MV(x2, x3);
-                SRLI(x3, x2, 8);
-                BEQZ(x3, 4 + 2 * 4);
-                SUBI(u8, u8, 8);
-                MV(x2, x3);
-                SRLI(x3, x2, 4);
-                BEQZ(x3, 4 + 2 * 4);
-                SUBI(u8, u8, 4);
-                MV(x2, x3);
-                ANDI(x2, x2, 0b1111);
-                TABLE64(x3, (uintptr_t)&lead0tab);
-                ADD(x3, x3, x2);
-                LBU(x2, x3, 0);
-                SUB(gd, u8, x2);
-                MARK2;
-            }
+            CLZxw(gd, ed, rex.w, x1, x2, x3);
             ANDI(xFlags, xFlags, ~((1 << F_ZF) | (1 << F_CF)));
             BNE(gd, xZR, 4 + 4);
             ORI(xFlags, xFlags, 1 << F_ZF);
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 6b675f6e..87e5be54 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -723,13 +723,70 @@ f28–31 ft8–11 FP temporaries Caller
 // Count leading zero bits in word
 #define CLZW(rd, rs) EMIT(R_type(0b0110000, 0b00000, rs, 0b001, rd, 0b0011011))
 // Count leading zero bits
-#define CLZxw(rd, rs) EMIT(R_type(0b0110000, 0b00000, rs, 0b001, rd, rex.w ? 0b0010011 : 0b0011011))
+#define CLZxw(rd, rs, x, s1, s2, s3) \
+    if (rv64_zbb) { \
+        if (x) \
+            CLZ(rd, rs); \
+        else \
+            CLZW(rd, rs); \
+    } else { \
+        if (rs != rd) \
+            u8 = rd; \
+        else \
+            u8 = s1; \
+        ADDI(u8, xZR, (x) ? 63 : 31); \
+        if (x) { \
+            MV(s2, rs); \
+            SRLI(s3, s2, 32); \
+            BEQZ(s3, 4 + 2 * 4); \
+            SUBI(u8, u8, 32); \
+            MV(s2, s3); \
+        } else { \
+            AND(s2, rs, xMASK); \
+        } \
+        SRLI(s3, s2, 16); \
+        BEQZ(s3, 4 + 2 * 4); \
+        SUBI(u8, u8, 16); \
+        MV(s2, s3); \
+        SRLI(s3, s2, 8); \
+        BEQZ(s3, 4 + 2 * 4); \
+        SUBI(u8, u8, 8); \
+        MV(s2, s3); \
+        SRLI(s3, s2, 4); \
+        BEQZ(s3, 4 + 2 * 4); \
+        SUBI(u8, u8, 4); \
+        MV(s2, s3); \
+        ANDI(s2, s2, 0b1111); \
+        TABLE64(s3, (uintptr_t)&lead0tab); \
+        ADD(s3, s3, s2); \
+        LBU(s2, s3, 0); \
+        SUB(rd, u8, s2); \
+    }
+
 // Count trailing zero bits
 #define CTZ(rd, rs) EMIT(R_type(0b0110000, 0b00001, rs, 0b001, rd, 0b0010011))
 // Count trailing zero bits in word
 #define CTZW(rd, rs) EMIT(R_type(0b0110000, 0b00001, rs, 0b001, rd, 0b0011011))
 // Count trailing zero bits
-#define CTZxw(rd, rs) EMIT(R_type(0b0110000, 0b00001, rs, 0b001, rd, rex.w ? 0b0010011 : 0b0011011))
+// BEWARE: You should take care of the all zeros situation yourself,
+// and clear the high 32bit when x is 1. s1 and s2 are scratch registers and get clobbered.
+#define CTZxw(rd, rs, x, s1, s2) \
+    if (rv64_zbb) { \
+        if (x) \
+            CTZ(rd, rs); \
+        else \
+            CTZW(rd, rs); \
+    } else { \
+        NEG(s2, rs); \
+        AND(s2, s2, rs); \
+        TABLE64(s1, 0x03f79d71b4ca8b09ULL); \
+        MUL(s2, s2, s1); \
+        SRLI(s2, s2, 64 - 6); \
+        TABLE64(s1, (uintptr_t)&deBruijn64tab); \
+        ADD(s1, s1, s2); \
+        LBU(rd, s1, 0); \
+    }
+
 // Count set bits
 #define CPOP(rd, rs) EMIT(R_type(0b0110000, 0b00010, rs, 0b001, rd, 0b0010011))
 // Count set bits in word
|