diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2025-08-22 19:11:51 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-22 13:11:51 +0200 |
| commit | 031d3def2d7d1c0f84e8b2575f8c9e2aee47bbfa (patch) | |
| tree | 7030ab927152bca2ff0a3e739e86f209577d45de /src | |
| parent | 7c32cb24a05bff89b4b9ed4461ecb9e34fdf3d1d (diff) | |
| download | box64-031d3def2d7d1c0f84e8b2575f8c9e2aee47bbfa.tar.gz box64-031d3def2d7d1c0f84e8b2575f8c9e2aee47bbfa.zip | |
[RV64_DYNAREC] Added scalar SSE 66 0F 3A 60/61/62 opcodes (#2963)
* [RV64_DYNAREC] Added scalar SSE 66 0F 3A 60/61/62 opcodes
* [CI] Bump timeout
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f38.c | 122 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f_vector.c | 2 |
2 files changed, 123 insertions, 1 deletion
diff --git a/src/dynarec/rv64/dynarec_rv64_660f38.c b/src/dynarec/rv64/dynarec_rv64_660f38.c index aca87f82..6ba960e7 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f38.c +++ b/src/dynarec/rv64/dynarec_rv64_660f38.c @@ -1171,6 +1171,128 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode, MOV32w(x4, u8); CALL4(const_native_pclmul, -1, x1, x2, x3, x4); break; + case 0x60: + INST_NAME("PCMPESTRM Gx, Ex, Ib"); + SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION); + nextop = F8; + GETG; + sse_forget_reg(dyn, ninst, x6, gd); + ADDI(x3, xEmu, offsetof(x64emu_t, xmm[gd])); + if (MODREG) { + ed = (nextop & 7) + (rex.b << 3); + sse_reflect_reg(dyn, ninst, x6, ed); + ADDI(x1, xEmu, offsetof(x64emu_t, xmm[ed])); + ed = x1; + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 1); + } + MV(x2, xRDX); + MV(x4, xRAX); + u8 = F8; + ADDI(x5, xZR, u8); + CALL6(const_sse42_compare_string_explicit_len, x1, ed, x2, x3, x4, x5, 0); + if (u8 & 0b1000000) { + switch (u8 & 1) { + case 0b00: + for (int i = 0; i < 16; ++i) { + SRLI(x3, x1, i); + ANDI(x3, x3, 1); + NEG(x3, x3); + SB(x3, xEmu, offsetof(x64emu_t, xmm[0]) + i); + } + break; + case 0b01: + for (int i = 0; i < 8; ++i) { + SRLI(x3, x1, i); + ANDI(x3, x3, 1); + NEG(x3, x3); + SH(x3, xEmu, offsetof(x64emu_t, xmm[0]) + i * 2); + } + break; + } + } else { + SW(x1, xEmu, offsetof(x64emu_t, xmm[0])); + SW(xZR, xEmu, offsetof(x64emu_t, xmm[0]) + 4); + SD(xZR, xEmu, offsetof(x64emu_t, xmm[0]) + 8); + } + break; + case 0x61: + INST_NAME("PCMPESTRI Gx, Ex, Ib"); + nextop = F8; + GETG; + u8 = geted_ib(dyn, addr, ninst, nextop); + SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION); + sse_reflect_reg(dyn, ninst, x6, gd); + ADDI(x3, xEmu, offsetof(x64emu_t, xmm[gd])); + if (MODREG) { + ed = (nextop & 7) + (rex.b << 3); + sse_reflect_reg(dyn, ninst, x6, ed); + ADDI(x1, xEmu, offsetof(x64emu_t, xmm[ed])); + ed = x1; + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, 
&fixedaddress, rex, NULL, 0, 1); + } + ZEXTW2(x2, xRDX); + ZEXTW2(x4, xRAX); + u8 = F8; + ADDI(x5, xZR, u8); + CALL6(const_sse42_compare_string_explicit_len, x1, ed, x2, x3, x4, x5, 0); + ZEROUP(x1); + BNEZ_MARK(x1); + MOV32w(xRCX, (u8 & 1) ? 8 : 16); + B_NEXT_nocond; + MARK; + if (u8 & 0b1000000) { + CLZxw(xRCX, x1, 0, x2, x3, x4); + ADDI(x2, xZR, 31); + SUB(xRCX, x2, xRCX); + } else { + CTZxw(xRCX, x1, 0, x2, x3); + } + break; + case 0x62: + INST_NAME("PCMPISTRM Gx, Ex, Ib"); + SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION); + nextop = F8; + GETG; + sse_forget_reg(dyn, ninst, x6, gd); + ADDI(x2, xEmu, offsetof(x64emu_t, xmm[gd])); + if (MODREG) { + ed = (nextop & 7) + (rex.b << 3); + sse_reflect_reg(dyn, ninst, x6, ed); + ADDI(x1, xEmu, offsetof(x64emu_t, xmm[ed])); + ed = x1; + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 1); + } + u8 = F8; + ADDI(x3, xZR, u8); + CALL4(const_sse42_compare_string_implicit_len, x1, ed, x2, x3, 0); + if (u8 & 0b1000000) { + switch (u8 & 1) { + case 0b00: + for (int i = 0; i < 16; ++i) { + SRLI(x3, x1, i); + ANDI(x3, x3, 1); + NEG(x3, x3); + SB(x3, xEmu, offsetof(x64emu_t, xmm[0]) + i); + } + break; + case 0b01: + for (int i = 0; i < 8; ++i) { + SRLI(x3, x1, i); + ANDI(x3, x3, 1); + NEG(x3, x3); + SH(x3, xEmu, offsetof(x64emu_t, xmm[0]) + i * 2); + } + break; + } + } else { + SW(x1, xEmu, offsetof(x64emu_t, xmm[0])); + SW(xZR, xEmu, offsetof(x64emu_t, xmm[0]) + 4); + SD(xZR, xEmu, offsetof(x64emu_t, xmm[0]) + 8); + } + break; case 0x63: INST_NAME("PCMPISTRI Gx, Ex, Ib"); SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION); diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c index d4d2a5b3..3b6f0b54 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c @@ -1066,7 +1066,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VMERGE_VVM(q0, v1, d0); break; case 
0x44: - case 0x63: + case 0x60 ... 0x63: case 0xDF: return 0; default: DEFAULT_VECTOR; } |