diff options
| author | xctan <xctan@cirno.icu> | 2024-11-15 17:11:11 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-11-15 10:11:11 +0100 |
| commit | bca0f4d66d3bcf235c7ec84f43d105fce7df3470 (patch) | |
| tree | 60540958a57b5f02e2ae016f0c050926fe9625f9 /src | |
| parent | 8c17a37d1ae4258e7a7a7fdf87a43305f40dc2a1 (diff) | |
| download | box64-bca0f4d66d3bcf235c7ec84f43d105fce7df3470.tar.gz box64-bca0f4d66d3bcf235c7ec84f43d105fce7df3470.zip | |
[RV64_DYNAREC] Added more MMX opcodes for vector (#2037)
* [RV64_DYNAREC] Added 0F 74-76 PCMPEQB/W/D opcodes
* [RV64_DYNAREC] Added 0F 64-66 PCMPGTB/W/D opcodes
* [RV64_DYNAREC] Added 0F E1-E2 PSRAW/D opcodes
* [RV64_DYNAREC] Added 0F 6E MOVD opcode
* [RV64_DYNAREC] Added 0F 73 /2 PSRLQ opcode
* [RV64_DYNAREC] Added 0F 73 /6 PSLLQ opcode
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f_vector.c | 111 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 8 |
2 files changed, 119 insertions, 0 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c index 195d58f7..73c9395d 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c @@ -542,6 +542,26 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); VNCLIP_WI(v0, d0, 0, VECTOR_UNMASKED); break; + case 0x64 ... 0x66: + if (opcode == 0x64) { + INST_NAME("PCMPGTB Gm, Em"); + u8 = VECTOR_SEW8; + } else if (opcode == 0x65) { + INST_NAME("PCMPGTW Gm, Em"); + u8 = VECTOR_SEW16; + } else { + INST_NAME("PCMPGTD Gm, Em"); + u8 = VECTOR_SEW32; + } + nextop = F8; + GETGM_vector(q0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q1, 0); + SET_ELEMENT_WIDTH(x1, u8, 1); + VMSLT_VV(VMASK, q1, q0, VECTOR_UNMASKED); + VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); + VMERGE_VIM(q0, q0, 0b11111); // implies vmask and widened it + break; case 0x67: INST_NAME("PACKUSWB Gm, Em"); nextop = F8; @@ -629,6 +649,19 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); VNCLIP_WI(v0, d0, 0, VECTOR_UNMASKED); break; + case 0x6E: + INST_NAME("MOVD Gm, Ed"); + nextop = F8; + GETGM_vector(v0); + GETED(0); + if (rex.w) { + SET_ELEMENT_WIDTH(x3, VECTOR_SEW64, 1); + } else { + SET_ELEMENT_WIDTH(x3, VECTOR_SEW32, 1); + } + VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); + VMV_S_X(v0, ed); + break; case 0x6F: INST_NAME("MOVQ Gm, Em"); nextop = F8; @@ -645,6 +678,62 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VMV_S_X(v0, x4); } break; + case 0x73: + nextop = F8; + switch ((nextop >> 3) & 7) { + case 2: + INST_NAME("PSRLQ Em, Ib"); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q0, 0); + u8 = F8; + if (u8) { + if (u8 > 63) { + VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); + } else { + MOV64x(x4, u8); + VSRL_VX(q0, q0, x4, VECTOR_UNMASKED); + } + PUTEM_vector(q0); + } + break; + case 6: + 
INST_NAME("PSLLQ Em, Ib"); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q0, 0); + u8 = F8; + if (u8) { + if (u8 > 63) { + VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); + } else { + MOV64x(x4, u8); + VSLL_VX(q0, q0, x4, VECTOR_UNMASKED); + } + PUTEM_vector(q0); + } + break; + default: DEFAULT_VECTOR; + } + break; + case 0x74 ... 0x76: + if (opcode == 0x74) { + INST_NAME("PCMPEQB Gm, Em"); + u8 = VECTOR_SEW8; + } else if (opcode == 0x75) { + INST_NAME("PCMPEQW Gm, Em"); + u8 = VECTOR_SEW16; + } else { + INST_NAME("PCMPEQD Gm, Em"); + u8 = VECTOR_SEW32; + } + nextop = F8; + GETGM_vector(q0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q1, 0); + SET_ELEMENT_WIDTH(x1, u8, 1); + VMSEQ_VV(VMASK, q1, q0, VECTOR_UNMASKED); + VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); + VMERGE_VIM(q0, q0, 0b11111); // implies vmask and widened it + break; case 0x7F: INST_NAME("MOVQ Em, Gm"); nextop = F8; @@ -835,6 +924,28 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VXOR_VI(v0, v0, 0x1F, VECTOR_UNMASKED); VAND_VV(v0, v0, v1, VECTOR_UNMASKED); break; + case 0xE1: + case 0xE2: + if (opcode == 0xE1) { + INST_NAME("PSRAW Gm, Em"); + u8 = VECTOR_SEW16; + i32 = 16; + } else { + INST_NAME("PSRAD Gm, Em"); + u8 = VECTOR_SEW32; + i32 = 32; + } + nextop = F8; + GETGM_vector(v0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(v1, 0); + SET_ELEMENT_WIDTH(x1, u8, 1); + MOV32w(x5, i32 - 1); + q0 = fpu_get_scratch(dyn); + VMINU_VX(q0, v1, x5, VECTOR_UNMASKED); + VMV_X_S(x4, q0); + VSRA_VX(v0, v0, x4, VECTOR_UNMASKED); + break; case 0xE5: INST_NAME("PMULHW Gm, Em"); nextop = F8; diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 4e168ab6..074a000f 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -561,6 +561,14 @@ VFMV_S_F(a, a); \ } +// Put Back EM if it was a memory and not an mm register; requires SEW64 +#define PUTEM_vector(a) \ + if (!MODREG) { \ 
+ VFMV_F_S(a, a); \ + FSD(a, ed, fixedaddress); \ + SMWRITE2(); \ + } + #define GETGX_empty_vector(a) \ gd = ((nextop & 0x38) >> 3) + (rex.r << 3); \ a = sse_get_reg_empty_vector(dyn, ninst, x1, gd) |