diff options
| author | xctan <xctan@cirno.icu> | 2024-11-14 02:43:57 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-11-13 19:43:57 +0100 |
| commit | 09b8c3b94e1cc9ab7f963cbaad8630a366dafde2 (patch) | |
| tree | da0945fdb5da71546759da3998b4b41b3c8531e0 /src | |
| parent | 1dbb676f38417024ca6764d5888a40e58b82ae70 (diff) | |
| download | box64-09b8c3b94e1cc9ab7f963cbaad8630a366dafde2.tar.gz box64-09b8c3b94e1cc9ab7f963cbaad8630a366dafde2.zip | |
[RV64_DYNAREC] Added more MMX opcodes for vector (#2027)
* [RV64_DYNAREC] Added 0F D5 PMULLW opcode
* [RV64_DYNAREC] Added 0F E5 PMULHW opcode
* [RV64_DYNAREC] Added 0F F5 PMADDWD opcode
* [RV64_DYNAREC] Added 0F 6B PACKSSDW opcode
* [RV64_DYNAREC] Added 0F 63 PACKSSWB opcode
* [RV64_DYNAREC] Added 0F 67 PACKUSWB opcode
* [RV64_DYNAREC] Removed useless vsetvli in MMX PACKUSWB/SSWB/SSDW
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f_vector.c | 79 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f_vector.c | 1 |
2 files changed, 79 insertions, 1 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c index 41e91486..f5c3a4d8 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c @@ -483,6 +483,51 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VADD_VX(q0, q1, xZR, VECTOR_MASKED); } break; + case 0x63: + INST_NAME("PACKSSWB Gm, Em"); + nextop = F8; + GETGM_vector(v0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(v1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); + d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + VMV_V_V(d0, v0); + VSLIDEUP_VI(d0, v1, 4, VECTOR_UNMASKED); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); + VNCLIP_WI(v0, d0, 0, VECTOR_UNMASKED); + break; + case 0x67: + INST_NAME("PACKUSWB Gm, Em"); + nextop = F8; + GETGM_vector(q0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); + d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + if (q0 == q1) { + VMV_V_V(d0, q0); + VSLIDEUP_VI(d0, q1, 4, VECTOR_UNMASKED); // splice q0 and q1 here! + VMAX_VX(d0, d0, xZR, VECTOR_UNMASKED); + } else { + VSLIDEUP_VI(q0, q1, 4, VECTOR_UNMASKED); // splice q0 and q1 here! 
+ VMAX_VX(d0, q0, xZR, VECTOR_UNMASKED); + } + SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); + VNCLIPU_WI(q0, d0, 0, VECTOR_UNMASKED); + break; + case 0x6B: + INST_NAME("PACKSSDW Gm, Em"); + nextop = F8; + GETGM_vector(v0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(v1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); + d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + VMV_V_V(d0, v0); + VSLIDEUP_VI(d0, v1, 2, VECTOR_UNMASKED); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); + VNCLIP_WI(v0, d0, 0, VECTOR_UNMASKED); + break; case 0x6F: INST_NAME("MOVQ Gm, Em"); nextop = F8; @@ -631,6 +676,15 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VAND_VX(q0, v0, x3, VECTOR_UNMASKED); VXOR_VV(v0, v0, q0, VECTOR_UNMASKED); break; + case 0xD5: + INST_NAME("PMULLW Gm, Em"); + nextop = F8; + GETGM_vector(v0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(v1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); + VMUL_VV(v0, v0, v1, VECTOR_UNMASKED); + break; case 0xD8: case 0xD9: if (opcode == 0xD8) { @@ -680,6 +734,15 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VXOR_VI(v0, v0, 0x1F, VECTOR_UNMASKED); VAND_VV(v0, v0, v1, VECTOR_UNMASKED); break; + case 0xE5: + INST_NAME("PMULHW Gm, Em"); + nextop = F8; + GETGM_vector(v0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(v1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); + VMULH_VV(v0, v0, v1, VECTOR_UNMASKED); + break; case 0xE8: INST_NAME("PSUBSB Gm, Em"); nextop = F8; @@ -775,6 +838,22 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VAND_VX(q0, v0, x3, VECTOR_UNMASKED); VXOR_VV(v0, v0, q0, VECTOR_UNMASKED); break; + case 0xF5: + INST_NAME("PMADDWD Gm, Em"); + nextop = F8; + GETGM_vector(v0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(v1, 0); + q1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL1); + q0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + ADDI(x3, xZR, 32); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 
1); + VWMUL_VV(q0, v1, v0, VECTOR_UNMASKED); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); + VNSRL_WX(q1, q0, x3, VECTOR_UNMASKED); + VNSRL_WI(v0, q0, 0, VECTOR_UNMASKED); + VADD_VV(v0, v0, q1, VECTOR_UNMASKED); + break; case 0xF8 ... 0xFB: nextop = F8; if (opcode == 0xF8) { diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c index 7cb244fe..bbdfdf47 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c @@ -1780,7 +1780,6 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); GETGX_vector(q0, 1, VECTOR_SEW16); GETEX_vector(q1, 0, 0, VECTOR_SEW16); - v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); VMUL_VV(q0, q1, q0, VECTOR_UNMASKED); break; case 0xD6: |