diff options
| author | xctan <xctan@cirno.icu> | 2024-11-15 00:41:28 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-11-14 17:41:28 +0100 |
| commit | 979a3232f9d23c34b6c8c86fb3913fb34b3da333 (patch) | |
| tree | 165d670688faf706efd9e2b04d0e3487a49f57fa /src | |
| parent | 47dbbe030c2126159a31c00b6560cec7cd090a6c (diff) | |
| download | box64-979a3232f9d23c34b6c8c86fb3913fb34b3da333.tar.gz box64-979a3232f9d23c34b6c8c86fb3913fb34b3da333.zip | |
[RV64_DYNAREC] Added more MMX opcodes for vector (#2035)
* [RV64_DYNAREC] Added 0F 68 PUNPCKHBW opcode
* [RV64_DYNAREC] Added 0F 69 PUNPCKHWD opcode
* [RV64_DYNAREC] Added 0F 6A PUNPCKHDQ opcode
* [RV64_DYNAREC] Updated 0F 68-69 PUNPCKHBW/WD opcodes
* [RV64_DYNAREC] Added 0F 60 PUNPCKLBW opcode
* [RV64_DYNAREC] Added 0F 61 PUNPCKLWD opcode
* [RV64_DYNAREC] Added 0F 62 PUNPCKLDQ opcode
* [RV64_DYNAREC] Simplified MMX PUNPCK{L,H}{BW,WD,DQ}

Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f_vector.c | 101 |
1 files changed, 101 insertions, 0 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c index f5c3a4d8..195d58f7 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c @@ -483,6 +483,52 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VADD_VX(q0, q1, xZR, VECTOR_MASKED); } break; + case 0x60: + INST_NAME("PUNPCKLBW Gm, Em"); + nextop = F8; + GETGM_vector(q0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); + v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + VWADDU_VX(d0, q0, xZR, VECTOR_UNMASKED); + VWADDU_VX(v0, q1, xZR, VECTOR_UNMASKED); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); + VSLL_VI(v0, v0, 8, VECTOR_UNMASKED); + VOR_VV(q0, d0, v0, VECTOR_UNMASKED); + break; + case 0x61: + INST_NAME("PUNPCKLWD Gm, Em"); + nextop = F8; + GETGM_vector(q0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); + v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + VWADDU_VX(d0, q0, xZR, VECTOR_UNMASKED); + VWADDU_VX(v0, q1, xZR, VECTOR_UNMASKED); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); + VSLL_VI(v0, v0, 16, VECTOR_UNMASKED); + VOR_VV(q0, d0, v0, VECTOR_UNMASKED); + break; + case 0x62: + INST_NAME("PUNPCKLDQ Gm, Em"); + nextop = F8; + GETGM_vector(q0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); + v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + MOV32w(x2, 32); + VWADDU_VX(d0, q0, xZR, VECTOR_UNMASKED); + VWADDU_VX(v0, q1, xZR, VECTOR_UNMASKED); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + VSLL_VX(v0, v0, x2, VECTOR_UNMASKED); + VOR_VV(q0, d0, v0, VECTOR_UNMASKED); + break; case 0x63: INST_NAME("PACKSSWB Gm, Em"); nextop = F8; @@ -515,6 +561,61 @@ uintptr_t 
dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); VNCLIPU_WI(q0, d0, 0, VECTOR_UNMASKED); break; + case 0x68: + INST_NAME("PUNPCKHBW Gm, Em"); + nextop = F8; + GETGM_vector(q0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); + v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + v1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL1); + d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + VSLIDEDOWN_VI(v0, q0, 4, VECTOR_UNMASKED); + VSLIDEDOWN_VI(v1, q1, 4, VECTOR_UNMASKED); + VWADDU_VX(d0, v0, xZR, VECTOR_UNMASKED); + VWADDU_VX(v0, v1, xZR, VECTOR_UNMASKED); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); + VSLL_VI(v0, v0, 8, VECTOR_UNMASKED); + VOR_VV(q0, d0, v0, VECTOR_UNMASKED); + break; + case 0x69: + INST_NAME("PUNPCKHWD Gm, Em"); + nextop = F8; + GETGM_vector(q0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); + v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + v1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL1); + d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + VSLIDEDOWN_VI(v0, q0, 2, VECTOR_UNMASKED); + VSLIDEDOWN_VI(v1, q1, 2, VECTOR_UNMASKED); + VWADDU_VX(d0, v0, xZR, VECTOR_UNMASKED); + VWADDU_VX(v0, v1, xZR, VECTOR_UNMASKED); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); + VSLL_VI(v0, v0, 16, VECTOR_UNMASKED); + VOR_VV(q0, d0, v0, VECTOR_UNMASKED); + break; + case 0x6A: + INST_NAME("PUNPCKHDQ Gm, Em"); + nextop = F8; + GETGM_vector(q0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); + GETEM_vector(q1, 0); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); + MOV32w(x2, 32); + v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + v1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL1); + d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + VSLIDEDOWN_VI(v0, q0, 1, VECTOR_UNMASKED); + VSLIDEDOWN_VI(v1, q1, 1, VECTOR_UNMASKED); + VWADDU_VX(d0, v0, xZR, VECTOR_UNMASKED); + VWADDU_VX(v0, v1, xZR, VECTOR_UNMASKED); + SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); 
+ VSLL_VX(v0, v0, x2, VECTOR_UNMASKED); + VOR_VV(q0, d0, v0, VECTOR_UNMASKED); + break; case 0x6B: INST_NAME("PACKSSDW Gm, Em"); nextop = F8; |