diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-09-22 21:36:25 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-09-22 15:36:25 +0200 |
| commit | 710d537f3949d55944bc96e73927c9d30bdf13c2 (patch) | |
| tree | 4da21db62f04374cb530be8c06f35b9d47dcec28 /src | |
| parent | 9e44c65391d9f05d2f9fa03b6c1f6b2a9c8c578f (diff) | |
| download | box64-710d537f3949d55944bc96e73927c9d30bdf13c2.tar.gz box64-710d537f3949d55944bc96e73927c9d30bdf13c2.zip | |
[RV64_DYNAREC] Added more opcodes for vector (#1853)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f_vector.c | 69 |
1 file changed, 55 insertions, 14 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c index c487d16e..46373c29 100644 --- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c @@ -421,6 +421,14 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETEX_vector(q1, 0, 0, VECTOR_SEW32); VMAX_VV(q0, q0, q1, VECTOR_UNMASKED); break; + case 0x40: + INST_NAME("PMULLD Gx, Ex"); + nextop = F8; + SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); + GETEX_vector(q1, 0, 0, VECTOR_SEW32); + GETGX_vector(q0, 1, VECTOR_SEW32); + VMUL_VV(q0, q0, q1, VECTOR_UNMASKED); + break; default: DEFAULT_VECTOR; } @@ -693,7 +701,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VMERGE_VVM(q0, d1, d0); break; case 0x62: - INST_NAME("PUNPCKLDQ Gx,Ex"); + INST_NAME("PUNPCKLDQ Gx, Ex"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); ADDI(x1, xZR, 0b1010); @@ -729,10 +737,10 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i break; case 0x64 ... 0x66: if (opcode == 0x64) { - INST_NAME("PCMPGTB Gx,Ex"); + INST_NAME("PCMPGTB Gx, Ex"); u8 = VECTOR_SEW8; } else if (opcode == 0x65) { - INST_NAME("PCMPGTW Gx,Ex"); + INST_NAME("PCMPGTW Gx, Ex"); u8 = VECTOR_SEW16; } else { INST_NAME("PCMPGTD Gx, Ex"); @@ -774,7 +782,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i break; case 0x68 ... 
0x6A: if (opcode == 0x68) { - INST_NAME("PUNPCKHBW Gx,Ex"); + INST_NAME("PUNPCKHBW Gx, Ex"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); ADDI(x1, xZR, 0b1010101010101010); @@ -1117,20 +1125,19 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETGX_vector(q0, 1, VECTOR_SEW16); if (MODREG) { u8 = (F8) & 7; - ADDI(x4, xZR, 1 << u8); - VMV_S_X(VMASK, x4); ed = xRAX + (nextop & 7) + (rex.b << 3); - v0 = fpu_get_scratch(dyn); - VMERGE_VXM(v0, ed, q0); // uses VMASK - VMV_V_V(q0, v0); } else { SMREAD(); addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 1); u8 = (F8) & 7; - ADDI(x4, xZR, 1 << u8); - VMV_S_X(VMASK, x4); - VLE16_V(q0, ed, VECTOR_MASKED, VECTOR_NFIELD1); + LHU(x4, ed, 0); + ed = x4; } + ADDI(x5, xZR, 1 << u8); + VMV_S_X(VMASK, x5); + v0 = fpu_get_scratch(dyn); + VMERGE_VXM(v0, ed, q0); // uses VMASK + VMV_V_V(q0, v0); break; case 0xC5: INST_NAME("PEXTRW Gd, Ex, Ib"); @@ -1308,7 +1315,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VAADDU_VV(q0, q1, q0, VECTOR_UNMASKED); break; case 0xE1: - INST_NAME("PSRAW Gx,Ex"); + INST_NAME("PSRAW Gx, Ex"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); GETGX_vector(q0, 1, VECTOR_SEW64); @@ -1390,6 +1397,14 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i GETEX_vector(q1, 0, 0, VECTOR_SEW16); VSSUB_VV(q0, q1, q0, VECTOR_UNMASKED); break; + case 0xEA: + INST_NAME("PMINSW Gx, Ex"); + nextop = F8; + SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1); + GETGX_vector(q0, 1, VECTOR_SEW16); + GETEX_vector(q1, 0, 0, VECTOR_SEW16); + VMIN_VV(q0, q0, q1, VECTOR_UNMASKED); + break; case 0xEB: INST_NAME("POR Gx, Ex"); nextop = F8; @@ -1415,7 +1430,7 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VSADD_VV(q0, q1, q0, VECTOR_UNMASKED); break; case 0xEE: - INST_NAME("PMAXSW Gx,Ex"); + INST_NAME("PMAXSW Gx, Ex"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 
1); GETGX_vector(q0, 1, VECTOR_SEW16); @@ -1496,6 +1511,32 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); VADD_VV(q0, d0, v1, VECTOR_UNMASKED); break; + case 0xF6: + INST_NAME("PSADBW Gx, Ex"); + nextop = F8; + SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); + GETGX_vector(q0, 1, VECTOR_SEW8); + GETEX_vector(q1, 0, 0, VECTOR_SEW8); + v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + v1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); + d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); // no more scratches! + VWSUBU_VV(v0, q1, q0, VECTOR_UNMASKED); + vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL2, 2); + VSRA_VI(v1, 15, v0, VECTOR_UNMASKED); + VXOR_VV(v0, v0, v1, VECTOR_UNMASKED); + VSUB_VV(v1, v1, v0, VECTOR_UNMASKED); + ADDI(x4, xZR, 0xFF); + VXOR_VV(VMASK, VMASK, VMASK, VECTOR_UNMASKED); + VMV_S_X(VMASK, x4); + VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); + VREDSUM_VS(v0, v0, v1, VECTOR_MASKED); // sum low 64 + VSLIDEDOWN_VI(d0, 8, v1, VECTOR_UNMASKED); + VXOR_VV(v1, v1, v1, VECTOR_UNMASKED); + VREDSUM_VS(v1, v1, d0, VECTOR_MASKED); // sum high 64 + VSLIDEUP_VI(v0, 4, v1, VECTOR_UNMASKED); + vector_vsetvli(dyn, ninst, x1, VECTOR_SEW8, VECTOR_LMUL1, 1); + VMV_V_V(q0, v0); + break; case 0xF8 ... 0xFB: if (opcode == 0xF8) { INST_NAME("PSUBB Gx, Ex"); |