-rw-r--r--   src/dynarec/rv64/dynarec_rv64_660f_vector.c |  42
-rw-r--r--   src/dynarec/rv64/dynarec_rv64_f30f_vector.c | 135
2 files changed, 177 insertions, 0 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index d76d26f2..e7a2f023 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -230,6 +230,27 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 1);
             VADD_VV(q0, d1, d0, VECTOR_UNMASKED);
             break;
+        case 0x03:
+            INST_NAME("PHADDSW Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+            GETGX_vector(q0, 1, VECTOR_SEW16);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW16);
+            v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
+            d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
+            d1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); // no more scratches!
+            VXOR_VV(v0, v0, v0, VECTOR_UNMASKED);
+            VMV_V_V(v0, q0);
+            if (q1 & 1) VMV_V_V(d1, q1);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL2, 2);
+            VSLIDEUP_VI(v0, (q1 & 1) ? d1 : q1, 8, VECTOR_UNMASKED);
+            vector_loadmask(dyn, ninst, VMASK, 0b0101010101010101, x4, 2);
+            VCOMPRESS_VM(d0, v0, VMASK);
+            VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
+            VCOMPRESS_VM(d1, v0, VMASK);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 1);
+            VSADD_VV(q0, d1, d0, VECTOR_UNMASKED);
+            break;
         case 0x04:
             INST_NAME("PMADDUBSW Gx, Ex");
             nextop = F8;
@@ -248,6 +269,27 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
             VSADD_VV(q0, d1, d0, VECTOR_UNMASKED);
             break;
+        case 0x05:
+            INST_NAME("PHSUBW Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+            GETGX_vector(q0, 1, VECTOR_SEW16);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW16);
+            v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
+            d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
+            d1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); // no more scratches!
+            VXOR_VV(v0, v0, v0, VECTOR_UNMASKED);
+            VMV_V_V(v0, q0);
+            if (q1 & 1) VMV_V_V(d1, q1);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL2, 2);
+            VSLIDEUP_VI(v0, (q1 & 1) ? d1 : q1, 8, VECTOR_UNMASKED);
+            vector_loadmask(dyn, ninst, VMASK, 0b0101010101010101, x4, 2);
+            VCOMPRESS_VM(d0, v0, VMASK);
+            VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
+            VCOMPRESS_VM(d1, v0, VMASK);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 1);
+            VSUB_VV(q0, d0, d1, VECTOR_UNMASKED);
+            break;
         case 0x08 ... 0x0A:
             if (nextop == 0x08) {
                 INST_NAME("PSIGNB Gx, Ex");
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
index f6064852..46c3db2d 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
@@ -197,11 +197,146 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 VMV_S_X(v0, x4);
             }
             break;
+        case 0x5D:
+            INST_NAME("MINSS Gx, Ex");
+            nextop = F8;
+            if (MODREG) {
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                GETGX_vector(v0, 1, VECTOR_SEW32);
+                v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
+            } else {
+                SMREAD();
+                v1 = fpu_get_scratch(dyn);
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
+                vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);
+                VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                GETGX_vector(v0, 1, VECTOR_SEW32);
+            }
+            d0 = fpu_get_scratch(dyn);
+            d1 = fpu_get_scratch(dyn);
+            q0 = fpu_get_scratch(dyn);
+            q1 = fpu_get_scratch(dyn);
+            vector_loadmask(dyn, ninst, VMASK, 0b0001, x4, 1);
+            VMV_V_V(q1, VMASK);
+            VMFEQ_VV(d0, v0, v0, VECTOR_MASKED);
+            VMFEQ_VV(d1, v1, v1, VECTOR_MASKED);
+            VMAND_MM(d0, d0, d1);
+            VFMIN_VV(q0, v0, v1, VECTOR_MASKED);
+            VMANDN_MM(VMASK, VMASK, d0);
+            VMERGE_VVM(v0, v0, v1);
+            VMAND_MM(VMASK, q1, d0);
+            VMERGE_VVM(v0, v0, q0);
+            break;
+        case 0x5F:
+            INST_NAME("MAXSS Gx, Ex");
+            nextop = F8;
+            if (MODREG) {
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                GETGX_vector(v0, 1, VECTOR_SEW32);
+                v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
+            } else {
+                SMREAD();
+                v1 = fpu_get_scratch(dyn);
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
+                vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);
+                VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                GETGX_vector(v0, 1, VECTOR_SEW32);
+            }
+            d0 = fpu_get_scratch(dyn);
+            d1 = fpu_get_scratch(dyn);
+            q0 = fpu_get_scratch(dyn);
+            q1 = fpu_get_scratch(dyn);
+            vector_loadmask(dyn, ninst, VMASK, 0b0001, x4, 1);
+            VMV_V_V(q1, VMASK);
+            VMFEQ_VV(d0, v0, v0, VECTOR_MASKED);
+            VMFEQ_VV(d1, v1, v1, VECTOR_MASKED);
+            VMAND_MM(d0, d0, d1);
+            VFMAX_VV(q0, v0, v1, VECTOR_MASKED);
+            VMANDN_MM(VMASK, VMASK, d0);
+            VMERGE_VVM(v0, v0, v1);
+            VMAND_MM(VMASK, q1, d0);
+            VMERGE_VVM(v0, v0, q0);
+            break;
         case 0xAE:
         case 0xB8:
         case 0xBC:
         case 0xBD:
             return 0;
+        case 0xC2:
+            INST_NAME("CMPSS Gx, Ex, Ib");
+            nextop = F8;
+            if (MODREG) {
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                GETGX_vector(d0, 1, VECTOR_SEW32);
+                d1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
+            } else {
+                SMREAD();
+                d1 = fpu_get_scratch(dyn);
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 1);
+                vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);
+                VLE8_V(d1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
+                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                GETGX_vector(d0, 1, VECTOR_SEW32);
+            }
+            u8 = F8;
+            VFMV_F_S(d0, d0);
+            VFMV_F_S(d1, d1);
+            if ((u8 & 7) == 0) { // Equal
+                FEQS(x2, d0, d1);
+            } else if ((u8 & 7) == 4) { // Not Equal or unordered
+                FEQS(x2, d0, d1);
+                XORI(x2, x2, 1);
+            } else {
+                // x2 = !(isnan(d0) || isnan(d1))
+                FEQS(x3, d0, d0);
+                FEQS(x2, d1, d1);
+                AND(x2, x2, x3);
+                switch (u8 & 7) {
+                    case 1:
+                        BEQ_MARK(x2, xZR);
+                        FLTS(x2, d0, d1);
+                        break; // Less than
+                    case 2:
+                        BEQ_MARK(x2, xZR);
+                        FLES(x2, d0, d1);
+                        break; // Less or equal
+                    case 3: XORI(x2, x2, 1); break; // NaN
+                    case 5: { // Greater or equal or unordered
+                        BEQ_MARK2(x2, xZR);
+                        FLES(x2, d1, d0);
+                        B_MARK_nocond;
+                        break;
+                    }
+                    case 6: { // Greater or unordered, test inverted, N!=V so unordered or less than (inverted)
+                        BEQ_MARK2(x2, xZR);
+                        FLTS(x2, d1, d0);
+                        B_MARK_nocond;
+                        break;
+                    }
+                    case 7: break; // Not NaN
+                }
+
+                MARK2;
+                if ((u8 & 7) == 5 || (u8 & 7) == 6) {
+                    MOV32w(x2, 1);
+                }
+                MARK;
+            }
+            NEG(x2, x2);
+            if (rv64_xtheadvector) {
+                v0 = fpu_get_scratch(dyn);
+                VMV_S_X(v0, x2);
+                vector_loadmask(dyn, ninst, VMASK, 0b0001, x4, 1);
+                VMERGE_VVM(d0, d0, v0); // implies VMASK
+            } else {
+                VMV_S_X(d0, x2);
+            }
+            break;
         default:
             DEFAULT_VECTOR;
     }
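
For reference: the two new 0F 38 cases emulate SSSE3 horizontal operations by concatenating Gx and Ex into an LMUL=2 group, splitting even and odd 16-bit lanes with VCOMPRESS_VM, and combining them with VSADD_VV (saturating, PHADDSW) or VSUB_VV (no saturation, PHSUBW). A minimal scalar sketch of the intended x86 semantics; the helper names here are illustrative and not part of box64:

#include <stdint.h>

/* Signed saturation to 16 bits (illustrative helper). */
static int16_t sat16(int32_t v) {
    if (v > INT16_MAX) return INT16_MAX;
    if (v < INT16_MIN) return INT16_MIN;
    return (int16_t)v;
}

/* PHADDSW: pairwise saturating add over the concatenation Gx:Ex,
   results 0..3 come from Gx pairs, 4..7 from Ex pairs. */
static void ref_phaddsw(int16_t dst[8], const int16_t gx[8], const int16_t ex[8]) {
    int16_t src[16];
    for (int i = 0; i < 8; i++) { src[i] = gx[i]; src[8 + i] = ex[i]; }
    for (int i = 0; i < 8; i++)
        dst[i] = sat16((int32_t)src[2 * i] + src[2 * i + 1]);
}

/* PHSUBW: pairwise subtract, even lane minus odd lane, without saturation
   (which is why the case above ends in VSUB_VV rather than a saturating sub). */
static void ref_phsubw(int16_t dst[8], const int16_t gx[8], const int16_t ex[8]) {
    int16_t src[16];
    for (int i = 0; i < 8; i++) { src[i] = gx[i]; src[8 + i] = ex[i]; }
    for (int i = 0; i < 8; i++)
        dst[i] = (int16_t)(src[2 * i] - src[2 * i + 1]);
}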
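
For reference: MINSS/MAXSS write only the low 32-bit lane of Gx and, unlike RISC-V VFMIN/VFMAX, hand back the second source operand whenever the compare does not strictly favor the first one; the masked VMFEQ/VFMIN (or VFMAX)/VMERGE sequence above exists to get the NaN case right. A scalar sketch of the x86 rule itself, with illustrative helper names that are not box64 code:

/* x86 MINSS/MAXSS low-lane rule: an ordered compare selects between the two
   operands; when the compare is false -- including the unordered (NaN) case
   and the equal case such as -0.0 vs +0.0 -- the second source (Ex) wins.
   In C, "<" and ">" are false for unordered operands, so the ternaries below
   reproduce that selection directly. */
static float ref_minss_lane0(float gx0, float ex0) {
    return (gx0 < ex0) ? gx0 : ex0;
}
static float ref_maxss_lane0(float gx0, float ex0) {
    return (gx0 > ex0) ? gx0 : ex0;
}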
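
For reference: CMPSS uses only the low three bits of the immediate, and the value written back to the low lane is an all-ones or all-zeros mask, which is what the NEG(x2, x2) before VMV_S_X produces. A scalar sketch of the eight predicates, again with an illustrative helper name rather than box64 code:

#include <stdint.h>
#include <math.h>

/* SSE CMPSS predicate table (imm8 & 7) on the low lane. */
static uint32_t ref_cmpss_lane0(float a, float b, uint8_t imm8) {
    int unordered = isnan(a) || isnan(b);
    int r;
    switch (imm8 & 7) {
        case 0: r = (a == b); break;               // EQ:  false if unordered
        case 1: r = !unordered && (a < b); break;  // LT
        case 2: r = !unordered && (a <= b); break; // LE
        case 3: r = unordered; break;              // UNORD
        case 4: r = !(a == b); break;              // NEQ: true if unordered
        case 5: r = unordered || (a >= b); break;  // NLT (>= or unordered)
        case 6: r = unordered || (a > b); break;   // NLE (> or unordered)
        default: r = !unordered; break;            // ORD
    }
    return r ? 0xFFFFFFFFu : 0u;
}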