| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-10-29 03:16:47 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-10-28 20:16:47 +0100 |
| commit | aebfd61539c595dddea2e3612c2ad4c358d2eae8 (patch) | |
| tree | 5958d673d567024c543e62c1ed92b353b90435e1 /src | |
| parent | 8dee79d3800f658e13fea4052996b6497860f9e7 (diff) | |
| download | box64-aebfd61539c595dddea2e3612c2ad4c358d2eae8.tar.gz box64-aebfd61539c595dddea2e3612c2ad4c358d2eae8.zip | |
[RV64_DYNAREC] Added more opcodes for vector (#1970)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f_vector.c | 20 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f20f_vector.c | 168 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f30f_vector.c | 84 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 25 |
4 files changed, 266 insertions, 31 deletions
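
Most of the new code in the diff below hand-implements the x86 scalar NaN semantics when BOX64_DYNAREC_FASTNAN is disabled: ADDSD/SUBSD flip the sign of a freshly produced NaN (RISC-V's canonical quiet NaN is positive, while x86 returns the negative "real indefinite" QNaN), and MINSD/MAXSD plus the reworked MINSS/MAXSS must return the *source* operand whenever the comparison is unordered. A minimal C sketch of the min/max behavior being emulated (illustrative only, not box64 code; `x86_minsd`/`x86_maxsd` are hypothetical names):

```c
#include <math.h>

/* x86 MINSD/MAXSD return the source operand when either input is a NaN
 * or when the operands tie (+0.0 vs -0.0), unlike IEEE-754 fmin()/fmax(),
 * which prefer the non-NaN input. */
static double x86_minsd(double dst, double src)
{
    if (isnan(dst) || isnan(src)) return src; /* unordered -> src */
    return (dst < src) ? dst : src;           /* tie -> src */
}

static double x86_maxsd(double dst, double src)
{
    if (isnan(dst) || isnan(src)) return src; /* unordered -> src */
    return (dst > src) ? dst : src;           /* tie -> src */
}
```

This is exactly the shape of the FEQD/FLED/FMVD (and FEQS/FLES/FMVS) sequences below: take the source if the inputs are unordered or the ordered comparison favors it, otherwise keep the destination.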
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index 79625819..b8db65aa 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -733,6 +733,26 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                     if (q0 != q1) VMV_V_V(q0, q1);
                 }
                 break;
+            case 0x22:
+                INST_NAME("PINSRD Gx, Ed, Ib");
+                nextop = F8;
+                SET_ELEMENT_WIDTH(x1, rex.w ? VECTOR_SEW64 : VECTOR_SEW32, 1);
+                GETGX_vector(q0, 1, dyn->vector_eew);
+                if (MODREG) {
+                    u8 = (F8) & (rex.w ? 1 : 3);
+                    ed = xRAX + (nextop & 7) + (rex.b << 3);
+                } else {
+                    SMREAD();
+                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 1);
+                    u8 = (F8) & (rex.w ? 1 : 3);
+                    LDxw(x4, ed, fixedaddress);
+                    ed = x4;
+                }
+                VECTOR_LOAD_VMASK((1 << u8), x5, 1);
+                v0 = fpu_get_scratch(dyn);
+                VMERGE_VXM(v0, q0, ed); // uses VMASK
+                VMV_V_V(q0, v0);
+                break;
             default: DEFAULT_VECTOR;
         }
         break;
diff --git a/src/dynarec/rv64/dynarec_rv64_f20f_vector.c b/src/dynarec/rv64/dynarec_rv64_f20f_vector.c
index 4786e502..e90e5956 100644
--- a/src/dynarec/rv64/dynarec_rv64_f20f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_f20f_vector.c
@@ -33,7 +33,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
     uint8_t u8;
     uint64_t u64, j64;
     int v0, v1;
-    int q0;
+    int q0, q1;
     int d0, d1;
     int s0, s1;
     int64_t fixedaddress, gdoffset;
@@ -224,6 +224,46 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 }
             }
             break;
+        case 0x58:
+            INST_NAME("ADDSD Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            if (MODREG) {
+                GETGX_vector(v0, 1, VECTOR_SEW64);
+                v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
+            } else {
+                SMREAD();
+                v1 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LD(x4, ed, fixedaddress);
+                VMV_S_X(v1, x4);
+                GETGX_vector(v0, 1, VECTOR_SEW64);
+            }
+            if (box64_dynarec_fastnan) {
+                VECTOR_LOAD_VMASK(0b01, x4, 1);
+                VFADD_VV(v0, v0, v1, VECTOR_MASKED);
+            } else {
+                VFMV_F_S(v0, v0);
+                VFMV_F_S(v1, v1);
+                FEQD(x3, v0, v0);
+                FEQD(x4, v1, v1);
+                FADDD(v0, v0, v1);
+                AND(x3, x3, x4);
+                BEQZ_MARK(x3);
+                FEQD(x3, v0, v0);
+                BNEZ_MARK(x3);
+                FNEGD(v0, v0);
+                MARK;
+                if (rv64_xtheadvector) {
+                    d0 = fpu_get_scratch(dyn);
+                    VFMV_S_F(d0, v0);
+                    VECTOR_LOAD_VMASK(0b01, x4, 1);
+                    VMERGE_VVM(v0, v0, d0); // implies VMASK
+                } else {
+                    VFMV_S_F(v0, v0);
+                }
+            }
+            break;
         case 0x59:
             INST_NAME("MULSD Gx, Ex");
             nextop = F8;
@@ -264,6 +304,89 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 }
             }
             break;
+        case 0x5C:
+            INST_NAME("SUBSD Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            if (MODREG) {
+                GETGX_vector(v0, 1, VECTOR_SEW64);
+                v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
+            } else {
+                SMREAD();
+                v1 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LD(x4, ed, fixedaddress);
+                VMV_S_X(v1, x4);
+                GETGX_vector(v0, 1, VECTOR_SEW64);
+            }
+            if (box64_dynarec_fastnan) {
+                VECTOR_LOAD_VMASK(0b01, x4, 1);
+                VFSUB_VV(v0, v0, v1, VECTOR_MASKED);
+            } else {
+                VFMV_F_S(v0, v0);
+                VFMV_F_S(v1, v1);
+                FEQD(x3, v0, v0);
+                FEQD(x4, v1, v1);
+                FSUBD(v0, v0, v1);
+                AND(x3, x3, x4);
+                BEQZ_MARK(x3);
+                FEQD(x3, v0, v0);
+                BNEZ_MARK(x3);
+                FNEGD(v0, v0);
+                MARK;
+                if (rv64_xtheadvector) {
+                    d0 = fpu_get_scratch(dyn);
+                    VFMV_S_F(d0, v0);
+                    VECTOR_LOAD_VMASK(0b01, x4, 1);
+                    VMERGE_VVM(v0, v0, d0); // implies VMASK
+                } else {
+                    VFMV_S_F(v0, v0);
+                }
+            }
+            break;
+        case 0x5D:
+            INST_NAME("MINSD Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            if (MODREG) {
+                GETGX_vector(v0, 1, VECTOR_SEW64);
+                v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
+            } else {
+                SMREAD();
+                v1 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LD(x4, ed, fixedaddress);
+                VMV_S_X(v1, x4);
+                GETGX_vector(v0, 1, VECTOR_SEW64);
+            }
+            if (box64_dynarec_fastnan) {
+                q0 = fpu_get_scratch(dyn);
+                VECTOR_LOAD_VMASK(0b01, x4, 1);
+                VFMIN_VV(q0, v0, v1, VECTOR_MASKED);
+                VMERGE_VVM(v0, v0, q0);
+            } else {
+                d0 = fpu_get_scratch(dyn);
+                d1 = fpu_get_scratch(dyn);
+                VFMV_F_S(d0, v0);
+                VFMV_F_S(d1, v1);
+                FEQD(x2, d0, d0);
+                FEQD(x3, d1, d1);
+                AND(x2, x2, x3);
+                BEQ_MARK(x2, xZR);
+                FLED(x2, d1, d0);
+                BEQ_MARK2(x2, xZR);
+                MARK;
+                FMVD(d0, d1);
+                MARK2;
+                if (rv64_xtheadvector) {
+                    VFMV_S_F(d0, d0);
+                    VECTOR_LOAD_VMASK(0b0001, x4, 1);
+                    VMERGE_VVM(v0, v0, d0); // implies VMASK
+                } else {
+                    VFMV_S_F(v0, d0);
+                }
+            }
+            break;
         case 0x5E:
             INST_NAME("DIVSD Gx, Ex");
             nextop = F8;
@@ -304,6 +427,49 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 VFDIV_VV(v0, v0, v1, VECTOR_MASKED);
             }
             break;
+        case 0x5F:
+            INST_NAME("MAXSD Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            if (MODREG) {
+                GETGX_vector(v0, 1, VECTOR_SEW64);
+                v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
+            } else {
+                SMREAD();
+                v1 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LD(x4, ed, fixedaddress);
+                VMV_S_X(v1, x4);
+                GETGX_vector(v0, 1, VECTOR_SEW64);
+            }
+            if (box64_dynarec_fastnan) {
+                q0 = fpu_get_scratch(dyn);
+                VECTOR_LOAD_VMASK(0b01, x4, 1);
+                VFMAX_VV(q0, v0, v1, VECTOR_MASKED);
+                VMERGE_VVM(v0, v0, q0);
+            } else {
+                d0 = fpu_get_scratch(dyn);
+                d1 = fpu_get_scratch(dyn);
+                VFMV_F_S(d0, v0);
+                VFMV_F_S(d1, v1);
+                FEQD(x2, d0, d0);
+                FEQD(x3, d1, d1);
+                AND(x2, x2, x3);
+                BEQ_MARK(x2, xZR);
+                FLED(x2, d0, d1);
+                BEQ_MARK2(x2, xZR);
+                MARK;
+                FMVD(d0, d1);
+                MARK2;
+                if (rv64_xtheadvector) {
+                    VFMV_S_F(d0, d0);
+                    VECTOR_LOAD_VMASK(0b0001, x4, 1);
+                    VMERGE_VVM(v0, v0, d0); // implies VMASK
+                } else {
+                    VFMV_S_F(v0, d0);
+                }
+            }
+            break;
         case 0xC2:
             INST_NAME("CMPSD Gx, Ex, Ib");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
index a4bec879..2c62e35c 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
@@ -204,24 +204,36 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 v1 = fpu_get_scratch(dyn);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
                 LWU(x4, ed, fixedaddress);
-                VXOR_VV(v1, v1, v1, VECTOR_UNMASKED);
                 VMV_S_X(v1, x4);
                 GETGX_vector(v0, 1, VECTOR_SEW32);
             }
-            d0 = fpu_get_scratch(dyn);
-            d1 = fpu_get_scratch(dyn);
-            q0 = fpu_get_scratch(dyn);
-            q1 = fpu_get_scratch(dyn);
-            VECTOR_LOAD_VMASK(0b0001, x4, 1);
-            VMV_V_V(q1, VMASK);
-            VMFEQ_VV(d0, v0, v0, VECTOR_MASKED);
-            VMFEQ_VV(d1, v1, v1, VECTOR_MASKED);
-            VMAND_MM(d0, d0, d1);
-            VFMIN_VV(q0, v0, v1, VECTOR_MASKED);
-            VMANDN_MM(VMASK, VMASK, d0);
-            VMERGE_VVM(v0, v0, v1);
-            VMAND_MM(VMASK, q1, d0);
-            VMERGE_VVM(v0, v0, q0);
+            if (box64_dynarec_fastnan) {
+                q0 = fpu_get_scratch(dyn);
+                VECTOR_LOAD_VMASK(0b0001, x4, 1);
+                VFMIN_VV(q0, v0, v1, VECTOR_MASKED);
+                VMERGE_VVM(v0, v0, q0);
+            } else {
+                d0 = fpu_get_scratch(dyn);
+                d1 = fpu_get_scratch(dyn);
+                VFMV_F_S(d0, v0);
+                VFMV_F_S(d1, v1);
+                FEQS(x2, d0, d0);
+                FEQS(x3, d1, d1);
+                AND(x2, x2, x3);
+                BEQ_MARK(x2, xZR);
+                FLES(x2, d1, d0);
+                BEQ_MARK2(x2, xZR);
+                MARK;
+                FMVS(d0, d1);
+                MARK2;
+                if (rv64_xtheadvector) {
+                    VFMV_S_F(d0, d0);
+                    VECTOR_LOAD_VMASK(0b0001, x4, 1);
+                    VMERGE_VVM(v0, v0, d0); // implies VMASK
+                } else {
+                    VFMV_S_F(v0, d0);
+                }
+            }
             break;
         case 0x5F:
             INST_NAME("MAXSS Gx, Ex");
@@ -235,24 +247,36 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 v1 = fpu_get_scratch(dyn);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
                 LWU(x4, ed, fixedaddress);
-                VXOR_VV(v1, v1, v1, VECTOR_UNMASKED);
                 VMV_S_X(v1, x4);
                 GETGX_vector(v0, 1, VECTOR_SEW32);
             }
-            d0 = fpu_get_scratch(dyn);
-            d1 = fpu_get_scratch(dyn);
-            q0 = fpu_get_scratch(dyn);
-            q1 = fpu_get_scratch(dyn);
-            VECTOR_LOAD_VMASK(0b0001, x4, 1);
-            VMV_V_V(q1, VMASK);
-            VMFEQ_VV(d0, v0, v0, VECTOR_MASKED);
-            VMFEQ_VV(d1, v1, v1, VECTOR_MASKED);
-            VMAND_MM(d0, d0, d1);
-            VFMAX_VV(q0, v0, v1, VECTOR_MASKED);
-            VMANDN_MM(VMASK, VMASK, d0);
-            VMERGE_VVM(v0, v0, v1);
-            VMAND_MM(VMASK, q1, d0);
-            VMERGE_VVM(v0, v0, q0);
+            if (box64_dynarec_fastnan) {
+                q0 = fpu_get_scratch(dyn);
+                VECTOR_LOAD_VMASK(0b0001, x4, 1);
+                VFMAX_VV(q0, v0, v1, VECTOR_MASKED);
+                VMERGE_VVM(v0, v0, q0);
+            } else {
+                d0 = fpu_get_scratch(dyn);
+                d1 = fpu_get_scratch(dyn);
+                VFMV_F_S(d0, v0);
+                VFMV_F_S(d1, v1);
+                FEQS(x2, d0, d0);
+                FEQS(x3, d1, d1);
+                AND(x2, x2, x3);
+                BEQ_MARK(x2, xZR);
+                FLES(x2, d0, d1);
+                BEQ_MARK2(x2, xZR);
+                MARK;
+                FMVS(d0, d1);
+                MARK2;
+                if (rv64_xtheadvector) {
+                    VFMV_S_F(d0, d0);
+                    VECTOR_LOAD_VMASK(0b0001, x4, 1);
+                    VMERGE_VVM(v0, v0, d0); // implies VMASK
+                } else {
+                    VFMV_S_F(v0, d0);
+                }
+            }
             break;
         case 0xAE:
         case 0xB8:
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index 3a942f9b..7fa32d81 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -2840,11 +2840,36 @@ void vector_loadmask(dynarec_rv64_t* dyn, int ninst, int vreg, uint64_t imm, int
             ADDI(s1, xZR, 1);
             VMV_S_X(vreg, s1);
             return;
+        case 0b0010:
+            vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+            MOV64x(s1, 0x100000000ULL);
+            VMV_S_X(vreg, s1);
+            vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+            return;
+        case 0b0100: {
+            int scratch = fpu_get_scratch(dyn);
+            vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+            VMV_V_I(scratch, 1);
+            VXOR_VV(vreg, vreg, vreg, VECTOR_UNMASKED);
+            VSLIDE1UP_VX(vreg, scratch, xZR, VECTOR_UNMASKED);
+            vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+            return;
+        }
         case 0b0101:
             vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
             VMV_V_I(vreg, 1);
             vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
             return;
+        case 0b1000: {
+            int scratch = fpu_get_scratch(dyn);
+            vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+            MOV64x(s1, 0x100000000ULL);
+            VMV_V_X(scratch, s1);
+            VXOR_VV(vreg, vreg, vreg, VECTOR_UNMASKED);
+            VSLIDE1UP_VX(vreg, scratch, xZR, VECTOR_UNMASKED);
+            vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+            return;
+        }
         case 0b1010:
             vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
             MOV64x(s1, 0x100000000ULL);
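
The constants in the new `vector_loadmask` cases are consistent with the RVV 0.7.1 mask layout that XTheadVector uses: the mask bit for element i sits at bit i*MLEN of the mask register, where MLEN = SEW/LMUL, rather than at bit i as in RVV 1.0. With SEW32 and LMUL=1 that puts element 1's bit at bit 32, which is why the 0b0010 case loads 0x100000000; elements 2 and 3 (0b0100, 0b1000) would need bits 64 and 96, beyond any 64-bit immediate, hence the VSLIDE1UP_VX into the upper lane. A rough sketch of that layout computation (illustrative only; `rvv071_mask_bits` is a hypothetical helper, not box64 code):

```c
#include <stdint.h>

/* RVV 0.7.1 / XTheadVector mask layout: the mask for element i lives at
 * bit i*MLEN of v0, with MLEN = SEW/LMUL. Returns the low 64 bits of the
 * mask register for a given element bitmask. */
static uint64_t rvv071_mask_bits(uint8_t elem_mask, unsigned mlen)
{
    uint64_t bits = 0;
    for (unsigned i = 0; i * mlen < 64; i++)
        if (elem_mask & (1u << i))
            bits |= 1ull << (i * mlen);
    return bits;
}
/* rvv071_mask_bits(0b0010, 32) == 0x100000000ull, the constant the new
 * 0b0010 case materializes; 0b0100 and 0b1000 map to bits 64 and 96,
 * which is what the VSLIDE1UP_VX sequences construct. */
```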