diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-10-30 18:02:09 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-10-30 11:02:09 +0100 |
| commit | 34520c0b4f491bf6ae468efde98c0ed87c1b3e56 (patch) | |
| tree | f69d68ebac28bcb0e9801cdbd58dba9dda8d1743 /src | |
| parent | 121148da72f44937012ee14bcd48ab43b846977b (diff) | |
| download | box64-34520c0b4f491bf6ae468efde98c0ed87c1b3e56.tar.gz box64-34520c0b4f491bf6ae468efde98c0ed87c1b3e56.zip | |
[RV64_DYNAREC] Added more opcodes for vector (#1980)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f_vector.c | 27 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f30f.c | 43 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f30f_vector.c | 101 |
3 files changed, 149 insertions, 22 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c index dca311d4..cba9796d 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c @@ -251,6 +251,33 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, SMWRITE2(); } break; + case 0x50: + INST_NAME("MOVMSKPS Gd, Ex"); + nextop = F8; + SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); + GETGD; + GETEX_vector(q0, 0, 0, VECTOR_SEW32); + v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL8); + VSRL_VI(v0, q0, 31, VECTOR_UNMASKED); + if (rv64_xtheadvector) { + // Force the element width to 4bit + vector_vsetvli(dyn, ninst, x4, VECTOR_SEW32, VECTOR_LMUL8, 1); + VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED); + vector_vsetvli(dyn, ninst, x4, VECTOR_SEW32, VECTOR_LMUL1, 1); + VMV_X_S(x4, VMASK); + BEXTI(gd, x4, 12); + BEXTI(x5, x4, 8); + ADDSL(gd, x5, gd, 1, x6); + BEXTI(x5, x4, 4); + ADDSL(gd, x5, gd, 1, x6); + BEXTI(x5, x4, 0); + ADDSL(gd, x5, gd, 1, x6); + } else { + VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED); + VMV_X_S(gd, VMASK); + ZEROUP(gd); + } + break; case 0x51: INST_NAME("SQRTPS Gx, Ex"); nextop = F8; diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c index 97fa1935..67305342 100644 --- a/src/dynarec/rv64/dynarec_rv64_f30f.c +++ b/src/dynarec/rv64/dynarec_rv64_f30f.c @@ -242,6 +242,27 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int FCVTDS(v0, v1); } break; + case 0x5B: + INST_NAME("CVTTPS2DQ Gx, Ex"); + nextop = F8; + GETGX(); + GETEX(x2, 0, 12); + v0 = fpu_get_scratch(dyn); + for (int i = 0; i < 4; ++i) { + if (!box64_dynarec_fastround) { + FSFLAGSI(0); // reset all bits + } + FLW(v0, wback, fixedaddress + i * 4); + FCVTWS(x3, v0, RD_RTZ); + if (!box64_dynarec_fastround) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, (1 << FR_NV) | (1 << FR_OF)); + BEQZ(x5, 8); + MOV32w(x3, 0x80000000); + } + SW(x3, gback, gdoffset + i * 4); + } + break; case 0x5C: INST_NAME("SUBSS Gx, Ex"); nextop = F8; @@ -344,28 +365,6 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int SSE_LOOP_MV_Q2(x3); if (!MODREG) SMWRITE2(); break; - - case 0x5B: - INST_NAME("CVTTPS2DQ Gx, Ex"); - nextop = F8; - GETGX(); - GETEX(x2, 0, 12); - v0 = fpu_get_scratch(dyn); - for (int i = 0; i < 4; ++i) { - if (!box64_dynarec_fastround) { - FSFLAGSI(0); // reset all bits - } - FLW(v0, wback, fixedaddress + i * 4); - FCVTWS(x3, v0, RD_RTZ); - if (!box64_dynarec_fastround) { - FRFLAGS(x5); // get back FPSR to check the IOC bit - ANDI(x5, x5, (1 << FR_NV) | (1 << FR_OF)); - BEQZ(x5, 8); - MOV32w(x3, 0x80000000); - } - SW(x3, gback, gdoffset + i * 4); - } - break; case 0xAE: nextop = F8; switch ((nextop >> 3) & 7) { diff --git a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c index abd39e98..4c0088e6 100644 --- a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c @@ -122,6 +122,32 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VFMV_S_F(v0, v0); } break; + case 0x2C: + INST_NAME("CVTTSS2SI Gd, Ex"); + nextop = F8; + GETGD; + SET_ELEMENT_WIDTH(x3, VECTOR_SEW32, 1); + if (MODREG) { + ed = (nextop & 7) + (rex.b << 3); + d0 = sse_get_reg_vector(dyn, ninst, x1, ed, 0, VECTOR_SEW32); + VFMV_F_S(d0, d0); + } else { + GETEXSS(d0, 0); + } + if (!box64_dynarec_fastround) FSFLAGSI(0); + FCVTSxw(gd, d0, RD_RTZ); + if (!rex.w) ZEROUP(gd); + if (!box64_dynarec_fastround) { + FRFLAGS(x5); + ANDI(x5, x5, (1 << FR_NV) | (1 << FR_OF)); + CBZ_NEXT(x5); + if (rex.w) { + MOV64x(gd, 0x8000000000000000LL); + } else { + MOV32w(gd, 0x80000000); + } + } + break; case 0x2D: INST_NAME("CVTSS2SI Gd, Ex"); nextop = F8; @@ -152,6 +178,44 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i break; case 0x38: return 0; + case 0x51: + INST_NAME("SQRTSS Gx, Ex"); + nextop = F8; + SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); + if (MODREG) { + GETGX_vector(v0, 1, VECTOR_SEW32); + v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32); + } else { + SMREAD(); + v1 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0); + LWU(x4, ed, fixedaddress); + VMV_S_X(v1, x4); + GETGX_vector(v0, 1, VECTOR_SEW32); + } + VECTOR_LOAD_VMASK(0b0001, x4, 1); + VFSQRT_V(v0, v1, VECTOR_MASKED); + break; + case 0x53: + INST_NAME("RCPSS Gx, Ex"); + nextop = F8; + SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); + if (MODREG) { + GETGX_vector(v0, 1, VECTOR_SEW32); + v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32); + } else { + SMREAD(); + v1 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0); + LWU(x4, ed, fixedaddress); + VMV_S_X(v1, x4); + GETGX_vector(v0, 1, VECTOR_SEW32); + } + LUI(x4, 0x3f800); + FMVWX(v1, x4); // 1.0f + VECTOR_LOAD_VMASK(0b0001, x4, 1); + VFRDIV_VF(v0, v1, v1, VECTOR_MASKED); + break; case 0x58: INST_NAME("ADDSS Gx, Ex"); nextop = F8; @@ -236,6 +300,25 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i VMV_S_X(v0, x4); } break; + case 0x5B: + INST_NAME("CVTTPS2DQ Gx, Ex"); + nextop = F8; + SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); + GETEX_vector(v1, 0, 0, VECTOR_SEW32); + GETGX_empty_vector(v0); + if (box64_dynarec_fastround) { + if (rv64_xtheadvector) { + ADDI(x4, xZR, 1); // RTZ + FSRM(x4, x4); + VFCVT_X_F_V(v0, v1, VECTOR_UNMASKED); + FSRM(xZR, x4); + } else { + VFCVT_RTZ_X_F_V(v0, v1, VECTOR_UNMASKED); + } + } else { + return 0; + } + break; case 0x5C: INST_NAME("SUBSS Gx, Ex"); nextop = F8; @@ -295,6 +378,24 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i } } break; + case 0x5E: + INST_NAME("DIVSS Gx, Ex"); + nextop = F8; + SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); + if (MODREG) { + GETGX_vector(v0, 1, VECTOR_SEW32); + v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32); + } else { + SMREAD(); + v1 = fpu_get_scratch(dyn); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0); + LWU(x4, ed, fixedaddress); + VMV_S_X(v1, x4); + GETGX_vector(v0, 1, VECTOR_SEW32); + } + VECTOR_LOAD_VMASK(0b0001, x4, 1); + VFDIV_VV(v0, v0, v1, VECTOR_MASKED); + break; case 0x5F: INST_NAME("MAXSS Gx, Ex"); nextop = F8; |