diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-10-30 20:10:47 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-10-30 13:10:47 +0100 |
| commit | adb423d96b3a90b973a53388dba4c163dde2f8db (patch) | |
| tree | 4dc11f41e888edca7b92e756020e647838fa14b6 /src | |
| parent | 74acad36288d9433d3d0d24bf0eb74c5a239bb1f (diff) | |
| download | box64-adb423d96b3a90b973a53388dba4c163dde2f8db.tar.gz box64-adb423d96b3a90b973a53388dba4c163dde2f8db.zip | |
[RV64_DYNAREC] Added more opcodes for vector (#1981)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f_vector.c | 56 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f_vector.c | 11 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f20f_vector.c | 24 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f30f_vector.c | 38 |
4 files changed, 109 insertions, 20 deletions
```diff
diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
index cba9796d..74556ce1 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
@@ -120,6 +120,29 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VLE8_V(v0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
             }
             break;
+        case 0x13:
+            INST_NAME("MOVLPS Ex, Gx");
+            nextop = F8;
+            GETG;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            v0 = sse_get_reg_vector(dyn, ninst, x1, gd, 0, VECTOR_SEW64);
+            if (MODREG) {
+                ed = (nextop & 7) + (rex.b << 3);
+                d0 = sse_get_reg_vector(dyn, ninst, x1, ed, 1, VECTOR_SEW64);
+                if (rv64_xtheadvector) {
+                    VECTOR_LOAD_VMASK(0b01, x4, 1);
+                    VMERGE_VVM(v0, v0, v1); // implies VMASK
+                } else {
+                    VMV_X_S(x4, v1);
+                    VMV_S_X(v0, x4);
+                }
+            } else {
+                VMV_X_S(x4, v0);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                SD(x4, ed, fixedaddress);
+                SMWRITE2();
+            }
+            break;
         case 0x14:
             INST_NAME("UNPCKLPS Gx, Ex");
             nextop = F8;
@@ -251,6 +274,21 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
                 SMWRITE2();
             }
             break;
+        case 0x2B:
+            INST_NAME("MOVNTPS Ex, Gx");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1);
+            GETGX_vector(v0, 0, dyn->vector_eew);
+            if (MODREG) {
+                ed = (nextop & 7) + (rex.b << 3);
+                v1 = sse_get_reg_empty_vector(dyn, ninst, x1, ed);
+                VMV_V_V(v1, v0);
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 0);
+                VSE_V(v0, ed, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
+                SMWRITE2();
+            }
+            break;
         case 0x50:
             INST_NAME("MOVMSKPS Gd, Ex");
             nextop = F8;
@@ -286,6 +324,18 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
             GETGX_empty_vector(v1);
             VFSQRT_V(v1, v0, VECTOR_UNMASKED);
             break;
+        case 0x52:
+            if (!box64_dynarec_fastround) return 0;
+            INST_NAME("RSQRTPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETEX_vector(v0, 0, 0, VECTOR_SEW32);
+            GETGX_empty_vector(v1);
+            LUI(x4, 0x3f800);
+            FMVWX(v0, x4); // 1.0f
+            VFSQRT_V(v1, v0, VECTOR_UNMASKED);
+            VFRDIV_VF(v1, v1, v0, VECTOR_UNMASKED);
+            break;
         case 0x53:
             INST_NAME("RCPPS Gx, Ex");
             nextop = F8;
@@ -293,9 +343,8 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
             GETEX_vector(v0, 0, 0, VECTOR_SEW32);
             GETGX_empty_vector(v1);
             LUI(x4, 0x3f800);
-            d0 = fpu_get_scratch(dyn);
-            FMVWX(d0, x4); // 1.0f
-            VFRDIV_VF(v1, v0, d0, VECTOR_UNMASKED);
+            FMVWX(v0, x4); // 1.0f
+            VFRDIV_VF(v1, v0, v0, VECTOR_UNMASKED);
             break;
         case 0x54:
             INST_NAME("ANDPS Gx, Ex");
@@ -502,6 +551,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
         case 0x40 ... 0x4F:
         case 0x60 ... 0x7F:
         case 0x80 ... 0xBF:
+        case 0xC3 ... 0xC5:
         case 0xC8 ... 0xCF:
             return 0;
         default:
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index 278eac9e..56177200 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -897,18 +897,15 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             }
             break;
         case 0x5B:
+            if (!box64_dynarec_fastround) return 0;
             INST_NAME("CVTPS2DQ Gx, Ex");
             nextop = F8;
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
             GETEX_vector(v1, 0, 0, VECTOR_SEW32);
             GETGX_empty_vector(v0);
-            if (box64_dynarec_fastround) {
-                u8 = sse_setround(dyn, ninst, x6, x4);
-                VFCVT_X_F_V(v0, v1, VECTOR_UNMASKED);
-                x87_restoreround(dyn, ninst, u8);
-            } else {
-                return 0;
-            }
+            u8 = sse_setround(dyn, ninst, x6, x4);
+            VFCVT_X_F_V(v0, v1, VECTOR_UNMASKED);
+            x87_restoreround(dyn, ninst, u8);
             break;
         case 0x5C:
             INST_NAME("SUBPD Gx, Ex");
diff --git a/src/dynarec/rv64/dynarec_rv64_f20f_vector.c b/src/dynarec/rv64/dynarec_rv64_f20f_vector.c
index 8c5b5ffa..8cfe5b5d 100644
--- a/src/dynarec/rv64/dynarec_rv64_f20f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_f20f_vector.c
@@ -36,6 +36,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
     int q0, q1;
     int d0, d1;
     int s0, s1;
+    uint64_t tmp64u0, tmp64u1;
     int64_t fixedaddress, gdoffset;
     int unscaled;

@@ -466,6 +467,29 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 }
             }
             break;
+        case 0x70:
+            INST_NAME("PSHUFLW Gx, Ex, Ib");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+            GETEX_vector(v1, 0, 1, VECTOR_SEW16);
+            GETGX_vector(v0, 1, VECTOR_SEW16);
+            u8 = F8;
+            d0 = fpu_get_scratch(dyn);
+            d1 = fpu_get_scratch(dyn);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+            tmp64u0 = 0x0007000600050004ULL;
+            MOV64x(x5, tmp64u0);
+            VMV_S_X(d1, x5);
+            tmp64u0 = ((((uint64_t)u8 >> 6) & 3) << 48) | ((((uint64_t)u8 >> 4) & 3) << 32) | (((u8 >> 2) & 3) << 16) | (u8 & 3);
+            MOV64x(x5, tmp64u0);
+            VSLIDE1UP_VX(d0, d1, x5, VECTOR_UNMASKED);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 1);
+            if (v0 == v1) {
+                v1 = fpu_get_scratch(dyn);
+                VMV_V_V(v1, v0);
+            }
+            VRGATHER_VV(v0, v1, d0, VECTOR_UNMASKED);
+            break;
         case 0xC2:
             INST_NAME("CMPSD Gx, Ex, Ib");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
index 4c0088e6..4e7f12d2 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
@@ -196,6 +196,27 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             VECTOR_LOAD_VMASK(0b0001, x4, 1);
             VFSQRT_V(v0, v1, VECTOR_MASKED);
             break;
+        case 0x52:
+            INST_NAME("RSQRTSS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            if (MODREG) {
+                GETGX_vector(v0, 1, VECTOR_SEW32);
+                v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
+            } else {
+                SMREAD();
+                v1 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LWU(x4, ed, fixedaddress);
+                VMV_S_X(v1, x4);
+                GETGX_vector(v0, 1, VECTOR_SEW32);
+            }
+            LUI(x4, 0x3f800);
+            FMVWX(v1, x4); // 1.0f
+            VECTOR_LOAD_VMASK(0b0001, x4, 1);
+            VFSQRT_V(v0, v1, VECTOR_MASKED);
+            VFRDIV_VF(v0, v0, v1, VECTOR_MASKED);
+            break;
         case 0x53:
             INST_NAME("RCPSS Gx, Ex");
             nextop = F8;
@@ -301,22 +322,19 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             }
             break;
         case 0x5B:
+            if (!box64_dynarec_fastround) return 0;
             INST_NAME("CVTTPS2DQ Gx, Ex");
             nextop = F8;
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
             GETEX_vector(v1, 0, 0, VECTOR_SEW32);
             GETGX_empty_vector(v0);
-            if (box64_dynarec_fastround) {
-                if (rv64_xtheadvector) {
-                    ADDI(x4, xZR, 1); // RTZ
-                    FSRM(x4, x4);
-                    VFCVT_X_F_V(v0, v1, VECTOR_UNMASKED);
-                    FSRM(xZR, x4);
-                } else {
-                    VFCVT_RTZ_X_F_V(v0, v1, VECTOR_UNMASKED);
-                }
+            if (rv64_xtheadvector) {
+                ADDI(x4, xZR, 1); // RTZ
+                FSRM(x4, x4);
+                VFCVT_X_F_V(v0, v1, VECTOR_UNMASKED);
+                FSRM(xZR, x4);
             } else {
-                return 0;
+                VFCVT_RTZ_X_F_V(v0, v1, VECTOR_UNMASKED);
             }
             break;
         case 0x5C:
```