| field | value | date |
|---|---|---|
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-10-28 21:55:58 +0800 |
| committer | GitHub <noreply@github.com> | 2024-10-28 14:55:58 +0100 |
| commit | d3c1ea5b0fa1e6055a14f187798633461e1b6eab (patch) | |
| tree | 9e2b1aa5979f5085cd0aa38af7f9dafe2cca45f8 /src | |
| parent | 2835a2f87d293ce56ddf40f88520ad971de4f06b (diff) | |
| download | box64-d3c1ea5b0fa1e6055a14f187798633461e1b6eab.tar.gz, box64-d3c1ea5b0fa1e6055a14f187798633461e1b6eab.zip | |
[RV64_DYNAREC] Added more opcodes for vector (#1968)
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f_vector.c | 112 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f_vector.c | 28 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f30f_vector.c | 66 |
3 files changed, 159 insertions, 47 deletions
```diff
diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
index 1f5c0089..7435a468 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
@@ -251,6 +251,50 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
                 SMWRITE2();
             }
             break;
+        case 0x51:
+            INST_NAME("SQRTPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETEX_vector(v0, 0, 0, VECTOR_SEW32);
+            GETGX_empty_vector(v1);
+            VFSQRT_V(v1, v0, VECTOR_UNMASKED);
+            break;
+        case 0x53:
+            INST_NAME("RCPPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETEX_vector(v0, 0, 0, VECTOR_SEW32);
+            GETGX_empty_vector(v1);
+            LUI(x4, 0x3f800);
+            d0 = fpu_get_scratch(dyn);
+            FMVWX(d0, x4); // 1.0f
+            VFRDIV_VF(v1, v0, d0, VECTOR_UNMASKED);
+            break;
+        case 0x54:
+            INST_NAME("ANDPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETEX_vector(q0, 0, 0, VECTOR_SEW32);
+            GETGX_vector(v0, 1, VECTOR_SEW32);
+            VAND_VV(v0, v0, q0, VECTOR_UNMASKED);
+            break;
+        case 0x55:
+            INST_NAME("ANDNPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETEX_vector(q0, 0, 0, VECTOR_SEW32);
+            GETGX_vector(v0, 1, VECTOR_SEW32);
+            VXOR_VI(v0, v0, 0x1f, VECTOR_UNMASKED);
+            VAND_VV(v0, v0, q0, VECTOR_UNMASKED);
+            break;
+        case 0x56:
+            INST_NAME("ORPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETEX_vector(q0, 0, 0, VECTOR_SEW32);
+            GETGX_vector(v0, 1, VECTOR_SEW32);
+            VOR_VV(v0, v0, q0, VECTOR_UNMASKED);
+            break;
         case 0x57:
             INST_NAME("XORPS Gx, Ex");
             nextop = F8;
@@ -266,6 +310,74 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VXOR_VV(q0, q1, q0, VECTOR_UNMASKED);
             }
             break;
+        case 0x58:
+            INST_NAME("ADDPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETEX_vector(q0, 0, 0, VECTOR_SEW32);
+            GETGX_vector(v0, 1, VECTOR_SEW32);
+            VFADD_VV(v0, v0, q0, VECTOR_UNMASKED);
+            break;
+        case 0x59:
+            INST_NAME("MULPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETEX_vector(q0, 0, 0, VECTOR_SEW32);
+            GETGX_vector(v0, 1, VECTOR_SEW32);
+            VFMUL_VV(v0, v0, q0, VECTOR_UNMASKED);
+            break;
+        case 0x5C:
+            INST_NAME("SUBPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETEX_vector(q0, 0, 0, VECTOR_SEW32);
+            GETGX_vector(v0, 1, VECTOR_SEW32);
+            VFSUB_VV(v0, v0, q0, VECTOR_UNMASKED);
+            break;
+        case 0x5D:
+            INST_NAME("MINPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETGX_vector(q0, 1, VECTOR_SEW32);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW32);
+            if (!box64_dynarec_fastnan) {
+                v0 = fpu_get_scratch(dyn);
+                VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED);
+                VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED);
+            }
+            VFMIN_VV(q0, q0, q1, VECTOR_UNMASKED);
+            if (!box64_dynarec_fastnan) {
+                VMAND_MM(VMASK, v0, VMASK);
+                VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
+                VADD_VX(q0, q1, xZR, VECTOR_MASKED);
+            }
+            break;
+        case 0x5E:
+            INST_NAME("DIVPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETGX_vector(q0, 1, VECTOR_SEW32);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW32);
+            VFDIV_VV(q0, q0, q1, VECTOR_UNMASKED);
+            break;
+        case 0x5F:
+            INST_NAME("MAXPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETGX_vector(q0, 1, VECTOR_SEW32);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW32);
+            v0 = fpu_get_scratch(dyn);
+            if (!box64_dynarec_fastnan) {
+                VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED);
+                VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED);
+            }
+            VFMAX_VV(q0, q0, q1, VECTOR_UNMASKED);
+            if (!box64_dynarec_fastnan) {
+                VMAND_MM(VMASK, v0, VMASK);
+                VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
+                VADD_VX(q0, q1, xZR, VECTOR_MASKED);
+            }
+            break;
         case 0xC6:
             INST_NAME("SHUFPS Gx, Ex, Ib");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index 5d835fb4..438df522 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -862,12 +862,16 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             GETGX_vector(q0, 1, VECTOR_SEW64);
             GETEX_vector(q1, 0, 0, VECTOR_SEW64);
             v0 = fpu_get_scratch(dyn);
-            VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED);
-            VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED);
+            if (!box64_dynarec_fastnan) {
+                VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED);
+                VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED);
+            }
             VFMIN_VV(q0, q0, q1, VECTOR_UNMASKED);
-            VMAND_MM(VMASK, v0, VMASK);
-            VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
-            VADD_VX(q0, q1, xZR, VECTOR_MASKED);
+            if (!box64_dynarec_fastnan) {
+                VMAND_MM(VMASK, v0, VMASK);
+                VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
+                VADD_VX(q0, q1, xZR, VECTOR_MASKED);
+            }
             break;
         case 0x5E:
             INST_NAME("DIVPD Gx, Ex");
@@ -896,12 +900,16 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             GETGX_vector(q0, 1, VECTOR_SEW64);
             GETEX_vector(q1, 0, 0, VECTOR_SEW64);
             v0 = fpu_get_scratch(dyn);
-            VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED);
-            VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED);
+            if (!box64_dynarec_fastnan) {
+                VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED);
+                VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED);
+            }
             VFMAX_VV(q0, q0, q1, VECTOR_UNMASKED);
-            VMAND_MM(VMASK, v0, VMASK);
-            VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
-            VADD_VX(q0, q1, xZR, VECTOR_MASKED);
+            if (!box64_dynarec_fastnan) {
+                VMAND_MM(VMASK, v0, VMASK);
+                VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
+                VADD_VX(q0, q1, xZR, VECTOR_MASKED);
+            }
             break;
         case 0x60:
             INST_NAME("PUNPCKLBW Gx, Ex");
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
index 3dda70b1..a4bec879 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
@@ -52,8 +52,8 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             INST_NAME("MOVSS Gx, Ex");
             nextop = F8;
             GETG;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
             if (MODREG) {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
                 ed = (nextop & 7) + (rex.b << 3);
                 v0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1, VECTOR_SEW32);
                 v1 = sse_get_reg_vector(dyn, ninst, x1, ed, 0, VECTOR_SEW32);
@@ -66,14 +66,11 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 }
             } else {
                 SMREAD();
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
                 v0 = sse_get_reg_empty_vector(dyn, ninst, x1, gd);
-                d0 = fpu_get_scratch(dyn);
-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                VECTOR_LOAD_VMASK(0xF, x4, 1);
-                VLE8_V(d0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LWU(x4, ed, fixedaddress);
                 VXOR_VV(v0, v0, v0, VECTOR_UNMASKED);
-                VMERGE_VVM(v0, v0, d0); // implies VMASK
+                VMV_S_X(v0, x4);
             }
             break;
         case 0x11:
@@ -130,18 +127,17 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
         case 0x59:
            INST_NAME("MULSS Gx, Ex");
             nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
             if (MODREG) {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
                 GETGX_vector(v0, 1, VECTOR_SEW32);
                 v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
             } else {
                 SMREAD();
                 v1 = fpu_get_scratch(dyn);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                VECTOR_LOAD_VMASK(0xFF, x4, 1);
-                VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LWU(x4, ed, fixedaddress);
+                VXOR_VV(v1, v1, v1, VECTOR_UNMASKED);
+                VMV_S_X(v1, x4);
                 GETGX_vector(v0, 1, VECTOR_SEW32);
             }
             if (box64_dynarec_fastnan) {
@@ -172,18 +168,17 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
         case 0x5A:
             INST_NAME("CVTSS2SD Gx, Ex");
             nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
             if (MODREG) {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
                 GETGX_vector(v0, 1, VECTOR_SEW32);
                 v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
             } else {
                 SMREAD();
                 v1 = fpu_get_scratch(dyn);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                VECTOR_LOAD_VMASK(0xFF, x4, 1);
-                VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LWU(x4, ed, fixedaddress);
+                VXOR_VV(v1, v1, v1, VECTOR_UNMASKED);
+                VMV_S_X(v1, x4);
                 GETGX_vector(v0, 1, VECTOR_SEW32);
             }
             d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
@@ -200,18 +195,17 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
         case 0x5D:
             INST_NAME("MINSS Gx, Ex");
             nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
             if (MODREG) {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
                 GETGX_vector(v0, 1, VECTOR_SEW32);
                 v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
             } else {
                 SMREAD();
                 v1 = fpu_get_scratch(dyn);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                VECTOR_LOAD_VMASK(0xFF, x4, 1);
-                VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LWU(x4, ed, fixedaddress);
+                VXOR_VV(v1, v1, v1, VECTOR_UNMASKED);
+                VMV_S_X(v1, x4);
                 GETGX_vector(v0, 1, VECTOR_SEW32);
             }
             d0 = fpu_get_scratch(dyn);
@@ -232,18 +226,17 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
         case 0x5F:
             INST_NAME("MAXSS Gx, Ex");
             nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
             if (MODREG) {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
                 GETGX_vector(v0, 1, VECTOR_SEW32);
                 v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
             } else {
                 SMREAD();
                 v1 = fpu_get_scratch(dyn);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                VECTOR_LOAD_VMASK(0xFF, x4, 1);
-                VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LWU(x4, ed, fixedaddress);
+                VXOR_VV(v1, v1, v1, VECTOR_UNMASKED);
+                VMV_S_X(v1, x4);
                 GETGX_vector(v0, 1, VECTOR_SEW32);
            }
             d0 = fpu_get_scratch(dyn);
@@ -269,18 +262,17 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
         case 0xC2:
             INST_NAME("CMPSS Gx, Ex, Ib");
             nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
             if (MODREG) {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
                 GETGX_vector(d0, 1, VECTOR_SEW32);
                 d1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
             } else {
                 SMREAD();
                 d1 = fpu_get_scratch(dyn);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 1);
-                VECTOR_LOAD_VMASK(0xFF, x4, 1);
-                VLE8_V(d1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 1);
+                LWU(x4, ed, fixedaddress);
+                VXOR_VV(d1, d1, d1, VECTOR_UNMASKED);
+                VMV_S_X(d1, x4);
                 GETGX_vector(d0, 1, VECTOR_SEW32);
             }
             u8 = F8;
```
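A note on the RCPPS lowering above: x86 RCPPS only guarantees a roughly 12-bit reciprocal approximation, and the translation instead computes a full-precision 1.0f/x with VFRDIV_VF, a safe over-approximation of the architectural behavior. The LUI x4, 0x3f800 materializes the IEEE-754 bit pattern of 1.0f, since LUI places its 20-bit immediate in the upper bits. A minimal scalar model of one lane (the helper name is ours, not box64's):

```c
#include <stdint.h>
#include <string.h>

/* What one RCPPS lane computes after this patch: an exact 1.0f/x.
 * LUI x4, 0x3f800 builds 0x3f800 << 12 == 0x3f800000, the IEEE-754
 * encoding of 1.0f, which FMVWX moves into an FPU scratch register
 * so VFRDIV_VF can divide it by every element. */
static float rcpps_lane(float x)
{
    uint32_t one_bits = 0x3f800u << 12; /* LUI shifts its immediate left by 12 */
    float one;
    memcpy(&one, &one_bits, sizeof one); /* FMVWX d0, x4 */
    return one / x;                      /* VFRDIV_VF v1, v0, d0 */
}
```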
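The ANDNPS case relies on a small RVV idiom: VXOR_VI takes a 5-bit sign-extended immediate, so XOR with 0x1f (sign-extends to -1, all ones) is a bitwise NOT. The same trick inverts VMASK in the min/max sequences. Per lane, ANDNPS computes (sketch, hypothetical helper name):

```c
#include <stdint.h>

/* ANDNPS lane semantics: Gx = ~Gx & Ex. In the emitted code,
 * VXOR_VI(v0, v0, 0x1f, ...) XORs each element with the sign-extended
 * 5-bit immediate -1 (all ones), i.e. a bitwise NOT, and VAND_VV then
 * ANDs in the Ex operand. */
static uint32_t andnps_lane(uint32_t gx, uint32_t ex)
{
    return ~gx & ex; /* VXOR_VI with 0x1f, then VAND_VV */
}
```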
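The !box64_dynarec_fastnan paths around VFMIN_VV/VFMAX_VV (and the matching guards added in the 660F PD cases) exist because the NaN rules differ: x86 MINPS/MAXPS return the second source whenever either operand is a NaN, while RVV vfmin/vfmax return the non-NaN operand. VMFEQ_VV of a register with itself is true exactly for non-NaN lanes; ANDing the two self-compares with VMAND_MM, inverting the mask with VXOR_VI 0x1F, and issuing a masked VADD_VX of q1 + 0 copies Ex back into every lane that saw a NaN. A scalar model of the rule being restored (helper name is ours):

```c
#include <math.h>

/* x86 MINPS per-lane rule that the masked fix-up restores: if either
 * input is NaN, the result is the *second* source (Ex), which RVV's
 * vfmin.vv would otherwise replace with the non-NaN input. */
static float x86_minps_lane(float gx, float ex)
{
    if (isnan(gx) || isnan(ex)) /* lanes where a VMFEQ self-compare fails */
        return ex;              /* masked VADD_VX(q0, q1, xZR) copies Ex */
    return gx < ex ? gx : ex;   /* ordered lanes: the VFMIN_VV result stands */
}
```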
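In the F3 0F file, the memory-operand path of MOVSS and the scalar-SS arithmetic opcodes no longer builds a byte mask and issues a masked VLE8_V; it reads the 32-bit operand with a plain LWU, clears the scratch vector, and inserts the value into element 0 with VMV_S_X. This also lets SET_ELEMENT_WIDTH be hoisted above the MODREG test, and the changed geted(..., 1, ...) argument presumably allows a fixed displacement that LWU folds into its addressing. A scalar model of the new load pattern (sketch; names are ours):

```c
#include <stdint.h>
#include <string.h>

/* New *SS memory-operand pattern: zero the vector, then drop the
 * 32-bit scalar into element 0. Replaces VECTOR_LOAD_VMASK + VLE8_V. */
static void load_ss_operand(uint32_t v[4], const void* ed)
{
    memset(v, 0, 4 * sizeof v[0]); /* VXOR_VV v1, v1, v1 */
    uint32_t x4;
    memcpy(&x4, ed, sizeof x4);    /* LWU x4, fixedaddress(ed) */
    v[0] = x4;                     /* VMV_S_X v1, x4 */
}
```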