| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-10-29 00:32:52 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-10-28 17:32:52 +0100 |
| commit | 8dee79d3800f658e13fea4052996b6497860f9e7 (patch) | |
| tree | f9531612a9805d87ca45aed81bd95c04b0673f3f /src | |
| parent | d3c1ea5b0fa1e6055a14f187798633461e1b6eab (diff) | |
| download | box64-8dee79d3800f658e13fea4052996b6497860f9e7.tar.gz box64-8dee79d3800f658e13fea4052996b6497860f9e7.zip | |
[RV64_DYNAREC] Added more opcodes for vector (#1969)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f_vector.c | 22 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f20f_vector.c | 97 |
2 files changed, 81 insertions, 38 deletions
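
Before the diff itself, a note on semantics: the scalar-SD opcodes touched here differ mainly in what happens to the destination bits the operation does not write. A plain-C sketch of the x86 behavior being emulated (the `xmm_t` type and helper names are illustrative, not box64 code):

```c
#include <math.h>
#include <stdint.h>
#include <string.h>

typedef struct { uint64_t lo, hi; } xmm_t; // illustrative 128-bit XMM register

// MOVSD xmm, m64: load the low quadword, zero the high quadword.
static void movsd_load(xmm_t* gx, const void* mem) {
    memcpy(&gx->lo, mem, 8);
    gx->hi = 0;
}

// MOVLPD xmm, m64: load the low quadword, preserve the high quadword.
static void movlpd_load(xmm_t* gx, const void* mem) {
    memcpy(&gx->lo, mem, 8);
}

// SQRTSD xmm, xmm/m64: square root of the source's low double into the
// destination's low double; the destination's high quadword is preserved.
static void sqrtsd(xmm_t* gx, const xmm_t* ex) {
    double s, r;
    memcpy(&s, &ex->lo, 8);
    r = sqrt(s);
    memcpy(&gx->lo, &r, 8);
}
```

This preserve-vs-zero distinction is why, in the hunks below, MOVLPD and SQRTSD write only element 0 of the destination vector (via `VMV_S_X` or a masked `VFSQRT_V`/`VMERGE_VVM`), while the memory form of MOVSD still clears the whole register first.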
```diff
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index 438df522..79625819 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -81,6 +81,28 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 SMWRITE2();
             }
             break;
+        case 0x12:
+            INST_NAME("MOVLPD Gx, Eq");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            GETGX_vector(v0, 1, VECTOR_SEW64);
+            if (MODREG) {
+                // access register instead of memory is bad opcode!
+                DEFAULT;
+                return addr;
+            }
+            SMREAD();
+            addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+            LD(x4, ed, fixedaddress);
+            if (rv64_xtheadvector) {
+                v1 = fpu_get_scratch(dyn);
+                VMV_S_X(v1, x4);
+                VECTOR_LOAD_VMASK(0b01, x3, 1);
+                VMERGE_VVM(v0, v0, v1); // implies VMASK
+            } else {
+                VMV_S_X(v0, x4);
+            }
+            break;
         case 0x14:
             INST_NAME("UNPCKLPD Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_f20f_vector.c b/src/dynarec/rv64/dynarec_rv64_f20f_vector.c
index 120e1281..4786e502 100644
--- a/src/dynarec/rv64/dynarec_rv64_f20f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_f20f_vector.c
@@ -50,8 +50,8 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             INST_NAME("MOVSD Gx, Ex");
             nextop = F8;
             GETG;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
             if (MODREG) {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
                 ed = (nextop & 7) + (rex.b << 3);
                 v0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1, VECTOR_SEW64);
                 v1 = sse_get_reg_vector(dyn, ninst, x1, ed, 0, VECTOR_SEW64);
@@ -64,14 +64,11 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 }
             } else {
                 SMREAD();
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); // unaligned
                 v0 = sse_get_reg_empty_vector(dyn, ninst, x1, gd);
-                d0 = fpu_get_scratch(dyn);
-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                VECTOR_LOAD_VMASK(0xFF, x4, 1);
-                VLE8_V(d0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
-                VXOR_VV(v0, v0, v0, VECTOR_UNMASKED);
-                VMERGE_VVM(v0, v0, d0); // implies VMASK
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LD(x4, ed, fixedaddress);
+                if (!rv64_xtheadvector) VXOR_VV(v0, v0, v0, VECTOR_UNMASKED);
+                VMV_S_X(v0, x4);
             }
             break;
         case 0x11:
@@ -125,17 +122,15 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             INST_NAME("CVTTSD2SI Gd, Ex");
             nextop = F8;
             GETGD;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
             if (MODREG) {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
                 v0 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, dyn->vector_eew);
             } else {
                 SMREAD();
                 v0 = fpu_get_scratch(dyn);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                VECTOR_LOAD_VMASK(0xFF, x4, 1);
-                VLE8_V(v0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LD(x4, ed, fixedaddress);
+                VMV_S_X(v0, x4);
             }
             if (box64_dynarec_fastround) {
                 VFMV_F_S(v0, v0);
@@ -160,17 +155,15 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             INST_NAME("CVTSD2SI Gd, Ex");
             nextop = F8;
             GETGD;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
             if (MODREG) {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
                 v0 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, dyn->vector_eew);
             } else {
                 SMREAD();
                 v0 = fpu_get_scratch(dyn);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                VECTOR_LOAD_VMASK(0xFF, x4, 1);
-                VLE8_V(v0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LD(x4, ed, fixedaddress);
+                VMV_S_X(v0, x4);
             }
             if (box64_dynarec_fastround) {
                 VFMV_F_S(v0, v0);
@@ -197,21 +190,53 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             break;
         case 0x38:
             return 0;
+        case 0x51:
+            INST_NAME("SQRTSD Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            if (MODREG) {
+                GETGX_vector(v0, 1, VECTOR_SEW64);
+                v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
+            } else {
+                SMREAD();
+                GETGX_vector(v0, 1, VECTOR_SEW64);
+                v1 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LD(x4, ed, fixedaddress);
+                VMV_S_X(v1, x4);
+            }
+            if (box64_dynarec_fastnan) {
+                VECTOR_LOAD_VMASK(0b01, x4, 1);
+                VFSQRT_V(v0, v1, VECTOR_MASKED);
+            } else {
+                d0 = fpu_get_scratch(dyn);
+                d1 = fpu_get_scratch(dyn);
+                VFSQRT_V(d1, v1, VECTOR_UNMASKED);
+                FMVDX(d0, xZR);
+                VMFLT_VF(VMASK, v1, d0, VECTOR_UNMASKED);
+                VFSGNJN_VV(d1, d1, d1, VECTOR_MASKED);
+                if (rv64_xtheadvector) {
+                    VECTOR_LOAD_VMASK(0b01, x4, 1);
+                    VMERGE_VVM(v0, v0, d1); // implies VMASK
+                } else {
+                    VMV_X_S(x4, d1);
+                    VMV_S_X(v0, x4);
+                }
+            }
+            break;
         case 0x59:
             INST_NAME("MULSD Gx, Ex");
             nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
             if (MODREG) {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
                 GETGX_vector(v0, 1, VECTOR_SEW64);
                 v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
             } else {
                 SMREAD();
                 v1 = fpu_get_scratch(dyn);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                VECTOR_LOAD_VMASK(0xFF, x4, 1);
-                VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LD(x4, ed, fixedaddress);
+                VMV_S_X(v1, x4);
                 GETGX_vector(v0, 1, VECTOR_SEW64);
             }
             if (box64_dynarec_fastnan) {
@@ -242,18 +267,16 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
         case 0x5E:
             INST_NAME("DIVSD Gx, Ex");
             nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
             if (MODREG) {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
                 GETGX_vector(v0, 1, VECTOR_SEW64);
                 v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
             } else {
                 SMREAD();
                 v1 = fpu_get_scratch(dyn);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                VECTOR_LOAD_VMASK(0xFF, x4, 1);
-                VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LD(x4, ed, fixedaddress);
+                VMV_S_X(v1, x4);
                 GETGX_vector(v0, 1, VECTOR_SEW64);
             }
             if (!box64_dynarec_fastnan) {
@@ -284,18 +307,16 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
         case 0xC2:
             INST_NAME("CMPSD Gx, Ex, Ib");
             nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
             if (MODREG) {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
                 GETGX_vector(d0, 1, VECTOR_SEW64);
                 d1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
             } else {
                 SMREAD();
                 d1 = fpu_get_scratch(dyn);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 1);
-                VECTOR_LOAD_VMASK(0xFF, x4, 1);
-                VLE8_V(d1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 1);
+                LD(x4, ed, fixedaddress);
+                VMV_S_X(d1, x4);
                 GETGX_vector(d0, 1, VECTOR_SEW64);
             }
             u8 = F8;
```
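
The recurring change across the scalar memory paths above is uniform: the old code loaded the m64 operand as eight masked SEW=8 elements, switching the element width twice along the way; the new code stays at SEW=64, does one scalar load, and moves the result into element 0. A condensed before/after excerpt of that pattern, as it appears in the hunks above (`v1` stands for whichever scratch vector register the case uses):

```c
// Before: byte-wise masked vector load of the m64 operand.
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); // unaligned
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
VECTOR_LOAD_VMASK(0xFF, x4, 1);                // mask covering the first 8 bytes
VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1); // masked 8-bit element load
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);        // switch back to 64-bit elements

// After: one scalar 64-bit load, then a move into element 0.
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
LD(x4, ed, fixedaddress); // scalar 64-bit load
VMV_S_X(v1, x4);          // write element 0 only
```

The `rv64_xtheadvector` special cases follow from `vmv.s.x` semantics: under the ratified RVV 1.0 spec it writes element 0 and leaves the remaining elements to the tail policy, while the 0.7.1-era XTheadVector behavior zeroes the tail. That reading is consistent with MOVSD skipping its explicit `VXOR_VV` clear on XTheadVector, and with MOVLPD and the non-fastnan SQRTSD path merging through a scratch register (`VMERGE_VVM` under a `0b01` mask) rather than writing the destination directly.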