| author | Yang Liu <liuyang22@iscas.ac.cn> | 2024-10-29 05:34:49 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-10-28 22:34:49 +0100 |
| commit | 9ffabf4c859ba4933abffc2fba0f0460d05ca3c9 (patch) | |
| tree | b2521311f69637470de1e5bdc2102219cbad1e29 /src | |
| parent | aebfd61539c595dddea2e3612c2ad4c358d2eae8 (diff) | |
| download | box64-9ffabf4c859ba4933abffc2fba0f0460d05ca3c9.tar.gz, box64-9ffabf4c859ba4933abffc2fba0f0460d05ca3c9.zip | |
[RV64_DYNAREC] Added more opcodes for vector (#1972)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_660f_vector.c | 33 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 89 |
2 files changed, 119 insertions, 3 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index b8db65aa..278eac9e 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -753,6 +753,25 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                     VMERGE_VXM(v0, q0, ed); // uses VMASK
                     VMV_V_V(q0, v0);
                     break;
+                case 0x40:
+                    INST_NAME("DPPS Gx, Ex, Ib");
+                    nextop = F8;
+                    SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                    GETGX_vector(q0, 1, VECTOR_SEW32);
+                    GETEX_vector(q1, 0, 1, VECTOR_SEW32);
+                    u8 = F8;
+                    v0 = fpu_get_scratch(dyn);
+                    v1 = fpu_get_scratch(dyn);
+                    d0 = fpu_get_scratch(dyn);
+                    VXOR_VV(v1, v1, v1, VECTOR_UNMASKED);
+                    VECTOR_LOAD_VMASK((u8 >> 4), x4, 1);
+                    VFMUL_VV(v0, q0, q1, VECTOR_MASKED);
+                    VFREDUSUM_VS(d0, v0, v1, VECTOR_MASKED);
+                    VMV_X_S(x4, d0);
+                    VMV_V_X(d0, x4);
+                    VECTOR_LOAD_VMASK((u8 & 0xf), x4, 1);
+                    VMERGE_VVM(q0, v1, d0);
+                    break;
                 default: DEFAULT_VECTOR;
             }
             break;
@@ -877,6 +896,20 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 VFSGNJN_VV(q0, q0, q0, VECTOR_MASKED);
             }
             break;
+        case 0x5B:
+            INST_NAME("CVTPS2DQ Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETEX_vector(v1, 0, 0, VECTOR_SEW32);
+            GETGX_empty_vector(v0);
+            if (box64_dynarec_fastround) {
+                u8 = sse_setround(dyn, ninst, x6, x4);
+                VFCVT_X_F_V(v0, v1, VECTOR_UNMASKED);
+                x87_restoreround(dyn, ninst, u8);
+            } else {
+                return 0;
+            }
+            break;
        case 0x5C:
            INST_NAME("SUBPD Gx, Ex");
            nextop = F8;
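For reference, the DPPS translation above follows the usual x86 dot-product semantics: the high nibble of the immediate selects which lane products enter the sum, and the low nibble selects which destination lanes receive it. A minimal scalar sketch of that behavior (not box64 code; `dpps_ref` is a name made up here):

```c
#include <stdint.h>

/* Scalar model of DPPS Gx, Ex, Ib. It mirrors the two VECTOR_LOAD_VMASK
   calls in the translation: (u8 >> 4) gates the VFMUL_VV/VFREDUSUM_VS
   products, (u8 & 0xf) gates the final VMERGE_VVM write-back. */
static void dpps_ref(float gx[4], const float ex[4], uint8_t ib)
{
    float sum = 0.0f;
    for (int i = 0; i < 4; ++i)
        if (ib & (0x10 << i)) /* imm bits 7:4: multiply mask */
            sum += gx[i] * ex[i];
    for (int i = 0; i < 4; ++i)
        gx[i] = (ib & (1 << i)) ? sum : 0.0f; /* imm bits 3:0: write mask */
}
```

Since v1 is kept at zero, the final VMERGE_VVM selects, lane by lane, between the zero vector and the broadcast sum in d0, matching the model above.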
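The CVTPS2DQ case is only translated when box64_dynarec_fastround is set; otherwise it bails out (return 0) so the opcode is handled elsewhere. Under fastround, the conversion simply honors the current SSE rounding mode (sse_setround/x87_restoreround bracket the VFCVT_X_F_V) and skips the x86 corner case where out-of-range or NaN inputs must produce the integer indefinite 0x80000000. A rough scalar equivalent of the fastround behavior (`cvtps2dq_fast` is an illustrative name):

```c
#include <math.h>
#include <stdint.h>

/* Fastround model: convert each packed float with the current rounding
   mode (lrintf honors fegetround()); the x86 0x80000000 "integer
   indefinite" rule for out-of-range inputs is deliberately not modeled. */
static void cvtps2dq_fast(int32_t dst[4], const float src[4])
{
    for (int i = 0; i < 4; ++i)
        dst[i] = (int32_t)lrintf(src[i]);
}
```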
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index 7fa32d81..538df9cf 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -2836,6 +2836,9 @@ void vector_loadmask(dynarec_rv64_t* dyn, int ninst, int vreg, uint64_t imm, int
         }
     } else if ((sew == VECTOR_SEW32 && vlmul == VECTOR_LMUL1) || (sew == VECTOR_SEW64 && vlmul == VECTOR_LMUL2)) {
         switch (imm) {
+            case 0b0000:
+                VMV_S_X(vreg, xZR);
+                return;
             case 0b0001:
                 ADDI(s1, xZR, 1);
                 VMV_S_X(vreg, s1);
@@ -2846,11 +2849,17 @@ void vector_loadmask(dynarec_rv64_t* dyn, int ninst, int vreg, uint64_t imm, int
                 VMV_S_X(vreg, s1);
                 vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
                 return;
+            case 0b0011:
+                vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                MOV64x(s1, 0x100000001ULL);
+                VMV_S_X(vreg, s1);
+                vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                return;
             case 0b0100: {
                 int scratch = fpu_get_scratch(dyn);
                 vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
                 VMV_V_I(scratch, 1);
-                VXOR_VV(vreg, vreg, vreg, VECTOR_UNMASKED);
+                VMV_S_X(vreg, xZR);
                 VSLIDE1UP_VX(vreg, scratch, xZR, VECTOR_UNMASKED);
                 vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
                 return;
@@ -2860,28 +2869,102 @@ void vector_loadmask(dynarec_rv64_t* dyn, int ninst, int vreg, uint64_t imm, int
                 VMV_V_I(vreg, 1);
                 vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
                 return;
+            case 0b0110: {
+                int scratch = fpu_get_scratch(dyn);
+                vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                VMV_V_I(scratch, 1);
+                MOV64x(s1, 0x100000000ULL);
+                VSLIDE1UP_VX(vreg, scratch, s1, VECTOR_UNMASKED);
+                vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                return;
+            }
+            case 0b0111: {
+                int scratch = fpu_get_scratch(dyn);
+                vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                VMV_V_I(scratch, 1);
+                MOV64x(s1, 0x100000001ULL);
+                VSLIDE1UP_VX(vreg, scratch, s1, VECTOR_UNMASKED);
+                vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                return;
+            }
             case 0b1000: {
                 int scratch = fpu_get_scratch(dyn);
                 vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
                 MOV64x(s1, 0x100000000ULL);
                 VMV_V_X(scratch, s1);
-                VXOR_VV(vreg, vreg, vreg, VECTOR_UNMASKED);
+                VMV_S_X(vreg, xZR);
                 VSLIDE1UP_VX(vreg, scratch, xZR, VECTOR_UNMASKED);
                 vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
                 return;
             }
+            case 0b1001: {
+                int scratch = fpu_get_scratch(dyn);
+                vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                MOV64x(s1, 0x100000000ULL);
+                VMV_V_X(scratch, s1);
+                ADDI(s1, xZR, 1);
+                VSLIDE1UP_VX(vreg, scratch, s1, VECTOR_UNMASKED);
+                vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                return;
+            }
             case 0b1010:
                 vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
                 MOV64x(s1, 0x100000000ULL);
                 VMV_V_X(vreg, s1);
                 vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
                 return;
+            case 0b1011: {
+                int scratch = fpu_get_scratch(dyn);
+                vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                MOV64x(s1, 0x100000000ULL);
+                VMV_V_X(scratch, s1);
+                MOV64x(s1, 0x100000001ULL);
+                VSLIDE1UP_VX(vreg, scratch, s1, VECTOR_UNMASKED);
+                vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                return;
+            }
+            case 0b1100: {
+                int scratch = fpu_get_scratch(dyn);
+                vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                MOV64x(s1, 0x100000001ULL);
+                VMV_V_X(scratch, s1);
+                VMV_S_X(vreg, xZR);
+                VSLIDE1UP_VX(vreg, scratch, xZR, VECTOR_UNMASKED);
+                vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                return;
+            }
+            case 0b1101: {
+                int scratch = fpu_get_scratch(dyn);
+                vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                MOV64x(s1, 0x100000001ULL);
+                VMV_V_X(scratch, s1);
+                ADDI(s1, xZR, 1);
+                VSLIDE1UP_VX(vreg, scratch, s1, VECTOR_UNMASKED);
+                vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                return;
+            }
+            case 0b1110: {
+                int scratch = fpu_get_scratch(dyn);
+                vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                MOV64x(s1, 0x100000001ULL);
+                VMV_V_X(scratch, s1);
+                ADDI(s1, s1, -1);
+                VSLIDE1UP_VX(vreg, scratch, s1, VECTOR_UNMASKED);
+                vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                return;
+            }
+            case 0b1111:
+                vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                MOV64x(s1, 0x100000001ULL);
+                VMV_V_X(vreg, s1);
+                vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                return;
             default: abort();
         }
     } else if ((sew == VECTOR_SEW16 && vlmul == VECTOR_LMUL1) || (sew == VECTOR_SEW32 && vlmul == VECTOR_LMUL2)) {
         if (imm > 255) abort();
         if (imm == 0) {
-            VXOR_VV(vreg, vreg, vreg, VECTOR_UNMASKED);
+            VMV_S_X(vreg, xZR);
             return;
         }
         int low = imm & 0xF;
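The bulk of the helper change enumerates all sixteen values of the 4-bit element mask for the 4x32-bit shape. Each case temporarily switches to SEW64 and materializes a 0/1 pattern in which bit i of the immediate becomes a 32-bit 1 in lane i, picking short RVV sequences (VMV_S_X, VMV_V_I/VMV_V_X, VSLIDE1UP_VX) to synthesize the constants. The following standalone sketch (`mask_words` is a hypothetical name, not a box64 helper) prints the pair of 64-bit words each immediate expands to:

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Map a 4-bit element mask to the two 64-bit words the loadmask cases
   build: lanes 0-1 live in the low word, lanes 2-3 in the high word,
   each selected lane holding a 32-bit 1. */
static void mask_words(unsigned imm, uint64_t* lo, uint64_t* hi)
{
    *lo = ((uint64_t)((imm >> 1) & 1) << 32) | (uint64_t)(imm & 1);
    *hi = ((uint64_t)((imm >> 3) & 1) << 32) | (uint64_t)((imm >> 2) & 1);
}

int main(void)
{
    for (unsigned imm = 0; imm < 16; ++imm) {
        uint64_t lo, hi;
        mask_words(imm, &lo, &hi);
        printf("imm=%2u  lo=0x%016" PRIx64 "  hi=0x%016" PRIx64 "\n", imm, lo, hi);
    }
    return 0;
}
```

For instance, imm=0b0011 needs only the low word 0x100000001, which the new case loads with a single MOV64x plus VMV_S_X, while imm=0b0110 expands to {0x100000000, 0x1}, built from VMV_V_I(scratch, 1) slid up over the 0x100000000 constant.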