[RV64_DYNAREC] Added more opcodes for vector (#1972)

author: Yang Liu <liuyang22@iscas.ac.cn> 2024-10-29 05:34:49 +0800
committer: GitHub <noreply@github.com> 2024-10-28 22:34:49 +0100
commit: 9ffabf4c859ba4933abffc2fba0f0460d05ca3c9 (patch)
tree: b2521311f69637470de1e5bdc2102219cbad1e29 /src
parent: aebfd61539c595dddea2e3612c2ad4c358d2eae8 (diff)
download: box64-9ffabf4c859ba4933abffc2fba0f0460d05ca3c9.tar.gz
box64-9ffabf4c859ba4933abffc2fba0f0460d05ca3c9.zip
2 files changed, 119 insertions, 3 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index b8db65aa..278eac9e 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -753,6 +753,25 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                     VMERGE_VXM(v0, q0, ed); // uses VMASK
                     VMV_V_V(q0, v0);
                     break;
+                case 0x40:
+                    INST_NAME("DPPS Gx, Ex, Ib");
+                    nextop = F8;
+                    SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                    GETGX_vector(q0, 1, VECTOR_SEW32);
+                    GETEX_vector(q1, 0, 1, VECTOR_SEW32);
+                    u8 = F8;
+                    v0 = fpu_get_scratch(dyn);
+                    v1 = fpu_get_scratch(dyn);
+                    d0 = fpu_get_scratch(dyn);
+                    VXOR_VV(v1, v1, v1, VECTOR_UNMASKED);
+                    VECTOR_LOAD_VMASK((u8 >> 4), x4, 1);
+                    VFMUL_VV(v0, q0, q1, VECTOR_MASKED);
+                    VFREDUSUM_VS(d0, v0, v1, VECTOR_MASKED);
+                    VMV_X_S(x4, d0);
+                    VMV_V_X(d0, x4);
+                    VECTOR_LOAD_VMASK((u8 & 0xf), x4, 1);
+                    VMERGE_VVM(q0, v1, d0);
+                    break;
                 default: DEFAULT_VECTOR;
             }
             break;
@@ -877,6 +896,20 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 VFSGNJN_VV(q0, q0, q0, VECTOR_MASKED);
             }
             break;
+        case 0x5B:
+            INST_NAME("CVTPS2DQ Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETEX_vector(v1, 0, 0, VECTOR_SEW32);
+            GETGX_empty_vector(v0);
+            if (box64_dynarec_fastround) {
+                u8 = sse_setround(dyn, ninst, x6, x4);
+                VFCVT_X_F_V(v0, v1, VECTOR_UNMASKED);
+                x87_restoreround(dyn, ninst, u8);
+            } else {
+                return 0;
+            }
+            break;
         case 0x5C:
             INST_NAME("SUBPD Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index 7fa32d81..538df9cf 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -2836,6 +2836,9 @@ void vector_loadmask(dynarec_rv64_t* dyn, int ninst, int vreg, uint64_t imm, int
             }
         } else if ((sew == VECTOR_SEW32 && vlmul == VECTOR_LMUL1) || (sew == VECTOR_SEW64 && vlmul == VECTOR_LMUL2)) {
             switch (imm) {
+                case 0b0000:
+                    VMV_S_X(vreg, xZR);
+                    return;
                 case 0b0001:
                     ADDI(s1, xZR, 1);
                     VMV_S_X(vreg, s1);
@@ -2846,11 +2849,17 @@ void vector_loadmask(dynarec_rv64_t* dyn, int ninst, int vreg, uint64_t imm, int
                     VMV_S_X(vreg, s1);
                     vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
                     return;
+                case 0b0011:
+                    vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                    MOV64x(s1, 0x100000001ULL);
+                    VMV_S_X(vreg, s1);
+                    vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                    return;
                 case 0b0100: {
                     int scratch = fpu_get_scratch(dyn);
                     vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
                     VMV_V_I(scratch, 1);
-                    VXOR_VV(vreg, vreg, vreg, VECTOR_UNMASKED);
+                    VMV_S_X(vreg, xZR);
                     VSLIDE1UP_VX(vreg, scratch, xZR, VECTOR_UNMASKED);
                     vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
                     return;
@@ -2860,28 +2869,102 @@ void vector_loadmask(dynarec_rv64_t* dyn, int ninst, int vreg, uint64_t imm, int
                     VMV_V_I(vreg, 1);
                     vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
                     return;
+                case 0b0110: {
+                    int scratch = fpu_get_scratch(dyn);
+                    vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                    VMV_V_I(scratch, 1);
+                    MOV64x(s1, 0x100000000ULL);
+                    VSLIDE1UP_VX(vreg, scratch, s1, VECTOR_UNMASKED);
+                    vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                    return;
+                }
+                case 0b0111: {
+                    int scratch = fpu_get_scratch(dyn);
+                    vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                    VMV_V_I(scratch, 1);
+                    MOV64x(s1, 0x100000001ULL);
+                    VSLIDE1UP_VX(vreg, scratch, s1, VECTOR_UNMASKED);
+                    vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                    return;
+                }
                 case 0b1000: {
                     int scratch = fpu_get_scratch(dyn);
                     vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
                     MOV64x(s1, 0x100000000ULL);
                     VMV_V_X(scratch, s1);
-                    VXOR_VV(vreg, vreg, vreg, VECTOR_UNMASKED);
+                    VMV_S_X(vreg, xZR);
                     VSLIDE1UP_VX(vreg, scratch, xZR, VECTOR_UNMASKED);
                     vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
                     return;
                 }
+                case 0b1001: {
+                    int scratch = fpu_get_scratch(dyn);
+                    vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                    MOV64x(s1, 0x100000000ULL);
+                    VMV_V_X(scratch, s1);
+                    ADDI(s1, xZR, 1);
+                    VSLIDE1UP_VX(vreg, scratch, s1, VECTOR_UNMASKED);
+                    vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                    return;
+                }
                 case 0b1010:
                     vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
                     MOV64x(s1, 0x100000000ULL);
                     VMV_V_X(vreg, s1);
                     vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
                     return;
+                case 0b1011: {
+                    int scratch = fpu_get_scratch(dyn);
+                    vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                    MOV64x(s1, 0x100000000ULL);
+                    VMV_V_X(scratch, s1);
+                    MOV64x(s1, 0x100000001ULL);
+                    VSLIDE1UP_VX(vreg, scratch, s1, VECTOR_UNMASKED);
+                    vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                    return;
+                }
+                case 0b1100: {
+                    int scratch = fpu_get_scratch(dyn);
+                    vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                    MOV64x(s1, 0x100000001ULL);
+                    VMV_V_X(scratch, s1);
+                    VMV_S_X(vreg, xZR);
+                    VSLIDE1UP_VX(vreg, scratch, xZR, VECTOR_UNMASKED);
+                    vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                    return;
+                }
+                case 0b1101: {
+                    int scratch = fpu_get_scratch(dyn);
+                    vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                    MOV64x(s1, 0x100000001ULL);
+                    VMV_V_X(scratch, s1);
+                    ADDI(s1, xZR, 1);
+                    VSLIDE1UP_VX(vreg, scratch, s1, VECTOR_UNMASKED);
+                    vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                    return;
+                }
+                case 0b1110: {
+                    int scratch = fpu_get_scratch(dyn);
+                    vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                    MOV64x(s1, 0x100000001ULL);
+                    VMV_V_X(scratch, s1);
+                    ADDI(s1, s1, -1);
+                    VSLIDE1UP_VX(vreg, scratch, s1, VECTOR_UNMASKED);
+                    vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                    return;
+                }
+                case 0b1111:
+                    vector_vsetvli(dyn, ninst, s1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+                    MOV64x(s1, 0x100000001ULL);
+                    VMV_V_X(vreg, s1);
+                    vector_vsetvli(dyn, ninst, s1, sew, vlmul, multiple);
+                    return;
                 default: abort();
             }
         } else if ((sew == VECTOR_SEW16 && vlmul == VECTOR_LMUL1) || (sew == VECTOR_SEW32 && vlmul == VECTOR_LMUL2)) {
             if (imm > 255) abort();
             if (imm == 0) {
-                VXOR_VV(vreg, vreg, vreg, VECTOR_UNMASKED);
+                VMV_S_X(vreg, xZR);
                 return;
             }
             int low = imm & 0xF;
author	Yang Liu <liuyang22@iscas.ac.cn>	2024-10-29 05:34:49 +0800
committer	GitHub <noreply@github.com>	2024-10-28 22:34:49 +0100
commit	9ffabf4c859ba4933abffc2fba0f0460d05ca3c9 (patch)
tree	b2521311f69637470de1e5bdc2102219cbad1e29 /src
parent	aebfd61539c595dddea2e3612c2ad4c358d2eae8 (diff)
download	box64-9ffabf4c859ba4933abffc2fba0f0460d05ca3c9.tar.gz box64-9ffabf4c859ba4933abffc2fba0f0460d05ca3c9.zip