about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f_vector.c 64
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f_vector.c 13
2 files changed, 68 insertions, 9 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
index 151a735d..4c57b9c0 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
@@ -293,6 +293,21 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
         case 0x38:
             nextop = F8;
             switch (nextop) {
+                case 0x00:
+                    INST_NAME("PSHUFB Gm, Em");
+                    nextop = F8;
+                    GETGM_vector(q0);
+                    SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+                    GETEM_vector(q1, 0);
+                    SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
+                    v0 = fpu_get_scratch(dyn);
+                    v1 = fpu_get_scratch(dyn);
+                    ADDI(x4, xZR, 0b000010000111);
+                    VMV_V_X(v0, x4); // broadcast the mask
+                    VAND_VV(v0, q1, v0, VECTOR_UNMASKED);
+                    VRGATHER_VV(v1, q0, v0, VECTOR_UNMASKED); // registers cannot be overlapped!!
+                    VMV_V_V(q0, v1);
+                    break;
                 case 0x01:
                     INST_NAME("PHADDW Gm, Em");
                     nextop = F8;
@@ -413,6 +428,55 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
                     VNSRL_WI(d1, v0, 16, VECTOR_UNMASKED);
                     VSSUB_VV(q0, d0, d1, VECTOR_UNMASKED);
                     break;
+                case 0x08 ... 0x0A:
+                    if (nextop == 0x08) {
+                        INST_NAME("PSIGNB Gm, Em");
+                        i32 = 7;
+                        nextop = F8;
+                        GETGM_vector(q0);
+                        SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+                        GETEM_vector(q1, 0);
+                        SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
+                    } else if (nextop == 0x09) {
+                        INST_NAME("PSIGNW Gm, Em");
+                        i32 = 15;
+                        nextop = F8;
+                        GETGM_vector(q0);
+                        SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+                        GETEM_vector(q1, 0);
+                        SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+                    } else {
+                        INST_NAME("PSIGND Gm, Em");
+                        i32 = 31;
+                        nextop = F8;
+                        GETGM_vector(q0);
+                        SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+                        GETEM_vector(q1, 0);
+                        SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                    }
+                    v0 = fpu_get_scratch(dyn);
+                    v1 = fpu_get_scratch(dyn);
+                    VMSLT_VX(VMASK, q1, xZR, VECTOR_UNMASKED);
+                    VRSUB_VX(q0, q0, xZR, VECTOR_MASKED);
+                    VMSEQ_VX(VMASK, q1, xZR, VECTOR_UNMASKED);
+                    VXOR_VV(q0, q0, q0, VECTOR_MASKED);
+                    break;
+                case 0x0B:
+                    INST_NAME("PMULHRSW Gm, Em");
+                    nextop = F8;
+                    GETGM_vector(q0);
+                    SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+                    GETEM_vector(q1, 0);
+                    SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+                    v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
+                    fpu_get_scratch(dyn);
+                    VWMUL_VV(v0, q1, q0, VECTOR_UNMASKED);
+                    vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 1);
+                    VSRL_VI(v0, v0, 14, VECTOR_UNMASKED);
+                    VADD_VI(v0, v0, 1, VECTOR_UNMASKED);
+                    vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 1);
+                    VNSRL_WI(q0, v0, 1, VECTOR_UNMASKED);
+                    break;
                 default:
                     DEFAULT;
             }
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index 317a5786..5baaa65d 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -373,15 +373,10 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                     }
                     v0 = fpu_get_scratch(dyn);
                     v1 = fpu_get_scratch(dyn);
-                    // absolute
-                    VSRA_VI(v0, q1, i32, VECTOR_UNMASKED);
-                    VXOR_VV(v1, q0, v0, VECTOR_UNMASKED);
-                    VSUB_VV(v1, v1, v0, VECTOR_UNMASKED);
-                    // handle zeroing
-                    VMSEQ_VI(VMASK, q1, 0, VECTOR_UNMASKED);
-                    VXOR_VV(v0, v0, v0, VECTOR_UNMASKED);
-                    VADC_VIM(v0, v0, 0x1f); // implies VMASK
-                    VAND_VV(q0, v0, v1, VECTOR_UNMASKED);
+                    VMSLT_VX(VMASK, q1, xZR, VECTOR_UNMASKED);
+                    VRSUB_VX(q0, q0, xZR, VECTOR_MASKED);
+                    VMSEQ_VX(VMASK, q1, xZR, VECTOR_UNMASKED);
+                    VXOR_VV(q0, q0, q0, VECTOR_MASKED);
                     break;
                 case 0x0B:
                     INST_NAME("PMULHRSW Gx, Ex");