about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: Yang Liu <liuyang22@iscas.ac.cn> 2024-09-23 18:42:59 +0800
committer: GitHub <noreply@github.com> 2024-09-23 12:42:59 +0200
commit: 31f574ed363b403c15445e3ff2b17cbbe8a85428 (patch)
tree: a71cb0567f685089a03a4e36be01b96fbec17807 /src
parent: d9ff07b60564e100ab89331c6988797b15ac6711 (diff)
download: box64-31f574ed363b403c15445e3ff2b17cbbe8a85428.tar.gz
box64-31f574ed363b403c15445e3ff2b17cbbe8a85428.zip
[RV64_DYNAREC] Added more opcodes for vector (#1857)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f_vector.c | 66
1 file changed, 63 insertions(+), 3 deletions(-)
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index fbd062b3..60ff40a1 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -86,9 +86,69 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                     VRGATHER_VV(v1, v0, q0, VECTOR_UNMASKED); // registers cannot be overlapped!!
                     VMV_V_V(q0, v1);
                     break;
-                case 0x01 ... 0x07:
-                    // pairwise opcodes are complicated, fallback to scalar.
-                    return 0;
+                case 0x01:
+                    INST_NAME("PHADDW Gx, Ex");
+                    nextop = F8;
+                    SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+                    GETGX_vector(q0, 1, VECTOR_SEW16);
+                    GETEX_vector(q1, 0, 0, VECTOR_SEW16);
+                    v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
+                    d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
+                    d1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); // no more scratches!
+                    VXOR_VV(v0, v0, v0, VECTOR_UNMASKED);
+                    VMV_V_V(v0, q0);
+                    if (q1 & 1) VMV_V_V(d1, q1);
+                    vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL2, 2);
+                    VSLIDEUP_VI(v0, 8, (q1 & 1) ? d1 : q1, VECTOR_UNMASKED);
+                    MOV64x(x4, 0b0101010101010101);
+                    VMV_S_X(VMASK, x4);
+                    VCOMPRESS_VM(d0, VMASK, v0);
+                    VXOR_VI(VMASK, 0x1F, VMASK, VECTOR_UNMASKED);
+                    VCOMPRESS_VM(d1, VMASK, v0);
+                    vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 1);
+                    VADD_VV(q0, d0, d1, VECTOR_UNMASKED);
+                    break;
+                case 0x02:
+                    INST_NAME("PHADDD Gx, Ex");
+                    nextop = F8;
+                    SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                    GETGX_vector(q0, 1, VECTOR_SEW32);
+                    GETEX_vector(q1, 0, 0, VECTOR_SEW32);
+                    v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
+                    d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
+                    d1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); // no more scratches!
+                    VXOR_VV(v0, v0, v0, VECTOR_UNMASKED);
+                    VMV_V_V(v0, q0);
+                    if (q1 & 1) VMV_V_V(d1, q1);
+                    vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 2);
+                    VSLIDEUP_VI(v0, 4, (q1 & 1) ? d1 : q1, VECTOR_UNMASKED);
+                    MOV64x(x4, 0b01010101);
+                    VMV_S_X(VMASK, x4);
+                    VCOMPRESS_VM(d0, VMASK, v0);
+                    VXOR_VI(VMASK, 0x1F, VMASK, VECTOR_UNMASKED);
+                    VCOMPRESS_VM(d1, VMASK, v0);
+                    vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 1);
+                    VADD_VV(q0, d0, d1, VECTOR_UNMASKED);
+                    break;
+                case 0x04:
+                    INST_NAME("PMADDUBSW Gx, Ex");
+                    nextop = F8;
+                    SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
+                    GETGX_vector(q0, 1, VECTOR_SEW8);
+                    GETEX_vector(q1, 0, 0, VECTOR_SEW8);
+                    v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
+                    d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
+                    d1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); // no more scratches!
+                    VWMULSU_VV(v0, q0, q1, VECTOR_UNMASKED);
+                    vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL2, 2);
+                    MOV64x(x4, 0b0101010101010101);
+                    VMV_S_X(VMASK, x4);
+                    VCOMPRESS_VM(d0, VMASK, v0);
+                    VXOR_VI(VMASK, 0x1F, VMASK, VECTOR_UNMASKED);
+                    VCOMPRESS_VM(d1, VMASK, v0);
+                    SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+                    VSADD_VV(q0, d0, d1, VECTOR_UNMASKED);
+                    break;
                 case 0x08 ... 0x0A:
                     if (nextop == 0x08) {
                         INST_NAME("PSIGNB Gx, Ex");