about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author xctan <xctan@cirno.icu> 2024-11-14 02:43:57 +0800
committer GitHub <noreply@github.com> 2024-11-13 19:43:57 +0100
commit 09b8c3b94e1cc9ab7f963cbaad8630a366dafde2 (patch)
tree da0945fdb5da71546759da3998b4b41b3c8531e0 /src
parent 1dbb676f38417024ca6764d5888a40e58b82ae70 (diff)
download box64-09b8c3b94e1cc9ab7f963cbaad8630a366dafde2.tar.gz
box64-09b8c3b94e1cc9ab7f963cbaad8630a366dafde2.zip
[RV64_DYNAREC] Added more MMX opcodes for vector (#2027)
* [RV64_DYNAREC] Added 0F D5 PMULLW opcode

* [RV64_DYNAREC] Added 0F E5 PMULHW opcode

* [RV64_DYNAREC] Added 0F F5 PMADDWD opcode

* [RV64_DYNAREC] Added 0F 6B PACKSSDW opcode

* [RV64_DYNAREC] Added 0F 63 PACKSSWB opcode

* [RV64_DYNAREC] Added 0F 67 PACKUSWB opcode

* [RV64_DYNAREC] Removed useless vsetvli in MMX PACKUSWB/SSWB/SSDW
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f_vector.c 79
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f_vector.c 1
2 files changed, 79 insertions, 1 deletion
diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
index 41e91486..f5c3a4d8 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
@@ -483,6 +483,51 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VADD_VX(q0, q1, xZR, VECTOR_MASKED);
             }
             break;
+        case 0x63:
+            INST_NAME("PACKSSWB Gm, Em");
+            nextop = F8;
+            GETGM_vector(v0);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            GETEM_vector(v1, 0);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+            d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
+            VMV_V_V(d0, v0);
+            VSLIDEUP_VI(d0, v1, 4, VECTOR_UNMASKED);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
+            VNCLIP_WI(v0, d0, 0, VECTOR_UNMASKED);
+            break;
+        case 0x67:
+            INST_NAME("PACKUSWB Gm, Em");
+            nextop = F8;
+            GETGM_vector(q0);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            GETEM_vector(q1, 0);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+            d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
+            if (q0 == q1) {
+                VMV_V_V(d0, q0);
+                VSLIDEUP_VI(d0, q1, 4, VECTOR_UNMASKED); // splice q0 and q1 here!
+                VMAX_VX(d0, d0, xZR, VECTOR_UNMASKED);
+            } else {
+                VSLIDEUP_VI(q0, q1, 4, VECTOR_UNMASKED); // splice q0 and q1 here!
+                VMAX_VX(d0, q0, xZR, VECTOR_UNMASKED);
+            }
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
+            VNCLIPU_WI(q0, d0, 0, VECTOR_UNMASKED);
+            break;
+        case 0x6B:
+            INST_NAME("PACKSSDW Gm, Em");
+            nextop = F8;
+            GETGM_vector(v0);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            GETEM_vector(v1, 0);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
+            VMV_V_V(d0, v0);
+            VSLIDEUP_VI(d0, v1, 2, VECTOR_UNMASKED);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+            VNCLIP_WI(v0, d0, 0, VECTOR_UNMASKED);
+            break;
         case 0x6F:
             INST_NAME("MOVQ Gm, Em");
             nextop = F8;
@@ -631,6 +676,15 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
             VAND_VX(q0, v0, x3, VECTOR_UNMASKED);
             VXOR_VV(v0, v0, q0, VECTOR_UNMASKED);
             break;
+        case 0xD5:
+            INST_NAME("PMULLW Gm, Em");
+            nextop = F8;
+            GETGM_vector(v0);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            GETEM_vector(v1, 0);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+            VMUL_VV(v0, v0, v1, VECTOR_UNMASKED);
+            break;
         case 0xD8:
         case 0xD9:
             if (opcode == 0xD8) {
@@ -680,6 +734,15 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
             VXOR_VI(v0, v0, 0x1F, VECTOR_UNMASKED);
             VAND_VV(v0, v0, v1, VECTOR_UNMASKED);
             break;
+        case 0xE5:
+            INST_NAME("PMULHW Gm, Em");
+            nextop = F8;
+            GETGM_vector(v0);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            GETEM_vector(v1, 0);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+            VMULH_VV(v0, v0, v1, VECTOR_UNMASKED);
+            break;
         case 0xE8:
             INST_NAME("PSUBSB Gm, Em");
             nextop = F8;
@@ -775,6 +838,22 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
             VAND_VX(q0, v0, x3, VECTOR_UNMASKED);
             VXOR_VV(v0, v0, q0, VECTOR_UNMASKED);
             break;
+        case 0xF5:
+            INST_NAME("PMADDWD Gm, Em");
+            nextop = F8;
+            GETGM_vector(v0);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            GETEM_vector(v1, 0);
+            q1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL1);
+            q0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
+            ADDI(x3, xZR, 32);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+            VWMUL_VV(q0, v1, v0, VECTOR_UNMASKED);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            VNSRL_WX(q1, q0, x3, VECTOR_UNMASKED);
+            VNSRL_WI(v0, q0, 0, VECTOR_UNMASKED);
+            VADD_VV(v0, v0, q1, VECTOR_UNMASKED);
+            break;
         case 0xF8 ... 0xFB:
             nextop = F8;
             if (opcode == 0xF8) {
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index 7cb244fe..bbdfdf47 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -1780,7 +1780,6 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
             GETGX_vector(q0, 1, VECTOR_SEW16);
             GETEX_vector(q1, 0, 0, VECTOR_SEW16);
-            v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
             VMUL_VV(q0, q1, q0, VECTOR_UNMASKED);
             break;
         case 0xD6: