about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: xctan <xctan@cirno.icu> 2024-11-15 17:11:11 +0800
committer: GitHub <noreply@github.com> 2024-11-15 10:11:11 +0100
commit: bca0f4d66d3bcf235c7ec84f43d105fce7df3470 (patch)
tree: 60540958a57b5f02e2ae016f0c050926fe9625f9 /src
parent: 8c17a37d1ae4258e7a7a7fdf87a43305f40dc2a1 (diff)
download: box64-bca0f4d66d3bcf235c7ec84f43d105fce7df3470.tar.gz
          box64-bca0f4d66d3bcf235c7ec84f43d105fce7df3470.zip
[RV64_DYNAREC] Added more MMX opcodes for vector (#2037)
* [RV64_DYNAREC] Added 0F 74-76 PCMPEQB/W/D opcodes

* [RV64_DYNAREC] Added 0F 64-66 PCMPGTB/W/D opcodes

* [RV64_DYNAREC] Added 0F E1-E2 PSRAW/D opcodes

* [RV64_DYNAREC] Added 0F 6E MOVD opcode

* [RV64_DYNAREC] Added 0F 73 /2 PSRLQ opcode

* [RV64_DYNAREC] Added 0F 73 /6 PSLLQ opcode
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f_vector.c | 111
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.h | 8
2 files changed, 119 insertions, 0 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
index 195d58f7..73c9395d 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
@@ -542,6 +542,26 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
             VNCLIP_WI(v0, d0, 0, VECTOR_UNMASKED);
             break;
+        case 0x64 ... 0x66: // PCMPGTB/W/D: per-lane signed compare; lanes where Gm > Em become all-ones, the others 0
+            if (opcode == 0x64) {
+                INST_NAME("PCMPGTB Gm, Em");
+                u8 = VECTOR_SEW8;
+            } else if (opcode == 0x65) {
+                INST_NAME("PCMPGTW Gm, Em");
+                u8 = VECTOR_SEW16;
+            } else {
+                INST_NAME("PCMPGTD Gm, Em");
+                u8 = VECTOR_SEW32;
+            }
+            nextop = F8;
+            GETGM_vector(q0);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); // Em is fetched at SEW64 -- presumably GETEM_vector's 64-bit load path needs it; confirm in helper.h
+            GETEM_vector(q1, 0);
+            SET_ELEMENT_WIDTH(x1, u8, 1); // u8 holds the lane width picked from the opcode above
+            VMSLT_VV(VMASK, q1, q0, VECTOR_UNMASKED); // mask = (Em < Gm), assuming the emitter's (vd, vs2, vs1) operand order -- TODO confirm
+            VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); // zero the destination before merging
+            VMERGE_VIM(q0, q0, 0b11111); // vmerge reads the mask implicitly; masked lanes take the 5-bit immediate (-1, i.e. all-ones)
+            break;
         case 0x67:
             INST_NAME("PACKUSWB Gm, Em");
             nextop = F8;
@@ -629,6 +649,19 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
             VNCLIP_WI(v0, d0, 0, VECTOR_UNMASKED);
             break;
+        case 0x6E: // MOVD Gm, Ed: move a 32-bit (64-bit with REX.W) integer into element 0 of Gm, zeroing the rest
+            INST_NAME("MOVD Gm, Ed");
+            nextop = F8;
+            GETGM_vector(v0);
+            GETED(0);
+            if (rex.w) {
+                SET_ELEMENT_WIDTH(x3, VECTOR_SEW64, 1); // REX.W set: treat the source as one 64-bit element
+            } else {
+                SET_ELEMENT_WIDTH(x3, VECTOR_SEW32, 1); // otherwise the source is a 32-bit element
+            }
+            VXOR_VV(v0, v0, v0, VECTOR_UNMASKED); // clear Gm first so everything besides element 0 ends up zero
+            VMV_S_X(v0, ed); // write ed into element 0
+            break;
         case 0x6F:
             INST_NAME("MOVQ Gm, Em");
             nextop = F8;
@@ -645,6 +678,62 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VMV_S_X(v0, x4);
             }
             break;
+        case 0x73: // 0F 73 group: 64-bit logical shifts by an immediate, selected by the ModRM reg field
+            nextop = F8;
+            switch ((nextop >> 3) & 7) {
+                case 2:
+                    INST_NAME("PSRLQ Em, Ib");
+                    SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+                    GETEM_vector(q0, 0); // NOTE(review): an Ib follows ModRM; confirm whether the last argument should account for it
+                    u8 = F8; // shift count is the immediate byte
+                    if (u8) { // a zero count leaves Em unchanged, so nothing is emitted
+                        if (u8 > 63) {
+                            VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); // count above 63 shifts every bit out: result is 0
+                        } else {
+                            MOV64x(x4, u8);
+                            VSRL_VX(q0, q0, x4, VECTOR_UNMASKED); // logical right shift by the immediate
+                        }
+                        PUTEM_vector(q0); // stores back only when Em is not a register (see the !MODREG test in the macro)
+                    }
+                    break;
+                case 6:
+                    INST_NAME("PSLLQ Em, Ib");
+                    SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+                    GETEM_vector(q0, 0); // NOTE(review): an Ib follows ModRM; confirm whether the last argument should account for it
+                    u8 = F8; // shift count is the immediate byte
+                    if (u8) { // a zero count leaves Em unchanged, so nothing is emitted
+                        if (u8 > 63) {
+                            VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); // count above 63 shifts every bit out: result is 0
+                        } else {
+                            MOV64x(x4, u8);
+                            VSLL_VX(q0, q0, x4, VECTOR_UNMASKED); // logical left shift by the immediate
+                        }
+                        PUTEM_vector(q0); // stores back only when Em is not a register (see the !MODREG test in the macro)
+                    }
+                    break;
+                default: DEFAULT_VECTOR; // other /r encodings are not handled by this vector path
+            }
+            break;
+        case 0x74 ... 0x76: // PCMPEQB/W/D: per-lane equality compare; equal lanes become all-ones, the others 0
+            if (opcode == 0x74) {
+                INST_NAME("PCMPEQB Gm, Em");
+                u8 = VECTOR_SEW8;
+            } else if (opcode == 0x75) {
+                INST_NAME("PCMPEQW Gm, Em");
+                u8 = VECTOR_SEW16;
+            } else {
+                INST_NAME("PCMPEQD Gm, Em");
+                u8 = VECTOR_SEW32;
+            }
+            nextop = F8;
+            GETGM_vector(q0);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1); // Em is fetched at SEW64 -- presumably GETEM_vector's 64-bit load path needs it; confirm in helper.h
+            GETEM_vector(q1, 0);
+            SET_ELEMENT_WIDTH(x1, u8, 1); // u8 holds the lane width picked from the opcode above
+            VMSEQ_VV(VMASK, q1, q0, VECTOR_UNMASKED); // mask bit set for each lane where Em == Gm
+            VXOR_VV(q0, q0, q0, VECTOR_UNMASKED); // zero the destination before merging
+            VMERGE_VIM(q0, q0, 0b11111); // vmerge reads the mask implicitly; masked lanes take the 5-bit immediate (-1, i.e. all-ones)
+            break;
         case 0x7F:
             INST_NAME("MOVQ Em, Gm");
             nextop = F8;
@@ -835,6 +924,28 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
             VXOR_VI(v0, v0, 0x1F, VECTOR_UNMASKED);
             VAND_VV(v0, v0, v1, VECTOR_UNMASKED);
             break;
+        case 0xE1: // PSRAW/PSRAD Gm, Em: arithmetic right shift of every Gm lane by the 64-bit count held in Em
+        case 0xE2:
+            if (opcode == 0xE1) {
+                INST_NAME("PSRAW Gm, Em");
+                u8 = VECTOR_SEW16;
+                i32 = 16;
+            } else {
+                INST_NAME("PSRAD Gm, Em");
+                u8 = VECTOR_SEW32;
+                i32 = 32;
+            }
+            nextop = F8;
+            GETGM_vector(v0);
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            GETEM_vector(v1, 0);
+            MOV32w(x5, i32 - 1); // largest useful shift (15 or 31): shifting by it already fills the lane with the sign bit
+            q0 = fpu_get_scratch(dyn);
+            VMINU_VX(q0, v1, x5, VECTOR_UNMASKED); // clamp while still at SEW64 so a count like 0x10000 saturates instead of truncating to 0
+            SET_ELEMENT_WIDTH(x1, u8, 1); // switch to the lane width only after the full 64-bit count was clamped
+            VMV_X_S(x4, q0); // element 0 now holds the clamped count (fits in the low bits of any lane width)
+            VSRA_VX(v0, v0, x4, VECTOR_UNMASKED);
+            break;
         case 0xE5:
             INST_NAME("PMULHW Gm, Em");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 4e168ab6..074a000f 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -561,6 +561,14 @@
         VFMV_S_F(a, a);                                                                        \
     }
 
+// Write EM back when it is a memory operand (not an mm register): element 0 of `a` is moved to the FP register of the same index and stored with FSD at ed + fixedaddress; requires SEW64
+#define PUTEM_vector(a)                                     \
+    if (!MODREG) {                                          \
+        VFMV_F_S(a, a);                                     \
+        FSD(a, ed, fixedaddress);                           \
+        SMWRITE2();                                         \
+    }
+
 #define GETGX_empty_vector(a)                   \
     gd = ((nextop & 0x38) >> 3) + (rex.r << 3); \
     a = sse_get_reg_empty_vector(dyn, ninst, x1, gd)