author     Yang Liu <liuyang22@iscas.ac.cn>          2024-10-28 21:55:58 +0800
committer  GitHub <noreply@github.com>               2024-10-28 14:55:58 +0100
commit     d3c1ea5b0fa1e6055a14f187798633461e1b6eab (patch)
tree       9e2b1aa5979f5085cd0aa38af7f9dafe2cca45f8 /src
parent     2835a2f87d293ce56ddf40f88520ad971de4f06b (diff)
download   box64-d3c1ea5b0fa1e6055a14f187798633461e1b6eab.tar.gz
           box64-d3c1ea5b0fa1e6055a14f187798633461e1b6eab.zip
[RV64_DYNAREC] Added more opcodes for vector (#1968)
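
A note on the new MINPS/MAXPS cases: when box64_dynarec_fastnan is disabled they reproduce the strict x86 NaN rule. On x86, if either lane operand of MINPS/MAXPS is a NaN, the result is the second source (Ex), whereas RVV's vfmin/vfmax return the non-NaN operand. The generated fixup builds a mask of lanes where both inputs are non-NaN (two self-equality compares ANDed together), inverts it, and then does a masked copy of Ex into Gx, so every lane with at least one NaN ends up holding Ex. A minimal scalar C sketch of the per-lane semantics being targeted (illustrative only, not the emitted code):

    #include <math.h>

    /* Per-lane x86 MINPS rule: a NaN in either input selects the second
       source; otherwise the ordered minimum (equal values also pick src). */
    static float x86_minps_lane(float dst, float src)
    {
        if (isnan(dst) || isnan(src))
            return src;                   /* x86 forwards the second source */
        return dst < src ? dst : src;     /* ordinary ordered minimum */
    }

When box64_dynarec_fastnan is set, the fixup is skipped entirely and the RVV result is kept as-is.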
Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_0f_vector.c    112
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_660f_vector.c   28
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_f30f_vector.c   66
3 files changed, 159 insertions, 47 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
index 1f5c0089..7435a468 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
@@ -251,6 +251,50 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
                 SMWRITE2();
             }
             break;
+        case 0x51:
+            INST_NAME("SQRTPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETEX_vector(v0, 0, 0, VECTOR_SEW32);
+            GETGX_empty_vector(v1);
+            VFSQRT_V(v1, v0, VECTOR_UNMASKED);
+            break;
+        case 0x53:
+            INST_NAME("RCPPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETEX_vector(v0, 0, 0, VECTOR_SEW32);
+            GETGX_empty_vector(v1);
+            LUI(x4, 0x3f800);
+            d0 = fpu_get_scratch(dyn);
+            FMVWX(d0, x4); // 1.0f
+            VFRDIV_VF(v1, v0, d0, VECTOR_UNMASKED);
+            break;
+        case 0x54:
+            INST_NAME("ANDPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETEX_vector(q0, 0, 0, VECTOR_SEW32);
+            GETGX_vector(v0, 1, VECTOR_SEW32);
+            VAND_VV(v0, v0, q0, VECTOR_UNMASKED);
+            break;
+        case 0x55:
+            INST_NAME("ANDNPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETEX_vector(q0, 0, 0, VECTOR_SEW32);
+            GETGX_vector(v0, 1, VECTOR_SEW32);
+            VXOR_VI(v0, v0, 0x1f, VECTOR_UNMASKED);
+            VAND_VV(v0, v0, q0, VECTOR_UNMASKED);
+            break;
+        case 0x56:
+            INST_NAME("ORPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETEX_vector(q0, 0, 0, VECTOR_SEW32);
+            GETGX_vector(v0, 1, VECTOR_SEW32);
+            VOR_VV(v0, v0, q0, VECTOR_UNMASKED);
+            break;
         case 0x57:
             INST_NAME("XORPS Gx, Ex");
             nextop = F8;
@@ -266,6 +310,74 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VXOR_VV(q0, q1, q0, VECTOR_UNMASKED);
             }
             break;
+        case 0x58:
+            INST_NAME("ADDPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETEX_vector(q0, 0, 0, VECTOR_SEW32);
+            GETGX_vector(v0, 1, VECTOR_SEW32);
+            VFADD_VV(v0, v0, q0, VECTOR_UNMASKED);
+            break;
+        case 0x59:
+            INST_NAME("MULPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETEX_vector(q0, 0, 0, VECTOR_SEW32);
+            GETGX_vector(v0, 1, VECTOR_SEW32);
+            VFMUL_VV(v0, v0, q0, VECTOR_UNMASKED);
+            break;
+        case 0x5C:
+            INST_NAME("SUBPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETEX_vector(q0, 0, 0, VECTOR_SEW32);
+            GETGX_vector(v0, 1, VECTOR_SEW32);
+            VFSUB_VV(v0, v0, q0, VECTOR_UNMASKED);
+            break;
+        case 0x5D:
+            INST_NAME("MINPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETGX_vector(q0, 1, VECTOR_SEW32);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW32);
+            if (!box64_dynarec_fastnan) {
+                v0 = fpu_get_scratch(dyn);
+                VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED);
+                VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED);
+            }
+            VFMIN_VV(q0, q0, q1, VECTOR_UNMASKED);
+            if (!box64_dynarec_fastnan) {
+                VMAND_MM(VMASK, v0, VMASK);
+                VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
+                VADD_VX(q0, q1, xZR, VECTOR_MASKED);
+            }
+            break;
+        case 0x5E:
+            INST_NAME("DIVPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETGX_vector(q0, 1, VECTOR_SEW32);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW32);
+            VFDIV_VV(q0, q0, q1, VECTOR_UNMASKED);
+            break;
+        case 0x5F:
+            INST_NAME("MAXPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETGX_vector(q0, 1, VECTOR_SEW32);
+            GETEX_vector(q1, 0, 0, VECTOR_SEW32);
+            v0 = fpu_get_scratch(dyn);
+            if (!box64_dynarec_fastnan) {
+                VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED);
+                VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED);
+            }
+            VFMAX_VV(q0, q0, q1, VECTOR_UNMASKED);
+            if (!box64_dynarec_fastnan) {
+                VMAND_MM(VMASK, v0, VMASK);
+                VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
+                VADD_VX(q0, q1, xZR, VECTOR_MASKED);
+            }
+            break;
         case 0xC6:
             INST_NAME("SHUFPS Gx, Ex, Ib");
             nextop = F8;
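
Two details worth calling out in the 0f file above. RCPPS builds the constant 1.0f with LUI x4, 0x3f800, which leaves 0x3f800000 in x4 (the IEEE-754 single-precision encoding of 1.0f), moves it into an FP scratch with FMVWX, and uses VFRDIV.VF to compute 1.0f/x per lane; this is a full-precision divide, while the x86 instruction is only an approximation with relative error on the order of 2^-12. ANDNPS gets its NOT from VXOR.VI with the immediate 0x1F, which as a 5-bit signed immediate is -1, so the XOR inverts every bit before the AND. A small C sketch of the RCPPS constant and its per-lane effect (an illustration, not the emitted RISC-V):

    #include <stdint.h>
    #include <string.h>

    /* LUI x4, 0x3f800 produces 0x3f800 << 12 == 0x3f800000, the bit
       pattern of 1.0f; FMVWX then reinterprets those bits as a float. */
    static float one_point_zero(void)
    {
        uint32_t bits = 0x3f800u << 12;
        float f;
        memcpy(&f, &bits, sizeof f);
        return f;                          /* == 1.0f */
    }

    /* Per-lane effect of VFRDIV.VF v1, v0, 1.0f: a full-precision
       reciprocal of each element. */
    static float rcpps_lane(float x)
    {
        return one_point_zero() / x;
    }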
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index 5d835fb4..438df522 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -862,12 +862,16 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             GETGX_vector(q0, 1, VECTOR_SEW64);
             GETEX_vector(q1, 0, 0, VECTOR_SEW64);
             v0 = fpu_get_scratch(dyn);
-            VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED);
-            VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED);
+            if (!box64_dynarec_fastnan) {
+                VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED);
+                VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED);
+            }
             VFMIN_VV(q0, q0, q1, VECTOR_UNMASKED);
-            VMAND_MM(VMASK, v0, VMASK);
-            VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
-            VADD_VX(q0, q1, xZR, VECTOR_MASKED);
+            if (!box64_dynarec_fastnan) {
+                VMAND_MM(VMASK, v0, VMASK);
+                VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
+                VADD_VX(q0, q1, xZR, VECTOR_MASKED);
+            }
             break;
         case 0x5E:
             INST_NAME("DIVPD Gx, Ex");
@@ -896,12 +900,16 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             GETGX_vector(q0, 1, VECTOR_SEW64);
             GETEX_vector(q1, 0, 0, VECTOR_SEW64);
             v0 = fpu_get_scratch(dyn);
-            VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED);
-            VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED);
+            if (!box64_dynarec_fastnan) {
+                VMFEQ_VV(VMASK, q0, q0, VECTOR_UNMASKED);
+                VMFEQ_VV(v0, q1, q1, VECTOR_UNMASKED);
+            }
             VFMAX_VV(q0, q0, q1, VECTOR_UNMASKED);
-            VMAND_MM(VMASK, v0, VMASK);
-            VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
-            VADD_VX(q0, q1, xZR, VECTOR_MASKED);
+            if (!box64_dynarec_fastnan) {
+                VMAND_MM(VMASK, v0, VMASK);
+                VXOR_VI(VMASK, VMASK, 0x1F, VECTOR_UNMASKED);
+                VADD_VX(q0, q1, xZR, VECTOR_MASKED);
+            }
             break;
         case 0x60:
             INST_NAME("PUNPCKLBW Gx, Ex");
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
index 3dda70b1..a4bec879 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
@@ -52,8 +52,8 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             INST_NAME("MOVSS Gx, Ex");
             nextop = F8;
             GETG;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
             if (MODREG) {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
                 ed = (nextop & 7) + (rex.b << 3);
                 v0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1, VECTOR_SEW32);
                 v1 = sse_get_reg_vector(dyn, ninst, x1, ed, 0, VECTOR_SEW32);
@@ -66,14 +66,11 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 }
             } else {
                 SMREAD();
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
                 v0 = sse_get_reg_empty_vector(dyn, ninst, x1, gd);
-                d0 = fpu_get_scratch(dyn);
-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                VECTOR_LOAD_VMASK(0xF, x4, 1);
-                VLE8_V(d0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LWU(x4, ed, fixedaddress);
                 VXOR_VV(v0, v0, v0, VECTOR_UNMASKED);
-                VMERGE_VVM(v0, v0, d0); // implies VMASK
+                VMV_S_X(v0, x4);
             }
             break;
         case 0x11:
@@ -130,18 +127,17 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
         case 0x59:
             INST_NAME("MULSS Gx, Ex");
             nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
             if (MODREG) {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
                 GETGX_vector(v0, 1, VECTOR_SEW32);
                 v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
             } else {
                 SMREAD();
                 v1 = fpu_get_scratch(dyn);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                VECTOR_LOAD_VMASK(0xFF, x4, 1);
-                VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LWU(x4, ed, fixedaddress);
+                VXOR_VV(v1, v1, v1, VECTOR_UNMASKED);
+                VMV_S_X(v1, x4);
                 GETGX_vector(v0, 1, VECTOR_SEW32);
             }
             if (box64_dynarec_fastnan) {
@@ -172,18 +168,17 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
         case 0x5A:
             INST_NAME("CVTSS2SD Gx, Ex");
             nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
             if (MODREG) {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
                 GETGX_vector(v0, 1, VECTOR_SEW32);
                 v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
             } else {
                 SMREAD();
                 v1 = fpu_get_scratch(dyn);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                VECTOR_LOAD_VMASK(0xFF, x4, 1);
-                VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LWU(x4, ed, fixedaddress);
+                VXOR_VV(v1, v1, v1, VECTOR_UNMASKED);
+                VMV_S_X(v1, x4);
                 GETGX_vector(v0, 1, VECTOR_SEW32);
             }
             d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
@@ -200,18 +195,17 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
         case 0x5D:
             INST_NAME("MINSS Gx, Ex");
             nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
             if (MODREG) {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
                 GETGX_vector(v0, 1, VECTOR_SEW32);
                 v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
             } else {
                 SMREAD();
                 v1 = fpu_get_scratch(dyn);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                VECTOR_LOAD_VMASK(0xFF, x4, 1);
-                VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LWU(x4, ed, fixedaddress);
+                VXOR_VV(v1, v1, v1, VECTOR_UNMASKED);
+                VMV_S_X(v1, x4);
                 GETGX_vector(v0, 1, VECTOR_SEW32);
             }
             d0 = fpu_get_scratch(dyn);
@@ -232,18 +226,17 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
         case 0x5F:
             INST_NAME("MAXSS Gx, Ex");
             nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
             if (MODREG) {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
                 GETGX_vector(v0, 1, VECTOR_SEW32);
                 v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
             } else {
                 SMREAD();
                 v1 = fpu_get_scratch(dyn);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                VECTOR_LOAD_VMASK(0xFF, x4, 1);
-                VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LWU(x4, ed, fixedaddress);
+                VXOR_VV(v1, v1, v1, VECTOR_UNMASKED);
+                VMV_S_X(v1, x4);
                 GETGX_vector(v0, 1, VECTOR_SEW32);
             }
             d0 = fpu_get_scratch(dyn);
@@ -269,18 +262,17 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
         case 0xC2:
             INST_NAME("CMPSS Gx, Ex, Ib");
             nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
             if (MODREG) {
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
                 GETGX_vector(d0, 1, VECTOR_SEW32);
                 d1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
             } else {
                 SMREAD();
                 d1 = fpu_get_scratch(dyn);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 1);
-                VECTOR_LOAD_VMASK(0xFF, x4, 1);
-                VLE8_V(d1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
-                SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 1);
+                LWU(x4, ed, fixedaddress);
+                VXOR_VV(d1, d1, d1, VECTOR_UNMASKED);
+                VMV_S_X(d1, x4);
                 GETGX_vector(d0, 1, VECTOR_SEW32);
             }
             u8 = F8;
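
Finally, the f30f changes above replace the masked byte loads used by the memory forms of MOVSS/MULSS/CVTSS2SD/MINSS/MAXSS/CMPSS (VECTOR_LOAD_VMASK plus VLE8.V at SEW=8, then a switch back to SEW=32) with a plain scalar LWU of the 32-bit operand and a VMV.S.X into element 0 of a zeroed scratch register, dropping the extra element-width changes. A minimal C model of what the new sequence leaves in the scratch vector, assuming a 4-lane 32-bit view of the register (illustrative only, not the emitted code):

    #include <stdint.h>
    #include <string.h>

    /* VXOR.VV v,v,v clears the scratch; LWU loads the 32-bit scalar from
       the effective address; VMV.S.X writes it into element 0 only. */
    static void load_ss_operand(uint32_t vec[4], const void* ea)
    {
        memset(vec, 0, 4 * sizeof(uint32_t));   /* VXOR.VV v, v, v        */
        memcpy(&vec[0], ea, sizeof(uint32_t));  /* LWU x4, ...; VMV.S.X v, x4 */
    }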