about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2024-10-30 18:02:09 +0800
committer GitHub <noreply@github.com> 2024-10-30 11:02:09 +0100
commit 34520c0b4f491bf6ae468efde98c0ed87c1b3e56 (patch)
tree f69d68ebac28bcb0e9801cdbd58dba9dda8d1743 /src
parent 121148da72f44937012ee14bcd48ab43b846977b (diff)
download box64-34520c0b4f491bf6ae468efde98c0ed87c1b3e56.tar.gz
box64-34520c0b4f491bf6ae468efde98c0ed87c1b3e56.zip
[RV64_DYNAREC] Added more opcodes for vector (#1980)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f_vector.c 27
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_f30f.c 43
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_f30f_vector.c 101
3 files changed, 149 insertions, 22 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
index dca311d4..cba9796d 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
@@ -251,6 +251,33 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
                 SMWRITE2();
             }
             break;
+        case 0x50:
+            INST_NAME("MOVMSKPS Gd, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETGD;
+            GETEX_vector(q0, 0, 0, VECTOR_SEW32);
+            v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL8);
+            VSRL_VI(v0, q0, 31, VECTOR_UNMASKED);
+            if (rv64_xtheadvector) {
+                // Force the element width to 4bit
+                vector_vsetvli(dyn, ninst, x4, VECTOR_SEW32, VECTOR_LMUL8, 1);
+                VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED);
+                vector_vsetvli(dyn, ninst, x4, VECTOR_SEW32, VECTOR_LMUL1, 1);
+                VMV_X_S(x4, VMASK);
+                BEXTI(gd, x4, 12);
+                BEXTI(x5, x4, 8);
+                ADDSL(gd, x5, gd, 1, x6);
+                BEXTI(x5, x4, 4);
+                ADDSL(gd, x5, gd, 1, x6);
+                BEXTI(x5, x4, 0);
+                ADDSL(gd, x5, gd, 1, x6);
+            } else {
+                VMSNE_VX(VMASK, v0, xZR, VECTOR_UNMASKED);
+                VMV_X_S(gd, VMASK);
+                ZEROUP(gd);
+            }
+            break;
         case 0x51:
             INST_NAME("SQRTPS Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c
index 97fa1935..67305342 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f.c
@@ -242,6 +242,27 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 FCVTDS(v0, v1);
             }
             break;
+        case 0x5B:
+            INST_NAME("CVTTPS2DQ Gx, Ex");
+            nextop = F8;
+            GETGX();
+            GETEX(x2, 0, 12);
+            v0 = fpu_get_scratch(dyn);
+            for (int i = 0; i < 4; ++i) {
+                if (!box64_dynarec_fastround) {
+                    FSFLAGSI(0); // reset all bits
+                }
+                FLW(v0, wback, fixedaddress + i * 4);
+                FCVTWS(x3, v0, RD_RTZ);
+                if (!box64_dynarec_fastround) {
+                    FRFLAGS(x5); // get back FPSR to check the IOC bit
+                    ANDI(x5, x5, (1 << FR_NV) | (1 << FR_OF));
+                    BEQZ(x5, 8);
+                    MOV32w(x3, 0x80000000);
+                }
+                SW(x3, gback, gdoffset + i * 4);
+            }
+            break;
         case 0x5C:
             INST_NAME("SUBSS Gx, Ex");
             nextop = F8;
@@ -344,28 +365,6 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             SSE_LOOP_MV_Q2(x3);
             if (!MODREG) SMWRITE2();
             break;
-
-        case 0x5B:
-            INST_NAME("CVTTPS2DQ Gx, Ex");
-            nextop = F8;
-            GETGX();
-            GETEX(x2, 0, 12);
-            v0 = fpu_get_scratch(dyn);
-            for (int i = 0; i < 4; ++i) {
-                if (!box64_dynarec_fastround) {
-                    FSFLAGSI(0); // reset all bits
-                }
-                FLW(v0, wback, fixedaddress + i * 4);
-                FCVTWS(x3, v0, RD_RTZ);
-                if (!box64_dynarec_fastround) {
-                    FRFLAGS(x5); // get back FPSR to check the IOC bit
-                    ANDI(x5, x5, (1 << FR_NV) | (1 << FR_OF));
-                    BEQZ(x5, 8);
-                    MOV32w(x3, 0x80000000);
-                }
-                SW(x3, gback, gdoffset + i * 4);
-            }
-            break;
         case 0xAE:
             nextop = F8;
             switch ((nextop >> 3) & 7) {
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
index abd39e98..4c0088e6 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
@@ -122,6 +122,32 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 VFMV_S_F(v0, v0);
             }
             break;
+        case 0x2C:
+            INST_NAME("CVTTSS2SI Gd, Ex");
+            nextop = F8;
+            GETGD;
+            SET_ELEMENT_WIDTH(x3, VECTOR_SEW32, 1);
+            if (MODREG) {
+                ed = (nextop & 7) + (rex.b << 3);
+                d0 = sse_get_reg_vector(dyn, ninst, x1, ed, 0, VECTOR_SEW32);
+                VFMV_F_S(d0, d0);
+            } else {
+                GETEXSS(d0, 0);
+            }
+            if (!box64_dynarec_fastround) FSFLAGSI(0);
+            FCVTSxw(gd, d0, RD_RTZ);
+            if (!rex.w) ZEROUP(gd);
+            if (!box64_dynarec_fastround) {
+                FRFLAGS(x5);
+                ANDI(x5, x5, (1 << FR_NV) | (1 << FR_OF));
+                CBZ_NEXT(x5);
+                if (rex.w) {
+                    MOV64x(gd, 0x8000000000000000LL);
+                } else {
+                    MOV32w(gd, 0x80000000);
+                }
+            }
+            break;
         case 0x2D:
             INST_NAME("CVTSS2SI Gd, Ex");
             nextop = F8;
@@ -152,6 +178,44 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             break;
         case 0x38:
             return 0;
+        case 0x51:
+            INST_NAME("SQRTSS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            if (MODREG) {
+                GETGX_vector(v0, 1, VECTOR_SEW32);
+                v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
+            } else {
+                SMREAD();
+                v1 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LWU(x4, ed, fixedaddress);
+                VMV_S_X(v1, x4);
+                GETGX_vector(v0, 1, VECTOR_SEW32);
+            }
+            VECTOR_LOAD_VMASK(0b0001, x4, 1);
+            VFSQRT_V(v0, v1, VECTOR_MASKED);
+            break;
+        case 0x53:
+            INST_NAME("RCPSS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            if (MODREG) {
+                GETGX_vector(v0, 1, VECTOR_SEW32);
+                v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
+            } else {
+                SMREAD();
+                v1 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LWU(x4, ed, fixedaddress);
+                VMV_S_X(v1, x4);
+                GETGX_vector(v0, 1, VECTOR_SEW32);
+            }
+            LUI(x4, 0x3f800);
+            FMVWX(v1, x4); // 1.0f
+            VECTOR_LOAD_VMASK(0b0001, x4, 1);
+            VFRDIV_VF(v0, v1, v1, VECTOR_MASKED);
+            break;
         case 0x58:
             INST_NAME("ADDSS Gx, Ex");
             nextop = F8;
@@ -236,6 +300,25 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 VMV_S_X(v0, x4);
             }
             break;
+        case 0x5B:
+            INST_NAME("CVTTPS2DQ Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETEX_vector(v1, 0, 0, VECTOR_SEW32);
+            GETGX_empty_vector(v0);
+            if (box64_dynarec_fastround) {
+                if (rv64_xtheadvector) {
+                    ADDI(x4, xZR, 1); // RTZ
+                    FSRM(x4, x4);
+                    VFCVT_X_F_V(v0, v1, VECTOR_UNMASKED);
+                    FSRM(xZR, x4);
+                } else {
+                    VFCVT_RTZ_X_F_V(v0, v1, VECTOR_UNMASKED);
+                }
+            } else {
+                return 0;
+            }
+            break;
         case 0x5C:
             INST_NAME("SUBSS Gx, Ex");
             nextop = F8;
@@ -295,6 +378,24 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 }
             }
             break;
+        case 0x5E:
+            INST_NAME("DIVSS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            if (MODREG) {
+                GETGX_vector(v0, 1, VECTOR_SEW32);
+                v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
+            } else {
+                SMREAD();
+                v1 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LWU(x4, ed, fixedaddress);
+                VMV_S_X(v1, x4);
+                GETGX_vector(v0, 1, VECTOR_SEW32);
+            }
+            VECTOR_LOAD_VMASK(0b0001, x4, 1);
+            VFDIV_VV(v0, v0, v1, VECTOR_MASKED);
+            break;
         case 0x5F:
             INST_NAME("MAXSS Gx, Ex");
             nextop = F8;