about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2024-10-30 20:10:47 +0800
committer GitHub <noreply@github.com> 2024-10-30 13:10:47 +0100
commit adb423d96b3a90b973a53388dba4c163dde2f8db (patch)
tree 4dc11f41e888edca7b92e756020e647838fa14b6 /src
parent 74acad36288d9433d3d0d24bf0eb74c5a239bb1f (diff)
download box64-adb423d96b3a90b973a53388dba4c163dde2f8db.tar.gz
box64-adb423d96b3a90b973a53388dba4c163dde2f8db.zip
[RV64_DYNAREC] Added more opcodes for vector (#1981)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f_vector.c 56
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f_vector.c 11
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_f20f_vector.c 24
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_f30f_vector.c 38
4 files changed, 109 insertions, 20 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
index cba9796d..74556ce1 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c
@@ -120,6 +120,29 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VLE8_V(v0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
             }
             break;
+        case 0x13:
+            INST_NAME("MOVLPS Ex, Gx");
+            nextop = F8;
+            GETG;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
+            v0 = sse_get_reg_vector(dyn, ninst, x1, gd, 0, VECTOR_SEW64);
+            if (MODREG) {
+                ed = (nextop & 7) + (rex.b << 3);
+                d0 = sse_get_reg_vector(dyn, ninst, x1, ed, 1, VECTOR_SEW64);
+                if (rv64_xtheadvector) {
+                    VECTOR_LOAD_VMASK(0b01, x4, 1);
+                    VMERGE_VVM(v0, v0, v1); // implies VMASK
+                } else {
+                    VMV_X_S(x4, v1);
+                    VMV_S_X(v0, x4);
+                }
+            } else {
+                VMV_X_S(x4, v0);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                SD(x4, ed, fixedaddress);
+                SMWRITE2();
+            }
+            break;
         case 0x14:
             INST_NAME("UNPCKLPS Gx, Ex");
             nextop = F8;
@@ -251,6 +274,21 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
                 SMWRITE2();
             }
             break;
+        case 0x2B:
+            INST_NAME("MOVNTPS Ex, Gx");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEWANY, 1);
+            GETGX_vector(v0, 0, dyn->vector_eew);
+            if (MODREG) {
+                ed = (nextop & 7) + (rex.b << 3);
+                v1 = sse_get_reg_empty_vector(dyn, ninst, x1, ed);
+                VMV_V_V(v1, v0);
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 0);
+                VSE_V(v0, ed, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1);
+                SMWRITE2();
+            }
+            break;
         case 0x50:
             INST_NAME("MOVMSKPS Gd, Ex");
             nextop = F8;
@@ -286,6 +324,18 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
             GETGX_empty_vector(v1);
             VFSQRT_V(v1, v0, VECTOR_UNMASKED);
             break;
+        case 0x52:
+            if (!box64_dynarec_fastround) return 0;
+            INST_NAME("RSQRTPS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            GETEX_vector(v0, 0, 0, VECTOR_SEW32);
+            GETGX_empty_vector(v1);
+            LUI(x4, 0x3f800);
+            FMVWX(v0, x4); // 1.0f
+            VFSQRT_V(v1, v0, VECTOR_UNMASKED);
+            VFRDIV_VF(v1, v1, v0, VECTOR_UNMASKED);
+            break;
         case 0x53:
             INST_NAME("RCPPS Gx, Ex");
             nextop = F8;
@@ -293,9 +343,8 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
             GETEX_vector(v0, 0, 0, VECTOR_SEW32);
             GETGX_empty_vector(v1);
             LUI(x4, 0x3f800);
-            d0 = fpu_get_scratch(dyn);
-            FMVWX(d0, x4); // 1.0f
-            VFRDIV_VF(v1, v0, d0, VECTOR_UNMASKED);
+            FMVWX(v0, x4); // 1.0f
+            VFRDIV_VF(v1, v0, v0, VECTOR_UNMASKED);
             break;
         case 0x54:
             INST_NAME("ANDPS Gx, Ex");
@@ -502,6 +551,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip,
         case 0x40 ... 0x4F:
         case 0x60 ... 0x7F:
         case 0x80 ... 0xBF:
+        case 0xC3 ... 0xC5:
         case 0xC8 ... 0xCF:
             return 0;
         default:
diff --git a/src/dynarec/rv64/dynarec_rv64_660f_vector.c b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
index 278eac9e..56177200 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f_vector.c
@@ -897,18 +897,15 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             }
             break;
         case 0x5B:
+            if (!box64_dynarec_fastround) return 0;
             INST_NAME("CVTPS2DQ Gx, Ex");
             nextop = F8;
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
             GETEX_vector(v1, 0, 0, VECTOR_SEW32);
             GETGX_empty_vector(v0);
-            if (box64_dynarec_fastround) {
-                u8 = sse_setround(dyn, ninst, x6, x4);
-                VFCVT_X_F_V(v0, v1, VECTOR_UNMASKED);
-                x87_restoreround(dyn, ninst, u8);
-            } else {
-                return 0;
-            }
+            u8 = sse_setround(dyn, ninst, x6, x4);
+            VFCVT_X_F_V(v0, v1, VECTOR_UNMASKED);
+            x87_restoreround(dyn, ninst, u8);
             break;
         case 0x5C:
             INST_NAME("SUBPD Gx, Ex");
diff --git a/src/dynarec/rv64/dynarec_rv64_f20f_vector.c b/src/dynarec/rv64/dynarec_rv64_f20f_vector.c
index 8c5b5ffa..8cfe5b5d 100644
--- a/src/dynarec/rv64/dynarec_rv64_f20f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_f20f_vector.c
@@ -36,6 +36,7 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
     int q0, q1;
     int d0, d1;
     int s0, s1;
+    uint64_t tmp64u0, tmp64u1;
     int64_t fixedaddress, gdoffset;
     int unscaled;
 
@@ -466,6 +467,29 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
                 }
             }
             break;
+        case 0x70:
+            INST_NAME("PSHUFLW Gx, Ex, Ib");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
+            GETEX_vector(v1, 0, 1, VECTOR_SEW16);
+            GETGX_vector(v0, 1, VECTOR_SEW16);
+            u8 = F8;
+            d0 = fpu_get_scratch(dyn);
+            d1 = fpu_get_scratch(dyn);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW64, VECTOR_LMUL1, 1);
+            tmp64u0 = 0x0007000600050004ULL;
+            MOV64x(x5, tmp64u0);
+            VMV_S_X(d1, x5);
+            tmp64u0 = ((((uint64_t)u8 >> 6) & 3) << 48) | ((((uint64_t)u8 >> 4) & 3) << 32) | (((u8 >> 2) & 3) << 16) | (u8 & 3);
+            MOV64x(x5, tmp64u0);
+            VSLIDE1UP_VX(d0, d1, x5, VECTOR_UNMASKED);
+            vector_vsetvli(dyn, ninst, x1, VECTOR_SEW16, VECTOR_LMUL1, 1);
+            if (v0 == v1) {
+                v1 = fpu_get_scratch(dyn);
+                VMV_V_V(v1, v0);
+            }
+            VRGATHER_VV(v0, v1, d0, VECTOR_UNMASKED);
+            break;
         case 0xC2:
             INST_NAME("CMPSD Gx, Ex, Ib");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
index 4c0088e6..4e7f12d2 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f_vector.c
@@ -196,6 +196,27 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             VECTOR_LOAD_VMASK(0b0001, x4, 1);
             VFSQRT_V(v0, v1, VECTOR_MASKED);
             break;
+        case 0x52:
+            INST_NAME("RSQRTSS Gx, Ex");
+            nextop = F8;
+            SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
+            if (MODREG) {
+                GETGX_vector(v0, 1, VECTOR_SEW32);
+                v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
+            } else {
+                SMREAD();
+                v1 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
+                LWU(x4, ed, fixedaddress);
+                VMV_S_X(v1, x4);
+                GETGX_vector(v0, 1, VECTOR_SEW32);
+            }
+            LUI(x4, 0x3f800);
+            FMVWX(v1, x4); // 1.0f
+            VECTOR_LOAD_VMASK(0b0001, x4, 1);
+            VFSQRT_V(v0, v1, VECTOR_MASKED);
+            VFRDIV_VF(v0, v0, v1, VECTOR_MASKED);
+            break;
         case 0x53:
             INST_NAME("RCPSS Gx, Ex");
             nextop = F8;
@@ -301,22 +322,19 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
             }
             break;
         case 0x5B:
+            if (!box64_dynarec_fastround) return 0;
             INST_NAME("CVTTPS2DQ Gx, Ex");
             nextop = F8;
             SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
             GETEX_vector(v1, 0, 0, VECTOR_SEW32);
             GETGX_empty_vector(v0);
-            if (box64_dynarec_fastround) {
-                if (rv64_xtheadvector) {
-                    ADDI(x4, xZR, 1); // RTZ
-                    FSRM(x4, x4);
-                    VFCVT_X_F_V(v0, v1, VECTOR_UNMASKED);
-                    FSRM(xZR, x4);
-                } else {
-                    VFCVT_RTZ_X_F_V(v0, v1, VECTOR_UNMASKED);
-                }
+            if (rv64_xtheadvector) {
+                ADDI(x4, xZR, 1); // RTZ
+                FSRM(x4, x4);
+                VFCVT_X_F_V(v0, v1, VECTOR_UNMASKED);
+                FSRM(xZR, x4);
             } else {
-                return 0;
+                VFCVT_RTZ_X_F_V(v0, v1, VECTOR_UNMASKED);
             }
             break;
         case 0x5C: