about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--src/dynarec/arm64/dynarec_arm64_avx_66_0f.c4
-rw-r--r--src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c2
-rw-r--r--src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c272
-rw-r--r--src/dynarec/arm64/dynarec_arm64_f30f.c1
-rw-r--r--src/dynarec/arm64/dynarec_arm64_helper.h23
-rw-r--r--src/emu/x64runavxf30f.c4
6 files changed, 299 insertions, 7 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
index 871fefd7..5c569ab7 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
@@ -190,7 +190,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             } else {
                 for(int l=0; l<1+vex.l; ++l) {
                     if(!l) {
-                        GETGX_empty_VX(v0, v2, 0);
+                        GETGX_empty_VX(v0, v2);
                         addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
                     } else {
                         GETGY_empty_VY(v0, v2, 0, -1, -1);
@@ -219,7 +219,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             } else {
                 for(int l=0; l<1+vex.l; ++l) {
                     if(!l) {
-                        GETGX_empty_VX(v0, v2, 0);
+                        GETGX_empty_VX(v0, v2);
                         addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
                         ADDx_U12(x1, ed, 8);
                     } else {
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
index 3cfafcaa..72c93dbe 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
@@ -123,7 +123,7 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
         case 0x2A:
             INST_NAME("VCVTSI2SD Gx, Vx, Ed");
             nextop = F8;
-            GETGX_empty_VX(v0, v1, 0);
+            GETGX_empty_VX(v0, v1);
             GETED(0);
             d1 = fpu_get_scratch(dyn, ninst);
             if(rex.w) {
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c
index 5f870cb5..30e4ea24 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c
@@ -61,6 +61,278 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
 
     switch(opcode) {
 
+        case 0x10:
+            INST_NAME("VMOVSS Gx, Ex [,Vx]");
+            nextop = F8;
+            GETG;
+            if(MODREG) {
+                GETGX_empty_VXEX(v0, v2, v1, 0);
+                if((v0!=v2) && (v0!=v1)) VMOVQ(v0, v2);
+                if(v0!=v1) VMOVeS(v0, 0, v1, 0);
+                if((v0!=v2) && (v0==v1)) { VMOVeS(v0, 1, v2, 1); VMOVeD(v0, 1, v2, 1);}
+            } else {
+                GETGX_empty(v0);
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VLD32(v0, ed, fixedaddress);
+            }
+            YMM0(gd);
+            break;
+        case 0x11:
+            INST_NAME("VMOVSS Ex, Gx [,Vx]");
+            nextop = F8;
+            GETG;
+            if(MODREG) {
+                GETGXVXEX_empty(v0, v2, v1, 0);
+                if((v2!=v1) && (v2!=v0)) VMOVQ(v2, v1);
+                if(v2!=v0) VMOVeS(v2, 0, v0, 0);
+                if((v2!=v1) && (v2==v0)) { VMOVeS(v2, 1, v0, 1); VMOVeD(v2, 1, v0, 1);}
+                YMM0((nextop&7)+(rex.b<<3));
+            } else {
+                v0 = sse_get_reg(dyn, ninst, x1, gd, 0);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VST32(v0, ed, fixedaddress);
+                SMWRITE2();
+            }
+            break;
+        case 0x12:
+            INST_NAME("VMOVSLDUP Gx, Ex");
+            nextop = F8;
+            GETEX_Y(q1, 0, 0);
+            GETGX_empty(q0);
+            VTRNQ1_32(q0, q1, q1);
+            if(vex.l) {
+                GETGY_empty_EY(q0, q1);
+                VTRNQ1_32(q0, q1, q1);
+            } else YMM0(gd);
+            break;
+
+        case 0x16:
+            INST_NAME("VMOVSHDUP Gx, Ex");
+            nextop = F8;
+            GETEX_Y(q1, 0, 0);
+            GETGX_empty(q0);
+            VTRNQ2_32(q0, q1, q1);
+            if(vex.l) {
+                GETGY_empty_EY(q0, q1);
+                VTRNQ2_32(q0, q1, q1);
+            } else YMM0(gd);
+            break;
+
+        case 0x2A:
+            INST_NAME("VCVTSI2SS Gx, Vx, Ed");
+            nextop = F8;
+            d1 = fpu_get_scratch(dyn, ninst);
+            GETGX_empty_VX(v0, v1);
+            GETED(0);
+            if(rex.w) {
+                SCVTFSx(d1, ed);
+            } else {
+                SCVTFSw(d1, ed);
+            }
+            if(v0!=v1) VMOVQ(v0, v1);
+            VMOVeS(v0, 0, d1, 0);
+            YMM0(gd);
+            break;
+
+        case 0x2C:
+            INST_NAME("VCVTTSS2SI Gd, Ex");
+            nextop = F8;
+            GETGD;
+            GETEXSS(d0, 0, 0);
+            if(!box64_dynarec_fastround) {
+                MRS_fpsr(x5);
+                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                MSR_fpsr(x5);
+            }
+            FCVTZSxwS(gd, d0);
+            if(!box64_dynarec_fastround) {
+                MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                TBZ_NEXT(x5, FPSR_IOC);
+                if(rex.w) {
+                    ORRx_mask(gd, xZR, 1, 1, 0);    //0x8000000000000000
+                } else {
+                    ORRw_mask(gd, xZR, 1, 0);    //0x80000000
+                }
+            }
+            break;
+        case 0x2D:
+            INST_NAME("VCVTSS2SI Gd, Ex");
+            nextop = F8;
+            GETGD;
+            GETEXSS(q0, 0, 0);
+            if(!box64_dynarec_fastround) {
+                MRS_fpsr(x5);
+                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                MSR_fpsr(x5);
+            }
+            u8 = sse_setround(dyn, ninst, x1, x2, x3);
+            d1 = fpu_get_scratch(dyn, ninst);
+            FRINTIS(d1, q0);
+            x87_restoreround(dyn, ninst, u8);
+            FCVTZSxwS(gd, d1);
+            if(!box64_dynarec_fastround) {
+                MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                TBZ_NEXT(x5, FPSR_IOC);
+                if(rex.w) {
+                    ORRx_mask(gd, xZR, 1, 1, 0);    //0x8000000000000000
+                } else {
+                    ORRw_mask(gd, xZR, 1, 0);    //0x80000000
+                }
+            }
+            break;
+
+        case 0x58:
+            INST_NAME("VADDSS Gx, Vx, Ex");
+            nextop = F8;
+            d1 = fpu_get_scratch(dyn, ninst);
+            GETGX_empty_VX(v0, v2);
+            GETEXSS(v1, 0, 0);
+            if(v0!=v2) {
+                if(v0==v1)  {
+                    VMOV(d1, v1);
+                    v1 = d1;
+                }
+                VMOVQ(v0, v2);
+            }
+            FADDS(d1, v0, v1);
+            VMOVeS(v0, 0, d1, 0);
+            YMM0(gd)
+            break;
+        case 0x59:
+            INST_NAME("VMULSS Gx, Vx, Ex");
+            nextop = F8;
+            d1 = fpu_get_scratch(dyn, ninst);
+            GETGX_empty_VX(v0, v2);
+            GETEXSS(v1, 0, 0);
+            if(v0!=v2) {
+                if(v0==v1)  {
+                    VMOV(d1, v1);
+                    v1 = d1;
+                }
+                VMOVQ(v0, v2);
+            }
+            FMULS(d1, v0, v1);
+            VMOVeS(v0, 0, d1, 0);
+            YMM0(gd)
+            break;
+        case 0x5A:
+            INST_NAME("VCVTSS2SD Gx, Vx, Ex");
+            nextop = F8;
+            d1 = fpu_get_scratch(dyn, ninst);
+            GETGX_empty_VX(v0, v2);
+            GETEXSS(v1, 0, 0);
+            if(v0!=v2) {
+                if(v0==v1)  {
+                    VMOV(d1, v1);
+                    v1 = d1;
+                }
+                VMOVQ(v0, v2);
+            }
+            FCVT_D_S(d1, v1);
+            VMOVeD(v0, 0, d1, 0);
+            YMM0(gd);
+            break;
+        case 0x5B:
+            INST_NAME("VCVTTPS2DQ Gx, Ex");
+            nextop = F8;
+            d0 = fpu_get_scratch(dyn, ninst);
+            if(!box64_dynarec_fastround) {
+                MRS_fpsr(x5);
+                ORRw_mask(x4, xZR, 1, 0);    //0x80000000
+            }
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_empty_EX(v0, v1, 0); } else { GETGY_empty_EY(v0, v1); }
+                if(box64_dynarec_fastround) {
+                    VFCVTZSQS(v0, v1);
+                } else {
+                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                    MSR_fpsr(x5);
+                    for(int i=0; i<4; ++i) {
+                        BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                        MSR_fpsr(x5);
+                        VMOVeS(d0, 0, v1, i);
+                        VFCVTZSs(d0, d0);
+                        MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                        TBZ(x5, FPSR_IOC, 4+4);
+                        VMOVQSfrom(d0, 0, x4);
+                        VMOVeS(v0, i, d0, 0);
+                    }
+                }
+            }
+            if(!vex.l) YMM0(gd);
+            break;
+        case 0x5C:
+            INST_NAME("VSUBSS Gx, Vx, Ex");
+            nextop = F8;
+            d1 = fpu_get_scratch(dyn, ninst);
+            GETGX_empty_VX(v0, v2);
+            GETEXSS(v1, 0, 0);
+            if(v0!=v2) {
+                if(v0==v1)  {
+                    VMOV(d1, v1);
+                    v1 = d1;
+                }
+                VMOVQ(v0, v2);
+            }
+            FSUBS(d1, v0, v1);
+            VMOVeS(v0, 0, d1, 0);
+            YMM0(gd)
+            break;
+        case 0x5D:
+            INST_NAME("VMINSS Gx, Vx, Ex");
+            nextop = F8;
+            d1 = fpu_get_scratch(dyn, ninst);
+            GETGX_empty_VX(v0, v2);
+            GETEXSS(v1, 0, 0);
+            if(v0!=v2) {
+                if(v0==v1)  {
+                    VMOV(d1, v1);
+                    v1 = d1;
+                }
+                VMOVQ(v0, v2);
+            }
+            FCMPS(v0, v1);
+            B_NEXT(cLS);    //Less than or equal
+            VMOVeS(v0, 0, v1, 0);   // so as not to erase the upper part
+            YMM0(gd)
+            break;
+        case 0x5E:
+            INST_NAME("VDIVSS Gx, Vx, Ex");
+            nextop = F8;
+            d1 = fpu_get_scratch(dyn, ninst);
+            GETGX_empty_VX(v0, v2);
+            GETEXSS(v1, 0, 0);
+            if(v0!=v2) {
+                if(v0==v1)  {
+                    VMOV(d1, v1);
+                    v1 = d1;
+                }
+                VMOVQ(v0, v2);
+            }
+            FDIVS(d1, v0, v1);
+            VMOVeS(v0, 0, d1, 0);
+            YMM0(gd)
+            break;
+        case 0x5F:
+            INST_NAME("VMAXSS Gx, Vx, Ex");
+            nextop = F8;
+            d1 = fpu_get_scratch(dyn, ninst);
+            GETGX_empty_VX(v0, v2);
+            GETEXSS(v1, 0, 0);
+            if(v0!=v2) {
+                if(v0==v1)  {
+                    VMOV(d1, v1);
+                    v1 = d1;
+                }
+                VMOVQ(v0, v2);
+            }
+            FCMPS(v0, v1);
+            B_NEXT(cGE);    //Greater than or equal
+            VMOVeS(v0, 0, v1, 0);   // so as not to erase the upper part
+            YMM0(gd)
+            break;
+
         case 0x6F:
             INST_NAME("VMOVDQU Gx, Ex");// no alignment constraint on NEON here, so same as MOVDQA
             nextop = F8;
diff --git a/src/dynarec/arm64/dynarec_arm64_f30f.c b/src/dynarec/arm64/dynarec_arm64_f30f.c
index 96fe7543..a3144f21 100644
--- a/src/dynarec/arm64/dynarec_arm64_f30f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f30f.c
@@ -291,7 +291,6 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                }
            }
            break;
-
        case 0x5C:
            INST_NAME("SUBSS Gx, Ex");
            nextop = F8;
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 8fd918d6..45296269 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -490,8 +490,13 @@
     GETEX_Y(ex, 0, D);                      \
     GETGX_empty(gx)
 
+// Get empty GX, and non-written EX
+#define GETGX_empty_EX(gx, ex, D)           \
+    GETEX_Y(ex, 0, D);                      \
+    GETGX_empty(gx)
+
 // Get empty GX, and non-writen VX
-#define GETGX_empty_VX(gx, vx, D)           \
+#define GETGX_empty_VX(gx, vx)              \
     GETVX(vx, 0);                           \
     GETGX_empty(gx)
 
@@ -501,6 +506,11 @@
     GETEX_Y(ex, 1, D);                      \
     GETGX(gx, 0)
 
+#define GETGXVXEX_empty(gx, vx, ex, D)      \
+    GETVX(vx, 0);                           \
+    GETGX(gx, 0);                           \
+    GETEX_empty_Y(ex, D);
+
 // Get empty GY, and non-writen VY and EY
 #define GETGY_empty_VYEY(gy, vy, ey)                                                            \
     vy = ymm_get_reg(dyn, ninst, x1, vex.v, 0, gd, (MODREG)?((nextop&7)+(rex.b<<3)):-1, -1);    \
@@ -554,6 +564,17 @@
         a = fpu_get_scratch(dyn, ninst);                                                                \
         VLD128(a, ed, fixedaddress);                                                                    \
     }
+// Get EX as a quad that will be (over)written; loads the memory operand if EX is not a register (x3 is used)
+#define GETEX_empty_Y(a, D)                                                                             \
+    if(MODREG) {                                                                                        \
+        a = sse_get_reg_empty(dyn, ninst, x3, (nextop&7)+(rex.b<<3));                                   \
+    } else {                                                                                            \
+        WILLWRITE2();                                                                                   \
+        addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, D);  \
+        unscaled = 0;                                                                                   \
+        a = fpu_get_scratch(dyn, ninst);                                                                \
+        VLD128(a, ed, fixedaddress);                                                                    \
+    }
 
 // Get EX as a quad, (x1 is used)
 #define GETEX(a, w, D)                                                                                  \
diff --git a/src/emu/x64runavxf30f.c b/src/emu/x64runavxf30f.c
index 511463fd..a6cd93d5 100644
--- a/src/emu/x64runavxf30f.c
+++ b/src/emu/x64runavxf30f.c
@@ -63,7 +63,7 @@ uintptr_t RunAVX_F30F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
 
     switch(opcode) {
 
-        case 0x10:  /* VMOVSS Gx Ex */
+        case 0x10:  /* VMOVSS Gx, [Vx,] Ex */
             nextop = F8;
             GETEX(0);
             GETGX;
@@ -78,7 +78,7 @@ uintptr_t RunAVX_F30F(x64emu_t *emu, vex_t vex, uintptr_t addr, int *step)
             GETGY;
             GY->u128 = 0;
             break;
-        case 0x11:  /* MOVSS Ex Gx */
+        case 0x11:  /* VMOVSS Ex, [Vx,] Gx */
             nextop = F8;
             GETEX(0);
             GETGX;