Diffstat (limited to 'src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c')
 src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c | 272 +++++++++++++++++++++++++++
 1 file changed, 272 insertions(+), 0 deletions(-)
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c
index 5f870cb5..30e4ea24 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c
@@ -61,6 +61,278 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
 
     switch(opcode) {
 
+        case 0x10:
+            INST_NAME("VMOVSS Gx, Ex [,Vx]");
+            nextop = F8;
+            GETG;
+            if(MODREG) {
+                GETGX_empty_VXEX(v0, v2, v1, 0);
+                if((v0!=v2) && (v0!=v1)) VMOVQ(v0, v2);
+                if(v0!=v1) VMOVeS(v0, 0, v1, 0);
+                if((v0!=v2) && (v0==v1)) { VMOVeS(v0, 1, v2, 1); VMOVeD(v0, 1, v2, 1);}
+            } else {
+                GETGX_empty(v0);
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VLD32(v0, ed, fixedaddress);
+            }
+            YMM0(gd);
+            break;
+        case 0x11:
+            INST_NAME("VMOVSS Ex, Gx [,Vx]");
+            nextop = F8;
+            GETG;
+            if(MODREG) {
+                GETGXVXEX_empty(v0, v2, v1, 0);
+                if((v2!=v1) && (v2!=v0)) VMOVQ(v2, v1);
+                if(v2!=v0) VMOVeS(v2, 0, v0, 0);
+                if((v2!=v1) && (v2==v0)) { VMOVeS(v2, 1, v0, 1); VMOVeD(v2, 1, v0, 1);}
+                YMM0((nextop&7)+(rex.b<<3));
+            } else {
+                v0 = sse_get_reg(dyn, ninst, x1, gd, 0);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VST32(v0, ed, fixedaddress);
+                SMWRITE2();
+            }
+            break;
+        case 0x12:
+            INST_NAME("VMOVSLDUP Gx, Ex");
+            nextop = F8;
+            GETEX_Y(q1, 0, 0);
+            GETGX_empty(q0);
+            VTRNQ1_32(q0, q1, q1);
+            if(vex.l) {
+                GETGY_empty_EY(q0, q1);
+                VTRNQ1_32(q0, q1, q1);
+            } else YMM0(gd);
+            break;
+
+        case 0x16:
+            INST_NAME("MOVSHDUP Gx, Ex");
+            nextop = F8;
+            GETEX_Y(q1, 0, 0);
+            GETGX_empty(q0);
+            VTRNQ2_32(q0, q1, q1);
+            if(vex.l) {
+                GETGY_empty_EY(q0, q1);
+                VTRNQ2_32(q0, q1, q1);
+            } else YMM0(gd);
+            break;
+
+        case 0x2A:
+            INST_NAME("VCVTSI2SS Gx, Vx, Ed");
+            nextop = F8;
+            d1 = fpu_get_scratch(dyn, ninst);
+            GETGX_empty_VX(v0, v1);
+            GETED(0);
+            if(rex.w) {
+                SCVTFSx(d1, ed);
+            } else {
+                SCVTFSw(d1, ed);
+            }
+            if(v0!=v1) VMOVQ(v0, v1);
+            VMOVeS(v0, 0, d1, 0);
+            YMM0(gd);
+            break;
+
+        case 0x2C:
+            INST_NAME("VCVTTSS2SI Gd, Ex");
+            nextop = F8;
+            GETGD;
+            GETEXSS(d0, 0, 0);
+            if(!box64_dynarec_fastround) {
+                MRS_fpsr(x5);
+                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                MSR_fpsr(x5);
+            }
+            FCVTZSxwS(gd, d0);
+            if(!box64_dynarec_fastround) {
+                MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                TBZ_NEXT(x5, FPSR_IOC);
+                if(rex.w) {
+                    ORRx_mask(gd, xZR, 1, 1, 0);    //0x8000000000000000
+                } else {
+                    ORRw_mask(gd, xZR, 1, 0);    //0x80000000
+                }
+            }
+            break;
+        case 0x2D:
+            INST_NAME("VCVTSS2SI Gd, Ex");
+            nextop = F8;
+            GETGD;
+            GETEXSS(q0, 0, 0);
+            if(!box64_dynarec_fastround) {
+                MRS_fpsr(x5);
+                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                MSR_fpsr(x5);
+            }
+            u8 = sse_setround(dyn, ninst, x1, x2, x3);
+            d1 = fpu_get_scratch(dyn, ninst);
+            FRINTIS(d1, q0);
+            x87_restoreround(dyn, ninst, u8);
+            FCVTZSxwS(gd, d1);
+            if(!box64_dynarec_fastround) {
+                MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                TBZ_NEXT(x5, FPSR_IOC);
+                if(rex.w) {
+                    ORRx_mask(gd, xZR, 1, 1, 0);    //0x8000000000000000
+                } else {
+                    ORRw_mask(gd, xZR, 1, 0);    //0x80000000
+                }
+            }
+            break;
+
+        case 0x58:
+            INST_NAME("VADDSS Gx, Vx, Ex");
+            nextop = F8;
+            d1 = fpu_get_scratch(dyn, ninst);
+            GETGX_empty_VX(v0, v2);
+            GETEXSS(v1, 0, 0);
+            if(v0!=v2) {
+                if(v0==v1)  {
+                    VMOV(d1, v1);
+                    v1 = d1;
+                }
+                VMOVQ(v0, v2);
+            }
+            FADDS(d1, v0, v1);
+            VMOVeS(v0, 0, d1, 0);
+            YMM0(gd);
+            break;
+        case 0x59:
+            INST_NAME("VMULSS Gx, Vx, Ex");
+            nextop = F8;
+            d1 = fpu_get_scratch(dyn, ninst);
+            GETGX_empty_VX(v0, v2);
+            GETEXSS(v1, 0, 0);
+            if(v0!=v2) {
+                if(v0==v1)  {
+                    VMOV(d1, v1);
+                    v1 = d1;
+                }
+                VMOVQ(v0, v2);
+            }
+            FMULS(d1, v0, v1);
+            VMOVeS(v0, 0, d1, 0);
+            YMM0(gd);
+            break;
+        case 0x5A:
+            INST_NAME("VCVTSS2SD Gx, Vx, Ex");
+            nextop = F8;
+            d1 = fpu_get_scratch(dyn, ninst);
+            GETGX_empty_VX(v0, v2);
+            GETEXSS(v1, 0, 0);
+            if(v0!=v2) {
+                if(v0==v1)  {
+                    VMOV(d1, v1);
+                    v1 = d1;
+                }
+                VMOVQ(v0, v2);
+            }
+            FCVT_D_S(d1, v1);
+            VMOVeD(v0, 0, d1, 0);
+            YMM0(gd);
+            break;
+        case 0x5B:
+            INST_NAME("VCVTTPS2DQ Gx, Ex");
+            nextop = F8;
+            d0 = fpu_get_scratch(dyn, ninst);
+            if(!box64_dynarec_fastround) {
+                MRS_fpsr(x5);
+                ORRw_mask(x4, xZR, 1, 0);    //0x80000000
+            }
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_empty_EX(v0, v1, 0); } else { GETGY_empty_EY(v0, v1); }
+                if(box64_dynarec_fastround) {
+                    VFCVTZSQS(v0, v1);
+                } else {
+                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                    MSR_fpsr(x5);
+                    for(int i=0; i<4; ++i) {
+                        BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                        MSR_fpsr(x5);
+                        VMOVeS(d0, 0, v1, i);
+                        VFCVTZSs(d0, d0);
+                        MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                        TBZ(x5, FPSR_IOC, 4+4);    // no IOC: skip the 0x80000000 write below
+                        VMOVQSfrom(d0, 0, x4);
+                        VMOVeS(v0, i, d0, 0);
+                    }
+                }
+            }
+            if(!vex.l) YMM0(gd);
+            break;
+        case 0x5C:
+            INST_NAME("VSUBSS Gx, Vx, Ex");
+            nextop = F8;
+            d1 = fpu_get_scratch(dyn, ninst);
+            GETGX_empty_VX(v0, v2);
+            GETEXSS(v1, 0, 0);
+            if(v0!=v2) {
+                if(v0==v1)  {
+                    VMOV(d1, v1);
+                    v1 = d1;
+                }
+                VMOVQ(v0, v2);
+            }
+            FSUBS(d1, v0, v1);
+            VMOVeS(v0, 0, d1, 0);
+            YMM0(gd);
+            break;
+        case 0x5D:
+            INST_NAME("VMINSS Gx, Vx, Ex");
+            nextop = F8;
+            d1 = fpu_get_scratch(dyn, ninst);
+            GETGX_empty_VX(v0, v2);
+            GETEXSS(v1, 0, 0);
+            if(v0!=v2) {
+                if(v0==v1)  {
+                    VMOV(d1, v1);
+                    v1 = d1;
+                }
+                VMOVQ(v0, v2);
+            }
+            FCMPS(v0, v1);
+            B_NEXT(cLS);    // less than or equal: keep current low lane
+            VMOVeS(v0, 0, v1, 0);   // insert only lane 0 so the upper part is not erased
+            YMM0(gd);
+            break;
+        case 0x5E:
+            INST_NAME("VDIVSS Gx, Vx, Ex");
+            nextop = F8;
+            d1 = fpu_get_scratch(dyn, ninst);
+            GETGX_empty_VX(v0, v2);
+            GETEXSS(v1, 0, 0);
+            if(v0!=v2) {
+                if(v0==v1)  {
+                    VMOV(d1, v1);
+                    v1 = d1;
+                }
+                VMOVQ(v0, v2);
+            }
+            FDIVS(d1, v0, v1);
+            VMOVeS(v0, 0, d1, 0);
+            YMM0(gd);
+            break;
+        case 0x5F:
+            INST_NAME("VMAXSS Gx, Vx, Ex");
+            nextop = F8;
+            d1 = fpu_get_scratch(dyn, ninst);
+            GETGX_empty_VX(v0, v2);
+            GETEXSS(v1, 0, 0);
+            if(v0!=v2) {
+                if(v0==v1)  {
+                    VMOV(d1, v1);
+                    v1 = d1;
+                }
+                VMOVQ(v0, v2);
+            }
+            FCMPS(v0, v1);
+            B_NEXT(cGE);    // greater than or equal: keep current low lane
+            VMOVeS(v0, 0, v1, 0);   // insert only lane 0 so the upper part is not erased
+            YMM0(gd);
+            break;
+
         case 0x6F:
             INST_NAME("VMOVDQU Gx, Ex");// no alignment constraint on NEON here, so same as MOVDQA
             nextop = F8;