about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/dynarec/arm64/dynarec_arm64_0f.c20
-rw-r--r--src/dynarec/arm64/dynarec_arm64_660f.c28
-rw-r--r--src/dynarec/arm64/dynarec_arm64_avx_0f.c2
-rw-r--r--src/dynarec/arm64/dynarec_arm64_avx_66_0f.c65
-rw-r--r--src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c119
-rw-r--r--src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c68
-rw-r--r--src/dynarec/arm64/dynarec_arm64_f20f.c72
-rw-r--r--src/dynarec/arm64/dynarec_arm64_f30f.c67
8 files changed, 287 insertions, 154 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index b66f5bf3..2821c846 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -1189,7 +1189,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("CVTPS2PD Gx, Ex");

             nextop = F8;

             GETEX(q0, 0, 0);

-            GETGX(q1, 1);

+            GETGX_empty(q1);

             FCVTL(q1, q0);

             break;

         case 0x5B:

@@ -2433,22 +2433,24 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 0: VFCMEQQS(v0, v0, v1); break;   // Equal

                 case 1: VFCMGTQS(v0, v1, v0); break;   // Less than

                 case 2: VFCMGEQS(v0, v1, v0); break;   // Less or equal

-                case 3: VFCMEQQS(v0, v0, v0);

-                        if(v0!=v1) {

+                case 3: if(v0!=v1) {

                             q0 = fpu_get_scratch(dyn, ninst);

-                            VFCMEQQS(q0, v1, v1);

-                            VANDQ(v0, v0, q0);

+                            VFMAXQS(q0, v0, v1);    // propagate NAN

+                            VFCMEQQS(v0, q0, q0);

+                        } else {

+                            VFCMEQQS(v0, v0, v0);

                         }

                         VMVNQ(v0, v0);

                        break;   // NaN (NaN is not equal to itself)

                 case 4: VFCMEQQS(v0, v0, v1); VMVNQ(v0, v0); break;   // Not Equal (or unordered on ARM, not on X86...)

                 case 5: VFCMGTQS(v0, v1, v0); VMVNQ(v0, v0); break;   // Greater or equal or unordered

                 case 6: VFCMGEQS(v0, v1, v0); VMVNQ(v0, v0); break;   // Greater or unordered

-                case 7: VFCMEQQS(v0, v0, v0);

-                        if(v0!=v1) {

+                case 7: if(v0!=v1) {

                             q0 = fpu_get_scratch(dyn, ninst);

-                            VFCMEQQS(q0, v1, v1);

-                            VANDQ(v0, v0, q0);

+                            VFMAXQS(q0, v0, v1);    // propagate NAN

+                            VFCMEQQS(v0, q0, q0);

+                        } else {

+                            VFCMEQQS(v0, v0, v0);

                         }

                         break;   // not NaN

             }

diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index be7acca3..de9eb515 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -1787,8 +1787,12 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     for(int i=0; i<4; ++i) {

                         BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

                         MSR_fpsr(x5);

-                        VMOVeS(d0, 0, v1, i);

-                        FRINTIS(d0, d0);

+                        if(i) {

+                            VMOVeS(d0, 0, v1, i);

+                            FRINTIS(d0, d0);

+                        } else {

+                            FRINTIS(d0, v1);

+                        }

                         VFCVTZSs(d0, d0);

                         MRS_fpsr(x5);   // get back FPSR to check the IOC bit

                         TBZ(x5, FPSR_IOC, 4+4);

@@ -2820,22 +2824,24 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 case 0: VFCMEQQD(v0, v0, v1); break;   // Equal

                 case 1: VFCMGTQD(v0, v1, v0); break;   // Less than

                 case 2: VFCMGEQD(v0, v1, v0); break;   // Less or equal

-                case 3: VFCMEQQD(v0, v0, v0);

-                        if(v0!=v1) {

+                case 3: if(v0!=v1) {

                             q0 = fpu_get_scratch(dyn, ninst);

-                            VFCMEQQD(q0, v1, v1);

-                            VANDQ(v0, v0, q0);

+                            VFMAXQD(q0, v0, v1);    // propagate NAN

+                            VFCMEQQD(v0, q0, q0);

+                        } else {

+                            VFCMEQQD(v0, v0, v0);

                         }

                         VMVNQ(v0, v0);

                        break;   // NaN (NaN is not equal to itself)

                 case 4: VFCMEQQD(v0, v0, v1); VMVNQ(v0, v0); break;   // Not Equal (or unordered on ARM, not on X86...)

                 case 5: VFCMGTQD(v0, v1, v0); VMVNQ(v0, v0); break;   // Greater or equal or unordered

                 case 6: VFCMGEQD(v0, v1, v0); VMVNQ(v0, v0); break;   // Greater or unordered

-                case 7: VFCMEQQD(v0, v0, v0);

-                        if(v0!=v1) {

+                case 7: if(v0!=v1) {

                             q0 = fpu_get_scratch(dyn, ninst);

-                            VFCMEQQD(q0, v1, v1);

-                            VANDQ(v0, v0, q0);

+                            VFMAXQD(q0, v0, v1);    // propagate NAN

+                            VFCMEQQD(v0, q0, q0);

+                        } else {

+                            VFCMEQQD(v0, v0, v0);

                         }

                         break;   // not NaN

             }

@@ -3146,8 +3152,6 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     SQXTN_32(v0, v0);   // convert int64 -> int32 with saturation in lower part, RaZ high part

                 } else {

                     MRS_fpsr(x5);

-                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

-                    MSR_fpsr(x5);

                     ORRw_mask(x4, xZR, 1, 0);    //0x80000000

                     d0 = fpu_get_scratch(dyn, ninst);

                     for(int i=0; i<2; ++i) {

diff --git a/src/dynarec/arm64/dynarec_arm64_avx_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_0f.c
index e8347bb4..50231d0e 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_0f.c
@@ -700,7 +700,7 @@ uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int
             q0 = fpu_get_scratch(dyn, ninst);
             for(int l=0; l<1+vex.l; ++l) {
                 if(!l) { GETGX_empty_VXEX(v0, v2, v1, 1); u8 = F8; } else { GETGY_empty_VYEY(v0, v2, v1); }
-                if(((u8&15)==3) || ((u8&15)==7) || ((u8&15)==8) || ((u8&15)==9) || ((u8&15)==10) || ((u8&15)==12) || ((u8&15)==13) || ((u8&15)==14)) {
+                if(((u8&15)==3) || ((u8&15)==7) || ((u8&15)==8) || ((u8&15)==9) || ((u8&15)==10) || ((u8&15)==12)) {
                     VFMAXQS(q0, v2, v1);    // propagate NAN
                     VFCMEQQS(((u8&15)==7)?v0:q0, q0, q0);   // 0 if NAN, 1 if not NAN
                 }
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
index 3d187ebc..119e2bce 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f.c
@@ -416,10 +416,8 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             }
             if(vex.l) {
                 GETEY(v1);
-                if(BOX64ENV(dynarec_fastround)==2) {
-                    FCVTXN2(v0, v1);
-                } else {
-                    FCVTN2(v0, v1);
+                FCVTXN2(v0, v1);
+                if(BOX64ENV(dynarec_fastround)<2) {
                     x87_restoreround(dyn, ninst, u8);
                 }
             }
@@ -435,7 +433,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
                 MOVI_32_lsl(d1, 0x80, 3);
             }
             for(int l=0; l<1+vex.l; ++l) {
-                if(!l) { GETEX_Y(v1, 0, 0); GETGX_empty(v0); } else { GETGY_empty_EY(v0, v1); }
+                if(!l) { GETGX_empty_EX(v0, v1, 0); } else { GETGY_empty_EY(v0, v1); }
                 if(BOX64ENV(dynarec_fastround)) {
                     VFRINTISQ(v0, v1);
                     VFCVTZSQS(v0, v0);
@@ -448,8 +446,12 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
                         for(int i=0; i<4; ++i) {
                             BFCx(x5, FPSR_IOC, 1);  // reset IOC bits
                             MSR_fpsr(x5);
-                            VMOVeS(d0, 0, v1, i);
-                            FRINTIS(d0, d0);
+                            if(i) {
+                                VMOVeS(d0, 0, v1, i);
+                                FRINTIS(d0, d0);
+                            } else {
+                                FRINTIS(d0, v1);
+                            }
                             VFCVTZSs(d0, d0);
                             MRS_fpsr(x5);   // get back FPSR to check the IOC bit
                             TSTw_mask(x5, 0, 0);    // mask=(1<<IOC)
@@ -1203,7 +1205,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             q0 = fpu_get_scratch(dyn, ninst);
             for(int l=0; l<1+vex.l; ++l) {
                 if(!l) { GETGX_empty_VXEX(v0, v2, v1, 1); u8 = F8; } else { GETGY_empty_VYEY(v0, v2, v1); }
-                if(((u8&15)==3) || ((u8&15)==7) || ((u8&15)==8) || ((u8&15)==9) || ((u8&15)==10) || ((u8&15)==12) || ((u8&15)==13) || ((u8&15)==14)) {
+                if(((u8&15)==3) || ((u8&15)==7) || ((u8&15)==8) || ((u8&15)==9) || ((u8&15)==10) || ((u8&15)==12)) {
                     VFMAXQD(q0, v2, v1);    // propagate NAN
                     VFCMEQQD(((u8&15)==7)?v0:q0, q0, q0);   // 0 if NAN, 1 if not NAN
                 }
@@ -1629,28 +1631,35 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
                     else
                         SQXTN2_32(v0, d0);   // convert int64 -> int32 with saturation in higher part
                 } else {
-                    if(!l) {
-                        MRS_fpsr(x5);
-                        BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
-                        MSR_fpsr(x5);
-                        ORRw_mask(x4, xZR, 1, 0);    //0x80000000
-                        d0 = fpu_get_scratch(dyn, ninst);
-                    }
-                    for(int i=0; i<2; ++i) {
-                        BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
-                        MSR_fpsr(x5);
-                        if(i) {
-                            VMOVeD(d0, 0, v1, i);
-                            FCVTZSwD(x1, d0);
-                        } else {
-                            FCVTZSwD(x1, v1);
+                    if(arm64_frintts) {
+                        VFRINT32ZDQ(l?d0:v0, v1); // handle overflow
+                        VFCVTZSQD(l?d0:v0, l?d0:v0);  // convert double -> int64
+                        if(!l)
+                            SQXTN_32(v0, v0);   // convert int64 -> int32 with saturation in lower part, RaZ high part
+                        else
+                            SQXTN2_32(v0, d0);   // convert int64 -> int32 with saturation in higher part
+                    } else {
+                        if(!l) {
+                            MRS_fpsr(x5);
+                            ORRw_mask(x4, xZR, 1, 0);    //0x80000000
+                            d0 = fpu_get_scratch(dyn, ninst);
+                        }
+                        for(int i=0; i<2; ++i) {
+                            BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                            MSR_fpsr(x5);
+                            if(i) {
+                                VMOVeD(d0, 0, v1, i);
+                                FCVTZSwD(x1, d0);
+                            } else {
+                                FCVTZSwD(x1, v1);
+                            }
+                            MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                            TSTw_mask(x5, 0, 0);    // mask = 1 = FPSR_IOC
+                            CSELx(x1, x1, x4, cEQ);
+                            VMOVQSfrom(v0, i+l*2, x1);
                         }
-                        MRS_fpsr(x5);   // get back FPSR to check the IOC bit
-                        TSTw_mask(x5, 0, 0);    // mask = 1 = FPSR_IOC
-                        CSELx(x1, x1, x4, cEQ);
-                        VMOVQSfrom(v0, i+l*2, x1);
+                        if(!vex.l && !l) VMOVQDfrom(v0, 1, xZR);
                     }
-                    if(!vex.l && !l) VMOVQDfrom(v0, 1, xZR);
                 }
             }
             YMM0(gd);
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
index a9429bf7..156d9243 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f.c
@@ -123,11 +123,17 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             GETGX_empty_VX(v0, v1);
             GETED(0);
             d1 = fpu_get_scratch(dyn, ninst);
+            if(BOX64ENV(dynarec_fastround)<2) {
+                u8 = sse_setround(dyn, ninst, x3, x4, x5);
+            }
             if(rex.w) {
                 SCVTFDx(d1, ed);
             } else {
                 SCVTFDw(d1, ed);
             }
+            if(BOX64ENV(dynarec_fastround)<2) {
+                x87_restoreround(dyn, ninst, u8);
+            }
             if(v0!=v1) VMOVQ(v0, v1);
             VMOVeD(v0, 0, d1, 0);
             YMM0(gd);
@@ -138,13 +144,23 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             nextop = F8;
             GETGD;
             GETEXSD(q0, 0, 0);
-            if(!BOX64ENV(dynarec_fastround)) {
+            if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) {
                 MRS_fpsr(x5);
                 BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
                 MSR_fpsr(x5);
             }
-            FCVTZSxwD(gd, q0);
-            if(!BOX64ENV(dynarec_fastround)) {
+            if(!BOX64ENV(dynarec_fastround) && arm64_frintts) {
+                v0 = fpu_get_scratch(dyn, ninst);
+                if(rex.w) {
+                    FRINT64ZD(v0, q0);
+                } else {
+                    FRINT32ZD(v0, q0);
+                }
+                FCVTZSxwD(gd, v0);
+            } else {
+                FCVTZSxwD(gd, q0);
+            }
+            if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) {
                 MRS_fpsr(x5);   // get back FPSR to check the IOC bit
                 TBZ_NEXT(x5, FPSR_IOC);
                 if(rex.w) {
@@ -159,17 +175,25 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             nextop = F8;
             GETGD;
             GETEXSD(q0, 0, 0);
-            if(!BOX64ENV(dynarec_fastround)) {
+            if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) {
                 MRS_fpsr(x5);
                 BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
                 MSR_fpsr(x5);
             }
             u8 = sse_setround(dyn, ninst, x1, x2, x3);
             d1 = fpu_get_scratch(dyn, ninst);
-            FRINTID(d1, q0);
+            if(!BOX64ENV(dynarec_fastround) && arm64_frintts) {
+                if(rex.w) {
+                    FRINT64XD(d1, q0);
+                } else {
+                    FRINT32XD(d1, q0);
+                }
+            } else {
+                FRINTID(d1, q0);
+            }
             x87_restoreround(dyn, ninst, u8);
             FCVTZSxwD(gd, d1);
-            if(!BOX64ENV(dynarec_fastround)) {
+            if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) {
                 MRS_fpsr(x5);   // get back FPSR to check the IOC bit
                 TBZ_NEXT(x5, FPSR_IOC);
                 if(rex.w) {
@@ -449,22 +473,36 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             break;
 
         case 0xC2:
-            INST_NAME("CMPSD Gx, Ex, Ib");
+            INST_NAME("VCMPSD Gx, Ex, Ib");
             nextop = F8;
             GETEXSD(v1, 0, 1);
             GETGX_empty_VX(v0, v2);
             u8 = F8;
-            FCMPD(v2, v1);
+            if(((u8&15)!=0x0b) && ((u8&15)!=0x0f)) {
+                if((u8&15)>7)
+                    FCMPD(v1, v2);
+                else
+                    FCMPD(v2, v1);
+            }
+            // TODO: create a test for this one, there might be an issue with cases 9, 10 and 13
             if(v0!=v2) VMOVQ(v0, v2);
-            switch(u8&7) {
-                case 0: CSETMx(x2, cEQ); break;   // Equal
-                case 1: CSETMx(x2, cCC); break;   // Less than
-                case 2: CSETMx(x2, cLS); break;   // Less or equal
-                case 3: CSETMx(x2, cVS); break;   // NaN
-                case 4: CSETMx(x2, cNE); break;   // Not Equal or unordered
-                case 5: CSETMx(x2, cCS); break;   // Greater or equal or unordered
-                case 6: CSETMx(x2, cHI); break;   // Greater or unordered, test inverted, N!=V so unordered or less than (inverted)
-                case 7: CSETMx(x2, cVC); break;   // not NaN
+            switch(u8&15) {
+                case 0x00: CSETMx(x2, cEQ); break;  // Equal
+                case 0x01: CSETMx(x2, cCC); break;  // Less than
+                case 0x02: CSETMx(x2, cLS); break;  // Less or equal
+                case 0x03: CSETMx(x2, cVS); break;  // NaN
+                case 0x04: CSETMx(x2, cNE); break;  // Not Equal or unordered
+                case 0x05: CSETMx(x2, cCS); break;  // Greater or equal or unordered
+                case 0x06: CSETMx(x2, cHI); break;  // Greater or unordered
+                case 0x07: CSETMx(x2, cVC); break;  // not NaN
+                case 0x08: CSETMx(x2, cEQ); CSETMx(x3, cVS); ORRx_REG(x2, x2, x3); break;  // Equal or unordered
+                case 0x09: CSETMx(x2, cHI); break;  // Less than or unordered
+                case 0x0a: CSETMx(x2, cCS); break;  // Less or equal or unordered
+                case 0x0b: MOV32w(x2, 0); break;    // false
+                case 0x0c: CSETMw(x2, cNE); CSETMx(x3, cVS); BICx(x2, x2, x3); break;  // Not Equal not unordered
+                case 0x0d: CSETMw(x2, cLS); break;  // Greater or equal not unordered
+                case 0x0e: CSETMw(x2, cCC); break;  // Greater not unordered
+                case 0x0f: MOV64x(x2, 0xffffffffffffffffLL); break; // true
             }
             VMOVQDfrom(v0, 0, x2);
             YMM0(gd);
@@ -515,28 +553,35 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
                     else
                         SQXTN2_32(v0, d0);   // convert int64 -> int32 with saturation in higher part
                 } else {
-                    if(!l) {
-                        MRS_fpsr(x5);
-                        BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
-                        MSR_fpsr(x5);
-                        ORRw_mask(x4, xZR, 1, 0);    //0x80000000
-                    }
-                    for(int i=0; i<2; ++i) {
-                        BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
-                        MSR_fpsr(x5);
-                        if(i) {
-                            VMOVeD(d0, 0, v1, i);
-                            FRINTID(d0, d0);
-                        } else {
-                            FRINTID(d0, v1);
+                    if(arm64_frintts) {
+                        VFRINT32XDQ(l?d0:v0, v1);    // round, handling of overflow and NaN to 0x80000000
+                        VFCVTNSQD(l?d0:v0, l?d0:v0);  // convert double -> int64
+                        if(!l)
+                            SQXTN_32(v0, v0);   // convert int64 -> int32 with saturation in lower part, RaZ high part
+                        else
+                            SQXTN2_32(v0, d0);   // convert int64 -> int32 with saturation in higher part
+                    } else {
+                        if(!l) {
+                            MRS_fpsr(x5);
+                            ORRw_mask(x4, xZR, 1, 0);    //0x80000000
+                        }
+                        for(int i=0; i<2; ++i) {
+                            BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                            MSR_fpsr(x5);
+                            if(i) {
+                                VMOVeD(d0, 0, v1, i);
+                                FRINTID(d0, d0);
+                            } else {
+                                FRINTID(d0, v1);
+                            }
+                            FCVTZSwD(x1, d0);
+                            MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                            TSTw_mask(x5, 0, 0);    // mask = 1 = FPSR_IOC
+                            CSELx(x1, x1, x4, cEQ);
+                            VMOVQSfrom(v0, i+l*2, x1);
                         }
-                        FCVTZSwD(x1, d0);
-                        MRS_fpsr(x5);   // get back FPSR to check the IOC bit
-                        TSTw_mask(x5, 0, 0);    // mask = 1 = FPSR_IOC
-                        CSELx(x1, x1, x4, cEQ);
-                        VMOVQSfrom(v0, i+l*2, x1);
+                        if(!vex.l && !l) VMOVQDfrom(v0, 1, xZR);
                     }
-                    if(!vex.l && !l) VMOVQDfrom(v0, 1, xZR);
                 }
             }
             x87_restoreround(dyn, ninst, u8);
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c
index 74c8f1fc..9e523b7c 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_f3_0f.c
@@ -123,11 +123,17 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             d1 = fpu_get_scratch(dyn, ninst);
             GETGX_empty_VX(v0, v1);
             GETED(0);
+            if(BOX64ENV(dynarec_fastround)<2) {
+                u8 = sse_setround(dyn, ninst, x3, x4, x5);
+            }
             if(rex.w) {
                 SCVTFSx(d1, ed);
             } else {
                 SCVTFSw(d1, ed);
             }
+            if(BOX64ENV(dynarec_fastround)<2) {
+                x87_restoreround(dyn, ninst, u8);
+            }
             if(v0!=v1) VMOVQ(v0, v1);
             VMOVeS(v0, 0, d1, 0);
             YMM0(gd);
@@ -138,13 +144,23 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             nextop = F8;
             GETGD;
             GETEXSS(d0, 0, 0);
-            if(!BOX64ENV(dynarec_fastround)) {
+            if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) {
                 MRS_fpsr(x5);
                 BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
                 MSR_fpsr(x5);
             }
-            FCVTZSxwS(gd, d0);
-            if(!BOX64ENV(dynarec_fastround)) {
+            if(!BOX64ENV(dynarec_fastround) && arm64_frintts) {
+                v0 = fpu_get_scratch(dyn, ninst);
+                if(rex.w) {
+                    FRINT64ZS(v0, q0);
+                } else {
+                    FRINT32ZS(v0, q0);
+                }
+                FCVTZSxwS(gd, v0);
+            } else {
+                FCVTZSxwS(gd, d0);
+            }
+            if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) {
                 MRS_fpsr(x5);   // get back FPSR to check the IOC bit
                 TBZ_NEXT(x5, FPSR_IOC);
                 if(rex.w) {
@@ -159,17 +175,25 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             nextop = F8;
             GETGD;
             GETEXSS(q0, 0, 0);
-            if(!BOX64ENV(dynarec_fastround)) {
+            if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) {
                 MRS_fpsr(x5);
                 BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
                 MSR_fpsr(x5);
             }
             u8 = sse_setround(dyn, ninst, x1, x2, x3);
             d1 = fpu_get_scratch(dyn, ninst);
-            FRINTIS(d1, q0);
+            if(!BOX64ENV(dynarec_fastround) && arm64_frintts) {
+                if(rex.w) {
+                    FRINT64XS(d1, q0);
+                } else {
+                    FRINT32XS(d1, q0);
+                }
+            } else {
+                FRINTIS(d1, q0);
+            }
             x87_restoreround(dyn, ninst, u8);
             FCVTZSxwS(gd, d1);
-            if(!BOX64ENV(dynarec_fastround)) {
+            if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) {
                 MRS_fpsr(x5);   // get back FPSR to check the IOC bit
                 TBZ_NEXT(x5, FPSR_IOC);
                 if(rex.w) {
@@ -287,15 +311,10 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
             d1 = fpu_get_scratch(dyn, ninst);
             GETEXSS(v1, 0, 0);
             GETGX_empty_VX(v0, v2);
-            if(v0!=v2) {
-                if(v0==v1)  {
-                    VMOV(d1, v1);
-                    v1 = d1;
-                }
-                VMOVQ(v0, v2);
-            }
             FCVT_D_S(d1, v1);
             VMOVeD(v0, 0, d1, 0);
+            if(v0!=v2)
+                VMOVeD(v0, 1, v2, 1);
             YMM0(gd);
             break;
         case 0x5B:
@@ -311,17 +330,22 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip,
                 if(BOX64ENV(dynarec_fastround)) {
                     VFCVTZSQS(v0, v1);
                 } else {
-                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
-                    MSR_fpsr(x5);
-                    for(int i=0; i<4; ++i) {
+                    if(arm64_frintts) {
+                        VFRINT32ZSQ(v0, v1);
+                        VFCVTZSQS(v0, v0);
+                    } else {
                         BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
                         MSR_fpsr(x5);
-                        VMOVeS(d0, 0, v1, i);
-                        VFCVTZSs(d0, d0);
-                        MRS_fpsr(x5);   // get back FPSR to check the IOC bit
-                        TBZ(x5, FPSR_IOC, 4+4);
-                        VMOVQSfrom(d0, 0, x4);
-                        VMOVeS(v0, i, d0, 0);
+                        for(int i=0; i<4; ++i) {
+                            BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                            MSR_fpsr(x5);
+                            VMOVeS(d0, 0, v1, i);
+                            VFCVTZSs(d0, d0);
+                            MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                            TBZ(x5, FPSR_IOC, 4+4);
+                            VMOVQSfrom(d0, 0, x4);
+                            VMOVeS(v0, i, d0, 0);
+                        }
                     }
                 }
             }
diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c b/src/dynarec/arm64/dynarec_arm64_f20f.c
index 919805d6..03240803 100644
--- a/src/dynarec/arm64/dynarec_arm64_f20f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f20f.c
@@ -118,13 +118,23 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;

             GETGD;

             GETEXSD(q0, 0, 0);

-            if(!BOX64ENV(dynarec_fastround)) {

+            if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) {

                 MRS_fpsr(x5);

                 BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

                 MSR_fpsr(x5);

             }

-            FCVTZSxwD(gd, q0);

-            if(!BOX64ENV(dynarec_fastround)) {

+            if(!BOX64ENV(dynarec_fastround) && arm64_frintts) {

+                v0 = fpu_get_scratch(dyn, ninst);

+                if(rex.w) {

+                    FRINT64ZD(v0, q0);

+                } else {

+                    FRINT32ZD(v0, q0);

+                }

+                FCVTZSxwD(gd, v0);

+            } else {

+                FCVTZSxwD(gd, q0);

+            }

+            if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) {

                 MRS_fpsr(x5);   // get back FPSR to check the IOC bit

                 TBZ_NEXT(x5, FPSR_IOC);

                 if(rex.w) {

@@ -139,17 +149,25 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;

             GETGD;

             GETEXSD(q0, 0, 0);

-            if(!BOX64ENV(dynarec_fastround)) {

+            if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) {

                 MRS_fpsr(x5);

                 BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

                 MSR_fpsr(x5);

             }

             u8 = sse_setround(dyn, ninst, x1, x2, x3);

             d1 = fpu_get_scratch(dyn, ninst);

-            FRINTID(d1, q0);

+            if(!BOX64ENV(dynarec_fastround) && arm64_frintts) {

+                if(rex.w) {

+                    FRINT64XD(d1, q0);

+                } else {

+                    FRINT32XD(d1, q0);

+                }

+            } else {

+                FRINTID(d1, q0);

+            }

             x87_restoreround(dyn, ninst, u8);

             FCVTZSxwD(gd, d1);

-            if(!BOX64ENV(dynarec_fastround)) {

+            if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) {

                 MRS_fpsr(x5);   // get back FPSR to check the IOC bit

                 TBZ_NEXT(x5, FPSR_IOC);

                 if(rex.w) {

@@ -549,28 +567,32 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 SQXTN_32(v0, v0);   // convert int64 -> int32 with saturation in lower part, RaZ high part

             } else {

                 u8 = sse_setround(dyn, ninst, x1, x2, x3);

-                MRS_fpsr(x5);

-                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

-                MSR_fpsr(x5);

-                ORRw_mask(x4, xZR, 1, 0);    //0x80000000

-                d0 = fpu_get_scratch(dyn, ninst);

-                for(int i=0; i<2; ++i) {

-                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

-                    MSR_fpsr(x5);

-                    if(i) {

-                        VMOVeD(d0, 0, v1, i);

-                        FRINTID(d0, d0);

-                    } else {

-                        FRINTID(d0, v1);

+                if(arm64_frintts) {

+                    VFRINT32XDQ(v0, v1);    // round, handling of overflow and NaN to 0x80000000

+                    VFCVTNSQD(v0, v0);  // convert double -> int64

+                    SQXTN_32(v0, v0);   // convert int64 -> int32 with saturation in lower part, RaZ high part

+                } else {

+                    MRS_fpsr(x5);

+                    ORRw_mask(x4, xZR, 1, 0);    //0x80000000

+                    d0 = fpu_get_scratch(dyn, ninst);

+                    for(int i=0; i<2; ++i) {

+                        BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

+                        MSR_fpsr(x5);

+                        if(i) {

+                            VMOVeD(d0, 0, v1, i);

+                            FRINTID(d0, d0);

+                        } else {

+                            FRINTID(d0, v1);

+                        }

+                        FCVTZSwD(x1, d0);

+                        MRS_fpsr(x5);   // get back FPSR to check the IOC bit

+                        TBZ(x5, FPSR_IOC, 4+4);

+                        MOVw_REG(x1, x4);

+                        VMOVQSfrom(v0, i, x1);

                     }

-                    FCVTZSwD(x1, d0);

-                    MRS_fpsr(x5);   // get back FPSR to check the IOC bit

-                    TBZ(x5, FPSR_IOC, 4+4);

-                    MOVw_REG(x1, x4);

-                    VMOVQSfrom(v0, i, x1);

+                    VMOVQDfrom(v0, 1, xZR);

                 }

                 x87_restoreround(dyn, ninst, u8);

-                VMOVQDfrom(v0, 1, xZR);

             }

             break;

 

diff --git a/src/dynarec/arm64/dynarec_arm64_f30f.c b/src/dynarec/arm64/dynarec_arm64_f30f.c
index 28e01f26..f7e0bf3f 100644
--- a/src/dynarec/arm64/dynarec_arm64_f30f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f30f.c
@@ -116,11 +116,17 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             GETGX(v0, 1);

             GETED(0);

             d1 = fpu_get_scratch(dyn, ninst);

+            if(BOX64ENV(dynarec_fastround)<2) {

+                u8 = sse_setround(dyn, ninst, x3, x4, x5);

+            }

             if(rex.w) {

                 SCVTFSx(d1, ed);

             } else {

                 SCVTFSw(d1, ed);

             }

+            if(BOX64ENV(dynarec_fastround)<2) {

+                x87_restoreround(dyn, ninst, u8);

+            }

             VMOVeS(v0, 0, d1, 0);

             break;

 

@@ -129,13 +135,23 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;

             GETGD;

             GETEXSS(d0, 0, 0);

-            if(!BOX64ENV(dynarec_fastround)) {

+            if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) {

                 MRS_fpsr(x5);

                 BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

                 MSR_fpsr(x5);

             }

-            FCVTZSxwS(gd, d0);

-            if(!BOX64ENV(dynarec_fastround)) {

+            if(!BOX64ENV(dynarec_fastround) && arm64_frintts) {

+                v0 = fpu_get_scratch(dyn, ninst);

+                if(rex.w) {

+                    FRINT64ZS(v0, q0);

+                } else {

+                    FRINT32ZS(v0, q0);

+                }

+                FCVTZSxwS(gd, v0);

+            } else {

+                FCVTZSxwS(gd, d0);

+            }

+            if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) {

                 MRS_fpsr(x5);   // get back FPSR to check the IOC bit

                 TBZ_NEXT(x5, FPSR_IOC);

                 if(rex.w) {

@@ -150,17 +166,25 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;

             GETGD;

             GETEXSS(q0, 0, 0);

-            if(!BOX64ENV(dynarec_fastround)) {

+            if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) {

                 MRS_fpsr(x5);

                 BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

                 MSR_fpsr(x5);

             }

             u8 = sse_setround(dyn, ninst, x1, x2, x3);

             d1 = fpu_get_scratch(dyn, ninst);

-            FRINTIS(d1, q0);

+            if(!BOX64ENV(dynarec_fastround) && arm64_frintts) {

+                if(rex.w) {

+                    FRINT64XS(d1, q0);

+                } else {

+                    FRINT32XS(d1, q0);

+                }

+            } else {

+                FRINTIS(d1, q0);

+            }

             x87_restoreround(dyn, ninst, u8);

             FCVTZSxwS(gd, d1);

-            if(!BOX64ENV(dynarec_fastround)) {

+            if(!BOX64ENV(dynarec_fastround) && !arm64_frintts) {

                 MRS_fpsr(x5);   // get back FPSR to check the IOC bit

                 TBZ_NEXT(x5, FPSR_IOC);

                 if(rex.w) {

@@ -302,20 +326,23 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             if(BOX64ENV(dynarec_fastround)) {

                 VFCVTZSQS(v0, v1);

             } else {

-                MRS_fpsr(x5);

-                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

-                MSR_fpsr(x5);

-                ORRw_mask(x4, xZR, 1, 0);    //0x80000000

-                d0 = fpu_get_scratch(dyn, ninst);

-                for(int i=0; i<4; ++i) {

-                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

-                    MSR_fpsr(x5);

-                    VMOVeS(d0, 0, v1, i);

-                    VFCVTZSs(d0, d0);

-                    MRS_fpsr(x5);   // get back FPSR to check the IOC bit

-                    TBZ(x5, FPSR_IOC, 4+4);

-                    VMOVQSfrom(d0, 0, x4);

-                    VMOVeS(v0, i, d0, 0);

+                if(arm64_frintts) {

+                    VFRINT32ZSQ(v0, v1);

+                    VFCVTZSQS(v0, v0);

+                } else {

+                    MRS_fpsr(x5);

+                    ORRw_mask(x4, xZR, 1, 0);    //0x80000000

+                    d0 = fpu_get_scratch(dyn, ninst);

+                    for(int i=0; i<4; ++i) {

+                        BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

+                        MSR_fpsr(x5);

+                        VMOVeS(d0, 0, v1, i);

+                        VFCVTZSs(d0, d0);

+                        MRS_fpsr(x5);   // get back FPSR to check the IOC bit

+                        TBZ(x5, FPSR_IOC, 4+4);

+                        VMOVQSfrom(d0, 0, x4);

+                        VMOVeS(v0, i, d0, 0);

+                    }

                 }

             }

             break;