about summary refs log tree commit diff stats
path: root/src/dynarec
diff options
context:
space:
mode:
Diffstat (limited to 'src/dynarec')
-rw-r--r--src/dynarec/arm64/dynarec_arm64_d8.c49
-rw-r--r--src/dynarec/arm64/dynarec_arm64_d9.c48
-rw-r--r--src/dynarec/arm64/dynarec_arm64_da.c25
-rw-r--r--src/dynarec/arm64/dynarec_arm64_dc.c49
-rw-r--r--src/dynarec/arm64/dynarec_arm64_de.c25
-rw-r--r--src/dynarec/dynarec_native_functions.c16
6 files changed, 203 insertions, 9 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_d8.c b/src/dynarec/arm64/dynarec_arm64_d8.c
index e8f002a4..826cad7d 100644
--- a/src/dynarec/arm64/dynarec_arm64_d8.c
+++ b/src/dynarec/arm64/dynarec_arm64_d8.c
@@ -29,6 +29,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
 
     uint8_t nextop = F8;
     uint8_t ed;
+    uint8_t u8;
     int64_t fixedaddress;
     int unscaled;
     int v1, v2;
@@ -51,11 +52,15 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("FADD ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
             if(ST_IS_F(0)) {
                 FADDS(v1, v1, v2);
             } else {
                 FADDD(v1, v1, v2);
             }
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         case 0xC8:
         case 0xC9:
@@ -68,11 +73,15 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("FMUL ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
             if(ST_IS_F(0)) {
                 FMULS(v1, v1, v2);
             } else {
                 FMULD(v1, v1, v2);
             }
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         case 0xD0:
         case 0xD1:
@@ -122,11 +131,15 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("FSUB ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
             if(ST_IS_F(0)) {
                 FSUBS(v1, v1, v2);
             } else {
                 FSUBD(v1, v1, v2);
             }
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         case 0xE8:
         case 0xE9:
@@ -139,11 +152,15 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("FSUBR ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
             if(ST_IS_F(0)) {
                 FSUBS(v1, v2, v1);
             } else {
                 FSUBD(v1, v2, v1);
             }
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         case 0xF0:
         case 0xF1:
@@ -156,11 +173,15 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("FDIV ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
             if(ST_IS_F(0)) {
                 FDIVS(v1, v1, v2);
             } else {
                 FDIVD(v1, v1, v2);
             }
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         case 0xF8:
         case 0xF9:
@@ -173,11 +194,15 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("FDIVR ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
             if(ST_IS_F(0)) {
                 FDIVS(v1, v2, v1);
             } else {
                 FDIVD(v1, v2, v1);
             }
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         default:
             DEFAULT;
@@ -190,12 +215,16 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 s0 = fpu_get_scratch(dyn, ninst);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
                 VLD32(s0, ed, fixedaddress);
+                if(!box64_dynarec_fastround)
+                    u8 = x87_setround(dyn, ninst, x1, x5, x4);
                 if(ST_IS_F(0)) {
                     FADDS(v1, v1, s0);
                 } else {
                     FCVT_D_S(s0, s0);
                     FADDD(v1, v1, s0);
                 }
+                if(!box64_dynarec_fastround)
+                    x87_restoreround(dyn, ninst, u8);
                 break;
             case 1:
                 INST_NAME("FMUL ST0, float[ED]");
@@ -203,12 +232,16 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 s0 = fpu_get_scratch(dyn, ninst);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
                 VLD32(s0, ed, fixedaddress);
+                if(!box64_dynarec_fastround)
+                    u8 = x87_setround(dyn, ninst, x1, x5, x4);
                 if(ST_IS_F(0)) {
                     FMULS(v1, v1, s0);
                 } else {
                     FCVT_D_S(s0, s0);
                     FMULD(v1, v1, s0);
                 }
+                if(!box64_dynarec_fastround)
+                    x87_restoreround(dyn, ninst, u8);
                 break;
             case 2:
                 INST_NAME("FCOM ST0, float[ED]");
@@ -245,12 +278,16 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 s0 = fpu_get_scratch(dyn, ninst);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
                 VLD32(s0, ed, fixedaddress);
+                if(!box64_dynarec_fastround)
+                    u8 = x87_setround(dyn, ninst, x1, x5, x4);
                 if(ST_IS_F(0)) {
                     FSUBS(v1, v1, s0);
                 } else {
                     FCVT_D_S(s0, s0);
                     FSUBD(v1, v1, s0);
                 }
+                if(!box64_dynarec_fastround)
+                    x87_restoreround(dyn, ninst, u8);
                 break;
             case 5:
                 INST_NAME("FSUBR ST0, float[ED]");
@@ -258,12 +295,16 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 s0 = fpu_get_scratch(dyn, ninst);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
                 VLD32(s0, ed, fixedaddress);
+                if(!box64_dynarec_fastround)
+                    u8 = x87_setround(dyn, ninst, x1, x5, x4);
                 if(ST_IS_F(0)) {
                     FSUBS(v1, s0, v1);
                 } else {
                     FCVT_D_S(s0, s0);
                     FSUBD(v1, s0, v1);
                 }
+                if(!box64_dynarec_fastround)
+                    x87_restoreround(dyn, ninst, u8);
                 break;
             case 6:
                 INST_NAME("FDIV ST0, float[ED]");
@@ -271,12 +312,16 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 s0 = fpu_get_scratch(dyn, ninst);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
                 VLD32(s0, ed, fixedaddress);
+                if(!box64_dynarec_fastround)
+                    u8 = x87_setround(dyn, ninst, x1, x5, x4);
                 if(ST_IS_F(0)) {
                     FDIVS(v1, v1, s0);
                 } else {
                     FCVT_D_S(s0, s0);
                     FDIVD(v1, v1, s0);
                 }
+                if(!box64_dynarec_fastround)
+                    x87_restoreround(dyn, ninst, u8);
                 break;
             case 7:
                 INST_NAME("FDIVR ST0, float[ED]");
@@ -284,12 +329,16 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 s0 = fpu_get_scratch(dyn, ninst);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
                 VLD32(s0, ed, fixedaddress);
+                if(!box64_dynarec_fastround)
+                    u8 = x87_setround(dyn, ninst, x1, x5, x4);
                 if(ST_IS_F(0)) {
                     FDIVS(v1, s0, v1);
                 } else {
                     FCVT_D_S(s0, s0);
                     FDIVD(v1, s0, v1);
                 }
+                if(!box64_dynarec_fastround)
+                    x87_restoreround(dyn, ninst, u8);
                 break;
             default:
                 DEFAULT;
diff --git a/src/dynarec/arm64/dynarec_arm64_d9.c b/src/dynarec/arm64/dynarec_arm64_d9.c
index 378070a0..c1961a01 100644
--- a/src/dynarec/arm64/dynarec_arm64_d9.c
+++ b/src/dynarec/arm64/dynarec_arm64_d9.c
@@ -291,8 +291,12 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             i1 = x87_stackcount(dyn, ninst, x1);
             x87_forget(dyn, ninst, x1, x2, 0);
-            CALL(native_ftan, -1);
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
+            CALL_(native_ftan, -1, box64_dynarec_fastround ? 0 : u8);
             x87_unstackcount(dyn, ninst, x1, i1);
+           if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             if(PK(0)==0xdd && PK(1)==0xd8) {
                 MESSAGE(LOG_DUMP, "Optimized next DD D8 fstp st0, st0, not emitting 1\n");
                 u8 = F8;
@@ -312,7 +316,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             i1 = x87_stackcount(dyn, ninst, x1);
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
-            CALL(native_fpatan, -1);
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
+            CALL_(native_fpatan, -1, box64_dynarec_fastround ? 0 : u8);
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             x87_unstackcount(dyn, ninst, x1, i1);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
@@ -418,11 +426,15 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         case 0xFA:
             INST_NAME("FSQRT");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
             if(ST_IS_F(0)) {
                 FSQRTS(v1, v1);
             } else {
                 FSQRTD(v1, v1);
             }
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         case 0xFB:
             INST_NAME("FSINCOS");
@@ -430,7 +442,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, 0);
             i1 = x87_stackcount(dyn, ninst, x1);
             x87_forget(dyn, ninst, x1, x2, 1);
-            CALL(native_fsincos, -1);
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
+            CALL_(native_fsincos, -1, box64_dynarec_fastround ? 0 : u8);
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             x87_unstackcount(dyn, ninst, x1, i1);
             break;
         case 0xFC:
@@ -457,7 +473,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             i1 = x87_stackcount(dyn, ninst, x1);
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
-            CALL(native_fscale, -1);
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
+            CALL_(native_fscale, -1, box64_dynarec_fastround ? 0 : u8);
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             x87_unstackcount(dyn, ninst, x1, i1);
             break;
         case 0xFE:
@@ -465,7 +485,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             i1 = x87_stackcount(dyn, ninst, x1);
             x87_forget(dyn, ninst, x1, x2, 0);
-            CALL(native_fsin, -1);
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
+            CALL_(native_fsin, -1, box64_dynarec_fastround ? 0 : u8);
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             x87_unstackcount(dyn, ninst, x1, i1);
             break;
         case 0xFF:
@@ -473,7 +497,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             i1 = x87_stackcount(dyn, ninst, x1);
             x87_forget(dyn, ninst, x1, x2, 0);
-            CALL(native_fcos, -1);
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
+            CALL_(native_fcos, -1, box64_dynarec_fastround ? 0 : u8);
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             x87_unstackcount(dyn, ninst, x1, i1);
             break;
         default:
@@ -497,7 +525,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     s0 = v1;
                 else {
                     s0 = fpu_get_scratch(dyn, ninst);
+                    if(!box64_dynarec_fastround)
+                        u8 = x87_setround(dyn, ninst, x1, x2, x4);
                     FCVT_S_D(s0, v1);
+                    if(!box64_dynarec_fastround)
+                        x87_restoreround(dyn, ninst, u8);
                 }
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
                 VST32(s0, ed, fixedaddress);
@@ -507,7 +539,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_F);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
                 if(!ST_IS_F(0)) {
+                    if(!box64_dynarec_fastround)
+                        u8 = x87_setround(dyn, ninst, x1, x5, x4);
                     FCVT_S_D(v1, v1);
+                    if(!box64_dynarec_fastround)
+                        x87_restoreround(dyn, ninst, u8);
                 }
                 VST32(v1, ed, fixedaddress);
                 X87_POP_OR_FAIL(dyn, ninst, x3);
diff --git a/src/dynarec/arm64/dynarec_arm64_da.c b/src/dynarec/arm64/dynarec_arm64_da.c
index 52965b3f..6e4bb528 100644
--- a/src/dynarec/arm64/dynarec_arm64_da.c
+++ b/src/dynarec/arm64/dynarec_arm64_da.c
@@ -29,6 +29,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
     int64_t j64;
     uint8_t ed;
     uint8_t wback;
+    uint8_t u8;
     int v1, v2;
     int d0;
     int s0;
@@ -148,7 +149,11 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 VLD32(v2, ed, fixedaddress);
                 SXTL_32(v2, v2);    // i32 -> i64
                 SCVTFDD(v2, v2);    // i64 -> double
+                if(!box64_dynarec_fastround)
+                    u8 = x87_setround(dyn, ninst, x1, x5, x4);
                 FADDD(v1, v1, v2);
+                if(!box64_dynarec_fastround)
+                    x87_restoreround(dyn, ninst, u8);
                 break;
             case 1:
                 INST_NAME("FIMUL ST0, Ed");
@@ -158,7 +163,11 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 VLD32(v2, ed, fixedaddress);
                 SXTL_32(v2, v2);    // i32 -> i64
                 SCVTFDD(v2, v2);    // i64 -> double
+                if(!box64_dynarec_fastround)
+                    u8 = x87_setround(dyn, ninst, x1, x5, x4);
                 FMULD(v1, v1, v2);
+                if(!box64_dynarec_fastround)
+                    x87_restoreround(dyn, ninst, u8);
                 break;
             case 2:
                 INST_NAME("FICOM ST0, Ed");
@@ -191,7 +200,11 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 VLD32(v2, ed, fixedaddress);
                 SXTL_32(v2, v2);    // i32 -> i64
                 SCVTFDD(v2, v2);    // i64 -> double
+                if(!box64_dynarec_fastround)
+                    u8 = x87_setround(dyn, ninst, x1, x5, x4);
                 FSUBD(v1, v1, v2);
+                if(!box64_dynarec_fastround)
+                    x87_restoreround(dyn, ninst, u8);
                 break;
             case 5:
                 INST_NAME("FISUBR ST0, Ed");
@@ -201,7 +214,11 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 VLD32(v2, ed, fixedaddress);
                 SXTL_32(v2, v2);    // i32 -> i64
                 SCVTFDD(v2, v2);    // i64 -> double
+                if(!box64_dynarec_fastround)
+                    u8 = x87_setround(dyn, ninst, x1, x5, x4);
                 FSUBD(v1, v2, v1);
+                if(!box64_dynarec_fastround)
+                    x87_restoreround(dyn, ninst, u8);
                 break;
             case 6:
                 INST_NAME("FIDIV ST0, Ed");
@@ -211,7 +228,11 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 VLD32(v2, ed, fixedaddress);
                 SXTL_32(v2, v2);    // i32 -> i64
                 SCVTFDD(v2, v2);    // i64 -> double
+                if(!box64_dynarec_fastround)
+                    u8 = x87_setround(dyn, ninst, x1, x5, x4);
                 FDIVD(v1, v1, v2);
+                if(!box64_dynarec_fastround)
+                    x87_restoreround(dyn, ninst, u8);
                 break;
             case 7:
                 INST_NAME("FIDIVR ST0, Ed");
@@ -221,7 +242,11 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 VLD32(v2, ed, fixedaddress);
                 SXTL_32(v2, v2);    // i32 -> i64
                 SCVTFDD(v2, v2);    // i64 -> double
+                if(!box64_dynarec_fastround)
+                    u8 = x87_setround(dyn, ninst, x1, x5, x4);
                 FDIVD(v1, v2, v1);
+                if(!box64_dynarec_fastround)
+                    x87_restoreround(dyn, ninst, u8);
                 break;
         }
     return addr;
diff --git a/src/dynarec/arm64/dynarec_arm64_dc.c b/src/dynarec/arm64/dynarec_arm64_dc.c
index ee5b2c62..2fbac9d8 100644
--- a/src/dynarec/arm64/dynarec_arm64_dc.c
+++ b/src/dynarec/arm64/dynarec_arm64_dc.c
@@ -29,6 +29,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
 
     uint8_t nextop = F8;
     uint8_t wback;
+    uint8_t u8;
     int64_t fixedaddress;
     int unscaled;
     int v1, v2;
@@ -49,11 +50,15 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("FADD STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
             if(ST_IS_F(0)) {
                 FADDS(v1, v1, v2);
             } else {
                 FADDD(v1, v1, v2);
             }
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         case 0xC8:
         case 0xC9:
@@ -66,11 +71,15 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("FMUL STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
             if(ST_IS_F(0)) {
                 FMULS(v1, v1, v2);
             } else {
                 FMULD(v1, v1, v2);
             }
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         case 0xD0:
         case 0xD1:
@@ -120,11 +129,15 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("FSUBR STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
             if(ST_IS_F(0)) {
                 FSUBS(v1, v2, v1);
             } else {
                 FSUBD(v1, v2, v1);
             }
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         case 0xE8:
         case 0xE9:
@@ -137,11 +150,15 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("FSUB STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
             if(ST_IS_F(0)) {
                 FSUBS(v1, v1, v2);
             } else {
                 FSUBD(v1, v1, v2);
             }
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         case 0xF0:
         case 0xF1:
@@ -154,11 +171,15 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("FDIVR STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
             if(ST_IS_F(0)) {
                 FDIVS(v1, v2, v1);
             } else {
                 FDIVD(v1, v2, v1);
             }
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         case 0xF8:
         case 0xF9:
@@ -171,11 +192,15 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("FDIV STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
             if(ST_IS_F(0)) {
                 FDIVS(v1, v1, v2);
             } else {
                 FDIVD(v1, v1, v2);
             }
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             break;
         default:
             DEFAULT;
@@ -188,7 +213,11 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 v2 = fpu_get_scratch(dyn, ninst);
                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
                 VLD64(v2, wback, fixedaddress);
+                if(!box64_dynarec_fastround)
+                    u8 = x87_setround(dyn, ninst, x1, x2, x4);
                 FADDD(v1, v1, v2);
+                if(!box64_dynarec_fastround)
+                    x87_restoreround(dyn, ninst, u8);
                 break;
             case 1:
                 INST_NAME("FMUL ST0, double[ED]");
@@ -196,7 +225,11 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 v2 = fpu_get_scratch(dyn, ninst);
                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
                 VLD64(v2, wback, fixedaddress);
+                if(!box64_dynarec_fastround)
+                    u8 = x87_setround(dyn, ninst, x1, x2, x4);
                 FMULD(v1, v1, v2);
+                if(!box64_dynarec_fastround)
+                    x87_restoreround(dyn, ninst, u8);
                 break;
             case 2:
                 INST_NAME("FCOM ST0, double[ED]");
@@ -223,7 +256,11 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 v2 = fpu_get_scratch(dyn, ninst);
                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
                 VLD64(v2, wback, fixedaddress);
+                if(!box64_dynarec_fastround)
+                    u8 = x87_setround(dyn, ninst, x1, x2, x4);
                 FSUBD(v1, v1, v2);
+                if(!box64_dynarec_fastround)
+                    x87_restoreround(dyn, ninst, u8);
                 break;
             case 5:
                 INST_NAME("FSUBR ST0, double[ED]");
@@ -231,7 +268,11 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 v2 = fpu_get_scratch(dyn, ninst);
                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
                 VLD64(v2, wback, fixedaddress);
+                if(!box64_dynarec_fastround)
+                    u8 = x87_setround(dyn, ninst, x1, x2, x4);
                 FSUBD(v1, v2, v1);
+                if(!box64_dynarec_fastround)
+                    x87_restoreround(dyn, ninst, u8);
                 break;
             case 6:
                 INST_NAME("FDIV ST0, double[ED]");
@@ -239,7 +280,11 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 v2 = fpu_get_scratch(dyn, ninst);
                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
                 VLD64(v2, wback, fixedaddress);
+                if(!box64_dynarec_fastround)
+                    u8 = x87_setround(dyn, ninst, x1, x2, x4);
                 FDIVD(v1, v1, v2);
+                if(!box64_dynarec_fastround)
+                    x87_restoreround(dyn, ninst, u8);
                 break;
             case 7:
                 INST_NAME("FDIVR ST0, double[ED]");
@@ -247,7 +292,11 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 v2 = fpu_get_scratch(dyn, ninst);
                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
                 VLD64(v2, wback, fixedaddress);
+                if(!box64_dynarec_fastround)
+                    u8 = x87_setround(dyn, ninst, x1, x2, x4);
                 FDIVD(v1, v2, v1);
+                if(!box64_dynarec_fastround)
+                    x87_restoreround(dyn, ninst, u8);
                 break;
         }
     return addr;
diff --git a/src/dynarec/arm64/dynarec_arm64_de.c b/src/dynarec/arm64/dynarec_arm64_de.c
index 7dbbb210..8dcc16c2 100644
--- a/src/dynarec/arm64/dynarec_arm64_de.c
+++ b/src/dynarec/arm64/dynarec_arm64_de.c
@@ -29,6 +29,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
 
     uint8_t nextop = F8;
     uint8_t wback;
+    uint8_t u8;
     int64_t fixedaddress;
     int unscaled;
     int v1, v2;
@@ -49,11 +50,15 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("FADDP STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
             if(ST_IS_F(0)) {
                 FADDS(v1, v1, v2);
             } else {
                 FADDD(v1, v1, v2);
             }
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xC8:
@@ -67,11 +72,15 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("FMULP STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
             if(ST_IS_F(0)) {
                 FMULS(v1, v1, v2);
             } else {
                 FMULD(v1, v1, v2);
             }
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xD0:
@@ -117,11 +126,15 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("FSUBRP STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
             if(ST_IS_F(0)) {
                 FSUBS(v1, v2, v1);
             } else {
                 FSUBD(v1, v2, v1);
             }
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xE8:
@@ -135,11 +148,15 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("FSUBP STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
             if(ST_IS_F(0)) {
                 FSUBS(v1, v1, v2);
             } else {
                 FSUBD(v1, v1, v2);
             }
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xF0:
@@ -153,11 +170,15 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("FDIVRP STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
             if(ST_IS_F(0)) {
                 FDIVS(v1, v2, v1);
             } else {
                 FDIVD(v1, v2, v1);
             }
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xF8:
@@ -171,11 +192,15 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("FDIVP STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7));
+            if(!box64_dynarec_fastround)
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
             if(ST_IS_F(0)) {
                 FDIVS(v1, v1, v2);
             } else {
                 FDIVD(v1, v1, v2);
             }
+            if(!box64_dynarec_fastround)
+                x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         default:
diff --git a/src/dynarec/dynarec_native_functions.c b/src/dynarec/dynarec_native_functions.c
index 683d8256..18082d82 100644
--- a/src/dynarec/dynarec_native_functions.c
+++ b/src/dynarec/dynarec_native_functions.c
@@ -42,7 +42,7 @@ void native_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n)
 
 void native_f2xm1(x64emu_t* emu)
 {
-    ST0.d = exp2(ST0.d) - 1.0;
+    ST0.d = expm1(LN2 * ST0.d);
 }
 void native_fyl2x(x64emu_t* emu)
 {
@@ -50,11 +50,14 @@ void native_fyl2x(x64emu_t* emu)
 }
 void native_ftan(x64emu_t* emu)
 {
+#pragma STDC FENV_ACCESS ON
+    // seems that tan of glib doesn't follow the rounding direction mode
     ST0.d = tan(ST0.d);
     emu->sw.f.F87_C2 = 0;
 }
 void native_fpatan(x64emu_t* emu)
 {
+#pragma STDC FENV_ACCESS ON
     ST1.d = atan2(ST1.d, ST0.d);
 }
 void native_fxtract(x64emu_t* emu)
@@ -97,10 +100,12 @@ void native_fprem(x64emu_t* emu)
 }
 void native_fyl2xp1(x64emu_t* emu)
 {
-    ST(1).d = log2(ST0.d + 1.0)*ST(1).d;
+    ST(1).d = log1p(ST0.d)*ST(1).d/LN2;
 }
 void native_fsincos(x64emu_t* emu)
 {
+#pragma STDC FENV_ACCESS ON
+    // seems that sincos of glib doesn't follow the rounding direction mode
     sincos(ST1.d, &ST1.d, &ST0.d);
     emu->sw.f.F87_C2 = 0;
 }
@@ -110,16 +115,21 @@ void native_frndint(x64emu_t* emu)
 }
 void native_fscale(x64emu_t* emu)
 {
+#pragma STDC FENV_ACCESS ON
     if(ST0.d!=0.0)
-        ST0.d *= exp2(trunc(ST1.d));
+        ST0.d = ldexp(ST0.d, trunc(ST1.d));
 }
 void native_fsin(x64emu_t* emu)
 {
+#pragma STDC FENV_ACCESS ON
+    // seems that sin of glib doesn't follow the rounding direction mode
     ST0.d = sin(ST0.d);
     emu->sw.f.F87_C2 = 0;
 }
 void native_fcos(x64emu_t* emu)
 {
+#pragma STDC FENV_ACCESS ON
+    // seems that cos of glib doesn't follow the rounding direction mode
     ST0.d = cos(ST0.d);
     emu->sw.f.F87_C2 = 0;
 }