about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorYang Liu <liuyang22@iscas.ac.cn>2025-03-13 17:37:22 +0800
committerGitHub <noreply@github.com>2025-03-13 10:37:22 +0100
commit1793c2a18292d3f3a1c1ba0fcdcd738b43997a2e (patch)
tree81200c7b95c92cba701b60f4789380ee215d5c36 /src
parentb72002703e707e86d712903f5278c4248fddf8a5 (diff)
downloadbox64-1793c2a18292d3f3a1c1ba0fcdcd738b43997a2e.tar.gz
box64-1793c2a18292d3f3a1c1ba0fcdcd738b43997a2e.zip
[RV64_DYNAREC] Fixed some x87 rounding cases for fastround=0 (#2437)
Diffstat (limited to 'src')
-rw-r--r--src/dynarec/rv64/dynarec_rv64_d8.c24
-rw-r--r--src/dynarec/rv64/dynarec_rv64_d9.c30
-rw-r--r--src/dynarec/rv64/dynarec_rv64_da.c13
-rw-r--r--src/dynarec/rv64/dynarec_rv64_dc.c25
-rw-r--r--src/dynarec/rv64/dynarec_rv64_de.c15
-rw-r--r--src/dynarec/rv64/dynarec_rv64_df.c2
-rw-r--r--src/dynarec/rv64/rv64_emitter.h24
7 files changed, 114 insertions, 19 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_d8.c b/src/dynarec/rv64/dynarec_rv64_d8.c
index 1af3cfa0..94503ea9 100644
--- a/src/dynarec/rv64/dynarec_rv64_d8.c
+++ b/src/dynarec/rv64/dynarec_rv64_d8.c
@@ -48,21 +48,25 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("FADD ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
             if (ST_IS_F(0)) {
                 FADDS(v1, v1, v2);
             } else {
                 FADDD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         case 0xC8 ... 0xCF:
             INST_NAME("FMUL ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
             if (ST_IS_F(0)) {
                 FMULS(v1, v1, v2);
             } else {
                 FMULD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         case 0xD0 ... 0xD7:
             INST_NAME("FCOM ST0, STx");
@@ -89,41 +93,49 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("FSUB ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
             if (ST_IS_F(0)) {
                 FSUBS(v1, v1, v2);
             } else {
                 FSUBD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         case 0xE8 ... 0xEF:
             INST_NAME("FSUBR ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
             if (ST_IS_F(0)) {
                 FSUBS(v1, v2, v1);
             } else {
                 FSUBD(v1, v2, v1);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         case 0xF0 ... 0xF7:
             INST_NAME("FDIV ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
             if (ST_IS_F(0)) {
                 FDIVS(v1, v1, v2);
             } else {
                 FDIVD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         case 0xF8 ... 0xFF:
             INST_NAME("FDIVR ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
             if (ST_IS_F(0)) {
                 FDIVS(v1, v2, v1);
             } else {
                 FDIVD(v1, v2, v1);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
 
         default:
@@ -134,12 +146,14 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     s0 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLW(s0, ed, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
                     if (ST_IS_F(0)) {
                         FADDS(v1, v1, s0);
                     } else {
                         FCVTDS(s0, s0);
                         FADDD(v1, v1, s0);
                     }
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 1:
                     INST_NAME("FMUL ST0, float[ED]");
@@ -147,12 +161,14 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     s0 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLW(s0, ed, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
                     if (ST_IS_F(0)) {
                         FMULS(v1, v1, s0);
                     } else {
                         FCVTDS(s0, s0);
                         FMULD(v1, v1, s0);
                     }
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 2:
                     INST_NAME("FCOM ST0, float[ED]");
@@ -187,12 +203,14 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     s0 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLW(s0, ed, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
                     if (ST_IS_F(0)) {
                         FSUBS(v1, v1, s0);
                     } else {
                         FCVTDS(s0, s0);
                         FSUBD(v1, v1, s0);
                     }
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 5:
                     INST_NAME("FSUBR ST0, float[ED]");
@@ -200,12 +218,14 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     s0 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLW(s0, ed, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
                     if (ST_IS_F(0)) {
                         FSUBS(v1, s0, v1);
                     } else {
                         FCVTDS(s0, s0);
                         FSUBD(v1, s0, v1);
                     }
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 6:
                     INST_NAME("FDIV ST0, float[ED]");
@@ -213,12 +233,14 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     s0 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLW(s0, ed, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
                     if (ST_IS_F(0)) {
                         FDIVS(v1, v1, s0);
                     } else {
                         FCVTDS(s0, s0);
                         FDIVD(v1, v1, s0);
                     }
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 7:
                     INST_NAME("FDIVR ST0, float[ED]");
@@ -226,12 +248,14 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     s0 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLW(s0, ed, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
                     if (ST_IS_F(0)) {
                         FDIVS(v1, s0, v1);
                     } else {
                         FCVTDS(s0, s0);
                         FDIVD(v1, s0, v1);
                     }
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
             }
     }
diff --git a/src/dynarec/rv64/dynarec_rv64_d9.c b/src/dynarec/rv64/dynarec_rv64_d9.c
index 02cf39a5..51321ab9 100644
--- a/src/dynarec/rv64/dynarec_rv64_d9.c
+++ b/src/dynarec/rv64/dynarec_rv64_d9.c
@@ -284,7 +284,9 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_forget(dyn, ninst, x1, x2, 0);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_ftan, -1, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+            CALL_(native_ftan, -1, BOX64ENV(dynarec_fastround) ? 0 : u8, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             x87_unstackcount(dyn, ninst, x3, s0);
             X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_F);
             if (ST_IS_F(0)) {
@@ -301,7 +303,9 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_fpatan, -1, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+            CALL_(native_fpatan, -1, BOX64ENV(dynarec_fastround) ? 0 : u8, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             x87_unstackcount(dyn, ninst, x3, s0);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
@@ -361,11 +365,13 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         case 0xFA:
             INST_NAME("FSQRT");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
             if (ST_IS_F(0)) {
                 FSQRTS(v1, v1);
             } else {
                 FSQRTD(v1, v1);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         case 0xFB:
             INST_NAME("FSINCOS");
@@ -373,7 +379,9 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_fsincos, -1, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+            CALL_(native_fsincos, -1, BOX64ENV(dynarec_fastround) ? 0 : u8, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             x87_unstackcount(dyn, ninst, x3, s0);
             break;
         case 0xFC:
@@ -422,7 +430,9 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_fscale, -1, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+            CALL_(native_fscale, -1, BOX64ENV(dynarec_fastround) ? 0 : u8, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             x87_unstackcount(dyn, ninst, x3, s0);
             break;
         case 0xFE:
@@ -430,7 +440,9 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_forget(dyn, ninst, x1, x2, 0);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_fsin, -1, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+            CALL_(native_fsin, -1, BOX64ENV(dynarec_fastround) ? 0 : u8, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             x87_unstackcount(dyn, ninst, x3, s0);
             break;
         case 0xFF:
@@ -438,7 +450,9 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_forget(dyn, ninst, x1, x2, 0);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_fcos, -1, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+            CALL_(native_fcos, -1, BOX64ENV(dynarec_fastround) ? 0 : u8, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             x87_unstackcount(dyn, ninst, x3, s0);
             break;
 
@@ -474,7 +488,9 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         s0 = v1;
                     else {
                         s0 = fpu_get_scratch(dyn);
+                        if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
                         FCVTSD(s0, v1);
+                        if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     }
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FSW(s0, ed, fixedaddress);
@@ -484,7 +500,9 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     if (!ST_IS_F(0)) {
+                        if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
                         FCVTSD(v1, v1);
+                        if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     }
                     FSW(v1, ed, fixedaddress);
                     X87_POP_OR_FAIL(dyn, ninst, x3);
diff --git a/src/dynarec/rv64/dynarec_rv64_da.c b/src/dynarec/rv64/dynarec_rv64_da.c
index b1d2042b..7609d877 100644
--- a/src/dynarec/rv64/dynarec_rv64_da.c
+++ b/src/dynarec/rv64/dynarec_rv64_da.c
@@ -28,6 +28,7 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     uint8_t nextop = F8;
     int64_t j64;
     uint8_t ed;
+    uint8_t u8;
     uint8_t wback;
     int v1, v2;
     int d0;
@@ -153,7 +154,9 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     LW(x1, ed, fixedaddress);
                     FCVTDW(v2, x1, RD_RNE); // i32 -> double
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FADDD(v1, v1, v2);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 1:
                     INST_NAME("FIMUL ST0, Ed");
@@ -162,7 +165,9 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     LW(x1, ed, fixedaddress);
                     FCVTDW(v2, x1, RD_RNE); // i32 -> double
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FMULD(v1, v1, v2);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 2:
                     INST_NAME("FICOM ST0, Ed");
@@ -190,7 +195,9 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     LW(x1, ed, fixedaddress);
                     FCVTDW(v2, x1, RD_RNE); // i32 -> double
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FSUBD(v1, v1, v2);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 5:
                     INST_NAME("FISUBR ST0, Ed");
@@ -199,7 +206,9 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     LW(x1, ed, fixedaddress);
                     FCVTDW(v2, x1, RD_RNE); // i32 -> double
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FSUBD(v1, v2, v1);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 6:
                     INST_NAME("FIDIV ST0, Ed");
@@ -208,7 +217,9 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     LW(x1, ed, fixedaddress);
                     FCVTDW(v2, x1, RD_RNE); // i32 -> double
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FDIVD(v1, v1, v2);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 7:
                     INST_NAME("FIDIVR ST0, Ed");
@@ -217,7 +228,9 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     LW(x1, ed, fixedaddress);
                     FCVTDW(v2, x1, RD_RNE); // i32 -> double
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FDIVD(v1, v2, v1);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
             }
     }
diff --git a/src/dynarec/rv64/dynarec_rv64_dc.c b/src/dynarec/rv64/dynarec_rv64_dc.c
index 955438ff..309abde8 100644
--- a/src/dynarec/rv64/dynarec_rv64_dc.c
+++ b/src/dynarec/rv64/dynarec_rv64_dc.c
@@ -31,6 +31,7 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
 
     uint8_t nextop = F8;
     uint8_t wback;
+    uint8_t u8;
     int64_t fixedaddress;
     int unscaled;
     int v1, v2;
@@ -43,21 +44,25 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("FADD STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FADDS(v1, v1, v2);
             } else {
                 FADDD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         case 0xC8 ... 0xCF:
             INST_NAME("FMUL STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FMULS(v1, v1, v2);
             } else {
                 FMULD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         case 0xD0 ... 0xD7:
             INST_NAME("FCOM ST0, STx"); // yep
@@ -84,41 +89,49 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("FSUBR STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FSUBS(v1, v2, v1);
             } else {
                 FSUBD(v1, v2, v1);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         case 0xE8 ... 0xEF:
             INST_NAME("FSUB STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FSUBS(v1, v1, v2);
             } else {
                 FSUBD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         case 0xF0 ... 0xF7:
             INST_NAME("FDIVR STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FDIVS(v1, v2, v1);
             } else {
                 FDIVD(v1, v2, v1);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         case 0xF8 ... 0xFF:
             INST_NAME("FDIV STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FDIVS(v1, v1, v2);
             } else {
                 FDIVD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         default:
             switch ((nextop >> 3) & 7) {
@@ -128,7 +141,9 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     v2 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLD(v2, wback, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FADDD(v1, v1, v2);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 1:
                     INST_NAME("FMUL ST0, double[ED]");
@@ -136,7 +151,9 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     v2 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLD(v2, wback, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FMULD(v1, v1, v2);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 2:
                     INST_NAME("FCOM ST0, double[ED]");
@@ -161,7 +178,9 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     v2 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLD(v2, wback, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FSUBD(v1, v1, v2);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 5:
                     INST_NAME("FSUBR ST0, double[ED]");
@@ -169,7 +188,9 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     v2 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLD(v2, wback, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FSUBD(v1, v2, v1);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 6:
                     INST_NAME("FDIV ST0, double[ED]");
@@ -177,7 +198,9 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     v2 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLD(v2, wback, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FDIVD(v1, v1, v2);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 7:
                     INST_NAME("FDIVR ST0, double[ED]");
@@ -185,7 +208,9 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     v2 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLD(v2, wback, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FDIVD(v1, v2, v1);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
             }
     }
diff --git a/src/dynarec/rv64/dynarec_rv64_de.c b/src/dynarec/rv64/dynarec_rv64_de.c
index 5613a3eb..91a7b0c5 100644
--- a/src/dynarec/rv64/dynarec_rv64_de.c
+++ b/src/dynarec/rv64/dynarec_rv64_de.c
@@ -31,6 +31,7 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
 
     uint8_t nextop = F8;
     uint8_t wback;
+    uint8_t u8;
     int64_t fixedaddress;
     int v1, v2;
 
@@ -49,11 +50,13 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("FADDP STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FADDS(v1, v1, v2);
             } else {
                 FADDD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xC8:
@@ -67,11 +70,13 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("FMULP STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FMULS(v1, v1, v2);
             } else {
                 FMULD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xD0:
@@ -90,6 +95,7 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             } else {
                 FCOMD(v1, v2, x1, x2, x3, x4, x5);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xD9:
@@ -101,6 +107,7 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             } else {
                 FCOMD(v1, v2, x1, x2, x3, x4, x5);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
@@ -115,11 +122,13 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("FSUBRP STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FSUBS(v1, v2, v1);
             } else {
                 FSUBD(v1, v2, v1);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xE8:
@@ -133,11 +142,13 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("FSUBP STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FSUBS(v1, v1, v2);
             } else {
                 FSUBD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xF0:
@@ -151,11 +162,13 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("FDIVRP STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FDIVS(v1, v2, v1);
             } else {
                 FDIVD(v1, v2, v1);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xF8:
@@ -169,11 +182,13 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("FDIVP STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FDIVS(v1, v1, v2);
             } else {
                 FDIVD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xD8:
diff --git a/src/dynarec/rv64/dynarec_rv64_df.c b/src/dynarec/rv64/dynarec_rv64_df.c
index 9d277713..cbb75923 100644
--- a/src/dynarec/rv64/dynarec_rv64_df.c
+++ b/src/dynarec/rv64/dynarec_rv64_df.c
@@ -242,7 +242,7 @@ uintptr_t dynarec64_DF(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                             ADD(x5, x5, x4);
                             SD(x1, x5, 8); // ll
                         }
-                        FCVTDL(v1, x1, RD_RTZ);
+                        FCVTDL(v1, x1, RD_DYN);
                         if (rex.is32bits) {
                             FSD(v1, x5, 0); // ref
                         }
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index dbdccf24..31a6c290 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -672,11 +672,11 @@
 // Convert from Single to signed 32bits (truncated)
 #define FCVTWS(rd, frs1, rm) EMIT(R_type(0b1100000, 0b00000, frs1, rm, rd, 0b1010011))
 
-#define FADDS(frd, frs1, frs2) EMIT(R_type(0b0000000, frs2, frs1, 0b000, frd, 0b1010011))
-#define FSUBS(frd, frs1, frs2) EMIT(R_type(0b0000100, frs2, frs1, 0b000, frd, 0b1010011))
-#define FMULS(frd, frs1, frs2) EMIT(R_type(0b0001000, frs2, frs1, 0b000, frd, 0b1010011))
-#define FDIVS(frd, frs1, frs2) EMIT(R_type(0b0001100, frs2, frs1, 0b000, frd, 0b1010011))
-#define FSQRTS(frd, frs1)      EMIT(R_type(0b0101100, 0b00000, frs1, 0b000, frd, 0b1010011))
+#define FADDS(frd, frs1, frs2) EMIT(R_type(0b0000000, frs2, frs1, 0b111, frd, 0b1010011))
+#define FSUBS(frd, frs1, frs2) EMIT(R_type(0b0000100, frs2, frs1, 0b111, frd, 0b1010011))
+#define FMULS(frd, frs1, frs2) EMIT(R_type(0b0001000, frs2, frs1, 0b111, frd, 0b1010011))
+#define FDIVS(frd, frs1, frs2) EMIT(R_type(0b0001100, frs2, frs1, 0b111, frd, 0b1010011))
+#define FSQRTS(frd, frs1)      EMIT(R_type(0b0101100, 0b00000, frs1, 0b111, frd, 0b1010011))
 #define FMINS(frd, frs1, frs2) EMIT(R_type(0b0010100, frs2, frs1, 0b000, frd, 0b1010011))
 #define FMAXS(frd, frs1, frs2) EMIT(R_type(0b0010100, frs2, frs1, 0b001, frd, 0b1010011))
 
@@ -703,9 +703,9 @@
 // store double precision frs2 to rs1+imm12
 #define FSD(frs2, rs1, imm12) EMIT(S_type(imm12, frs2, rs1, 0b011, 0b0100111))
 // Convert Double frs1 to Single frd
-#define FCVTSD(frd, frs1) EMIT(R_type(0b0100000, 0b00001, frs1, 0b000, frd, 0b1010011))
+#define FCVTSD(frd, frs1) EMIT(R_type(0b0100000, 0b00001, frs1, 0b111, frd, 0b1010011))
 // Convert Single frs1 to Double frd
-#define FCVTDS(frd, frs1) EMIT(R_type(0b0100001, 0b00000, frs1, 0b000, frd, 0b1010011))
+#define FCVTDS(frd, frs1) EMIT(R_type(0b0100001, 0b00000, frs1, 0b111, frd, 0b1010011))
 // Convert from Double to signed 32bits
 #define FCVTWD(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00000, frs1, rm, rd, 0b1010011))
 // Convert from Double to unsigned 32bits
@@ -729,11 +729,11 @@
 #define FLTD(rd, frs1, frs2) EMIT(R_type(0b1010001, frs2, frs1, 0b001, rd, 0b1010011))
 #define FLED(rd, frs1, frs2) EMIT(R_type(0b1010001, frs2, frs1, 0b000, rd, 0b1010011))
 
-#define FADDD(frd, frs1, frs2) EMIT(R_type(0b0000001, frs2, frs1, 0b000, frd, 0b1010011))
-#define FSUBD(frd, frs1, frs2) EMIT(R_type(0b0000101, frs2, frs1, 0b000, frd, 0b1010011))
-#define FMULD(frd, frs1, frs2) EMIT(R_type(0b0001001, frs2, frs1, 0b000, frd, 0b1010011))
-#define FDIVD(frd, frs1, frs2) EMIT(R_type(0b0001101, frs2, frs1, 0b000, frd, 0b1010011))
-#define FSQRTD(frd, frs1)      EMIT(R_type(0b0101101, 0b00000, frs1, 0b000, frd, 0b1010011))
+#define FADDD(frd, frs1, frs2) EMIT(R_type(0b0000001, frs2, frs1, 0b111, frd, 0b1010011))
+#define FSUBD(frd, frs1, frs2) EMIT(R_type(0b0000101, frs2, frs1, 0b111, frd, 0b1010011))
+#define FMULD(frd, frs1, frs2) EMIT(R_type(0b0001001, frs2, frs1, 0b111, frd, 0b1010011))
+#define FDIVD(frd, frs1, frs2) EMIT(R_type(0b0001101, frs2, frs1, 0b111, frd, 0b1010011))
+#define FSQRTD(frd, frs1)      EMIT(R_type(0b0101101, 0b00000, frs1, 0b111, frd, 0b1010011))
 #define FMIND(frd, frs1, frs2) EMIT(R_type(0b0010101, frs2, frs1, 0b000, frd, 0b1010011))
 #define FMAXD(frd, frs1, frs2) EMIT(R_type(0b0010101, frs2, frs1, 0b001, frd, 0b1010011))