about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: ptitSeb <sebastien.chev@gmail.com> 2025-04-16 14:53:54 +0200
committer: ptitSeb <sebastien.chev@gmail.com> 2025-04-16 14:53:54 +0200
commit: d7eb87129eb6aa7eeac61ddeeb1357a654380e4b (patch)
tree: bcef36a1252ff8b00b33a2ce096e4dbbe4bd5cfd /src
parent: 7f569247d511e397e72b369de2315385c879e5d1 (diff)
download: box64-d7eb87129eb6aa7eeac61ddeeb1357a654380e4b.tar.gz
          box64-d7eb87129eb6aa7eeac61ddeeb1357a654380e4b.zip
[DYNAREC] Introduce BOX64_DYNAREC_X87DOUBLE=2 to handle Low Precision x87 ([ARM64_DYNAREC] only for now)
Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/arm64/arm64_emitter.h        |  5
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_660f.c   |  5
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_d8.c     | 14
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_d9.c     |  3
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_da.c     |  6
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_dc.c     | 14
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_de.c     | 12
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_helper.c | 21
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_helper.h | 21
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_pass0.h  |  3
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_pass1.h  |  5
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_private.h|  1
-rw-r--r--  src/dynarec/dynablock_private.h          |  4
-rw-r--r--  src/dynarec/dynarec_arch.h               |  1
-rw-r--r--  src/dynarec/dynarec_native.c             | 13
-rw-r--r--  src/dynarec/dynarec_native_pass.c        |  5
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_d9.c       |  2
-rw-r--r--  src/include/env.h                        |  2
18 files changed, 115 insertions, 22 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 0ecfa33c..7d087fc5 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -89,7 +89,8 @@ p0-p3 are used to pass scalable predicate arguments to a subroutine and to retur
 #define x4      4
 #define x5      5
 #define x6      6
-#define x7      7
+#define x87pc   7
+// x87 can be a scratch, but check if it's used as x87 PC and restore if needed in that case
 // 32bits version of scratch
 #define w1      x1
 #define w2      x2
@@ -97,7 +98,7 @@ p0-p3 are used to pass scalable predicate arguments to a subroutine and to retur
 #define w4      x4
 #define w5      x5
 #define w6      x6
-#define w7      x7
+#define w87pc   x87pc
 // emu is r0
 #define xEmu    0
 // ARM64 LR
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 8280137b..cde9b851 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -1420,8 +1420,8 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                         CSELw(x2, x2, x3, cLT); // x2 is min(lmem, lreg)

                         // x2 is min length 0-n_packed

                         MVNw_REG(x4, xZR);

-                        LSLw_REG(x7, x4, x2);

-                        BICw_REG(x1, x1, x7);

+                        LSLw_REG(x87pc, x4, x2);

+                        BICw_REG(x1, x1, x87pc);

                         LSLw_REG(x4, x4, x5);

                         ORRw_REG(x1, x1, x4);

                         ANDw_mask(x1, x1, 0, (u8&1)?7:15);

@@ -1474,6 +1474,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                                 BFCw(xFlags, F_PF, 1);

                             }

                         }

+                        ARM64_CHECK_PRECISION();    // to regen x87 if it has been used

                     } else {

                         SETFLAGS(X_ALL, SF_SET_DF);

                         if(gd>7)    // no need to reflect cache as xmm0-xmm7 will be saved before the function call anyway

diff --git a/src/dynarec/arm64/dynarec_arm64_d8.c b/src/dynarec/arm64/dynarec_arm64_d8.c
index 0867d6fe..1fb1e614 100644
--- a/src/dynarec/arm64/dynarec_arm64_d8.c
+++ b/src/dynarec/arm64/dynarec_arm64_d8.c
@@ -56,6 +56,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FADDS(v1, v1, v2);
             } else {
                 FADDD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -77,6 +78,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FMULS(v1, v1, v2);
             } else {
                 FMULD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -135,6 +137,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FSUBS(v1, v1, v2);
             } else {
                 FSUBD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -156,6 +159,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FSUBS(v1, v2, v1);
             } else {
                 FSUBD(v1, v2, v1);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -177,6 +181,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FDIVS(v1, v1, v2);
             } else {
                 FDIVD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -198,6 +203,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FDIVS(v1, v2, v1);
             } else {
                 FDIVD(v1, v2, v1);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -220,6 +226,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 } else {
                     FCVT_D_S(s0, s0);
                     FADDD(v1, v1, s0);
+                    X87_CHECK_PRECISION(v1);
                 }
                 if(!BOX64ENV(dynarec_fastround))
                     x87_restoreround(dyn, ninst, u8);
@@ -237,6 +244,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 } else {
                     FCVT_D_S(s0, s0);
                     FMULD(v1, v1, s0);
+                    X87_CHECK_PRECISION(v1);
                 }
                 if(!BOX64ENV(dynarec_fastround))
                     x87_restoreround(dyn, ninst, u8);
@@ -252,6 +260,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 } else {
                     FCVT_D_S(s0, s0);
                     FCMPD(v1, s0);
+                    X87_CHECK_PRECISION(v1);
                 }
                 FCOM(x1, x2, x3);
                 break;
@@ -266,6 +275,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 } else {
                     FCVT_D_S(s0, s0);
                     FCMPD(v1, s0);
+                    X87_CHECK_PRECISION(v1);
                 }
                 FCOM(x1, x2, x3);
                 X87_POP_OR_FAIL(dyn, ninst, x3);
@@ -283,6 +293,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 } else {
                     FCVT_D_S(s0, s0);
                     FSUBD(v1, v1, s0);
+                    X87_CHECK_PRECISION(v1);
                 }
                 if(!BOX64ENV(dynarec_fastround))
                     x87_restoreround(dyn, ninst, u8);
@@ -300,6 +311,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 } else {
                     FCVT_D_S(s0, s0);
                     FSUBD(v1, s0, v1);
+                    X87_CHECK_PRECISION(v1);
                 }
                 if(!BOX64ENV(dynarec_fastround))
                     x87_restoreround(dyn, ninst, u8);
@@ -317,6 +329,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 } else {
                     FCVT_D_S(s0, s0);
                     FDIVD(v1, v1, s0);
+                    X87_CHECK_PRECISION(v1);
                 }
                 if(!BOX64ENV(dynarec_fastround))
                     x87_restoreround(dyn, ninst, u8);
@@ -334,6 +347,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 } else {
                     FCVT_D_S(s0, s0);
                     FDIVD(v1, s0, v1);
+                    X87_CHECK_PRECISION(v1);
                 }
                 if(!BOX64ENV(dynarec_fastround))
                     x87_restoreround(dyn, ninst, u8);
diff --git a/src/dynarec/arm64/dynarec_arm64_d9.c b/src/dynarec/arm64/dynarec_arm64_d9.c
index c067d556..ca18951a 100644
--- a/src/dynarec/arm64/dynarec_arm64_d9.c
+++ b/src/dynarec/arm64/dynarec_arm64_d9.c
@@ -430,6 +430,7 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FSQRTS(v1, v1);
             } else {
                 FSQRTD(v1, v1);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -509,7 +510,7 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         switch((nextop>>3)&7) {
             case 0:
                 INST_NAME("FLD ST0, float[ED]");
-                X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, BOX64ENV(dynarec_x87double)?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
+                X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, (BOX64ENV(dynarec_x87double)==1)?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
                 VLD32(v1, ed, fixedaddress);
                 if(!ST_IS_F(0)) {
diff --git a/src/dynarec/arm64/dynarec_arm64_da.c b/src/dynarec/arm64/dynarec_arm64_da.c
index 80127fc2..8189f43f 100644
--- a/src/dynarec/arm64/dynarec_arm64_da.c
+++ b/src/dynarec/arm64/dynarec_arm64_da.c
@@ -150,6 +150,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 if(!BOX64ENV(dynarec_fastround))
                     u8 = x87_setround(dyn, ninst, x1, x5, x4);
                 FADDD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
                 if(!BOX64ENV(dynarec_fastround))
                     x87_restoreround(dyn, ninst, u8);
                 break;
@@ -164,6 +165,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 if(!BOX64ENV(dynarec_fastround))
                     u8 = x87_setround(dyn, ninst, x1, x5, x4);
                 FMULD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
                 if(!BOX64ENV(dynarec_fastround))
                     x87_restoreround(dyn, ninst, u8);
                 break;
@@ -201,6 +203,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 if(!BOX64ENV(dynarec_fastround))
                     u8 = x87_setround(dyn, ninst, x1, x5, x4);
                 FSUBD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
                 if(!BOX64ENV(dynarec_fastround))
                     x87_restoreround(dyn, ninst, u8);
                 break;
@@ -215,6 +218,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 if(!BOX64ENV(dynarec_fastround))
                     u8 = x87_setround(dyn, ninst, x1, x5, x4);
                 FSUBD(v1, v2, v1);
+                X87_CHECK_PRECISION(v1);
                 if(!BOX64ENV(dynarec_fastround))
                     x87_restoreround(dyn, ninst, u8);
                 break;
@@ -229,6 +233,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 if(!BOX64ENV(dynarec_fastround))
                     u8 = x87_setround(dyn, ninst, x1, x5, x4);
                 FDIVD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
                 if(!BOX64ENV(dynarec_fastround))
                     x87_restoreround(dyn, ninst, u8);
                 break;
@@ -243,6 +248,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 if(!BOX64ENV(dynarec_fastround))
                     u8 = x87_setround(dyn, ninst, x1, x5, x4);
                 FDIVD(v1, v2, v1);
+                X87_CHECK_PRECISION(v1);
                 if(!BOX64ENV(dynarec_fastround))
                     x87_restoreround(dyn, ninst, u8);
                 break;
diff --git a/src/dynarec/arm64/dynarec_arm64_dc.c b/src/dynarec/arm64/dynarec_arm64_dc.c
index bab8cb60..3ffb8dbb 100644
--- a/src/dynarec/arm64/dynarec_arm64_dc.c
+++ b/src/dynarec/arm64/dynarec_arm64_dc.c
@@ -54,6 +54,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FADDS(v1, v1, v2);
             } else {
                 FADDD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -75,6 +76,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FMULS(v1, v1, v2);
             } else {
                 FMULD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -94,6 +96,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FCMPS(v1, v2);
             } else {
                 FCMPD(v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             FCOM(x1, x2, x3);
             break;
@@ -112,6 +115,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FCMPS(v1, v2);
             } else {
                 FCMPD(v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             FCOM(x1, x2, x3);
             X87_POP_OR_FAIL(dyn, ninst, x3);
@@ -133,6 +137,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FSUBS(v1, v2, v1);
             } else {
                 FSUBD(v1, v2, v1);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -154,6 +159,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FSUBS(v1, v1, v2);
             } else {
                 FSUBD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -175,6 +181,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FDIVS(v1, v2, v1);
             } else {
                 FDIVD(v1, v2, v1);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -196,6 +203,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FDIVS(v1, v1, v2);
             } else {
                 FDIVD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -214,6 +222,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 if(!BOX64ENV(dynarec_fastround))
                     u8 = x87_setround(dyn, ninst, x1, x2, x4);
                 FADDD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
                 if(!BOX64ENV(dynarec_fastround))
                     x87_restoreround(dyn, ninst, u8);
                 break;
@@ -226,6 +235,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 if(!BOX64ENV(dynarec_fastround))
                     u8 = x87_setround(dyn, ninst, x1, x2, x4);
                 FMULD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
                 if(!BOX64ENV(dynarec_fastround))
                     x87_restoreround(dyn, ninst, u8);
                 break;
@@ -257,6 +267,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 if(!BOX64ENV(dynarec_fastround))
                     u8 = x87_setround(dyn, ninst, x1, x2, x4);
                 FSUBD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
                 if(!BOX64ENV(dynarec_fastround))
                     x87_restoreround(dyn, ninst, u8);
                 break;
@@ -269,6 +280,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 if(!BOX64ENV(dynarec_fastround))
                     u8 = x87_setround(dyn, ninst, x1, x2, x4);
                 FSUBD(v1, v2, v1);
+                X87_CHECK_PRECISION(v1);
                 if(!BOX64ENV(dynarec_fastround))
                     x87_restoreround(dyn, ninst, u8);
                 break;
@@ -281,6 +293,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 if(!BOX64ENV(dynarec_fastround))
                     u8 = x87_setround(dyn, ninst, x1, x2, x4);
                 FDIVD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
                 if(!BOX64ENV(dynarec_fastround))
                     x87_restoreround(dyn, ninst, u8);
                 break;
@@ -293,6 +306,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 if(!BOX64ENV(dynarec_fastround))
                     u8 = x87_setround(dyn, ninst, x1, x2, x4);
                 FDIVD(v1, v2, v1);
+                X87_CHECK_PRECISION(v1);
                 if(!BOX64ENV(dynarec_fastround))
                     x87_restoreround(dyn, ninst, u8);
                 break;
diff --git a/src/dynarec/arm64/dynarec_arm64_de.c b/src/dynarec/arm64/dynarec_arm64_de.c
index 9a29aebe..0c4122bd 100644
--- a/src/dynarec/arm64/dynarec_arm64_de.c
+++ b/src/dynarec/arm64/dynarec_arm64_de.c
@@ -54,6 +54,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FADDS(v1, v1, v2);
             } else {
                 FADDD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -76,6 +77,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FMULS(v1, v1, v2);
             } else {
                 FMULD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -130,6 +132,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FSUBS(v1, v2, v1);
             } else {
                 FSUBD(v1, v2, v1);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -152,6 +155,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FSUBS(v1, v1, v2);
             } else {
                 FSUBD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -174,6 +178,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FDIVS(v1, v2, v1);
             } else {
                 FDIVD(v1, v2, v1);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -196,6 +201,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FDIVS(v1, v1, v2);
             } else {
                 FDIVD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -216,6 +222,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 SXTL_32(v2, v2);
                 SCVTFDD(v2, v2);
                 FADDD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
                 break;
             case 1:
                 INST_NAME("FIMUL ST0, word[ED]");
@@ -227,6 +234,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 SXTL_32(v2, v2);
                 SCVTFDD(v2, v2);
                 FMULD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
                 break;
             case 2:
                 INST_NAME("FICOM ST0, word[ED]");
@@ -263,6 +271,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 SXTL_32(v2, v2);
                 SCVTFDD(v2, v2);
                 FSUBD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
                 break;
             case 5:
                 INST_NAME("FISUBR ST0, word[ED]");
@@ -274,6 +283,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 SXTL_32(v2, v2);
                 SCVTFDD(v2, v2);
                 FSUBD(v1, v2, v1);
+                X87_CHECK_PRECISION(v1);
                 break;
             case 6:
                 INST_NAME("FIDIV ST0, word[ED]");
@@ -285,6 +295,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 SXTL_32(v2, v2);
                 SCVTFDD(v2, v2);
                 FDIVD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
                 break;
             case 7:
                 INST_NAME("FIDIVR ST0, word[ED]");
@@ -296,6 +307,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 SXTL_32(v2, v2);
                 SCVTFDD(v2, v2);
                 FDIVD(v1, v2, v1);
+                X87_CHECK_PRECISION(v1);
                 break;
         }
     return addr;
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index e8aa70c2..69855502 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -766,7 +766,7 @@ void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int save
     dyn->insts[ninst].nat_flags_op = NAT_FLAG_OP_UNUSABLE;
     #endif
     if(savereg==0)
-        savereg = 7;
+        savereg = x87pc;
     if(saveflags) {
         STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
     }
@@ -804,6 +804,9 @@ void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int save
     if(saveflags) {
         LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
     }
+    if(reg==x87pc && savereg!=x87pc && dyn->need_x87check) {
+        ARM64_CHECK_PRECISION();    // regen x87 mask
+    }
     //SET_NODF();
 }
 
@@ -813,7 +816,7 @@ void call_i(dynarec_arm_t* dyn, int ninst, void* fnc)
     #if STEP == 0
     dyn->insts[ninst].nat_flags_op = NAT_FLAG_OP_UNUSABLE;
     #endif
-    STPx_S7_preindex(x6, x7, xSP, -16);
+    STPx_S7_preindex(x6, x87pc, xSP, -16);
     STPx_S7_preindex(x4, x5, xSP, -16);
     STPx_S7_preindex(x2, x3, xSP, -16);
     STPx_S7_preindex(xEmu, x1, xSP, -16);   // ARM64 stack needs to be 16byte aligned
@@ -823,10 +826,10 @@ void call_i(dynarec_arm_t* dyn, int ninst, void* fnc)
     STPx_S7_offset(xRSI, xRDI, xEmu, offsetof(x64emu_t, regs[_SI]));
     STPx_S7_offset(xR8,  xR9,  xEmu, offsetof(x64emu_t, regs[_R8]));
     STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
-    fpu_pushcache(dyn, ninst, x7, 0);
+    fpu_pushcache(dyn, ninst, x87pc, 0);
 
-    TABLE64(x7, (uintptr_t)fnc);
-    BLR(x7);
+    TABLE64(x87pc, (uintptr_t)fnc);
+    BLR(x87pc);
     LDPx_S7_postindex(xEmu, x1, xSP, 16);
     LDPx_S7_postindex(x2, x3, xSP, 16);
     LDPx_S7_postindex(x4, x5, xSP, 16);
@@ -838,8 +841,8 @@ void call_i(dynarec_arm_t* dyn, int ninst, void* fnc)
     GO(R8, R9);
     #undef GO
     LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
-    fpu_popcache(dyn, ninst, x7, 0);   // savereg will not be used
-    LDPx_S7_postindex(x6, x7, xSP, 16);
+    fpu_popcache(dyn, ninst, x87pc, 0);   // savereg will not be used
+    LDPx_S7_postindex(x6, x87pc, xSP, 16);
     //SET_NODF();
 }
 
@@ -859,12 +862,12 @@ void call_n(dynarec_arm_t* dyn, int ninst, void* fnc, int w)
     if(abs(w)>1) {
         MESSAGE(LOG_DUMP, "Getting %d XMM args\n", abs(w)-1);
         for(int i=0; i<abs(w)-1; ++i) {
-            sse_get_reg(dyn, ninst, x7, i, w);
+            sse_get_reg(dyn, ninst, x3, i, w);
         }
     }
     if(w<0) {
         MESSAGE(LOG_DUMP, "Return in XMM0\n");
-        sse_get_reg_empty(dyn, ninst, x7, 0);
+        sse_get_reg_empty(dyn, ninst, x3, 0);
     }
     // prepare regs for native call
     MOVx_REG(0, xRDI);
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 2e152dfb..95681b3e 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -721,13 +721,13 @@
 
 // CALL will use x7 for the call address. Return value can be put in ret (unless ret is -1)
 // R0 will not be pushed/popd if ret is -2
-#define CALL(F, ret) call_c(dyn, ninst, F, x7, ret, 1, 0)
+#define CALL(F, ret) call_c(dyn, ninst, F, x87pc, ret, 1, 0)
 // CALL_ will use x7 for the call address. Return value can be put in ret (unless ret is -1)
 // R0 will not be pushed/popd if ret is -2
-#define CALL_(F, ret, reg) call_c(dyn, ninst, F, x7, ret, 1, reg)
+#define CALL_(F, ret, reg) call_c(dyn, ninst, F, x87pc, ret, 1, reg)
 // CALL_S will use x7 for the call address. Return value can be put in ret (unless ret is -1)
 // R0 will not be pushed/popd if ret is -2. Flags are not save/restored
-#define CALL_S(F, ret) call_c(dyn, ninst, F, x7, ret, 0, 0)
+#define CALL_S(F, ret) call_c(dyn, ninst, F, x87pc, ret, 0, 0)
 // CALL_ will use x7 for the call address.
 // All regs are saved, including scratch. This is use to call internal function that should not change state
 #define CALL_I(F) call_i(dyn, ninst, F)
@@ -998,6 +998,21 @@
 #define CALLRET_LOOP()  NOP
 #endif
 
+#ifndef ARM64_CHECK_PRECISION
+#define ARM64_CHECK_PRECISION()                         \
+    if(dyn->need_x87check) {                            \
+        LDRH_U12(x87pc, xEmu, offsetof(x64emu_t, cw));  \
+        UBFXw(x87pc, x87pc, 8, 2);                      \
+    }
+#endif
+#ifndef X87_CHECK_PRECISION
+#define X87_CHECK_PRECISION(A)                      \
+    if(dyn->need_x87check) {                        \
+        CBNZw(x87pc, 4+8);                          \
+        FCVT_S_D(A, A);                             \
+        FCVT_D_S(A, A);                             \
+    }
+#endif
 #define STORE_REG(A)    STRx_U12(x##A, xEmu, offsetof(x64emu_t, regs[_##A]))
 #define STP_REGS(A, B)  STPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A]))
 #define LDP_REGS(A, B)  LDPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A]))
diff --git a/src/dynarec/arm64/dynarec_arm64_pass0.h b/src/dynarec/arm64/dynarec_arm64_pass0.h
index ec792acb..bdfa1785 100644
--- a/src/dynarec/arm64/dynarec_arm64_pass0.h
+++ b/src/dynarec/arm64/dynarec_arm64_pass0.h
@@ -72,3 +72,6 @@
 // mark opcode as "unaligned" possible only if the current address is not marked as already unaligned
 #define IF_UNALIGNED(A) if((dyn->insts[ninst].unaligned=is_addr_unaligned(A)))
 #define IF_ALIGNED(A)   if(!(dyn->insts[ninst].unaligned=is_addr_unaligned(A)))
+
+#define ARM64_CHECK_PRECISION()
+#define X87_CHECK_PRECISION(A)
diff --git a/src/dynarec/arm64/dynarec_arm64_pass1.h b/src/dynarec/arm64/dynarec_arm64_pass1.h
index f5ad6a7b..14a716cd 100644
--- a/src/dynarec/arm64/dynarec_arm64_pass1.h
+++ b/src/dynarec/arm64/dynarec_arm64_pass1.h
@@ -22,3 +22,8 @@
         dyn->insts[ninst].f_exit = dyn->f
 
 #define INST_NAME(name)  
+
+#define ARM64_CHECK_PRECISION()
+#define X87_CHECK_PRECISION(A)          \
+        if(dyn->need_x87check)          \
+                dyn->need_x87check=2
diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h
index 1c5f7008..f397e949 100644
--- a/src/dynarec/arm64/dynarec_arm64_private.h
+++ b/src/dynarec/arm64/dynarec_arm64_private.h
@@ -171,6 +171,7 @@ typedef struct dynarec_arm_s {
     uint8_t             always_test;
     uint8_t             abort;      // abort the creation of the block
     void*               gdbjit_block;
+    uint32_t            need_x87check;  // needs x87 precision control check if non-null, or 0 if not
 } dynarec_arm_t;
 
 void add_next(dynarec_arm_t *dyn, uintptr_t addr);
diff --git a/src/dynarec/dynablock_private.h b/src/dynarec/dynablock_private.h
index 8e174a63..388fcc4d 100644
--- a/src/dynarec/dynablock_private.h
+++ b/src/dynarec/dynablock_private.h
@@ -24,11 +24,11 @@ typedef struct dynablock_s {
     uint8_t         dirty;      // if need to be tested as soon as it's created
     uint8_t         always_test:1;
     uint8_t         is32bits:1;
+    int             callret_size;   // size of the array
     int             isize;
+    size_t          arch_size;  // size of of arch dependant infos
     instsize_t*     instsize;
     void*           arch;       // arch dependant per inst info (can be NULL)
-    size_t          arch_size;  // size of of arch dependant infos
-    int             callret_size;   // size of the array
     callret_t*      callrets;   // array of callret return, with NOP / UDF depending if the block is clean or dirty
     void*           jmpnext;    // a branch jmpnext code when block is marked
 } dynablock_t;
diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h
index 44d767b2..c65a0682 100644
--- a/src/dynarec/dynarec_arch.h
+++ b/src/dynarec/dynarec_arch.h
@@ -35,6 +35,7 @@ extern uint32_t arm64_crc(void* p, uint32_t len);
 

 #define ARCH_NOP    0b11010101000000110010000000011111

 #define ARCH_UDF    0xcafe

+#define ARCH_PRECISION()   ARM64_CHECK_PRECISION()

 #elif defined(LA64)

 

 #define instruction_native_t        instruction_la64_t

diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c
index 7574b124..2c4aa72b 100644
--- a/src/dynarec/dynarec_native.c
+++ b/src/dynarec/dynarec_native.c
@@ -636,6 +636,11 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
         CancelBlock64(0);
         return NULL;
     }
+    #ifdef ARCH_PRECISION
+    if(BOX64ENV(dynarec_x87double)==2) {
+        helper.need_x87check = 1;
+    }
+    #endif
     // basic checks
     if(!helper.size) {
         dynarec_log(LOG_INFO, "Warning, null-sized dynarec block (%p)\n", (void*)addr);
@@ -768,6 +773,12 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
         CancelBlock64(0);
         return NULL;
     }
+    #ifdef ARCH_PRECISION
+    if(BOX64ENV(dynarec_x87double)==2) {
+        if(helper.need_x87check==1)
+            helper.need_x87check = 0;
+    }
+    #endif
 
     // pass 2, instruction size
     helper.callrets = static_callrets;
@@ -796,7 +807,7 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
     size_t insts_rsize = (helper.insts_size+2)*sizeof(instsize_t);
     insts_rsize = (insts_rsize+7)&~7;   // round the size...
     size_t arch_size = ARCH_SIZE(&helper);
-    size_t callret_size = helper.callret_size*4;
+    size_t callret_size = helper.callret_size*sizeof(callret_t);
     // ok, now allocate mapped memory, with executable flag on
     size_t sz = sizeof(void*) + native_size + helper.table64size*sizeof(uint64_t) + 4*sizeof(void*) + insts_rsize + arch_size + callret_size;
     //           dynablock_t*     block (arm insts)            table64               jmpnext code       instsize     arch         callrets
diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c
index be9c5fe9..5613ad57 100644
--- a/src/dynarec/dynarec_native_pass.c
+++ b/src/dynarec/dynarec_native_pass.c
@@ -83,6 +83,11 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int
             break;
         }
         #endif
+        #ifdef ARCH_PRECISION
+        if(!ninst && dyn->need_x87check) {
+            ARCH_PRECISION();
+        }
+        #endif
         fpu_propagate_stack(dyn, ninst);
         ip = addr;
         if (reset_n!=-1) {
diff --git a/src/dynarec/rv64/dynarec_rv64_d9.c b/src/dynarec/rv64/dynarec_rv64_d9.c
index 9a34d583..315d35f2 100644
--- a/src/dynarec/rv64/dynarec_rv64_d9.c
+++ b/src/dynarec/rv64/dynarec_rv64_d9.c
@@ -442,7 +442,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         switch ((nextop >> 3) & 7) {
             case 0:
                 INST_NAME("FLD ST0, float[ED]");
-                X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, BOX64ENV(dynarec_x87double) ? EXT_CACHE_ST_D : EXT_CACHE_ST_F);
+                X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, (BOX64ENV(dynarec_x87double)==1) ? EXT_CACHE_ST_D : EXT_CACHE_ST_F);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                 FLW(v1, ed, fixedaddress);
                 if (!ST_IS_F(0)) {
diff --git a/src/include/env.h b/src/include/env.h
index 3398f3b0..edfbbb33 100644
--- a/src/include/env.h
+++ b/src/include/env.h
@@ -59,7 +59,7 @@ extern char* ftrace_name;
     BOOLEAN(BOX64_DYNAREC_TRACE, dynarec_trace, 0)                      \
     BOOLEAN(BOX64_DYNAREC_WAIT, dynarec_wait, 1)                        \
     BOOLEAN(BOX64_DYNAREC_WEAKBARRIER, dynarec_weakbarrier, 1)          \
-    BOOLEAN(BOX64_DYNAREC_X87DOUBLE, dynarec_x87double, 0)              \
+    INTEGER(BOX64_DYNAREC_X87DOUBLE, dynarec_x87double, 0, 0, 2)        \
     STRING(BOX64_EMULATED_LIBS, emulated_libs)                          \
     STRING(BOX64_ENV, env)                                              \
     STRING(BOX64_ENV1, env1)                                            \