diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2025-04-16 14:53:54 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2025-04-16 14:53:54 +0200 |
| commit | d7eb87129eb6aa7eeac61ddeeb1357a654380e4b (patch) | |
| tree | bcef36a1252ff8b00b33a2ce096e4dbbe4bd5cfd /src | |
| parent | 7f569247d511e397e72b369de2315385c879e5d1 (diff) | |
| download | box64-d7eb87129eb6aa7eeac61ddeeb1357a654380e4b.tar.gz box64-d7eb87129eb6aa7eeac61ddeeb1357a654380e4b.zip | |
[DYNAREC] Introduce BOX64_DYNAREC_X87DOUBLE=2 to handle Low Precision x87 ([ARM64_DYNAREC] only for now)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/arm64_emitter.h | 5 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_660f.c | 5 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_d8.c | 14 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_d9.c | 3 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_da.c | 6 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_dc.c | 14 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_de.c | 12 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.c | 21 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 21 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_pass0.h | 3 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_pass1.h | 5 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_private.h | 1 | ||||
| -rw-r--r-- | src/dynarec/dynablock_private.h | 4 | ||||
| -rw-r--r-- | src/dynarec/dynarec_arch.h | 1 | ||||
| -rw-r--r-- | src/dynarec/dynarec_native.c | 13 | ||||
| -rw-r--r-- | src/dynarec/dynarec_native_pass.c | 5 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_d9.c | 2 | ||||
| -rw-r--r-- | src/include/env.h | 2 |
18 files changed, 115 insertions, 22 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h index 0ecfa33c..7d087fc5 100644 --- a/src/dynarec/arm64/arm64_emitter.h +++ b/src/dynarec/arm64/arm64_emitter.h @@ -89,7 +89,8 @@ p0-p3 are used to pass scalable predicate arguments to a subroutine and to retur #define x4 4 #define x5 5 #define x6 6 -#define x7 7 +#define x87pc 7 +// x87 can be a scratch, but check if it's used as x87 PC and restore if needed in that case // 32bits version of scratch #define w1 x1 #define w2 x2 @@ -97,7 +98,7 @@ p0-p3 are used to pass scalable predicate arguments to a subroutine and to retur #define w4 x4 #define w5 x5 #define w6 x6 -#define w7 x7 +#define w87pc x87pc // emu is r0 #define xEmu 0 // ARM64 LR diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c index 8280137b..cde9b851 100644 --- a/src/dynarec/arm64/dynarec_arm64_660f.c +++ b/src/dynarec/arm64/dynarec_arm64_660f.c @@ -1420,8 +1420,8 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n CSELw(x2, x2, x3, cLT); // x2 is min(lmem, lreg) // x2 is min length 0-n_packed MVNw_REG(x4, xZR); - LSLw_REG(x7, x4, x2); - BICw_REG(x1, x1, x7); + LSLw_REG(x87pc, x4, x2); + BICw_REG(x1, x1, x87pc); LSLw_REG(x4, x4, x5); ORRw_REG(x1, x1, x4); ANDw_mask(x1, x1, 0, (u8&1)?7:15); @@ -1474,6 +1474,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n BFCw(xFlags, F_PF, 1); } } + ARM64_CHECK_PRECISION(); // to regen x87 if it has been used } else { SETFLAGS(X_ALL, SF_SET_DF); if(gd>7) // no need to reflect cache as xmm0-xmm7 will be saved before the function call anyway diff --git a/src/dynarec/arm64/dynarec_arm64_d8.c b/src/dynarec/arm64/dynarec_arm64_d8.c index 0867d6fe..1fb1e614 100644 --- a/src/dynarec/arm64/dynarec_arm64_d8.c +++ b/src/dynarec/arm64/dynarec_arm64_d8.c @@ -56,6 +56,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FADDS(v1, v1, v2); } else { FADDD(v1, v1, v2); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -77,6 +78,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FMULS(v1, v1, v2); } else { FMULD(v1, v1, v2); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -135,6 +137,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FSUBS(v1, v1, v2); } else { FSUBD(v1, v1, v2); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -156,6 +159,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FSUBS(v1, v2, v1); } else { FSUBD(v1, v2, v1); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -177,6 +181,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FDIVS(v1, v1, v2); } else { FDIVD(v1, v1, v2); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -198,6 +203,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FDIVS(v1, v2, v1); } else { FDIVD(v1, v2, v1); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -220,6 +226,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { FCVT_D_S(s0, s0); FADDD(v1, v1, s0); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -237,6 +244,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { FCVT_D_S(s0, s0); FMULD(v1, v1, s0); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -252,6 +260,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { FCVT_D_S(s0, s0); FCMPD(v1, s0); + X87_CHECK_PRECISION(v1); } FCOM(x1, x2, x3); break; @@ -266,6 +275,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { FCVT_D_S(s0, s0); FCMPD(v1, s0); + X87_CHECK_PRECISION(v1); } FCOM(x1, x2, x3); X87_POP_OR_FAIL(dyn, ninst, x3); @@ -283,6 +293,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { FCVT_D_S(s0, s0); FSUBD(v1, v1, s0); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -300,6 +311,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { FCVT_D_S(s0, s0); FSUBD(v1, s0, v1); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -317,6 +329,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { FCVT_D_S(s0, s0); FDIVD(v1, v1, s0); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -334,6 +347,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } else { FCVT_D_S(s0, s0); FDIVD(v1, s0, v1); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); diff --git a/src/dynarec/arm64/dynarec_arm64_d9.c b/src/dynarec/arm64/dynarec_arm64_d9.c index c067d556..ca18951a 100644 --- a/src/dynarec/arm64/dynarec_arm64_d9.c +++ b/src/dynarec/arm64/dynarec_arm64_d9.c @@ -430,6 +430,7 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FSQRTS(v1, v1); } else { FSQRTD(v1, v1); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -509,7 +510,7 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin switch((nextop>>3)&7) { case 0: INST_NAME("FLD ST0, float[ED]"); - X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, BOX64ENV(dynarec_x87double)?NEON_CACHE_ST_D:NEON_CACHE_ST_F); + X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, (BOX64ENV(dynarec_x87double)==1)?NEON_CACHE_ST_D:NEON_CACHE_ST_F); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); VLD32(v1, ed, fixedaddress); if(!ST_IS_F(0)) { diff --git a/src/dynarec/arm64/dynarec_arm64_da.c b/src/dynarec/arm64/dynarec_arm64_da.c index 80127fc2..8189f43f 100644 --- a/src/dynarec/arm64/dynarec_arm64_da.c +++ b/src/dynarec/arm64/dynarec_arm64_da.c @@ -150,6 +150,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5, x4); FADDD(v1, v1, v2); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -164,6 +165,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5, x4); FMULD(v1, v1, v2); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -201,6 +203,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5, x4); FSUBD(v1, v1, v2); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -215,6 +218,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5, x4); FSUBD(v1, v2, v1); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -229,6 +233,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5, x4); FDIVD(v1, v1, v2); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -243,6 +248,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5, x4); FDIVD(v1, v2, v1); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; diff --git a/src/dynarec/arm64/dynarec_arm64_dc.c b/src/dynarec/arm64/dynarec_arm64_dc.c index bab8cb60..3ffb8dbb 100644 --- a/src/dynarec/arm64/dynarec_arm64_dc.c +++ b/src/dynarec/arm64/dynarec_arm64_dc.c @@ -54,6 +54,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FADDS(v1, v1, v2); } else { FADDD(v1, v1, v2); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -75,6 +76,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FMULS(v1, v1, v2); } else { FMULD(v1, v1, v2); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -94,6 +96,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FCMPS(v1, v2); } else { FCMPD(v1, v2); + X87_CHECK_PRECISION(v1); } FCOM(x1, x2, x3); break; @@ -112,6 +115,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FCMPS(v1, v2); } else { FCMPD(v1, v2); + X87_CHECK_PRECISION(v1); } FCOM(x1, x2, x3); X87_POP_OR_FAIL(dyn, ninst, x3); @@ -133,6 +137,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FSUBS(v1, v2, v1); } else { FSUBD(v1, v2, v1); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -154,6 +159,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FSUBS(v1, v1, v2); } else { FSUBD(v1, v1, v2); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -175,6 +181,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FDIVS(v1, v2, v1); } else { FDIVD(v1, v2, v1); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -196,6 +203,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FDIVS(v1, v1, v2); } else { FDIVD(v1, v1, v2); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -214,6 +222,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2, x4); FADDD(v1, v1, v2); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -226,6 +235,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2, x4); FMULD(v1, v1, v2); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -257,6 +267,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2, x4); FSUBD(v1, v1, v2); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -269,6 +280,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2, x4); FSUBD(v1, v2, v1); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -281,6 +293,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2, x4); FDIVD(v1, v1, v2); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; @@ -293,6 +306,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2, x4); FDIVD(v1, v2, v1); + X87_CHECK_PRECISION(v1); if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; diff --git a/src/dynarec/arm64/dynarec_arm64_de.c b/src/dynarec/arm64/dynarec_arm64_de.c index 9a29aebe..0c4122bd 100644 --- a/src/dynarec/arm64/dynarec_arm64_de.c +++ b/src/dynarec/arm64/dynarec_arm64_de.c @@ -54,6 +54,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FADDS(v1, v1, v2); } else { FADDD(v1, v1, v2); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -76,6 +77,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FMULS(v1, v1, v2); } else { FMULD(v1, v1, v2); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -130,6 +132,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FSUBS(v1, v2, v1); } else { FSUBD(v1, v2, v1); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -152,6 +155,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FSUBS(v1, v1, v2); } else { FSUBD(v1, v1, v2); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -174,6 +178,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FDIVS(v1, v2, v1); } else { FDIVD(v1, v2, v1); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -196,6 +201,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin FDIVS(v1, v1, v2); } else { FDIVD(v1, v1, v2); + X87_CHECK_PRECISION(v1); } if(!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); @@ -216,6 +222,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SXTL_32(v2, v2); SCVTFDD(v2, v2); FADDD(v1, v1, v2); + X87_CHECK_PRECISION(v1); break; case 1: INST_NAME("FIMUL ST0, word[ED]"); @@ -227,6 +234,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SXTL_32(v2, v2); SCVTFDD(v2, v2); FMULD(v1, v1, v2); + X87_CHECK_PRECISION(v1); break; case 2: INST_NAME("FICOM ST0, word[ED]"); @@ -263,6 +271,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SXTL_32(v2, v2); SCVTFDD(v2, v2); FSUBD(v1, v1, v2); + X87_CHECK_PRECISION(v1); break; case 5: INST_NAME("FISUBR ST0, word[ED]"); @@ -274,6 +283,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SXTL_32(v2, v2); SCVTFDD(v2, v2); FSUBD(v1, v2, v1); + X87_CHECK_PRECISION(v1); break; case 6: INST_NAME("FIDIV ST0, word[ED]"); @@ -285,6 +295,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SXTL_32(v2, v2); SCVTFDD(v2, v2); FDIVD(v1, v1, v2); + X87_CHECK_PRECISION(v1); break; case 7: INST_NAME("FIDIVR ST0, word[ED]"); @@ -296,6 +307,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin SXTL_32(v2, v2); SCVTFDD(v2, v2); FDIVD(v1, v2, v1); + X87_CHECK_PRECISION(v1); break; } return addr; diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c index e8aa70c2..69855502 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.c +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -766,7 +766,7 @@ void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int save dyn->insts[ninst].nat_flags_op = NAT_FLAG_OP_UNUSABLE; #endif if(savereg==0) - savereg = 7; + savereg = x87pc; if(saveflags) { STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); } @@ -804,6 +804,9 @@ void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int save if(saveflags) { LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); } + if(reg==x87pc && savereg!=x87pc && dyn->need_x87check) { + ARM64_CHECK_PRECISION(); // regen x87 mask + } //SET_NODF(); } @@ -813,7 +816,7 @@ void call_i(dynarec_arm_t* dyn, int ninst, void* fnc) #if STEP == 0 dyn->insts[ninst].nat_flags_op = NAT_FLAG_OP_UNUSABLE; #endif - STPx_S7_preindex(x6, x7, xSP, -16); + STPx_S7_preindex(x6, x87pc, xSP, -16); STPx_S7_preindex(x4, x5, xSP, -16); STPx_S7_preindex(x2, x3, xSP, -16); STPx_S7_preindex(xEmu, x1, xSP, -16); // ARM64 stack needs to be 16byte aligned @@ -823,10 +826,10 @@ void call_i(dynarec_arm_t* dyn, int ninst, void* fnc) STPx_S7_offset(xRSI, xRDI, xEmu, offsetof(x64emu_t, regs[_SI])); STPx_S7_offset(xR8, xR9, xEmu, offsetof(x64emu_t, regs[_R8])); STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); - fpu_pushcache(dyn, ninst, x7, 0); + fpu_pushcache(dyn, ninst, x87pc, 0); - TABLE64(x7, (uintptr_t)fnc); - BLR(x7); + TABLE64(x87pc, (uintptr_t)fnc); + BLR(x87pc); LDPx_S7_postindex(xEmu, x1, xSP, 16); LDPx_S7_postindex(x2, x3, xSP, 16); LDPx_S7_postindex(x4, x5, xSP, 16); @@ -838,8 +841,8 @@ void call_i(dynarec_arm_t* dyn, int ninst, void* fnc) GO(R8, R9); #undef GO LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); - fpu_popcache(dyn, ninst, x7, 0); // savereg will not be used - LDPx_S7_postindex(x6, x7, xSP, 16); + fpu_popcache(dyn, ninst, x87pc, 0); // savereg will not be used + LDPx_S7_postindex(x6, x87pc, xSP, 16); //SET_NODF(); } @@ -859,12 +862,12 @@ void call_n(dynarec_arm_t* dyn, int ninst, void* fnc, int w) if(abs(w)>1) { MESSAGE(LOG_DUMP, "Getting %d XMM args\n", abs(w)-1); for(int i=0; i<abs(w)-1; ++i) { - sse_get_reg(dyn, ninst, x7, i, w); + sse_get_reg(dyn, ninst, x3, i, w); } } if(w<0) { MESSAGE(LOG_DUMP, "Return in XMM0\n"); - sse_get_reg_empty(dyn, ninst, x7, 0); + sse_get_reg_empty(dyn, ninst, x3, 0); } // prepare regs for native call MOVx_REG(0, xRDI); diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h index 2e152dfb..95681b3e 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.h +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -721,13 +721,13 @@ // CALL will use x7 for the call address. Return value can be put in ret (unless ret is -1) // R0 will not be pushed/popd if ret is -2 -#define CALL(F, ret) call_c(dyn, ninst, F, x7, ret, 1, 0) +#define CALL(F, ret) call_c(dyn, ninst, F, x87pc, ret, 1, 0) // CALL_ will use x7 for the call address. Return value can be put in ret (unless ret is -1) // R0 will not be pushed/popd if ret is -2 -#define CALL_(F, ret, reg) call_c(dyn, ninst, F, x7, ret, 1, reg) +#define CALL_(F, ret, reg) call_c(dyn, ninst, F, x87pc, ret, 1, reg) // CALL_S will use x7 for the call address. Return value can be put in ret (unless ret is -1) // R0 will not be pushed/popd if ret is -2. Flags are not save/restored -#define CALL_S(F, ret) call_c(dyn, ninst, F, x7, ret, 0, 0) +#define CALL_S(F, ret) call_c(dyn, ninst, F, x87pc, ret, 0, 0) // CALL_ will use x7 for the call address. // All regs are saved, including scratch. This is use to call internal function that should not change state #define CALL_I(F) call_i(dyn, ninst, F) @@ -998,6 +998,21 @@ #define CALLRET_LOOP() NOP #endif +#ifndef ARM64_CHECK_PRECISION +#define ARM64_CHECK_PRECISION() \ + if(dyn->need_x87check) { \ + LDRH_U12(x87pc, xEmu, offsetof(x64emu_t, cw)); \ + UBFXw(x87pc, x87pc, 8, 2); \ + } +#endif +#ifndef X87_CHECK_PRECISION +#define X87_CHECK_PRECISION(A) \ + if(dyn->need_x87check) { \ + CBNZw(x87pc, 4+8); \ + FCVT_S_D(A, A); \ + FCVT_D_S(A, A); \ + } +#endif #define STORE_REG(A) STRx_U12(x##A, xEmu, offsetof(x64emu_t, regs[_##A])) #define STP_REGS(A, B) STPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A])) #define LDP_REGS(A, B) LDPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A])) diff --git a/src/dynarec/arm64/dynarec_arm64_pass0.h b/src/dynarec/arm64/dynarec_arm64_pass0.h index ec792acb..bdfa1785 100644 --- a/src/dynarec/arm64/dynarec_arm64_pass0.h +++ b/src/dynarec/arm64/dynarec_arm64_pass0.h @@ -72,3 +72,6 @@ // mark opcode as "unaligned" possible only if the current address is not marked as already unaligned #define IF_UNALIGNED(A) if((dyn->insts[ninst].unaligned=is_addr_unaligned(A))) #define IF_ALIGNED(A) if(!(dyn->insts[ninst].unaligned=is_addr_unaligned(A))) + +#define ARM64_CHECK_PRECISION() +#define X87_CHECK_PRECISION(A) diff --git a/src/dynarec/arm64/dynarec_arm64_pass1.h b/src/dynarec/arm64/dynarec_arm64_pass1.h index f5ad6a7b..14a716cd 100644 --- a/src/dynarec/arm64/dynarec_arm64_pass1.h +++ b/src/dynarec/arm64/dynarec_arm64_pass1.h @@ -22,3 +22,8 @@ dyn->insts[ninst].f_exit = dyn->f #define INST_NAME(name) + +#define ARM64_CHECK_PRECISION() +#define X87_CHECK_PRECISION(A) \ + if(dyn->need_x87check) \ + dyn->need_x87check=2 diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h index 1c5f7008..f397e949 100644 --- a/src/dynarec/arm64/dynarec_arm64_private.h +++ b/src/dynarec/arm64/dynarec_arm64_private.h @@ -171,6 +171,7 @@ typedef struct dynarec_arm_s { uint8_t always_test; uint8_t abort; // abort the creation of the block void* gdbjit_block; + uint32_t need_x87check; // needs x87 precision control check if non-null, or 0 if not } dynarec_arm_t; void add_next(dynarec_arm_t *dyn, uintptr_t addr); diff --git a/src/dynarec/dynablock_private.h b/src/dynarec/dynablock_private.h index 8e174a63..388fcc4d 100644 --- a/src/dynarec/dynablock_private.h +++ b/src/dynarec/dynablock_private.h @@ -24,11 +24,11 @@ typedef struct dynablock_s { uint8_t dirty; // if need to be tested as soon as it's created uint8_t always_test:1; uint8_t is32bits:1; + int callret_size; // size of the array int isize; + size_t arch_size; // size of of arch dependant infos instsize_t* instsize; void* arch; // arch dependant per inst info (can be NULL) - size_t arch_size; // size of of arch dependant infos - int callret_size; // size of the array callret_t* callrets; // array of callret return, with NOP / UDF depending if the block is clean or dirty void* jmpnext; // a branch jmpnext code when block is marked } dynablock_t; diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h index 44d767b2..c65a0682 100644 --- a/src/dynarec/dynarec_arch.h +++ b/src/dynarec/dynarec_arch.h @@ -35,6 +35,7 @@ extern uint32_t arm64_crc(void* p, uint32_t len); #define ARCH_NOP 0b11010101000000110010000000011111 #define ARCH_UDF 0xcafe +#define ARCH_PRECISION() ARM64_CHECK_PRECISION() #elif defined(LA64) #define instruction_native_t instruction_la64_t diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c index 7574b124..2c4aa72b 100644 --- a/src/dynarec/dynarec_native.c +++ b/src/dynarec/dynarec_native.c @@ -636,6 +636,11 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit CancelBlock64(0); return NULL; } + #ifdef ARCH_PRECISION + if(BOX64ENV(dynarec_x87double)==2) { + helper.need_x87check = 1; + } + #endif // basic checks if(!helper.size) { dynarec_log(LOG_INFO, "Warning, null-sized dynarec block (%p)\n", (void*)addr); @@ -768,6 +773,12 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit CancelBlock64(0); return NULL; } + #ifdef ARCH_PRECISION + if(BOX64ENV(dynarec_x87double)==2) { + if(helper.need_x87check==1) + helper.need_x87check = 0; + } + #endif // pass 2, instruction size helper.callrets = static_callrets; @@ -796,7 +807,7 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit size_t insts_rsize = (helper.insts_size+2)*sizeof(instsize_t); insts_rsize = (insts_rsize+7)&~7; // round the size... size_t arch_size = ARCH_SIZE(&helper); - size_t callret_size = helper.callret_size*4; + size_t callret_size = helper.callret_size*sizeof(callret_t); // ok, now allocate mapped memory, with executable flag on size_t sz = sizeof(void*) + native_size + helper.table64size*sizeof(uint64_t) + 4*sizeof(void*) + insts_rsize + arch_size + callret_size; // dynablock_t* block (arm insts) table64 jmpnext code instsize arch callrets diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c index be9c5fe9..5613ad57 100644 --- a/src/dynarec/dynarec_native_pass.c +++ b/src/dynarec/dynarec_native_pass.c @@ -83,6 +83,11 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int break; } #endif + #ifdef ARCH_PRECISION + if(!ninst && dyn->need_x87check) { + ARCH_PRECISION(); + } + #endif fpu_propagate_stack(dyn, ninst); ip = addr; if (reset_n!=-1) { diff --git a/src/dynarec/rv64/dynarec_rv64_d9.c b/src/dynarec/rv64/dynarec_rv64_d9.c index 9a34d583..315d35f2 100644 --- a/src/dynarec/rv64/dynarec_rv64_d9.c +++ b/src/dynarec/rv64/dynarec_rv64_d9.c @@ -442,7 +442,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni switch ((nextop >> 3) & 7) { case 0: INST_NAME("FLD ST0, float[ED]"); - X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, BOX64ENV(dynarec_x87double) ? EXT_CACHE_ST_D : EXT_CACHE_ST_F); + X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, (BOX64ENV(dynarec_x87double)==1) ? EXT_CACHE_ST_D : EXT_CACHE_ST_F); addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0); FLW(v1, ed, fixedaddress); if (!ST_IS_F(0)) { diff --git a/src/include/env.h b/src/include/env.h index 3398f3b0..edfbbb33 100644 --- a/src/include/env.h +++ b/src/include/env.h @@ -59,7 +59,7 @@ extern char* ftrace_name; BOOLEAN(BOX64_DYNAREC_TRACE, dynarec_trace, 0) \ BOOLEAN(BOX64_DYNAREC_WAIT, dynarec_wait, 1) \ BOOLEAN(BOX64_DYNAREC_WEAKBARRIER, dynarec_weakbarrier, 1) \ - BOOLEAN(BOX64_DYNAREC_X87DOUBLE, dynarec_x87double, 0) \ + INTEGER(BOX64_DYNAREC_X87DOUBLE, dynarec_x87double, 0, 0, 2) \ STRING(BOX64_EMULATED_LIBS, emulated_libs) \ STRING(BOX64_ENV, env) \ STRING(BOX64_ENV1, env1) \ |