| author | ptitSeb <sebastien.chev@gmail.com> | 2025-04-16 14:53:54 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2025-04-16 14:53:54 +0200 |
| commit | d7eb87129eb6aa7eeac61ddeeb1357a654380e4b (patch) | |
| tree | bcef36a1252ff8b00b33a2ce096e4dbbe4bd5cfd | /src/dynarec/arm64 |
| parent | 7f569247d511e397e72b369de2315385c879e5d1 (diff) | |
| download | box64-d7eb87129eb6aa7eeac61ddeeb1357a654380e4b.tar.gz | box64-d7eb87129eb6aa7eeac61ddeeb1357a654380e4b.zip |
[DYNAREC] Introduce BOX64_DYNAREC_X87DOUBLE=2 to handle Low Precision x87 ([ARM64_DYNAREC] only for now)
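In this mode the dynarec keeps x87 values as doubles (as with `=1`) but additionally honors the Precision Control (PC) field of the x87 control word, rounding results back to single precision when a program requests 24-bit results. A minimal C sketch of the intended semantics (the helper name is illustrative, not box64 API):

```c
#include <stdint.h>

/* Sketch of one x87 operation under BOX64_DYNAREC_X87DOUBLE=2:
 * compute in double, then round the result to single precision
 * when the control word asks for 24-bit results. */
static double x87_op_mode2(double a, double b, uint16_t cw)
{
    double r = a + b;            /* e.g. FADD, performed in double precision */
    if (((cw >> 8) & 3) == 0)    /* PC field (bits 8-9) == 0 -> single precision */
        r = (double)(float)r;    /* round to float, widen back to double */
    return r;
}
```

Keeping the check behind a cached PC value means programs that leave PC at double or extended precision only pay one conditional branch per guarded operation.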
Diffstat (limited to 'src/dynarec/arm64')
| -rw-r--r-- | src/dynarec/arm64/arm64_emitter.h | 5 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_660f.c | 5 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_d8.c | 14 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_d9.c | 3 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_da.c | 6 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_dc.c | 14 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_de.c | 12 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.c | 21 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 21 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_pass0.h | 3 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_pass1.h | 5 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_private.h | 1 |

12 files changed, 93 insertions(+), 17 deletions(-)
```diff
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 0ecfa33c..7d087fc5 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -89,7 +89,8 @@ p0-p3 are used to pass scalable predicate arguments to a subroutine and to retur
 #define x4 4
 #define x5 5
 #define x6 6
-#define x7 7
+#define x87pc 7
+// x87 can be a scratch, but check if it's used as x87 PC and restore if needed in that case
 // 32bits version of scratch
 #define w1 x1
 #define w2 x2
@@ -97,7 +98,7 @@ p0-p3 are used to pass scalable predicate arguments to a subroutine and to retur
 #define w4 x4
 #define w5 x5
 #define w6 x6
-#define w7 x7
+#define w87pc x87pc
 // emu is r0
 #define xEmu 0
 // ARM64 LR
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 8280137b..cde9b851 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -1420,8 +1420,8 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 CSELw(x2, x2, x3, cLT); // x2 is min(lmem, lreg)
                 // x2 is min length 0-n_packed
                 MVNw_REG(x4, xZR);
-                LSLw_REG(x7, x4, x2);
-                BICw_REG(x1, x1, x7);
+                LSLw_REG(x87pc, x4, x2);
+                BICw_REG(x1, x1, x87pc);
                 LSLw_REG(x4, x4, x5);
                 ORRw_REG(x1, x1, x4);
                 ANDw_mask(x1, x1, 0, (u8&1)?7:15);
@@ -1474,6 +1474,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                         BFCw(xFlags, F_PF, 1);
                     }
                 }
+                ARM64_CHECK_PRECISION();    // to regen x87 if it has been used
             } else {
                 SETFLAGS(X_ALL, SF_SET_DF);
                 if(gd>7)    // no need to reflect cache as xmm0-xmm7 will be saved before the function call anyway
diff --git a/src/dynarec/arm64/dynarec_arm64_d8.c b/src/dynarec/arm64/dynarec_arm64_d8.c
index 0867d6fe..1fb1e614 100644
--- a/src/dynarec/arm64/dynarec_arm64_d8.c
+++ b/src/dynarec/arm64/dynarec_arm64_d8.c
@@ -56,6 +56,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FADDS(v1, v1, v2);
             } else {
                 FADDD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -77,6 +78,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FMULS(v1, v1, v2);
             } else {
                 FMULD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -135,6 +137,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FSUBS(v1, v1, v2);
             } else {
                 FSUBD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -156,6 +159,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FSUBS(v1, v2, v1);
             } else {
                 FSUBD(v1, v2, v1);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -177,6 +181,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FDIVS(v1, v1, v2);
             } else {
                 FDIVD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -198,6 +203,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FDIVS(v1, v2, v1);
             } else {
                 FDIVD(v1, v2, v1);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -220,6 +226,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {
                 FCVT_D_S(s0, s0);
                 FADDD(v1, v1, s0);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -237,6 +244,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {
                 FCVT_D_S(s0, s0);
                 FMULD(v1, v1, s0);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -252,6 +260,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {
                 FCVT_D_S(s0, s0);
                 FCMPD(v1, s0);
+                X87_CHECK_PRECISION(v1);
             }
             FCOM(x1, x2, x3);
             break;
@@ -266,6 +275,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {
                 FCVT_D_S(s0, s0);
                 FCMPD(v1, s0);
+                X87_CHECK_PRECISION(v1);
             }
             FCOM(x1, x2, x3);
             X87_POP_OR_FAIL(dyn, ninst, x3);
@@ -283,6 +293,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {
                 FCVT_D_S(s0, s0);
                 FSUBD(v1, v1, s0);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -300,6 +311,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {
                 FCVT_D_S(s0, s0);
                 FSUBD(v1, s0, v1);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -317,6 +329,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {
                 FCVT_D_S(s0, s0);
                 FDIVD(v1, v1, s0);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -334,6 +347,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {
                 FCVT_D_S(s0, s0);
                 FDIVD(v1, s0, v1);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
diff --git a/src/dynarec/arm64/dynarec_arm64_d9.c b/src/dynarec/arm64/dynarec_arm64_d9.c
index c067d556..ca18951a 100644
--- a/src/dynarec/arm64/dynarec_arm64_d9.c
+++ b/src/dynarec/arm64/dynarec_arm64_d9.c
@@ -430,6 +430,7 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FSQRTS(v1, v1);
             } else {
                 FSQRTD(v1, v1);
+                X87_CHECK_PRECISION(v1);
             }
             if(!BOX64ENV(dynarec_fastround))
                 x87_restoreround(dyn, ninst, u8);
@@ -509,7 +510,7 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
        switch((nextop>>3)&7) {
            case 0:
                INST_NAME("FLD ST0, float[ED]");
-               X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, BOX64ENV(dynarec_x87double)?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
+               X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, (BOX64ENV(dynarec_x87double)==1)?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
                VLD32(v1, ed, fixedaddress);
                if(!ST_IS_F(0)) {
diff --git a/src/dynarec/arm64/dynarec_arm64_da.c b/src/dynarec/arm64/dynarec_arm64_da.c
index 80127fc2..8189f43f 100644
--- a/src/dynarec/arm64/dynarec_arm64_da.c
+++ b/src/dynarec/arm64/dynarec_arm64_da.c
@@ -150,6 +150,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            if(!BOX64ENV(dynarec_fastround))
                u8 = x87_setround(dyn, ninst, x1, x5, x4);
            FADDD(v1, v1, v2);
+           X87_CHECK_PRECISION(v1);
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
            break;
@@ -164,6 +165,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            if(!BOX64ENV(dynarec_fastround))
                u8 = x87_setround(dyn, ninst, x1, x5, x4);
            FMULD(v1, v1, v2);
+           X87_CHECK_PRECISION(v1);
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
            break;
@@ -201,6 +203,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            if(!BOX64ENV(dynarec_fastround))
                u8 = x87_setround(dyn, ninst, x1, x5, x4);
            FSUBD(v1, v1, v2);
+           X87_CHECK_PRECISION(v1);
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
            break;
@@ -215,6 +218,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            if(!BOX64ENV(dynarec_fastround))
                u8 = x87_setround(dyn, ninst, x1, x5, x4);
            FSUBD(v1, v2, v1);
+           X87_CHECK_PRECISION(v1);
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
            break;
@@ -229,6 +233,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            if(!BOX64ENV(dynarec_fastround))
                u8 = x87_setround(dyn, ninst, x1, x5, x4);
            FDIVD(v1, v1, v2);
+           X87_CHECK_PRECISION(v1);
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
            break;
@@ -243,6 +248,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            if(!BOX64ENV(dynarec_fastround))
                u8 = x87_setround(dyn, ninst, x1, x5, x4);
            FDIVD(v1, v2, v1);
+           X87_CHECK_PRECISION(v1);
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
            break;
diff --git a/src/dynarec/arm64/dynarec_arm64_dc.c b/src/dynarec/arm64/dynarec_arm64_dc.c
index bab8cb60..3ffb8dbb 100644
--- a/src/dynarec/arm64/dynarec_arm64_dc.c
+++ b/src/dynarec/arm64/dynarec_arm64_dc.c
@@ -54,6 +54,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                FADDS(v1, v1, v2);
            } else {
                FADDD(v1, v1, v2);
+               X87_CHECK_PRECISION(v1);
            }
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
@@ -75,6 +76,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                FMULS(v1, v1, v2);
            } else {
                FMULD(v1, v1, v2);
+               X87_CHECK_PRECISION(v1);
            }
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
@@ -94,6 +96,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                FCMPS(v1, v2);
            } else {
                FCMPD(v1, v2);
+               X87_CHECK_PRECISION(v1);
            }
            FCOM(x1, x2, x3);
            break;
@@ -112,6 +115,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                FCMPS(v1, v2);
            } else {
                FCMPD(v1, v2);
+               X87_CHECK_PRECISION(v1);
            }
            FCOM(x1, x2, x3);
            X87_POP_OR_FAIL(dyn, ninst, x3);
@@ -133,6 +137,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                FSUBS(v1, v2, v1);
            } else {
                FSUBD(v1, v2, v1);
+               X87_CHECK_PRECISION(v1);
            }
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
@@ -154,6 +159,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                FSUBS(v1, v1, v2);
            } else {
                FSUBD(v1, v1, v2);
+               X87_CHECK_PRECISION(v1);
            }
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
@@ -175,6 +181,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                FDIVS(v1, v2, v1);
            } else {
                FDIVD(v1, v2, v1);
+               X87_CHECK_PRECISION(v1);
            }
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
@@ -196,6 +203,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                FDIVS(v1, v1, v2);
            } else {
                FDIVD(v1, v1, v2);
+               X87_CHECK_PRECISION(v1);
            }
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
@@ -214,6 +222,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            if(!BOX64ENV(dynarec_fastround))
                u8 = x87_setround(dyn, ninst, x1, x2, x4);
            FADDD(v1, v1, v2);
+           X87_CHECK_PRECISION(v1);
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
            break;
@@ -226,6 +235,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            if(!BOX64ENV(dynarec_fastround))
                u8 = x87_setround(dyn, ninst, x1, x2, x4);
            FMULD(v1, v1, v2);
+           X87_CHECK_PRECISION(v1);
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
            break;
@@ -257,6 +267,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            if(!BOX64ENV(dynarec_fastround))
                u8 = x87_setround(dyn, ninst, x1, x2, x4);
            FSUBD(v1, v1, v2);
+           X87_CHECK_PRECISION(v1);
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
            break;
@@ -269,6 +280,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            if(!BOX64ENV(dynarec_fastround))
                u8 = x87_setround(dyn, ninst, x1, x2, x4);
            FSUBD(v1, v2, v1);
+           X87_CHECK_PRECISION(v1);
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
            break;
@@ -281,6 +293,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            if(!BOX64ENV(dynarec_fastround))
                u8 = x87_setround(dyn, ninst, x1, x2, x4);
            FDIVD(v1, v1, v2);
+           X87_CHECK_PRECISION(v1);
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
            break;
@@ -293,6 +306,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
            if(!BOX64ENV(dynarec_fastround))
                u8 = x87_setround(dyn, ninst, x1, x2, x4);
            FDIVD(v1, v2, v1);
+           X87_CHECK_PRECISION(v1);
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
            break;
diff --git a/src/dynarec/arm64/dynarec_arm64_de.c b/src/dynarec/arm64/dynarec_arm64_de.c
index 9a29aebe..0c4122bd 100644
--- a/src/dynarec/arm64/dynarec_arm64_de.c
+++ b/src/dynarec/arm64/dynarec_arm64_de.c
@@ -54,6 +54,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                FADDS(v1, v1, v2);
            } else {
                FADDD(v1, v1, v2);
+               X87_CHECK_PRECISION(v1);
            }
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
@@ -76,6 +77,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                FMULS(v1, v1, v2);
            } else {
                FMULD(v1, v1, v2);
+               X87_CHECK_PRECISION(v1);
            }
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
@@ -130,6 +132,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                FSUBS(v1, v2, v1);
            } else {
                FSUBD(v1, v2, v1);
+               X87_CHECK_PRECISION(v1);
            }
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
@@ -152,6 +155,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                FSUBS(v1, v1, v2);
            } else {
                FSUBD(v1, v1, v2);
+               X87_CHECK_PRECISION(v1);
            }
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
@@ -174,6 +178,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                FDIVS(v1, v2, v1);
            } else {
                FDIVD(v1, v2, v1);
+               X87_CHECK_PRECISION(v1);
            }
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
@@ -196,6 +201,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                FDIVS(v1, v1, v2);
            } else {
                FDIVD(v1, v1, v2);
+               X87_CHECK_PRECISION(v1);
            }
            if(!BOX64ENV(dynarec_fastround))
                x87_restoreround(dyn, ninst, u8);
@@ -216,6 +222,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                SXTL_32(v2, v2);
                SCVTFDD(v2, v2);
                FADDD(v1, v1, v2);
+               X87_CHECK_PRECISION(v1);
                break;
            case 1:
                INST_NAME("FIMUL ST0, word[ED]");
@@ -227,6 +234,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                SXTL_32(v2, v2);
                SCVTFDD(v2, v2);
                FMULD(v1, v1, v2);
+               X87_CHECK_PRECISION(v1);
                break;
            case 2:
                INST_NAME("FICOM ST0, word[ED]");
@@ -263,6 +271,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                SXTL_32(v2, v2);
                SCVTFDD(v2, v2);
                FSUBD(v1, v1, v2);
+               X87_CHECK_PRECISION(v1);
                break;
            case 5:
                INST_NAME("FISUBR ST0, word[ED]");
@@ -274,6 +283,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                SXTL_32(v2, v2);
                SCVTFDD(v2, v2);
                FSUBD(v1, v2, v1);
+               X87_CHECK_PRECISION(v1);
                break;
            case 6:
                INST_NAME("FIDIV ST0, word[ED]");
@@ -285,6 +295,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                SXTL_32(v2, v2);
                SCVTFDD(v2, v2);
                FDIVD(v1, v1, v2);
+               X87_CHECK_PRECISION(v1);
                break;
            case 7:
                INST_NAME("FIDIVR ST0, word[ED]");
@@ -296,6 +307,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                SXTL_32(v2, v2);
                SCVTFDD(v2, v2);
                FDIVD(v1, v2, v1);
+               X87_CHECK_PRECISION(v1);
                break;
    }
    return addr;
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index e8aa70c2..69855502 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -766,7 +766,7 @@ void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int save
    dyn->insts[ninst].nat_flags_op = NAT_FLAG_OP_UNUSABLE;
    #endif
    if(savereg==0)
-       savereg = 7;
+       savereg = x87pc;
    if(saveflags) {
        STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
    }
@@ -804,6 +804,9 @@
    if(saveflags) {
        LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
    }
+   if(reg==x87pc && savereg!=x87pc && dyn->need_x87check) {
+       ARM64_CHECK_PRECISION();    // regen x87 mask
+   }
    //SET_NODF();
 }
 
@@ -813,7 +816,7 @@ void call_i(dynarec_arm_t* dyn, int ninst, void* fnc)
    #if STEP == 0
    dyn->insts[ninst].nat_flags_op = NAT_FLAG_OP_UNUSABLE;
    #endif
-   STPx_S7_preindex(x6, x7, xSP, -16);
+   STPx_S7_preindex(x6, x87pc, xSP, -16);
    STPx_S7_preindex(x4, x5, xSP, -16);
    STPx_S7_preindex(x2, x3, xSP, -16);
    STPx_S7_preindex(xEmu, x1, xSP, -16);   // ARM64 stack needs to be 16byte aligned
@@ -823,10 +826,10 @@ void call_i(dynarec_arm_t* dyn, int ninst, void* fnc)
    STPx_S7_offset(xRSI, xRDI, xEmu, offsetof(x64emu_t, regs[_SI]));
    STPx_S7_offset(xR8, xR9, xEmu, offsetof(x64emu_t, regs[_R8]));
    STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
-   fpu_pushcache(dyn, ninst, x7, 0);
+   fpu_pushcache(dyn, ninst, x87pc, 0);
 
-   TABLE64(x7, (uintptr_t)fnc);
-   BLR(x7);
+   TABLE64(x87pc, (uintptr_t)fnc);
+   BLR(x87pc);
    LDPx_S7_postindex(xEmu, x1, xSP, 16);
    LDPx_S7_postindex(x2, x3, xSP, 16);
    LDPx_S7_postindex(x4, x5, xSP, 16);
@@ -838,8 +841,8 @@ void call_i(dynarec_arm_t* dyn, int ninst, void* fnc)
    GO(R8, R9);
    #undef GO
    LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
-   fpu_popcache(dyn, ninst, x7, 0);    // savereg will not be used
-   LDPx_S7_postindex(x6, x7, xSP, 16);
+   fpu_popcache(dyn, ninst, x87pc, 0); // savereg will not be used
+   LDPx_S7_postindex(x6, x87pc, xSP, 16);
    //SET_NODF();
 }
 
@@ -859,12 +862,12 @@ void call_n(dynarec_arm_t* dyn, int ninst, void* fnc, int w)
    if(abs(w)>1) {
        MESSAGE(LOG_DUMP, "Getting %d XMM args\n", abs(w)-1);
        for(int i=0; i<abs(w)-1; ++i) {
-           sse_get_reg(dyn, ninst, x7, i, w);
+           sse_get_reg(dyn, ninst, x3, i, w);
        }
    }
    if(w<0) {
        MESSAGE(LOG_DUMP, "Return in XMM0\n");
-       sse_get_reg_empty(dyn, ninst, x7, 0);
+       sse_get_reg_empty(dyn, ninst, x3, 0);
    }
    // prepare regs for native call
    MOVx_REG(0, xRDI);
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 2e152dfb..95681b3e 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -721,13 +721,13 @@
 // CALL will use x7 for the call address. Return value can be put in ret (unless ret is -1)
 // R0 will not be pushed/popd if ret is -2
-#define CALL(F, ret) call_c(dyn, ninst, F, x7, ret, 1, 0)
+#define CALL(F, ret) call_c(dyn, ninst, F, x87pc, ret, 1, 0)
 // CALL_ will use x7 for the call address. Return value can be put in ret (unless ret is -1)
 // R0 will not be pushed/popd if ret is -2
-#define CALL_(F, ret, reg) call_c(dyn, ninst, F, x7, ret, 1, reg)
+#define CALL_(F, ret, reg) call_c(dyn, ninst, F, x87pc, ret, 1, reg)
 // CALL_S will use x7 for the call address. Return value can be put in ret (unless ret is -1)
 // R0 will not be pushed/popd if ret is -2. Flags are not save/restored
-#define CALL_S(F, ret) call_c(dyn, ninst, F, x7, ret, 0, 0)
+#define CALL_S(F, ret) call_c(dyn, ninst, F, x87pc, ret, 0, 0)
 // CALL_ will use x7 for the call address.
 // All regs are saved, including scratch. This is use to call internal function that should not change state
 #define CALL_I(F) call_i(dyn, ninst, F)
 
@@ -998,6 +998,21 @@
 #define CALLRET_LOOP() NOP
 #endif
 
+#ifndef ARM64_CHECK_PRECISION
+#define ARM64_CHECK_PRECISION()                             \
+    if(dyn->need_x87check) {                                \
+        LDRH_U12(x87pc, xEmu, offsetof(x64emu_t, cw));      \
+        UBFXw(x87pc, x87pc, 8, 2);                          \
+    }
+#endif
+#ifndef X87_CHECK_PRECISION
+#define X87_CHECK_PRECISION(A)                              \
+    if(dyn->need_x87check) {                                \
+        CBNZw(x87pc, 4+8);                                  \
+        FCVT_S_D(A, A);                                     \
+        FCVT_D_S(A, A);                                     \
+    }
+#endif
 #define STORE_REG(A) STRx_U12(x##A, xEmu, offsetof(x64emu_t, regs[_##A]))
 #define STP_REGS(A, B) STPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A]))
 #define LDP_REGS(A, B) LDPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A]))
diff --git a/src/dynarec/arm64/dynarec_arm64_pass0.h b/src/dynarec/arm64/dynarec_arm64_pass0.h
index ec792acb..bdfa1785 100644
--- a/src/dynarec/arm64/dynarec_arm64_pass0.h
+++ b/src/dynarec/arm64/dynarec_arm64_pass0.h
@@ -72,3 +72,6 @@
 // mark opcode as "unaligned" possible only if the current address is not marked as already unaligned
 #define IF_UNALIGNED(A) if((dyn->insts[ninst].unaligned=is_addr_unaligned(A)))
 #define IF_ALIGNED(A) if(!(dyn->insts[ninst].unaligned=is_addr_unaligned(A)))
+
+#define ARM64_CHECK_PRECISION()
+#define X87_CHECK_PRECISION(A)
diff --git a/src/dynarec/arm64/dynarec_arm64_pass1.h b/src/dynarec/arm64/dynarec_arm64_pass1.h
index f5ad6a7b..14a716cd 100644
--- a/src/dynarec/arm64/dynarec_arm64_pass1.h
+++ b/src/dynarec/arm64/dynarec_arm64_pass1.h
@@ -22,3 +22,8 @@
 dyn->insts[ninst].f_exit = dyn->f
 
 #define INST_NAME(name)
+
+#define ARM64_CHECK_PRECISION()
+#define X87_CHECK_PRECISION(A)  \
+    if(dyn->need_x87check)      \
+        dyn->need_x87check=2
diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h
index 1c5f7008..f397e949 100644
--- a/src/dynarec/arm64/dynarec_arm64_private.h
+++ b/src/dynarec/arm64/dynarec_arm64_private.h
@@ -171,6 +171,7 @@ typedef struct dynarec_arm_s {
    uint8_t             always_test;
    uint8_t             abort;          // abort the creation of the block
    void*               gdbjit_block;
+   uint32_t            need_x87check;  // needs x87 precision control check if non-null, or 0 if not
 } dynarec_arm_t;
 
 void add_next(dynarec_arm_t *dyn, uintptr_t addr);
```
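Taken together, the mechanism works in two steps: pass 1 (dynarec_arm64_pass1.h) records in `need_x87check` that a precision-sensitive operation exists in the block; the final pass then emits `ARM64_CHECK_PRECISION` to cache the control word's Precision Control field in the repurposed x7/x87pc register (regenerating it after any C helper call that clobbers x7, as in the `call_c` change above), and guards each double-precision result with `X87_CHECK_PRECISION`. A hedged C model of the two macros (function names are illustrative, not box64 API):

```c
#include <stdint.h>

/* ARM64_CHECK_PRECISION: reload the x87 control word and cache its
 * Precision Control field (bits 8-9) in the x87pc scratch register. */
static unsigned regen_x87pc(uint16_t cw)
{
    return (cw >> 8) & 3;        /* LDRH_U12 + UBFXw(x87pc, x87pc, 8, 2) */
}

/* X87_CHECK_PRECISION(v1): consume the cached field; 0 means 24-bit
 * precision, so the double result is rounded through a float. */
static double apply_precision(double v1, unsigned x87pc)
{
    if (x87pc == 0)              /* CBNZw(x87pc, 4+8) skips the FCVT pair */
        v1 = (double)(float)v1;  /* FCVT_S_D then FCVT_D_S */
    return v1;
}
```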