| field | value | date |
|---|---|---|
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2025-04-21 16:19:44 +0800 |
| committer | GitHub <noreply@github.com> | 2025-04-21 10:19:44 +0200 |
| commit | e7b4c79d12969566fb72a7499c693a1ded3db25e (patch) | |
| tree | 1deffb87c1670870f78ec8e49cf327bba3dafefa /src | |
| parent | 7cdfa187c5eaa6f97588bfe9be6f69290eacdfdb (diff) | |
| download | box64-e7b4c79d12969566fb72a7499c693a1ded3db25e.tar.gz box64-e7b4c79d12969566fb72a7499c693a1ded3db25e.zip | |
[RV64_DYNAREC] Added X87DOUBLE=2 support (#2553)
Diffstat (limited to 'src')
22 files changed, 88 insertions, 24 deletions
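This patch extends the X87DOUBLE=2 mode (`BOX64ENV(dynarec_x87double) == 2`) from the ARM64 backend to RV64: the ARM64-only `ARM64_CHECK_PRECISION`/`ARCH_PRECISION` macros become the generic `NATIVE_RESTORE_X87PC`, the RV64 dynarec gains a `need_x87check` field and dedicates `t0`/`x5` as the `x87pc` scratch register, and each double-precision x87 arithmetic result goes through `X87_CHECK_PRECISION`. The snippet below is a plain-C illustration, not part of the patch (the helper name is made up), of what the emitted `BNEZ`/`FCVT.S.D`/`FCVT.D.S` sequence does at runtime: when the guest's FPU control word selects 24-bit precision, the double result is rounded through single precision.

```c
#include <stdint.h>

/* Illustrative helper: x87pc caches bits 8-9 of the x87 control word
 * (the Precision Control field), loaded by NATIVE_RESTORE_X87PC.
 * PC == 0 means 24-bit precision, so the double result is rounded
 * through float; any other value leaves the double untouched. */
static double x87_apply_precision(double result, uint16_t cw)
{
    uint32_t x87pc = (cw >> 8) & 0x3;     /* SRLI + ANDI in the RV64 macro  */
    if (x87pc == 0)                       /* BNEZ skips the two conversions */
        result = (double)(float)result;   /* FCVT.S.D then FCVT.D.S         */
    return result;
}
```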
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index cde9b851..dbb4538e 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -1474,7 +1474,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     BFCw(xFlags, F_PF, 1);
                 }
             }
-            ARM64_CHECK_PRECISION();    // to regen x87 if it has been used
+            NATIVE_RESTORE_X87PC();
         } else {
             SETFLAGS(X_ALL, SF_SET_DF);
             if(gd>7)    // no need to reflect cache as xmm0-xmm7 will be saved before the function call anyway
diff --git a/src/dynarec/arm64/dynarec_arm64_db.c b/src/dynarec/arm64/dynarec_arm64_db.c
index 1cda1140..3a835593 100644
--- a/src/dynarec/arm64/dynarec_arm64_db.c
+++ b/src/dynarec/arm64/dynarec_arm64_db.c
@@ -134,7 +134,7 @@ uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             MESSAGE(LOG_DUMP, "Need Optimization (FNINIT)\n");
             x87_purgecache(dyn, ninst, 0, x1, x2, x3);
             CALL(reset_fpu, -1);
-            ARM64_CHECK_PRECISION();
+            NATIVE_RESTORE_X87PC();
             break;
         case 0xE8:
         case 0xE9:
diff --git a/src/dynarec/arm64/dynarec_arm64_dd.c b/src/dynarec/arm64/dynarec_arm64_dd.c
index 68a430da..35a442be 100644
--- a/src/dynarec/arm64/dynarec_arm64_dd.c
+++ b/src/dynarec/arm64/dynarec_arm64_dd.c
@@ -206,7 +206,7 @@ uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             if(ed!=x1) {MOVx_REG(x1, ed);}
             CALL(native_fsave, -1);
             CALL(reset_fpu, -1);
-            ARM64_CHECK_PRECISION();
+            NATIVE_RESTORE_X87PC();
             break;
         case 7:
             INST_NAME("FNSTSW m2byte");
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index 6a67b4e9..c844e430 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -805,7 +805,7 @@ void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int save
         LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
     }
     if(savereg!=x87pc && dyn->need_x87check) {
-        ARM64_CHECK_PRECISION();    // regen x87 mask
+        NATIVE_RESTORE_X87PC();
     }
     //SET_NODF();
 }
@@ -892,7 +892,7 @@ void call_n(dynarec_arm_t* dyn, int ninst, void* fnc, int w)
     fpu_popcache(dyn, ninst, x3, 1);
 
     LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
-    ARM64_CHECK_PRECISION();    // restore x87pc if needed
+    NATIVE_RESTORE_X87PC();
     //SET_NODF();
 }
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 95681b3e..f135cc91 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -998,8 +998,8 @@
 #define CALLRET_LOOP()  NOP
 #endif
 
-#ifndef ARM64_CHECK_PRECISION
-#define ARM64_CHECK_PRECISION() \
+#ifndef NATIVE_RESTORE_X87PC
+#define NATIVE_RESTORE_X87PC() \
     if(dyn->need_x87check) { \
         LDRH_U12(x87pc, xEmu, offsetof(x64emu_t, cw)); \
         UBFXw(x87pc, x87pc, 8, 2); \
diff --git a/src/dynarec/arm64/dynarec_arm64_pass0.h b/src/dynarec/arm64/dynarec_arm64_pass0.h
index bdfa1785..5307e0e3 100644
--- a/src/dynarec/arm64/dynarec_arm64_pass0.h
+++ b/src/dynarec/arm64/dynarec_arm64_pass0.h
@@ -73,5 +73,5 @@
 #define IF_UNALIGNED(A) if((dyn->insts[ninst].unaligned=is_addr_unaligned(A)))
 #define IF_ALIGNED(A)   if(!(dyn->insts[ninst].unaligned=is_addr_unaligned(A)))
 
-#define ARM64_CHECK_PRECISION()
+#define NATIVE_RESTORE_X87PC()
 #define X87_CHECK_PRECISION(A)
diff --git a/src/dynarec/arm64/dynarec_arm64_pass1.h b/src/dynarec/arm64/dynarec_arm64_pass1.h
index 14a716cd..aaf27385 100644
--- a/src/dynarec/arm64/dynarec_arm64_pass1.h
+++ b/src/dynarec/arm64/dynarec_arm64_pass1.h
@@ -21,9 +21,11 @@
     dyn->insts[ninst].n = dyn->n;       \
     dyn->insts[ninst].f_exit = dyn->f
 
-#define INST_NAME(name)
+#define INST_NAME(name)
 
-#define ARM64_CHECK_PRECISION()
-#define X87_CHECK_PRECISION(A) \
-    if(dyn->need_x87check)     \
-        dyn->need_x87check=2
+#define NATIVE_RESTORE_X87PC()
+#define X87_CHECK_PRECISION(A)      \
+    do {                            \
+        if (dyn->need_x87check)     \
+            dyn->need_x87check = 2; \
+    } while (0)
diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h
index c65a0682..c337523e 100644
--- a/src/dynarec/dynarec_arch.h
+++ b/src/dynarec/dynarec_arch.h
@@ -35,7 +35,6 @@ extern uint32_t arm64_crc(void* p, uint32_t len);
 #define ARCH_NOP    0b11010101000000110010000000011111
 #define ARCH_UDF    0xcafe
 
-#define ARCH_PRECISION() ARM64_CHECK_PRECISION()
 #elif defined(LA64)
 
 #define instruction_native_t instruction_la64_t
@@ -63,6 +62,9 @@ extern uint32_t arm64_crc(void* p, uint32_t len);
 #define ARCH_ADJUST(A, B, C, D) {}
 #define STOP_NATIVE_FLAGS(A, B) {}
 #define ARCH_UNALIGNED(A, B) 0
+
+// NYI
+#define NATIVE_RESTORE_X87PC()
 #elif defined(RV64)
 
 #define instruction_native_t instruction_rv64_t
diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c
index 2c4aa72b..431ee644 100644
--- a/src/dynarec/dynarec_native.c
+++ b/src/dynarec/dynarec_native.c
@@ -636,11 +636,9 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
         CancelBlock64(0);
         return NULL;
     }
-    #ifdef ARCH_PRECISION
     if(BOX64ENV(dynarec_x87double)==2) {
         helper.need_x87check = 1;
     }
-    #endif
     // basic checks
     if(!helper.size) {
         dynarec_log(LOG_INFO, "Warning, null-sized dynarec block (%p)\n", (void*)addr);
@@ -773,13 +771,10 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
         CancelBlock64(0);
         return NULL;
     }
-    #ifdef ARCH_PRECISION
     if(BOX64ENV(dynarec_x87double)==2) {
         if(helper.need_x87check==1)
             helper.need_x87check = 0;
     }
-    #endif
-
     // pass 2, instruction size
     helper.callrets = static_callrets;
     native_pass2(&helper, addr, alternate, is32bits, inst_max);
diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c
index 5613ad57..31de0080 100644
--- a/src/dynarec/dynarec_native_pass.c
+++ b/src/dynarec/dynarec_native_pass.c
@@ -83,11 +83,9 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int
             break;
         }
         #endif
-        #ifdef ARCH_PRECISION
         if(!ninst && dyn->need_x87check) {
-            ARCH_PRECISION();
+            NATIVE_RESTORE_X87PC();
         }
-        #endif
         fpu_propagate_stack(dyn, ninst);
         ip = addr;
         if (reset_n!=-1) {
diff --git a/src/dynarec/la64/dynarec_la64_private.h b/src/dynarec/la64/dynarec_la64_private.h
index 0246007e..e2dea342 100644
--- a/src/dynarec/la64/dynarec_la64_private.h
+++ b/src/dynarec/la64/dynarec_la64_private.h
@@ -147,6 +147,7 @@ typedef struct dynarec_la64_s {
     uint8_t always_test;
     uint8_t abort;
     void* gdbjit_block;
+    uint32_t need_x87check; // x87 low precision check
 } dynarec_la64_t;
 
 void add_next(dynarec_la64_t *dyn, uintptr_t addr);
diff --git a/src/dynarec/rv64/dynarec_rv64_d8.c b/src/dynarec/rv64/dynarec_rv64_d8.c
index d58b029b..e5497973 100644
--- a/src/dynarec/rv64/dynarec_rv64_d8.c
+++ b/src/dynarec/rv64/dynarec_rv64_d8.c
@@ -52,6 +52,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FADDS(v1, v1, v2);
             } else {
                 FADDD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
@@ -64,6 +65,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FMULS(v1, v1, v2);
             } else {
                 FMULD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
@@ -97,6 +99,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FSUBS(v1, v1, v2);
             } else {
                 FSUBD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
@@ -109,6 +112,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FSUBS(v1, v2, v1);
             } else {
                 FSUBD(v1, v2, v1);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
@@ -121,6 +125,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FDIVS(v1, v1, v2);
             } else {
                 FDIVD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
@@ -133,6 +138,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FDIVS(v1, v2, v1);
             } else {
                 FDIVD(v1, v2, v1);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
@@ -154,6 +160,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             } else {
                 FCVTDS(s0, s0);
                 FADDD(v1, v1, s0);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
@@ -169,6 +176,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             } else {
                 FCVTDS(s0, s0);
                 FMULD(v1, v1, s0);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
@@ -211,6 +219,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             } else {
                 FCVTDS(s0, s0);
                 FSUBD(v1, v1, s0);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
@@ -226,6 +235,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             } else {
                 FCVTDS(s0, s0);
                 FSUBD(v1, s0, v1);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
@@ -241,6 +251,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             } else {
                 FCVTDS(s0, s0);
                 FDIVD(v1, v1, s0);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
@@ -256,6 +267,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             } else {
                 FCVTDS(s0, s0);
                 FDIVD(v1, s0, v1);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
diff --git a/src/dynarec/rv64/dynarec_rv64_d9.c b/src/dynarec/rv64/dynarec_rv64_d9.c
index 315d35f2..bcdf3854 100644
--- a/src/dynarec/rv64/dynarec_rv64_d9.c
+++ b/src/dynarec/rv64/dynarec_rv64_d9.c
@@ -92,6 +92,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FNEGS(v1, v1);
             } else {
                 FNEGD(v1, v1);
+                X87_CHECK_PRECISION(v1);
             }
             break;
         case 0xE1:
@@ -101,6 +102,7 @@
                 FABSS(v1, v1);
             } else {
                 FABSD(v1, v1);
+                X87_CHECK_PRECISION(v1);
             }
             break;
 
@@ -349,6 +351,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FSQRTS(v1, v1);
             } else {
                 FSQRTD(v1, v1);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
diff --git a/src/dynarec/rv64/dynarec_rv64_da.c b/src/dynarec/rv64/dynarec_rv64_da.c
index 690b364d..72921282 100644
--- a/src/dynarec/rv64/dynarec_rv64_da.c
+++ b/src/dynarec/rv64/dynarec_rv64_da.c
@@ -118,6 +118,7 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FCVTDW(v2, x1, RD_RNE); // i32 -> double
                 if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                 FADDD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
                 if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                 break;
             case 1:
@@ -129,6 +130,7 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FCVTDW(v2, x1, RD_RNE); // i32 -> double
                 if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                 FMULD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
                 if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                 break;
             case 2:
@@ -159,6 +161,7 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FCVTDW(v2, x1, RD_RNE); // i32 -> double
                 if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                 FSUBD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
                 if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                 break;
             case 5:
@@ -170,6 +173,7 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FCVTDW(v2, x1, RD_RNE); // i32 -> double
                 if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                 FSUBD(v1, v2, v1);
+                X87_CHECK_PRECISION(v1);
                 if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                 break;
             case 6:
@@ -181,6 +185,7 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FCVTDW(v2, x1, RD_RNE); // i32 -> double
                 if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                 FDIVD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
                 if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                 break;
             case 7:
@@ -192,6 +197,7 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FCVTDW(v2, x1, RD_RNE); // i32 -> double
                 if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                 FDIVD(v1, v2, v1);
+                X87_CHECK_PRECISION(v1);
                 if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                 break;
         }
diff --git a/src/dynarec/rv64/dynarec_rv64_dc.c b/src/dynarec/rv64/dynarec_rv64_dc.c
index 98332fe8..c73e0393 100644
--- a/src/dynarec/rv64/dynarec_rv64_dc.c
+++ b/src/dynarec/rv64/dynarec_rv64_dc.c
@@ -48,6 +48,7 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FADDS(v1, v1, v2);
             } else {
                 FADDD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
@@ -60,6 +61,7 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FMULS(v1, v1, v2);
             } else {
                 FMULD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
@@ -93,6 +95,7 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FSUBS(v1, v2, v1);
             } else {
                 FSUBD(v1, v2, v1);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
@@ -105,6 +108,7 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FSUBS(v1, v1, v2);
             } else {
                 FSUBD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
@@ -117,6 +121,7 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FDIVS(v1, v2, v1);
             } else {
                 FDIVD(v1, v2, v1);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
@@ -129,6 +134,7 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FDIVS(v1, v1, v2);
             } else {
                 FDIVD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
diff --git a/src/dynarec/rv64/dynarec_rv64_de.c b/src/dynarec/rv64/dynarec_rv64_de.c
index 0807417d..6932d3e1 100644
--- a/src/dynarec/rv64/dynarec_rv64_de.c
+++ b/src/dynarec/rv64/dynarec_rv64_de.c
@@ -47,6 +47,7 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FADDS(v1, v1, v2);
             } else {
                 FADDD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
@@ -60,6 +61,7 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FMULS(v1, v1, v2);
             } else {
                 FMULD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
@@ -96,6 +98,7 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FSUBS(v1, v2, v1);
             } else {
                 FSUBD(v1, v2, v1);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
@@ -109,6 +112,7 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FSUBS(v1, v1, v2);
             } else {
                 FSUBD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
@@ -122,6 +126,7 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FDIVS(v1, v2, v1);
             } else {
                 FDIVD(v1, v2, v1);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
@@ -135,6 +140,7 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 FDIVS(v1, v1, v2);
             } else {
                 FDIVD(v1, v1, v2);
+                X87_CHECK_PRECISION(v1);
             }
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index 2f0a58ce..1b0d1553 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -779,7 +779,7 @@ void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int sav
 {
     MAYUSE(fnc);
     if (savereg == 0)
-        savereg = x6;
+        savereg = x87pc;
     if (saveflags) {
         FLAGS_ADJUST_TO11(xFlags, xFlags, reg);
         SD(xFlags, xEmu, offsetof(x64emu_t, eflags));
@@ -834,6 +834,8 @@ void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int sav
         LD(xFlags, xEmu, offsetof(x64emu_t, eflags));
         FLAGS_ADJUST_FROM11(xFlags, xFlags, reg);
     }
+    if (savereg != x87pc && dyn->need_x87check)
+        NATIVE_RESTORE_X87PC();
     // SET_NODF();
     CLEARIP();
 }
@@ -867,6 +869,7 @@ void call_n(dynarec_rv64_t* dyn, int ninst, void* fnc, int w)
         vector_vsetvli(dyn, ninst, x3, dyn->vector_sew, VECTOR_LMUL1, 1);
 
     fpu_popcache(dyn, ninst, x3, 1);
+    NATIVE_RESTORE_X87PC();
 
     // SET_NODF();
 }
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index a519b71a..a7886065 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -840,6 +840,23 @@
 #define IF_ALIGNED(A) if (!is_addr_unaligned(A))
 #endif
 
+#ifndef NATIVE_RESTORE_X87PC
+#define NATIVE_RESTORE_X87PC()                       \
+    if (dyn->need_x87check) {                        \
+        LD(x87pc, xEmu, offsetof(x64emu_t, cw));     \
+        SRLI(x87pc, x87pc, 8);                       \
+        ANDI(x87pc, x87pc, 0b11);                    \
+    }
+#endif
+#ifndef X87_CHECK_PRECISION
+#define X87_CHECK_PRECISION(A)  \
+    if (dyn->need_x87check) {   \
+        BNEZ(x87pc, 4 + 8);     \
+        FCVTSD(A, A);           \
+        FCVTDS(A, A);           \
+    }
+#endif
+
 #define STORE_REG(A) SD(x##A, xEmu, offsetof(x64emu_t, regs[_##A]))
 #define LOAD_REG(A)  LD(x##A, xEmu, offsetof(x64emu_t, regs[_##A]))
diff --git a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h
index 2f77b610..14b3fcc8 100644
--- a/src/dynarec/rv64/dynarec_rv64_pass0.h
+++ b/src/dynarec/rv64/dynarec_rv64_pass0.h
@@ -100,3 +100,6 @@
 // mark opcode as "unaligned" possible only if the current address is not marked as already unaligned
 #define IF_UNALIGNED(A) if ((dyn->insts[ninst].unaligned = (is_addr_unaligned(A) ? 0 : 1)))
 #define IF_ALIGNED(A)   if ((dyn->insts[ninst].unaligned = (is_addr_unaligned(A) ? 1 : 0)))
+
+#define NATIVE_RESTORE_X87PC()
+#define X87_CHECK_PRECISION(A)
diff --git a/src/dynarec/rv64/dynarec_rv64_pass1.h b/src/dynarec/rv64/dynarec_rv64_pass1.h
index 6d154fb9..7a72378b 100644
--- a/src/dynarec/rv64/dynarec_rv64_pass1.h
+++ b/src/dynarec/rv64/dynarec_rv64_pass1.h
@@ -24,3 +24,11 @@
     dyn->insts[ninst].vector_sew_exit = dyn->vector_sew;
 
 #define INST_NAME(name)
+
+
+#define NATIVE_RESTORE_X87PC()
+#define X87_CHECK_PRECISION(A)      \
+    do {                            \
+        if (dyn->need_x87check)     \
+            dyn->need_x87check = 2; \
+    } while (0)
diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h
index 7519d99c..c6907f3b 100644
--- a/src/dynarec/rv64/dynarec_rv64_private.h
+++ b/src/dynarec/rv64/dynarec_rv64_private.h
@@ -187,6 +187,7 @@ typedef struct dynarec_rv64_s {
     uint8_t inst_vl;    // vl inside current instruction, for vsetvli elimination
     uint8_t inst_vlmul; // vlmul inside current instruction
     void* gdbjit_block;
+    uint32_t need_x87check; // x87 low precision check
 } dynarec_rv64_t;
 
 // v0 is hardware wired to vector mask register, which should be always reserved
diff --git a/src/dynarec/rv64/rv64_mapping.h b/src/dynarec/rv64/rv64_mapping.h
index 93ecdf23..53e71f3c 100644
--- a/src/dynarec/rv64/rv64_mapping.h
+++ b/src/dynarec/rv64/rv64_mapping.h
@@ -10,7 +10,7 @@ x1    ra      native ra   Return address                  N/A
 x2    sp      native sp   Stack pointer                   N/A     Callee
 x3    gp      native gp   Global pointer                  N/A     —
 x4    tp      native tp   Thread pointer                  N/A     —
-x5    t0      -           Temporary   Unused                      Caller
+x5    t0      -           Temporary   X87 Precision Control       Caller
 x6    t1      x1          Temporary   Scratch                     Caller
 x7    t2      x2          Temporary   Scratch                     Caller
 x8    s0/fp   RBP         Saved register/frame pointer    -       Callee
@@ -75,6 +75,7 @@ x31   t6      x6          Temporary   Scratch
 #define xEmu 25
 #define x7 17
+#define x87pc 5
 
 #define xRA 1
 #define xSP 2
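The per-operation check only survives in blocks that actually contain double-precision x87 arithmetic. The model below (the function and type names are invented for the sketch, not taken from the repository) summarizes how `FillBlock64` in `dynarec_native.c` gates it: `need_x87check` is armed before the analysis passes, pass 1's `X87_CHECK_PRECISION` promotes it to 2 on the first qualifying op, and it is cleared again if nothing promoted it, so x87-free blocks pay no extra cost.

```c
/* Illustrative model of the need_x87check gating in FillBlock64;
 * only the 0/1/2 state machine mirrors the patch. */
typedef struct {
    int need_x87check; /* 0 = off, 1 = armed before passes, 2 = confirmed by pass 1 */
} helper_model_t;

static void arm_x87check(helper_model_t* h, int x87double_mode)
{
    if (x87double_mode == 2)
        h->need_x87check = 1;   /* ask pass 1 to watch for double x87 ops */
}

/* Pass 1 calls this (via X87_CHECK_PRECISION) on each double x87 op. */
static void confirm_x87check(helper_model_t* h)
{
    if (h->need_x87check)
        h->need_x87check = 2;
}

static void settle_x87check(helper_model_t* h, int x87double_mode)
{
    if (x87double_mode == 2 && h->need_x87check == 1)
        h->need_x87check = 0;   /* no double x87 op seen: drop the check */
}
```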