diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2025-04-22 17:07:34 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-04-22 11:07:34 +0200 |
| commit | 39a66e25eecb9e0c449bfc868ff781ae6ee16ca1 (patch) | |
| tree | a427de48f928ed8cc4cd47382c27e34a4ff5af38 /src | |
| parent | 574d6f9dabbca1d7d7dbcf739f4a9a394fafacde (diff) | |
| download | box64-39a66e25eecb9e0c449bfc868ff781ae6ee16ca1.tar.gz box64-39a66e25eecb9e0c449bfc868ff781ae6ee16ca1.zip | |
[RV64_DYNAREC] Better handling of x87double=2 (#2560)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_d8.c | 12 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_d9.c | 1 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_da.c | 6 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_dc.c | 8 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_de.c | 6 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_functions.c | 4 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 3 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_pass0.h | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_pass1.h | 3 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_private.h | 3 |
10 files changed, 47 insertions, 1 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_d8.c b/src/dynarec/rv64/dynarec_rv64_d8.c index e5497973..83de0eec 100644 --- a/src/dynarec/rv64/dynarec_rv64_d8.c +++ b/src/dynarec/rv64/dynarec_rv64_d8.c @@ -54,6 +54,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FADDD(v1, v1, v2); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 0xC8 ... 0xCF: @@ -67,6 +68,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FMULD(v1, v1, v2); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 0xD0 ... 0xD7: @@ -101,6 +103,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FSUBD(v1, v1, v2); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 0xE8 ... 0xEF: @@ -114,6 +117,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FSUBD(v1, v2, v1); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 0xF0 ... 0xF7: @@ -127,6 +131,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FDIVD(v1, v1, v2); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 0xF8 ... 0xFF: @@ -140,6 +145,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FDIVD(v1, v2, v1); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; default: @@ -162,6 +168,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FADDD(v1, v1, s0); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 1: @@ -178,6 +185,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FMULD(v1, v1, s0); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 2: @@ -221,6 +229,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FSUBD(v1, v1, s0); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 5: @@ -237,6 +246,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FSUBD(v1, s0, v1); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 6: @@ -253,6 +263,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FDIVD(v1, v1, s0); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 7: @@ -269,6 +280,7 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FDIVD(v1, s0, v1); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; } diff --git a/src/dynarec/rv64/dynarec_rv64_d9.c b/src/dynarec/rv64/dynarec_rv64_d9.c index 39dafc19..5e2e0e32 100644 --- a/src/dynarec/rv64/dynarec_rv64_d9.c +++ b/src/dynarec/rv64/dynarec_rv64_d9.c @@ -351,6 +351,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FSQRTD(v1, v1); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 0xFB: diff --git a/src/dynarec/rv64/dynarec_rv64_da.c b/src/dynarec/rv64/dynarec_rv64_da.c index 72921282..884edb88 100644 --- a/src/dynarec/rv64/dynarec_rv64_da.c +++ b/src/dynarec/rv64/dynarec_rv64_da.c @@ -119,6 +119,7 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); FADDD(v1, v1, v2); X87_CHECK_PRECISION(v1); + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 1: @@ -131,6 +132,7 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); FMULD(v1, v1, v2); X87_CHECK_PRECISION(v1); + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 2: @@ -162,6 +164,7 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); FSUBD(v1, v1, v2); X87_CHECK_PRECISION(v1); + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 5: @@ -174,6 +177,7 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); FSUBD(v1, v2, v1); X87_CHECK_PRECISION(v1); + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 6: @@ -186,6 +190,7 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); FDIVD(v1, v1, v2); X87_CHECK_PRECISION(v1); + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 7: @@ -198,6 +203,7 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); FDIVD(v1, v2, v1); X87_CHECK_PRECISION(v1); + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; } diff --git a/src/dynarec/rv64/dynarec_rv64_dc.c b/src/dynarec/rv64/dynarec_rv64_dc.c index c73e0393..80a7f082 100644 --- a/src/dynarec/rv64/dynarec_rv64_dc.c +++ b/src/dynarec/rv64/dynarec_rv64_dc.c @@ -50,6 +50,7 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FADDD(v1, v1, v2); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 0xC8 ... 0xCF: @@ -63,6 +64,7 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FMULD(v1, v1, v2); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 0xD0 ... 0xD7: @@ -97,6 +99,7 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FSUBD(v1, v2, v1); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 0xE8 ... 0xEF: @@ -110,6 +113,7 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FSUBD(v1, v1, v2); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 0xF0 ... 0xF7: @@ -123,6 +127,7 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FDIVD(v1, v2, v1); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; case 0xF8 ... 0xFF: @@ -136,6 +141,7 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FDIVD(v1, v1, v2); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; default: @@ -219,6 +225,8 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FLD(v2, wback, fixedaddress); if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5); FDIVD(v1, v2, v1); + X87_CHECK_PRECISION(v1); + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); break; } diff --git a/src/dynarec/rv64/dynarec_rv64_de.c b/src/dynarec/rv64/dynarec_rv64_de.c index 6932d3e1..50b29a5b 100644 --- a/src/dynarec/rv64/dynarec_rv64_de.c +++ b/src/dynarec/rv64/dynarec_rv64_de.c @@ -49,6 +49,7 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FADDD(v1, v1, v2); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; @@ -63,6 +64,7 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FMULD(v1, v1, v2); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; @@ -100,6 +102,7 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FSUBD(v1, v2, v1); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; @@ -114,6 +117,7 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FSUBD(v1, v1, v2); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; @@ -128,6 +132,7 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FDIVD(v1, v2, v1); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; @@ -142,6 +147,7 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FDIVD(v1, v1, v2); X87_CHECK_PRECISION(v1); } + MARK_X87PC(); if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; diff --git a/src/dynarec/rv64/dynarec_rv64_functions.c b/src/dynarec/rv64/dynarec_rv64_functions.c index 8e289d5d..43ba8987 100644 --- a/src/dynarec/rv64/dynarec_rv64_functions.c +++ b/src/dynarec/rv64/dynarec_rv64_functions.c @@ -220,6 +220,7 @@ static void extcache_promote_double_combined(dynarec_rv64_t* dyn, int ninst, int // if(BOX64DRENV(dynarec_dump)) dynarec_log(LOG_NONE, "extcache_promote_double_combined, ninst=%d combined%c %d i=%d (stack:%d/%d)\n", ninst, (a == dyn->insts[ninst].e.combined2)?'2':'1', a ,i, dyn->insts[ninst].e.stack_push, -dyn->insts[ninst].e.stack_pop); if (i >= 0) { dyn->insts[ninst].e.extcache[i].t = EXT_CACHE_ST_D; + if (dyn->insts[ninst].x87precision) dyn->need_x87check = 2; if (!dyn->insts[ninst].e.barrier) extcache_promote_double_internal(dyn, ninst - 1, maxinst, a - dyn->insts[ninst].e.stack_push); // go forward is combined is not pop'd @@ -239,6 +240,7 @@ static void extcache_promote_double_internal(dynarec_rv64_t* dyn, int ninst, int // if(BOX64DRENV(dynarec_dump)) dynarec_log(LOG_NONE, "extcache_promote_double_internal, ninst=%d, a=%d st=%d:%d, i=%d\n", ninst, a, dyn->insts[ninst].e.stack, dyn->insts[ninst].e.stack_next, i); if (i < 0) return; dyn->insts[ninst].e.extcache[i].t = EXT_CACHE_ST_D; + if (dyn->insts[ninst].x87precision) dyn->need_x87check = 2; // check combined propagation too if (dyn->insts[ninst].e.combined1 || dyn->insts[ninst].e.combined2) { if (dyn->insts[ninst].e.swapped) { @@ -274,6 +276,7 @@ static void extcache_promote_double_forward(dynarec_rv64_t* dyn, int ninst, int // if(BOX64DRENV(dynarec_dump)) dynarec_log(LOG_NONE, "extcache_promote_double_forward, ninst=%d, a=%d st=%d:%d(%d/%d), i=%d\n", ninst, a, dyn->insts[ninst].e.stack, dyn->insts[ninst].e.stack_next, dyn->insts[ninst].e.stack_push, -dyn->insts[ninst].e.stack_pop, i); if (i < 0) return; dyn->insts[ninst].e.extcache[i].t = EXT_CACHE_ST_D; + if (dyn->insts[ninst].x87precision) dyn->need_x87check = 2; // check combined propagation too if ((dyn->insts[ninst].e.combined1 || dyn->insts[ninst].e.combined2) && !dyn->insts[ninst].e.swapped) { // if(BOX64DRENV(dynarec_dump)) dynarec_log(LOG_NONE, "extcache_promote_double_forward, ninst=%d combined %d/%d vs %d with st %d\n", ninst, dyn->insts[ninst].e.combined1 ,dyn->insts[ninst].e.combined2, a, dyn->insts[ninst].e.stack); @@ -296,6 +299,7 @@ void extcache_promote_double(dynarec_rv64_t* dyn, int ninst, int a) if (i < 0) return; dyn->e.extcache[i].t = EXT_CACHE_ST_D; dyn->insts[ninst].e.extcache[i].t = EXT_CACHE_ST_D; + if (dyn->insts[ninst].x87precision) dyn->need_x87check = 2; // check combined propagation too if (dyn->e.combined1 || dyn->e.combined2) { if (dyn->e.swapped) { diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index a7886065..9e9e9f42 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -856,6 +856,9 @@ FCVTDS(A, A); \ } #endif +#ifndef MARK_X87PC +#define MARK_X87PC() +#endif #define STORE_REG(A) SD(x##A, xEmu, offsetof(x64emu_t, regs[_##A])) #define LOAD_REG(A) LD(x##A, xEmu, offsetof(x64emu_t, regs[_##A])) diff --git a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h index 14b3fcc8..b1f2302c 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass0.h +++ b/src/dynarec/rv64/dynarec_rv64_pass0.h @@ -103,3 +103,5 @@ #define NATIVE_RESTORE_X87PC() #define X87_CHECK_PRECISION(A) +#define MARK_X87PC() \ + if (dyn->need_x87check) dyn->insts[ninst].x87precision = 1 diff --git a/src/dynarec/rv64/dynarec_rv64_pass1.h b/src/dynarec/rv64/dynarec_rv64_pass1.h index 7a72378b..fd7d2433 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass1.h +++ b/src/dynarec/rv64/dynarec_rv64_pass1.h @@ -32,3 +32,6 @@ if (dyn->need_x87check) \ dyn->need_x87check = 2; \ } while (0) + +#define MARK_X87PC() \ + if (dyn->need_x87check) dyn->insts[ninst].x87precision = 1 diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h index c6907f3b..54f40820 100644 --- a/src/dynarec/rv64/dynarec_rv64_private.h +++ b/src/dynarec/rv64/dynarec_rv64_private.h @@ -134,7 +134,8 @@ typedef struct instruction_rv64_s { uint8_t nat_flags_carry:1; uint8_t nat_flags_sign:1; uint8_t nat_flags_needsign:1; - uint8_t unaligned:1; // this opcode can be re-generated for unaligned special case + uint8_t unaligned:1; // this opcode can be re-generated for unaligned special case + uint8_t x87precision:1; // this opcode can handle x87pc uint8_t nat_flags_op1; uint8_t nat_flags_op2; flagcache_t f_exit; // flags status at end of instruction |