| | | |
|---|---|---|
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2025-03-13 17:37:22 +0800 |
| committer | GitHub <noreply@github.com> | 2025-03-13 10:37:22 +0100 |
| commit | 1793c2a18292d3f3a1c1ba0fcdcd738b43997a2e | |
| tree | 81200c7b95c92cba701b60f4789380ee215d5c36 /src | |
| parent | b72002703e707e86d712903f5278c4248fddf8a5 | |
[RV64_DYNAREC] Fixed some x87 rounding cases for fastround=0 (#2437)
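The whole patch follows one pattern: when the fastround optimisation is disabled (`BOX64ENV(dynarec_fastround)` evaluates to 0), the emitted code switches the RISC-V dynamic rounding mode to match the current x87 control word before each FP operation and restores it afterwards, instead of silently running everything with the default round-to-nearest. Below is a condensed sketch of that pattern, reusing the names that appear in the hunks; it is not a standalone compilable unit, and the description of what `x87_setround()` does is an inference from how the diff uses it.

```c
/* Sketch of the guard added around the FP emitters below.
 * x87_setround() presumably programs fcsr.frm from the x87 control word's
 * RC bits and returns the previous value; x87_restoreround() puts it back. */
if (!BOX64ENV(dynarec_fastround))
    u8 = x87_setround(dyn, ninst, x1, x2);  // save old frm, apply x87 rounding
if (ST_IS_F(0)) {
    FADDS(v1, v1, v2);                      // float op, now emitted with rm = DYN
} else {
    FADDD(v1, v1, v2);                      // double op, now emitted with rm = DYN
}
if (!BOX64ENV(dynarec_fastround))
    x87_restoreround(dyn, ninst, u8);       // restore the previous rounding mode
```

In dynarec_rv64_d9.c the transcendental helpers (native_ftan, native_fpatan, native_fsincos, native_fscale, native_fsin, native_fcos) additionally switch from the CALL macro to CALL_, apparently so that the register holding the saved rounding mode (u8) survives the helper call; that saved value is what gets passed as the extra argument.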
Diffstat (limited to 'src')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_d8.c | 24 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_d9.c | 30 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_da.c | 13 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_dc.c | 25 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_de.c | 15 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_df.c | 2 |
| -rw-r--r-- | src/dynarec/rv64/rv64_emitter.h | 24 |
7 files changed, 114 insertions, 19 deletions
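For reference, the x87 control word selects rounding through its RC field (bits 11:10), while RV64 selects it through the frm field of fcsr, so the setround/restoreround helpers used throughout the patch have to translate between the two encodings. The mapping itself is fixed by the two ISAs; the helper below is only an illustration of it, not box64's actual code.

```c
#include <stdint.h>

/* Illustrative only: translate the x87 RC field (control word bits 11:10)
 * into the equivalent RISC-V frm encoding. */
static inline uint32_t x87_rc_to_frm(uint32_t rc)
{
    switch (rc & 3) {
        case 0:  return 0; /* RC=00 round to nearest (even) -> RNE */
        case 1:  return 2; /* RC=01 round toward -inf       -> RDN */
        case 2:  return 3; /* RC=10 round toward +inf       -> RUP */
        default: return 1; /* RC=11 round toward zero       -> RTZ */
    }
}
```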
diff --git a/src/dynarec/rv64/dynarec_rv64_d8.c b/src/dynarec/rv64/dynarec_rv64_d8.c
index 1af3cfa0..94503ea9 100644
--- a/src/dynarec/rv64/dynarec_rv64_d8.c
+++ b/src/dynarec/rv64/dynarec_rv64_d8.c
@@ -48,21 +48,25 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("FADD ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
             if (ST_IS_F(0)) {
                 FADDS(v1, v1, v2);
             } else {
                 FADDD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         case 0xC8 ... 0xCF:
             INST_NAME("FMUL ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
             if (ST_IS_F(0)) {
                 FMULS(v1, v1, v2);
             } else {
                 FMULD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         case 0xD0 ... 0xD7:
             INST_NAME("FCOM ST0, STx");
@@ -89,41 +93,49 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("FSUB ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
             if (ST_IS_F(0)) {
                 FSUBS(v1, v1, v2);
             } else {
                 FSUBD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         case 0xE8 ... 0xEF:
             INST_NAME("FSUBR ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
             if (ST_IS_F(0)) {
                 FSUBS(v1, v2, v1);
             } else {
                 FSUBD(v1, v2, v1);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         case 0xF0 ... 0xF7:
             INST_NAME("FDIV ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
             if (ST_IS_F(0)) {
                 FDIVS(v1, v1, v2);
             } else {
                 FDIVD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         case 0xF8 ... 0xFF:
             INST_NAME("FDIVR ST0, STx");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
             if (ST_IS_F(0)) {
                 FDIVS(v1, v2, v1);
             } else {
                 FDIVD(v1, v2, v1);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         default:
@@ -134,12 +146,14 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     s0 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLW(s0, ed, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
                     if (ST_IS_F(0)) {
                         FADDS(v1, v1, s0);
                     } else {
                         FCVTDS(s0, s0);
                         FADDD(v1, v1, s0);
                     }
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 1:
                     INST_NAME("FMUL ST0, float[ED]");
@@ -147,12 +161,14 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     s0 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLW(s0, ed, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
                     if (ST_IS_F(0)) {
                         FMULS(v1, v1, s0);
                     } else {
                         FCVTDS(s0, s0);
                         FMULD(v1, v1, s0);
                     }
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 2:
                     INST_NAME("FCOM ST0, float[ED]");
@@ -187,12 +203,14 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     s0 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLW(s0, ed, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
                     if (ST_IS_F(0)) {
                         FSUBS(v1, v1, s0);
                     } else {
                         FCVTDS(s0, s0);
                         FSUBD(v1, v1, s0);
                     }
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 5:
                     INST_NAME("FSUBR ST0, float[ED]");
@@ -200,12 +218,14 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     s0 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLW(s0, ed, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
                     if (ST_IS_F(0)) {
                         FSUBS(v1, s0, v1);
                     } else {
                         FCVTDS(s0, s0);
                         FSUBD(v1, s0, v1);
                     }
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 6:
                     INST_NAME("FDIV ST0, float[ED]");
@@ -213,12 +233,14 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     s0 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLW(s0, ed, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
                     if (ST_IS_F(0)) {
                         FDIVS(v1, v1, s0);
                     } else {
                         FCVTDS(s0, s0);
                         FDIVD(v1, v1, s0);
                     }
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 7:
                     INST_NAME("FDIVR ST0, float[ED]");
@@ -226,12 +248,14 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     s0 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLW(s0, ed, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
                     if (ST_IS_F(0)) {
                         FDIVS(v1, s0, v1);
                     } else {
                         FCVTDS(s0, s0);
                         FDIVD(v1, s0, v1);
                     }
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
             }
     }
diff --git a/src/dynarec/rv64/dynarec_rv64_d9.c b/src/dynarec/rv64/dynarec_rv64_d9.c
index 02cf39a5..51321ab9 100644
--- a/src/dynarec/rv64/dynarec_rv64_d9.c
+++ b/src/dynarec/rv64/dynarec_rv64_d9.c
@@ -284,7 +284,9 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_forget(dyn, ninst, x1, x2, 0);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_ftan, -1, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+            CALL_(native_ftan, -1, BOX64ENV(dynarec_fastround) ? 0 : u8, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             x87_unstackcount(dyn, ninst, x3, s0);
             X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_F);
             if (ST_IS_F(0)) {
@@ -301,7 +303,9 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_fpatan, -1, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+            CALL_(native_fpatan, -1, BOX64ENV(dynarec_fastround) ? 0 : u8, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             x87_unstackcount(dyn, ninst, x3, s0);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
@@ -361,11 +365,13 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         case 0xFA:
             INST_NAME("FSQRT");
             v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
             if (ST_IS_F(0)) {
                 FSQRTS(v1, v1);
             } else {
                 FSQRTD(v1, v1);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         case 0xFB:
             INST_NAME("FSINCOS");
@@ -373,7 +379,9 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_fsincos, -1, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+            CALL_(native_fsincos, -1, BOX64ENV(dynarec_fastround) ? 0 : u8, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             x87_unstackcount(dyn, ninst, x3, s0);
             break;
         case 0xFC:
@@ -422,7 +430,9 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_fscale, -1, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+            CALL_(native_fscale, -1, BOX64ENV(dynarec_fastround) ? 0 : u8, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             x87_unstackcount(dyn, ninst, x3, s0);
             break;
         case 0xFE:
@@ -430,7 +440,9 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_forget(dyn, ninst, x1, x2, 0);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_fsin, -1, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+            CALL_(native_fsin, -1, BOX64ENV(dynarec_fastround) ? 0 : u8, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             x87_unstackcount(dyn, ninst, x3, s0);
             break;
         case 0xFF:
@@ -438,7 +450,9 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_forget(dyn, ninst, x1, x2, 0);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_fcos, -1, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+            CALL_(native_fcos, -1, BOX64ENV(dynarec_fastround) ? 0 : u8, 0, 0);
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             x87_unstackcount(dyn, ninst, x3, s0);
             break;
@@ -474,7 +488,9 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         s0 = v1;
                     else {
                         s0 = fpu_get_scratch(dyn);
+                        if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
                         FCVTSD(s0, v1);
+                        if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     }
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FSW(s0, ed, fixedaddress);
@@ -484,7 +500,9 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     if (!ST_IS_F(0)) {
+                        if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
                         FCVTSD(v1, v1);
+                        if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     }
                     FSW(v1, ed, fixedaddress);
                     X87_POP_OR_FAIL(dyn, ninst, x3);
diff --git a/src/dynarec/rv64/dynarec_rv64_da.c b/src/dynarec/rv64/dynarec_rv64_da.c
index b1d2042b..7609d877 100644
--- a/src/dynarec/rv64/dynarec_rv64_da.c
+++ b/src/dynarec/rv64/dynarec_rv64_da.c
@@ -28,6 +28,7 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     uint8_t nextop = F8;
     int64_t j64;
     uint8_t ed;
+    uint8_t u8;
     uint8_t wback;
     int v1, v2;
     int d0;
@@ -153,7 +154,9 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     LW(x1, ed, fixedaddress);
                     FCVTDW(v2, x1, RD_RNE); // i32 -> double
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FADDD(v1, v1, v2);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 1:
                     INST_NAME("FIMUL ST0, Ed");
@@ -162,7 +165,9 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     LW(x1, ed, fixedaddress);
                     FCVTDW(v2, x1, RD_RNE); // i32 -> double
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FMULD(v1, v1, v2);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 2:
                     INST_NAME("FICOM ST0, Ed");
@@ -190,7 +195,9 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     LW(x1, ed, fixedaddress);
                     FCVTDW(v2, x1, RD_RNE); // i32 -> double
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FSUBD(v1, v1, v2);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 5:
                     INST_NAME("FISUBR ST0, Ed");
@@ -199,7 +206,9 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     LW(x1, ed, fixedaddress);
                     FCVTDW(v2, x1, RD_RNE); // i32 -> double
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FSUBD(v1, v2, v1);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 6:
                     INST_NAME("FIDIV ST0, Ed");
@@ -208,7 +217,9 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     LW(x1, ed, fixedaddress);
                     FCVTDW(v2, x1, RD_RNE); // i32 -> double
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FDIVD(v1, v1, v2);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 7:
                     INST_NAME("FIDIVR ST0, Ed");
@@ -217,7 +228,9 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     LW(x1, ed, fixedaddress);
                     FCVTDW(v2, x1, RD_RNE); // i32 -> double
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FDIVD(v1, v2, v1);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
             }
     }
diff --git a/src/dynarec/rv64/dynarec_rv64_dc.c b/src/dynarec/rv64/dynarec_rv64_dc.c
index 955438ff..309abde8 100644
--- a/src/dynarec/rv64/dynarec_rv64_dc.c
+++ b/src/dynarec/rv64/dynarec_rv64_dc.c
@@ -31,6 +31,7 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     uint8_t nextop = F8;

     uint8_t wback;
+    uint8_t u8;
     int64_t fixedaddress;
     int unscaled;
     int v1, v2;
@@ -43,21 +44,25 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("FADD STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FADDS(v1, v1, v2);
             } else {
                 FADDD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         case 0xC8 ... 0xCF:
             INST_NAME("FMUL STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FMULS(v1, v1, v2);
             } else {
                 FMULD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         case 0xD0 ... 0xD7:
             INST_NAME("FCOM ST0, STx"); // yep
@@ -84,41 +89,49 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("FSUBR STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FSUBS(v1, v2, v1);
             } else {
                 FSUBD(v1, v2, v1);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         case 0xE8 ... 0xEF:
             INST_NAME("FSUB STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FSUBS(v1, v1, v2);
             } else {
                 FSUBD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         case 0xF0 ... 0xF7:
             INST_NAME("FDIVR STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FDIVS(v1, v2, v1);
             } else {
                 FDIVD(v1, v2, v1);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         case 0xF8 ... 0xFF:
             INST_NAME("FDIV STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FDIVS(v1, v1, v2);
             } else {
                 FDIVD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             break;
         default:
             switch ((nextop >> 3) & 7) {
@@ -128,7 +141,9 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     v2 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLD(v2, wback, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FADDD(v1, v1, v2);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 1:
                     INST_NAME("FMUL ST0, double[ED]");
@@ -136,7 +151,9 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     v2 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLD(v2, wback, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FMULD(v1, v1, v2);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 2:
                     INST_NAME("FCOM ST0, double[ED]");
@@ -161,7 +178,9 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     v2 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLD(v2, wback, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FSUBD(v1, v1, v2);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 5:
                     INST_NAME("FSUBR ST0, double[ED]");
@@ -169,7 +188,9 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     v2 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLD(v2, wback, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FSUBD(v1, v2, v1);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 6:
                     INST_NAME("FDIV ST0, double[ED]");
@@ -177,7 +198,9 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     v2 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLD(v2, wback, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FDIVD(v1, v1, v2);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
                 case 7:
                     INST_NAME("FDIVR ST0, double[ED]");
@@ -185,7 +208,9 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     v2 = fpu_get_scratch(dyn);
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
                     FLD(v2, wback, fixedaddress);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
                     FDIVD(v1, v2, v1);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
                     break;
             }
     }
diff --git a/src/dynarec/rv64/dynarec_rv64_de.c b/src/dynarec/rv64/dynarec_rv64_de.c
index 5613a3eb..91a7b0c5 100644
--- a/src/dynarec/rv64/dynarec_rv64_de.c
+++ b/src/dynarec/rv64/dynarec_rv64_de.c
@@ -31,6 +31,7 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     uint8_t nextop = F8;

     uint8_t wback;
+    uint8_t u8;
     int64_t fixedaddress;
     int v1, v2;

@@ -49,11 +50,13 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("FADDP STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FADDS(v1, v1, v2);
             } else {
                 FADDD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xC8:
@@ -67,11 +70,13 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("FMULP STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FMULS(v1, v1, v2);
             } else {
                 FMULD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xD0:
@@ -90,6 +95,7 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             } else {
                 FCOMD(v1, v2, x1, x2, x3, x4, x5);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xD9:
@@ -101,6 +107,7 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             } else {
                 FCOMD(v1, v2, x1, x2, x3, x4, x5);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
@@ -115,11 +122,13 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("FSUBRP STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FSUBS(v1, v2, v1);
             } else {
                 FSUBD(v1, v2, v1);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xE8:
@@ -133,11 +142,13 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("FSUBP STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FSUBS(v1, v1, v2);
             } else {
                 FSUBD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xF0:
@@ -151,11 +162,13 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("FDIVRP STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FDIVS(v1, v2, v1);
             } else {
                 FDIVD(v1, v2, v1);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xF8:
@@ -169,11 +182,13 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("FDIVP STx, ST0");
             v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
             v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
             if (ST_IS_F(0)) {
                 FDIVS(v1, v1, v2);
             } else {
                 FDIVD(v1, v1, v2);
             }
+            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
         case 0xD8:
diff --git a/src/dynarec/rv64/dynarec_rv64_df.c b/src/dynarec/rv64/dynarec_rv64_df.c
index 9d277713..cbb75923 100644
--- a/src/dynarec/rv64/dynarec_rv64_df.c
+++ b/src/dynarec/rv64/dynarec_rv64_df.c
@@ -242,7 +242,7 @@ uintptr_t dynarec64_DF(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 ADD(x5, x5, x4);
                 SD(x1, x5, 8); // ll
             }
-            FCVTDL(v1, x1, RD_RTZ);
+            FCVTDL(v1, x1, RD_DYN);
             if (rex.is32bits) {
                 FSD(v1, x5, 0); // ref
             }
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index dbdccf24..31a6c290 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -672,11 +672,11 @@
 // Convert from Single to signed 32bits (truncated)
 #define FCVTWS(rd, frs1, rm) EMIT(R_type(0b1100000, 0b00000, frs1, rm, rd, 0b1010011))

-#define FADDS(frd, frs1, frs2) EMIT(R_type(0b0000000, frs2, frs1, 0b000, frd, 0b1010011))
-#define FSUBS(frd, frs1, frs2) EMIT(R_type(0b0000100, frs2, frs1, 0b000, frd, 0b1010011))
-#define FMULS(frd, frs1, frs2) EMIT(R_type(0b0001000, frs2, frs1, 0b000, frd, 0b1010011))
-#define FDIVS(frd, frs1, frs2) EMIT(R_type(0b0001100, frs2, frs1, 0b000, frd, 0b1010011))
-#define FSQRTS(frd, frs1) EMIT(R_type(0b0101100, 0b00000, frs1, 0b000, frd, 0b1010011))
+#define FADDS(frd, frs1, frs2) EMIT(R_type(0b0000000, frs2, frs1, 0b111, frd, 0b1010011))
+#define FSUBS(frd, frs1, frs2) EMIT(R_type(0b0000100, frs2, frs1, 0b111, frd, 0b1010011))
+#define FMULS(frd, frs1, frs2) EMIT(R_type(0b0001000, frs2, frs1, 0b111, frd, 0b1010011))
+#define FDIVS(frd, frs1, frs2) EMIT(R_type(0b0001100, frs2, frs1, 0b111, frd, 0b1010011))
+#define FSQRTS(frd, frs1) EMIT(R_type(0b0101100, 0b00000, frs1, 0b111, frd, 0b1010011))
 #define FMINS(frd, frs1, frs2) EMIT(R_type(0b0010100, frs2, frs1, 0b000, frd, 0b1010011))
 #define FMAXS(frd, frs1, frs2) EMIT(R_type(0b0010100, frs2, frs1, 0b001, frd, 0b1010011))
@@ -703,9 +703,9 @@
 // store double precision frs2 to rs1+imm12
 #define FSD(frs2, rs1, imm12) EMIT(S_type(imm12, frs2, rs1, 0b011, 0b0100111))
 // Convert Double frs1 to Single frd
-#define FCVTSD(frd, frs1) EMIT(R_type(0b0100000, 0b00001, frs1, 0b000, frd, 0b1010011))
+#define FCVTSD(frd, frs1) EMIT(R_type(0b0100000, 0b00001, frs1, 0b111, frd, 0b1010011))
 // Convert Single frs1 to Double frd
-#define FCVTDS(frd, frs1) EMIT(R_type(0b0100001, 0b00000, frs1, 0b000, frd, 0b1010011))
+#define FCVTDS(frd, frs1) EMIT(R_type(0b0100001, 0b00000, frs1, 0b111, frd, 0b1010011))
 // Convert from Double to signed 32bits
 #define FCVTWD(rd, frs1, rm) EMIT(R_type(0b1100001, 0b00000, frs1, rm, rd, 0b1010011))
 // Convert from Double to unsigned 32bits
@@ -729,11 +729,11 @@
 #define FLTD(rd, frs1, frs2) EMIT(R_type(0b1010001, frs2, frs1, 0b001, rd, 0b1010011))
 #define FLED(rd, frs1, frs2) EMIT(R_type(0b1010001, frs2, frs1, 0b000, rd, 0b1010011))

-#define FADDD(frd, frs1, frs2) EMIT(R_type(0b0000001, frs2, frs1, 0b000, frd, 0b1010011))
-#define FSUBD(frd, frs1, frs2) EMIT(R_type(0b0000101, frs2, frs1, 0b000, frd, 0b1010011))
-#define FMULD(frd, frs1, frs2) EMIT(R_type(0b0001001, frs2, frs1, 0b000, frd, 0b1010011))
-#define FDIVD(frd, frs1, frs2) EMIT(R_type(0b0001101, frs2, frs1, 0b000, frd, 0b1010011))
-#define FSQRTD(frd, frs1) EMIT(R_type(0b0101101, 0b00000, frs1, 0b000, frd, 0b1010011))
+#define FADDD(frd, frs1, frs2) EMIT(R_type(0b0000001, frs2, frs1, 0b111, frd, 0b1010011))
+#define FSUBD(frd, frs1, frs2) EMIT(R_type(0b0000101, frs2, frs1, 0b111, frd, 0b1010011))
+#define FMULD(frd, frs1, frs2) EMIT(R_type(0b0001001, frs2, frs1, 0b111, frd, 0b1010011))
+#define FDIVD(frd, frs1, frs2) EMIT(R_type(0b0001101, frs2, frs1, 0b111, frd, 0b1010011))
+#define FSQRTD(frd, frs1) EMIT(R_type(0b0101101, 0b00000, frs1, 0b111, frd, 0b1010011))
 #define FMIND(frd, frs1, frs2) EMIT(R_type(0b0010101, frs2, frs1, 0b000, frd, 0b1010011))
 #define FMAXD(frd, frs1, frs2) EMIT(R_type(0b0010101, frs2, frs1, 0b001, frd, 0b1010011))
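The rv64_emitter.h hunk is what makes the restored rounding mode actually matter: in the RISC-V F/D extensions, the funct3 slot of these R-type encodings is the rounding-mode (rm) field, and 0b111 means DYN, i.e. take the rounding mode from fcsr.frm, whereas the old 0b000 hard-wired round-to-nearest-even regardless of what the x87 control word requested. Below is a minimal stand-alone sketch of that encoding, mirroring the argument order of the R_type macro above; it is an illustration only, not the project's emitter.

```c
#include <stdint.h>

/* RISC-V R-type layout: funct7 | rs2 | rs1 | funct3(rm) | rd | opcode. */
static uint32_t r_type(uint32_t funct7, uint32_t rs2, uint32_t rs1,
                       uint32_t rm, uint32_t rd, uint32_t opcode)
{
    return (funct7 << 25) | (rs2 << 20) | (rs1 << 15) |
           (rm << 12) | (rd << 7) | opcode;
}

/* fadd.d fd, fs1, fs2 -- funct7=0b0000001, opcode=0b1010011.
 * rm=0b111 (DYN) defers rounding to fcsr.frm, which x87_setround programs;
 * the previous rm=0b000 forced round-to-nearest-even. */
static uint32_t emit_faddd_dyn(uint32_t fd, uint32_t fs1, uint32_t fs2)
{
    return r_type(0b0000001, fs2, fs1, 0b111, fd, 0b1010011);
}
```

Note that FMIN/FMAX and the compare macros keep funct3 as a real operation selector rather than a rounding-mode field, which is why those lines are untouched by the patch.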