| author | Hagb (Junyu Guo 郭俊余) <hagb_green@qq.com> | 2025-01-08 22:34:15 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-01-08 15:34:15 +0100 |
| commit | 653a67c8addcb980ce10a27765e582c972f8d69c (patch) | |
| tree | 98ac1036f34b2810a1f41a6126827445f7774fd4 | |
| parent | b99893d1c3506524103eebc2e4497a8be14cd6d0 (diff) | |
| download | box64-653a67c8addcb980ce10a27765e582c972f8d69c.tar.gz box64-653a67c8addcb980ce10a27765e582c972f8d69c.zip | |
Port rounding of some x87 instructions from Box86 (#2242)
* Port rounding of some x87 instructions from Box86

  Ported from https://github.com/ptitSeb/box86/pull/951. The original pull request and this commit also contain some improvements to the precision of `F2XM1` and `FYL2XP1`.

* Run the fpu_rounding test with dynarec only for ARM64

  These instructions are implemented in the dynarec only for ARM64.
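As background for the `F2XM1`/`FYL2XP1` part of the change, here is a minimal standalone C sketch (illustrative only, not box64 code; the standard `M_LN2` constant stands in for box64's `LN2` macro) of why the interpreter now uses `expm1` and `log1p`: for arguments near zero, `exp2(x) - 1.0` and `log2(x + 1.0)` cancel or round away the low-order bits, while `expm1`/`log1p` keep them.

```c
#define _GNU_SOURCE
#include <math.h>
#include <stdio.h>

int main(void)
{
    double x = 0x1p-60; /* tiny argument where the naive forms lose everything */

    /* F2XM1 = 2^x - 1 */
    double naive_f2xm1  = exp2(x) - 1.0;     /* exp2(x) rounds to 1.0, so the result is 0 */
    double better_f2xm1 = expm1(M_LN2 * x);  /* keeps the ~x*ln(2) low-order part */

    /* FYL2XP1 = ST1 * log2(ST0 + 1) */
    double y = 3.0;
    double naive_fyl2xp1  = y * log2(x + 1.0);    /* x + 1.0 rounds to exactly 1.0 */
    double better_fyl2xp1 = y * log1p(x) / M_LN2; /* keeps the ~x/ln(2) contribution */

    printf("F2XM1  : naive=%a  improved=%a\n", naive_f2xm1, better_f2xm1);
    printf("FYL2XP1: naive=%a  improved=%a\n", naive_fyl2xp1, better_fyl2xp1);
    return 0;
}
```

Build with `cc demo.c -lm` (the file name is illustrative) and compare the two columns.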
| -rw-r--r-- | CMakeLists.txt | 11 |
| -rw-r--r-- | docs/USAGE.md | 4 |
| -rw-r--r-- | docs/box64.pod | 4 |
| -rw-r--r-- | src/core.c | 2 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_d8.c | 49 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_d9.c | 48 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_da.c | 25 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_dc.c | 49 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_de.c | 25 |
| -rw-r--r-- | src/dynarec/dynarec_native_functions.c | 16 |
| -rw-r--r-- | src/emu/x64rund8.c | 8 |
| -rw-r--r-- | src/emu/x64rund9.c | 47 |
| -rw-r--r-- | src/emu/x64runda.c | 7 |
| -rw-r--r-- | src/emu/x64rundb.c | 8 |
| -rw-r--r-- | src/emu/x64rundc.c | 7 |
| -rw-r--r-- | src/emu/x64runde.c | 8 |
| -rw-r--r-- | src/emu/x87emu_setround.h | 29 |
| -rw-r--r-- | tests/ref31.txt | 810 |
| -rw-r--r-- | tests/roundtest.h | 114 |
| -rwxr-xr-x | tests/test31 | bin, 0 -> 81928 bytes |
| -rw-r--r-- | tests/test31.c | 114 |
21 files changed, 1361 insertions, 24 deletions
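Most of the hunks that follow repeat one pattern: when `BOX64_DYNAREC_FASTROUND=0`, switch the host FPU to the rounding mode selected by the emulated x87 control word, perform the operation, then restore the previous host mode (that is what the new `src/emu/x87emu_setround.h` helper and the ARM64 `x87_setround`/`x87_restoreround` calls do). A minimal standalone sketch of that pattern, with illustrative names rather than the actual box64 helpers:

```c
#include <fenv.h>   /* fegetround, fesetround */
#include <stdio.h>
#pragma STDC FENV_ACCESS ON /* GCC ignores this pragma; build with -frounding-math */

/* x87 control-word rounding field (00=nearest, 01=down, 10=up, 11=truncate) */
enum x87_round { ROUND_Nearest = 0, ROUND_Down = 1, ROUND_Up = 2, ROUND_Chop = 3 };

/* Set the host rounding mode from the emulated one; return the previous host mode. */
static int set_host_round(enum x87_round rc)
{
    int old = fegetround();
    switch (rc) {
        case ROUND_Nearest: fesetround(FE_TONEAREST);  break;
        case ROUND_Down:    fesetround(FE_DOWNWARD);   break;
        case ROUND_Up:      fesetround(FE_UPWARD);     break;
        case ROUND_Chop:    fesetround(FE_TOWARDZERO); break;
    }
    return old;
}

/* An FADD-like operation that honours the guest rounding mode. */
static double guest_fadd(double st0, double stx, enum x87_round rc)
{
    int old = set_host_round(rc);
    double r = st0 + stx;   /* rounded in the guest-selected direction */
    fesetround(old);        /* restore the host rounding mode */
    return r;
}

int main(void)
{
    /* 1.0 + 2^-60 is not representable: nearest rounds down, upward rounds up. */
    printf("nearest: %a\n", guest_fadd(1.0, 0x1p-60, ROUND_Nearest)); /* 0x1p+0 */
    printf("upward : %a\n", guest_fadd(1.0, 0x1p-60, ROUND_Up));      /* 0x1.0000000000001p+0 */
    return 0;
}
```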
diff --git a/CMakeLists.txt b/CMakeLists.txt index f15e1921..b59e7bfc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1497,6 +1497,17 @@ add_test(avx_intrinsics ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${B set_tests_properties(avx_intrinsics PROPERTIES ENVIRONMENT "BOX64_DYNAREC_FASTNAN=0;BOX64_DYNAREC_FASTROUND=0;BOX64_AVX=2") +add_test(fpu_rounding ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64} + -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests/test31 -D TEST_OUTPUT=tmpfile31.txt + -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests/ref31.txt + -P ${CMAKE_SOURCE_DIR}/runTest.cmake ) + +if(ARM_DYNAREC) +set_tests_properties(fpu_rounding PROPERTIES ENVIRONMENT "BOX64_DYNAREC_FASTROUND=0;BOX64_DYNAREC_TEST=1") +else() +set_tests_properties(fpu_rounding PROPERTIES ENVIRONMENT "BOX64_DYNAREC=0") +endif() + else() add_test(bootSyscall ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX64} diff --git a/docs/USAGE.md b/docs/USAGE.md index e74c11ad..03eef42e 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -368,8 +368,8 @@ Generate x86 -NAN. #### BOX64_DYNAREC_FASTROUND * Generate precise x86 rounding. - * 0 : Generate float/double -> int rounding like on x86. - * 1 : Do not do anything special with edge case Rounding, to go as fast as possible (no INF/NAN/Overflow -> MIN_INT conversion). (default, faster) + * 0 : Generate float/double -> int rounding and use current rounding mode for float/double computation like on x86. + * 1 : Do not do anything special with edge case Rounding, to go as fast as possible (no INF/NAN/Overflow -> MIN_INT conversion, and no non-default rounding modes). (default, faster) * 2 : Everything from 1 plus also fast round of double->float (not taking into account current rounding mode). #### BOX64_DYNAREC_SAFEFLAGS * diff --git a/docs/box64.pod b/docs/box64.pod index e12f7c0a..59813110 100644 --- a/docs/box64.pod +++ b/docs/box64.pod @@ -335,8 +335,8 @@ Enable/Disable generation of -NAN Enable/Disable generation of precise x86 rounding - * 0 : Generate float/double -> int rounding like on x86 - * 1 : Don't do anything special with edge case Rounding, to go as fast as possible (no INF/NAN/Overflow -> MIN_INT conversion) (default, faster) + * 0 : Generate float/double -> int rounding and use current rounding mode for float/double computation like on x86 + * 1 : Don't do anything special with edge case Rounding, to go as fast as possible (no INF/NAN/Overflow -> MIN_INT conversion, and no non-default rounding modes) (default, faster) =item B<BOX64_DYNAREC_SAFEFLAGS>=I<0|1|2> diff --git a/src/core.c b/src/core.c index a39fa795..c7dc95c3 100644 --- a/src/core.c +++ b/src/core.c @@ -847,7 +847,7 @@ void LoadLogEnv() box64_dynarec_fastround = p[0]-'0'; } if(!box64_dynarec_fastround) - printf_log(LOG_INFO, "Dynarec will try to generate x86 precise IEEE->int rounding\n"); + printf_log(LOG_INFO, "Dynarec will try to generate x86 precise IEEE->int rounding and and set rounding mode for computation\n"); else if(box64_dynarec_fastround==2) printf_log(LOG_INFO, "Dynarec will generate x86 very imprecise double->float rounding\n"); } diff --git a/src/dynarec/arm64/dynarec_arm64_d8.c b/src/dynarec/arm64/dynarec_arm64_d8.c index e8f002a4..826cad7d 100644 --- a/src/dynarec/arm64/dynarec_arm64_d8.c +++ b/src/dynarec/arm64/dynarec_arm64_d8.c @@ -29,6 +29,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin uint8_t nextop = F8; uint8_t ed; + uint8_t u8; int64_t fixedaddress; int unscaled; int v1, v2; @@ -51,11 +52,15 @@ uintptr_t 
dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FADD ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FADDS(v1, v1, v2); } else { FADDD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xC8: case 0xC9: @@ -68,11 +73,15 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FMUL ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FMULS(v1, v1, v2); } else { FMULD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xD0: case 0xD1: @@ -122,11 +131,15 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FSUB ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FSUBS(v1, v1, v2); } else { FSUBD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xE8: case 0xE9: @@ -139,11 +152,15 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FSUBR ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FSUBS(v1, v2, v1); } else { FSUBD(v1, v2, v1); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xF0: case 0xF1: @@ -156,11 +173,15 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FDIV ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FDIVS(v1, v1, v2); } else { FDIVD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xF8: case 0xF9: @@ -173,11 +194,15 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FDIVR ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FDIVS(v1, v2, v1); } else { FDIVD(v1, v2, v1); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; default: DEFAULT; @@ -190,12 +215,16 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin s0 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); VLD32(s0, ed, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); if(ST_IS_F(0)) { FADDS(v1, v1, s0); } else { FCVT_D_S(s0, s0); FADDD(v1, v1, s0); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 1: INST_NAME("FMUL ST0, 
float[ED]"); @@ -203,12 +232,16 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin s0 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); VLD32(s0, ed, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); if(ST_IS_F(0)) { FMULS(v1, v1, s0); } else { FCVT_D_S(s0, s0); FMULD(v1, v1, s0); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 2: INST_NAME("FCOM ST0, float[ED]"); @@ -245,12 +278,16 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin s0 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); VLD32(s0, ed, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); if(ST_IS_F(0)) { FSUBS(v1, v1, s0); } else { FCVT_D_S(s0, s0); FSUBD(v1, v1, s0); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 5: INST_NAME("FSUBR ST0, float[ED]"); @@ -258,12 +295,16 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin s0 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); VLD32(s0, ed, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); if(ST_IS_F(0)) { FSUBS(v1, s0, v1); } else { FCVT_D_S(s0, s0); FSUBD(v1, s0, v1); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 6: INST_NAME("FDIV ST0, float[ED]"); @@ -271,12 +312,16 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin s0 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); VLD32(s0, ed, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); if(ST_IS_F(0)) { FDIVS(v1, v1, s0); } else { FCVT_D_S(s0, s0); FDIVD(v1, v1, s0); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 7: INST_NAME("FDIVR ST0, float[ED]"); @@ -284,12 +329,16 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin s0 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); VLD32(s0, ed, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); if(ST_IS_F(0)) { FDIVS(v1, s0, v1); } else { FCVT_D_S(s0, s0); FDIVD(v1, s0, v1); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; default: DEFAULT; diff --git a/src/dynarec/arm64/dynarec_arm64_d9.c b/src/dynarec/arm64/dynarec_arm64_d9.c index 378070a0..c1961a01 100644 --- a/src/dynarec/arm64/dynarec_arm64_d9.c +++ b/src/dynarec/arm64/dynarec_arm64_d9.c @@ -291,8 +291,12 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MESSAGE(LOG_DUMP, "Need Optimization\n"); i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 0); - CALL(native_ftan, -1); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); + CALL_(native_ftan, -1, box64_dynarec_fastround ? 
0 : u8); x87_unstackcount(dyn, ninst, x1, i1); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); if(PK(0)==0xdd && PK(1)==0xd8) { MESSAGE(LOG_DUMP, "Optimized next DD D8 fstp st0, st0, not emitting 1\n"); u8 = F8; @@ -312,7 +316,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 0); x87_forget(dyn, ninst, x1, x2, 1); - CALL(native_fpatan, -1); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); + CALL_(native_fpatan, -1, box64_dynarec_fastround ? 0 : u8); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); x87_unstackcount(dyn, ninst, x1, i1); X87_POP_OR_FAIL(dyn, ninst, x3); break; @@ -418,11 +426,15 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0xFA: INST_NAME("FSQRT"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FSQRTS(v1, v1); } else { FSQRTD(v1, v1); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xFB: INST_NAME("FSINCOS"); @@ -430,7 +442,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, 0); i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 1); - CALL(native_fsincos, -1); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); + CALL_(native_fsincos, -1, box64_dynarec_fastround ? 0 : u8); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); x87_unstackcount(dyn, ninst, x1, i1); break; case 0xFC: @@ -457,7 +473,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 0); x87_forget(dyn, ninst, x1, x2, 1); - CALL(native_fscale, -1); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); + CALL_(native_fscale, -1, box64_dynarec_fastround ? 0 : u8); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); x87_unstackcount(dyn, ninst, x1, i1); break; case 0xFE: @@ -465,7 +485,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MESSAGE(LOG_DUMP, "Need Optimization\n"); i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 0); - CALL(native_fsin, -1); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); + CALL_(native_fsin, -1, box64_dynarec_fastround ? 0 : u8); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); x87_unstackcount(dyn, ninst, x1, i1); break; case 0xFF: @@ -473,7 +497,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MESSAGE(LOG_DUMP, "Need Optimization\n"); i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 0); - CALL(native_fcos, -1); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); + CALL_(native_fcos, -1, box64_dynarec_fastround ? 
0 : u8); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); x87_unstackcount(dyn, ninst, x1, i1); break; default: @@ -497,7 +525,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin s0 = v1; else { s0 = fpu_get_scratch(dyn, ninst); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); FCVT_S_D(s0, v1); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); } addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); VST32(s0, ed, fixedaddress); @@ -507,7 +539,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_F); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); if(!ST_IS_F(0)) { + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); FCVT_S_D(v1, v1); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); } VST32(v1, ed, fixedaddress); X87_POP_OR_FAIL(dyn, ninst, x3); diff --git a/src/dynarec/arm64/dynarec_arm64_da.c b/src/dynarec/arm64/dynarec_arm64_da.c index 52965b3f..6e4bb528 100644 --- a/src/dynarec/arm64/dynarec_arm64_da.c +++ b/src/dynarec/arm64/dynarec_arm64_da.c @@ -29,6 +29,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin int64_t j64; uint8_t ed; uint8_t wback; + uint8_t u8; int v1, v2; int d0; int s0; @@ -148,7 +149,11 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VLD32(v2, ed, fixedaddress); SXTL_32(v2, v2); // i32 -> i64 SCVTFDD(v2, v2); // i64 -> double + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); FADDD(v1, v1, v2); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 1: INST_NAME("FIMUL ST0, Ed"); @@ -158,7 +163,11 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VLD32(v2, ed, fixedaddress); SXTL_32(v2, v2); // i32 -> i64 SCVTFDD(v2, v2); // i64 -> double + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); FMULD(v1, v1, v2); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 2: INST_NAME("FICOM ST0, Ed"); @@ -191,7 +200,11 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VLD32(v2, ed, fixedaddress); SXTL_32(v2, v2); // i32 -> i64 SCVTFDD(v2, v2); // i64 -> double + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); FSUBD(v1, v1, v2); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 5: INST_NAME("FISUBR ST0, Ed"); @@ -201,7 +214,11 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VLD32(v2, ed, fixedaddress); SXTL_32(v2, v2); // i32 -> i64 SCVTFDD(v2, v2); // i64 -> double + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); FSUBD(v1, v2, v1); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 6: INST_NAME("FIDIV ST0, Ed"); @@ -211,7 +228,11 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VLD32(v2, ed, fixedaddress); SXTL_32(v2, v2); // i32 -> i64 SCVTFDD(v2, v2); // i64 -> double + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); FDIVD(v1, v1, v2); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 7: INST_NAME("FIDIVR ST0, Ed"); @@ -221,7 +242,11 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t 
addr, uintptr_t ip, int nin VLD32(v2, ed, fixedaddress); SXTL_32(v2, v2); // i32 -> i64 SCVTFDD(v2, v2); // i64 -> double + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); FDIVD(v1, v2, v1); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; } return addr; diff --git a/src/dynarec/arm64/dynarec_arm64_dc.c b/src/dynarec/arm64/dynarec_arm64_dc.c index ee5b2c62..2fbac9d8 100644 --- a/src/dynarec/arm64/dynarec_arm64_dc.c +++ b/src/dynarec/arm64/dynarec_arm64_dc.c @@ -29,6 +29,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin uint8_t nextop = F8; uint8_t wback; + uint8_t u8; int64_t fixedaddress; int unscaled; int v1, v2; @@ -49,11 +50,15 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FADD STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FADDS(v1, v1, v2); } else { FADDD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xC8: case 0xC9: @@ -66,11 +71,15 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FMUL STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FMULS(v1, v1, v2); } else { FMULD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xD0: case 0xD1: @@ -120,11 +129,15 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FSUBR STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FSUBS(v1, v2, v1); } else { FSUBD(v1, v2, v1); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xE8: case 0xE9: @@ -137,11 +150,15 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FSUB STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FSUBS(v1, v1, v2); } else { FSUBD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xF0: case 0xF1: @@ -154,11 +171,15 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FDIVR STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FDIVS(v1, v2, v1); } else { FDIVD(v1, v2, v1); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xF8: case 0xF9: @@ -171,11 +192,15 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FDIV STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 
= x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FDIVS(v1, v1, v2); } else { FDIVD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; default: DEFAULT; @@ -188,7 +213,11 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin v2 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0); VLD64(v2, wback, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); FADDD(v1, v1, v2); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 1: INST_NAME("FMUL ST0, double[ED]"); @@ -196,7 +225,11 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin v2 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0); VLD64(v2, wback, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); FMULD(v1, v1, v2); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 2: INST_NAME("FCOM ST0, double[ED]"); @@ -223,7 +256,11 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin v2 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0); VLD64(v2, wback, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); FSUBD(v1, v1, v2); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 5: INST_NAME("FSUBR ST0, double[ED]"); @@ -231,7 +268,11 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin v2 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0); VLD64(v2, wback, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); FSUBD(v1, v2, v1); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 6: INST_NAME("FDIV ST0, double[ED]"); @@ -239,7 +280,11 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin v2 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0); VLD64(v2, wback, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); FDIVD(v1, v1, v2); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 7: INST_NAME("FDIVR ST0, double[ED]"); @@ -247,7 +292,11 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin v2 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0); VLD64(v2, wback, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); FDIVD(v1, v2, v1); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; } return addr; diff --git a/src/dynarec/arm64/dynarec_arm64_de.c b/src/dynarec/arm64/dynarec_arm64_de.c index 7dbbb210..8dcc16c2 100644 --- a/src/dynarec/arm64/dynarec_arm64_de.c +++ b/src/dynarec/arm64/dynarec_arm64_de.c @@ -29,6 +29,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin uint8_t nextop = F8; uint8_t wback; + uint8_t u8; int64_t fixedaddress; int unscaled; int v1, v2; @@ -49,11 +50,15 @@ uintptr_t 
dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FADDP STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FADDS(v1, v1, v2); } else { FADDD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; case 0xC8: @@ -67,11 +72,15 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FMULP STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FMULS(v1, v1, v2); } else { FMULD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; case 0xD0: @@ -117,11 +126,15 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FSUBRP STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FSUBS(v1, v2, v1); } else { FSUBD(v1, v2, v1); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; case 0xE8: @@ -135,11 +148,15 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FSUBP STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FSUBS(v1, v1, v2); } else { FSUBD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; case 0xF0: @@ -153,11 +170,15 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FDIVRP STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FDIVS(v1, v2, v1); } else { FDIVD(v1, v2, v1); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; case 0xF8: @@ -171,11 +192,15 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FDIVP STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FDIVS(v1, v1, v2); } else { FDIVD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; default: diff --git a/src/dynarec/dynarec_native_functions.c b/src/dynarec/dynarec_native_functions.c index 683d8256..18082d82 100644 --- a/src/dynarec/dynarec_native_functions.c +++ b/src/dynarec/dynarec_native_functions.c @@ -42,7 +42,7 @@ void native_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n) void native_f2xm1(x64emu_t* emu) { - ST0.d = exp2(ST0.d) - 1.0; + ST0.d = expm1(LN2 * ST0.d); } void 
native_fyl2x(x64emu_t* emu) { @@ -50,11 +50,14 @@ void native_fyl2x(x64emu_t* emu) } void native_ftan(x64emu_t* emu) { +#pragma STDC FENV_ACCESS ON + // seems that tan of glib doesn't follow the rounding direction mode ST0.d = tan(ST0.d); emu->sw.f.F87_C2 = 0; } void native_fpatan(x64emu_t* emu) { +#pragma STDC FENV_ACCESS ON ST1.d = atan2(ST1.d, ST0.d); } void native_fxtract(x64emu_t* emu) @@ -97,10 +100,12 @@ void native_fprem(x64emu_t* emu) } void native_fyl2xp1(x64emu_t* emu) { - ST(1).d = log2(ST0.d + 1.0)*ST(1).d; + ST(1).d = log1p(ST0.d)*ST(1).d/LN2; } void native_fsincos(x64emu_t* emu) { +#pragma STDC FENV_ACCESS ON + // seems that sincos of glib doesn't follow the rounding direction mode sincos(ST1.d, &ST1.d, &ST0.d); emu->sw.f.F87_C2 = 0; } @@ -110,16 +115,21 @@ void native_frndint(x64emu_t* emu) } void native_fscale(x64emu_t* emu) { +#pragma STDC FENV_ACCESS ON if(ST0.d!=0.0) - ST0.d *= exp2(trunc(ST1.d)); + ST0.d = ldexp(ST0.d, trunc(ST1.d)); } void native_fsin(x64emu_t* emu) { +#pragma STDC FENV_ACCESS ON + // seems that sin of glib doesn't follow the rounding direction mode ST0.d = sin(ST0.d); emu->sw.f.F87_C2 = 0; } void native_fcos(x64emu_t* emu) { +#pragma STDC FENV_ACCESS ON + // seems that cos of glib doesn't follow the rounding direction mode ST0.d = cos(ST0.d); emu->sw.f.F87_C2 = 0; } diff --git a/src/emu/x64rund8.c b/src/emu/x64rund8.c index 000eab21..fd5ccf66 100644 --- a/src/emu/x64rund8.c +++ b/src/emu/x64rund8.c @@ -1,4 +1,5 @@ #define _GNU_SOURCE +#include <fenv.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -17,6 +18,7 @@ #include "x64primop.h" #include "x64trace.h" #include "x87emu_private.h" +#include "x87emu_setround.h" #include "box64context.h" #include "bridge.h" @@ -34,6 +36,7 @@ uintptr_t RunD8(x64emu_t *emu, rex_t rex, uintptr_t addr, uintptr_t offs) #ifdef TEST_INTERPRETER x64emu_t*emu = test->emu; #endif + int oldround = fpu_setround(emu); nextop = F8; if(MODREG) @@ -121,6 +124,7 @@ uintptr_t RunD8(x64emu_t *emu, rex_t rex, uintptr_t addr, uintptr_t offs) ST0.d = ST(nextop&7).d / ST0.d; break; default: + fesetround(oldround); return 0; } else switch((nextop>>3)&7) { @@ -190,7 +194,9 @@ uintptr_t RunD8(x64emu_t *emu, rex_t rex, uintptr_t addr, uintptr_t offs) ST0.d = *(float*)ED / ST0.d; break; default: + fesetround(oldround); return 0; } - return addr; + fesetround(oldround); + return addr; } \ No newline at end of file diff --git a/src/emu/x64rund9.c b/src/emu/x64rund9.c index 648eb0a7..c7b8d308 100644 --- a/src/emu/x64rund9.c +++ b/src/emu/x64rund9.c @@ -1,4 +1,5 @@ #define _GNU_SOURCE +#include <fenv.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -17,6 +18,7 @@ #include "x64primop.h" #include "x64trace.h" #include "x87emu_private.h" +#include "x87emu_setround.h" #include "box64context.h" #include "bridge.h" @@ -33,6 +35,7 @@ uintptr_t RunD9(x64emu_t *emu, rex_t rex, uintptr_t addr, uintptr_t offs) uint64_t ll; float f; reg64_t *oped; + int oldround; #ifdef TEST_INTERPRETER x64emu_t*emu = test->emu; #endif @@ -123,20 +126,30 @@ uintptr_t RunD9(x64emu_t *emu, rex_t rex, uintptr_t addr, uintptr_t offs) break; case 0xF0: /* F2XM1 */ - ST0.d = exp2(ST0.d) - 1.0; + if (ST0.d == 0) + break; + // Using the expm1 instead of exp2(ST0)-1 can avoid losing precision much, + // expecially when ST0 is close to zero (which loses the precise when -1). + // printf("%a, %a\n", LN2 * ST0.d, expm1(LN2 * ST0.d)); + ST0.d = expm1(LN2 * ST0.d); + // = 2^ST0 - 1 + error. 
(in math) break; case 0xF1: /* FYL2X */ ST(1).d *= log2(ST0.d); fpu_do_pop(emu); break; case 0xF2: /* FPTAN */ + oldround = fpu_setround(emu); ST0.d = tan(ST0.d); + fesetround(oldround); fpu_do_push(emu); ST0.d = 1.0; emu->sw.f.F87_C2 = 0; break; case 0xF3: /* FPATAN */ + oldround = fpu_setround(emu); ST1.d = atan2(ST1.d, ST0.d); + fesetround(oldround); fpu_do_pop(emu); break; case 0xF4: /* FXTRACT */ @@ -209,15 +222,22 @@ uintptr_t RunD9(x64emu_t *emu, rex_t rex, uintptr_t addr, uintptr_t offs) emu->top=(emu->top+1)&7; // this will probably break a few things break; case 0xF9: /* FYL2XP1 */ - ST(1).d *= log2(ST0.d + 1.0); + // Using the log1p instead of log2(ST0+1) can avoid losing precision much, + // expecially when ST0 is close to zero (which loses the precise when +1). + ST(1).d = (ST(1).d * log1p(ST0.d)) / M_LN2; + // = ST1 * log2(ST0 + 1) + error. (in math) fpu_do_pop(emu); break; case 0xFA: /* FSQRT */ + oldround = fpu_setround(emu); ST0.d = sqrt(ST0.d); + fesetround(oldround); break; case 0xFB: /* FSINCOS */ fpu_do_push(emu); + oldround = fpu_setround(emu); sincos(ST1.d, &ST1.d, &ST0.d); + fesetround(oldround); emu->sw.f.F87_C2 = 0; break; case 0xFC: /* FRNDINT */ @@ -225,15 +245,28 @@ uintptr_t RunD9(x64emu_t *emu, rex_t rex, uintptr_t addr, uintptr_t offs) break; case 0xFD: /* FSCALE */ // this could probably be done by just altering the exponant part of the float... - if(ST0.d!=0.0) - ST0.d *= exp2(trunc(ST1.d)); + if (ST1.d > INT32_MAX) + tmp32s = INT32_MAX; + else if (ST1.d < INT32_MIN) + tmp32s = INT32_MIN; + else + tmp32s = ST1.d; + if(ST0.d!=0.0) { + oldround = fpu_setround(emu); + ST0.d = ldexp(ST0.d, tmp32s); + fesetround(oldround); + } break; case 0xFE: /* FSIN */ + oldround = fpu_setround(emu); ST0.d = sin(ST0.d); + fesetround(oldround); emu->sw.f.F87_C2 = 0; break; case 0xFF: /* FCOS */ + oldround = fpu_setround(emu); ST0.d = cos(ST0.d); + fesetround(oldround); emu->sw.f.F87_C2 = 0; break; @@ -257,7 +290,9 @@ uintptr_t RunD9(x64emu_t *emu, rex_t rex, uintptr_t addr, uintptr_t offs) } else { GETE4(0); } + oldround = fpu_setround(emu); *(float*)ED = ST0.d; + fesetround(oldround); break; case 3: /* FSTP Ed, ST0 */ if(offs) { @@ -265,7 +300,9 @@ uintptr_t RunD9(x64emu_t *emu, rex_t rex, uintptr_t addr, uintptr_t offs) } else { GETE4(0); } + oldround = fpu_setround(emu); *(float*)ED = ST0.d; + fesetround(oldround); fpu_do_pop(emu); break; case 4: /* FLDENV m */ @@ -309,5 +346,5 @@ uintptr_t RunD9(x64emu_t *emu, rex_t rex, uintptr_t addr, uintptr_t offs) default: return 0; } - return addr; + return addr; } \ No newline at end of file diff --git a/src/emu/x64runda.c b/src/emu/x64runda.c index aed775f9..056937e7 100644 --- a/src/emu/x64runda.c +++ b/src/emu/x64runda.c @@ -1,4 +1,5 @@ #define _GNU_SOURCE +#include <fenv.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -17,6 +18,7 @@ #include "x64primop.h" #include "x64trace.h" #include "x87emu_private.h" +#include "x87emu_setround.h" #include "box64context.h" #include "bridge.h" @@ -94,7 +96,8 @@ uintptr_t RunDA(x64emu_t *emu, rex_t rex, uintptr_t addr) default: return 0; - } else + } else { + int oldround = fpu_setround(emu); switch((nextop>>3)&7) { case 0: /* FIADD ST0, Ed int */ GETE4(0); @@ -130,5 +133,7 @@ uintptr_t RunDA(x64emu_t *emu, rex_t rex, uintptr_t addr) ST0.d = (double)ED->sdword[0] / ST0.d; break; } + fesetround(oldround); + } return addr; } \ No newline at end of file diff --git a/src/emu/x64rundb.c b/src/emu/x64rundb.c index 82ea43ff..b6ba0ff1 100644 --- a/src/emu/x64rundb.c +++ 
b/src/emu/x64rundb.c @@ -1,4 +1,5 @@ #define _GNU_SOURCE +#include <fenv.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -17,6 +18,7 @@ #include "x64primop.h" #include "x64trace.h" #include "x87emu_private.h" +#include "x87emu_setround.h" #include "box64context.h" #include "bridge.h" @@ -35,6 +37,7 @@ uintptr_t RunDB(x64emu_t *emu, rex_t rex, uintptr_t addr) x64emu_t*emu = test->emu; #endif + int oldround = fpu_setround(emu); nextop = F8; if(MODREG) switch(nextop) { @@ -128,6 +131,7 @@ uintptr_t RunDB(x64emu_t *emu, rex_t rex, uintptr_t addr) break; default: + fesetround(oldround); return 0; } else switch((nextop>>3)&7) { @@ -179,7 +183,9 @@ uintptr_t RunDB(x64emu_t *emu, rex_t rex, uintptr_t addr) fpu_do_pop(emu); break; default: + fesetround(oldround); return 0; } - return addr; + fesetround(oldround); + return addr; } diff --git a/src/emu/x64rundc.c b/src/emu/x64rundc.c index 6d9ff07b..c9ffe738 100644 --- a/src/emu/x64rundc.c +++ b/src/emu/x64rundc.c @@ -17,6 +17,7 @@ #include "x64primop.h" #include "x64trace.h" #include "x87emu_private.h" +#include "x87emu_setround.h" #include "box64context.h" #include "bridge.h" @@ -34,6 +35,7 @@ uintptr_t RunDC(x64emu_t *emu, rex_t rex, uintptr_t addr) x64emu_t*emu = test->emu; #endif + int oldround = fpu_setround(emu); nextop = F8; if(MODREG) switch(nextop) { @@ -119,6 +121,7 @@ uintptr_t RunDC(x64emu_t *emu, rex_t rex, uintptr_t addr) ST(nextop&7).d /= ST0.d; break; default: + fesetround(oldround); return 0; } else { GETE8(0); @@ -149,8 +152,10 @@ uintptr_t RunDC(x64emu_t *emu, rex_t rex, uintptr_t addr) ST0.d = *(double*)ED / ST0.d; break; default: + fesetround(oldround); return 0; } } - return addr; + fesetround(oldround); + return addr; } diff --git a/src/emu/x64runde.c b/src/emu/x64runde.c index 4cb5e4a2..8b082aa5 100644 --- a/src/emu/x64runde.c +++ b/src/emu/x64runde.c @@ -1,4 +1,5 @@ #define _GNU_SOURCE +#include <fenv.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -17,6 +18,7 @@ #include "x64primop.h" #include "x64trace.h" #include "x87emu_private.h" +#include "x87emu_setround.h" #include "box64context.h" #include "bridge.h" @@ -34,6 +36,7 @@ uintptr_t RunDE(x64emu_t *emu, rex_t rex, uintptr_t addr) x64emu_t*emu = test->emu; #endif + int oldround = fpu_setround(emu); nextop = F8; if(MODREG) switch(nextop) { @@ -126,6 +129,7 @@ uintptr_t RunDE(x64emu_t *emu, rex_t rex, uintptr_t addr) break; default: + fesetround(oldround); return 0; } else switch((nextop>>3)&7) { @@ -154,7 +158,9 @@ uintptr_t RunDE(x64emu_t *emu, rex_t rex, uintptr_t addr) ST0.d = (double)EW->sword[0] / ST0.d; break; default: + fesetround(oldround); return 0; } - return addr; + fesetround(oldround); + return addr; } \ No newline at end of file diff --git a/src/emu/x87emu_setround.h b/src/emu/x87emu_setround.h new file mode 100644 index 00000000..4791c3ea --- /dev/null +++ b/src/emu/x87emu_setround.h @@ -0,0 +1,29 @@ +#ifndef __SETROUND_H__ +#define __SETROUND_H__ +#pragma STDC FENV_ACCESS ON +#include <fenv.h> +#include <stdint.h> +#include "x64emu.h" +#include "x64emu_private.h" +// set the rounding mode to the emulator's one, and return the old one +static inline int fpu_setround(x64emu_t* emu) { + int ret = fegetround(); + int rounding_direction; + switch (emu->cw.f.C87_RD) { + case ROUND_Nearest: + rounding_direction = FE_TONEAREST; + break; + case ROUND_Down: + rounding_direction = FE_DOWNWARD; + break; + case ROUND_Up: + rounding_direction = FE_UPWARD; + break; + case ROUND_Chop: + rounding_direction = FE_TOWARDZERO; + break; + } + 
fesetround(rounding_direction); + return ret; +} +#endif \ No newline at end of file diff --git a/tests/ref31.txt b/tests/ref31.txt new file mode 100644 index 00000000..358877e1 --- /dev/null +++ b/tests/ref31.txt @@ -0,0 +1,810 @@ +Testing: s = (0x1.123456789abcp2) -> (double)s +FE_TONEAREST 0x1.123456789abcp+2 +FE_DOWNWARD 0x1.123456789abcp+2 +FE_UPWARD 0x1.123456789abcp+2 +FE_TOWARDZERO 0x1.123456789abcp+2 + +Testing: s = (0x1.123456789abcp2) -> (float)s +FE_TONEAREST 0x1.123456p+2 +FE_DOWNWARD 0x1.123456p+2 +FE_UPWARD 0x1.123458p+2 +FE_TOWARDZERO 0x1.123456p+2 + +Testing: s = (-(0x1.123456789abcp2)) -> (double)s +FE_TONEAREST -0x1.123456789abcp+2 +FE_DOWNWARD -0x1.123456789abcp+2 +FE_UPWARD -0x1.123456789abcp+2 +FE_TOWARDZERO -0x1.123456789abcp+2 + +Testing: s = (-(0x1.123456789abcp2)) -> (float)s +FE_TONEAREST -0x1.123456p+2 +FE_DOWNWARD -0x1.123458p+2 +FE_UPWARD -0x1.123456p+2 +FE_TOWARDZERO -0x1.123456p+2 + +Testing: d = (0x1.123456789abcp512) -> (float)d +FE_TONEAREST inf +FE_DOWNWARD 0x1.fffffep+127 +FE_UPWARD inf +FE_TOWARDZERO 0x1.fffffep+127 + +Testing: s = (0x1.123456789abcp29) -> (double)s +FE_TONEAREST 0x1.123456789abcp+29 +FE_DOWNWARD 0x1.123456789abcp+29 +FE_UPWARD 0x1.123456789abcp+29 +FE_TOWARDZERO 0x1.123456789abcp+29 + +Testing: s = (0x1.123456789abcp29) -> (float)s +FE_TONEAREST 0x1.123456p+29 +FE_DOWNWARD 0x1.123456p+29 +FE_UPWARD 0x1.123458p+29 +FE_TOWARDZERO 0x1.123456p+29 + +Testing: s = (0x1.123456789abcp29) -> (int16_t)s +FE_TONEAREST -32768 +FE_DOWNWARD -32768 +FE_UPWARD -32768 +FE_TOWARDZERO -32768 + +Testing: s = (0x1.123456789abcp29) -> (int8_t)s +FE_TONEAREST 0 +FE_DOWNWARD 0 +FE_UPWARD 0 +FE_TOWARDZERO 0 + +Testing: s = (0x1.123456789abcp29) -> (unsigned short)s +FE_TONEAREST 35535 +FE_DOWNWARD 35535 +FE_UPWARD 35535 +FE_TOWARDZERO 35535 + +Testing: s = (0x1.123456789abcp29) -> (unsigned char)s +FE_TONEAREST 0 +FE_DOWNWARD 0 +FE_UPWARD 0 +FE_TOWARDZERO 0 + +Testing: s = (-(0x1.123456789abcp29)) -> (double)s +FE_TONEAREST -0x1.123456789abcp+29 +FE_DOWNWARD -0x1.123456789abcp+29 +FE_UPWARD -0x1.123456789abcp+29 +FE_TOWARDZERO -0x1.123456789abcp+29 + +Testing: s = (-(0x1.123456789abcp29)) -> (float)s +FE_TONEAREST -0x1.123456p+29 +FE_DOWNWARD -0x1.123458p+29 +FE_UPWARD -0x1.123456p+29 +FE_TOWARDZERO -0x1.123456p+29 + +Testing: d = (-0x1.123456789abcp30) -> (int32_t)d +FE_TONEAREST -1150096798 +FE_DOWNWARD -1150096798 +FE_UPWARD -1150096798 +FE_TOWARDZERO -1150096798 + +Testing: d = (-0x1.123456789abcp62) -> (int64_t)d +FE_TONEAREST -4939628135293321216 +FE_DOWNWARD -4939628135293321216 +FE_UPWARD -4939628135293321216 +FE_TOWARDZERO -4939628135293321216 + +Testing: s = (0x1.123456789abcp2f) -> (double)s +FE_TONEAREST 0x1.123456p+2 +FE_DOWNWARD 0x1.123456p+2 +FE_UPWARD 0x1.123458p+2 +FE_TOWARDZERO 0x1.123456p+2 + +Testing: s = (0x1.123456789abcp2f) -> (float)s +FE_TONEAREST 0x1.123456p+2 +FE_DOWNWARD 0x1.123456p+2 +FE_UPWARD 0x1.123458p+2 +FE_TOWARDZERO 0x1.123456p+2 + +Testing: s = (-(0x1.123456789abcp2f)) -> (double)s +FE_TONEAREST -0x1.123456p+2 +FE_DOWNWARD -0x1.123458p+2 +FE_UPWARD -0x1.123456p+2 +FE_TOWARDZERO -0x1.123456p+2 + +Testing: s = (-(0x1.123456789abcp2f)) -> (float)s +FE_TONEAREST -0x1.123456p+2 +FE_DOWNWARD -0x1.123458p+2 +FE_UPWARD -0x1.123456p+2 +FE_TOWARDZERO -0x1.123456p+2 + +Testing: s = (0x1.123456789abcp29f) -> (double)s +FE_TONEAREST 0x1.123456p+29 +FE_DOWNWARD 0x1.123456p+29 +FE_UPWARD 0x1.123458p+29 +FE_TOWARDZERO 0x1.123456p+29 + +Testing: s = (0x1.123456789abcp29f) -> (float)s +FE_TONEAREST 0x1.123456p+29 +FE_DOWNWARD 0x1.123456p+29 
+FE_UPWARD 0x1.123458p+29 +FE_TOWARDZERO 0x1.123456p+29 + +Testing: s = (0x1.123456789abcp29f) -> (int16_t)s +FE_TONEAREST -32768 +FE_DOWNWARD -32768 +FE_UPWARD -32768 +FE_TOWARDZERO -32768 + +Testing: s = (0x1.123456789abcp29f) -> (int8_t)s +FE_TONEAREST 0 +FE_DOWNWARD 0 +FE_UPWARD 0 +FE_TOWARDZERO 0 + +Testing: s = (0x1.123456789abcp29f) -> (unsigned short)s +FE_TONEAREST 35520 +FE_DOWNWARD 35520 +FE_UPWARD 35584 +FE_TOWARDZERO 35520 + +Testing: s = (0x1.123456789abcp29f) -> (unsigned char)s +FE_TONEAREST 0 +FE_DOWNWARD 0 +FE_UPWARD 0 +FE_TOWARDZERO 0 + +Testing: s = (-(0x1.123456789abcp29f)) -> (double)s +FE_TONEAREST -0x1.123456p+29 +FE_DOWNWARD -0x1.123458p+29 +FE_UPWARD -0x1.123456p+29 +FE_TOWARDZERO -0x1.123456p+29 + +Testing: s = (-(0x1.123456789abcp29f)) -> (float)s +FE_TONEAREST -0x1.123456p+29 +FE_DOWNWARD -0x1.123458p+29 +FE_UPWARD -0x1.123456p+29 +FE_TOWARDZERO -0x1.123456p+29 + +Testing: f = -0x1.123456789abcp30f -> (int32_t)f +FE_TONEAREST -1150096768 +FE_DOWNWARD -1150096896 +FE_UPWARD -1150096768 +FE_TOWARDZERO -1150096768 + +Testing: d = -0x1.1234567p0 -> (double)((int)d) +FE_TONEAREST -0x1p+0 +FE_DOWNWARD -0x1p+0 +FE_UPWARD -0x1p+0 +FE_TOWARDZERO -0x1p+0 + +Testing: d = 0x1.9234567p0 -> (double)((int)d) +FE_TONEAREST 0x1p+0 +FE_DOWNWARD 0x1p+0 +FE_UPWARD 0x1p+0 +FE_TOWARDZERO 0x1p+0 + +Testing: d = -0x1.9234567p0 -> (double)((int)d) +FE_TONEAREST -0x1p+0 +FE_DOWNWARD -0x1p+0 +FE_UPWARD -0x1p+0 +FE_TOWARDZERO -0x1p+0 + +Testing: d = 0x1.1234567p0 -> (double)((long int)d) +FE_TONEAREST 0x1p+0 +FE_DOWNWARD 0x1p+0 +FE_UPWARD 0x1p+0 +FE_TOWARDZERO 0x1p+0 + +Testing: d = -0x1.1234567p0 -> (double)((long int)d) +FE_TONEAREST -0x1p+0 +FE_DOWNWARD -0x1p+0 +FE_UPWARD -0x1p+0 +FE_TOWARDZERO -0x1p+0 + +Testing: d = 0x1.9234567p0 -> (double)((long int)d) +FE_TONEAREST 0x1p+0 +FE_DOWNWARD 0x1p+0 +FE_UPWARD 0x1p+0 +FE_TOWARDZERO 0x1p+0 + +Testing: d = -0x1.9234567p0 -> (double)((long int)d) +FE_TONEAREST -0x1p+0 +FE_DOWNWARD -0x1p+0 +FE_UPWARD -0x1p+0 +FE_TOWARDZERO -0x1p+0 + +Testing: (d1 = (1.0), d2 = (0x1.0000000000001p0)) -> d1 + d2 +FE_TONEAREST 0x1p+1 +FE_DOWNWARD 0x1p+1 +FE_UPWARD 0x1.0000000000001p+1 +FE_TOWARDZERO 0x1p+1 + +Testing: (d1 = -(1.0), d2 = (0x1.0000000000001p0)) -> d1 + d2 +FE_TONEAREST 0x1p-52 +FE_DOWNWARD 0x1p-52 +FE_UPWARD 0x1p-52 +FE_TOWARDZERO 0x1p-52 + +Testing: (d1 = (1.0), d2 = -(0x1.0000000000001p0)) -> d1 + d2 +FE_TONEAREST -0x1p-52 +FE_DOWNWARD -0x1p-52 +FE_UPWARD -0x1p-52 +FE_TOWARDZERO -0x1p-52 + +Testing: (d1 = -(1.0), d2 = -(0x1.0000000000001p0)) -> d1 + d2 +FE_TONEAREST -0x1p+1 +FE_DOWNWARD -0x1.0000000000001p+1 +FE_UPWARD -0x1p+1 +FE_TOWARDZERO -0x1p+1 + +Testing: (d1 = (1.0), d2 = (0x1.0000000000001p0)) -> d1 - d2 +FE_TONEAREST -0x1p-52 +FE_DOWNWARD -0x1p-52 +FE_UPWARD -0x1p-52 +FE_TOWARDZERO -0x1p-52 + +Testing: (d1 = -(1.0), d2 = (0x1.0000000000001p0)) -> d1 - d2 +FE_TONEAREST -0x1p+1 +FE_DOWNWARD -0x1.0000000000001p+1 +FE_UPWARD -0x1p+1 +FE_TOWARDZERO -0x1p+1 + +Testing: (d1 = (1.0), d2 = -(0x1.0000000000001p0)) -> d1 - d2 +FE_TONEAREST 0x1p+1 +FE_DOWNWARD 0x1p+1 +FE_UPWARD 0x1.0000000000001p+1 +FE_TOWARDZERO 0x1p+1 + +Testing: (d1 = -(1.0), d2 = -(0x1.0000000000001p0)) -> d1 - d2 +FE_TONEAREST 0x1p-52 +FE_DOWNWARD 0x1p-52 +FE_UPWARD 0x1p-52 +FE_TOWARDZERO 0x1p-52 + +Testing: (d1 = (1.0), d2 = (0x1.0000000000001p0)) -> d2 - d1 +FE_TONEAREST 0x1p-52 +FE_DOWNWARD 0x1p-52 +FE_UPWARD 0x1p-52 +FE_TOWARDZERO 0x1p-52 + +Testing: (d1 = -(1.0), d2 = (0x1.0000000000001p0)) -> d2 - d1 +FE_TONEAREST 0x1p+1 +FE_DOWNWARD 0x1p+1 +FE_UPWARD 
0x1.0000000000001p+1 +FE_TOWARDZERO 0x1p+1 + +Testing: (d1 = (1.0), d2 = -(0x1.0000000000001p0)) -> d2 - d1 +FE_TONEAREST -0x1p+1 +FE_DOWNWARD -0x1.0000000000001p+1 +FE_UPWARD -0x1p+1 +FE_TOWARDZERO -0x1p+1 + +Testing: (d1 = -(1.0), d2 = -(0x1.0000000000001p0)) -> d2 - d1 +FE_TONEAREST -0x1p-52 +FE_DOWNWARD -0x1p-52 +FE_UPWARD -0x1p-52 +FE_TOWARDZERO -0x1p-52 + +Testing: (d1 = (1.0), d2 = (0x1.000000000000dp-4)) -> d1 + d2 +FE_TONEAREST 0x1.1000000000001p+0 +FE_DOWNWARD 0x1.1p+0 +FE_UPWARD 0x1.1000000000001p+0 +FE_TOWARDZERO 0x1.1p+0 + +Testing: (d1 = -(1.0), d2 = (0x1.000000000000dp-4)) -> d1 + d2 +FE_TONEAREST -0x1.dfffffffffffep-1 +FE_DOWNWARD -0x1.dffffffffffffp-1 +FE_UPWARD -0x1.dfffffffffffep-1 +FE_TOWARDZERO -0x1.dfffffffffffep-1 + +Testing: (d1 = (1.0), d2 = -(0x1.000000000000dp-4)) -> d1 + d2 +FE_TONEAREST 0x1.dfffffffffffep-1 +FE_DOWNWARD 0x1.dfffffffffffep-1 +FE_UPWARD 0x1.dffffffffffffp-1 +FE_TOWARDZERO 0x1.dfffffffffffep-1 + +Testing: (d1 = -(1.0), d2 = -(0x1.000000000000dp-4)) -> d1 + d2 +FE_TONEAREST -0x1.1000000000001p+0 +FE_DOWNWARD -0x1.1000000000001p+0 +FE_UPWARD -0x1.1p+0 +FE_TOWARDZERO -0x1.1p+0 + +Testing: (d1 = (1.0), d2 = (0x1.000000000000dp-4)) -> d1 - d2 +FE_TONEAREST 0x1.dfffffffffffep-1 +FE_DOWNWARD 0x1.dfffffffffffep-1 +FE_UPWARD 0x1.dffffffffffffp-1 +FE_TOWARDZERO 0x1.dfffffffffffep-1 + +Testing: (d1 = -(1.0), d2 = (0x1.000000000000dp-4)) -> d1 - d2 +FE_TONEAREST -0x1.1000000000001p+0 +FE_DOWNWARD -0x1.1000000000001p+0 +FE_UPWARD -0x1.1p+0 +FE_TOWARDZERO -0x1.1p+0 + +Testing: (d1 = (1.0), d2 = -(0x1.000000000000dp-4)) -> d1 - d2 +FE_TONEAREST 0x1.1000000000001p+0 +FE_DOWNWARD 0x1.1p+0 +FE_UPWARD 0x1.1000000000001p+0 +FE_TOWARDZERO 0x1.1p+0 + +Testing: (d1 = -(1.0), d2 = -(0x1.000000000000dp-4)) -> d1 - d2 +FE_TONEAREST -0x1.dfffffffffffep-1 +FE_DOWNWARD -0x1.dffffffffffffp-1 +FE_UPWARD -0x1.dfffffffffffep-1 +FE_TOWARDZERO -0x1.dfffffffffffep-1 + +Testing: (d1 = (1.0), d2 = (0x1.000000000000dp-4)) -> d2 - d1 +FE_TONEAREST -0x1.dfffffffffffep-1 +FE_DOWNWARD -0x1.dffffffffffffp-1 +FE_UPWARD -0x1.dfffffffffffep-1 +FE_TOWARDZERO -0x1.dfffffffffffep-1 + +Testing: (d1 = -(1.0), d2 = (0x1.000000000000dp-4)) -> d2 - d1 +FE_TONEAREST 0x1.1000000000001p+0 +FE_DOWNWARD 0x1.1p+0 +FE_UPWARD 0x1.1000000000001p+0 +FE_TOWARDZERO 0x1.1p+0 + +Testing: (d1 = (1.0), d2 = -(0x1.000000000000dp-4)) -> d2 - d1 +FE_TONEAREST -0x1.1000000000001p+0 +FE_DOWNWARD -0x1.1000000000001p+0 +FE_UPWARD -0x1.1p+0 +FE_TOWARDZERO -0x1.1p+0 + +Testing: (d1 = -(1.0), d2 = -(0x1.000000000000dp-4)) -> d2 - d1 +FE_TONEAREST 0x1.dfffffffffffep-1 +FE_DOWNWARD 0x1.dfffffffffffep-1 +FE_UPWARD 0x1.dffffffffffffp-1 +FE_TOWARDZERO 0x1.dfffffffffffep-1 + +Testing: (d1 = (0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 + d2 +FE_TONEAREST 0x1.2bc55ef8922bp+1 +FE_DOWNWARD 0x1.2bc55ef8922bp+1 +FE_UPWARD 0x1.2bc55ef8922bp+1 +FE_TOWARDZERO 0x1.2bc55ef8922bp+1 + +Testing: (d1 = -(0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 + d2 +FE_TONEAREST 0x1.12233445566p-4 +FE_DOWNWARD 0x1.12233445566p-4 +FE_UPWARD 0x1.12233445566p-4 +FE_TOWARDZERO 0x1.12233445566p-4 + +Testing: (d1 = (0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 + d2 +FE_TONEAREST -0x1.12233445566p-4 +FE_DOWNWARD -0x1.12233445566p-4 +FE_UPWARD -0x1.12233445566p-4 +FE_TOWARDZERO -0x1.12233445566p-4 + +Testing: (d1 = -(0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 + d2 +FE_TONEAREST -0x1.2bc55ef8922bp+1 +FE_DOWNWARD -0x1.2bc55ef8922bp+1 +FE_UPWARD -0x1.2bc55ef8922bp+1 +FE_TOWARDZERO -0x1.2bc55ef8922bp+1 + +Testing: (d1 = 
(0x1.233445566778p0f), d2 = (0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd404804dp+0 +FE_DOWNWARD 0x1.5ebd3ddf57ep+0 +FE_UPWARD 0x1.5ebd428e6d5cp+0 +FE_TOWARDZERO 0x1.5ebd3ddf57ep+0 + +Testing: (d1 = -(0x1.233445566778p0f), d2 = (0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd404804dp+0 +FE_DOWNWARD -0x1.5ebd404804dp+0 +FE_UPWARD -0x1.5ebd4025c068p+0 +FE_TOWARDZERO -0x1.5ebd3ddf57ep+0 + +Testing: (d1 = (0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd404804dp+0 +FE_DOWNWARD -0x1.5ebd4025c068p+0 +FE_UPWARD -0x1.5ebd404804dp+0 +FE_TOWARDZERO -0x1.5ebd3ddf57ep+0 + +Testing: (d1 = -(0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd404804dp+0 +FE_DOWNWARD 0x1.5ebd428e6d5cp+0 +FE_UPWARD 0x1.5ebd3ddf57ep+0 +FE_TOWARDZERO 0x1.5ebd3ddf57ep+0 + +Testing: (d1 = (0x1.233445566778p0f), d2 = (0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd40f80919p+0 +FE_DOWNWARD 0x1.5ebd3e8f5c27dp+0 +FE_UPWARD 0x1.5ebd40f809191p+0 +FE_TOWARDZERO 0x1.5ebd3e8f5c27dp+0 + +Testing: (d1 = -(0x1.233445566778p0f), d2 = (0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd40f80919p+0 +FE_DOWNWARD -0x1.5ebd40f809191p+0 +FE_UPWARD -0x1.5ebd3e8f5c27dp+0 +FE_TOWARDZERO -0x1.5ebd3e8f5c27dp+0 + +Testing: (d1 = (0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd40f80919p+0 +FE_DOWNWARD -0x1.5ebd3e8f5c27ep+0 +FE_UPWARD -0x1.5ebd40f80919p+0 +FE_TOWARDZERO -0x1.5ebd3e8f5c27dp+0 + +Testing: (d1 = -(0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd40f80919p+0 +FE_DOWNWARD 0x1.5ebd40f80919p+0 +FE_UPWARD 0x1.5ebd3e8f5c27ep+0 +FE_TOWARDZERO 0x1.5ebd3e8f5c27dp+0 + +Testing: (d1 = (0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd402bc44c4p+0 +FE_DOWNWARD 0x1.5ebd402bc44c4p+0 +FE_UPWARD 0x1.5ebd402bc44c5p+0 +FE_TOWARDZERO 0x1.5ebd402bc44c4p+0 + +Testing: (d1 = -(0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd402bc44c4p+0 +FE_DOWNWARD -0x1.5ebd402bc44c5p+0 +FE_UPWARD -0x1.5ebd402bc44c4p+0 +FE_TOWARDZERO -0x1.5ebd402bc44c4p+0 + +Testing: (d1 = (0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd402bc44c4p+0 +FE_DOWNWARD -0x1.5ebd402bc44c5p+0 +FE_UPWARD -0x1.5ebd402bc44c4p+0 +FE_TOWARDZERO -0x1.5ebd402bc44c4p+0 + +Testing: (d1 = -(0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd402bc44c4p+0 +FE_DOWNWARD 0x1.5ebd402bc44c4p+0 +FE_UPWARD 0x1.5ebd402bc44c5p+0 +FE_TOWARDZERO 0x1.5ebd402bc44c4p+0 + +Testing: (d1 = (0x1.233445566778p0f), d2 = (0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd40f80919p+0 +FE_DOWNWARD 0x1.5ebd3e8f5c27dp+0 +FE_UPWARD 0x1.5ebd40f809191p+0 +FE_TOWARDZERO 0x1.5ebd3e8f5c27dp+0 + +Testing: (d1 = -(0x1.233445566778p0f), d2 = (0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd40f80919p+0 +FE_DOWNWARD -0x1.5ebd40f809191p+0 +FE_UPWARD -0x1.5ebd3e8f5c27dp+0 +FE_TOWARDZERO -0x1.5ebd3e8f5c27dp+0 + +Testing: (d1 = (0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd40f80919p+0 +FE_DOWNWARD -0x1.5ebd3e8f5c27ep+0 +FE_UPWARD -0x1.5ebd40f80919p+0 +FE_TOWARDZERO -0x1.5ebd3e8f5c27dp+0 + +Testing: (d1 = -(0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd40f80919p+0 +FE_DOWNWARD 0x1.5ebd40f80919p+0 +FE_UPWARD 0x1.5ebd3e8f5c27ep+0 +FE_TOWARDZERO 0x1.5ebd3e8f5c27dp+0 + +Testing: (d1 = (0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd3f7bc003ap+0 +FE_DOWNWARD 
0x1.5ebd3f7bc003ap+0 +FE_UPWARD 0x1.5ebd41c2288e5p+0 +FE_TOWARDZERO 0x1.5ebd3f7bc003ap+0 + +Testing: (d1 = -(0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd3f7bc003ap+0 +FE_DOWNWARD -0x1.5ebd3f7bc003bp+0 +FE_UPWARD -0x1.5ebd41c2288e4p+0 +FE_TOWARDZERO -0x1.5ebd3f7bc003ap+0 + +Testing: (d1 = (0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd3f7bc003ap+0 +FE_DOWNWARD -0x1.5ebd41c2288e5p+0 +FE_UPWARD -0x1.5ebd3f7bc003ap+0 +FE_TOWARDZERO -0x1.5ebd3f7bc003ap+0 + +Testing: (d1 = -(0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd3f7bc003ap+0 +FE_DOWNWARD 0x1.5ebd41c2288e4p+0 +FE_UPWARD 0x1.5ebd3f7bc003bp+0 +FE_TOWARDZERO 0x1.5ebd3f7bc003ap+0 + +Testing: (d1 = (0x1.233445566778p0f), d2 = (0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd404804dp+0 +FE_DOWNWARD 0x1.5ebd3ddf57ep+0 +FE_UPWARD 0x1.5ebd428e6d5cp+0 +FE_TOWARDZERO 0x1.5ebd3ddf57ep+0 + +Testing: (d1 = -(0x1.233445566778p0f), d2 = (0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd404804dp+0 +FE_DOWNWARD -0x1.5ebd404804dp+0 +FE_UPWARD -0x1.5ebd4025c068p+0 +FE_TOWARDZERO -0x1.5ebd3ddf57ep+0 + +Testing: (d1 = (0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST -0x1.5ebd404804dp+0 +FE_DOWNWARD -0x1.5ebd4025c068p+0 +FE_UPWARD -0x1.5ebd404804dp+0 +FE_TOWARDZERO -0x1.5ebd3ddf57ep+0 + +Testing: (d1 = -(0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST 0x1.5ebd404804dp+0 +FE_DOWNWARD 0x1.5ebd428e6d5cp+0 +FE_UPWARD 0x1.5ebd3ddf57ep+0 +FE_TOWARDZERO 0x1.5ebd3ddf57ep+0 + +Testing: (d1 = (0x1.233445566778p0), d2 = (5)) -> d1 *d2 +FE_TONEAREST 0x1.6c0156ac0156p+2 +FE_DOWNWARD 0x1.6c0156ac0156p+2 +FE_UPWARD 0x1.6c0156ac0156p+2 +FE_TOWARDZERO 0x1.6c0156ac0156p+2 + +Testing: (d1 = -(0x1.233445566778p0), d2 = (5)) -> d1 *d2 +FE_TONEAREST -0x1.6c0156ac0156p+2 +FE_DOWNWARD -0x1.6c0156ac0156p+2 +FE_UPWARD -0x1.6c0156ac0156p+2 +FE_TOWARDZERO -0x1.6c0156ac0156p+2 + +Testing: (d1 = (0x1.233445566778p0), d2 = -(5)) -> d1 *d2 +FE_TONEAREST -0x1.6c0156ac0156p+2 +FE_DOWNWARD -0x1.6c0156ac0156p+2 +FE_UPWARD -0x1.6c0156ac0156p+2 +FE_TOWARDZERO -0x1.6c0156ac0156p+2 + +Testing: (d1 = -(0x1.233445566778p0), d2 = -(5)) -> d1 *d2 +FE_TONEAREST 0x1.6c0156ac0156p+2 +FE_DOWNWARD 0x1.6c0156ac0156p+2 +FE_UPWARD 0x1.6c0156ac0156p+2 +FE_TOWARDZERO 0x1.6c0156ac0156p+2 + +Testing: (d1 = (15), d2 = (0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST 0x1.2111111111102p+4 +FE_DOWNWARD 0x1.2111111111102p+4 +FE_UPWARD 0x1.2111111111102p+4 +FE_TOWARDZERO 0x1.2111111111102p+4 + +Testing: (d1 = -(15), d2 = (0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST -0x1.2111111111102p+4 +FE_DOWNWARD -0x1.2111111111102p+4 +FE_UPWARD -0x1.2111111111102p+4 +FE_TOWARDZERO -0x1.2111111111102p+4 + +Testing: (d1 = (15), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST -0x1.2111111111102p+4 +FE_DOWNWARD -0x1.2111111111102p+4 +FE_UPWARD -0x1.2111111111102p+4 +FE_TOWARDZERO -0x1.2111111111102p+4 + +Testing: (d1 = -(15), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST 0x1.2111111111102p+4 +FE_DOWNWARD 0x1.2111111111102p+4 +FE_UPWARD 0x1.2111111111102p+4 +FE_TOWARDZERO 0x1.2111111111102p+4 + +Testing: (d1 = (0x1.233445566778p0f), d2 = (15)) -> d1 *d2 +FE_TONEAREST 0x1.110101ap+4 +FE_DOWNWARD 0x1.1100ffcp+4 +FE_UPWARD 0x1.110101ap+4 +FE_TOWARDZERO 0x1.1100ffcp+4 + +Testing: (d1 = -(0x1.233445566778p0f), d2 = (15)) -> d1 *d2 +FE_TONEAREST -0x1.110101ap+4 +FE_DOWNWARD -0x1.110101ap+4 +FE_UPWARD -0x1.1100ffcp+4 +FE_TOWARDZERO -0x1.1100ffcp+4 + +Testing: (d1 
= (0x1.233445566778p0f), d2 = -(15)) -> d1 *d2 +FE_TONEAREST -0x1.110101ap+4 +FE_DOWNWARD -0x1.1100ffcp+4 +FE_UPWARD -0x1.110101ap+4 +FE_TOWARDZERO -0x1.1100ffcp+4 + +Testing: (d1 = -(0x1.233445566778p0f), d2 = -(15)) -> d1 *d2 +FE_TONEAREST 0x1.110101ap+4 +FE_DOWNWARD 0x1.110101ap+4 +FE_UPWARD 0x1.1100ffcp+4 +FE_TOWARDZERO 0x1.1100ffcp+4 + +Testing: (d1 = (15), d2 = (0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST 0x1.2111108p+4 +FE_DOWNWARD 0x1.2111108p+4 +FE_UPWARD 0x1.2111126p+4 +FE_TOWARDZERO 0x1.2111108p+4 + +Testing: (d1 = -(15), d2 = (0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST -0x1.2111108p+4 +FE_DOWNWARD -0x1.2111108p+4 +FE_UPWARD -0x1.2111126p+4 +FE_TOWARDZERO -0x1.2111108p+4 + +Testing: (d1 = (15), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST -0x1.2111108p+4 +FE_DOWNWARD -0x1.2111126p+4 +FE_UPWARD -0x1.2111108p+4 +FE_TOWARDZERO -0x1.2111108p+4 + +Testing: (d1 = -(15), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2 +FE_TONEAREST 0x1.2111108p+4 +FE_DOWNWARD 0x1.2111126p+4 +FE_UPWARD 0x1.2111108p+4 +FE_TOWARDZERO 0x1.2111108p+4 + +Testing: (d1 = (0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 / d2 +FE_TONEAREST 0x1.e38ca44203ab9p-1 +FE_DOWNWARD 0x1.e38ca44203ab8p-1 +FE_UPWARD 0x1.e38ca44203ab9p-1 +FE_TOWARDZERO 0x1.e38ca44203ab8p-1 + +Testing: (d1 = -(0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 / d2 +FE_TONEAREST -0x1.e38ca44203ab9p-1 +FE_DOWNWARD -0x1.e38ca44203ab9p-1 +FE_UPWARD -0x1.e38ca44203ab8p-1 +FE_TOWARDZERO -0x1.e38ca44203ab8p-1 + +Testing: (d1 = (0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 / d2 +FE_TONEAREST -0x1.e38ca44203ab9p-1 +FE_DOWNWARD -0x1.e38ca44203ab9p-1 +FE_UPWARD -0x1.e38ca44203ab8p-1 +FE_TOWARDZERO -0x1.e38ca44203ab8p-1 + +Testing: (d1 = -(0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 / d2 +FE_TONEAREST 0x1.e38ca44203ab9p-1 +FE_DOWNWARD 0x1.e38ca44203ab8p-1 +FE_UPWARD 0x1.e38ca44203ab9p-1 +FE_TOWARDZERO 0x1.e38ca44203ab8p-1 + +Testing: (d1 = (0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 / d2 +FE_TONEAREST 0x1.e38ca44203ab9p-1 +FE_DOWNWARD 0x1.e38ca44203ab8p-1 +FE_UPWARD 0x1.e38ca44203ab9p-1 +FE_TOWARDZERO 0x1.e38ca44203ab8p-1 + +Testing: (d1 = -(0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 / d2 +FE_TONEAREST -0x1.e38ca44203ab9p-1 +FE_DOWNWARD -0x1.e38ca44203ab9p-1 +FE_UPWARD -0x1.e38ca44203ab8p-1 +FE_TOWARDZERO -0x1.e38ca44203ab8p-1 + +Testing: (d1 = (0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 / d2 +FE_TONEAREST -0x1.e38ca44203ab9p-1 +FE_DOWNWARD -0x1.e38ca44203ab9p-1 +FE_UPWARD -0x1.e38ca44203ab8p-1 +FE_TOWARDZERO -0x1.e38ca44203ab8p-1 + +Testing: (d1 = -(0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 / d2 +FE_TONEAREST 0x1.e38ca44203ab9p-1 +FE_DOWNWARD 0x1.e38ca44203ab8p-1 +FE_UPWARD 0x1.e38ca44203ab9p-1 +FE_TOWARDZERO 0x1.e38ca44203ab8p-1 + +Testing: (d1 = (0x1.233445566778p0), d2 = (0x1.3456789abcdep0f)) -> d1 / d2 +FE_TONEAREST 0x1.e38ca534ae61p-1 +FE_DOWNWARD 0x1.e38ca534ae61p-1 +FE_UPWARD 0x1.e38ca211bd4adp-1 +FE_TOWARDZERO 0x1.e38ca534ae61p-1 + +Testing: (d1 = -(0x1.233445566778p0), d2 = (0x1.3456789abcdep0f)) -> d1 / d2 +FE_TONEAREST -0x1.e38ca534ae61p-1 +FE_DOWNWARD -0x1.e38ca534ae611p-1 +FE_UPWARD -0x1.e38ca211bd4acp-1 +FE_TOWARDZERO -0x1.e38ca534ae61p-1 + +Testing: (d1 = (0x1.233445566778p0), d2 = -(0x1.3456789abcdep0f)) -> d1 / d2 +FE_TONEAREST -0x1.e38ca534ae61p-1 +FE_DOWNWARD -0x1.e38ca211bd4adp-1 +FE_UPWARD -0x1.e38ca534ae61p-1 +FE_TOWARDZERO -0x1.e38ca534ae61p-1 + +Testing: (d1 = -(0x1.233445566778p0), d2 = -(0x1.3456789abcdep0f)) -> d1 / d2 +FE_TONEAREST 
0x1.e38ca534ae61p-1 +FE_DOWNWARD 0x1.e38ca211bd4acp-1 +FE_UPWARD 0x1.e38ca534ae611p-1 +FE_TOWARDZERO 0x1.e38ca534ae61p-1 + +Testing: (d1 = (1.0), d2 = (0x1.0000000000001p0)) -> d2 - d1 +FE_TONEAREST 0x1p-52 +FE_DOWNWARD 0x1p-52 +FE_UPWARD 0x1p-52 +FE_TOWARDZERO 0x1p-52 + +Testing: (d1 = -(1.0), d2 = (0x1.0000000000001p0)) -> d2 - d1 +FE_TONEAREST 0x1p+1 +FE_DOWNWARD 0x1p+1 +FE_UPWARD 0x1.0000000000001p+1 +FE_TOWARDZERO 0x1p+1 + +Testing: (d1 = (1.0), d2 = -(0x1.0000000000001p0)) -> d2 - d1 +FE_TONEAREST -0x1p+1 +FE_DOWNWARD -0x1.0000000000001p+1 +FE_UPWARD -0x1p+1 +FE_TOWARDZERO -0x1p+1 + +Testing: (d1 = -(1.0), d2 = -(0x1.0000000000001p0)) -> d2 - d1 +FE_TONEAREST -0x1p-52 +FE_DOWNWARD -0x1p-52 +FE_UPWARD -0x1p-52 +FE_TOWARDZERO -0x1p-52 + +Testing: (d1 = (1.0), d2 = (0x1.000000000000dp-4)) -> d1 + d2 +FE_TONEAREST 0x1.1000000000001p+0 +FE_DOWNWARD 0x1.1p+0 +FE_UPWARD 0x1.1000000000001p+0 +FE_TOWARDZERO 0x1.1p+0 + +Testing: (d1 = -(1.0), d2 = (0x1.000000000000dp-4)) -> d1 + d2 +FE_TONEAREST -0x1.dfffffffffffep-1 +FE_DOWNWARD -0x1.dffffffffffffp-1 +FE_UPWARD -0x1.dfffffffffffep-1 +FE_TOWARDZERO -0x1.dfffffffffffep-1 + +Testing: (d1 = (1.0), d2 = -(0x1.000000000000dp-4)) -> d1 + d2 +FE_TONEAREST 0x1.dfffffffffffep-1 +FE_DOWNWARD 0x1.dfffffffffffep-1 +FE_UPWARD 0x1.dffffffffffffp-1 +FE_TOWARDZERO 0x1.dfffffffffffep-1 + +Testing: (d1 = -(1.0), d2 = -(0x1.000000000000dp-4)) -> d1 + d2 +FE_TONEAREST -0x1.1000000000001p+0 +FE_DOWNWARD -0x1.1000000000001p+0 +FE_UPWARD -0x1.1p+0 +FE_TOWARDZERO -0x1.1p+0 + +Testing: (d1 = (1.0), d2 = (0x1.000000000000dp-4)) -> d1 - d2 +FE_TONEAREST 0x1.dfffffffffffep-1 +FE_DOWNWARD 0x1.dfffffffffffep-1 +FE_UPWARD 0x1.dffffffffffffp-1 +FE_TOWARDZERO 0x1.dfffffffffffep-1 + +Testing: (d1 = -(1.0), d2 = (0x1.000000000000dp-4)) -> d1 - d2 +FE_TONEAREST -0x1.1000000000001p+0 +FE_DOWNWARD -0x1.1000000000001p+0 +FE_UPWARD -0x1.1p+0 +FE_TOWARDZERO -0x1.1p+0 + +Testing: (d1 = (1.0), d2 = -(0x1.000000000000dp-4)) -> d1 - d2 +FE_TONEAREST 0x1.1000000000001p+0 +FE_DOWNWARD 0x1.1p+0 +FE_UPWARD 0x1.1000000000001p+0 +FE_TOWARDZERO 0x1.1p+0 + +Testing: (d1 = -(1.0), d2 = -(0x1.000000000000dp-4)) -> d1 - d2 +FE_TONEAREST -0x1.dfffffffffffep-1 +FE_DOWNWARD -0x1.dffffffffffffp-1 +FE_UPWARD -0x1.dfffffffffffep-1 +FE_TOWARDZERO -0x1.dfffffffffffep-1 + +Testing: (d1 = (1.0), d2 = (0x1.000000000000dp-4)) -> d2 - d1 +FE_TONEAREST -0x1.dfffffffffffep-1 +FE_DOWNWARD -0x1.dffffffffffffp-1 +FE_UPWARD -0x1.dfffffffffffep-1 +FE_TOWARDZERO -0x1.dfffffffffffep-1 + +Testing: (d1 = -(1.0), d2 = (0x1.000000000000dp-4)) -> d2 - d1 +FE_TONEAREST 0x1.1000000000001p+0 +FE_DOWNWARD 0x1.1p+0 +FE_UPWARD 0x1.1000000000001p+0 +FE_TOWARDZERO 0x1.1p+0 + +Testing: (d1 = (1.0), d2 = -(0x1.000000000000dp-4)) -> d2 - d1 +FE_TONEAREST -0x1.1000000000001p+0 +FE_DOWNWARD -0x1.1000000000001p+0 +FE_UPWARD -0x1.1p+0 +FE_TOWARDZERO -0x1.1p+0 + +Testing: (d1 = -(1.0), d2 = -(0x1.000000000000dp-4)) -> d2 - d1 +FE_TONEAREST 0x1.dfffffffffffep-1 +FE_DOWNWARD 0x1.dfffffffffffep-1 +FE_UPWARD 0x1.dffffffffffffp-1 +FE_TOWARDZERO 0x1.dfffffffffffep-1 + +Testing X87 instruction: "FSQRT" (ST0 = 0x1p+2, ST1 = 0x0p+0) +FE_TONEAREST ST0 = 0x1p+1 +FE_DOWNWARD ST0 = 0x1p+1 +FE_UPWARD ST0 = 0x1p+1 +FE_TOWARDZERO ST0 = 0x1p+1 + +Testing X87 instruction: "FSQRT" (ST0 = 0x1.0000000000001p+1, ST1 = 0x0p+0) +FE_TONEAREST ST0 = 0x1.6a09e667f3bcdp+0 +FE_DOWNWARD ST0 = 0x1.6a09e667f3bcdp+0 +FE_UPWARD ST0 = 0x1.6a09e667f3bcep+0 +FE_TOWARDZERO ST0 = 0x1.6a09e667f3bcdp+0 + +Testing X87 instruction: "FSQRT" (ST0 = 0x1.123456789abcp+31, ST1 = 0x0p+0) 
+FE_TONEAREST   ST0 = 0x1.76b0aac9e6a5p+15
+FE_DOWNWARD    ST0 = 0x1.76b0aac9e6a4fp+15
+FE_UPWARD      ST0 = 0x1.76b0aac9e6a5p+15
+FE_TOWARDZERO  ST0 = 0x1.76b0aac9e6a4fp+15
+
+Testing X87 instruction: "FSQRT" (ST0 = 0x1.123456789abdp+31, ST1 = 0x0p+0)
+FE_TONEAREST   ST0 = 0x1.76b0aac9e6a5bp+15
+FE_DOWNWARD    ST0 = 0x1.76b0aac9e6a5ap+15
+FE_UPWARD      ST0 = 0x1.76b0aac9e6a5bp+15
+FE_TOWARDZERO  ST0 = 0x1.76b0aac9e6a5ap+15
+
diff --git a/tests/roundtest.h b/tests/roundtest.h
new file mode 100644
index 00000000..320e22c9
--- /dev/null
+++ b/tests/roundtest.h
@@ -0,0 +1,114 @@
+#pragma STDC FENV_ACCESS ON
+#include <assert.h>
+#include <stdio.h>
+
+#ifdef USE_ASM_ROUNDING
+int fesetround_(int rounding_direction) {
+  uint16_t old_cw;
+  __asm__("FNSTCW %0" : "=m"(old_cw)::);
+  uint16_t new_cw = (old_cw & ~0xc00) | rounding_direction;
+  __asm__("FLDCW %0" ::"m"(new_cw));
+  return old_cw & 0xc00;
+}
+int fegetround_() {
+  uint16_t cw;
+  __asm__("FNSTCW %0" : "=m"(cw)::);
+  return cw & 0xc00;
+}
+#define fesetround fesetround_
+#define fegetround fegetround_
+#define FE_TONEAREST 0
+#define FE_DOWNWARD 0x400
+#define FE_UPWARD 0x800
+#define FE_TOWARDZERO 0xc00
+#else
+#include <fenv.h>
+#endif
+
+#define FE_TONEAREST_INDEX 0
+#define FE_DOWNWARD_INDEX 1
+#define FE_UPWARD_INDEX 2
+#define FE_TOWARDZERO_INDEX 3
+int FE_MODES[] = {FE_TONEAREST, FE_DOWNWARD, FE_UPWARD, FE_TOWARDZERO};
+char *FE_MODES_STR[] = {
+    "FE_TONEAREST",
+    "FE_DOWNWARD",
+    "FE_UPWARD",
+    "FE_TOWARDZERO",
+};
+
+void assert_round(double *array) {
+  assert(array[FE_DOWNWARD_INDEX] <= array[FE_TONEAREST_INDEX]);
+  assert(array[FE_TONEAREST_INDEX] <= array[FE_UPWARD_INDEX]);
+  if (array[FE_TOWARDZERO_INDEX] < 0)
+    assert(array[FE_TOWARDZERO_INDEX] == array[FE_UPWARD_INDEX]);
+  else if (array[FE_TOWARDZERO_INDEX] > 0)
+    assert(array[FE_TOWARDZERO_INDEX] == array[FE_DOWNWARD_INDEX]);
+  else if (array[FE_TOWARDZERO_INDEX] == 0)
+    assert(array[FE_TOWARDZERO_INDEX] == array[FE_UPWARD_INDEX] ||
+           array[FE_TOWARDZERO_INDEX] == array[FE_DOWNWARD_INDEX]);
+}
+
+#define TEST_(exec, expr, format) \
+  do { \
+    if (sizeof(#exec) == 1) \
+      printf("Testing: %s\n", #expr); \
+    else \
+      printf("Testing: %s -> %s\n", #exec, #expr); \
+    for (int i = 0; i < sizeof(FE_MODES) / sizeof(FE_MODES[0]); i++) { \
+      fesetround(FE_MODES[i]); \
+      exec; \
+      printf("%-15s" format "\n", FE_MODES_STR[i], expr); \
+      assert(FE_MODES[i] == fegetround()); \
+    } \
+    printf("\n"); \
+  } while (0)
+
+#define TEST(exec, expr) TEST_(exec, expr, "%a")
+
+#if defined(i386) || defined(__i386__) || defined(__i386) || \
+    defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+#define TEST_X87(instruction, st0, st1, deep_change) \
+  do { \
+    double _st0 = (st0), _st1 = (st1); \
+    double array1[4], array2[4]; \
+    double __st0, __st1; \
+    printf("Testing X87 instruction: %s (ST0 = %a, ST1 = %a)\n", #instruction, \
+           _st0, _st1); \
+    for (int i = 0; i < sizeof(FE_MODES) / sizeof(FE_MODES[0]); i++) { \
+      fesetround(FE_MODES[i]); \
+      __st0 = _st0, __st1 = _st1; \
+      switch (deep_change) { \
+      case -1: /* the instruction pops */ \
+        __asm__(instruction : "+t"(__st0) : "u"(__st1) : "st(1)"); \
+        printf("%-15s ST0 = %a\n", FE_MODES_STR[i], __st0); \
+        break; \
+      case 0: \
+        __asm__(instruction : "+t"(__st0) : "u"(__st1) :); \
+        printf("%-15s ST0 = %a\n", FE_MODES_STR[i], __st0); \
+        break; \
+      case 1: /* the instruction pushes */ \
+        __asm__(instruction : "+t"(__st0), "=u"(__st1)::); \
+        printf("%-15s ST0 = %a, ST1 = %a\n", FE_MODES_STR[i], __st0, __st1); \
+        array2[i] = __st1; \
+      } \
+      array1[i] = __st0; \
+      assert(FE_MODES[i] == fegetround()); \
+    } \
+    if (deep_change == 1) \
+      assert_round(array2); \
+    assert_round(array1); \
+    printf("\n"); \
+  } while (0)
+#else
+#define TEST_X87(instruction, st0, st1, deep_change) \
+  do { \
+    double _st0 = (st0), _st1 = (st1); \
+    printf("Cannot test X87 instruction: %s (ST0 = %a, ST1 = %a) because it " \
+           "is not compiled to x86\n\n", \
+           #instruction, _st0, _st1); \
+  } while (0)
+#endif
+
+#define TEST_X87_1(i, st0) TEST_X87(i, st0, 0.0, 0)
+#define TEST_X87_2(i, st0, st1) TEST_X87(i, st0, st1, -1)
diff --git a/tests/test31 b/tests/test31
new file mode 100755
index 00000000..432bc57f
--- /dev/null
+++ b/tests/test31
Binary files differ
diff --git a/tests/test31.c b/tests/test31.c
new file mode 100644
index 00000000..379ddec5
--- /dev/null
+++ b/tests/test31.c
@@ -0,0 +1,114 @@
+// Copy from Box86/tests/test26.c (Box64/tests32/test26.c)
+// Build with `gcc -march=core2 -O0 test31.c -o test31 -std=c99 -masm=intel -mfpmath=387 -frounding-math`
+#include <fenv.h>
+#include <float.h>
+#include <inttypes.h>
+#include <stdint.h>
+#define USE_ASM_ROUNDING
+#include "roundtest.h"
+
+#define TEST_CONVERT_(stype, s_) \
+  do { \
+    stype s; \
+    TEST_(s = (s_), (double)s, "%a"); \
+    TEST_(s = (s_), (float)s, "%a"); \
+    /* converting too large float to integer, the result is undefined, on both \
+     * c99 and FISTP instruction */ \
+    if (INT64_MIN <= s && INT64_MAX <= s) \
+      TEST_(s = (s_), (int64_t)s, "%" PRId64); \
+    if (INT32_MIN <= s && INT32_MAX <= s) \
+      TEST_(s = (s_), (int32_t)s, "%" PRId32); \
+    if (INT16_MIN <= s && INT16_MAX <= s) \
+      TEST_(s = (s_), (int16_t)s, "%" PRId16); \
+    if (INT8_MIN <= s && INT8_MAX <= s) \
+      TEST_(s = (s_), (int8_t)s, "%" PRId8); \
+    if (0 <= s && UINT64_MAX <= s) \
+      TEST_(s = (s_), (uint64_t)s, "%" PRIu64); \
+    if (0 <= s && UINT32_MAX <= s) \
+      TEST_(s = (s_), (unsigned int)s, "%" PRIu32); \
+    if (0 <= s && UINT16_MAX <= s) \
+      TEST_(s = (s_), (unsigned short)s, "%" PRIu16); \
+    if (0 <= s && UINT8_MAX <= s) \
+      TEST_(s = (s_), (unsigned char)s, "%" PRIu8); \
+  } while (0)
+
+#define TEST_CONVERT(stype, s_) \
+  do { \
+    TEST_CONVERT_(stype, s_); \
+    TEST_CONVERT_(stype, -(s_)); \
+  } while (0)
+
+#define TEST_2NUMBER(d1type, d1_, d2type, d2_, operation) \
+  do { \
+    d1type d1; \
+    d2type d2; \
+    TEST((d1 = (d1_), d2 = (d2_)), operation); \
+    TEST((d1 = -(d1_), d2 = (d2_)), operation); \
+    TEST((d1 = (d1_), d2 = -(d2_)), operation); \
+    TEST((d1 = -(d1_), d2 = -(d2_)), operation); \
+  } while (0)
+
+int main() {
+  double d;
+  float f;
+  int64_t i64;
+  TEST_CONVERT(double, 0x1.123456789abcp2); // FISTTP
+  TEST_(d = (0x1.123456789abcp512), (float)d, "%a");
+  TEST_CONVERT(double, 0x1.123456789abcp29);
+  TEST_(d = (-0x1.123456789abcp30), (int32_t)d, "%" PRId32);
+  TEST_(d = (-0x1.123456789abcp62), (int64_t)d, "%" PRId64);
+
+  TEST_CONVERT(float, 0x1.123456789abcp2f);
+  TEST_CONVERT(float, 0x1.123456789abcp29f);
+  TEST_(f = -0x1.123456789abcp30f, (int32_t)f, "%" PRId32);
+  // to be fixed:
+  //TEST_(f = -0x1.123456789abcp62f, (int64_t)f, "%" PRId64);
+  // The direction of rounding when an integer is converted to a floating-point
+  // number that cannot exactly represent the original value
+  // https://gcc.gnu.org/onlinedocs/gcc/Floating-point-implementation.html
+  // to be fixed:
+  //TEST_(i64 = INT64_MAX, (double)i64, "%a"); // FILD and FSTP
+  TEST(d = -0x1.1234567p0, (double)((int)d));
+  TEST(d = 0x1.9234567p0, (double)((int)d));
+  TEST(d = -0x1.9234567p0, (double)((int)d));
+
+  TEST(d = 0x1.1234567p0, (double)((long int)d));
+  TEST(d = -0x1.1234567p0, (double)((long int)d));
+  TEST(d = 0x1.9234567p0, (double)((long int)d));
+  TEST(d = -0x1.9234567p0, (double)((long int)d));
+
+  TEST_2NUMBER(double, 1.0, double, 0x1.0000000000001p0, d1 + d2);
+  TEST_2NUMBER(double, 1.0, double, 0x1.0000000000001p0, d1 - d2);
+  TEST_2NUMBER(double, 1.0, double, 0x1.0000000000001p0, d2 - d1);
+  TEST_2NUMBER(double, 1.0, double, 0x1.000000000000dp-4, d1 + d2);
+  TEST_2NUMBER(double, 1.0, double, 0x1.000000000000dp-4, d1 - d2);
+  TEST_2NUMBER(double, 1.0, double, 0x1.000000000000dp-4, d2 - d1);
+
+  TEST_2NUMBER(double, 0x1.233445566778p0, double, 0x1.3456789abcdep0, d1 + d2);
+  TEST_2NUMBER(float, 0x1.233445566778p0f, float, 0x1.3456789abcdep0f, d1 *d2);
+  TEST_2NUMBER(float, 0x1.233445566778p0f, double, 0x1.3456789abcdep0, d1 *d2);
+  TEST_2NUMBER(double, 0x1.233445566778p0, double, 0x1.3456789abcdep0, d1 *d2);
+  TEST_2NUMBER(float, 0x1.233445566778p0f, double, 0x1.3456789abcdep0, d1 *d2);
+  TEST_2NUMBER(double, 0x1.233445566778p0, float, 0x1.3456789abcdep0, d1 *d2);
+  TEST_2NUMBER(float, 0x1.233445566778p0f, float, 0x1.3456789abcdep0f, d1 *d2);
+  TEST_2NUMBER(double, 0x1.233445566778p0, int, 5, d1 *d2);
+  TEST_2NUMBER(int, 15, double, 0x1.3456789abcdep0f, d1 *d2);
+  TEST_2NUMBER(float, 0x1.233445566778p0f, int, 15, d1 *d2);
+  TEST_2NUMBER(int, 15, float, 0x1.3456789abcdep0f, d1 *d2);
+
+  TEST_2NUMBER(double, 0x1.233445566778p0, double, 0x1.3456789abcdep0, d1 / d2);
+  TEST_2NUMBER(double, 0x1.233445566778p0, double, 0x1.3456789abcdep0, d1 / d2);
+  TEST_2NUMBER(double, 0x1.233445566778p0, float, 0x1.3456789abcdep0f, d1 / d2);
+
+  TEST_2NUMBER(double, 1.0, double, 0x1.0000000000001p0, d2 - d1);
+  TEST_2NUMBER(double, 1.0, double, 0x1.000000000000dp-4, d1 + d2);
+  TEST_2NUMBER(double, 1.0, double, 0x1.000000000000dp-4, d1 - d2);
+  TEST_2NUMBER(double, 1.0, double, 0x1.000000000000dp-4, d2 - d1);
+
+  TEST_X87_1("FSQRT", 0x1.0000000000000p2);
+  TEST_X87_1("FSQRT", 0x1.0000000000001p1);
+  TEST_X87_1("FSQRT", 0x1.123456789abcp31);
+  TEST_X87_1("FSQRT", 0x1.123456789abdp31);
+
+  return 0;
+}
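For readers who want to see the invariant behind these reference values without the x87 inline assembly, the following standalone sketch (not part of the commit; the file name and build line are illustrative) reproduces the check that `assert_round()` in `roundtest.h` performs. It substitutes the portable `<fenv.h>` interface and libm's correctly rounded `sqrt()` for the raw `FLDCW`/`FSQRT` instructions the test uses when `USE_ASM_ROUNDING` is defined:

```c
/* sketch.c -- illustrative only, not part of this patch.
 * Build e.g. with: gcc -O0 -std=c99 -frounding-math sketch.c -lm
 */
#include <assert.h>
#include <fenv.h>
#include <math.h>
#include <stdio.h>

int main(void) {
    const int modes[4] = {FE_TONEAREST, FE_DOWNWARD, FE_UPWARD, FE_TOWARDZERO};
    const char *names[4] = {"FE_TONEAREST", "FE_DOWNWARD", "FE_UPWARD",
                            "FE_TOWARDZERO"};
    /* volatile keeps the compiler from folding sqrt() at compile time */
    volatile double x = 0x1.0000000000001p+1;
    double r[4];

    for (int i = 0; i < 4; i++) {
        fesetround(modes[i]);           /* select the rounding direction */
        r[i] = sqrt(x);                 /* IEEE-754 sqrt honours that mode */
        printf("%-15s %a\n", names[i], r[i]);
    }
    fesetround(FE_TONEAREST);

    /* The directed modes must bracket round-to-nearest ... */
    assert(r[1] <= r[0] && r[0] <= r[2]);
    /* ... and for a positive result, round-toward-zero equals round-down. */
    assert(r[3] == r[1]);
    return 0;
}
```

Run natively, the four printed values should line up with the `FSQRT` block for `ST0 = 0x1.0000000000001p+1` in `ref31.txt` above, since both libm and the x87 unit round the exact square root according to the currently selected direction.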