diff options
Diffstat (limited to 'src/dynarec')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_d8.c | 49 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_d9.c | 48 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_da.c | 25 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_dc.c | 49 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_de.c | 25 | ||||
| -rw-r--r-- | src/dynarec/dynarec_native_functions.c | 16 |
6 files changed, 203 insertions, 9 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_d8.c b/src/dynarec/arm64/dynarec_arm64_d8.c index e8f002a4..826cad7d 100644 --- a/src/dynarec/arm64/dynarec_arm64_d8.c +++ b/src/dynarec/arm64/dynarec_arm64_d8.c @@ -29,6 +29,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin uint8_t nextop = F8; uint8_t ed; + uint8_t u8; int64_t fixedaddress; int unscaled; int v1, v2; @@ -51,11 +52,15 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FADD ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FADDS(v1, v1, v2); } else { FADDD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xC8: case 0xC9: @@ -68,11 +73,15 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FMUL ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FMULS(v1, v1, v2); } else { FMULD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xD0: case 0xD1: @@ -122,11 +131,15 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FSUB ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FSUBS(v1, v1, v2); } else { FSUBD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xE8: case 0xE9: @@ -139,11 +152,15 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FSUBR ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FSUBS(v1, v2, v1); } else { FSUBD(v1, v2, v1); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xF0: case 0xF1: @@ -156,11 +173,15 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FDIV ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FDIVS(v1, v1, v2); } else { FDIVD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xF8: case 0xF9: @@ -173,11 +194,15 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FDIVR ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FDIVS(v1, v2, v1); } else { FDIVD(v1, v2, v1); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; default: DEFAULT; @@ -190,12 +215,16 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin s0 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); VLD32(s0, ed, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); if(ST_IS_F(0)) { FADDS(v1, v1, s0); } else { FCVT_D_S(s0, s0); FADDD(v1, v1, s0); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 1: INST_NAME("FMUL ST0, float[ED]"); @@ -203,12 +232,16 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin s0 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); VLD32(s0, ed, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); if(ST_IS_F(0)) { FMULS(v1, v1, s0); } else { FCVT_D_S(s0, s0); FMULD(v1, v1, s0); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 2: INST_NAME("FCOM ST0, float[ED]"); @@ -245,12 +278,16 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin s0 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); VLD32(s0, ed, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); if(ST_IS_F(0)) { FSUBS(v1, v1, s0); } else { FCVT_D_S(s0, s0); FSUBD(v1, v1, s0); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 5: INST_NAME("FSUBR ST0, float[ED]"); @@ -258,12 +295,16 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin s0 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); VLD32(s0, ed, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); if(ST_IS_F(0)) { FSUBS(v1, s0, v1); } else { FCVT_D_S(s0, s0); FSUBD(v1, s0, v1); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 6: INST_NAME("FDIV ST0, float[ED]"); @@ -271,12 +312,16 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin s0 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); VLD32(s0, ed, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); if(ST_IS_F(0)) { FDIVS(v1, v1, s0); } else { FCVT_D_S(s0, s0); FDIVD(v1, v1, s0); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 7: INST_NAME("FDIVR ST0, float[ED]"); @@ -284,12 +329,16 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin s0 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); VLD32(s0, ed, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); if(ST_IS_F(0)) { FDIVS(v1, s0, v1); } else { FCVT_D_S(s0, s0); FDIVD(v1, s0, v1); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; default: DEFAULT; diff --git a/src/dynarec/arm64/dynarec_arm64_d9.c b/src/dynarec/arm64/dynarec_arm64_d9.c index 378070a0..c1961a01 100644 --- a/src/dynarec/arm64/dynarec_arm64_d9.c +++ b/src/dynarec/arm64/dynarec_arm64_d9.c @@ -291,8 +291,12 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MESSAGE(LOG_DUMP, "Need Optimization\n"); i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 0); - CALL(native_ftan, -1); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); + CALL_(native_ftan, -1, box64_dynarec_fastround ? 0 : u8); x87_unstackcount(dyn, ninst, x1, i1); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); if(PK(0)==0xdd && PK(1)==0xd8) { MESSAGE(LOG_DUMP, "Optimized next DD D8 fstp st0, st0, not emitting 1\n"); u8 = F8; @@ -312,7 +316,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 0); x87_forget(dyn, ninst, x1, x2, 1); - CALL(native_fpatan, -1); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); + CALL_(native_fpatan, -1, box64_dynarec_fastround ? 0 : u8); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); x87_unstackcount(dyn, ninst, x1, i1); X87_POP_OR_FAIL(dyn, ninst, x3); break; @@ -418,11 +426,15 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin case 0xFA: INST_NAME("FSQRT"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FSQRTS(v1, v1); } else { FSQRTD(v1, v1); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xFB: INST_NAME("FSINCOS"); @@ -430,7 +442,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, 0); i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 1); - CALL(native_fsincos, -1); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); + CALL_(native_fsincos, -1, box64_dynarec_fastround ? 0 : u8); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); x87_unstackcount(dyn, ninst, x1, i1); break; case 0xFC: @@ -457,7 +473,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 0); x87_forget(dyn, ninst, x1, x2, 1); - CALL(native_fscale, -1); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); + CALL_(native_fscale, -1, box64_dynarec_fastround ? 0 : u8); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); x87_unstackcount(dyn, ninst, x1, i1); break; case 0xFE: @@ -465,7 +485,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MESSAGE(LOG_DUMP, "Need Optimization\n"); i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 0); - CALL(native_fsin, -1); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); + CALL_(native_fsin, -1, box64_dynarec_fastround ? 0 : u8); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); x87_unstackcount(dyn, ninst, x1, i1); break; case 0xFF: @@ -473,7 +497,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin MESSAGE(LOG_DUMP, "Need Optimization\n"); i1 = x87_stackcount(dyn, ninst, x1); x87_forget(dyn, ninst, x1, x2, 0); - CALL(native_fcos, -1); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); + CALL_(native_fcos, -1, box64_dynarec_fastround ? 0 : u8); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); x87_unstackcount(dyn, ninst, x1, i1); break; default: @@ -497,7 +525,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin s0 = v1; else { s0 = fpu_get_scratch(dyn, ninst); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); FCVT_S_D(s0, v1); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); } addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); VST32(s0, ed, fixedaddress); @@ -507,7 +539,11 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_F); addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0); if(!ST_IS_F(0)) { + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); FCVT_S_D(v1, v1); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); } VST32(v1, ed, fixedaddress); X87_POP_OR_FAIL(dyn, ninst, x3); diff --git a/src/dynarec/arm64/dynarec_arm64_da.c b/src/dynarec/arm64/dynarec_arm64_da.c index 52965b3f..6e4bb528 100644 --- a/src/dynarec/arm64/dynarec_arm64_da.c +++ b/src/dynarec/arm64/dynarec_arm64_da.c @@ -29,6 +29,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin int64_t j64; uint8_t ed; uint8_t wback; + uint8_t u8; int v1, v2; int d0; int s0; @@ -148,7 +149,11 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VLD32(v2, ed, fixedaddress); SXTL_32(v2, v2); // i32 -> i64 SCVTFDD(v2, v2); // i64 -> double + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); FADDD(v1, v1, v2); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 1: INST_NAME("FIMUL ST0, Ed"); @@ -158,7 +163,11 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VLD32(v2, ed, fixedaddress); SXTL_32(v2, v2); // i32 -> i64 SCVTFDD(v2, v2); // i64 -> double + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); FMULD(v1, v1, v2); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 2: INST_NAME("FICOM ST0, Ed"); @@ -191,7 +200,11 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VLD32(v2, ed, fixedaddress); SXTL_32(v2, v2); // i32 -> i64 SCVTFDD(v2, v2); // i64 -> double + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); FSUBD(v1, v1, v2); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 5: INST_NAME("FISUBR ST0, Ed"); @@ -201,7 +214,11 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VLD32(v2, ed, fixedaddress); SXTL_32(v2, v2); // i32 -> i64 SCVTFDD(v2, v2); // i64 -> double + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); FSUBD(v1, v2, v1); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 6: INST_NAME("FIDIV ST0, Ed"); @@ -211,7 +228,11 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VLD32(v2, ed, fixedaddress); SXTL_32(v2, v2); // i32 -> i64 SCVTFDD(v2, v2); // i64 -> double + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); FDIVD(v1, v1, v2); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 7: INST_NAME("FIDIVR ST0, Ed"); @@ -221,7 +242,11 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VLD32(v2, ed, fixedaddress); SXTL_32(v2, v2); // i32 -> i64 SCVTFDD(v2, v2); // i64 -> double + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x5, x4); FDIVD(v1, v2, v1); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; } return addr; diff --git a/src/dynarec/arm64/dynarec_arm64_dc.c b/src/dynarec/arm64/dynarec_arm64_dc.c index ee5b2c62..2fbac9d8 100644 --- a/src/dynarec/arm64/dynarec_arm64_dc.c +++ b/src/dynarec/arm64/dynarec_arm64_dc.c @@ -29,6 +29,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin uint8_t nextop = F8; uint8_t wback; + uint8_t u8; int64_t fixedaddress; int unscaled; int v1, v2; @@ -49,11 +50,15 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FADD STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FADDS(v1, v1, v2); } else { FADDD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xC8: case 0xC9: @@ -66,11 +71,15 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FMUL STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FMULS(v1, v1, v2); } else { FMULD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xD0: case 0xD1: @@ -120,11 +129,15 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FSUBR STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FSUBS(v1, v2, v1); } else { FSUBD(v1, v2, v1); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xE8: case 0xE9: @@ -137,11 +150,15 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FSUB STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FSUBS(v1, v1, v2); } else { FSUBD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xF0: case 0xF1: @@ -154,11 +171,15 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FDIVR STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FDIVS(v1, v2, v1); } else { FDIVD(v1, v2, v1); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xF8: case 0xF9: @@ -171,11 +192,15 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FDIV STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FDIVS(v1, v1, v2); } else { FDIVD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; default: DEFAULT; @@ -188,7 +213,11 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin v2 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0); VLD64(v2, wback, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); FADDD(v1, v1, v2); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 1: INST_NAME("FMUL ST0, double[ED]"); @@ -196,7 +225,11 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin v2 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0); VLD64(v2, wback, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); FMULD(v1, v1, v2); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 2: INST_NAME("FCOM ST0, double[ED]"); @@ -223,7 +256,11 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin v2 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0); VLD64(v2, wback, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); FSUBD(v1, v1, v2); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 5: INST_NAME("FSUBR ST0, double[ED]"); @@ -231,7 +268,11 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin v2 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0); VLD64(v2, wback, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); FSUBD(v1, v2, v1); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 6: INST_NAME("FDIV ST0, double[ED]"); @@ -239,7 +280,11 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin v2 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0); VLD64(v2, wback, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); FDIVD(v1, v1, v2); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 7: INST_NAME("FDIVR ST0, double[ED]"); @@ -247,7 +292,11 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin v2 = fpu_get_scratch(dyn, ninst); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0); VLD64(v2, wback, fixedaddress); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); FDIVD(v1, v2, v1); + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; } return addr; diff --git a/src/dynarec/arm64/dynarec_arm64_de.c b/src/dynarec/arm64/dynarec_arm64_de.c index 7dbbb210..8dcc16c2 100644 --- a/src/dynarec/arm64/dynarec_arm64_de.c +++ b/src/dynarec/arm64/dynarec_arm64_de.c @@ -29,6 +29,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin uint8_t nextop = F8; uint8_t wback; + uint8_t u8; int64_t fixedaddress; int unscaled; int v1, v2; @@ -49,11 +50,15 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FADDP STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FADDS(v1, v1, v2); } else { FADDD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; case 0xC8: @@ -67,11 +72,15 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FMULP STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FMULS(v1, v1, v2); } else { FMULD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; case 0xD0: @@ -117,11 +126,15 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FSUBRP STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FSUBS(v1, v2, v1); } else { FSUBD(v1, v2, v1); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; case 0xE8: @@ -135,11 +148,15 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FSUBP STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FSUBS(v1, v1, v2); } else { FSUBD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; case 0xF0: @@ -153,11 +170,15 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FDIVRP STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FDIVS(v1, v2, v1); } else { FDIVD(v1, v2, v1); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; case 0xF8: @@ -171,11 +192,15 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("FDIVP STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box64_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x4); if(ST_IS_F(0)) { FDIVS(v1, v1, v2); } else { FDIVD(v1, v1, v2); } + if(!box64_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; default: diff --git a/src/dynarec/dynarec_native_functions.c b/src/dynarec/dynarec_native_functions.c index 683d8256..18082d82 100644 --- a/src/dynarec/dynarec_native_functions.c +++ b/src/dynarec/dynarec_native_functions.c @@ -42,7 +42,7 @@ void native_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n) void native_f2xm1(x64emu_t* emu) { - ST0.d = exp2(ST0.d) - 1.0; + ST0.d = expm1(LN2 * ST0.d); } void native_fyl2x(x64emu_t* emu) { @@ -50,11 +50,14 @@ void native_fyl2x(x64emu_t* emu) } void native_ftan(x64emu_t* emu) { +#pragma STDC FENV_ACCESS ON + // seems that tan of glib doesn't follow the rounding direction mode ST0.d = tan(ST0.d); emu->sw.f.F87_C2 = 0; } void native_fpatan(x64emu_t* emu) { +#pragma STDC FENV_ACCESS ON ST1.d = atan2(ST1.d, ST0.d); } void native_fxtract(x64emu_t* emu) @@ -97,10 +100,12 @@ void native_fprem(x64emu_t* emu) } void native_fyl2xp1(x64emu_t* emu) { - ST(1).d = log2(ST0.d + 1.0)*ST(1).d; + ST(1).d = log1p(ST0.d)*ST(1).d/LN2; } void native_fsincos(x64emu_t* emu) { +#pragma STDC FENV_ACCESS ON + // seems that sincos of glib doesn't follow the rounding direction mode sincos(ST1.d, &ST1.d, &ST0.d); emu->sw.f.F87_C2 = 0; } @@ -110,16 +115,21 @@ void native_frndint(x64emu_t* emu) } void native_fscale(x64emu_t* emu) { +#pragma STDC FENV_ACCESS ON if(ST0.d!=0.0) - ST0.d *= exp2(trunc(ST1.d)); + ST0.d = ldexp(ST0.d, trunc(ST1.d)); } void native_fsin(x64emu_t* emu) { +#pragma STDC FENV_ACCESS ON + // seems that sin of glib doesn't follow the rounding direction mode ST0.d = sin(ST0.d); emu->sw.f.F87_C2 = 0; } void native_fcos(x64emu_t* emu) { +#pragma STDC FENV_ACCESS ON + // seems that cos of glib doesn't follow the rounding direction mode ST0.d = cos(ST0.d); emu->sw.f.F87_C2 = 0; } |