1 files changed, 42 insertions, 5 deletions
diff --git a/src/emu/x64rund9.c b/src/emu/x64rund9.c
index 648eb0a7..c7b8d308 100644
--- a/src/emu/x64rund9.c
+++ b/src/emu/x64rund9.c
@@ -1,4 +1,5 @@
 #define _GNU_SOURCE
+#include <fenv.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -17,6 +18,7 @@
 #include "x64primop.h"
 #include "x64trace.h"
 #include "x87emu_private.h"
+#include "x87emu_setround.h"
 #include "box64context.h"
 #include "bridge.h"
 
@@ -33,6 +35,7 @@ uintptr_t RunD9(x64emu_t *emu, rex_t rex, uintptr_t addr, uintptr_t offs)
     uint64_t ll;
     float f;
     reg64_t *oped;
+    int oldround;
     #ifdef TEST_INTERPRETER
     x64emu_t*emu = test->emu;
     #endif
@@ -123,20 +126,30 @@ uintptr_t RunD9(x64emu_t *emu, rex_t rex, uintptr_t addr, uintptr_t offs)
             break;
 
         case 0xF0:  /* F2XM1 */
-            ST0.d = exp2(ST0.d) - 1.0;
+            if (ST0.d == 0)
+                break;
+            // Using the expm1 instead of exp2(ST0)-1 can avoid losing precision much,
+            // expecially when ST0 is close to zero (which loses the precise when -1).
+            // printf("%a, %a\n", LN2 * ST0.d, expm1(LN2 * ST0.d));
+            ST0.d = expm1(LN2 * ST0.d);
+            //    = 2^ST0 - 1 + error. (in math)
             break;
         case 0xF1:  /* FYL2X */
             ST(1).d *= log2(ST0.d);
             fpu_do_pop(emu);
             break;
         case 0xF2:  /* FPTAN */
+            oldround = fpu_setround(emu);
             ST0.d = tan(ST0.d);
+            fesetround(oldround);
             fpu_do_push(emu);
             ST0.d = 1.0;
             emu->sw.f.F87_C2 = 0;
             break;
         case 0xF3:  /* FPATAN */
+            oldround = fpu_setround(emu);
             ST1.d = atan2(ST1.d, ST0.d);
+            fesetround(oldround);
             fpu_do_pop(emu);
             break;
         case 0xF4:  /* FXTRACT */
@@ -209,15 +222,22 @@ uintptr_t RunD9(x64emu_t *emu, rex_t rex, uintptr_t addr, uintptr_t offs)
                 emu->top=(emu->top+1)&7;    // this will probably break a few things
             break;
         case 0xF9:  /* FYL2XP1 */
-            ST(1).d *= log2(ST0.d + 1.0);
+            // Using the log1p instead of log2(ST0+1) can avoid losing precision much,
+            // expecially when ST0 is close to zero (which loses the precise when +1).
+            ST(1).d = (ST(1).d * log1p(ST0.d)) / M_LN2;
+            //      = ST1 * log2(ST0 + 1) + error. (in math)
             fpu_do_pop(emu);
             break;
         case 0xFA:  /* FSQRT */
+            oldround = fpu_setround(emu);
             ST0.d = sqrt(ST0.d);
+            fesetround(oldround);
             break;
         case 0xFB:  /* FSINCOS */
             fpu_do_push(emu);
+            oldround = fpu_setround(emu);
             sincos(ST1.d, &ST1.d, &ST0.d);
+            fesetround(oldround);
             emu->sw.f.F87_C2 = 0;
             break;
         case 0xFC:  /* FRNDINT */
@@ -225,15 +245,28 @@ uintptr_t RunD9(x64emu_t *emu, rex_t rex, uintptr_t addr, uintptr_t offs)
             break;
         case 0xFD:  /* FSCALE */
             // this could probably be done by just altering the exponant part of the float...
-            if(ST0.d!=0.0)
-                ST0.d *= exp2(trunc(ST1.d));
+            if (ST1.d > INT32_MAX)
+                tmp32s = INT32_MAX;
+            else if (ST1.d < INT32_MIN)
+                tmp32s = INT32_MIN;
+            else
+                tmp32s = ST1.d;
+            if(ST0.d!=0.0) {
+                oldround = fpu_setround(emu);
+                ST0.d = ldexp(ST0.d, tmp32s);
+                fesetround(oldround);
+            }
             break;
         case 0xFE:  /* FSIN */
+            oldround = fpu_setround(emu);
             ST0.d = sin(ST0.d);
+            fesetround(oldround);
             emu->sw.f.F87_C2 = 0;
             break;
         case 0xFF:  /* FCOS */
+            oldround = fpu_setround(emu);
             ST0.d = cos(ST0.d);
+            fesetround(oldround);
             emu->sw.f.F87_C2 = 0;
             break;
 
@@ -257,7 +290,9 @@ uintptr_t RunD9(x64emu_t *emu, rex_t rex, uintptr_t addr, uintptr_t offs)
                 } else {
                     GETE4(0);
                 }
+                oldround = fpu_setround(emu);
                 *(float*)ED = ST0.d;
+                fesetround(oldround);
                 break;
             case 3:     /* FSTP Ed, ST0 */
                 if(offs) {
@@ -265,7 +300,9 @@ uintptr_t RunD9(x64emu_t *emu, rex_t rex, uintptr_t addr, uintptr_t offs)
                 } else {
                     GETE4(0);
                 }
+                oldround = fpu_setround(emu);
                 *(float*)ED = ST0.d;
+                fesetround(oldround);
                 fpu_do_pop(emu);
                 break;
             case 4:     /* FLDENV m */
@@ -309,5 +346,5 @@ uintptr_t RunD9(x64emu_t *emu, rex_t rex, uintptr_t addr, uintptr_t offs)
             default:
                 return 0;
         }
-   return addr;
+    return addr;
 }
\ No newline at end of file