about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorYang Liu <liuyang22@iscas.ac.cn>2023-10-25 20:14:11 +0800
committerGitHub <noreply@github.com>2023-10-25 14:14:11 +0200
commit87bf751b115267d7c388c849c43fca6d3f0d0881 (patch)
treeedc6f3976538daf8dd1d9a49694d4bda19ef591d /src
parent12c40a5b804143cee0d538c4de4b526522bcfcd2 (diff)
downloadbox64-87bf751b115267d7c388c849c43fca6d3f0d0881.tar.gz
box64-87bf751b115267d7c388c849c43fca6d3f0d0881.zip
[INTERP] Fix fpu_round (#1030)
* [INTERP] Fix fpu_round

* Should be working this time

* Handling wrappedlibm

* Format

* Fix loongarch

* Make it optional

* Fix android build
Diffstat (limited to 'src')
-rw-r--r--src/emu/x64run0f.c7
-rw-r--r--src/emu/x64run660f.c37
-rw-r--r--src/emu/x64runf20f.c19
-rw-r--r--src/emu/x64runf30f.c13
-rw-r--r--src/emu/x87emu_private.h10
-rw-r--r--src/include/debug.h1
-rw-r--r--src/main.c7
-rw-r--r--src/tools/rcfile.c3
-rw-r--r--src/wrapped/generated/functions_list.txt30
-rw-r--r--src/wrapped/generated/wrappedlibmtypes.h20
-rw-r--r--src/wrapped/generated/wrapper.c20
-rw-r--r--src/wrapped/generated/wrapper.h10
-rw-r--r--src/wrapped/wrappedlibm.c112
-rw-r--r--src/wrapped/wrappedlibm_private.h24
14 files changed, 274 insertions, 39 deletions
diff --git a/src/emu/x64run0f.c b/src/emu/x64run0f.c
index a05b3596..e8a047f2 100644
--- a/src/emu/x64run0f.c
+++ b/src/emu/x64run0f.c
@@ -3,6 +3,7 @@
 #include <stdio.h>

 #include <stdlib.h>

 #include <math.h>

+#include <fenv.h>

 #include <string.h>

 #include <signal.h>

 #include <sys/types.h>

@@ -232,9 +233,13 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
                     tmp64s = INT32_MIN;

                 else

                     switch(emu->mxcsr.f.MXCSR_RC) {

-                        case ROUND_Nearest:

+                        case ROUND_Nearest: {

+                            int round = fegetround();

+                            fesetround(FE_TONEAREST);

                             tmp64s = nearbyintf(EX->f[i]);

+                            fesetround(round);

                             break;

+                        }

                         case ROUND_Down:

                             tmp64s = floorf(EX->f[i]);

                             break;

diff --git a/src/emu/x64run660f.c b/src/emu/x64run660f.c
index 84f5a38e..9b969c83 100644
--- a/src/emu/x64run660f.c
+++ b/src/emu/x64run660f.c
@@ -3,6 +3,7 @@
 #include <stdio.h>

 #include <stdlib.h>

 #include <math.h>

+#include <fenv.h>

 #include <string.h>

 #include <signal.h>

 #include <sys/types.h>

@@ -221,10 +222,14 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
         GETEX(0);

         GETGM;

         switch(emu->mxcsr.f.MXCSR_RC) {

-            case ROUND_Nearest:

+            case ROUND_Nearest: {

+                int round = fegetround();

+                fesetround(FE_TONEAREST);

                 i64[0] = nearbyint(EX->d[0]);

                 i64[1] = nearbyint(EX->d[1]);

+                fesetround(round);

                 break;

+            }

             case ROUND_Down:

                 i64[0] = floor(EX->d[0]);

                 i64[1] = floor(EX->d[1]);

@@ -818,10 +823,14 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 else

                     tmp8u &= 3;

                 switch(tmp8u) {

-                    case ROUND_Nearest:

+                    case ROUND_Nearest: {

+                        int round = fegetround();

+                        fesetround(FE_TONEAREST);

                         for(int i=0; i<4; ++i)

                             GX->f[i] = nearbyintf(EX->f[i]);

+                        fesetround(round);

                         break;

+                    }

                     case ROUND_Down:

                         for(int i=0; i<4; ++i)

                             GX->f[i] = floorf(EX->f[i]);

@@ -846,10 +855,14 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 else

                     tmp8u &= 3;

                 switch(tmp8u) {

-                    case ROUND_Nearest:

+                    case ROUND_Nearest: {

+                        int round = fegetround();

+                        fesetround(FE_TONEAREST);

                         GX->d[0] = nearbyint(EX->d[0]);

                         GX->d[1] = nearbyint(EX->d[1]);

+                        fesetround(round);

                         break;

+                    }

                     case ROUND_Down:

                         GX->d[0] = floor(EX->d[0]);

                         GX->d[1] = floor(EX->d[1]);

@@ -874,9 +887,13 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 else

                     tmp8u &= 3;

                 switch(tmp8u) {

-                    case ROUND_Nearest:

+                    case ROUND_Nearest: {

+                        int round = fegetround();

+                        fesetround(FE_TONEAREST);

                         GX->f[0] = nearbyintf(EX->f[0]);

+                        fesetround(round);

                         break;

+                    }

                     case ROUND_Down:

                         GX->f[0] = floorf(EX->f[0]);

                         break;

@@ -898,9 +915,13 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 else

                     tmp8u &= 3;

                 switch(tmp8u) {

-                    case ROUND_Nearest:

+                    case ROUND_Nearest: {

+                        int round = fegetround();

+                        fesetround(FE_TONEAREST);

                         GX->d[0] = nearbyint(EX->d[0]);

+                        fesetround(round);

                         break;

+                    }

                     case ROUND_Down:

                         GX->d[0] = floor(EX->d[0]);

                         break;

@@ -1227,9 +1248,13 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 tmp64s = INT32_MIN;

             else

                 switch(emu->mxcsr.f.MXCSR_RC) {

-                    case ROUND_Nearest:

+                    case ROUND_Nearest: {

+                        int round = fegetround();

+                        fesetround(FE_TONEAREST);

                         tmp64s = nearbyintf(EX->f[i]);

+                        fesetround(round);

                         break;

+                    }

                     case ROUND_Down:

                         tmp64s = floorf(EX->f[i]);

                         break;

diff --git a/src/emu/x64runf20f.c b/src/emu/x64runf20f.c
index 33750318..d5ce598f 100644
--- a/src/emu/x64runf20f.c
+++ b/src/emu/x64runf20f.c
@@ -3,6 +3,7 @@
 #include <stdio.h>

 #include <stdlib.h>

 #include <math.h>

+#include <fenv.h>

 #include <string.h>

 #include <signal.h>

 #include <sys/types.h>

@@ -113,9 +114,13 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
                 GD->q[0] = 0x8000000000000000LL;

             else

                 switch(emu->mxcsr.f.MXCSR_RC) {

-                    case ROUND_Nearest:

+                    case ROUND_Nearest: {

+                        int round = fegetround();

+                        fesetround(FE_TONEAREST);

                         GD->sq[0] = nearbyint(EX->d[0]);

+                        fesetround(round);

                         break;

+                    }

                     case ROUND_Down:

                         GD->sq[0] = floor(EX->d[0]);

                         break;

@@ -131,9 +136,13 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
                 GD->dword[0] = 0x80000000;

             else

                 switch(emu->mxcsr.f.MXCSR_RC) {

-                    case ROUND_Nearest:

+                    case ROUND_Nearest: {

+                        int round = fegetround();

+                        fesetround(FE_TONEAREST);

                         GD->sdword[0] = nearbyint(EX->d[0]);

+                        fesetround(round);

                         break;

+                    }

                     case ROUND_Down:

                         GD->sdword[0] = floor(EX->d[0]);

                         break;

@@ -325,10 +334,14 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
         GETEX(0);

         GETGX;

         switch(emu->mxcsr.f.MXCSR_RC) {

-            case ROUND_Nearest:

+            case ROUND_Nearest: {

+                int round = fegetround();

+                fesetround(FE_TONEAREST);

                 tmp64s0 = nearbyint(EX->d[0]);

                 tmp64s1 = nearbyint(EX->d[1]);

+                fesetround(round);

                 break;

+            }

             case ROUND_Down:

                 tmp64s0 = floor(EX->d[0]);

                 tmp64s1 = floor(EX->d[1]);

diff --git a/src/emu/x64runf30f.c b/src/emu/x64runf30f.c
index d5b70b20..bd7c97e9 100644
--- a/src/emu/x64runf30f.c
+++ b/src/emu/x64runf30f.c
@@ -3,6 +3,7 @@
 #include <stdio.h>

 #include <stdlib.h>

 #include <math.h>

+#include <fenv.h>

 #include <string.h>

 #include <signal.h>

 #include <sys/types.h>

@@ -119,9 +120,13 @@ uintptr_t RunF30F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 GD->q[0] = 0x8000000000000000LL;

             else

                 switch(emu->mxcsr.f.MXCSR_RC) {

-                    case ROUND_Nearest:

+                    case ROUND_Nearest: {

+                        int round = fegetround();

+                        fesetround(FE_TONEAREST);

                         GD->sq[0] = nearbyintf(EX->f[0]);

+                        fesetround(round);

                         break;

+                    }

                     case ROUND_Down:

                         GD->sq[0] = floorf(EX->f[0]);

                         break;

@@ -137,9 +142,13 @@ uintptr_t RunF30F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 tmp64s = INT32_MIN;

             else

                 switch(emu->mxcsr.f.MXCSR_RC) {

-                    case ROUND_Nearest:

+                    case ROUND_Nearest: {

+                        int round = fegetround();

+                        fesetround(FE_TONEAREST);

                         tmp64s = nearbyintf(EX->f[0]);

+                        fesetround(round);

                         break;

+                    }

                     case ROUND_Down:

                         tmp64s = floorf(EX->f[0]);

                         break;

diff --git a/src/emu/x87emu_private.h b/src/emu/x87emu_private.h
index 5721088f..a8e2d39c 100644
--- a/src/emu/x87emu_private.h
+++ b/src/emu/x87emu_private.h
@@ -3,6 +3,7 @@
 
 #include <stdint.h>
 #include <math.h>
+#include <fenv.h>
 #include "regs.h"
 #include "x64run_private.h"
 #include "debug.h"
@@ -108,8 +109,13 @@ static inline double fpu_round(x64emu_t* emu, double d) {
     if (!isfinite(d))
         return d;
     switch(emu->cw.f.C87_RD) {
-        case ROUND_Nearest:
-            return nearbyint(d);
+        case ROUND_Nearest: {
+            int round = fegetround();
+            fesetround(FE_TONEAREST);
+            double res = nearbyint(d);
+            fesetround(round);
+            return res;
+        }
         case ROUND_Down:
             return floor(d);
         case ROUND_Up:
diff --git a/src/include/debug.h b/src/include/debug.h
index f50e8554..59c608cd 100644
--- a/src/include/debug.h
+++ b/src/include/debug.h
@@ -74,6 +74,7 @@ extern int box64_malloc_hack;
 extern int box64_dummy_crashhandler;
 extern int box64_sse_flushto0;
 extern int box64_x87_no80bits;
+extern int box64_sync_rounding;
 extern int allow_missing_libs;
 extern int box64_mapclean;
 extern int box64_prefer_wrapped;
diff --git a/src/main.c b/src/main.c
index bc4a12e4..eb185c66 100644
--- a/src/main.c
+++ b/src/main.c
@@ -123,6 +123,7 @@ int box64_prefer_emulated = 0;
 int box64_prefer_wrapped = 0;
 int box64_sse_flushto0 = 0;
 int box64_x87_no80bits = 0;
+int box64_sync_rounding = 0;
 int fix_64bit_inodes = 0;
 int box64_dummy_crashhandler = 1;
 int box64_mapclean = 0;
@@ -1155,6 +1156,12 @@ void LoadEnvVars(box64context_t *context)
             printf_log(LOG_INFO, "BOX64: all 80bits x87 long double will be handle as double\n");
     	}
     }
+    // BOX64_SYNC_ROUNDING=1 turns on box64_sync_rounding; any other value leaves the flag untouched.
+    if(getenv("BOX64_SYNC_ROUNDING")) {
+        if (strcmp(getenv("BOX64_SYNC_ROUNDING"), "1")==0) {
+            box64_sync_rounding = 1;
+            printf_log(LOG_INFO, "BOX64: rounding mode will be synced with fesetround/fegetround\n");
+        }
+    }
     if(getenv("BOX64_PREFER_WRAPPED")) {
         if (strcmp(getenv("BOX64_PREFER_WRAPPED"), "1")==0) {
             box64_prefer_wrapped = 1;
diff --git a/src/tools/rcfile.c b/src/tools/rcfile.c
index cd6c70a7..6dbc6204 100644
--- a/src/tools/rcfile.c
+++ b/src/tools/rcfile.c
@@ -89,6 +89,7 @@ ENTRYBOOL(BOX64_X11GLX, box64_x11glx)                   \
 ENTRYDSTRING(BOX64_LIBGL, box64_libGL)                  \
 ENTRYBOOL(BOX64_SSE_FLUSHTO0, box64_sse_flushto0)       \
 ENTRYBOOL(BOX64_X87_NO80BITS, box64_x87_no80bits)       \
+ENTRYBOOL(BOX64_SYNC_ROUNDING, box64_sync_rounding)     \
 ENTRYSTRING_(BOX64_EMULATED_LIBS, emulated_libs)        \
 ENTRYBOOL(BOX64_ALLOWMISSINGLIBS, allow_missing_libs)   \
 ENTRYBOOL(BOX64_PREFER_WRAPPED, box64_prefer_wrapped)   \
@@ -593,4 +594,4 @@ void ApplyParams(const char* name)
         box64_log = 2;
         box64_dump = 1;
     }
-}
\ No newline at end of file
+}
diff --git a/src/wrapped/generated/functions_list.txt b/src/wrapped/generated/functions_list.txt
index 78bd957a..a2651dd5 100644
--- a/src/wrapped/generated/functions_list.txt
+++ b/src/wrapped/generated/functions_list.txt
@@ -164,6 +164,8 @@
 #() cFpp
 #() wFpi
 #() iFEi
+#() iFEf
+#() iFEd
 #() iFEL
 #() iFEp
 #() iFwp
@@ -200,6 +202,8 @@
 #() iFpp
 #() iFpO
 #() iFSi
+#() IFEf
+#() IFEd
 #() IFEp
 #() IFip
 #() IFII
@@ -239,6 +243,7 @@
 #() UFpi
 #() UFpU
 #() UFpp
+#() fFEf
 #() fFEp
 #() fFif
 #() fFfi
@@ -247,6 +252,7 @@
 #() fFfp
 #() fFpu
 #() fFpp
+#() dFEd
 #() dFid
 #() dFdi
 #() dFdd
@@ -2950,10 +2956,10 @@
 #() uFippuuuuiiiiuuiiiiiiiipp
 #() vFpppppppppppppppppppppppp
 #() iFpppppppppppppppppppppppppppppppppp
-#defined(HAVE_LD80BITS) IFD
 #defined(HAVE_LD80BITS) DFD
-#!defined(HAVE_LD80BITS) IFK
+#defined(HAVE_LD80BITS) IFED
 #!defined(HAVE_LD80BITS) KFK
+#!defined(HAVE_LD80BITS) IFEK
 #!defined(HAVE_LD80BITS) KFKK
 #!defined(HAVE_LD80BITS) KFKp
 #defined(NOALIGN) iFipiip
@@ -4390,6 +4396,22 @@ wrappedlibibus:
   - ibus_config_set_value_async
 wrappedlibice:
 wrappedlibm:
+- iFv:
+  - fegetround
+- iFi:
+  - fesetround
+- iFf:
+  - lrintf
+- iFd:
+  - lrint
+- IFf:
+  - llrintf
+- IFd:
+  - llrint
+- IFD:
+  - llrintl
+- IFK:
+  - llrintl
 - fFf:
   - __acosf_finite
   - __acoshf_finite
@@ -4402,6 +4424,8 @@ wrappedlibm:
   - __logf_finite
   - __sinhf_finite
   - __sqrtf_finite
+  - nearbyintf
+  - rintf
 - dFd:
   - __acos_finite
   - __acosh_finite
@@ -4414,6 +4438,8 @@ wrappedlibm:
   - __log_finite
   - __sinh_finite
   - __sqrt_finite
+  - nearbyint
+  - rint
 - xFx:
   - catanf
   - catanhf
diff --git a/src/wrapped/generated/wrappedlibmtypes.h b/src/wrapped/generated/wrappedlibmtypes.h
index 001794ba..6a0a8bf5 100644
--- a/src/wrapped/generated/wrappedlibmtypes.h
+++ b/src/wrapped/generated/wrappedlibmtypes.h
@@ -11,6 +11,14 @@
 #define ADDED_FUNCTIONS() 
 #endif
 
+typedef int32_t (*iFv_t)(void);
+typedef int32_t (*iFi_t)(int32_t);
+typedef int32_t (*iFf_t)(float);
+typedef int32_t (*iFd_t)(double);
+typedef int64_t (*IFf_t)(float);
+typedef int64_t (*IFd_t)(double);
+typedef int64_t (*IFD_t)(long double);
+typedef int64_t (*IFK_t)(double);
 typedef float (*fFf_t)(float);
 typedef double (*dFd_t)(double);
 typedef complexf_t (*xFx_t)(complexf_t);
@@ -18,6 +26,14 @@ typedef float (*fFff_t)(float, float);
 typedef double (*dFdd_t)(double, double);
 
 #define SUPER() ADDED_FUNCTIONS() \
+	GO(fegetround, iFv_t) \
+	GO(fesetround, iFi_t) \
+	GO(lrintf, iFf_t) \
+	GO(lrint, iFd_t) \
+	GO(llrintf, IFf_t) \
+	GO(llrint, IFd_t) \
+	GO(llrintl, IFD_t) \
+	GO(llrintl, IFK_t) \
 	GO(__acosf_finite, fFf_t) \
 	GO(__acoshf_finite, fFf_t) \
 	GO(__asinf_finite, fFf_t) \
@@ -29,6 +45,8 @@ typedef double (*dFdd_t)(double, double);
 	GO(__logf_finite, fFf_t) \
 	GO(__sinhf_finite, fFf_t) \
 	GO(__sqrtf_finite, fFf_t) \
+	GO(nearbyintf, fFf_t) \
+	GO(rintf, fFf_t) \
 	GO(__acos_finite, dFd_t) \
 	GO(__acosh_finite, dFd_t) \
 	GO(__asin_finite, dFd_t) \
@@ -40,6 +58,8 @@ typedef double (*dFdd_t)(double, double);
 	GO(__log_finite, dFd_t) \
 	GO(__sinh_finite, dFd_t) \
 	GO(__sqrt_finite, dFd_t) \
+	GO(nearbyint, dFd_t) \
+	GO(rint, dFd_t) \
 	GO(catanf, xFx_t) \
 	GO(catanhf, xFx_t) \
 	GO(__atan2f_finite, fFff_t) \
diff --git a/src/wrapped/generated/wrapper.c b/src/wrapped/generated/wrapper.c
index e56e55d9..fa68049f 100644
--- a/src/wrapped/generated/wrapper.c
+++ b/src/wrapped/generated/wrapper.c
@@ -200,6 +200,8 @@ typedef int8_t (*cFpi_t)(void*, int32_t);
 typedef int8_t (*cFpp_t)(void*, void*);
 typedef int16_t (*wFpi_t)(void*, int32_t);
 typedef int32_t (*iFEi_t)(x64emu_t*, int32_t);
+typedef int32_t (*iFEf_t)(x64emu_t*, float);
+typedef int32_t (*iFEd_t)(x64emu_t*, double);
 typedef int32_t (*iFEL_t)(x64emu_t*, uintptr_t);
 typedef int32_t (*iFEp_t)(x64emu_t*, void*);
 typedef int32_t (*iFwp_t)(int16_t, void*);
@@ -236,6 +238,8 @@ typedef int32_t (*iFpL_t)(void*, uintptr_t);
 typedef int32_t (*iFpp_t)(void*, void*);
 typedef int32_t (*iFpO_t)(void*, int32_t);
 typedef int32_t (*iFSi_t)(void*, int32_t);
+typedef int64_t (*IFEf_t)(x64emu_t*, float);
+typedef int64_t (*IFEd_t)(x64emu_t*, double);
 typedef int64_t (*IFEp_t)(x64emu_t*, void*);
 typedef int64_t (*IFip_t)(int32_t, void*);
 typedef int64_t (*IFII_t)(int64_t, int64_t);
@@ -275,6 +279,7 @@ typedef uint64_t (*UFuu_t)(uint32_t, uint32_t);
 typedef uint64_t (*UFpi_t)(void*, int32_t);
 typedef uint64_t (*UFpU_t)(void*, uint64_t);
 typedef uint64_t (*UFpp_t)(void*, void*);
+typedef float (*fFEf_t)(x64emu_t*, float);
 typedef float (*fFEp_t)(x64emu_t*, void*);
 typedef float (*fFif_t)(int32_t, float);
 typedef float (*fFfi_t)(float, int32_t);
@@ -283,6 +288,7 @@ typedef float (*fFfD_t)(float, long double);
 typedef float (*fFfp_t)(float, void*);
 typedef float (*fFpu_t)(void*, uint32_t);
 typedef float (*fFpp_t)(void*, void*);
+typedef double (*dFEd_t)(x64emu_t*, double);
 typedef double (*dFid_t)(int32_t, double);
 typedef double (*dFdi_t)(double, int32_t);
 typedef double (*dFdd_t)(double, double);
@@ -2988,13 +2994,13 @@ typedef void (*vFpppppppppppppppppppppppp_t)(void*, void*, void*, void*, void*,
 typedef int32_t (*iFpppppppppppppppppppppppppppppppppp_t)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*);
 
 #if defined(HAVE_LD80BITS)
-typedef int64_t (*IFD_t)(long double);
 typedef long double (*DFD_t)(long double);
+typedef int64_t (*IFED_t)(x64emu_t*, long double);
 #endif
 
 #if !defined(HAVE_LD80BITS)
-typedef int64_t (*IFK_t)(double);
 typedef double (*KFK_t)(double);
+typedef int64_t (*IFEK_t)(x64emu_t*, double);
 typedef double (*KFKK_t)(double, double);
 typedef double (*KFKp_t)(double, void*);
 #endif
@@ -3175,6 +3181,8 @@ void cFpi(x64emu_t *emu, uintptr_t fcn) { cFpi_t fn = (cFpi_t)fcn; R_RAX=fn((voi
 void cFpp(x64emu_t *emu, uintptr_t fcn) { cFpp_t fn = (cFpp_t)fcn; R_RAX=fn((void*)R_RDI, (void*)R_RSI); }
 void wFpi(x64emu_t *emu, uintptr_t fcn) { wFpi_t fn = (wFpi_t)fcn; R_RAX=fn((void*)R_RDI, (int32_t)R_RSI); }
 void iFEi(x64emu_t *emu, uintptr_t fcn) { iFEi_t fn = (iFEi_t)fcn; R_RAX=(int32_t)fn(emu, (int32_t)R_RDI); }
+void iFEf(x64emu_t *emu, uintptr_t fcn) { iFEf_t fn = (iFEf_t)fcn; R_RAX=(int32_t)fn(emu, emu->xmm[0].f[0]); }
+void iFEd(x64emu_t *emu, uintptr_t fcn) { iFEd_t fn = (iFEd_t)fcn; R_RAX=(int32_t)fn(emu, emu->xmm[0].d[0]); }
 void iFEL(x64emu_t *emu, uintptr_t fcn) { iFEL_t fn = (iFEL_t)fcn; R_RAX=(int32_t)fn(emu, (uintptr_t)R_RDI); }
 void iFEp(x64emu_t *emu, uintptr_t fcn) { iFEp_t fn = (iFEp_t)fcn; R_RAX=(int32_t)fn(emu, (void*)R_RDI); }
 void iFwp(x64emu_t *emu, uintptr_t fcn) { iFwp_t fn = (iFwp_t)fcn; R_RAX=(int32_t)fn((int16_t)R_RDI, (void*)R_RSI); }
@@ -3211,6 +3219,8 @@ void iFpL(x64emu_t *emu, uintptr_t fcn) { iFpL_t fn = (iFpL_t)fcn; R_RAX=(int32_
 void iFpp(x64emu_t *emu, uintptr_t fcn) { iFpp_t fn = (iFpp_t)fcn; R_RAX=(int32_t)fn((void*)R_RDI, (void*)R_RSI); }
 void iFpO(x64emu_t *emu, uintptr_t fcn) { iFpO_t fn = (iFpO_t)fcn; R_RAX=(int32_t)fn((void*)R_RDI, of_convert((int32_t)R_RSI)); }
 void iFSi(x64emu_t *emu, uintptr_t fcn) { iFSi_t fn = (iFSi_t)fcn; R_RAX=(int32_t)fn(io_convert((void*)R_RDI), (int32_t)R_RSI); }
+void IFEf(x64emu_t *emu, uintptr_t fcn) { IFEf_t fn = (IFEf_t)fcn; R_RAX=(int64_t)fn(emu, emu->xmm[0].f[0]); }
+void IFEd(x64emu_t *emu, uintptr_t fcn) { IFEd_t fn = (IFEd_t)fcn; R_RAX=(int64_t)fn(emu, emu->xmm[0].d[0]); }
 void IFEp(x64emu_t *emu, uintptr_t fcn) { IFEp_t fn = (IFEp_t)fcn; R_RAX=(int64_t)fn(emu, (void*)R_RDI); }
 void IFip(x64emu_t *emu, uintptr_t fcn) { IFip_t fn = (IFip_t)fcn; R_RAX=(int64_t)fn((int32_t)R_RDI, (void*)R_RSI); }
 void IFII(x64emu_t *emu, uintptr_t fcn) { IFII_t fn = (IFII_t)fcn; R_RAX=(int64_t)fn((int64_t)R_RDI, (int64_t)R_RSI); }
@@ -3250,6 +3260,7 @@ void UFuu(x64emu_t *emu, uintptr_t fcn) { UFuu_t fn = (UFuu_t)fcn; R_RAX=fn((uin
 void UFpi(x64emu_t *emu, uintptr_t fcn) { UFpi_t fn = (UFpi_t)fcn; R_RAX=fn((void*)R_RDI, (int32_t)R_RSI); }
 void UFpU(x64emu_t *emu, uintptr_t fcn) { UFpU_t fn = (UFpU_t)fcn; R_RAX=fn((void*)R_RDI, (uint64_t)R_RSI); }
 void UFpp(x64emu_t *emu, uintptr_t fcn) { UFpp_t fn = (UFpp_t)fcn; R_RAX=fn((void*)R_RDI, (void*)R_RSI); }
+void fFEf(x64emu_t *emu, uintptr_t fcn) { fFEf_t fn = (fFEf_t)fcn; emu->xmm[0].f[0]=fn(emu, emu->xmm[0].f[0]); }
 void fFEp(x64emu_t *emu, uintptr_t fcn) { fFEp_t fn = (fFEp_t)fcn; emu->xmm[0].f[0]=fn(emu, (void*)R_RDI); }
 void fFif(x64emu_t *emu, uintptr_t fcn) { fFif_t fn = (fFif_t)fcn; emu->xmm[0].f[0]=fn((int32_t)R_RDI, emu->xmm[0].f[0]); }
 void fFfi(x64emu_t *emu, uintptr_t fcn) { fFfi_t fn = (fFfi_t)fcn; emu->xmm[0].f[0]=fn(emu->xmm[0].f[0], (int32_t)R_RDI); }
@@ -3258,6 +3269,7 @@ void fFfD(x64emu_t *emu, uintptr_t fcn) { fFfD_t fn = (fFfD_t)fcn; emu->xmm[0].f
 void fFfp(x64emu_t *emu, uintptr_t fcn) { fFfp_t fn = (fFfp_t)fcn; emu->xmm[0].f[0]=fn(emu->xmm[0].f[0], (void*)R_RDI); }
 void fFpu(x64emu_t *emu, uintptr_t fcn) { fFpu_t fn = (fFpu_t)fcn; emu->xmm[0].f[0]=fn((void*)R_RDI, (uint32_t)R_RSI); }
 void fFpp(x64emu_t *emu, uintptr_t fcn) { fFpp_t fn = (fFpp_t)fcn; emu->xmm[0].f[0]=fn((void*)R_RDI, (void*)R_RSI); }
+void dFEd(x64emu_t *emu, uintptr_t fcn) { dFEd_t fn = (dFEd_t)fcn; emu->xmm[0].d[0]=fn(emu, emu->xmm[0].d[0]); }
 void dFid(x64emu_t *emu, uintptr_t fcn) { dFid_t fn = (dFid_t)fcn; emu->xmm[0].d[0]=fn((int32_t)R_RDI, emu->xmm[0].d[0]); }
 void dFdi(x64emu_t *emu, uintptr_t fcn) { dFdi_t fn = (dFdi_t)fcn; emu->xmm[0].d[0]=fn(emu->xmm[0].d[0], (int32_t)R_RDI); }
 void dFdd(x64emu_t *emu, uintptr_t fcn) { dFdd_t fn = (dFdd_t)fcn; emu->xmm[0].d[0]=fn(emu->xmm[0].d[0], emu->xmm[1].d[0]); }
@@ -5963,13 +5975,13 @@ void vFpppppppppppppppppppppppp(x64emu_t *emu, uintptr_t fcn) { vFpppppppppppppp
 void iFpppppppppppppppppppppppppppppppppp(x64emu_t *emu, uintptr_t fcn) { iFpppppppppppppppppppppppppppppppppp_t fn = (iFpppppppppppppppppppppppppppppppppp_t)fcn; R_RAX=(int32_t)fn((void*)R_RDI, (void*)R_RSI, (void*)R_RDX, (void*)R_RCX, (void*)R_R8, (void*)R_R9, *(void**)(R_RSP + 8), *(void**)(R_RSP + 16), *(void**)(R_RSP + 24), *(void**)(R_RSP + 32), *(void**)(R_RSP + 40), *(void**)(R_RSP + 48), *(void**)(R_RSP + 56), *(void**)(R_RSP + 64), *(void**)(R_RSP + 72), *(void**)(R_RSP + 80), *(void**)(R_RSP + 88), *(void**)(R_RSP + 96), *(void**)(R_RSP + 104), *(void**)(R_RSP + 112), *(void**)(R_RSP + 120), *(void**)(R_RSP + 128), *(void**)(R_RSP + 136), *(void**)(R_RSP + 144), *(void**)(R_RSP + 152), *(void**)(R_RSP + 160), *(void**)(R_RSP + 168), *(void**)(R_RSP + 176), *(void**)(R_RSP + 184), *(void**)(R_RSP + 192), *(void**)(R_RSP + 200), *(void**)(R_RSP + 208), *(void**)(R_RSP + 216), *(void**)(R_RSP + 224)); }
 
 #if defined(HAVE_LD80BITS)
-void IFD(x64emu_t *emu, uintptr_t fcn) { IFD_t fn = (IFD_t)fcn; R_RAX=(int64_t)fn(LD2localLD((void*)(R_RSP + 8))); }
 void DFD(x64emu_t *emu, uintptr_t fcn) { DFD_t fn = (DFD_t)fcn; long double ld=fn(LD2localLD((void*)(R_RSP + 8))); fpu_do_push(emu); ST0val = ld; }
+void IFED(x64emu_t *emu, uintptr_t fcn) { IFED_t fn = (IFED_t)fcn; R_RAX=(int64_t)fn(emu, LD2localLD((void*)(R_RSP + 8))); }
 #endif
 
 #if !defined(HAVE_LD80BITS)
-void IFK(x64emu_t *emu, uintptr_t fcn) { IFK_t fn = (IFK_t)fcn; R_RAX=(int64_t)fn(FromLD((void*)(R_RSP + 8))); }
 void KFK(x64emu_t *emu, uintptr_t fcn) { KFK_t fn = (KFK_t)fcn; double db=fn(FromLD((void*)(R_RSP + 8))); fpu_do_push(emu); ST0val = db; }
+void IFEK(x64emu_t *emu, uintptr_t fcn) { IFEK_t fn = (IFEK_t)fcn; R_RAX=(int64_t)fn(emu, FromLD((void*)(R_RSP + 8))); }
 void KFKK(x64emu_t *emu, uintptr_t fcn) { KFKK_t fn = (KFKK_t)fcn; double db=fn(FromLD((void*)(R_RSP + 8)), FromLD((void*)(R_RSP + 24))); fpu_do_push(emu); ST0val = db; }
 void KFKp(x64emu_t *emu, uintptr_t fcn) { KFKp_t fn = (KFKp_t)fcn; double db=fn(FromLD((void*)(R_RSP + 8)), (void*)R_RDI); fpu_do_push(emu); ST0val = db; }
 #endif
diff --git a/src/wrapped/generated/wrapper.h b/src/wrapped/generated/wrapper.h
index e498d101..66cbc51f 100644
--- a/src/wrapped/generated/wrapper.h
+++ b/src/wrapped/generated/wrapper.h
@@ -201,6 +201,8 @@ void cFpi(x64emu_t *emu, uintptr_t fnc);
 void cFpp(x64emu_t *emu, uintptr_t fnc);
 void wFpi(x64emu_t *emu, uintptr_t fnc);
 void iFEi(x64emu_t *emu, uintptr_t fnc);
+void iFEf(x64emu_t *emu, uintptr_t fnc);
+void iFEd(x64emu_t *emu, uintptr_t fnc);
 void iFEL(x64emu_t *emu, uintptr_t fnc);
 void iFEp(x64emu_t *emu, uintptr_t fnc);
 void iFwp(x64emu_t *emu, uintptr_t fnc);
@@ -237,6 +239,8 @@ void iFpL(x64emu_t *emu, uintptr_t fnc);
 void iFpp(x64emu_t *emu, uintptr_t fnc);
 void iFpO(x64emu_t *emu, uintptr_t fnc);
 void iFSi(x64emu_t *emu, uintptr_t fnc);
+void IFEf(x64emu_t *emu, uintptr_t fnc);
+void IFEd(x64emu_t *emu, uintptr_t fnc);
 void IFEp(x64emu_t *emu, uintptr_t fnc);
 void IFip(x64emu_t *emu, uintptr_t fnc);
 void IFII(x64emu_t *emu, uintptr_t fnc);
@@ -276,6 +280,7 @@ void UFuu(x64emu_t *emu, uintptr_t fnc);
 void UFpi(x64emu_t *emu, uintptr_t fnc);
 void UFpU(x64emu_t *emu, uintptr_t fnc);
 void UFpp(x64emu_t *emu, uintptr_t fnc);
+void fFEf(x64emu_t *emu, uintptr_t fnc);
 void fFEp(x64emu_t *emu, uintptr_t fnc);
 void fFif(x64emu_t *emu, uintptr_t fnc);
 void fFfi(x64emu_t *emu, uintptr_t fnc);
@@ -284,6 +289,7 @@ void fFfD(x64emu_t *emu, uintptr_t fnc);
 void fFfp(x64emu_t *emu, uintptr_t fnc);
 void fFpu(x64emu_t *emu, uintptr_t fnc);
 void fFpp(x64emu_t *emu, uintptr_t fnc);
+void dFEd(x64emu_t *emu, uintptr_t fnc);
 void dFid(x64emu_t *emu, uintptr_t fnc);
 void dFdi(x64emu_t *emu, uintptr_t fnc);
 void dFdd(x64emu_t *emu, uintptr_t fnc);
@@ -2989,13 +2995,13 @@ void vFpppppppppppppppppppppppp(x64emu_t *emu, uintptr_t fnc);
 void iFpppppppppppppppppppppppppppppppppp(x64emu_t *emu, uintptr_t fnc);
 
 #if defined(HAVE_LD80BITS)
-void IFD(x64emu_t *emu, uintptr_t fnc);
 void DFD(x64emu_t *emu, uintptr_t fnc);
+void IFED(x64emu_t *emu, uintptr_t fnc);
 #endif
 
 #if !defined(HAVE_LD80BITS)
-void IFK(x64emu_t *emu, uintptr_t fnc);
 void KFK(x64emu_t *emu, uintptr_t fnc);
+void IFEK(x64emu_t *emu, uintptr_t fnc);
 void KFKK(x64emu_t *emu, uintptr_t fnc);
 void KFKp(x64emu_t *emu, uintptr_t fnc);
 #endif
diff --git a/src/wrapped/wrappedlibm.c b/src/wrapped/wrappedlibm.c
index 0e4f4cf9..670ce370 100644
--- a/src/wrapped/wrappedlibm.c
+++ b/src/wrapped/wrappedlibm.c
@@ -1,10 +1,11 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#define _GNU_SOURCE         /* See feature_test_macros(7) */
+#define _GNU_SOURCE /* See feature_test_macros(7) */
 #include <dlfcn.h>
 #include <complex.h>
 #include <math.h>
+#include <fenv.h>
 
 #include "wrappedlibs.h"
 
@@ -13,8 +14,9 @@
 #include "librarian/library_private.h"
 #include "x64emu.h"
 #include "debug.h"
+#include "emu/x64emu_private.h"
 
-const char* libmName = 
+const char* libmName =
 #ifdef ANDROID
     "libm.so"
 #else
@@ -90,12 +92,114 @@ F2D(fmod)
 #undef F1F
 #undef FINITE
 
+// TO_NATIVE(round) converts an x86-64 rounding-mode constant (one of the four
+// values listed below) into the matching FE_* constant of the host
+// architecture, so that it can be handed to the host's fesetround().
+// The macro argument is parenthesized so any expression can be passed safely.
+//
+// x86-64
+// FE_TONEAREST     0x0
+// FE_DOWNWARD      0x400
+// FE_UPWARD        0x800
+// FE_TOWARDZERO    0xc00
+
+#if defined(__aarch64__)
+// AArch64
+// #define FE_TONEAREST  0x000000
+// #define FE_DOWNWARD   0x800000
+// #define FE_UPWARD     0x400000
+// #define FE_TOWARDZERO 0xc00000
+// Swap DOWNWARD/UPWARD, then move the two bits up by 12.
+#define TO_NATIVE(round) (((round) == 0x400 ? 0x800 : ((round) == 0x800 ? 0x400 : (round))) << 12)
+#elif defined(__riscv)
+// RISC-V
+// #define FE_TONEAREST     0x0
+// #define FE_DOWNWARD      0x2
+// #define FE_UPWARD        0x3
+// #define FE_TOWARDZERO    0x1
+// TOWARDZERO becomes 1; DOWNWARD/UPWARD are shifted up by one step; NEAREST stays 0.
+#define TO_NATIVE(round) (((round) == 0xc00 ? 0x400 : ((round) == 0x0 ? (round) : (round) + 0x400)) >> 10)
+#elif defined(__loongarch64)
+// LOONGARCH
+// FE_TONEAREST     0x000
+// FE_DOWNWARD      0x300
+// FE_UPWARD        0x200
+// FE_TOWARDZERO    0x100
+// Swap DOWNWARD/TOWARDZERO, then move the two bits down by 2.
+#define TO_NATIVE(round) (((round) == 0x400 ? 0xc00 : ((round) == 0xc00 ? 0x400 : (round))) >> 2)
+#elif defined(__powerpc64__)
+// PPC
+// FE_TONEAREST     0x0
+// FE_DOWNWARD      0x3
+// FE_UPWARD        0x2
+// FE_TOWARDZERO    0x1
+// Swap DOWNWARD/TOWARDZERO, then move the two bits down by 10.
+#define TO_NATIVE(round) (((round) == 0x400 ? 0xc00 : ((round) == 0xc00 ? 0x400 : (round))) >> 10)
+#elif defined(__x86_64__)
+// Host and guest share the same encoding.
+#define TO_NATIVE(round) (round)
+#else
+#error Unknown architecture!
+#endif
+
+// Guest-facing fesetround(). `round` is expected in the x86-64 FE_* encoding
+// (a subset of 0xc00). Returns 0 on success, 1 when `round` has other bits set.
+// Modelled on https://github.com/bminor/glibc/blob/master/sysdeps/x86_64/fpu/fesetround.c
+EXPORT int my_fesetround(x64emu_t* emu, int round)
+{
+    // With box64_sync_rounding off, the call is simply forwarded to the host.
+    // NOTE(review): `round` is forwarded unconverted here - confirm this is
+    // intended on hosts whose FE_* constants differ from x86-64.
+    if (!box64_sync_rounding)
+        return fesetround(round);
+
+    // Only the two rounding-control bits may be set.
+    if (round & ~0xc00)
+        return 1;
+
+    // Emulated x87 control word: rounding control is the 0xc00 field.
+    emu->cw.x16 = (emu->cw.x16 & ~0xc00) | round;
 
-#define CUSTOM_INIT     \
+    // Emulated MXCSR: same two bits, three positions higher (0x6000 field).
+    emu->mxcsr.x32 = (emu->mxcsr.x32 & ~0x6000) | (round << 3);
+
+    return 0;
+}
+
+// Guest-facing fegetround(). With box64_sync_rounding set, the answer comes
+// from the 0xc00 rounding-control field of the emulated x87 control word;
+// otherwise the host's fegetround() result is returned as is.
+// Modelled on https://github.com/bminor/glibc/blob/master/sysdeps/x86_64/fpu/fegetround.c
+EXPORT int my_fegetround(x64emu_t* emu)
+{
+    if (!box64_sync_rounding)
+        return fegetround();
+
+    return emu->cw.x16 & 0xc00;
+}
+
+// FROUND(N, T, R) defines my_N(emu, val), a wrapper around the host libm
+// function N taking a T and returning an R. When box64_sync_rounding is set,
+// the 0xc00 rounding-control field of the emulated x87 control word is first
+// converted with TO_NATIVE and applied to the host through fesetround().
+// The host rounding mode is not restored after the call.
+#define FROUND(N, T, R)                      \
+    EXPORT R my_##N(x64emu_t* emu, T val)    \
+    {                                        \
+        if (box64_sync_rounding) {           \
+            int round = emu->cw.x16 & 0xc00; \
+            fesetround(TO_NATIVE(round));    \
+        }                                    \
+        return N(val);                       \
+    }
+
+FROUND(rint, double, double)
+FROUND(rintf, float, float)
+FROUND(lrint, double, int)
+FROUND(lrintf, float, int)
+FROUND(llrint, double, long)
+FROUND(llrintf, float, long)
+FROUND(nearbyint, double, double)
+FROUND(nearbyintf, float, float)
+// llrintl produces an integer. The IFED/IFEK wrappers call my_llrintl through
+// a pointer typed as returning int64_t, so the return type has to be integral;
+// a floating-point return type would be read back from the wrong register.
+#ifdef HAVE_LD80BITS
+FROUND(llrintl, long double, long long)
+#else
+// Without HAVE_LD80BITS the argument is passed in as a double, so the
+// double-precision llrint() does the rounding.
+EXPORT long long my_llrintl(x64emu_t* emu, double val)
+{
+    if (box64_sync_rounding) {
+        int round = emu->cw.x16 & 0xc00;
+        fesetround(TO_NATIVE(round));
+    }
+    return llrint(val);
+}
+#endif
+
+#undef FROUND
+#undef TO_NATIVE
+
+#define CUSTOM_INIT \
     my_lib = lib;
 
 #define CUSTOM_FINI     \
     my_lib = NULL;
 
 #include "wrappedlib_init.h"
-
diff --git a/src/wrapped/wrappedlibm_private.h b/src/wrapped/wrappedlibm_private.h
index 16a2f0ed..d6b289ce 100644
--- a/src/wrapped/wrappedlibm_private.h
+++ b/src/wrapped/wrappedlibm_private.h
@@ -202,12 +202,12 @@ GO(feenableexcept, iFi)
 GO(fegetenv, iFp)
 GO(fegetexcept, iFv)
 GO(fegetexceptflag, iFpi)
-GO(fegetround, iFv)
+GOM(fegetround, iFEv)
 GO(feholdexcept, iFp)
 GO(feraiseexcept, iFi)
 GO(fesetenv, iFp)
 GO(fesetexceptflag, iFpi)
-GO(fesetround, iFi)
+GOM(fesetround, iFEi)
 GO(fetestexcept, iFi)
 GO(feupdateenv, iFp)
 GOW(finite, iFd)
@@ -307,12 +307,12 @@ GO2(lgammal_r, KFKp, lgamma_r)
 GOW(lgamma_r, dFdp)
 // __lgamma_r_finite
 DATAV(_LIB_VERSION, 8)
-GOW(llrint, IFd)
-GOW(llrintf, IFf)
+GOWM(llrint, IFEd)
+GOWM(llrintf, IFEf)
 #ifdef HAVE_LD80BITS
-GOW(llrintl, IFD)
+GOWM(llrintl, IFED)
 #else
-GO2(llrintl, IFK, llrint)
+GOWM(llrintl, IFEK)
 #endif
 GOW(llround, IFd)
 GOW(llroundf, IFf)
@@ -346,8 +346,8 @@ GOW(logl, DFD)
 #else
 GO2(logl, KFK, log)
 #endif
-GOW(lrint, iFd)
-GOW(lrintf, iFf)
+GOWM(lrint, iFEd)
+GOWM(lrintf, iFEf)
 // lrintl   // Weak
 GOW(lround, iFd)
 GOW(lroundf, iFf)
@@ -359,8 +359,8 @@ GOW(modff, fFfp)
 GOW(nan, dFp)
 GOW(nanf, fFp)
 // nanl // Weak
-GOW(nearbyint, dFd)
-GOW(nearbyintf, fFf)
+GOWM(nearbyint, dFEd)
+GOWM(nearbyintf, fFEf)
 // nearbyintl   // Weak
 GOW(nextafter, dFdd)
 GOW(nextafterf, fFff)
@@ -392,8 +392,8 @@ GOW(remainderf, fFff)
 GOW(remquo, dFddp)
 GOW(remquof, fFffp)
 // remquol  // Weak
-GOW(rint, dFd)
-GOW(rintf, fFf)
+GOWM(rint, dFEd)
+GOWM(rintf, fFEf)
 // rintl    // Weak
 GOW(round, dFd)
 GOW(roundf, fFf)