about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2022-12-04 10:13:35 +0100
committerptitSeb <sebastien.chev@gmail.com>2022-12-04 10:13:35 +0100
commitfb02ae0e9afb089d0807af0b8c8419f2d16df363 (patch)
tree9c4c082aecdc4c58f87bafc1b04eefece85b7296 /src
parentffb293fe0a9952c32fe2ebd164d6409facf8d921 (diff)
downloadbox64-fb02ae0e9afb089d0807af0b8c8419f2d16df363.tar.gz
box64-fb02ae0e9afb089d0807af0b8c8419f2d16df363.zip
Added experimental (undocumented) BOX64_SSE_FLUSHTO0 env. var. (not sure it's really usefull)
Diffstat (limited to 'src')
-rwxr-xr-xsrc/dynarec/arm64/dynarec_arm64_0f.c19
-rwxr-xr-xsrc/emu/x64emu.c22
-rwxr-xr-xsrc/emu/x64emu_private.h4
-rw-r--r--src/emu/x64run0f.c8
-rw-r--r--src/emu/x64run660f.c8
-rw-r--r--src/emu/x64runf20f.c6
-rw-r--r--src/emu/x64runf30f.c4
-rwxr-xr-xsrc/emu/x87emu_private.c8
-rwxr-xr-xsrc/include/debug.h1
-rwxr-xr-xsrc/include/regs.h72
-rwxr-xr-xsrc/main.c7
-rw-r--r--src/tools/rcfile.c1
12 files changed, 113 insertions, 47 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 3e958790..bcdf66b4 100755
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -1279,17 +1279,20 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         INST_NAME("LDMXCSR Md");

                         GETED(0);

                         STRw_U12(ed, xEmu, offsetof(x64emu_t, mxcsr));

+                        if(box64_sse_flushto0) {

+                            MRS_fpcr(x1);                   // get fpscr

+                            LSRw_IMM(x3, ed, 15);           // get FZ bit

+                            BFIw(x1, x3, 24, 1);            // inject FZ bit

+                            EORw_REG_LSR(x3, x3, ed, 1);    // FZ xor DAZ

+                            BFIw(x1, x3, 1, 1);             // inject AH bit

+                            MSR_fpcr(x1);                   // put new fpscr

+                        }

                         break;

                     case 3:

                         INST_NAME("STMXCSR Md");

-                        if(MODREG) {

-                            ed = xRAX+(nextop&7)+(rex.b<<3);

-                            LDRw_U12(ed, xEmu, offsetof(x64emu_t, mxcsr));

-                        } else {

-                            addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0);

-                            LDRw_U12(x4, xEmu, offsetof(x64emu_t, mxcsr));

-                            STRw_U12(x4, ed, fixedaddress);

-                        }

+                        addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0);

+                        LDRw_U12(x4, xEmu, offsetof(x64emu_t, mxcsr));

+                        STRw_U12(x4, ed, fixedaddress);

                         break;

                     case 7:

                         INST_NAME("CLFLUSH Ed");

diff --git a/src/emu/x64emu.c b/src/emu/x64emu.c
index 9e4fc7e0..d23e10e7 100755
--- a/src/emu/x64emu.c
+++ b/src/emu/x64emu.c
@@ -23,6 +23,13 @@
 #ifdef DYNAREC
 #include "custommem.h"
 #endif
+// for the applyFlushTo0
+#ifdef __x86_64__
+#include <immintrin.h>
+#elif defined(__aarch64__)
+#else
+#warning Architecture cannot follow SSE Flush to 0 flag
+#endif
 
 typedef struct cleanup_s {
     void*       f;
@@ -81,7 +88,7 @@ static void internalX64Setup(x64emu_t* emu, box64context_t *context, uintptr_t s
     emu->segs[_GS] = default_gs;
     // setup fpu regs
     reset_fpu(emu);
-    emu->mxcsr = 0x1f80;
+    emu->mxcsr.x32 = 0x1f80;
 }
 
 EXPORTDYN
@@ -505,3 +512,16 @@ void ResetSegmentsCache(x64emu_t *emu)
         return;
     memset(emu->segs_serial, 0, sizeof(emu->segs_serial));
 }
+
+void applyFlushTo0(x64emu_t* emu)
+{
+    #ifdef __x86_64__
+    _mm_setcsr(_mm_getcsr() | (emu->mxcsr.x32&0x8040));
+    #elif defined(__aarch64__)
+    uint64_t fpcr = __builtin_aarch64_get_fpcr();
+    fpcr &= ~((1<<24) | (1<<1));    // clear bit FZ (24) and AH (1)
+    fpcr |= (emu->mxcsr.f.MXCSR_FZ)<<24;  // set FZ as mxcsr FZ
+    fpcr |= ((emu->mxcsr.f.MXCSR_DAZ)^(emu->mxcsr.f.MXCSR_FZ))<<1; // set AH if DAZ different from FZ
+    __builtin_aarch64_set_fpcr(fpcr);
+    #endif
+}
\ No newline at end of file
diff --git a/src/emu/x64emu_private.h b/src/emu/x64emu_private.h
index 093706bf..68fa5f83 100755
--- a/src/emu/x64emu_private.h
+++ b/src/emu/x64emu_private.h
@@ -39,7 +39,7 @@ typedef struct x64emu_s {
 	x87flags_t  sw;
 	uint32_t    top;        // top is part of sw, but it's faster to have it separatly
     int         fpu_stack;
-    uint32_t    mxcsr;
+    mmxcontrol_t mxcsr;
     fpu_ld_t    fpu_ld[8]; // for long double emulation / 80bits fld fst
     fpu_ll_t    fpu_ll[8]; // for 64bits fild / fist sequence
 	fpu_p_reg_t p_regs[8];
@@ -97,4 +97,6 @@ typedef struct x64emu_s {
 //#define INTR_RAISE_DIV0(emu) {emu->error |= ERR_DIVBY0; emu->quit=1;}
 #define INTR_RAISE_DIV0(emu) {emu->error |= ERR_DIVBY0;} // should rise a SIGFPE and not quit
 
+void applyFlushTo0(x64emu_t* emu);
+
 #endif //__X86EMU_PRIVATE_H_
diff --git a/src/emu/x64run0f.c b/src/emu/x64run0f.c
index 5cde4f53..491f373d 100644
--- a/src/emu/x64run0f.c
+++ b/src/emu/x64run0f.c
@@ -201,7 +201,7 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
             nextop = F8;

             GETEX(0);

             GETGM;

-            switch((emu->mxcsr>>13)&3) {

+            switch(emu->mxcsr.f.MXCSR_RC) {

                 case ROUND_Nearest:

                     GM->sd[1] = floorf(EX->f[1]+0.5f);

                     GM->sd[0] = floorf(EX->f[0]+0.5f);

@@ -897,10 +897,12 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
                         fpu_fxrstor32(emu, ED);

                     break;

                 case 2:                 /* LDMXCSR Md */

-                    emu->mxcsr = ED->dword[0];

+                    emu->mxcsr.x32 = ED->dword[0];

+                    if(box64_sse_flushto0)

+                        applyFlushTo0(emu);

                     break;

                 case 3:                 /* STMXCSR Md */

-                    ED->dword[0] = emu->mxcsr;

+                    ED->dword[0] = emu->mxcsr.x32;

                     break;

                 case 7:                 /* CLFLUSH Ed */

                     #ifdef DYNAREC

diff --git a/src/emu/x64run660f.c b/src/emu/x64run660f.c
index 4d5dd1b7..d2d97c3d 100644
--- a/src/emu/x64run660f.c
+++ b/src/emu/x64run660f.c
@@ -197,7 +197,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
         nextop = F8;

         GETEX(0);

         GETGM;

-        switch((emu->mxcsr>>13)&3) {

+        switch(emu->mxcsr.f.MXCSR_RC) {

             case ROUND_Nearest:

                 GM->sd[0] = floor(EX->d[0]+0.5);

                 GM->sd[1] = floor(EX->d[1]+0.5);

@@ -668,7 +668,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 GETGX;

                 tmp8u = F8; // ignoring bit 3 interupt thingy

                 if(tmp8u&4)

-                    tmp8u = (emu->mxcsr>>13)&3;

+                    tmp8u = emu->mxcsr.f.MXCSR_RC;

                 else

                     tmp8u &= 3;

                 switch(tmp8u) {

@@ -692,7 +692,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 GETGX;

                 tmp8u = F8; // ignoring bit 3 interupt thingy

                 if(tmp8u&4)

-                    tmp8u = (emu->mxcsr>>13)&3;

+                    tmp8u = emu->mxcsr.f.MXCSR_RC;

                 else

                     tmp8u &= 3;

                 switch(tmp8u) {

@@ -951,7 +951,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
         nextop = F8;

         GETEX(0);

         GETGX;

-        switch((emu->mxcsr>>13)&3) {

+        switch(emu->mxcsr.f.MXCSR_RC) {

             case ROUND_Nearest:

                 GX->sd[0] = floorf(EX->f[0]+0.5f);

                 GX->sd[1] = floorf(EX->f[1]+0.5f);

diff --git a/src/emu/x64runf20f.c b/src/emu/x64runf20f.c
index 18eea952..f8b89dfb 100644
--- a/src/emu/x64runf20f.c
+++ b/src/emu/x64runf20f.c
@@ -95,7 +95,7 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
         GETEX(0);

         GETGD;

         if(rex.w) {

-            switch((emu->mxcsr>>13)&3) {

+            switch(emu->mxcsr.f.MXCSR_RC) {

                 case ROUND_Nearest:

                     GD->q[0] = floor(EX->d[0]+0.5);

                     break;

@@ -110,7 +110,7 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
                     break;

             }

         } else {

-            switch((emu->mxcsr>>13)&3) {

+            switch(emu->mxcsr.f.MXCSR_RC) {

                 case ROUND_Nearest:

                     GD->sdword[0] = floor(EX->d[0]+0.5);

                     break;

@@ -300,7 +300,7 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
         nextop = F8;

         GETEX(0);

         GETGX;

-        switch((emu->mxcsr>>13)&3) {

+        switch(emu->mxcsr.f.MXCSR_RC) {

             case ROUND_Nearest:

                 GX->sd[0] = floor(EX->d[0]+0.5);

                 GX->sd[1] = floor(EX->d[1]+0.5);

diff --git a/src/emu/x64runf30f.c b/src/emu/x64runf30f.c
index 82f8e2f5..150ade09 100644
--- a/src/emu/x64runf30f.c
+++ b/src/emu/x64runf30f.c
@@ -101,7 +101,7 @@ uintptr_t RunF30F(x64emu_t *emu, rex_t rex, uintptr_t addr)
         GETEX(0);

         GETGD;

         if(rex.w) {

-            switch((emu->mxcsr>>13)&3) {

+            switch(emu->mxcsr.f.MXCSR_RC) {

                 case ROUND_Nearest:

                     GD->sq[0] = floorf(EX->f[0]+0.5f);

                     break;

@@ -116,7 +116,7 @@ uintptr_t RunF30F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                     break;

             }

         } else {

-            switch((emu->mxcsr>>13)&3) {

+            switch(emu->mxcsr.f.MXCSR_RC) {

                 case ROUND_Nearest:

                     GD->sdword[0] = floorf(EX->f[0]+0.5f);

                     break;

diff --git a/src/emu/x87emu_private.c b/src/emu/x87emu_private.c
index cd92354b..7a16cd97 100755
--- a/src/emu/x87emu_private.c
+++ b/src/emu/x87emu_private.c
@@ -301,6 +301,7 @@ void fpu_fxsave32(x64emu_t* emu, void* ed)
     emu->sw.f.F87_TOP = top;
     p->ControlWord = emu->cw.x16;
     p->StatusWord = emu->sw.x16;
+    p->MxCsr = emu->mxcsr.x32;
     uint8_t tags = 0;
     for (int i=0; i<8; ++i)
         tags |= ((emu->p_regs[i].tag)<<(i*2)==0b11)?0:1;
@@ -330,6 +331,7 @@ void fpu_fxsave64(x64emu_t* emu, void* ed)
     emu->sw.f.F87_TOP = top;
     p->ControlWord = emu->cw.x16;
     p->StatusWord = emu->sw.x16;
+    p->MxCsr = emu->mxcsr.x32;
     uint8_t tags = 0;
     for (int i=0; i<8; ++i)
         tags |= ((emu->p_regs[i].tag)<<(i*2)==0b11)?0:1;
@@ -351,6 +353,9 @@ void fpu_fxrstor32(x64emu_t* emu, void* ed)
     xsave32_t *p = (xsave32_t*)ed;
     emu->cw.x16 = p->ControlWord;
     emu->sw.x16 = p->StatusWord;
+    emu->mxcsr.x32 = p->MxCsr;
+    if(box64_sse_flushto0)
+        applyFlushTo0(emu);
     emu->top = emu->sw.f.F87_TOP;
     uint8_t tags = p->TagWord;
     for(int i=0; i<8; ++i)
@@ -371,6 +376,9 @@ void fpu_fxrstor64(x64emu_t* emu, void* ed)
     xsave64_t *p = (xsave64_t*)ed;
     emu->cw.x16 = p->ControlWord;
     emu->sw.x16 = p->StatusWord;
+    emu->mxcsr.x32 = p->MxCsr;
+    if(box64_sse_flushto0)
+        applyFlushTo0(emu);
     emu->top = emu->sw.f.F87_TOP;
     uint8_t tags = p->TagWord;
     for(int i=0; i<8; ++i)
diff --git a/src/include/debug.h b/src/include/debug.h
index 6db06b9e..3ff15229 100755
--- a/src/include/debug.h
+++ b/src/include/debug.h
@@ -43,6 +43,7 @@ extern char* box64_trace;
 extern uint64_t start_cnt;
 #endif
 extern int box64_dummy_crashhandler;
+extern int box64_sse_flushto0;
 extern int allow_missing_libs;
 extern int box64_mapclean;
 extern int box64_prefer_wrapped;
diff --git a/src/include/regs.h b/src/include/regs.h
index d66e8065..0097e723 100755
--- a/src/include/regs.h
+++ b/src/include/regs.h
@@ -223,42 +223,64 @@ typedef enum {
 
 typedef union {
     struct __attribute__ ((__packed__)) {
-        unsigned int F87_IE:1;
-        unsigned int F87_DE:1;
-        unsigned int F87_ZE:1;
-        unsigned int F87_OE:1;
-        unsigned int F87_UE:1;
-        unsigned int F87_PE:1;
-        unsigned int F87_SF:1;
-        unsigned int F87_ES:1;
-        unsigned int F87_C0:1;
-		unsigned int F87_C1:1;
-		unsigned int F87_C2:1;
-		unsigned int F87_TOP:3;
-		unsigned int F87_C3:1;
-		unsigned int F87_B:1;
+        uint16_t F87_IE:1;
+        uint16_t F87_DE:1;
+        uint16_t F87_ZE:1;
+        uint16_t F87_OE:1;
+        uint16_t F87_UE:1;
+        uint16_t F87_PE:1;
+        uint16_t F87_SF:1;
+        uint16_t F87_ES:1;
+        uint16_t F87_C0:1;
+		uint16_t F87_C1:1;
+		uint16_t F87_C2:1;
+		uint16_t F87_TOP:3;
+		uint16_t F87_C3:1;
+		uint16_t F87_B:1;
     } f;
     uint16_t    x16;
 } x87flags_t;
 
 typedef union {
     struct __attribute__ ((__packed__)) {
-        unsigned int C87_IM:1;	// interupt masks
-        unsigned int C87_DM:1;
-        unsigned int C87_ZM:1;
-        unsigned int C87_OM:1;
-        unsigned int C87_UM:1;
-        unsigned int C87_PM:1;
-        unsigned int C87_R1:2;	// reserved
-        unsigned int C87_PC:2;	// precision control (24bits, reserved, 53bits, 64bits)
-        unsigned int C87_RD:2;	// Rounds
-		unsigned int C87_IC:1;
-		unsigned int C87_R2:3;	// reserved
+        uint16_t C87_IM:1;	// interupt masks
+        uint16_t C87_DM:1;
+        uint16_t C87_ZM:1;
+        uint16_t C87_OM:1;
+        uint16_t C87_UM:1;
+        uint16_t C87_PM:1;
+        uint16_t C87_R1:2;	// reserved
+        uint16_t C87_PC:2;	// precision control (24bits, reserved, 53bits, 64bits)
+        uint16_t C87_RD:2;	// Rounds
+		uint16_t C87_IC:1;
+		uint16_t C87_R2:3;	// reserved
     } f;
     uint16_t    x16;
 } x87control_t;
 
 typedef union {
+	struct __attribute__ ((__packed__)) {
+		uint32_t	MXCSR_IE:1;
+		uint32_t	MXCSR_DE:1;
+		uint32_t	MXCSR_ZE:1;
+		uint32_t	MXCSR_OE:1;
+		uint32_t	MXCSR_UE:1;
+		uint32_t	MXCSR_PE:1;
+		uint32_t	MXCSR_DAZ:1;
+		uint32_t	MXCSR_IM:1;
+		uint32_t	MXCSR_DM:1;
+		uint32_t	MXCSR_ZM:1;
+		uint32_t	MXCSR_OM:1;
+		uint32_t	MXCSR_UM:1;
+		uint32_t	MXCSR_PM:1;
+		uint32_t	MXCSR_RC:2;
+		uint32_t	MXCSR_FZ:1;
+		uint32_t	MXCSR_RES:16;
+	} f;
+	uint32_t	x32;
+} mmxcontrol_t;
+
+typedef union {
 	uint64_t	q;
 	int64_t		sq;
 	double		d;
diff --git a/src/main.c b/src/main.c
index e7bc7d39..83348dac 100755
--- a/src/main.c
+++ b/src/main.c
@@ -86,6 +86,7 @@ int box64_x11glx = 1;
 int allow_missing_libs = 0;
 int box64_prefer_emulated = 0;
 int box64_prefer_wrapped = 0;
+int box64_sse_flushto0 = 0;
 int fix_64bit_inodes = 0;
 int box64_dummy_crashhandler = 1;
 int box64_mapclean = 0;
@@ -896,6 +897,12 @@ void LoadEnvVars(box64context_t *context)
     AddPath("libcrypto.so.1.0.0", &context->box64_emulated_libs, 0);
     AddPath("libunwind.so.8", &context->box64_emulated_libs, 0);
 
+    if(getenv("BOX64_SSE_FLUSHTO0")) {
+        if (strcmp(getenv("BOX64_SSE_FLUSHTO0"), "1")==0) {
+            box64_sse_flushto0 = 1;
+            printf_log(LOG_INFO, "BOX64: Direct apply of SSE Flush to 0 flag\n");
+    	}
+    }
     if(getenv("BOX64_PREFER_WRAPPED")) {
         if (strcmp(getenv("BOX64_PREFER_WRAPPED"), "1")==0) {
             box64_prefer_wrapped = 1;
diff --git a/src/tools/rcfile.c b/src/tools/rcfile.c
index 6a9abb87..cf0f4e2c 100644
--- a/src/tools/rcfile.c
+++ b/src/tools/rcfile.c
@@ -34,6 +34,7 @@ ENTRYBOOL(BOX64_SHOWBT, box64_showbt)                   \
 ENTRYBOOL(BOX64_X11THREADS, box64_x11threads)           \
 ENTRYBOOL(BOX64_X11GLX, box64_x11glx)                   \
 ENTRYDSTRING(BOX64_LIBGL, box64_libGL)                  \
+ENTRYBOOL(BOX64_SSE_FLUSHTO0, box64_sse_flushto0)       \
 ENTRYSTRING_(BOX64_EMULATED_LIBS, emulated_libs)        \
 ENTRYBOOL(BOX64_ALLOWMISSINGLIBS, allow_missing_libs)   \
 ENTRYBOOL(BOX64_PREFER_WRAPPED, box64_prefer_wrapped)   \