about summary refs log tree commit diff stats
path: root/src/emu
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2022-12-04 10:13:35 +0100
committerptitSeb <sebastien.chev@gmail.com>2022-12-04 10:13:35 +0100
commitfb02ae0e9afb089d0807af0b8c8419f2d16df363 (patch)
tree9c4c082aecdc4c58f87bafc1b04eefece85b7296 /src/emu
parentffb293fe0a9952c32fe2ebd164d6409facf8d921 (diff)
downloadbox64-fb02ae0e9afb089d0807af0b8c8419f2d16df363.tar.gz
box64-fb02ae0e9afb089d0807af0b8c8419f2d16df363.zip
Added experimental (undocumented) BOX64_SSE_FLUSHTO0 env. var. (not sure it's really usefull)
Diffstat (limited to 'src/emu')
-rwxr-xr-xsrc/emu/x64emu.c22
-rwxr-xr-xsrc/emu/x64emu_private.h4
-rw-r--r--src/emu/x64run0f.c8
-rw-r--r--src/emu/x64run660f.c8
-rw-r--r--src/emu/x64runf20f.c6
-rw-r--r--src/emu/x64runf30f.c4
-rwxr-xr-xsrc/emu/x87emu_private.c8
7 files changed, 46 insertions, 14 deletions
diff --git a/src/emu/x64emu.c b/src/emu/x64emu.c
index 9e4fc7e0..d23e10e7 100755
--- a/src/emu/x64emu.c
+++ b/src/emu/x64emu.c
@@ -23,6 +23,13 @@
 #ifdef DYNAREC
 #include "custommem.h"
 #endif
+// for the applyFlushTo0
+#ifdef __x86_64__
+#include <immintrin.h>
+#elif defined(__aarch64__)
+#else
+#warning Architecture cannot follow SSE Flush to 0 flag
+#endif
 
 typedef struct cleanup_s {
     void*       f;
@@ -81,7 +88,7 @@ static void internalX64Setup(x64emu_t* emu, box64context_t *context, uintptr_t s
     emu->segs[_GS] = default_gs;
     // setup fpu regs
     reset_fpu(emu);
-    emu->mxcsr = 0x1f80;
+    emu->mxcsr.x32 = 0x1f80;
 }
 
 EXPORTDYN
@@ -505,3 +512,16 @@ void ResetSegmentsCache(x64emu_t *emu)
         return;
     memset(emu->segs_serial, 0, sizeof(emu->segs_serial));
 }
+
+void applyFlushTo0(x64emu_t* emu)
+{
+    #ifdef __x86_64__
+    _mm_setcsr(_mm_getcsr() | (emu->mxcsr.x32&0x8040));
+    #elif defined(__aarch64__)
+    uint64_t fpcr = __builtin_aarch64_get_fpcr();
+    fpcr &= ~((1<<24) | (1<<1));    // clear bit FZ (24) and AH (1)
+    fpcr |= (emu->mxcsr.f.MXCSR_FZ)<<24;  // set FZ as mxcsr FZ
+    fpcr |= ((emu->mxcsr.f.MXCSR_DAZ)^(emu->mxcsr.f.MXCSR_FZ))<<1; // set AH if DAZ different from FZ
+    __builtin_aarch64_set_fpcr(fpcr);
+    #endif
+}
\ No newline at end of file
diff --git a/src/emu/x64emu_private.h b/src/emu/x64emu_private.h
index 093706bf..68fa5f83 100755
--- a/src/emu/x64emu_private.h
+++ b/src/emu/x64emu_private.h
@@ -39,7 +39,7 @@ typedef struct x64emu_s {
 	x87flags_t  sw;
 	uint32_t    top;        // top is part of sw, but it's faster to have it separatly
     int         fpu_stack;
-    uint32_t    mxcsr;
+    mmxcontrol_t mxcsr;
     fpu_ld_t    fpu_ld[8]; // for long double emulation / 80bits fld fst
     fpu_ll_t    fpu_ll[8]; // for 64bits fild / fist sequence
 	fpu_p_reg_t p_regs[8];
@@ -97,4 +97,6 @@ typedef struct x64emu_s {
 //#define INTR_RAISE_DIV0(emu) {emu->error |= ERR_DIVBY0; emu->quit=1;}
 #define INTR_RAISE_DIV0(emu) {emu->error |= ERR_DIVBY0;} // should rise a SIGFPE and not quit
 
+void applyFlushTo0(x64emu_t* emu);
+
 #endif //__X86EMU_PRIVATE_H_
diff --git a/src/emu/x64run0f.c b/src/emu/x64run0f.c
index 5cde4f53..491f373d 100644
--- a/src/emu/x64run0f.c
+++ b/src/emu/x64run0f.c
@@ -201,7 +201,7 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
             nextop = F8;

             GETEX(0);

             GETGM;

-            switch((emu->mxcsr>>13)&3) {

+            switch(emu->mxcsr.f.MXCSR_RC) {

                 case ROUND_Nearest:

                     GM->sd[1] = floorf(EX->f[1]+0.5f);

                     GM->sd[0] = floorf(EX->f[0]+0.5f);

@@ -897,10 +897,12 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
                         fpu_fxrstor32(emu, ED);

                     break;

                 case 2:                 /* LDMXCSR Md */

-                    emu->mxcsr = ED->dword[0];

+                    emu->mxcsr.x32 = ED->dword[0];

+                    if(box64_sse_flushto0)

+                        applyFlushTo0(emu);

                     break;

                 case 3:                 /* STMXCSR Md */

-                    ED->dword[0] = emu->mxcsr;

+                    ED->dword[0] = emu->mxcsr.x32;

                     break;

                 case 7:                 /* CLFLUSH Ed */

                     #ifdef DYNAREC

diff --git a/src/emu/x64run660f.c b/src/emu/x64run660f.c
index 4d5dd1b7..d2d97c3d 100644
--- a/src/emu/x64run660f.c
+++ b/src/emu/x64run660f.c
@@ -197,7 +197,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
         nextop = F8;

         GETEX(0);

         GETGM;

-        switch((emu->mxcsr>>13)&3) {

+        switch(emu->mxcsr.f.MXCSR_RC) {

             case ROUND_Nearest:

                 GM->sd[0] = floor(EX->d[0]+0.5);

                 GM->sd[1] = floor(EX->d[1]+0.5);

@@ -668,7 +668,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 GETGX;

                 tmp8u = F8; // ignoring bit 3 interupt thingy

                 if(tmp8u&4)

-                    tmp8u = (emu->mxcsr>>13)&3;

+                    tmp8u = emu->mxcsr.f.MXCSR_RC;

                 else

                     tmp8u &= 3;

                 switch(tmp8u) {

@@ -692,7 +692,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                 GETGX;

                 tmp8u = F8; // ignoring bit 3 interupt thingy

                 if(tmp8u&4)

-                    tmp8u = (emu->mxcsr>>13)&3;

+                    tmp8u = emu->mxcsr.f.MXCSR_RC;

                 else

                     tmp8u &= 3;

                 switch(tmp8u) {

@@ -951,7 +951,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr)
         nextop = F8;

         GETEX(0);

         GETGX;

-        switch((emu->mxcsr>>13)&3) {

+        switch(emu->mxcsr.f.MXCSR_RC) {

             case ROUND_Nearest:

                 GX->sd[0] = floorf(EX->f[0]+0.5f);

                 GX->sd[1] = floorf(EX->f[1]+0.5f);

diff --git a/src/emu/x64runf20f.c b/src/emu/x64runf20f.c
index 18eea952..f8b89dfb 100644
--- a/src/emu/x64runf20f.c
+++ b/src/emu/x64runf20f.c
@@ -95,7 +95,7 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
         GETEX(0);

         GETGD;

         if(rex.w) {

-            switch((emu->mxcsr>>13)&3) {

+            switch(emu->mxcsr.f.MXCSR_RC) {

                 case ROUND_Nearest:

                     GD->q[0] = floor(EX->d[0]+0.5);

                     break;

@@ -110,7 +110,7 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
                     break;

             }

         } else {

-            switch((emu->mxcsr>>13)&3) {

+            switch(emu->mxcsr.f.MXCSR_RC) {

                 case ROUND_Nearest:

                     GD->sdword[0] = floor(EX->d[0]+0.5);

                     break;

@@ -300,7 +300,7 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step)
         nextop = F8;

         GETEX(0);

         GETGX;

-        switch((emu->mxcsr>>13)&3) {

+        switch(emu->mxcsr.f.MXCSR_RC) {

             case ROUND_Nearest:

                 GX->sd[0] = floor(EX->d[0]+0.5);

                 GX->sd[1] = floor(EX->d[1]+0.5);

diff --git a/src/emu/x64runf30f.c b/src/emu/x64runf30f.c
index 82f8e2f5..150ade09 100644
--- a/src/emu/x64runf30f.c
+++ b/src/emu/x64runf30f.c
@@ -101,7 +101,7 @@ uintptr_t RunF30F(x64emu_t *emu, rex_t rex, uintptr_t addr)
         GETEX(0);

         GETGD;

         if(rex.w) {

-            switch((emu->mxcsr>>13)&3) {

+            switch(emu->mxcsr.f.MXCSR_RC) {

                 case ROUND_Nearest:

                     GD->sq[0] = floorf(EX->f[0]+0.5f);

                     break;

@@ -116,7 +116,7 @@ uintptr_t RunF30F(x64emu_t *emu, rex_t rex, uintptr_t addr)
                     break;

             }

         } else {

-            switch((emu->mxcsr>>13)&3) {

+            switch(emu->mxcsr.f.MXCSR_RC) {

                 case ROUND_Nearest:

                     GD->sdword[0] = floorf(EX->f[0]+0.5f);

                     break;

diff --git a/src/emu/x87emu_private.c b/src/emu/x87emu_private.c
index cd92354b..7a16cd97 100755
--- a/src/emu/x87emu_private.c
+++ b/src/emu/x87emu_private.c
@@ -301,6 +301,7 @@ void fpu_fxsave32(x64emu_t* emu, void* ed)
     emu->sw.f.F87_TOP = top;
     p->ControlWord = emu->cw.x16;
     p->StatusWord = emu->sw.x16;
+    p->MxCsr = emu->mxcsr.x32;
     uint8_t tags = 0;
     for (int i=0; i<8; ++i)
         tags |= ((emu->p_regs[i].tag)<<(i*2)==0b11)?0:1;
@@ -330,6 +331,7 @@ void fpu_fxsave64(x64emu_t* emu, void* ed)
     emu->sw.f.F87_TOP = top;
     p->ControlWord = emu->cw.x16;
     p->StatusWord = emu->sw.x16;
+    p->MxCsr = emu->mxcsr.x32;
     uint8_t tags = 0;
     for (int i=0; i<8; ++i)
         tags |= ((emu->p_regs[i].tag)<<(i*2)==0b11)?0:1;
@@ -351,6 +353,9 @@ void fpu_fxrstor32(x64emu_t* emu, void* ed)
     xsave32_t *p = (xsave32_t*)ed;
     emu->cw.x16 = p->ControlWord;
     emu->sw.x16 = p->StatusWord;
+    emu->mxcsr.x32 = p->MxCsr;
+    if(box64_sse_flushto0)
+        applyFlushTo0(emu);
     emu->top = emu->sw.f.F87_TOP;
     uint8_t tags = p->TagWord;
     for(int i=0; i<8; ++i)
@@ -371,6 +376,9 @@ void fpu_fxrstor64(x64emu_t* emu, void* ed)
     xsave64_t *p = (xsave64_t*)ed;
     emu->cw.x16 = p->ControlWord;
     emu->sw.x16 = p->StatusWord;
+    emu->mxcsr.x32 = p->MxCsr;
+    if(box64_sse_flushto0)
+        applyFlushTo0(emu);
     emu->top = emu->sw.f.F87_TOP;
     uint8_t tags = p->TagWord;
     for(int i=0; i<8; ++i)