diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2022-12-04 10:13:35 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2022-12-04 10:13:35 +0100 |
| commit | fb02ae0e9afb089d0807af0b8c8419f2d16df363 (patch) | |
| tree | 9c4c082aecdc4c58f87bafc1b04eefece85b7296 /src | |
| parent | ffb293fe0a9952c32fe2ebd164d6409facf8d921 (diff) | |
| download | box64-fb02ae0e9afb089d0807af0b8c8419f2d16df363.tar.gz box64-fb02ae0e9afb089d0807af0b8c8419f2d16df363.zip | |
Added experimental (undocumented) BOX64_SSE_FLUSHTO0 env. var. (not sure it's really usefull)
Diffstat (limited to 'src')
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_0f.c | 19 | ||||
| -rwxr-xr-x | src/emu/x64emu.c | 22 | ||||
| -rwxr-xr-x | src/emu/x64emu_private.h | 4 | ||||
| -rw-r--r-- | src/emu/x64run0f.c | 8 | ||||
| -rw-r--r-- | src/emu/x64run660f.c | 8 | ||||
| -rw-r--r-- | src/emu/x64runf20f.c | 6 | ||||
| -rw-r--r-- | src/emu/x64runf30f.c | 4 | ||||
| -rwxr-xr-x | src/emu/x87emu_private.c | 8 | ||||
| -rwxr-xr-x | src/include/debug.h | 1 | ||||
| -rwxr-xr-x | src/include/regs.h | 72 | ||||
| -rwxr-xr-x | src/main.c | 7 | ||||
| -rw-r--r-- | src/tools/rcfile.c | 1 |
12 files changed, 113 insertions, 47 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index 3e958790..bcdf66b4 100755 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -1279,17 +1279,20 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin INST_NAME("LDMXCSR Md"); GETED(0); STRw_U12(ed, xEmu, offsetof(x64emu_t, mxcsr)); + if(box64_sse_flushto0) { + MRS_fpcr(x1); // get fpscr + LSRw_IMM(x3, ed, 15); // get FZ bit + BFIw(x1, x3, 24, 1); // inject FZ bit + EORw_REG_LSR(x3, x3, ed, 1); // FZ xor DAZ + BFIw(x1, x3, 1, 1); // inject AH bit + MSR_fpcr(x1); // put new fpscr + } break; case 3: INST_NAME("STMXCSR Md"); - if(MODREG) { - ed = xRAX+(nextop&7)+(rex.b<<3); - LDRw_U12(ed, xEmu, offsetof(x64emu_t, mxcsr)); - } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0); - LDRw_U12(x4, xEmu, offsetof(x64emu_t, mxcsr)); - STRw_U12(x4, ed, fixedaddress); - } + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0); + LDRw_U12(x4, xEmu, offsetof(x64emu_t, mxcsr)); + STRw_U12(x4, ed, fixedaddress); break; case 7: INST_NAME("CLFLUSH Ed"); diff --git a/src/emu/x64emu.c b/src/emu/x64emu.c index 9e4fc7e0..d23e10e7 100755 --- a/src/emu/x64emu.c +++ b/src/emu/x64emu.c @@ -23,6 +23,13 @@ #ifdef DYNAREC #include "custommem.h" #endif +// for the applyFlushTo0 +#ifdef __x86_64__ +#include <immintrin.h> +#elif defined(__aarch64__) +#else +#warning Architecture cannot follow SSE Flush to 0 flag +#endif typedef struct cleanup_s { void* f; @@ -81,7 +88,7 @@ static void internalX64Setup(x64emu_t* emu, box64context_t *context, uintptr_t s emu->segs[_GS] = default_gs; // setup fpu regs reset_fpu(emu); - emu->mxcsr = 0x1f80; + emu->mxcsr.x32 = 0x1f80; } EXPORTDYN @@ -505,3 +512,16 @@ void ResetSegmentsCache(x64emu_t *emu) return; memset(emu->segs_serial, 0, sizeof(emu->segs_serial)); } + +void applyFlushTo0(x64emu_t* emu) +{ + #ifdef __x86_64__ + _mm_setcsr(_mm_getcsr() | (emu->mxcsr.x32&0x8040)); + #elif defined(__aarch64__) + uint64_t fpcr = __builtin_aarch64_get_fpcr(); + fpcr &= ~((1<<24) | (1<<1)); // clear bit FZ (24) and AH (1) + fpcr |= (emu->mxcsr.f.MXCSR_FZ)<<24; // set FZ as mxcsr FZ + fpcr |= ((emu->mxcsr.f.MXCSR_DAZ)^(emu->mxcsr.f.MXCSR_FZ))<<1; // set AH if DAZ different from FZ + __builtin_aarch64_set_fpcr(fpcr); + #endif +} \ No newline at end of file diff --git a/src/emu/x64emu_private.h b/src/emu/x64emu_private.h index 093706bf..68fa5f83 100755 --- a/src/emu/x64emu_private.h +++ b/src/emu/x64emu_private.h @@ -39,7 +39,7 @@ typedef struct x64emu_s { x87flags_t sw; uint32_t top; // top is part of sw, but it's faster to have it separatly int fpu_stack; - uint32_t mxcsr; + mmxcontrol_t mxcsr; fpu_ld_t fpu_ld[8]; // for long double emulation / 80bits fld fst fpu_ll_t fpu_ll[8]; // for 64bits fild / fist sequence fpu_p_reg_t p_regs[8]; @@ -97,4 +97,6 @@ typedef struct x64emu_s { //#define INTR_RAISE_DIV0(emu) {emu->error |= ERR_DIVBY0; emu->quit=1;} #define INTR_RAISE_DIV0(emu) {emu->error |= ERR_DIVBY0;} // should rise a SIGFPE and not quit +void applyFlushTo0(x64emu_t* emu); + #endif //__X86EMU_PRIVATE_H_ diff --git a/src/emu/x64run0f.c b/src/emu/x64run0f.c index 5cde4f53..491f373d 100644 --- a/src/emu/x64run0f.c +++ b/src/emu/x64run0f.c @@ -201,7 +201,7 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step) nextop = F8; GETEX(0); GETGM; - switch((emu->mxcsr>>13)&3) { + switch(emu->mxcsr.f.MXCSR_RC) { case ROUND_Nearest: GM->sd[1] = floorf(EX->f[1]+0.5f); GM->sd[0] = floorf(EX->f[0]+0.5f); @@ -897,10 +897,12 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step) fpu_fxrstor32(emu, ED); break; case 2: /* LDMXCSR Md */ - emu->mxcsr = ED->dword[0]; + emu->mxcsr.x32 = ED->dword[0]; + if(box64_sse_flushto0) + applyFlushTo0(emu); break; case 3: /* STMXCSR Md */ - ED->dword[0] = emu->mxcsr; + ED->dword[0] = emu->mxcsr.x32; break; case 7: /* CLFLUSH Ed */ #ifdef DYNAREC diff --git a/src/emu/x64run660f.c b/src/emu/x64run660f.c index 4d5dd1b7..d2d97c3d 100644 --- a/src/emu/x64run660f.c +++ b/src/emu/x64run660f.c @@ -197,7 +197,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr) nextop = F8; GETEX(0); GETGM; - switch((emu->mxcsr>>13)&3) { + switch(emu->mxcsr.f.MXCSR_RC) { case ROUND_Nearest: GM->sd[0] = floor(EX->d[0]+0.5); GM->sd[1] = floor(EX->d[1]+0.5); @@ -668,7 +668,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr) GETGX; tmp8u = F8; // ignoring bit 3 interupt thingy if(tmp8u&4) - tmp8u = (emu->mxcsr>>13)&3; + tmp8u = emu->mxcsr.f.MXCSR_RC; else tmp8u &= 3; switch(tmp8u) { @@ -692,7 +692,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr) GETGX; tmp8u = F8; // ignoring bit 3 interupt thingy if(tmp8u&4) - tmp8u = (emu->mxcsr>>13)&3; + tmp8u = emu->mxcsr.f.MXCSR_RC; else tmp8u &= 3; switch(tmp8u) { @@ -951,7 +951,7 @@ uintptr_t Run660F(x64emu_t *emu, rex_t rex, uintptr_t addr) nextop = F8; GETEX(0); GETGX; - switch((emu->mxcsr>>13)&3) { + switch(emu->mxcsr.f.MXCSR_RC) { case ROUND_Nearest: GX->sd[0] = floorf(EX->f[0]+0.5f); GX->sd[1] = floorf(EX->f[1]+0.5f); diff --git a/src/emu/x64runf20f.c b/src/emu/x64runf20f.c index 18eea952..f8b89dfb 100644 --- a/src/emu/x64runf20f.c +++ b/src/emu/x64runf20f.c @@ -95,7 +95,7 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step) GETEX(0); GETGD; if(rex.w) { - switch((emu->mxcsr>>13)&3) { + switch(emu->mxcsr.f.MXCSR_RC) { case ROUND_Nearest: GD->q[0] = floor(EX->d[0]+0.5); break; @@ -110,7 +110,7 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step) break; } } else { - switch((emu->mxcsr>>13)&3) { + switch(emu->mxcsr.f.MXCSR_RC) { case ROUND_Nearest: GD->sdword[0] = floor(EX->d[0]+0.5); break; @@ -300,7 +300,7 @@ uintptr_t RunF20F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step) nextop = F8; GETEX(0); GETGX; - switch((emu->mxcsr>>13)&3) { + switch(emu->mxcsr.f.MXCSR_RC) { case ROUND_Nearest: GX->sd[0] = floor(EX->d[0]+0.5); GX->sd[1] = floor(EX->d[1]+0.5); diff --git a/src/emu/x64runf30f.c b/src/emu/x64runf30f.c index 82f8e2f5..150ade09 100644 --- a/src/emu/x64runf30f.c +++ b/src/emu/x64runf30f.c @@ -101,7 +101,7 @@ uintptr_t RunF30F(x64emu_t *emu, rex_t rex, uintptr_t addr) GETEX(0); GETGD; if(rex.w) { - switch((emu->mxcsr>>13)&3) { + switch(emu->mxcsr.f.MXCSR_RC) { case ROUND_Nearest: GD->sq[0] = floorf(EX->f[0]+0.5f); break; @@ -116,7 +116,7 @@ uintptr_t RunF30F(x64emu_t *emu, rex_t rex, uintptr_t addr) break; } } else { - switch((emu->mxcsr>>13)&3) { + switch(emu->mxcsr.f.MXCSR_RC) { case ROUND_Nearest: GD->sdword[0] = floorf(EX->f[0]+0.5f); break; diff --git a/src/emu/x87emu_private.c b/src/emu/x87emu_private.c index cd92354b..7a16cd97 100755 --- a/src/emu/x87emu_private.c +++ b/src/emu/x87emu_private.c @@ -301,6 +301,7 @@ void fpu_fxsave32(x64emu_t* emu, void* ed) emu->sw.f.F87_TOP = top; p->ControlWord = emu->cw.x16; p->StatusWord = emu->sw.x16; + p->MxCsr = emu->mxcsr.x32; uint8_t tags = 0; for (int i=0; i<8; ++i) tags |= ((emu->p_regs[i].tag)<<(i*2)==0b11)?0:1; @@ -330,6 +331,7 @@ void fpu_fxsave64(x64emu_t* emu, void* ed) emu->sw.f.F87_TOP = top; p->ControlWord = emu->cw.x16; p->StatusWord = emu->sw.x16; + p->MxCsr = emu->mxcsr.x32; uint8_t tags = 0; for (int i=0; i<8; ++i) tags |= ((emu->p_regs[i].tag)<<(i*2)==0b11)?0:1; @@ -351,6 +353,9 @@ void fpu_fxrstor32(x64emu_t* emu, void* ed) xsave32_t *p = (xsave32_t*)ed; emu->cw.x16 = p->ControlWord; emu->sw.x16 = p->StatusWord; + emu->mxcsr.x32 = p->MxCsr; + if(box64_sse_flushto0) + applyFlushTo0(emu); emu->top = emu->sw.f.F87_TOP; uint8_t tags = p->TagWord; for(int i=0; i<8; ++i) @@ -371,6 +376,9 @@ void fpu_fxrstor64(x64emu_t* emu, void* ed) xsave64_t *p = (xsave64_t*)ed; emu->cw.x16 = p->ControlWord; emu->sw.x16 = p->StatusWord; + emu->mxcsr.x32 = p->MxCsr; + if(box64_sse_flushto0) + applyFlushTo0(emu); emu->top = emu->sw.f.F87_TOP; uint8_t tags = p->TagWord; for(int i=0; i<8; ++i) diff --git a/src/include/debug.h b/src/include/debug.h index 6db06b9e..3ff15229 100755 --- a/src/include/debug.h +++ b/src/include/debug.h @@ -43,6 +43,7 @@ extern char* box64_trace; extern uint64_t start_cnt; #endif extern int box64_dummy_crashhandler; +extern int box64_sse_flushto0; extern int allow_missing_libs; extern int box64_mapclean; extern int box64_prefer_wrapped; diff --git a/src/include/regs.h b/src/include/regs.h index d66e8065..0097e723 100755 --- a/src/include/regs.h +++ b/src/include/regs.h @@ -223,42 +223,64 @@ typedef enum { typedef union { struct __attribute__ ((__packed__)) { - unsigned int F87_IE:1; - unsigned int F87_DE:1; - unsigned int F87_ZE:1; - unsigned int F87_OE:1; - unsigned int F87_UE:1; - unsigned int F87_PE:1; - unsigned int F87_SF:1; - unsigned int F87_ES:1; - unsigned int F87_C0:1; - unsigned int F87_C1:1; - unsigned int F87_C2:1; - unsigned int F87_TOP:3; - unsigned int F87_C3:1; - unsigned int F87_B:1; + uint16_t F87_IE:1; + uint16_t F87_DE:1; + uint16_t F87_ZE:1; + uint16_t F87_OE:1; + uint16_t F87_UE:1; + uint16_t F87_PE:1; + uint16_t F87_SF:1; + uint16_t F87_ES:1; + uint16_t F87_C0:1; + uint16_t F87_C1:1; + uint16_t F87_C2:1; + uint16_t F87_TOP:3; + uint16_t F87_C3:1; + uint16_t F87_B:1; } f; uint16_t x16; } x87flags_t; typedef union { struct __attribute__ ((__packed__)) { - unsigned int C87_IM:1; // interupt masks - unsigned int C87_DM:1; - unsigned int C87_ZM:1; - unsigned int C87_OM:1; - unsigned int C87_UM:1; - unsigned int C87_PM:1; - unsigned int C87_R1:2; // reserved - unsigned int C87_PC:2; // precision control (24bits, reserved, 53bits, 64bits) - unsigned int C87_RD:2; // Rounds - unsigned int C87_IC:1; - unsigned int C87_R2:3; // reserved + uint16_t C87_IM:1; // interupt masks + uint16_t C87_DM:1; + uint16_t C87_ZM:1; + uint16_t C87_OM:1; + uint16_t C87_UM:1; + uint16_t C87_PM:1; + uint16_t C87_R1:2; // reserved + uint16_t C87_PC:2; // precision control (24bits, reserved, 53bits, 64bits) + uint16_t C87_RD:2; // Rounds + uint16_t C87_IC:1; + uint16_t C87_R2:3; // reserved } f; uint16_t x16; } x87control_t; typedef union { + struct __attribute__ ((__packed__)) { + uint32_t MXCSR_IE:1; + uint32_t MXCSR_DE:1; + uint32_t MXCSR_ZE:1; + uint32_t MXCSR_OE:1; + uint32_t MXCSR_UE:1; + uint32_t MXCSR_PE:1; + uint32_t MXCSR_DAZ:1; + uint32_t MXCSR_IM:1; + uint32_t MXCSR_DM:1; + uint32_t MXCSR_ZM:1; + uint32_t MXCSR_OM:1; + uint32_t MXCSR_UM:1; + uint32_t MXCSR_PM:1; + uint32_t MXCSR_RC:2; + uint32_t MXCSR_FZ:1; + uint32_t MXCSR_RES:16; + } f; + uint32_t x32; +} mmxcontrol_t; + +typedef union { uint64_t q; int64_t sq; double d; diff --git a/src/main.c b/src/main.c index e7bc7d39..83348dac 100755 --- a/src/main.c +++ b/src/main.c @@ -86,6 +86,7 @@ int box64_x11glx = 1; int allow_missing_libs = 0; int box64_prefer_emulated = 0; int box64_prefer_wrapped = 0; +int box64_sse_flushto0 = 0; int fix_64bit_inodes = 0; int box64_dummy_crashhandler = 1; int box64_mapclean = 0; @@ -896,6 +897,12 @@ void LoadEnvVars(box64context_t *context) AddPath("libcrypto.so.1.0.0", &context->box64_emulated_libs, 0); AddPath("libunwind.so.8", &context->box64_emulated_libs, 0); + if(getenv("BOX64_SSE_FLUSHTO0")) { + if (strcmp(getenv("BOX64_SSE_FLUSHTO0"), "1")==0) { + box64_sse_flushto0 = 1; + printf_log(LOG_INFO, "BOX64: Direct apply of SSE Flush to 0 flag\n"); + } + } if(getenv("BOX64_PREFER_WRAPPED")) { if (strcmp(getenv("BOX64_PREFER_WRAPPED"), "1")==0) { box64_prefer_wrapped = 1; diff --git a/src/tools/rcfile.c b/src/tools/rcfile.c index 6a9abb87..cf0f4e2c 100644 --- a/src/tools/rcfile.c +++ b/src/tools/rcfile.c @@ -34,6 +34,7 @@ ENTRYBOOL(BOX64_SHOWBT, box64_showbt) \ ENTRYBOOL(BOX64_X11THREADS, box64_x11threads) \ ENTRYBOOL(BOX64_X11GLX, box64_x11glx) \ ENTRYDSTRING(BOX64_LIBGL, box64_libGL) \ +ENTRYBOOL(BOX64_SSE_FLUSHTO0, box64_sse_flushto0) \ ENTRYSTRING_(BOX64_EMULATED_LIBS, emulated_libs) \ ENTRYBOOL(BOX64_ALLOWMISSINGLIBS, allow_missing_libs) \ ENTRYBOOL(BOX64_PREFER_WRAPPED, box64_prefer_wrapped) \ |