| author | ptitSeb <sebastien.chev@gmail.com> | 2025-01-25 18:52:02 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2025-01-25 18:52:02 +0100 |
| commit | dab01b8b9ff763bb14d9615f30b47bcfed555c5b (patch) | |
| tree | d6fd8287d822e1939426bb8b89861b69636237a5 /src | |
| parent | 9e58047980175fbd2bde97b7b0b2de5fa8dffbed (diff) | |
[ARM64_DYNAREC] Added experimental sync of the fpsr and mxcsr exception flags when BOX64_SSE_FLUSHTO0 is used
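The patch treats the status-flag fields of the two registers as the same six exception flags stored at different bit positions, per the ARM -> X86 mapping table quoted in the diff below. As a rough, self-contained illustration of that mapping (the helper names and the standalone form here are mine, not code from the commit), the translation in plain C would look like this:

```c
#include <stdint.h>

/* Bit positions as the patch models them:
   x86 MXCSR status flags: bit 0 Invalid, 1 Denormal, 2 Divide-by-zero,
                           3 Overflow, 4 Underflow, 5 Inexact
   ARM flag side:          bit 0 Invalid, 1 Divide-by-zero, 2 Overflow,
                           3 Underflow, 4 Inexact, 5 Denormal              */

/* arm_to_x86[a] gives, for ARM flag bit a, the matching MXCSR bit
   (the "ARM -> X86" table from the diff). */
static const int arm_to_x86[6] = { 0, 2, 3, 4, 5, 1 };

/* Hypothetical helper: scatter the six MXCSR flag bits into the ARM layout. */
static uint32_t mxcsr_flags_to_arm(uint32_t mxcsr)
{
    uint32_t arm = 0;
    for (int a = 0; a < 6; a++)
        arm |= ((mxcsr >> arm_to_x86[a]) & 1u) << a;
    return arm;
}

/* Hypothetical helper: gather the ARM flag bits back into MXCSR positions. */
static uint32_t arm_flags_to_mxcsr(uint32_t arm)
{
    uint32_t mxcsr = 0;
    for (int a = 0; a < 6; a++)
        mxcsr |= ((arm >> a) & 1u) << arm_to_x86[a];
    return mxcsr;
}
```

The emitted ARM64 code does the corresponding shuffling with BFXIL/BFI/ROR on live registers rather than a lookup loop.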
Diffstat (limited to 'src')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_0f.c | 41 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_0f.c | 41 |
2 files changed, 74 insertions, 8 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 65a3ac09..f6e815df 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -1916,18 +1916,51 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     GETED(0);
                     STRw_U12(ed, xEmu, offsetof(x64emu_t, mxcsr));
                     if(BOX64ENV(sse_flushto0)) {
-                        MRS_fpcr(x1);   // get fpscr
+                        MRS_fpcr(x2);   // get fpscr
                         LSRw_IMM(x3, ed, 15);   // get FZ bit
-                        BFIw(x1, x3, 24, 1);    // inject FZ bit
+                        BFIw(x2, x3, 24, 1);    // inject FZ bit
                         EORw_REG_LSR(x3, x3, ed, 1);    // FZ xor DAZ
-                        BFIw(x1, x3, 1, 1); // inject AH bit
-                        MSR_fpcr(x1);   // put new fpscr
+                        BFIw(x2, x3, 1, 1); // inject AH bit
+                        MSR_fpcr(x2);   // put new fpscr
+                    }
+                    if(BOX64ENV(sse_flushto0)) {
+                        // try to sync mxcsr with fpsr on the flag side
+                        /* mapping is
+                            ARM -> X86
+                            0   -> 0    // Invalid operation
+                            1   -> 2    // Divide by 0
+                            2   -> 3    // Overflow
+                            3   -> 4    // underflow
+                            4   -> 5    // Inexact
+                            5   -> 1    // denormal
+                        */
+                        // doing X86 -> ARM here, 0 1 2 3 4 5 -> 0 5 1 2 3 4
+                        if(ed!=x1)
+                            MOVw_REG(x1, ed);
+                        BFXILw(x2, x1, 1, 5);   // x2 = 1 2 3 4 5 ...
+                        BFIw(x1, x2, 2, 4);     // x1 = 0 1 1 2 3 4
+                        RORw(x2, x2, 4);        // x2 = 5 .... 1 2 3 4
+                        BFIw(x1, x2, 1, 1);     // x1 = 0 5 1 2 3 4
+                        MRS_fpsr(x2);
+                        BFIx(x2, x1, 0, 6);
+                        MSR_fpsr(x2);
                     }
                     break;
                 case 3:
                     INST_NAME("STMXCSR Md");
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
                     LDRw_U12(x4, xEmu, offsetof(x64emu_t, mxcsr));
+                    if(BOX64ENV(sse_flushto0)) {
+                        // sync with fpsr, with mask from mxcsr
+                        // doing ARM -> X86 here, 0 1 2 3 4 5 -> 0 2 3 4 5 1
+                        MRS_fpsr(x1);
+                        RORw(x3, x1, 2);    //x3 = 2 3 4 5 .... 0 1
+                        BFIw(x1, x3, 1, 4);
+                        RORw(x3, x3, 32-1);
+                        BFIw(x1, x3, 5, 1); // x1 is Flags
+                        //BFXILw(x3, x4, 7, 6);   // this would the mask, but let's ignore that for now
+                        BFIw(x4, x1, 0, 6); // inject back the flags
+                    }
                     STW(x4, ed, fixedaddress);
                     break;
                 case 4:
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_0f.c
index 5ce00466..9e9e6a76 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_0f.c
@@ -625,18 +625,51 @@ uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int
                     GETED(0);
                     STRw_U12(ed, xEmu, offsetof(x64emu_t, mxcsr));
                     if(BOX64ENV(sse_flushto0)) {
-                        MRS_fpcr(x1);   // get fpscr
+                        MRS_fpcr(x2);   // get fpscr
                         LSRw_IMM(x3, ed, 15);   // get FZ bit
-                        BFIw(x1, x3, 24, 1);    // inject FZ bit
+                        BFIw(x2, x3, 24, 1);    // inject FZ bit
                         EORw_REG_LSR(x3, x3, ed, 1);    // FZ xor DAZ
-                        BFIw(x1, x3, 1, 1); // inject AH bit
-                        MSR_fpcr(x1);   // put new fpscr
+                        BFIw(x2, x3, 1, 1); // inject AH bit
+                        MSR_fpcr(x2);   // put new fpscr
+                    }
+                    if(BOX64ENV(sse_flushto0)) {
+                        // try to sync mxcsr with fpsr on the flag side
+                        /* mapping is
+                            ARM -> X86
+                            0   -> 0    // Invalid operation
+                            1   -> 2    // Divide by 0
+                            2   -> 3    // Overflow
+                            3   -> 4    // underflow
+                            4   -> 5    // Inexact
+                            5   -> 1    // denormal
+                        */
+                        // doing X86 -> ARM here, 0 1 2 3 4 5 -> 0 5 1 2 3 4
+                        if(ed!=x1)
+                            MOVw_REG(x1, ed);
+                        BFXILw(x2, x1, 1, 5);   // x2 = 1 2 3 4 5 ...
+                        BFIw(x1, x2, 2, 4);     // x1 = 0 1 1 2 3 4
+                        RORw(x2, x2, 4);        // x2 = 5 .... 1 2 3 4
+                        BFIw(x1, x2, 1, 1);     // x1 = 0 5 1 2 3 4
+                        MRS_fpsr(x2);
+                        BFIx(x2, x1, 0, 6);
+                        MSR_fpsr(x2);
                     }
                     break;
                 case 3:
                     INST_NAME("VSTMXCSR Md");
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
                     LDRw_U12(x4, xEmu, offsetof(x64emu_t, mxcsr));
+                    if(BOX64ENV(sse_flushto0)) {
+                        // sync with fpsr, with mask from mxcsr
+                        // doing ARM -> X86 here, 0 1 2 3 4 5 -> 0 2 3 4 5 1
+                        MRS_fpsr(x1);
+                        RORw(x3, x1, 2);    //x3 = 2 3 4 5 .... 0 1
+                        BFIw(x1, x3, 1, 4);
+                        RORw(x3, x3, 32-1);
+                        BFIw(x1, x3, 5, 1); // x1 is Flags
+                        //BFXILw(x3, x4, 7, 6);   // this would the mask, but let's ignore that for now
+                        BFIw(x4, x1, 0, 6); // inject back the flags
+                    }
                     STW(x4, ed, fixedaddress);
                     break;
                 default:
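As a usage sketch only (hypothetical, building on the helpers above rather than on the actual emitted BFXIL/BFI/ROR sequences), the two new blocks amount to this: the LDMXCSR/VLDMXCSR path scatters the six MXCSR status flags into the low bits of FPSR, and the STMXCSR/VSTMXCSR path gathers the FPSR flags back and injects them into mxcsr[5:0], leaving rounding control, masks and FZ/DAZ untouched.

```c
/* Hypothetical C model of the two paths, reusing mxcsr_flags_to_arm()
   and arm_flags_to_mxcsr() from the sketch above (not emitted code). */

/* LDMXCSR side: mirror of MRS_fpsr + BFIx(x2, x1, 0, 6) + MSR_fpsr. */
static uint32_t ldmxcsr_sync_fpsr(uint32_t fpsr, uint32_t mxcsr)
{
    return (fpsr & ~0x3Fu) | mxcsr_flags_to_arm(mxcsr);
}

/* STMXCSR side: mirror of the BFIw(x4, x1, 0, 6) injection; the exception
   masks are ignored here, as the commented-out BFXILw in the patch notes. */
static uint32_t stmxcsr_sync_mxcsr(uint32_t mxcsr, uint32_t fpsr)
{
    return (mxcsr & ~0x3Fu) | arm_flags_to_mxcsr(fpsr & 0x3Fu);
}
```

A fuller implementation would presumably also honour the MXCSR exception masks (bits 7..12), which the patch itself defers per the commented-out BFXILw line.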