about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2025-07-29 11:42:29 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2025-07-29 11:42:44 +0200
commit f628c37a61cc215fa4acc53157c3c7ae6df9bc53 (patch)
tree ca82707d4f21104608ebbc6d8dcbc78a54b9fdc5 /src
parent 0734efea394ad1086f33a5e01df6aab900f01b19 (diff)
download box64-f628c37a61cc215fa4acc53157c3c7ae6df9bc53.tar.gz
box64-f628c37a61cc215fa4acc53157c3c7ae6df9bc53.zip
[ARM64_DYNAREC] Fixed (V)[LD/ST]MXCSR opcodes when using SSE_FLUSHTO0
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_0f.c 34
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_avx_0f.c 36
2 files changed, 35 insertions, 35 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 9d9ebda4..221b10a0 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -1959,21 +1959,21 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         if(BOX64ENV(sse_flushto0)) {

                             // try to sync mxcsr with fpsr on the flag side

                             /* mapping is

-                                ARM -> X86

-                                0 -> 0  // Invalid operation

-                                1 -> 2  // Divide by 0

-                                2 -> 3  // Overflow

-                                3 -> 4  // underflow

-                                4 -> 5  // Inexact

-                                5 -> 1  // denormal

+                                ARM <-> X86

+                                 0  <->  0  // Invalid operation

+                                 1  <->  2  // Divide by 0

+                                 2  <->  3  // Overflow

+                                 3  <->  4  // underflow

+                                 4  <->  5  // Inexact

+                                 5  <->  1  // denormal

                             */

-                            // doing X86 -> ARM here, 0 1 2 3 4 5 -> 0 2 3 4 5 1

+                            // doing X86 -> ARM here, 543210 => 432150

                             if(ed!=x1)

                                 MOVw_REG(x1, ed);   // x1 = 543210

-                            RORw(x3, x1, 2);    // x3 = 10.....5432

-                            BFIw(x1, x3, 1, 4); // x1 = 54320

-                            RORw(x3, x3, 32-1); // x3 = 0.....54321

-                            BFIw(x1, x3, 5, 1); // x1 = 154320

+                            UBFXw(x2, x1, 1, 5);   // x2 = 54321

+                            BFIw(x1, x2, 2, 4); // x1 = 432110

+                            LSRw(x2, x2, 4);    // x2 = 5

+                            BFIw(x1, x2, 1, 1); // x1 = 432150

                             MRS_fpsr(x2);

                             BFIx(x2, x1, 0, 6);

                             MSR_fpsr(x2);

@@ -1987,11 +1987,11 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         if(BOX64ENV(sse_flushto0)) {

                             // sync with fpsr, with mask from mxcsr

                             MRS_fpsr(x1);

-                            // doing ARM -> X86 here,  543210 => 432150

-                            UBFXw(x2, x1, 1, 5);   // x2 = 54321

-                            BFIw(x1, x2, 2, 4); // x1 = 432110

-                            LSRw(x2, x2, 4);    // x2 = 5

-                            BFIw(x1, x2, 1, 1); // x1 = 432150

+                            // doing ARM -> X86 here,  0 1 2 3 4 5 -> 0 2 3 4 5 1

+                            RORw(x3, x1, 2);    // x3 = 10.....5432

+                            BFIw(x1, x3, 1, 4); // x1 = 54320

+                            RORw(x3, x3, 32-1); // x3 = 0.....54321

+                            BFIw(x1, x3, 5, 1); // x1 = 154320

                             //BFXILw(x3, x4, 7, 6); // this would the mask, but let's ignore that for now

                             BFIw(x4, x1, 0, 6); // inject back the flags

                         }

diff --git a/src/dynarec/arm64/dynarec_arm64_avx_0f.c b/src/dynarec/arm64/dynarec_arm64_avx_0f.c
index 47ad8949..122b25c2 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_0f.c
@@ -647,21 +647,21 @@ uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int
                         if(BOX64ENV(sse_flushto0)) {
                             // try to sync mxcsr with fpsr on the flag side
                             /* mapping is 
-                                ARM -> X86
-                                0 -> 0  // Invalid operation
-                                1 -> 2  // Divide by 0
-                                2 -> 3  // Overflow
-                                3 -> 4  // underflow
-                                4 -> 5  // Inexact
-                                5 -> 1  // denormal
+                                ARM <-> X86
+                                 0  <->  0  // Invalid operation
+                                 1  <->  2  // Divide by 0
+                                 2  <->  3  // Overflow
+                                 3  <->  4  // underflow
+                                 4  <->  5  // Inexact
+                                 5  <->  1  // denormal
                             */
-                            // doing X86 -> ARM here, 0 1 2 3 4 5 -> 0 2 3 4 5 1
+                            // doing X86 -> ARM here, 543210 => 432150
                             if(ed!=x1)
                                 MOVw_REG(x1, ed);   // x1 = 543210
-                            RORw(x3, x1, 2);    // x3 = 10.....5432
-                            BFIw(x1, x3, 1, 4); // x1 = 54320
-                            RORw(x3, x3, 32-1); // x3 = 0.....54321
-                            BFIw(x1, x3, 5, 1); // x1 = 154320
+                            UBFXw(x2, x1, 1, 5);   // x2 = 54321
+                            BFIw(x1, x2, 2, 4); // x1 = 432110
+                            LSRw(x2, x2, 4);    // x2 = 5
+                            BFIw(x1, x2, 1, 1); // x1 = 432150
                             MRS_fpsr(x2);
                             BFIx(x2, x1, 0, 6);
                             MSR_fpsr(x2);
@@ -671,18 +671,18 @@ uintptr_t dynarec64_AVX_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int
                         INST_NAME("VSTMXCSR Md");
                         addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
                         LDRw_U12(x4, xEmu, offsetof(x64emu_t, mxcsr));
-                        STW(x4, ed, fixedaddress);
                         if(BOX64ENV(sse_flushto0)) {
                             // sync with fpsr, with mask from mxcsr
                             MRS_fpsr(x1);
-                            // doing ARM -> X86 here,  543210 => 432150
-                            UBFXw(x2, x1, 1, 5);   // x2 = 54321
-                            BFIw(x1, x2, 2, 4); // x1 = 432110
-                            LSRw(x2, x2, 4);    // x2 = 5
-                            BFIw(x1, x2, 1, 1); // x1 = 432150
+                            // doing ARM -> X86 here,  0 1 2 3 4 5 -> 0 2 3 4 5 1
+                            RORw(x3, x1, 2);    // x3 = 10.....5432
+                            BFIw(x1, x3, 1, 4); // x1 = 54320
+                            RORw(x3, x3, 32-1); // x3 = 0.....54321
+                            BFIw(x1, x3, 5, 1); // x1 = 154320
                             //BFXILw(x3, x4, 7, 6); // this would the mask, but let's ignore that for now
                             BFIw(x4, x1, 0, 6); // inject back the flags
                         }
+                        STW(x4, ed, fixedaddress);
                         break;
                     default:
                         DEFAULT;