about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: ptitSeb <sebastien.chev@gmail.com> 2024-02-07 19:31:35 +0100
committer: ptitSeb <sebastien.chev@gmail.com> 2024-02-07 19:31:35 +0100
commit: 4a3595133a2004043934d405d2c58a6c5e15ceea (patch)
tree: 4ef864eec4c509ea7e7dd1c133610bd68ab0fcd8 /src
parent: 4c9a6a5d9878ec7bf4095be00b46507d9c23c023 (diff)
download: box64-4a3595133a2004043934d405d2c58a6c5e15ceea.tar.gz
box64-4a3595133a2004043934d405d2c58a6c5e15ceea.zip
[ARM64_DYNAREC] Improved stability when USCAT extension is supported, and improved reliability of unaligned fallback for LOCK CMPXCHG8B/CMPXCHG16B opcodes
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_66f0.c | 30
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_f0.c | 91
-rw-r--r-- src/include/debug.h | 1
3 files changed, 99 insertions, 23 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_66f0.c b/src/dynarec/arm64/dynarec_arm64_66f0.c
index f26a17c3..a86bd625 100644
--- a/src/dynarec/arm64/dynarec_arm64_66f0.c
+++ b/src/dynarec/arm64/dynarec_arm64_66f0.c
@@ -98,8 +98,14 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     } else {
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
                         if(!ALIGNED_ATOMICH) {
-                            TSTx_mask(wback, 1, 0, 0);    // mask=1
-                            B_MARK3(cNE);
+                            if(arm64_uscat) {
+                                ANDx_mask(x1, wback, 1, 0, 3);  // mask = F
+                                CMPSw_U12(x1, 0xF);
+                                B_MARK3(cGE);
+                            } else {
+                                TSTx_mask(wback, 1, 0, 0);    // mask=1
+                                B_MARK3(cNE);
+                            }
                         }
                         // Aligned version
                         if(arm64_atomics) {
@@ -215,8 +221,14 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                         if(opcode==0x81) i32 = F16S; else i32 = F8S;
                         MOV32w(x5, i32);
                         if(!ALIGNED_ATOMICH) {
-                            TSTx_mask(wback, 1, 0, 0);    // mask=1
-                            B_MARK(cNE);
+                            if(arm64_uscat) {
+                                ANDx_mask(x1, wback, 1, 0, 3);  // mask = F
+                                CMPSw_U12(x1, 0xF);
+                                B_MARK(cGE);
+                            } else {
+                                TSTx_mask(wback, 1, 0, 0);    // mask=1
+                                B_MARK(cNE);
+                            }
                         }
                         if(arm64_atomics) {
                             LDADDALH(x5, x1, wback);
@@ -370,8 +382,14 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                         if(opcode==0x81) i32 = F16S; else i32 = F8S;
                         MOV32w(x5, i32);
                         if(!ALIGNED_ATOMICH) {
-                            TSTx_mask(wback, 1, 0, 0);    // mask=1
-                            B_MARK(cNE);
+                            if(arm64_uscat) {
+                                ANDx_mask(x1, wback, 1, 0, 3);  // mask = F
+                                CMPSw_U12(x1, 0xF);
+                                B_MARK(cGE);
+                            } else {
+                                TSTx_mask(wback, 1, 0, 0);    // mask=1
+                                B_MARK(cNE);
+                            }
                         }
                         if(arm64_atomics) {
                             NEGw_REG(x4, x5);
diff --git a/src/dynarec/arm64/dynarec_arm64_f0.c b/src/dynarec/arm64/dynarec_arm64_f0.c
index 37940eec..890775ae 100644
--- a/src/dynarec/arm64/dynarec_arm64_f0.c
+++ b/src/dynarec/arm64/dynarec_arm64_f0.c
@@ -302,8 +302,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             } else {
                                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
                                 if(!ALIGNED_ATOMICxw) {
-                                    TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
-                                    B_MARK3(cNE);
+                                    if(arm64_uscat) {
+                                        ANDx_mask(x1, wback, 1, 0, 3);  // mask = F
+                                        CMPSw_U12(x1, 16-(1<<(2+rex.w)));
+                                        B_MARK3(cGT);
+                                    } else {
+                                        TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
+                                        B_MARK3(cNE);
+                                    }
                                 }
                                 // Aligned version
                                 if(arm64_atomics) {
@@ -590,8 +596,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             } else {
                                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
                                 if(!ALIGNED_ATOMICxw) {
-                                    TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
-                                    B_MARK(cNE);    // unaligned
+                                    if(arm64_uscat) {
+                                        ANDx_mask(x1, wback, 1, 0, 3);  // mask = F
+                                        CMPSw_U12(x1, 16-(1<<(2+rex.w)));
+                                        B_MARK(cGT);
+                                    } else {
+                                        TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
+                                        B_MARK(cNE);    // unaligned
+                                    }
                                 }
                                 if(arm64_atomics) {
                                     UFLAG_IF {
@@ -652,8 +664,19 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         SETFLAGS(X_ZF, SF_SUBSET);
                         addr = geted(dyn, addr, ninst, nextop, &wback, x1, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
                         if(!ALIGNED_ATOMICxw) {
-                            TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
-                            B_MARK2(cNE);    // unaligned
+                            if(arm64_uscat) {
+                                if(rex.w) {
+                                    TSTx_mask(wback, 1, 0, 3);
+                                    B_MARK2(cNE);    
+                                } else {
+                                    ANDx_mask(x2, wback, 1, 0, 3);  // mask = F
+                                    CMPSw_U12(x2, 8);
+                                    B_MARK2(cGT);
+                                }
+                            } else {
+                                TSTx_mask(wback, 1, 0, 2+rex.w);    // mask=7 or F
+                                B_MARK2(cNE);    // unaligned
+                            }
                         }
                         if(arm64_atomics) {
                             MOVx_REG(x2, xRAX);
@@ -684,6 +707,8 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             }
                             B_MARK3_nocond;
                             MARK;
+                            STLXPxw(x4, x2, x3, wback); // write back, to be sure it was "atomic"
+                            CBNZx_MARKLOCK(x4);
                             MOVxw_REG(xRAX, x2);
                             MOVxw_REG(xRDX, x3);
                             UFLAG_IF {
@@ -696,7 +721,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         if(!ALIGNED_ATOMICxw) {
                             MARK2;
                             LDPxw_S7_offset(x2, x3, wback, 0);
-                            LDAXRB(x4, wback);
+                            LDAXRB(x5, wback);
                             CMPSxw_REG(xRAX, x2);
                             CCMPxw(xRDX, x3, 0, cEQ);
                             B_MARKSEG(cNE);    // EAX!=ED[0] || EDX!=Ed[1]
@@ -708,6 +733,8 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             }
                             B_MARK3_nocond;
                             MARKSEG;
+                            STLXRB(x4, x5, wback); //write back
+                            CBNZx_MARK2(x4);
                             MOVxw_REG(xRAX, x2);
                             MOVxw_REG(xRDX, x3);
                             UFLAG_IF {
@@ -1076,8 +1103,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
                         if(opcode==0x81) i64 = F32S; else i64 = F8S;
                         if(!ALIGNED_ATOMICxw) {
-                            TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
-                            B_MARK(cNE);
+                            if(arm64_uscat) {
+                                ANDx_mask(x1, wback, 1, 0, 3);  // mask = F
+                                CMPSw_U12(x1, 16-(1<<(2+rex.w)));
+                                B_MARK(cGT);
+                            } else {
+                                TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
+                                B_MARK(cNE);
+                            }
                         }
                         if(arm64_atomics) {
                             MOV64xw(x3, i64);
@@ -1225,8 +1258,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
                         if(opcode==0x81) i64 = F32S; else i64 = F8S;
                         if(!ALIGNED_ATOMICxw) {
-                            TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
-                            B_MARK(cNE);
+                            if(arm64_uscat) {
+                                ANDx_mask(x1, wback, 1, 0, 3);  // mask = F
+                                CMPSw_U12(x1, 16-(1<<(2+rex.w)));
+                                B_MARK(cGT);
+                            } else {
+                                TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
+                                B_MARK(cNE);
+                            }
                         }
                         if(arm64_atomics) {
                             MOV64xw(x5, -i64);
@@ -1355,8 +1394,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 GETGD;
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
                 if(!ALIGNED_ATOMICxw) {
-                    TSTx_mask(ed, 1, 0, 1+rex.w);    // mask=3 or 7
-                    B_MARK(cNE);
+                    if(arm64_uscat) {
+                        ANDx_mask(x1, wback, 1, 0, 3);  // mask = F
+                        CMPSw_U12(x1, 16-(1<<(2+rex.w)));
+                        B_MARK(cGT);
+                    } else {
+                        TSTx_mask(ed, 1, 0, 1+rex.w);    // mask=3 or 7
+                        B_MARK(cNE);
+                    }
                 }
                 if(arm64_atomics) {
                     SWPALxw(gd, gd, ed);
@@ -1497,8 +1542,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     } else {
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
                         if(!ALIGNED_ATOMICxw) {
-                            TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
-                            B_MARK(cNE);    // unaligned
+                            if(arm64_uscat) {
+                                ANDx_mask(x1, wback, 1, 0, 3);  // mask = F
+                                CMPSw_U12(x1, 16-(1<<(2+rex.w)));
+                                B_MARK(cGT);
+                            } else {
+                                TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
+                                B_MARK(cNE);    // unaligned
+                            }
                         }
                         if(arm64_atomics) {
                             MOV32w(x3, 1);
@@ -1539,8 +1590,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     } else {
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
                         if(!ALIGNED_ATOMICxw) {
-                            TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
-                            B_MARK(cNE);    // unaligned
+                            if(arm64_uscat) {
+                                ANDx_mask(x1, wback, 1, 0, 3);  // mask = F
+                                CMPSw_U12(x1, 16-(1<<(2+rex.w)));
+                                B_MARK(cGT);
+                            } else {
+                                TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
+                                B_MARK(cNE);    // unaligned
+                            }
                         }
                         if(arm64_atomics) {
                             MOV64xw(x3, -1);
diff --git a/src/include/debug.h b/src/include/debug.h
index c3a08258..468fe5c4 100644
--- a/src/include/debug.h
+++ b/src/include/debug.h
@@ -41,6 +41,7 @@ extern int arm64_crc32;
 extern int arm64_atomics;
 extern int arm64_sha1;
 extern int arm64_sha2;
+extern int arm64_uscat;
 extern int arm64_flagm;
 extern int arm64_flagm2;
 extern int arm64_frintts;