about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--src/dynarec/arm64/dynarec_arm64_f0.c47
1 files changed, 46 insertions, 1 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_f0.c b/src/dynarec/arm64/dynarec_arm64_f0.c
index daded2ef..5c239f91 100644
--- a/src/dynarec/arm64/dynarec_arm64_f0.c
+++ b/src/dynarec/arm64/dynarec_arm64_f0.c
@@ -100,6 +100,16 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 emit_add32(dyn, ninst, rex, ed, gd, x3, x4);
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
+                if(!ALIGNED_ATOMICxw) {
+                    if(arm64_uscat) {
+                        ANDx_mask(x1, wback, 1, 0, 3);  // mask = F
+                        CMPSw_U12(x1, 16-(1<<(2+rex.w)));
+                        B_MARK(cGT);
+                    } else {
+                        TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
+                        B_MARK(cNE);
+                    }
+                }
                 if(arm64_atomics) {
                     UFLAG_IF {
                         LDADDALxw(gd, x1, wback);
@@ -107,14 +117,27 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     } else {
                         STADDLxw(gd, wback);
                     }
+                    SMDMB();
                 } else {
                     MARKLOCK;
                     LDAXRxw(x1, wback);
                     emit_add32(dyn, ninst, rex, x1, gd, x3, x4);
                     STLXRxw(x3, x1, wback);
                     CBNZx_MARKLOCK(x3);
+                    SMDMB();
+                }
+                if(!ALIGNED_ATOMICxw) {
+                    B_NEXT_nocond;
+                    MARK;   // unaligned! also, not enough
+                    LDRxw_U12(x1, wback, 0);
+                    LDAXRB(x4, wback);
+                    BFIxw(x1, x4, 0, 8); // re-inject
+                    emit_add32(dyn, ninst, rex, x1, gd, x3, x4);
+                    STLXRB(x3, x1, wback);
+                    CBNZx_MARK(x3);
+                    STRxw_U12(x1, wback, 0);    // put the whole value
+                    SMDMB();
                 }
-                SMDMB();
             }
             break;
 
@@ -846,6 +869,16 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 emit_sub32(dyn, ninst, rex, ed, gd, x3, x4);
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
+                if(!ALIGNED_ATOMICxw) {
+                    if(arm64_uscat) {
+                        ANDx_mask(x1, wback, 1, 0, 3);  // mask = F
+                        CMPSw_U12(x1, 16-(1<<(2+rex.w)));
+                        B_MARK(cGT);
+                    } else {
+                        TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
+                        B_MARK(cNE);
+                    }
+                }
                 if(arm64_atomics) {
                     NEGxw_REG(x1, gd);
                     UFLAG_IF {
@@ -862,6 +895,18 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     CBNZx_MARKLOCK(x3);
                     SMDMB();
                 }
+                if(!ALIGNED_ATOMICxw) {
+                    B_NEXT_nocond;
+                    MARK;   // unaligned! also, not enough
+                    LDRxw_U12(x1, wback, 0);
+                    LDAXRB(x4, wback);
+                    BFIxw(x1, x4, 0, 8); // re-inject
+                    emit_sub32(dyn, ninst, rex, x1, gd, x3, x4);
+                    STLXRB(x3, x1, wback);
+                    CBNZx_MARK(x3);
+                    STRxw_U12(x1, wback, 0);    // put the whole value
+                    SMDMB();
+                }
             }
             break;