about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2023-10-17 18:25:16 +0200
committerptitSeb <sebastien.chev@gmail.com>2023-10-17 18:25:16 +0200
commit6ef2b22a3ce4aaf5df5069dab226c1eeab806b19 (patch)
treef55bcee33bd10845d34de282f23cfeb1f7bb1507 /src
parentda19b2008a8e4afc75ec46a43453dcc93256d48e (diff)
downloadbox64-6ef2b22a3ce4aaf5df5069dab226c1eeab806b19.tar.gz
box64-6ef2b22a3ce4aaf5df5069dab226c1eeab806b19.zip
[ARM64_DYNAREC] Added support for BOX64_DYNAREC_ALIGNED_ATOMICS
Diffstat (limited to 'src')
-rw-r--r--src/dynarec/arm64/dynarec_arm64_00.c37
-rw-r--r--src/dynarec/arm64/dynarec_arm64_66f0.c87
-rw-r--r--src/dynarec/arm64/dynarec_arm64_f0.c262
-rw-r--r--src/dynarec/arm64/dynarec_arm64_helper.c12
-rw-r--r--src/dynarec/arm64/dynarec_arm64_helper.h3
-rw-r--r--src/include/debug.h1
-rw-r--r--src/main.c10
-rw-r--r--src/tools/rcfile.c2
8 files changed, 250 insertions, 164 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c
index f2c3cd60..49f0a757 100644
--- a/src/dynarec/arm64/dynarec_arm64_00.c
+++ b/src/dynarec/arm64/dynarec_arm64_00.c
@@ -1079,28 +1079,37 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {
                 GETGD;
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
-                TSTx_mask(ed, 1, 0, 1+rex.w);    // mask=3 or 7
-                B_MARK(cNE);
+                if(!ALIGNED_ATOMICxw) {
+                    TSTx_mask(ed, 1, 0, 1+rex.w);    // mask=3 or 7
+                    B_MARK(cNE);
+                }
                 if(arm64_atomics) {
                     SWPALxw(gd, gd, ed);
-                    B_NEXT_nocond;
+                    if(!ALIGNED_ATOMICxw) {
+                        B_NEXT_nocond;
+                    }
                 } else {
                     MARKLOCK;
                     LDAXRxw(x1, ed);
                     STLXRxw(x3, gd, ed);
                     CBNZx_MARKLOCK(x3);
-                    B_MARK2_nocond;
+                    if(!ALIGNED_ATOMICxw) {
+                        B_MARK2_nocond;
+                    }
+                }
+                if(!ALIGNED_ATOMICxw) {
+                    MARK;
+                    LDRxw_U12(x1, ed, 0);
+                    LDAXRB(x3, ed);
+                    STLXRB(x3, gd, ed);
+                    CBNZx_MARK(x3);
+                    STRxw_U12(gd, ed, 0);
+                    SMDMB();
+                    MARK2;
+                }
+                if(!ALIGNED_ATOMICxw || !arm64_atomics) {
+                    MOVxw_REG(gd, x1);
                 }
-                MARK;
-                SMDMB();
-                LDRxw_U12(x1, ed, 0);
-                LDAXRB(x3, ed);
-                STLXRB(x3, gd, ed);
-                CBNZx_MARK(x3);
-                STRxw_U12(gd, ed, 0);
-                SMDMB();
-                MARK2;
-                MOVxw_REG(gd, x1);
             }
             break;
         case 0x88:
diff --git a/src/dynarec/arm64/dynarec_arm64_66f0.c b/src/dynarec/arm64/dynarec_arm64_66f0.c
index df60a75f..3e8fb480 100644
--- a/src/dynarec/arm64/dynarec_arm64_66f0.c
+++ b/src/dynarec/arm64/dynarec_arm64_66f0.c
@@ -97,8 +97,10 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                         BFIx(ed, gd, 0, 16);
                     } else {
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
-                        TSTx_mask(wback, 1, 0, 0);    // mask=1
-                        B_MARK3(cNE);
+                        if(!ALIGNED_ATOMICH) {
+                            TSTx_mask(wback, 1, 0, 0);    // mask=1
+                            B_MARK3(cNE);
+                        }
                         // Aligned version
                         if(arm64_atomics) {
                             MOVw_REG(x1, x6);
@@ -113,18 +115,20 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                             CBNZx_MARKLOCK(x4);
                             // done
                         }
-                        B_MARK_nocond;
-                        // Unaligned version
-                        MARK3;
-                        LDRH_U12(x1, wback, 0);
-                        LDAXRB(x3, wback); // dummy read, to arm the write...
-                        CMPSw_REG(x6, x1);
-                        B_MARK(cNE);
-                        // EAX == Ed
-                        STLXRB(x4, gd, wback);
-                        CBNZx_MARK3(x4);
-                        STRH_U12(gd, wback, 0);
-                        SMDMB();
+                        if(!ALIGNED_ATOMICH) {
+                            B_MARK_nocond;
+                            // Unaligned version
+                            MARK3;
+                            LDRH_U12(x1, wback, 0);
+                            LDAXRB(x3, wback); // dummy read, to arm the write...
+                            CMPSw_REG(x6, x1);
+                            B_MARK(cNE);
+                            // EAX == Ed
+                            STLXRB(x4, gd, wback);
+                            CBNZx_MARK3(x4);
+                            STRH_U12(gd, wback, 0);
+                            SMDMB();
+                        }
                     }
                     MARK;
                     // Common part (and fallback for EAX != Ed)
@@ -213,8 +217,10 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?2:1);
                         if(opcode==0x81) i32 = F16S; else i32 = F8S;
                         MOV32w(x5, i32);
-                        TSTx_mask(wback, 1, 0, 0);    // mask=1
-                        B_MARK(cNE);
+                        if(!ALIGNED_ATOMICH) {
+                            TSTx_mask(wback, 1, 0, 0);    // mask=1
+                            B_MARK(cNE);
+                        }
                         if(arm64_atomics) {
                             LDADDALH(x5, x1, wback);
                             UFLAG_IF {
@@ -227,15 +233,18 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                             STLXRH(x3, x1, wback);
                             CBNZx_MARKLOCK(x3);
                         }
-                        B_NEXT_nocond;
-                        MARK;   // unaligned! also, not enough
-                        LDRH_U12(x1, wback, 0);
-                        LDAXRB(x4, wback);
-                        BFIw(x1, x4, 0, 8); // re-inject
-                        emit_add16(dyn, ninst, x1, x5, x3, x4);
-                        STLXRB(x3, x1, wback);
-                        CBNZx_MARK(x3);
-                        STRH_U12(x1, wback, 0);    // put the whole value
+                        if(!ALIGNED_ATOMICH) {
+                            B_NEXT_nocond;
+                            MARK;   // unaligned! also, not enough
+                            LDRH_U12(x1, wback, 0);
+                            LDAXRB(x4, wback);
+                            BFIw(x1, x4, 0, 8); // re-inject
+                            emit_add16(dyn, ninst, x1, x5, x3, x4);
+                            STLXRB(x3, x1, wback);
+                            CBNZx_MARK(x3);
+                            STRH_U12(x1, wback, 0);    // put the whole value
+                            SMDMB();
+                        }
                     }
                     break;
                 case 1: //OR
@@ -358,8 +367,10 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?2:1);
                         if(opcode==0x81) i32 = F16S; else i32 = F8S;
                         MOV32w(x5, i32);
-                        TSTx_mask(wback, 1, 0, 0);    // mask=1
-                        B_MARK(cNE);
+                        if(!ALIGNED_ATOMICH) {
+                            TSTx_mask(wback, 1, 0, 0);    // mask=1
+                            B_MARK(cNE);
+                        }
                         if(arm64_atomics) {
                             NEGw_REG(x4, x5);
                             UFLAG_IF {
@@ -375,15 +386,18 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                             STLXRH(x3, x1, wback);
                             CBNZx_MARKLOCK(x3);
                         }
-                        B_NEXT_nocond;
-                        MARK;   // unaligned! also, not enough
-                        LDRH_U12(x1, wback, 0);
-                        LDAXRB(x4, wback);
-                        BFIw(x1, x4, 0, 8); // re-inject
-                        emit_sub16(dyn, ninst, x1, x5, x3, x4);
-                        STLXRB(x3, x1, wback);
-                        CBNZx_MARK(x3);
-                        STRH_U12(x1, wback, 0);    // put the whole value
+                        if(!ALIGNED_ATOMICH) {
+                            B_NEXT_nocond;
+                            MARK;   // unaligned! also, not enough
+                            LDRH_U12(x1, wback, 0);
+                            LDAXRB(x4, wback);
+                            BFIw(x1, x4, 0, 8); // re-inject
+                            emit_sub16(dyn, ninst, x1, x5, x3, x4);
+                            STLXRB(x3, x1, wback);
+                            CBNZx_MARK(x3);
+                            STRH_U12(x1, wback, 0);    // put the whole value
+                            SMDMB();
+                        }
                     }
                     break;
                 case 6: //XOR
@@ -431,7 +445,6 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     }
                     break;
             }
-            SMDMB();
             break;
 
             case 0xFF:
diff --git a/src/dynarec/arm64/dynarec_arm64_f0.c b/src/dynarec/arm64/dynarec_arm64_f0.c
index b5e9fe3c..e5a8d097 100644
--- a/src/dynarec/arm64/dynarec_arm64_f0.c
+++ b/src/dynarec/arm64/dynarec_arm64_f0.c
@@ -234,7 +234,6 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             nextop = F8;
                             GETGB(x1);
                             UBFXx(x6, xRAX, 0, 8);
-                            SMDMB();
                             if(MODREG) {
                                 if(rex.rex) {
                                     wback = xRAX+(nextop&7)+(rex.b<<3);
@@ -258,8 +257,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             } else {
                                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
                                 if(arm64_atomics) {
-                                    MOVw_REG(x2, x6);
-                                    CASALB(x2, gd, wback);
+                                    UFLAG_IF {
+                                        MOVw_REG(x2, x6);
+                                        CASALB(x6, gd, wback);
+                                        emit_cmp8(dyn, ninst, x2, x6, x3, x4, x5);
+                                    } else {
+                                        CASALB(x6, gd, wback);
+                                    }
+                                    BFIx(xRAX, x6, 0, 8);
                                 } else {
                                     MARKLOCK;
                                     LDAXRB(x2, wback);
@@ -270,11 +275,10 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                     CBNZx_MARKLOCK(x4);
                                     // done
                                     MARK;
+                                    UFLAG_IF {emit_cmp8(dyn, ninst, x6, x2, x3, x4, x5);}
+                                    BFIx(xRAX, x2, 0, 8);
                                 }
-                                UFLAG_IF {emit_cmp8(dyn, ninst, x6, x2, x3, x4, x5);}
-                                BFIx(xRAX, x2, 0, 8);
                             }
-                            SMDMB();
                             break;
                         default:
                             DEFAULT;
@@ -300,8 +304,10 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                 B_NEXT_nocond;
                             } else {
                                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
-                                TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
-                                B_MARK3(cNE);
+                                if(!ALIGNED_ATOMICxw) {
+                                    TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
+                                    B_MARK3(cNE);
+                                }
                                 // Aligned version
                                 if(arm64_atomics) {
                                     UFLAG_IF {
@@ -312,7 +318,9 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                     } else {
                                         CASALxw(xRAX, gd, wback);
                                     }
-                                    B_NEXT_nocond;
+                                    if(!ALIGNED_ATOMICxw) {
+                                        B_NEXT_nocond;
+                                    }
                                 } else {
                                     MARKLOCK;
                                     LDAXRxw(x1, wback);
@@ -322,23 +330,29 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                     STLXRxw(x4, gd, wback);
                                     CBNZx_MARKLOCK(x4);
                                     // done
-                                    B_MARK_nocond;
+                                    if(!ALIGNED_ATOMICxw) {
+                                        B_MARK_nocond;
+                                    }
+                                }
+                                if(!ALIGNED_ATOMICxw) {
+                                    // Unaligned version
+                                    MARK3;
+                                    LDRxw_U12(x1, wback, 0);
+                                    LDAXRB(x3, wback); // dummy read, to arm the write...
+                                    CMPSxw_REG(xRAX, x1);
+                                    B_MARK(cNE);
+                                    // EAX == Ed
+                                    STLXRB(x4, gd, wback);
+                                    CBNZx_MARK3(x4);
+                                    STRxw_U12(gd, wback, 0);
+                                    SMDMB();
+                                }
+                                if(!ALIGNED_ATOMICxw || !arm64_atomics) {
+                                    MARK;
+                                    // Common part (and fallback for EAX != Ed)
+                                    UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5);}
+                                    MOVxw_REG(xRAX, x1);    // upper par of RAX will be erase on 32bits, no mater what
                                 }
-                                // Unaligned version
-                                MARK3;
-                                LDRxw_U12(x1, wback, 0);
-                                LDAXRB(x3, wback); // dummy read, to arm the write...
-                                CMPSxw_REG(xRAX, x1);
-                                B_MARK(cNE);
-                                // EAX == Ed
-                                STLXRB(x4, gd, wback);
-                                CBNZx_MARK3(x4);
-                                STRxw_U12(gd, wback, 0);
-                                SMDMB();
-                                MARK;
-                                // Common part (and fallback for EAX != Ed)
-                                UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5);}
-                                MOVxw_REG(xRAX, x1);    // upper par of RAX will be erase on 32bits, no mater what
                             }
                             break;
                         default:
@@ -413,8 +427,10 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                 emit_add32(dyn, ninst, rex, ed, gd, x3, x4);
                             } else {
                                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
-                                TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
-                                B_MARK(cNE);    // unaligned
+                                if(!ALIGNED_ATOMICxw) {
+                                    TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
+                                    B_MARK(cNE);    // unaligned
+                                }
                                 if(arm64_atomics) {
                                     UFLAG_IF {
                                         MOVxw_REG(x3, gd);
@@ -423,30 +439,38 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                     } else {
                                         LDADDALxw(gd, gd, wback);
                                     }
-                                    B_NEXT_nocond;
+                                    if(!ALIGNED_ATOMICxw) {
+                                        B_NEXT_nocond;
+                                    }
                                 } else {
                                     MARKLOCK;
                                     LDAXRxw(x1, wback);
                                     ADDxw_REG(x4, x1, gd);
                                     STLXRxw(x3, x4, wback);
                                     CBNZx_MARKLOCK(x3);
-                                    B_MARK2_nocond;
+                                    if(!ALIGNED_ATOMICxw) {
+                                        B_MARK2_nocond;
+                                    }
                                 }
-                                MARK;
-                                LDRxw_U12(x1, wback, 0);
-                                LDAXRB(x4, wback);
-                                BFIxw(x1, x4, 0, 8);
-                                ADDxw_REG(x4, x1, gd);
-                                STLXRB(x3, x4, wback);
-                                CBNZx_MARK(x3);
-                                STRxw_U12(x4, wback, 0);
-                                SMDMB();
-                                MARK2;
-                                IFX(X_ALL|X_PEND) {
-                                    MOVxw_REG(x2, x1);
-                                    emit_add32(dyn, ninst, rex, x2, gd, x3, x4);
+                                if(!ALIGNED_ATOMICxw) {
+                                    MARK;
+                                    LDRxw_U12(x1, wback, 0);
+                                    LDAXRB(x4, wback);
+                                    BFIxw(x1, x4, 0, 8);
+                                    ADDxw_REG(x4, x1, gd);
+                                    STLXRB(x3, x4, wback);
+                                    CBNZx_MARK(x3);
+                                    STRxw_U12(x4, wback, 0);
+                                    SMDMB();
+                                }
+                                if(!ALIGNED_ATOMICxw || !arm64_atomics) {
+                                    MARK2;
+                                    IFX(X_ALL|X_PEND) {
+                                        MOVxw_REG(x2, x1);
+                                        emit_add32(dyn, ninst, rex, x2, gd, x3, x4);
+                                    }
+                                    MOVxw_REG(gd, x1);
                                 }
-                                MOVxw_REG(gd, x1);
                             }
                             break;
                         default:
@@ -852,8 +876,10 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     } else {
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
                         if(opcode==0x81) i64 = F32S; else i64 = F8S;
-                        TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
-                        B_MARK(cNE);
+                        if(!ALIGNED_ATOMICxw) {
+                            TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
+                            B_MARK(cNE);
+                        }
                         if(arm64_atomics) {
                             MOV64xw(x3, i64);
                             UFLAG_IF {
@@ -869,16 +895,18 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             STLXRxw(x3, x1, wback);
                             CBNZx_MARKLOCK(x3);
                         }
-                        B_NEXT_nocond;
-                        MARK;   // unaligned! also, not enough
-                        LDRxw_U12(x1, wback, 0);
-                        LDAXRB(x4, wback);
-                        BFIxw(x1, x4, 0, 8); // re-inject
-                        emit_add32c(dyn, ninst, rex, x1, i64, x3, x4, x5);
-                        STLXRB(x3, x1, wback);
-                        CBNZx_MARK(x3);
-                        STRxw_U12(x1, wback, 0);    // put the whole value
-                        SMDMB();
+                        if(!ALIGNED_ATOMICxw) {
+                            B_NEXT_nocond;
+                            MARK;   // unaligned! also, not enough
+                            LDRxw_U12(x1, wback, 0);
+                            LDAXRB(x4, wback);
+                            BFIxw(x1, x4, 0, 8); // re-inject
+                            emit_add32c(dyn, ninst, rex, x1, i64, x3, x4, x5);
+                            STLXRB(x3, x1, wback);
+                            CBNZx_MARK(x3);
+                            STRxw_U12(x1, wback, 0);    // put the whole value
+                            SMDMB();
+                        }
                     }
                     break;
                 case 1: //OR
@@ -990,8 +1018,10 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     } else {
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, (opcode==0x81)?4:1);
                         if(opcode==0x81) i64 = F32S; else i64 = F8S;
-                        TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
-                        B_MARK(cNE);
+                        if(!ALIGNED_ATOMICxw) {
+                            TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
+                            B_MARK(cNE);
+                        }
                         if(arm64_atomics) {
                             MOV64xw(x5, -i64);
                             UFLAG_IF {
@@ -1008,16 +1038,18 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             STLXRxw(x3, x1, wback);
                             CBNZx_MARKLOCK(x3);
                         }
-                        B_NEXT_nocond;
-                        MARK;   // unaligned! also, not enough
-                        LDRxw_U12(x1, wback, 0);
-                        LDAXRB(x4, wback);
-                        BFIxw(x1, x4, 0, 8); // re-inject
-                        emit_sub32c(dyn, ninst, rex, x1, i64, x3, x4, x5);
-                        STLXRB(x3, x1, wback);
-                        CBNZx_MARK(x3);
-                        STRxw_U12(x1, wback, 0);    // put the whole value
-                        SMDMB();
+                        if(!ALIGNED_ATOMICxw) {
+                            B_NEXT_nocond;
+                            MARK;   // unaligned! also, not enough
+                            LDRxw_U12(x1, wback, 0);
+                            LDAXRB(x4, wback);
+                            BFIxw(x1, x4, 0, 8); // re-inject
+                            emit_sub32c(dyn, ninst, rex, x1, i64, x3, x4, x5);
+                            STLXRB(x3, x1, wback);
+                            CBNZx_MARK(x3);
+                            STRxw_U12(x1, wback, 0);    // put the whole value
+                            SMDMB();
+                        }
                     }
                     break;
                 case 6: //XOR
@@ -1084,7 +1116,6 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 BFIx(gb1, x1, gb2, 8);
                 BFIx(eb1, x4, eb2, 8);
             } else {
-                SMDMB();
                 GETGB(x4);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
                 if(arm64_atomics) {
@@ -1096,7 +1127,6 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     // do the swap 14 -> strb(ed), 1 -> gd
                     STLXRB(x3, x4, ed);
                     CBNZx_MARKLOCK(x3);
-                    SMDMB();
                 }
                 BFIx(gb1, x1, gb2, 8);
             }
@@ -1114,27 +1144,37 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 GETGD;
                 SMDMB();
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
-                TSTx_mask(ed, 1, 0, 1+rex.w);    // mask=3 or 7
-                B_MARK(cNE);
+                if(!ALIGNED_ATOMICxw) {
+                    TSTx_mask(ed, 1, 0, 1+rex.w);    // mask=3 or 7
+                    B_MARK(cNE);
+                }
                 if(arm64_atomics) {
                     SWPALxw(gd, gd, ed);
-                    B_NEXT_nocond;
+                    if(!ALIGNED_ATOMICxw) {
+                        B_NEXT_nocond;
+                    }
                 } else {
                     MARKLOCK;
                     LDAXRxw(x1, ed);
                     STLXRxw(x3, gd, ed);
                     CBNZx_MARKLOCK(x3);
-                    B_MARK2_nocond;
+                    if(!ALIGNED_ATOMICxw) {
+                        B_MARK2_nocond;
+                    }
+                }
+                if(!ALIGNED_ATOMICxw) {
+                    MARK;
+                    LDRxw_U12(x1, ed, 0);
+                    LDAXRB(x3, ed);
+                    STLXRB(x3, gd, ed);
+                    CBNZx_MARK(x3);
+                    STRxw_U12(gd, ed, 0);
+                    SMDMB();
+                    MARK2;
+                }
+                if(!ALIGNED_ATOMICxw || !arm64_atomics) {
+                    MOVxw_REG(gd, x1);
                 }
-                MARK;
-                LDRxw_U12(x1, ed, 0);
-                LDAXRB(x3, ed);
-                STLXRB(x3, gd, ed);
-                CBNZx_MARK(x3);
-                STRxw_U12(gd, ed, 0);
-                MARK2;
-                SMDMB();
-                MOVxw_REG(gd, x1);
             }
             break;
 
@@ -1248,8 +1288,10 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         emit_inc32(dyn, ninst, rex, ed, x3, x4);
                     } else {
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
-                        TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
-                        B_MARK(cNE);    // unaligned
+                        if(!ALIGNED_ATOMICxw) {
+                            TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
+                            B_MARK(cNE);    // unaligned
+                        }
                         if(arm64_atomics) {
                             MOV32w(x3, 1);
                             UFLAG_IF {
@@ -1265,16 +1307,18 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             STLXRxw(x3, x1, wback);
                             CBNZx_MARKLOCK(x3);
                         }
-                        B_NEXT_nocond;
-                        MARK;
-                        LDRxw_U12(x1, wback, 0);
-                        LDAXRB(x4, wback);
-                        BFIxw(x1, x4, 0, 8); // re-inject
-                        emit_inc32(dyn, ninst, rex, x1, x3, x4);
-                        STLXRB(x3, x1, wback);
-                        CBNZw_MARK(x3);
-                        STRxw_U12(x1, wback, 0);
-                        SMDMB();
+                        if(!ALIGNED_ATOMICxw) {
+                            B_NEXT_nocond;
+                            MARK;
+                            LDRxw_U12(x1, wback, 0);
+                            LDAXRB(x4, wback);
+                            BFIxw(x1, x4, 0, 8); // re-inject
+                            emit_inc32(dyn, ninst, rex, x1, x3, x4);
+                            STLXRB(x3, x1, wback);
+                            CBNZw_MARK(x3);
+                            STRxw_U12(x1, wback, 0);
+                            SMDMB();
+                        }
                     }
                     break;
                 case 1: //DEC Ed
@@ -1286,9 +1330,10 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         emit_dec32(dyn, ninst, rex, ed, x3, x4);
                     } else {
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
-                        TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
-                        B_MARK(cNE);    // unaligned
-                        MARKLOCK;
+                        if(!ALIGNED_ATOMICxw) {
+                            TSTx_mask(wback, 1, 0, 1+rex.w);    // mask=3 or 7
+                            B_MARK(cNE);    // unaligned
+                        }
                         if(arm64_atomics) {
                             MOV64xw(x3, -1);
                             UFLAG_IF {
@@ -1298,21 +1343,24 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                 STADDLxw(x3, wback);
                             }
                         } else {
+                            MARKLOCK;
                             LDAXRxw(x1, wback);
                             emit_dec32(dyn, ninst, rex, x1, x3, x4);
                             STLXRxw(x3, x1, wback);
                             CBNZx_MARKLOCK(x3);
                         }
-                        B_NEXT_nocond;
-                        MARK;
-                        LDRxw_U12(x1, wback, 0);
-                        LDAXRB(x4, wback);
-                        BFIxw(x1, x4, 0, 8); // re-inject
-                        emit_dec32(dyn, ninst, rex, x1, x3, x4);
-                        STLXRB(x3, x1, wback);
-                        CBNZw_MARK(x3);
-                        STRxw_U12(x1, wback, 0);
-                        SMDMB();
+                        if(!ALIGNED_ATOMICxw) {
+                            B_NEXT_nocond;
+                            MARK;
+                            LDRxw_U12(x1, wback, 0);
+                            LDAXRB(x4, wback);
+                            BFIxw(x1, x4, 0, 8); // re-inject
+                            emit_dec32(dyn, ninst, rex, x1, x3, x4);
+                            STLXRB(x3, x1, wback);
+                            CBNZw_MARK(x3);
+                            STRxw_U12(x1, wback, 0);
+                            SMDMB();
+                        }
                     }
                     break;
                 default:
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index 2b0a28e0..59f6c4f4 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -69,7 +69,7 @@ uintptr_t geted(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, u
                     }
                 } else {
                     switch(lock) {
-                        case 1: addLockAddress(tmp); break;
+                        case 1: addLockAddress(tmp); if(fixaddress) *fixaddress=tmp; break;
                         case 2: if(isLockAddress(tmp)) *l=1; break;
                     }
                     MOV64x(ret, tmp);
@@ -106,7 +106,7 @@ uintptr_t geted(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, u
                 ADDx_REG(ret, ret, xRIP);
             }
             switch(lock) {
-                case 1: addLockAddress(addr+delta+tmp); break;
+                case 1: addLockAddress(addr+delta+tmp); if(fixaddress) *fixaddress=addr+delta+tmp; break;
                 case 2: if(isLockAddress(addr+delta+tmp)) *l=1; break;
             }
         } else {
@@ -126,7 +126,7 @@ uintptr_t geted(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, u
             i64 = F8S;
         if(i64==0 || ((i64>=absmin) && (i64<=absmax)  && !(i64&mask)) || (unscaled && (i64>-256) && (i64<256))) {
             *fixaddress = i64;
-            if(unscaled && (i64>-256) && (i64<256))
+            if(unscaled && i64 && (i64>-256) && (i64<256))
                 *unscaled = 1;
             if((nextop&7)==4) {
                 if (sib_reg!=4) {
@@ -221,7 +221,7 @@ static uintptr_t geted_32(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t
                     }
                 } else {
                     switch(lock) {
-                        case 1: addLockAddress((int32_t)tmp); break;
+                        case 1: addLockAddress((int32_t)tmp); if(fixaddress) *fixaddress=(int32_t)tmp; break;
                         case 2: if(isLockAddress((int32_t)tmp)) *l=1; break;
                     }
                     MOV32w(ret, tmp);
@@ -237,7 +237,7 @@ static uintptr_t geted_32(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t
             uint64_t tmp = F32;
             MOV32w(ret, tmp);
             switch(lock) {
-                case 1: addLockAddress(tmp); break;
+                case 1: addLockAddress(tmp); if(fixaddress) *fixaddress=tmp; break;
                 case 2: if(isLockAddress(tmp)) *l=1; break;
             }
         } else {
@@ -260,7 +260,7 @@ static uintptr_t geted_32(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t
             i32 = F8S;
         if(i32==0 || ((i32>=absmin) && (i32<=absmax)  && !(i32&mask)) || (unscaled && (i32>-256) && (i32<256))) {
             *fixaddress = i32;
-            if(unscaled && (i32>-256) && (i32<256))
+            if(unscaled && i32 && (i32>-256) && (i32<256))
                 *unscaled = 1;
             if((nextop&7)==4) {
                 if (sib_reg!=4) {
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 6a457ec0..a5c9170b 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -481,6 +481,9 @@
     TSTw_mask(xFlags, 0b010110, 0); \
     CNEGx(r, r, cNE)
 
+#define ALIGNED_ATOMICxw ((fixedaddress && !(fixedaddress&((1<<(2+rex.w))-1))) || box64_dynarec_aligned_atomics)
+#define ALIGNED_ATOMICH ((fixedaddress && !(fixedaddress&1)) || box64_dynarec_aligned_atomics)
+
 // CALL will use x7 for the call address. Return value can be put in ret (unless ret is -1)
 // R0 will not be pushed/popd if ret is -2
 #define CALL(F, ret) call_c(dyn, ninst, F, x7, ret, 1, 0)
diff --git a/src/include/debug.h b/src/include/debug.h
index 6c1ed419..c21ff7ec 100644
--- a/src/include/debug.h
+++ b/src/include/debug.h
@@ -29,6 +29,7 @@ extern int box64_dynarec_fastpage;
 extern int box64_dynarec_wait;
 extern int box64_dynarec_test;
 extern int box64_dynarec_missing;
+extern int box64_dynarec_aligned_atomics;
 #ifdef ARM64
 extern int arm64_asimd;
 extern int arm64_aes;
diff --git a/src/main.c b/src/main.c
index a66e743b..cdbff19c 100644
--- a/src/main.c
+++ b/src/main.c
@@ -66,6 +66,7 @@ int box64_dynarec_jvm = 1;
 int box64_dynarec_wait = 1;
 int box64_dynarec_test = 0;
 int box64_dynarec_missing = 0;
+int box64_dynarec_aligned_atomics = 0;
 uintptr_t box64_nodynarec_start = 0;
 uintptr_t box64_nodynarec_end = 0;
 #ifdef ARM64
@@ -637,6 +638,15 @@ void LoadLogEnv()
         if(box64_dynarec_fastpage)
             printf_log(LOG_INFO, "Dynarec will use Fast HotPage\n");
     }
+    p = getenv("BOX64_DYNAREC_ALIGNED_ATOMICS");
+    if(p) {
+        if(strlen(p)==1) {
+            if(p[0]>='0' && p[0]<='1')
+                box64_dynarec_aligned_atomics = p[0]-'0';
+        }
+        if(box64_dynarec_aligned_atomics)
+            printf_log(LOG_INFO, "Dynarec will generate only aligned atomics code\n");
+    }
     p = getenv("BOX64_DYNAREC_MISSING");
     if(p) {
         if(strlen(p)==1) {
diff --git a/src/tools/rcfile.c b/src/tools/rcfile.c
index 98b0d4cb..098d2616 100644
--- a/src/tools/rcfile.c
+++ b/src/tools/rcfile.c
@@ -142,6 +142,7 @@ ENTRYBOOL(BOX64_DYNAREC_BLEEDING_EDGE, box64_dynarec_bleeding_edge) \
 ENTRYBOOL(BOX64_DYNAREC_JVM, box64_dynarec_jvm)                     \
 ENTRYINT(BOX64_DYNAREC_HOTPAGE, box64_dynarec_hotpage, 0, 255, 8)   \
 ENTRYBOOL(BOX64_DYNAREC_FASTPAGE, box64_dynarec_fastpage)           \
+ENTRYBOOL(BOX64_DYNAREC_ALIGNED_ATOMICS, box64_dynarec_aligned_atomics) \
 ENTRYBOOL(BOX64_DYNAREC_WAIT, box64_dynarec_wait)                   \
 ENTRYSTRING_(BOX64_NODYNAREC, box64_nodynarec)                      \
 ENTRYBOOL(BOX64_DYNAREC_TEST, box64_dynarec_test)                   \
@@ -164,6 +165,7 @@ IGNORE(BOX64_DYNAREC_BLEEDING_EDGE)                                 \
 IGNORE(BOX64_DYNAREC_JVM)                                           \
 IGNORE(BOX64_DYNAREC_HOTPAGE)                                       \
 IGNORE(BOX64_DYNAREC_FASTPAGE)                                      \
+IGNORE(BOX64_DYNAREC_ALIGNED_ATOMICS)                               \
 IGNORE(BOX64_DYNAREC_WAIT)                                          \
 IGNORE(BOX64_NODYNAREC)                                             \
 IGNORE(BOX64_DYNAREC_TEST)                                          \